Accepts a list of prism tokens and converts them into the expected format for the parser gem.
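In normal use this lexer is driven by Prism::Translation::Parser rather than called directly. A minimal sketch, assuming the parser gem is installed alongside prism (the require paths, buffer name, and exact output are illustrative and may vary by version):

  require "prism"
  require "prism/translation/parser"

  buffer = Parser::Source::Buffer.new("(example)", source: "foo = 1\n")
  _ast, _comments, tokens = Prism::Translation::Parser.new.tokenize(buffer)
  tokens.first # => [:tIDENTIFIER, ["foo", #<Parser::Source::Range ...>]] (approximate)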
The direct translation of types between the two lexers.
These constants represent flags in our lex state. We really, really don’t want to be using them and we really, really don’t want to be exposing them as part of our public API. Unfortunately, we don’t have another way of matching the exact tokens that the parser gem expects without them. We should find another way to do this, but in the meantime we’ll hide them from the documentation and mark them as private constants.
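For illustration only, the flag values below are placeholders, not the real constants, which mirror CRuby's lexer state bits and stay private. The sketch shows the kind of bitwise comparison to_a performs when it distinguishes a hash-opening brace from a block-opening one:

  EXPR_BEG   = 1 << 0   # placeholder value, not the real constant
  EXPR_LABEL = 1 << 14  # placeholder value, not the real constant

  state = EXPR_BEG | EXPR_LABEL
  type = (state == EXPR_BEG | EXPR_LABEL) ? :tLBRACE : :tLCURLY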
The Parser::Source::Buffer that the tokens were lexed from.
An array of tuples that contain prism tokens and their associated lex state when they were lexed.
A hash that maps offsets in bytes to offsets in characters.
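A minimal sketch of how such a mapping could be built for a source containing multibyte characters; the real cache is constructed by Prism::Translation::Parser, so this is illustrative only:

  source = "x = \"é\"\n"
  offset_cache = {}
  byte_offset = 0
  source.each_char.with_index do |char, char_index|
    char.bytesize.times { |i| offset_cache[byte_offset + i] = char_index }
    byte_offset += char.bytesize
  end
  offset_cache[byte_offset] = source.length # one past the final character

  offset_cache[5] # => 5 (first byte of "é")
  offset_cache[7] # => 6 ("é" occupies two bytes)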
# File tmp/rubies/ruby-3.4.0-preview1/lib/prism/translation/parser/lexer.rb, line 204
def initialize(source_buffer, lexed, offset_cache)
  @source_buffer = source_buffer
  @lexed = lexed
  @offset_cache = offset_cache
end
Initialize the lexer with the given source buffer, prism tokens, and offset cache.
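A hedged construction sketch; in practice Prism::Translation::Parser builds these arguments itself. The identity offset cache below is an assumption that only holds for ASCII-only sources, and the requires assume the parser gem is installed:

  require "prism"
  require "prism/translation/parser"

  source = "foo = 1\n"
  buffer = Parser::Source::Buffer.new("(example)", source: source)
  lexed  = Prism.lex(source).value           # [[Prism::Token, lex_state], ...]
  offset_cache = Hash.new { |_, byte| byte } # identity mapping, ASCII only

  lexer = Prism::Translation::Parser::Lexer.new(buffer, lexed, offset_cache)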
# File tmp/rubies/ruby-3.4.0-preview1/lib/prism/translation/parser/lexer.rb, line 387
def parse_complex(value)
  value.chomp!("i")
  if value.end_with?("r")
    Complex(0, parse_rational(value))
  elsif value.start_with?(/0[BbOoDdXx]/)
    Complex(0, parse_integer(value))
  else
    Complex(0, value)
  end
rescue ArgumentError
  0i
end
Parse a complex from the string representation.
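Some illustrative inputs and results (a sketch only; this helper is private):

  parse_complex("3i")    # => (0+3i)
  parse_complex("0b10i") # => (0+2i)       binary integer part
  parse_complex("3ri")   # => (0+(3/1)*i)  rational imaginary part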
# File tmp/rubies/ruby-3.4.0-preview1/lib/prism/translation/parser/lexer.rb, line 380
def parse_float(value)
  Float(value)
rescue ArgumentError
  0.0
end
Parse a float from the string representation.
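For example (a sketch only; this helper is private):

  parse_float("1.5e2") # => 150.0
  parse_float("bogus") # => 0.0 (ArgumentError is rescued)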
# File tmp/rubies/ruby-3.4.0-preview1/lib/prism/translation/parser/lexer.rb, line 373
def parse_integer(value)
  Integer(value)
rescue ArgumentError
  0
end
Parse an integer from the string representation.
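For example (a sketch only; this helper is private):

  parse_integer("0x1f")  # => 31
  parse_integer("1_000") # => 1000
  parse_integer("junk")  # => 0 (ArgumentError is rescued)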
# File tmp/rubies/ruby-3.4.0-preview1/lib/prism/translation/parser/lexer.rb, line 402
def parse_rational(value)
  value.chomp!("r")
  if value.start_with?(/0[BbOoDdXx]/)
    Rational(parse_integer(value))
  else
    Rational(value)
  end
rescue ArgumentError
  0r
end
Parse a rational from the string representation.
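Some illustrative inputs and results (a sketch only; this helper is private):

  parse_rational("3r")    # => (3/1)
  parse_rational("1.5r")  # => (3/2)
  parse_rational("0x10r") # => (16/1)  prefixed literals go through parse_integer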
# File tmp/rubies/ruby-3.4.0-preview1/lib/prism/translation/parser/lexer.rb, line 214
def to_a
  tokens = []
  index = 0
  length = lexed.length
  heredoc_identifier_stack = []
  while index < length
    token, state = lexed[index]
    index += 1
    next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)
    type = TYPES.fetch(token.type)
    value = token.value
    location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset])
    case type
    when :tCHARACTER
      value.delete_prefix!("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        start_index = index
        while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
          value += next_token.value
          index += 1
        end
        if start_index != index
          value += next_token.value
          location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
          index += 1
        end
      else
        value.chomp!
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
      end
    when :tNL
      value = nil
    when :tFLOAT
      value = parse_float(value)
    when :tIMAGINARY
      value = parse_complex(value)
    when :tINTEGER
      if value.start_with?("+")
        tokens << [:tUNARY_NUM, ["+", Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])]]
        location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
      end
      value = parse_integer(value)
    when :tLABEL
      value.chomp!(":")
    when :tLABEL_END
      value.chomp!(":")
    when :tLCURLY
      type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
    when :tNTH_REF
      value = parse_integer(value.delete_prefix("$"))
    when :tOP_ASGN
      value.chomp!("=")
    when :tRATIONAL
      value = parse_rational(value)
    when :tSPACE
      value = nil
    when :tSTRING_BEG
      if token.type == :HEREDOC_START
        heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
      end
      if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 1
      elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
        next_location = token.location.join(next_next_token.location)
        type = :tSTRING
        value = next_token.value.gsub("\\\\", "\\")
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 2
      elsif value.start_with?("<<")
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        if quote == "`"
          type = :tXSTRING_BEG
          value = "<<`"
        else
          value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
        end
      end
    when :tSTRING_CONTENT
      unless (lines = token.value.lines).one?
        start_offset = offset_cache[token.location.start_offset]
        lines.map do |line|
          newline = line.end_with?("\r\n") ? "\r\n" : "\n"
          chomped_line = line.chomp
          if match = chomped_line.match(/(?<backslashes>\\+)\z/)
            adjustment = match[:backslashes].size / 2
            adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
            if match[:backslashes].size.odd?
              adjusted_line.delete_suffix!("\\")
              adjustment += 2
            else
              adjusted_line << newline
            end
          else
            adjusted_line = line
            adjustment = 0
          end
          end_offset = start_offset + adjusted_line.length + adjustment
          tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
          start_offset = end_offset
        end
        next
      end
    when :tSTRING_DVAR
      value = nil
    when :tSTRING_END
      if token.type == :HEREDOC_END && value.end_with?("\n")
        newline_length = value.end_with?("\r\n") ? 2 : 1
        value = heredoc_identifier_stack.pop
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
      elsif token.type == :REGEXP_END
        value = value[0]
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
      end
    when :tSYMBEG
      if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 1
      end
    when :tFID
      if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
        type = :tIDENTIFIER
      end
    when :tXSTRING_BEG
      if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
        type = :tBACK_REF2
      end
    end
    tokens << [type, [value, location]]
    if token.type == :REGEXP_END
      tokens << [:tREGEXP_OPT, [token.value[1..], Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])]]
    end
  end
  tokens
end
Convert the prism tokens into the expected format for the parser gem.
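Continuing the construction sketch above, the result is an array of [type, [value, range]] tuples in the parser gem's format (abridged and approximate; exact tokens depend on the prism and parser versions):

  lexer.to_a
  # => [[:tIDENTIFIER, ["foo", #<Parser::Source::Range (example) 0...3>]],
  #     [:tEQL,        ["=",   #<Parser::Source::Range (example) 4...5>]],
  #     [:tINTEGER,    [1,     #<Parser::Source::Range (example) 6...7>]],
  #     [:tNL,         [nil,   #<Parser::Source::Range (example) 7...8>]]]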