Accepts a list of prism tokens and converts them into the expected format for the parser gem.
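In normal use this lexer is driven by Prism::Translation::Parser rather than called directly. A minimal sketch, assuming the parser gem is installed alongside prism (the require paths, buffer name, and exact output are illustrative and may vary by version):

  require "prism"
  require "prism/translation/parser"

  buffer = Parser::Source::Buffer.new("(example)", source: "foo = 1\n")
  _ast, _comments, tokens = Prism::Translation::Parser.new.tokenize(buffer)
  tokens.first # => [:tIDENTIFIER, ["foo", #<Parser::Source::Range ...>]] (approximate)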
The direct translation of types between the two lexers.
These constants represent flags in our lex state. We really, really don’t want to be using them and we really, really don’t want to be exposing them as part of our public API. Unfortunately, we don’t have another way of matching the exact tokens that the parser gem expects without them. We should find another way to do this, but in the meantime we’ll hide them from the documentation and mark them as private constants.
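For illustration only, the flag values below are placeholders, not the real constants, which mirror CRuby's lexer state bits and stay private. The sketch shows the kind of bitwise comparison to_a performs when it distinguishes a hash-opening brace from a block-opening one:

  EXPR_BEG   = 1 << 0   # placeholder value, not the real constant
  EXPR_LABEL = 1 << 14  # placeholder value, not the real constant

  state = EXPR_BEG | EXPR_LABEL
  type = (state == EXPR_BEG | EXPR_LABEL) ? :tLBRACE : :tLCURLY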
The Parser::Source::Buffer that the tokens were lexed from.
An array of tuples that contain prism tokens and their associated lex state when they were lexed.
A hash that maps offsets in bytes to offsets in characters.
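A minimal sketch of how such a mapping could be built for a source containing multibyte characters; the real cache is constructed by Prism::Translation::Parser, so this is illustrative only:

  source = "x = \"é\"\n"
  offset_cache = {}
  byte_offset = 0
  source.each_char.with_index do |char, char_index|
    char.bytesize.times { |i| offset_cache[byte_offset + i] = char_index }
    byte_offset += char.bytesize
  end
  offset_cache[byte_offset] = source.length # one past the final character

  offset_cache[5] # => 5 (first byte of "é")
  offset_cache[7] # => 6 ("é" occupies two bytes)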
# File tmp/rubies/ruby-3.4.0-preview1/lib/prism/translation/parser/lexer.rb, line 204
def initialize(source_buffer, lexed, offset_cache)
  @source_buffer = source_buffer
  @lexed = lexed
  @offset_cache = offset_cache
end
Initialize the lexer with the given source buffer, prism tokens, and offset cache.
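A hedged construction sketch; in practice Prism::Translation::Parser builds these arguments itself. The identity offset cache below is an assumption that only holds for ASCII-only sources, and the requires assume the parser gem is installed:

  require "prism"
  require "prism/translation/parser"

  source = "foo = 1\n"
  buffer = Parser::Source::Buffer.new("(example)", source: source)
  lexed  = Prism.lex(source).value           # [[Prism::Token, lex_state], ...]
  offset_cache = Hash.new { |_, byte| byte } # identity mapping, ASCII only

  lexer = Prism::Translation::Parser::Lexer.new(buffer, lexed, offset_cache)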
# File tmp/rubies/ruby-3.4.0-preview1/lib/prism/translation/parser/lexer.rb, line 387
def parse_complex(value)
  value.chomp!("i")
  if value.end_with?("r")
    Complex(0, parse_rational(value))
  elsif value.start_with?(/0[BbOoDdXx]/)
    Complex(0, parse_integer(value))
  else
    Complex(0, value)
  end
rescue ArgumentError
  0i
end
Parse a complex from the string representation.
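Some illustrative inputs and results (a sketch only; this helper is private):

  parse_complex("3i")    # => (0+3i)
  parse_complex("0b10i") # => (0+2i)       binary integer part
  parse_complex("3ri")   # => (0+(3/1)*i)  rational imaginary part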
# File tmp/rubies/ruby-3.4.0-preview1/lib/prism/translation/parser/lexer.rb, line 380
def parse_float(value)
  Float(value)
rescue ArgumentError
  0.0
end
Parse a float from the string representation.
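For example (a sketch only; this helper is private):

  parse_float("1.5e2") # => 150.0
  parse_float("bogus") # => 0.0 (ArgumentError is rescued)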
# File tmp/rubies/ruby-3.4.0-preview1/lib/prism/translation/parser/lexer.rb, line 373
def parse_integer(value)
  Integer(value)
rescue ArgumentError
  0
end
Parse an integer from the string representation.
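For example (a sketch only; this helper is private):

  parse_integer("0x1f")  # => 31
  parse_integer("1_000") # => 1000
  parse_integer("junk")  # => 0 (ArgumentError is rescued)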
# File tmp/rubies/ruby-3.4.0-preview1/lib/prism/translation/parser/lexer.rb, line 402
def parse_rational(value)
  value.chomp!("r")
  if value.start_with?(/0[BbOoDdXx]/)
    Rational(parse_integer(value))
  else
    Rational(value)
  end
rescue ArgumentError
  0r
end
Parse a rational from the string representation.
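Some illustrative inputs and results (a sketch only; this helper is private):

  parse_rational("3r")    # => (3/1)
  parse_rational("1.5r")  # => (3/2)
  parse_rational("0x10r") # => (16/1)  prefixed literals go through parse_integer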
# File tmp/rubies/ruby-3.4.0-preview1/lib/prism/translation/parser/lexer.rb, line 214
def to_a
  tokens = []
  index = 0
  length = lexed.length
  heredoc_identifier_stack = []
  while index < length
    token, state = lexed[index]
    index += 1
    next if %i[IGNORED_NEWLINE __END__ EOF].include?(token.type)
    type = TYPES.fetch(token.type)
    value = token.value
    location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset])
    case type
    when :tCHARACTER
      value.delete_prefix!("?")
    when :tCOMMENT
      if token.type == :EMBDOC_BEGIN
        start_index = index
        while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
          value += next_token.value
          index += 1
        end
        if start_index != index
          value += next_token.value
          location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[lexed[index][0].location.end_offset])
          index += 1
        end
      else
        value.chomp!
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - 1])
      end
    when :tNL
      value = nil
    when :tFLOAT
      value = parse_float(value)
    when :tIMAGINARY
      value = parse_complex(value)
    when :tINTEGER
      if value.start_with?("+")
        tokens << [:tUNARY_NUM, ["+", Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])]]
        location = Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])
      end
      value = parse_integer(value)
    when :tLABEL
      value.chomp!(":")
    when :tLABEL_END
      value.chomp!(":")
    when :tLCURLY
      type = :tLBRACE if state == EXPR_BEG | EXPR_LABEL
    when :tNTH_REF
      value = parse_integer(value.delete_prefix("$"))
    when :tOP_ASGN
      value.chomp!("=")
    when :tRATIONAL
      value = parse_rational(value)
    when :tSPACE
      value = nil
    when :tSTRING_BEG
      if token.type == :HEREDOC_START
        heredoc_identifier_stack.push(value.match(/<<[-~]?["'`]?(?<heredoc_identifier>.*?)["'`]?\z/)[:heredoc_identifier])
      end
      if ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_END
        next_location = token.location.join(next_token.location)
        type = :tSTRING
        value = ""
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 1
      elsif ["\"", "'"].include?(value) && (next_token = lexed[index][0]) && next_token.type == :STRING_CONTENT && next_token.value.lines.count <= 1 && (next_next_token = lexed[index + 1][0]) && next_next_token.type == :STRING_END
        next_location = token.location.join(next_next_token.location)
        type = :tSTRING
        value = next_token.value.gsub("\\\\", "\\")
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 2
      elsif value.start_with?("<<")
        quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
        if quote == "`"
          type = :tXSTRING_BEG
          value = "<<`"
        else
          value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
        end
      end
    when :tSTRING_CONTENT
      unless (lines = token.value.lines).one?
        start_offset = offset_cache[token.location.start_offset]
        lines.map do |line|
          newline = line.end_with?("\r\n") ? "\r\n" : "\n"
          chomped_line = line.chomp
          if match = chomped_line.match(/(?<backslashes>\\+)\z/)
            adjustment = match[:backslashes].size / 2
            adjusted_line = chomped_line.delete_suffix("\\" * adjustment)
            if match[:backslashes].size.odd?
              adjusted_line.delete_suffix!("\\")
              adjustment += 2
            else
              adjusted_line << newline
            end
          else
            adjusted_line = line
            adjustment = 0
          end
          end_offset = start_offset + adjusted_line.length + adjustment
          tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
          start_offset = end_offset
        end
        next
      end
    when :tSTRING_DVAR
      value = nil
    when :tSTRING_END
      if token.type == :HEREDOC_END && value.end_with?("\n")
        newline_length = value.end_with?("\r\n") ? 2 : 1
        value = heredoc_identifier_stack.pop
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.end_offset - newline_length])
      elsif token.type == :REGEXP_END
        value = value[0]
        location = Range.new(source_buffer, offset_cache[token.location.start_offset], offset_cache[token.location.start_offset + 1])
      end
    when :tSYMBEG
      if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR
        next_location = token.location.join(next_token.location)
        type = :tSYMBOL
        value = next_token.value
        value = { "~@" => "~", "!@" => "!" }.fetch(value, value)
        location = Range.new(source_buffer, offset_cache[next_location.start_offset], offset_cache[next_location.end_offset])
        index += 1
      end
    when :tFID
      if !tokens.empty? && tokens.dig(-1, 0) == :kDEF
        type = :tIDENTIFIER
      end
    when :tXSTRING_BEG
      if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :STRING_END
        type = :tBACK_REF2
      end
    end
    tokens << [type, [value, location]]
    if token.type == :REGEXP_END
      tokens << [:tREGEXP_OPT, [token.value[1..], Range.new(source_buffer, offset_cache[token.location.start_offset + 1], offset_cache[token.location.end_offset])]]
    end
  end
  tokens
end
Convert the prism tokens into the expected format for the parser gem.
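Continuing the construction sketch above, the result is an array of [type, [value, range]] tuples in the parser gem's format (abridged and approximate; exact tokens depend on the prism and parser versions):

  lexer.to_a
  # => [[:tIDENTIFIER, ["foo", #<Parser::Source::Range (example) 0...3>]],
  #     [:tEQL,        ["=",   #<Parser::Source::Range (example) 4...5>]],
  #     [:tINTEGER,    [1,     #<Parser::Source::Range (example) 6...7>]],
  #     [:tNL,         [nil,   #<Parser::Source::Range (example) 7...8>]]]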