Ripper
is a Ruby script parser.
You can get information from the parser in an event-based style, such as abstract syntax trees or a simple lexical analysis of the Ruby program.
Usage
Ripper
provides an easy interface for parsing your program into a symbolic expression tree (or S-expression).
Understanding the output of the parser can be challenging, so it’s recommended that you use PP
to format the output for legibility.
require 'ripper' require 'pp' pp Ripper.sexp('def hello(world) "Hello, #{world}!"; end') #=> [:program, [[:def, [:@ident, "hello", [1, 4]], [:paren, [:params, [[:@ident, "world", [1, 10]]], nil, nil, nil, nil, nil, nil]], [:bodystmt, [[:string_literal, [:string_content, [:@tstring_content, "Hello, ", [1, 18]], [:string_embexpr, [[:var_ref, [:@ident, "world", [1, 27]]]]], [:@tstring_content, "!", [1, 33]]]]], nil, nil, nil]]]]
You can see in the example above, the expression starts with :program
.
From here, a method definition at :def
, followed by the method’s identifier :@ident
. After the method’s identifier comes the parentheses :paren
and the method parameters under :params
.
Next is the method body, starting at :bodystmt
(stmt
meaning statement), which contains the full definition of the method.
In our case, we’re simply returning a String
, so next we have the :string_literal
expression.
Within our :string_literal
you’ll notice two @tstring_content
; these are the literal parts for Hello,
and !
. Between the two @tstring_content
statements is a :string_embexpr
, where embexpr is an embedded expression. Our expression consists of a local variable, or var_ref
, with the identifier (@ident
) of world
.
Resources
Requirements
-
ruby 1.9 (support CVS HEAD only)
-
bison 1.28 or later (Other yaccs do not work)
License
Ruby License.
-
Minero Aoki
-
aamine@loveruby.net
This array contains the names of parser events.
This array contains the names of scanner events.
This array contains the names of all ripper events.
/*
 * USE OF RIPPER LIBRARY ONLY.
 *
 * Strips up to +width+ leading whitespaces from +input+ by delegating to
 * dedent_string(), and returns the stripped column width as an Integer.
 */
static VALUE
parser_dedent_string(VALUE self, VALUE input, VALUE width)
{
    int max_width;
    int stripped;

    /* Check the string argument first, then convert the width. */
    StringValue(input);
    max_width = NUM2UINT(width);

    stripped = dedent_string(input, max_width);
    return INT2NUM(stripped);
}
USE OF RIPPER LIBRARY ONLY.
Strips up to width
leading whitespaces from input
, and returns the stripped column width.
# File tmp/rubies/ruby-2.7.6/ext/ripper/lib/ripper/lexer.rb, line 44
# Tokenizes the Ruby program +src+ and returns the result of Lexer#lex:
# an array of arrays formatted like [[lineno, column], type, token, state].
#
# +filename+ defaults to '-' and +lineno+ to 1; both are passed straight
# through to the Lexer.
def Ripper.lex(src, filename = '-', lineno = 1)
  lexer = Lexer.new(src, filename, lineno)
  lexer.lex
end
Tokenizes the Ruby program and returns an array of arrays, each of which is formatted like [[lineno, column], type, token, state]
.
require 'ripper' require 'pp' pp Ripper.lex("def m(a) nil end") #=> [[[1, 0], :on_kw, "def", FNAME ], [[1, 3], :on_sp, " ", FNAME ], [[1, 4], :on_ident, "m", ENDFN ], [[1, 5], :on_lparen, "(", BEG|LABEL], [[1, 6], :on_ident, "a", ARG ], [[1, 7], :on_rparen, ")", ENDFN ], [[1, 8], :on_sp, " ", BEG ], [[1, 9], :on_kw, "nil", END ], [[1, 12], :on_sp, " ", END ], [[1, 13], :on_kw, "end", END ]]
/*
 * Returns a string representation of lex_state.
 * +state+ is converted to a C int before being handed to
 * rb_parser_lex_state_name().
 */
static VALUE
ripper_lex_state_name(VALUE self, VALUE state)
{
    const int lex_state = NUM2INT(state);

    return rb_parser_lex_state_name(lex_state);
}
Returns a string representation of lex_state.
/*
 * Create a new Ripper object.  src must be a String, an IO, or an Object
 * which has a gets method.  fname and lineno are optional.
 *
 * This method does not start parsing; it only records the input source,
 * the file name and the starting line on the parser state.
 * Always returns nil.
 */
static VALUE
ripper_initialize(int argc, VALUE *argv, VALUE self)
{
struct parser_params *p;
VALUE src, fname, lineno;
TypedData_Get_Struct(self, struct parser_params, &parser_data_type, p);
/* "12": one required argument (src), up to two optional ones (fname, lineno). */
rb_scan_args(argc, argv, "12", &src, &fname, &lineno);
/* Pick the line-reading callback according to the kind of source given. */
if (RB_TYPE_P(src, T_FILE)) {
p->lex.gets = ripper_lex_io_get;
}
else if (rb_respond_to(src, id_gets)) {
p->lex.gets = ripper_lex_get_generic;
}
else {
/* Anything else must be String-convertible. */
StringValue(src);
p->lex.gets = lex_get_str;
}
p->lex.input = src;
p->eofp = 0;
/* Use a frozen "(ripper)" when no file name was given; otherwise store a
 * frozen copy of the caller's name after StringValueCStr has checked it. */
if (NIL_P(fname)) {
fname = STR_NEW2("(ripper)");
OBJ_FREEZE(fname);
}
else {
StringValueCStr(fname);
fname = rb_str_new_frozen(fname);
}
parser_initialize(p);
/* The source file fields are assigned after parser_initialize(p) has run. */
p->ruby_sourcefile_string = fname;
p->ruby_sourcefile = RSTRING_PTR(fname);
/* Stored as lineno - 1; a nil lineno is stored as 0. */
p->ruby_sourceline = NIL_P(lineno) ? 0 : NUM2INT(lineno) - 1;
return Qnil;
}
Create a new Ripper
object. src must be a String
, an IO
, or an Object
which has a gets
method.
This method does not start parsing. See also Ripper#parse
and Ripper.parse
.
# File tmp/rubies/ruby-2.7.6/ext/ripper/lib/ripper/core.rb, line 18
# Builds a parser for +src+ with the receiver's +new+ and returns the result
# of calling +parse+ on it.
#
# +filename+ defaults to '(ripper)' and +lineno+ to 1.
def Ripper.parse(src, filename = '(ripper)', lineno = 1)
  parser = new(src, filename, lineno)
  parser.parse
end
# File tmp/rubies/ruby-2.7.6/ext/ripper/lib/ripper/sexp.rb, line 31
# [EXPERIMENTAL]
# Parses +src+ with SexpBuilderPP and returns the resulting S-expression
# tree, or nil when the builder reports an error.
#
# +filename+ defaults to '-' and +lineno+ to 1.
def Ripper.sexp(src, filename = '-', lineno = 1)
  pp_builder = SexpBuilderPP.new(src, filename, lineno)
  tree = pp_builder.parse
  return nil if pp_builder.error?

  tree
end
- EXPERIMENTAL
-
Parses
src
and creates an S-exp tree. Returns a more readable tree than Ripper.sexp_raw
. This method is mainly for developer use.
require 'ripper' require 'pp' pp Ripper.sexp("def m(a) nil end") #=> [:program, [[:def, [:@ident, "m", [1, 4]], [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil, nil, nil, nil]], [:bodystmt, [[:var_ref, [:@kw, "nil", [1, 9]]]], nil, nil, nil]]]]
# File tmp/rubies/ruby-2.7.6/ext/ripper/lib/ripper/sexp.rb, line 57
# [EXPERIMENTAL]
# Parses +src+ with SexpBuilder and returns the resulting S-expression tree,
# or nil when the builder reports an error.
#
# +filename+ defaults to '-' and +lineno+ to 1.
def Ripper.sexp_raw(src, filename = '-', lineno = 1)
  raw_builder = SexpBuilder.new(src, filename, lineno)
  tree = raw_builder.parse
  return nil if raw_builder.error?

  tree
end
- EXPERIMENTAL
-
Parses
src
and creates an S-exp tree. This method is mainly for developer use.
require 'ripper' require 'pp' pp Ripper.sexp_raw("def m(a) nil end") #=> [:program, [:stmts_add, [:stmts_new], [:def, [:@ident, "m", [1, 4]], [:paren, [:params, [[:@ident, "a", [1, 6]]], nil, nil, nil]], [:bodystmt, [:stmts_add, [:stmts_new], [:var_ref, [:@kw, "nil", [1, 9]]]], nil, nil, nil]]]]
# File tmp/rubies/ruby-2.7.6/ext/ripper/lib/ripper/lexer.rb, line 201
# [EXPERIMENTAL]
# Matches +pattern+ against +src+ via token_match and returns the string for
# group +n+ of the match (default 0), or nil when nothing matched.
def Ripper.slice(src, pattern, n = 0)
  matched = token_match(src, pattern)
  return nil unless matched

  matched.string(n)
end
- EXPERIMENTAL
-
Parses
src
and returns the string which matched pattern
. pattern
should be described as a Regexp
.
require 'ripper' p Ripper.slice('def m(a) nil end', 'ident') #=> "m" p Ripper.slice('def m(a) nil end', '[ident lparen rparen]+') #=> "m(a)" p Ripper.slice("<<EOS\nstring\nEOS", 'heredoc_beg nl $(tstring_content*) heredoc_end', 1) #=> "string\n"
# File tmp/rubies/ruby-2.7.6/ext/ripper/lib/ripper/lexer.rb, line 21
# Tokenizes the Ruby program +src+ and returns the result of Lexer#tokenize,
# an array of strings.
#
# +filename+ defaults to '-' and +lineno+ to 1.
def Ripper.tokenize(src, filename = '-', lineno = 1)
  lexer = Lexer.new(src, filename, lineno)
  lexer.tokenize
end
Tokenizes the Ruby program and returns an array of strings.
p Ripper.tokenize("def m(a) nil end") # => ["def", " ", "m", "(", "a", ")", " ", "nil", " ", "end"]
/*
 * Return column number of current parsing line.  This number starts from 0.
 *
 * Raises ArgumentError when the object is uninitialized, and returns nil
 * when no parsing thread is recorded on the parser.
 */
static VALUE
ripper_column(VALUE self)
{
    struct parser_params *parser;
    long offset;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }
    if (NIL_P(parser->parsing_thread)) {
        return Qnil;
    }

    /* Distance from lex.pbeg to lex.ptok. */
    offset = parser->lex.ptok - parser->lex.pbeg;
    return LONG2NUM(offset);
}
Return column number of current parsing line. This number starts from 0.
# File tmp/rubies/ruby-2.7.6/ext/ripper/lib/ripper/core.rb, line 63
# Called when the parser finds a syntax error.
# This default implementation ignores +msg+ and returns nil.
def compile_error(msg)
  nil
end
This method is called when the parser finds a syntax error.
/* Get debug output: returns the debug_output value stored on the parser. */
VALUE
rb_parser_get_debug_output(VALUE self)
{
    struct parser_params *parser;
    VALUE output;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    output = parser->debug_output;
    return output;
}
Get debug output.
/*
 * Set debug output: stores +output+ as the parser's debug_output and
 * returns the stored value.
 */
VALUE
rb_parser_set_debug_output(VALUE self, VALUE output)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    parser->debug_output = output;
    return parser->debug_output;
}
Set
debug output.
/*
 * USE OF RIPPER LIBRARY ONLY.
 *
 * Strips up to +width+ leading whitespaces from +input+ by delegating to
 * dedent_string(), and returns the stripped column width as an Integer.
 */
static VALUE
parser_dedent_string(VALUE self, VALUE input, VALUE width)
{
    int max_width;
    int stripped;

    /* Check the string argument first, then convert the width. */
    StringValue(input);
    max_width = NUM2UINT(width);

    stripped = dedent_string(input, max_width);
    return INT2NUM(stripped);
}
USE OF RIPPER LIBRARY ONLY.
Strips up to width
leading whitespaces from input
, and returns the stripped column width.
/*
 * Return encoding of the source: the parser's enc field converted to a
 * Ruby object with rb_enc_from_encoding().
 */
VALUE
rb_parser_encoding(VALUE vparser)
{
    struct parser_params *parser;
    VALUE encoding;

    TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser);
    encoding = rb_enc_from_encoding(parser->enc);
    return encoding;
}
Return encoding of the source.
/*
 * Return true if parsed source ended by __END__, i.e. when the parser's
 * ruby__end__seen flag is set; false otherwise.
 */
VALUE
rb_parser_end_seen_p(VALUE vparser)
{
    struct parser_params *parser;

    TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser);
    if (parser->ruby__end__seen) {
        return Qtrue;
    }
    return Qfalse;
}
Return true if parsed source ended by __END__.
/*
 * Return true if parsed source has errors, i.e. when the parser's error_p
 * flag is set; false otherwise.
 */
static VALUE
ripper_error_p(VALUE vparser)
{
    struct parser_params *parser;

    TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, parser);
    if (parser->error_p) {
        return Qtrue;
    }
    return Qfalse;
}
Return true if parsed source has errors.
/*
 * Return current parsing filename (the parser's ruby_sourcefile_string).
 * Raises ArgumentError when the object is uninitialized.
 */
static VALUE
ripper_filename(VALUE self)
{
    struct parser_params *parser;
    VALUE filename;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }

    filename = parser->ruby_sourcefile_string;
    return filename;
}
Return current parsing filename.
/*
 * Return line number of current parsing line.  This number starts from 1.
 *
 * Raises ArgumentError when the object is uninitialized, and returns nil
 * when no parsing thread is recorded on the parser.
 */
static VALUE
ripper_lineno(VALUE self)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }
    if (NIL_P(parser->parsing_thread)) {
        return Qnil;
    }

    return INT2NUM(parser->ruby_sourceline);
}
Return line number of current parsing line. This number starts from 1.
/*
 * Starts parsing and returns the value of the root action (the parser's
 * result field).
 *
 * Raises ArgumentError when the object is uninitialized, or when a parsing
 * thread is already recorded: "not reentrant" if it is the current thread,
 * "not multithread-safe" otherwise.
 */
static VALUE
ripper_parse(VALUE self)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }

    if (!NIL_P(parser->parsing_thread)) {
        if (parser->parsing_thread == rb_thread_current()) {
            rb_raise(rb_eArgError, "Ripper#parse is not reentrant");
        }
        else {
            rb_raise(rb_eArgError, "Ripper#parse is not multithread-safe");
        }
    }

    /* Record the current thread as the one parsing before running the parse. */
    parser->parsing_thread = rb_thread_current();

    /* ripper_ensure runs after ripper_parse0 even if the latter raises. */
    rb_ensure(ripper_parse0, self, ripper_ensure, self);

    return parser->result;
}
Starts parsing and returns the value of the root action.
/*
 * Return scanner state of current token (the parser's lex.state) as an
 * Integer.
 *
 * Raises ArgumentError when the object is uninitialized, and returns nil
 * when no parsing thread is recorded on the parser.
 */
static VALUE
ripper_state(VALUE self)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }
    if (NIL_P(parser->parsing_thread)) {
        return Qnil;
    }

    return INT2NUM(parser->lex.state);
}
Return scanner state of current token.
/*
 * Return the current token string: the part of lex.lastline that starts at
 * offset (ptok - pbeg) and is (pcur - ptok) long.
 *
 * Raises ArgumentError when the object is uninitialized, and returns nil
 * when no parsing thread is recorded on the parser.
 */
static VALUE
ripper_token(VALUE self)
{
    struct parser_params *parser;
    long start, length;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (!ripper_initialized_p(parser)) {
        rb_raise(rb_eArgError, "method called for uninitialized object");
    }
    if (NIL_P(parser->parsing_thread)) {
        return Qnil;
    }

    start = parser->lex.ptok - parser->lex.pbeg;
    length = parser->lex.pcur - parser->lex.ptok;
    return rb_str_subseq(parser->lex.lastline, start, length);
}
Return the current token string.
# File tmp/rubies/ruby-2.7.6/ext/ripper/lib/ripper/core.rb, line 54
# Called when a weak warning is produced by the parser.
# +fmt+ and +args+ are printf style.
# This default implementation ignores them and returns nil.
def warn(fmt, *args)
  nil
end
This method is called when a weak warning is produced by the parser. fmt
and args
are printf style.
# File tmp/rubies/ruby-2.7.6/ext/ripper/lib/ripper/core.rb, line 59
# Called when a strong warning is produced by the parser.
# +fmt+ and +args+ are printf style.
# This default implementation ignores them and returns nil.
def warning(fmt, *args)
  nil
end
This method is called when a strong warning is produced by the parser. fmt
and args
are printf style.
/* Get yydebug: true when the parser's debug flag is set, false otherwise. */
VALUE
rb_parser_get_yydebug(VALUE self)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    if (parser->debug) {
        return Qtrue;
    }
    return Qfalse;
}
Get yydebug.
/*
 * Set yydebug: stores the truthiness of +flag+ in the parser's debug field
 * and returns +flag+ itself unchanged.
 */
VALUE
rb_parser_set_yydebug(VALUE self, VALUE flag)
{
    struct parser_params *parser;

    TypedData_Get_Struct(self, struct parser_params, &parser_data_type, parser);
    parser->debug = RTEST(flag);

    return flag;
}
Set
yydebug.