MatchData
encapsulates the result of matching a Regexp
against string. It is returned by Regexp#match
and String#match
, and also stored in a global variable returned by Regexp.last_match
.
Usage:
url = 'https://docs.ruby-lang.org/en/2.5.0/MatchData.html' m = url.match(/(\d\.?)+/) # => #<MatchData "2.5.0" 1:"0"> m.string # => "https://docs.ruby-lang.org/en/2.5.0/MatchData.html" m.regexp # => /(\d\.?)+/ # entire matched substring: m[0] # => "2.5.0" # Working with unnamed captures m = url.match(%r{([^/]+)/([^/]+)\.html$}) m.captures # => ["2.5.0", "MatchData"] m[1] # => "2.5.0" m.values_at(1, 2) # => ["2.5.0", "MatchData"] # Working with named captures m = url.match(%r{(?<version>[^/]+)/(?<module>[^/]+)\.html$}) m.captures # => ["2.5.0", "MatchData"] m.named_captures # => {"version"=>"2.5.0", "module"=>"MatchData"} m[:version] # => "2.5.0" m.values_at(:version, :module) # => ["2.5.0", "MatchData"] # Numerical indexes are working, too m[1] # => "2.5.0" m.values_at(1, 2) # => ["2.5.0", "MatchData"]
Global variables equivalence
Parts of last MatchData
(returned by Regexp.last_match
) are also aliased as global variables:
-
$~
isRegexp.last_match
; -
$&
isRegexp.last_match
[ 0 ]
; -
$1
,$2
, and so on areRegexp.last_match
[ i ]
(captures by number); -
$`
isRegexp.last_match
.pre_match
; -
$'
isRegexp.last_match
.post_match
; -
$+
isRegexp.last_match
[ -1 ]
(the last capture).
See also “Special global variables” section in Regexp
documentation.
static VALUE
match_aref(int argc, VALUE *argv, VALUE match)
{
VALUE idx, length;
match_check(match);
rb_scan_args(argc, argv, "11", &idx, &length);
if (NIL_P(length)) {
if (FIXNUM_P(idx)) {
return rb_reg_nth_match(FIX2INT(idx), match);
}
else {
int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, idx);
if (num >= 0) {
return rb_reg_nth_match(num, match);
}
else {
return match_ary_aref(match, idx, Qnil);
}
}
}
else {
long beg = NUM2LONG(idx);
long len = NUM2LONG(length);
long num_regs = RMATCH_REGS(match)->num_regs;
if (len < 0) {
return Qnil;
}
if (beg < 0) {
beg += num_regs;
if (beg < 0) return Qnil;
}
else if (beg > num_regs) {
return Qnil;
}
if (beg+len > num_regs) {
len = num_regs - beg;
}
return match_ary_subseq(match, beg, len, Qnil);
}
}
When arguments index
, +start and length
, or range
are given, returns match and captures in the style of Array#[]
:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m[1, 2] # => ["H", "X"] m[1..3] # => ["H", "X", "113"] m[-3, 2] # => ["X", "113"]
When string or symbol argument name
is given, returns the matched substring for the given name:
m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge") # => #<MatchData "hoge" foo:"h" bar:"ge"> m['foo'] # => "h" m[:bar] # => "ge"
static VALUE
match_begin(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
backref_number_check(regs, i);
if (BEG(i) < 0)
return Qnil;
update_char_offset(match);
return LONG2NUM(RMATCH(match)->rmatch->char_offset[i].beg);
}
Returns the offset (in characters) of the beginning of the specified match.
When non-negative integer argument n
is given, returns the offset of the beginning of the n
th match:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m.begin(0) # => 1 m[3] # => "113" m.begin(3) # => 3 m = /(т)(е)(с)/.match('тест') # => #<MatchData "тес" 1:"т" 2:"е" 3:"с"> m[0] # => "тес" m.begin(0) # => 0 m[3] # => "с" m.begin(3) # => 2
When string or symbol argument name
is given, returns the offset of the beginning for the named match:
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") # => #<MatchData "hog" foo:"h" bar:"g"> m[:foo] # => "h" m.begin('foo') # => 0 m[:bar] # => "g" m.begin(:bar) # => 2
Related: MatchData#end
, MatchData#offset
, MatchData#byteoffset
.
static VALUE
match_byteoffset(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
backref_number_check(regs, i);
if (BEG(i) < 0)
return rb_assoc_new(Qnil, Qnil);
return rb_assoc_new(LONG2NUM(BEG(i)), LONG2NUM(END(i)));
}
Returns a two-element array containing the beginning and ending byte-based offsets of the nth match. n can be a string or symbol to reference a named capture.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.byteoffset(0) #=> [1, 7] m.byteoffset(4) #=> [6, 7] m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") p m.byteoffset(:foo) #=> [0, 1] p m.byteoffset(:bar) #=> [2, 3]
static VALUE
match_captures(VALUE match)
{
return match_array(match, 1);
}
Returns the array of captures, which are all matches except m[0]
:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m.captures # => ["H", "X", "113", "8"]
Related: MatchData.to_a
.
static VALUE
match_deconstruct_keys(VALUE match, VALUE keys)
{
VALUE h;
long i;
match_check(match);
if (NIL_P(RMATCH(match)->regexp)) {
return rb_hash_new_with_size(0);
}
if (NIL_P(keys)) {
h = rb_hash_new_with_size(onig_number_of_names(RREGEXP_PTR(RMATCH(match)->regexp)));
struct MEMO *memo;
memo = MEMO_NEW(h, match, 1);
onig_foreach_name(RREGEXP_PTR(RMATCH(match)->regexp), match_named_captures_iter, (void*)memo);
return h;
}
Check_Type(keys, T_ARRAY);
if (onig_number_of_names(RREGEXP_PTR(RMATCH(match)->regexp)) < RARRAY_LEN(keys)) {
return rb_hash_new_with_size(0);
}
h = rb_hash_new_with_size(RARRAY_LEN(keys));
for (i=0; i<RARRAY_LEN(keys); i++) {
VALUE key = RARRAY_AREF(keys, i);
VALUE name;
Check_Type(key, T_SYMBOL);
name = rb_sym2str(key);
int num = NAME_TO_NUMBER(RMATCH_REGS(match), RMATCH(match)->regexp, RMATCH(match)->regexp,
RSTRING_PTR(name), RSTRING_END(name));
if (num >= 0) {
rb_hash_aset(h, key, rb_reg_nth_match(num, match));
}
else {
return h;
}
}
return h;
}
Returns a hash of the named captures for the given names.
m = /(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})/.match("18:37:22") m.deconstruct_keys([:hours, :minutes]) # => {:hours => "18", :minutes => "37"} m.deconstruct_keys(nil) # => {:hours => "18", :minutes => "37", :seconds => "22"}
Returns an empty hash of no named captures were defined:
m = /(\d{2}):(\d{2}):(\d{2})/.match("18:37:22") m.deconstruct_keys(nil) # => {}
static VALUE
match_end(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
backref_number_check(regs, i);
if (BEG(i) < 0)
return Qnil;
update_char_offset(match);
return LONG2NUM(RMATCH(match)->rmatch->char_offset[i].end);
}
Returns the offset (in characters) of the end of the specified match.
When non-negative integer argument n
is given, returns the offset of the end of the n
th match:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m.end(0) # => 7 m[3] # => "113" m.end(3) # => 6 m = /(т)(е)(с)/.match('тест') # => #<MatchData "тес" 1:"т" 2:"е" 3:"с"> m[0] # => "тес" m.end(0) # => 3 m[3] # => "с" m.end(3) # => 3
When string or symbol argument name
is given, returns the offset of the end for the named match:
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") # => #<MatchData "hog" foo:"h" bar:"g"> m[:foo] # => "h" m.end('foo') # => 1 m[:bar] # => "g" m.end(:bar) # => 3
Related: MatchData#begin
, MatchData#offset
, MatchData#byteoffset
.
static VALUE
match_equal(VALUE match1, VALUE match2)
{
const struct re_registers *regs1, *regs2;
if (match1 == match2) return Qtrue;
if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse;
if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) return Qfalse;
regs1 = RMATCH_REGS(match1);
regs2 = RMATCH_REGS(match2);
if (regs1->num_regs != regs2->num_regs) return Qfalse;
if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
return Qtrue;
}
Returns true
if object
is another MatchData object whose target string, regexp, match, and captures are the same as self
, false
otherwise.
MatchData#eql?
is an alias for MatchData#==
.
static VALUE
match_hash(VALUE match)
{
const struct re_registers *regs;
st_index_t hashval;
match_check(match);
hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str));
hashval = rb_hash_uint(hashval, reg_hash(match_regexp(match)));
regs = RMATCH_REGS(match);
hashval = rb_hash_uint(hashval, regs->num_regs);
hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg)));
hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end)));
hashval = rb_hash_end(hashval);
return ST2FIX(hashval);
}
Returns the integer hash value for self
, based on the target string, regexp, match, and captures.
See also Object#hash
.
static VALUE
match_inspect(VALUE match)
{
VALUE cname = rb_class_path(rb_obj_class(match));
VALUE str;
int i;
struct re_registers *regs = RMATCH_REGS(match);
int num_regs = regs->num_regs;
struct backref_name_tag *names;
VALUE regexp = RMATCH(match)->regexp;
if (regexp == 0) {
return rb_sprintf("#<%"PRIsVALUE":%p>", cname, (void*)match);
}
else if (NIL_P(regexp)) {
return rb_sprintf("#<%"PRIsVALUE": %"PRIsVALUE">",
cname, rb_reg_nth_match(0, match));
}
names = ALLOCA_N(struct backref_name_tag, num_regs);
MEMZERO(names, struct backref_name_tag, num_regs);
onig_foreach_name(RREGEXP_PTR(regexp),
match_inspect_name_iter, names);
str = rb_str_buf_new2("#<");
rb_str_append(str, cname);
for (i = 0; i < num_regs; i++) {
VALUE v;
rb_str_buf_cat2(str, " ");
if (0 < i) {
if (names[i].name)
rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
else {
rb_str_catf(str, "%d", i);
}
rb_str_buf_cat2(str, ":");
}
v = rb_reg_nth_match(i, match);
if (NIL_P(v))
rb_str_buf_cat2(str, "nil");
else
rb_str_buf_append(str, rb_str_inspect(v));
}
rb_str_buf_cat2(str, ">");
return str;
}
Returns a string representation of self
:
m = /.$/.match("foo") # => #<MatchData "o"> m.inspect # => "#<MatchData \"o\">" m = /(.)(.)(.)/.match("foo") # => #<MatchData "foo" 1:"f" 2:"o" 3:"o"> m.inspect # => "#<MatchData \"foo\" 1:\"f\" 2:\"o\ m = /(.)(.)?(.)/.match("fo") # => #<MatchData "fo" 1:"f" 2:nil 3:"o"> m.inspect # => "#<MatchData \"fo\" 1:\"f\" 2:nil 3:\"o\">" Related: MatchData#to_s.
static VALUE
match_nth(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
backref_number_check(regs, i);
long start = BEG(i), end = END(i);
if (start < 0)
return Qnil;
return rb_str_subseq(RMATCH(match)->str, start, end - start);
}
Returns the matched substring corresponding to the given argument.
When non-negative argument n
is given, returns the matched substring for the n
th match:
m = /(.)(.)(\d+)(\d)(\w)?/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8" 5:nil> m.match(0) # => "HX1138" m.match(4) # => "8" m.match(5) # => nil
When string or symbol argument name
is given, returns the matched substring for the given name:
m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge") # => #<MatchData "hoge" foo:"h" bar:"ge"> m.match('foo') # => "h" m.match(:bar) # => "ge"
static VALUE
match_nth_length(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
backref_number_check(regs, i);
if (BEG(i) < 0)
return Qnil;
update_char_offset(match);
const struct rmatch_offset *const ofs =
&RMATCH(match)->rmatch->char_offset[i];
return LONG2NUM(ofs->end - ofs->beg);
}
Returns the length (in characters) of the matched substring corresponding to the given argument.
When non-negative argument n
is given, returns the length of the matched substring for the n
th match:
m = /(.)(.)(\d+)(\d)(\w)?/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8" 5:nil> m.match_length(0) # => 6 m.match_length(4) # => 1 m.match_length(5) # => nil
When string or symbol argument name
is given, returns the length of the matched substring for the named match:
m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge") # => #<MatchData "hoge" foo:"h" bar:"ge"> m.match_length('foo') # => 1 m.match_length(:bar) # => 2
static VALUE
match_named_captures(VALUE match)
{
VALUE hash;
struct MEMO *memo;
match_check(match);
if (NIL_P(RMATCH(match)->regexp))
return rb_hash_new();
hash = rb_hash_new();
memo = MEMO_NEW(hash, match, 0);
onig_foreach_name(RREGEXP(RMATCH(match)->regexp)->ptr, match_named_captures_iter, (void*)memo);
return hash;
}
Returns a hash of the named captures; each key is a capture name; each value is its captured string or nil
:
m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge") # => #<MatchData "hoge" foo:"h" bar:"ge"> m.named_captures # => {"foo"=>"h", "bar"=>"ge"} m = /(?<a>.)(?<b>.)/.match("01") # => #<MatchData "01" a:"0" b:"1"> m.named_captures #=> {"a" => "0", "b" => "1"} m = /(?<a>.)(?<b>.)?/.match("0") # => #<MatchData "0" a:"0" b:nil> m.named_captures #=> {"a" => "0", "b" => nil} m = /(?<a>.)(?<a>.)/.match("01") # => #<MatchData "01" a:"0" a:"1"> m.named_captures #=> {"a" => "1"}
static VALUE
match_names(VALUE match)
{
match_check(match);
if (NIL_P(RMATCH(match)->regexp))
return rb_ary_new_capa(0);
return rb_reg_names(RMATCH(match)->regexp);
}
Returns an array of the capture names (see Named Captures):
m = /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge") # => #<MatchData "hog" foo:"h" bar:"o" baz:"g"> m.names # => ["foo", "bar", "baz"] m = /foo/.match('foo') # => #<MatchData "foo"> m.names # => [] # No named captures.
Equivalent to:
m = /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge") m.regexp.names # => ["foo", "bar", "baz"]
static VALUE
match_offset(VALUE match, VALUE n)
{
int i = match_backref_number(match, n);
struct re_registers *regs = RMATCH_REGS(match);
match_check(match);
backref_number_check(regs, i);
if (BEG(i) < 0)
return rb_assoc_new(Qnil, Qnil);
update_char_offset(match);
return rb_assoc_new(LONG2NUM(RMATCH(match)->rmatch->char_offset[i].beg),
LONG2NUM(RMATCH(match)->rmatch->char_offset[i].end));
}
Returns a 2-element array containing the beginning and ending offsets (in characters) of the specified match.
When non-negative integer argument n
is given, returns the starting and ending offsets of the n
th match:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m.offset(0) # => [1, 7] m[3] # => "113" m.offset(3) # => [3, 6] m = /(т)(е)(с)/.match('тест') # => #<MatchData "тес" 1:"т" 2:"е" 3:"с"> m[0] # => "тес" m.offset(0) # => [0, 3] m[3] # => "с" m.offset(3) # => [2, 3]
When string or symbol argument name
is given, returns the starting and ending offsets for the named match:
m = /(?<foo>.)(.)(?<bar>.)/.match("hoge") # => #<MatchData "hog" foo:"h" bar:"g"> m[:foo] # => "h" m.offset('foo') # => [0, 1] m[:bar] # => "g" m.offset(:bar) # => [2, 3]
Related: MatchData#byteoffset
, MatchData#begin
, MatchData#end
.
VALUE
rb_reg_match_post(VALUE match)
{
VALUE str;
long pos;
struct re_registers *regs;
if (NIL_P(match)) return Qnil;
match_check(match);
regs = RMATCH_REGS(match);
if (BEG(0) == -1) return Qnil;
str = RMATCH(match)->str;
pos = END(0);
str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
return str;
}
Returns the substring of the target string from the end of the first match in self
(that is, self[0]
) to the end of the string; equivalent to regexp global variable $'
:
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m.post_match # => ": The Movie"\
Related: MatchData.pre_match
.
VALUE
rb_reg_match_pre(VALUE match)
{
VALUE str;
struct re_registers *regs;
if (NIL_P(match)) return Qnil;
match_check(match);
regs = RMATCH_REGS(match);
if (BEG(0) == -1) return Qnil;
str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
return str;
}
Returns the substring of the target string from its beginning up to the first match in self
(that is, self[0]
); equivalent to regexp global variable $`
:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m[0] # => "HX1138" m.pre_match # => "T"
Related: MatchData#post_match
.
static VALUE
match_regexp(VALUE match)
{
VALUE regexp;
match_check(match);
regexp = RMATCH(match)->regexp;
if (NIL_P(regexp)) {
VALUE str = rb_reg_nth_match(0, match);
regexp = rb_reg_regcomp(rb_reg_quote(str));
RMATCH(match)->regexp = regexp;
}
return regexp;
}
Returns the regexp that produced the match:
m = /a.*b/.match("abc") # => #<MatchData "ab"> m.regexp # => /a.*b/
static VALUE
match_size(VALUE match)
{
match_check(match);
return INT2FIX(RMATCH_REGS(match)->num_regs);
}
Returns size of the match array:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m.size # => 5
MatchData#length
is an alias for MatchData.size
.
static VALUE
match_string(VALUE match)
{
match_check(match);
return RMATCH(match)->str; /* str is frozen */
}
Returns the target string if it was frozen; otherwise, returns a frozen copy of the target string:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m.string # => "THX1138."
static VALUE
match_to_a(VALUE match)
{
return match_array(match, 0);
}
Returns the array of matches:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m.to_a # => ["HX1138", "H", "X", "113", "8"]
Related: MatchData#captures
.
static VALUE
match_to_s(VALUE match)
{
VALUE str = rb_reg_last_match(match_check(match));
if (NIL_P(str)) str = rb_str_new(0,0);
return str;
}
Returns the matched string:
m = /(.)(.)(\d+)(\d)/.match("THX1138.") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m.to_s # => "HX1138" m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge") # => #<MatchData "hoge" foo:"h" bar:"ge"> m.to_s # => "hoge"
Related: MatchData.inspect
.
static VALUE
match_values_at(int argc, VALUE *argv, VALUE match)
{
VALUE result;
int i;
match_check(match);
result = rb_ary_new2(argc);
for (i=0; i<argc; i++) {
if (FIXNUM_P(argv[i])) {
rb_ary_push(result, rb_reg_nth_match(FIX2INT(argv[i]), match));
}
else {
int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, argv[i]);
if (num >= 0) {
rb_ary_push(result, rb_reg_nth_match(num, match));
}
else {
match_ary_aref(match, argv[i], result);
}
}
}
return result;
}
Returns match and captures at the given indexes
, which may include any mixture of:
-
Integers.
-
Ranges.
-
Names (strings and symbols).
Examples:
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8"> m.values_at(0, 2, -2) # => ["HX1138", "X", "113"] m.values_at(1..2, -1) # => ["H", "X", "8"] m = /(?<a>\d+) *(?<op>[+\-*\/]) *(?<b>\d+)/.match("1 + 2") # => #<MatchData "1 + 2" a:"1" op:"+" b:"2"> m.values_at(0, 1..2, :a, :b, :op) # => ["1 + 2", "1", "+", "1", "2", "+"]