URI
is a module providing classes to handle Uniform Resource Identifiers (RFC2396)
Uniform handling of handling URIs
Flexibility to introduce custom URI
schemes
Flexibility to have an alternate URI::Parser
(or just different patterns and regexp’s)
require 'uri' uri = URI("http://foo.com/posts?id=30&limit=5#time=1305298413") #=> #<URI::HTTP:0x00000000b14880 URL:http://foo.com/posts?id=30&limit=5#time=1305298413> uri.scheme #=> "http" uri.host #=> "foo.com" uri.path #=> "/posts" uri.query #=> "id=30&limit=5" uri.fragment #=> "time=1305298413" uri.to_s #=> "http://foo.com/posts?id=30&limit=5#time=1305298413"
module URI class RSYNC < Generic DEFAULT_PORT = 873 end @@schemes['RSYNC'] = RSYNC end #=> URI::RSYNC URI.scheme_list #=> {"FTP"=>URI::FTP, "HTTP"=>URI::HTTP, "HTTPS"=>URI::HTTPS, "LDAP"=>URI::LDAP, "LDAPS"=>URI::LDAPS, "MAILTO"=>URI::MailTo, "RSYNC"=>URI::RSYNC} uri = URI("rsync://rsync.foo.com") #=> #<URI::RSYNC:0x00000000f648c8 URL:rsync://rsync.foo.com>
A good place to view an RFC spec is www.ietf.org/rfc.html
Here is a list of all related RFC’s.
Class
tree URI::Generic
(in uri/generic.rb)
URI::FTP
- (in uri/ftp.rb)
URI::HTTP
- (in uri/http.rb)
URI::HTTPS
- (in uri/https.rb)
URI::LDAP
- (in uri/ldap.rb)
URI::LDAPS
- (in uri/ldaps.rb)
URI::MailTo
- (in uri/mailto.rb)
URI::Parser
- (in uri/common.rb)
URI::REGEXP
- (in uri/common.rb)
URI::REGEXP::PATTERN - (in uri/common.rb)
URI::Util - (in uri/common.rb)
URI::Escape
- (in uri/common.rb)
URI::Error
- (in uri/common.rb)
URI::InvalidURIError
- (in uri/common.rb)
URI::InvalidComponentError
- (in uri/common.rb)
URI::BadURIError
- (in uri/common.rb)
Akira Yamada <akira@ruby-lang.org>
Akira Yamada <akira@ruby-lang.org> Dmitry V. Sabanin <sdmitry@lrn.ru> Vincent Batts <vbatts@hashbangbash.com>
Copyright © 2001 akira yamada <akira@ruby-lang.org> You can redistribute it and/or modify it under the same term as Ruby.
$Id: uri.rb 53141 2015-12-16 05:07:31Z naruse $
curl https://encoding.spec.whatwg.org/encodings.json| ruby -rjson -e 'H={} h={ "shift_jis"=>"Windows-31J", "euc-jp"=>"cp51932", "iso-2022-jp"=>"cp50221", "x-mac-cyrillic"=>"macCyrillic", } JSON($<.read).map{|x|x["encodings"]}.flatten.each{|x| Encoding.find(n=h.fetch(n=x["name"].downcase,n))rescue next x["labels"].each{|y|H[y]=n} } puts "{" H.each{|k,v|puts %[ #{k.dump}=>#{v.dump},]} puts "}"
‘
# File tmp/rubies/ruby-2.5.9/lib/uri/common.rb, line 460
def self.decode_www_form(str, enc=Encoding::UTF_8, separator: '&', use__charset_: false, isindex: false)
raise ArgumentError, "the input of #{self.name}.#{__method__} must be ASCII only string" unless str.ascii_only?
ary = []
return ary if str.empty?
enc = Encoding.find(enc)
str.b.each_line(separator) do |string|
string.chomp!(separator)
key, sep, val = string.partition('=')
if isindex
if sep.empty?
val = key
key = ''
end
isindex = false
end
if use__charset_ and key == '_charset_' and e = get_encoding(val)
enc = e
use__charset_ = false
end
key.gsub!(/\+|%\h\h/, TBLDECWWWCOMP_)
if val
val.gsub!(/\+|%\h\h/, TBLDECWWWCOMP_)
else
val = ''
end
ary << [key, val]
end
ary.each do |k, v|
k.force_encoding(enc)
k.scrub!
v.force_encoding(enc)
v.scrub!
end
ary
end
Decode URL-encoded form data from given str
.
This decodes application/x-www-form-urlencoded data and returns array of key-value array.
This refers url.spec.whatwg.org/#concept-urlencoded-parser , so this supports only &-separator, don’t support ;-separator.
ary = URI.decode_www_form("a=1&a=2&b=3") p ary #=> [['a', '1'], ['a', '2'], ['b', '3']] p ary.assoc('a').last #=> '1' p ary.assoc('b').last #=> '3' p ary.rassoc('a').last #=> '2' p Hash[ary] # => {"a"=>"2", "b"=>"3"}
# File tmp/rubies/ruby-2.5.9/lib/uri/common.rb, line 392
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/ =~ str
str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
end
Decode given str
of URL-encoded form data.
This decodes + to SP.
# File tmp/rubies/ruby-2.5.9/lib/uri/common.rb, line 424
def self.encode_www_form(enum, enc=nil)
enum.map do |k,v|
if v.nil?
encode_www_form_component(k, enc)
elsif v.respond_to?(:to_ary)
v.to_ary.map do |w|
str = encode_www_form_component(k, enc)
unless w.nil?
str << '='
str << encode_www_form_component(w, enc)
end
end.join('&')
else
str = encode_www_form_component(k, enc)
str << '='
str << encode_www_form_component(v, enc)
end
end.join('&')
end
Generate URL-encoded form data from given enum
.
This generates application/x-www-form-urlencoded data defined in HTML5 from given an Enumerable
object.
This internally uses URI.encode_www_form_component(str)
.
This method doesn’t convert the encoding of given items, so convert them before call this method if you want to send data as other than original encoding or mixed encoding data. (Strings which are encoded in an HTML5 ASCII incompatible encoding are converted to UTF-8.)
This method doesn’t handle files. When you send a file, use multipart/form-data.
This refers url.spec.whatwg.org/#concept-urlencoded-serializer
URI.encode_www_form([["q", "ruby"], ["lang", "en"]]) #=> "q=ruby&lang=en" URI.encode_www_form("q" => "ruby", "lang" => "en") #=> "q=ruby&lang=en" URI.encode_www_form("q" => ["ruby", "perl"], "lang" => "en") #=> "q=ruby&q=perl&lang=en" URI.encode_www_form([["q", "ruby"], ["q", "perl"], ["lang", "en"]]) #=> "q=ruby&q=perl&lang=en"
# File tmp/rubies/ruby-2.5.9/lib/uri/common.rb, line 374
def self.encode_www_form_component(str, enc=nil)
str = str.to_s.dup
if str.encoding != Encoding::ASCII_8BIT
if enc && enc != Encoding::ASCII_8BIT
str.encode!(Encoding::UTF_8, invalid: :replace, undef: :replace)
str.encode!(enc, fallback: ->(x){"&#{x.ord};"})
end
str.force_encoding(Encoding::ASCII_8BIT)
end
str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
str.force_encoding(Encoding::US_ASCII)
end
Encode given str
to URL-encoded form data.
This method doesn’t convert *, -, ., 0-9, A-Z, _, a-z, but does convert SP (ASCII space) to + and converts others to %XX.
If enc
is given, convert str
to the encoding before percent encoding.
This is an implementation of www.w3.org/TR/2013/CR-html5-20130806/forms.html#url-encoded-form-data
# File tmp/rubies/ruby-2.5.9/lib/uri/common.rb, line 302
def self.extract(str, schemes = nil, &block)
warn "URI.extract is obsolete", uplevel: 1 if $VERBOSE
DEFAULT_PARSER.extract(str, schemes, &block)
end
URI::extract(str[, schemes][,&blk])
str
String to extract URIs from.
schemes
Limit URI
matching to a specific schemes.
Extracts URIs from a string. If block given, iterates through all matched URIs. Returns nil if block given or array with matches.
require "uri" URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.") # => ["http://foo.example.com/bla", "mailto:test@example.com"]
# File tmp/rubies/ruby-2.5.9/lib/uri/common.rb, line 274
def self.join(*str)
RFC3986_PARSER.join(*str)
end
URI::join(str[, str, ...])
str
String(s) to work with, will be converted to RFC3986 URIs before merging.
Joins URIs.
require 'uri' p URI.join("http://example.com/","main.rbx") # => #<URI::HTTP:0x2022ac02 URL:http://example.com/main.rbx> p URI.join('http://example.com', 'foo') # => #<URI::HTTP:0x01ab80a0 URL:http://example.com/foo> p URI.join('http://example.com', '/foo', '/bar') # => #<URI::HTTP:0x01aaf0b0 URL:http://example.com/bar> p URI.join('http://example.com', '/foo', 'bar') # => #<URI::HTTP:0x801a92af0 URL:http://example.com/bar> p URI.join('http://example.com', '/foo/', 'bar') # => #<URI::HTTP:0x80135a3a0 URL:http://example.com/foo/bar>
# File tmp/rubies/ruby-2.5.9/lib/uri/common.rb, line 236
def self.parse(uri)
RFC3986_PARSER.parse(uri)
end
URI::parse(uri_str)
uri_str
String with URI
.
Creates one of the URI’s subclasses instance from the string.
Raised if URI given is not a correct one.
require 'uri' uri = URI.parse("http://www.ruby-lang.org/") p uri # => #<URI::HTTP:0x202281be URL:http://www.ruby-lang.org/> p uri.scheme # => "http" p uri.host # => "www.ruby-lang.org"
It’s recommended to first ::escape the provided uri_str
if there are any invalid URI
characters.
# File tmp/rubies/ruby-2.5.9/lib/uri/common.rb, line 338
def self.regexp(schemes = nil)
warn "URI.regexp is obsolete", uplevel: 1 if $VERBOSE
DEFAULT_PARSER.make_regexp(schemes)
end
URI::regexp([match_schemes])
match_schemes
Array of schemes. If given, resulting regexp matches to URIs whose scheme is one of the match_schemes.
Returns a Regexp
object which matches to URI-like strings. The Regexp
object returned by this method includes arbitrary number of capture group (parentheses). Never rely on it’s number.
require 'uri' # extract first URI from html_string html_string.slice(URI.regexp) # remove ftp URIs html_string.sub(URI.regexp(['ftp']) # You should not rely on the number of parentheses html_string.scan(URI.regexp) do |*matches| p $& end
# File tmp/rubies/ruby-2.5.9/lib/uri/common.rb, line 146
def self.scheme_list
@@schemes
end
Returns a Hash
of the defined schemes
# File tmp/rubies/ruby-2.5.9/lib/uri/common.rb, line 198
def self.split(uri)
RFC3986_PARSER.split(uri)
end
URI::split(uri)
uri
String with URI
.
Splits the string on following parts and returns array with result:
* Scheme * Userinfo * Host * Port * Registry * Path * Opaque * Query * Fragment
require 'uri' p URI.split("http://www.ruby-lang.org/") # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]