URI
is a module providing classes to handle Uniform Resource Identifiers (RFC2396).
Features
- Uniform way of handling URIs.
- Flexibility to introduce custom URI schemes.
- Flexibility to have an alternate URI::Parser (or just different patterns and regexps).
Basic example
require 'uri' uri = URI("http://foo.com/posts?id=30&limit=5#time=1305298413") #=> #<URI::HTTP http://foo.com/posts?id=30&limit=5#time=1305298413> uri.scheme #=> "http" uri.host #=> "foo.com" uri.path #=> "/posts" uri.query #=> "id=30&limit=5" uri.fragment #=> "time=1305298413" uri.to_s #=> "http://foo.com/posts?id=30&limit=5#time=1305298413"
Adding custom URIs
module URI class RSYNC < Generic DEFAULT_PORT = 873 end register_scheme 'RSYNC', RSYNC end #=> URI::RSYNC URI.scheme_list #=> {"FILE"=>URI::File, "FTP"=>URI::FTP, "HTTP"=>URI::HTTP, # "HTTPS"=>URI::HTTPS, "LDAP"=>URI::LDAP, "LDAPS"=>URI::LDAPS, # "MAILTO"=>URI::MailTo, "RSYNC"=>URI::RSYNC} uri = URI("rsync://rsync.foo.com") #=> #<URI::RSYNC rsync://rsync.foo.com>
RFC References
A good place to view an RFC spec is www.ietf.org/rfc.html.
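Here is a list of all related RFCs:
- RFC822 (tools.ietf.org/html/rfc822)
- RFC1738 (tools.ietf.org/html/rfc1738)
- RFC2255 (tools.ietf.org/html/rfc2255)
- RFC2368 (tools.ietf.org/html/rfc2368)
- RFC2373 (tools.ietf.org/html/rfc2373)
- RFC2396 (tools.ietf.org/html/rfc2396)
- RFC2732 (tools.ietf.org/html/rfc2732)
- RFC3986 (tools.ietf.org/html/rfc3986)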
Class tree
- URI::Generic (in uri/generic.rb)
  - URI::File (in uri/file.rb)
  - URI::FTP (in uri/ftp.rb)
  - URI::HTTP (in uri/http.rb)
    - URI::HTTPS (in uri/https.rb)
  - URI::LDAP (in uri/ldap.rb)
    - URI::LDAPS (in uri/ldaps.rb)
  - URI::MailTo (in uri/mailto.rb)
- URI::Parser (in uri/common.rb)
- URI::REGEXP (in uri/common.rb)
  - URI::REGEXP::PATTERN (in uri/common.rb)
- URI::Util (in uri/common.rb)
- URI::Error (in uri/common.rb)
  - URI::InvalidURIError (in uri/common.rb)
  - URI::InvalidComponentError (in uri/common.rb)
  - URI::BadURIError (in uri/common.rb)
Copyright Info
- Author: Akira Yamada <akira@ruby-lang.org>
- Documentation: Akira Yamada <akira@ruby-lang.org>, Dmitry V. Sabanin <sdmitry@lrn.ru>, Vincent Batts <vbatts@hashbangbash.com>
- License: Copyright © 2001 akira yamada <akira@ruby-lang.org>. You can redistribute it and/or modify it under the same terms as Ruby.
curl https://encoding.spec.whatwg.org/encodings.json |
ruby -rjson -e 'H={}
  h={
    "shift_jis"=>"Windows-31J",
    "euc-jp"=>"cp51932",
    "iso-2022-jp"=>"cp50221",
    "x-mac-cyrillic"=>"macCyrillic",
  }
  JSON($<.read).map{|x|x["encodings"]}.flatten.each{|x|
    Encoding.find(n=h.fetch(n=x["name"].downcase,n))rescue next
    x["labels"].each{|y|H[y]=n}
  }
  puts "{"
  H.each{|k,v|puts %[ #{k.dump}=>#{v.dump},]}
  puts "}"
'
# File tmp/rubies/ruby-3.1.3/lib/uri/common.rb, line 408
# Decodes application/x-www-form-urlencoded data in +str+ and returns an
# array of <tt>[key, value]</tt> string pairs.
#
# Raises ArgumentError unless +str+ is ASCII only.  An empty +str+ yields an
# empty array.
#
# enc::           encoding forced onto every decoded key and value.
# separator::     string that delimits the pairs.
# use__charset_:: when true, the first <tt>_charset_</tt> pair for which
#                 get_encoding returns a value replaces +enc+.
# isindex::       when true, a first pair that has no <tt>=</tt> is returned
#                 as a value with an empty key.
def self.decode_www_form(str, enc=Encoding::UTF_8, separator: '&', use__charset_: false, isindex: false)
  unless str.ascii_only?
    raise ArgumentError, "the input of #{self.name}.#{__method__} must be ASCII only string"
  end

  pairs = []
  return pairs if str.empty?

  enc = Encoding.find(enc)
  str.b.each_line(separator) do |segment|
    segment.chomp!(separator)
    key, sep, val = segment.partition('=')

    if isindex
      # Only the very first pair is eligible for the isindex treatment.
      key, val = +'', key if sep.empty?
      isindex = false
    end

    if use__charset_ && key == '_charset_' && (detected = get_encoding(val))
      enc = detected
      use__charset_ = false
    end

    key.gsub!(/\+|%\h\h/, TBLDECWWWCOMP_)
    val ||= +''
    val.gsub!(/\+|%\h\h/, TBLDECWWWCOMP_)
    pairs << [key, val]
  end

  # The encoding is applied in a second pass so that an override picked up
  # from a _charset_ pair also covers the pairs that preceded it.
  pairs.each do |pair|
    pair.each do |text|
      text.force_encoding(enc)
      text.scrub!
    end
  end
  pairs
end
Decodes URL-encoded form data from given str
.
This decodes application/x-www-form-urlencoded data and returns an array of key-value arrays.
This refers url.spec.whatwg.org/#concept-urlencoded-parser, so this supports only &-separator, and doesn’t support ;-separator.
ary = URI.decode_www_form("a=1&a=2&b=3")
ary                   #=> [['a', '1'], ['a', '2'], ['b', '3']]
ary.assoc('a').last   #=> '1'
ary.assoc('b').last   #=> '3'
ary.rassoc('2').first #=> 'a'
Hash[ary]             #=> {"a"=>"2", "b"=>"3"}
# File tmp/rubies/ruby-3.1.3/lib/uri/common.rb, line 340
# Decodes one URL-encoded form component: <tt>+</tt> and <tt>%XX</tt>
# sequences are replaced through TBLDECWWWCOMP_, and the result is tagged
# with +enc+.
#
# Raises ArgumentError when +str+ contains a <tt>%</tt> that is not followed
# by two hexadecimal digits.
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
  malformed = /%(?!\h\h)/.match?(str)
  raise ArgumentError, "invalid %-encoding (#{str})" if malformed

  # Work on a binary copy so the caller's string is left untouched.
  decoded = str.b
  decoded.gsub!(/\+|%\h\h/, TBLDECWWWCOMP_)
  decoded.force_encoding(enc)
end
Decodes given str
of URL-encoded form data.
This decodes + to SP.
# File tmp/rubies/ruby-3.1.3/lib/uri/common.rb, line 372
# Generates application/x-www-form-urlencoded data from +enum+, an
# Enumerable that yields <tt>[key, value]</tt> pairs (for example a Hash or
# an Array of two-element Arrays).
#
# Every key and value is encoded with encode_www_form_component(str, enc).
#
# * A +nil+ value produces the bare key (<tt>"k"</tt>).
# * A value that responds to +to_ary+ produces one entry per element, joined
#   with <tt>&</tt>; a +nil+ element produces the bare key, exactly like a
#   top-level +nil+ value.
# * Any other value produces <tt>"k=v"</tt>.
#
# Returns the entries joined with <tt>&</tt>.
def self.encode_www_form(enum, enc=nil)
  enum.map do |k, v|
    if v.nil?
      encode_www_form_component(k, enc)
    elsif v.respond_to?(:to_ary)
      v.to_ary.map do |w|
        str = encode_www_form_component(k, enc)
        unless w.nil?
          str << '='
          str << encode_www_form_component(w, enc)
        end
        # Return the entry explicitly: the `unless` above evaluates to nil
        # for a nil element, which used to drop the key from the output.
        str
      end.join('&')
    else
      str = encode_www_form_component(k, enc)
      str << '='
      str << encode_www_form_component(v, enc)
    end
  end.join('&')
end
Generates URL-encoded form data from given enum
.
This generates application/x-www-form-urlencoded data, as defined in HTML5, from a given Enumerable
object.
This internally uses URI.encode_www_form_component(str)
.
This method doesn’t convert the encoding of given items, so convert them before calling this method if you want to send data as other than original encoding or mixed encoding data. (Strings which are encoded in an HTML5 ASCII incompatible encoding are converted to UTF-8.)
This method doesn’t handle files. When you send a file, use multipart/form-data.
This refers url.spec.whatwg.org/#concept-urlencoded-serializer
URI.encode_www_form([["q", "ruby"], ["lang", "en"]]) #=> "q=ruby&lang=en" URI.encode_www_form("q" => "ruby", "lang" => "en") #=> "q=ruby&lang=en" URI.encode_www_form("q" => ["ruby", "perl"], "lang" => "en") #=> "q=ruby&q=perl&lang=en" URI.encode_www_form([["q", "ruby"], ["q", "perl"], ["lang", "en"]]) #=> "q=ruby&q=perl&lang=en"
# File tmp/rubies/ruby-3.1.3/lib/uri/common.rb, line 322
# Encodes +str+ (converted with +to_s+) as a URL-encoded form component.
#
# Every byte outside <tt>*</tt>, <tt>-</tt>, <tt>.</tt>, <tt>0-9</tt>,
# <tt>A-Z</tt>, <tt>_</tt> and <tt>a-z</tt> is replaced through
# TBLENCWWWCOMP_.  When +enc+ is given (and is not ASCII-8BIT) and the input
# is not already binary, the text is first transcoded to +enc+; characters
# that +enc+ cannot represent become <tt>&#NNN;</tt> references before the
# percent-encoding step.
#
# Returns a new US-ASCII string; the argument itself is not modified.
def self.encode_www_form_component(str, enc=nil)
  encoded = str.to_s.dup

  unless encoded.encoding == Encoding::ASCII_8BIT
    if enc && enc != Encoding::ASCII_8BIT
      # Normalise to UTF-8 first so that the fallback sees real characters.
      encoded.encode!(Encoding::UTF_8, invalid: :replace, undef: :replace)
      encoded.encode!(enc, fallback: ->(char) { "&##{char.ord};" })
    end
    encoded.force_encoding(Encoding::ASCII_8BIT)
  end

  encoded.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
  encoded.force_encoding(Encoding::US_ASCII)
end
Encodes given str
to URL-encoded form data.
This method doesn’t convert *, -, ., 0-9, A-Z, _, a-z, but does convert SP (ASCII space) to + and converts others to %XX.
If enc
is given, convert str
to the encoding before percent encoding.
This is an implementation of www.w3.org/TR/2013/CR-html5-20130806/forms.html#url-encoded-form-data.
# File tmp/rubies/ruby-3.1.3/lib/uri/common.rb, line 252
# Obsolete entry point that hands +str+, +schemes+ and the optional block to
# DEFAULT_PARSER.extract and returns whatever that call returns.
#
# When <tt>$VERBOSE</tt> is truthy, a "URI.extract is obsolete" warning that
# points at the caller is emitted first.
def self.extract(str, schemes = nil, &block)
  if $VERBOSE
    warn "URI.extract is obsolete", uplevel: 1
  end

  DEFAULT_PARSER.extract(str, schemes, &block)
end
Synopsis
URI::extract(str[, schemes][,&blk])
Args
Description
Extracts URIs from a string. If a block is given, iterates through all matched URIs and returns nil; otherwise returns an array with the matches.
Usage
require "uri" URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.") # => ["http://foo.example.org/bla", "mailto:test@example.com"]
# File tmp/rubies/ruby-3.1.3/lib/uri/common.rb, line 90
# Builds a URI object for +scheme+.
#
# The class is looked up by the upper-cased scheme name: first in
# INITIAL_SCHEMES, then among the constants defined directly on Schemes
# (only when the name is a syntactically valid constant name), and finally
# +default+ is used.  The chosen class is instantiated with +scheme+ followed
# by +arguments+.
def self.for(scheme, *arguments, default: Generic)
  lookup_key = scheme.to_s.upcase
  klass = INITIAL_SCHEMES[lookup_key]

  # The name check keeps const_defined? from raising on names such as "A-B".
  if !klass && /\A[A-Z]\w*\z/.match?(lookup_key) && Schemes.const_defined?(lookup_key, false)
    klass = Schemes.const_get(lookup_key, false)
  end

  (klass || default).new(scheme, *arguments)
end
Construct a URI
instance, using the scheme to detect the appropriate class from URI.scheme_list
.
# File tmp/rubies/ruby-3.1.3/lib/uri/common.rb, line 224
# Joins the given URI strings by forwarding all of them to
# RFC3986_PARSER.join and returns that call's result.
def self.join(*str)
  parser = RFC3986_PARSER
  parser.join(*str)
end
Synopsis
URI::join(str[, str, ...])
Args
str
-
String(s) to work with, will be converted to RFC3986 URIs before merging.
Description
Joins URIs.
Usage
require 'uri' URI.join("http://example.com/","main.rbx") # => #<URI::HTTP http://example.com/main.rbx> URI.join('http://example.com', 'foo') # => #<URI::HTTP http://example.com/foo> URI.join('http://example.com', '/foo', '/bar') # => #<URI::HTTP http://example.com/bar> URI.join('http://example.com', '/foo', 'bar') # => #<URI::HTTP http://example.com/bar> URI.join('http://example.com', '/foo/', 'bar') # => #<URI::HTTP http://example.com/foo/bar>
# File tmp/rubies/ruby-3.1.3/lib/open-uri.rb, line 23
# Opens +name+, dispatching on what kind of object it is:
#
# 1. If +name+ responds to +open+, that method is called with +rest+ and the
#    block.
# 2. If +name+ is string-like, starts with <tt>scheme://</tt>, and the object
#    returned by URI.parse responds to +open+, that object's +open+ is called
#    with +rest+ and the block.
# 3. Otherwise the call is passed on unchanged to +super+.
def self.open(name, *rest, &block)
  return name.open(*rest, &block) if name.respond_to?(:open)

  if name.respond_to?(:to_str) &&
     %r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name &&
     (uri = URI.parse(name)).respond_to?(:open)
    return uri.open(*rest, &block)
  end

  super
end
Allows the opening of various resources including URIs.
If the first argument responds to the ‘open’ method, ‘open’ is called on it with the rest of the arguments.
If the first argument is a string that begins with (protocol)://, it is parsed by URI.parse
. If the parsed object responds to the ‘open’ method, ‘open’ is called on it with the rest of the arguments.
Otherwise, Kernel#open
is called.
OpenURI::OpenRead#open
provides URI::HTTP#open
, URI::HTTPS#open
and URI::FTP#open
, Kernel#open
.
We can accept URIs and strings that begin with http://, https:// and ftp://. In these cases, the opened file object is extended by OpenURI::Meta
.
# File tmp/rubies/ruby-3.1.3/lib/uri/common.rb, line 187
# Parses +uri+ by handing it to RFC3986_PARSER.parse and returns the object
# that call produces.  Errors raised by the parser propagate to the caller.
def self.parse(uri)
  parser = RFC3986_PARSER
  parser.parse(uri)
end
Synopsis
URI::parse(uri_str)
Args
Description
Creates an instance of one of URI’s subclasses from the string.
Raises
URI::InvalidURIError
-
Raised if
URI
given is not a correct one.
Usage
require 'uri' uri = URI.parse("http://www.ruby-lang.org/") # => #<URI::HTTP http://www.ruby-lang.org/> uri.scheme # => "http" uri.host # => "www.ruby-lang.org"
It’s recommended to first ::escape the provided uri_str
if there are any invalid URI
characters.
# File tmp/rubies/ruby-3.1.3/lib/uri/common.rb, line 289
# Obsolete entry point that returns DEFAULT_PARSER.make_regexp(schemes).
#
# When <tt>$VERBOSE</tt> is truthy, a "URI.regexp is obsolete" warning that
# points at the caller is emitted first.
def self.regexp(schemes = nil)
  if $VERBOSE
    warn "URI.regexp is obsolete", uplevel: 1
  end

  DEFAULT_PARSER.make_regexp(schemes)
end
Synopsis
URI::regexp([match_schemes])
Args
match_schemes
-
Array
of schemes. If given, resulting regexp matches to URIs whose scheme is one of the match_schemes.
Description
Returns a Regexp
object which matches to URI-like strings. The Regexp
object returned by this method includes arbitrary number of capture group (parentheses). Never rely on its number.
Usage
require 'uri' # extract first URI from html_string html_string.slice(URI.regexp) # remove ftp URIs html_string.sub(URI.regexp(['ftp']), '') # You should not rely on the number of parentheses html_string.scan(URI.regexp) do |*matches| p $& end
# File tmp/rubies/ruby-3.1.3/lib/uri/common.rb, line 71
# Registers +klass+ as the class that handles +scheme+ by defining a constant
# on Schemes, and returns +klass+.
#
# The constant name is the upper-cased scheme.  Lookups in this file
# (+for+, +scheme_list+) work with the upper-cased name, so +scheme+ is
# normalised here as well: <tt>'rsync'</tt>, <tt>'Rsync'</tt>,
# <tt>:rsync</tt> and <tt>'RSYNC'</tt> all register <tt>Schemes::RSYNC</tt>.
# Passing the name through unchanged would raise NameError for lower-case
# names and leave mixed-case names under a constant +for+ never consults.
def self.register_scheme(scheme, klass)
  Schemes.const_set(scheme.to_s.upcase, klass)
end
# File tmp/rubies/ruby-3.1.3/lib/uri/common.rb, line 76
# Returns a Hash that maps the upper-cased name of every constant visible on
# Schemes to the value of that constant.
def self.scheme_list
  Schemes.constants.each_with_object({}) do |const_name, list|
    list[const_name.to_s.upcase] = Schemes.const_get(const_name)
  end
end
Returns a Hash
of the defined schemes.
# File tmp/rubies/ruby-3.1.3/lib/uri/common.rb, line 150
# Splits +uri+ into its components by handing it to RFC3986_PARSER.split and
# returns the array that call produces.
def self.split(uri)
  parser = RFC3986_PARSER
  parser.split(uri)
end
Synopsis
URI::split(uri)
Args
Description
Splits the string into the following parts and returns an array with the result:
-
Scheme
-
Userinfo
-
Host
-
Port
-
Registry
-
Path
-
Opaque
-
Query
-
Fragment
Usage
require 'uri' URI.split("http://www.ruby-lang.org/") # => ["http", nil, "www.ruby-lang.org", nil, nil, "/", nil, nil, nil]