CSV::InputsScanner receives IO inputs, an encoding, and a chunk_size. It also controls the life cycle of the object with its methods keep_start, keep_end, keep_back, and keep_drop.
CSV::InputsScanner#scan tries to match the given pattern at the current position. If there’s a match, the scanner advances the “scan pointer” and returns the matched string. Otherwise, the scanner returns nil.
CSV::InputsScanner#rest returns the “rest” of the string (i.e. everything after the scan pointer). If there is no more data (eos? returns true), it returns an empty string ("").
Class Methods
lib/csv/parser.rb
View on GitHub
# File tmp/rubies/ruby-2.7.6/lib/csv/parser.rb, line 87
# Builds a scanner over a list of inputs and loads the first chunk.
#
# inputs::     list of input objects; it is copied with +dup+, so the
#              scanner shifts/unshifts its own copy of the list
# encoding::   stored in @encoding for later use by read_chunk
# chunk_size:: stored in @chunk_size for later use by read_chunk
#              (defaults to 8192)
def initialize(inputs, encoding, chunk_size: 8192)
  @encoding = encoding
  @chunk_size = chunk_size
  # Stack of [position, buffer] entries managed by the keep_* methods.
  @keeps = []
  @inputs = inputs.dup
  # With no inputs at all there will never be another chunk to read.
  @last_scanner = @inputs.empty?
  read_chunk
end
No documentation available
Instance Methods
lib/csv/parser.rb
View on GitHub
# File tmp/rubies/ruby-2.7.6/lib/csv/parser.rb, line 96
# Yields the remaining input one line at a time, split on +row_separator+,
# pulling in further chunks with read_chunk until it returns false.
#
# Every yielded line ends with +row_separator+, except possibly the last
# one, which is whatever unterminated data was left when the input ran out.
# A line that straddles a chunk boundary is joined back together before it
# is yielded; a two-character separator split across two chunks is also
# recognised.
#
# row_separator:: the String that terminates a line
#
# Returns an Enumerator over the lines when no block is given.
def each_line(row_separator)
  # Without a block there is nothing to yield to, so hand back an
  # Enumerator instead of failing at the first yield.
  return enum_for(__method__, row_separator) unless block_given?

  # Unterminated tail of the previous line/chunk, waiting for more data.
  buffer = nil
  input = @scanner.rest
  # Byte position in the current chunk just past the last yielded line.
  position = @scanner.pos
  # Negative size of data carried over from an earlier chunk, so that those
  # bytes are not counted when advancing +position+ in the current chunk.
  offset = 0
  n_row_separator_chars = row_separator.size
  while true
    input.each_line(row_separator) do |line|
      @scanner.pos += line.bytesize
      if buffer
        if n_row_separator_chars == 2 &&
           buffer.end_with?(row_separator[0]) &&
           line.start_with?(row_separator[1])
          # The separator itself was split: its first character ended the
          # carried-over buffer and its second one starts this line.
          buffer << line[0]
          line = line[1..-1]
          position += buffer.bytesize + offset
          @scanner.pos = position
          offset = 0
          yield(buffer)
          buffer = nil
          next if line.empty?
        else
          # Ordinary continuation: prepend the carried-over data.
          buffer << line
          line = buffer
          buffer = nil
        end
      end
      if line.end_with?(row_separator)
        position += line.bytesize + offset
        @scanner.pos = position
        offset = 0
        yield(line)
      else
        # No separator yet; keep the fragment until more data arrives.
        buffer = line
      end
    end
    break unless read_chunk
    input = @scanner.rest
    position = @scanner.pos
    offset = -buffer.bytesize if buffer
  end
  # Input ended without a trailing separator.
  yield(buffer) if buffer
end
No documentation available
#
lib/csv/parser.rb
View on GitHub
# File tmp/rubies/ruby-2.7.6/lib/csv/parser.rb, line 163
# Reports whether the current scanner has reached the end of its string,
# by delegating to the scanner's own +eos?+. Only the current scanner is
# consulted; inputs that have not been read yet are not considered.
def eos?
@scanner.eos?
end
No documentation available
lib/csv/parser.rb
View on GitHub
# File tmp/rubies/ruby-2.7.6/lib/csv/parser.rb, line 181
# Pops the most recent keep entry and rewinds scanning to where it began.
#
# When the entry carries no buffer, the scanner position is simply reset to
# the entry's saved offset. When it does carry a buffer (read_chunk stores
# one when it replaces the scanner while a keep is open), the current
# scanner's string from the saved offset to its end is queued at the front
# of @inputs as a StringIO, and scanning restarts on a new scanner over
# the buffered data.
#
# Afterwards the next chunk is loaded if the scanner is at its end.
def keep_back
  saved_pos, saved_data = @keeps.pop
  if saved_data
    current = @scanner.string
    pending = current.byteslice(saved_pos, current.bytesize - saved_pos)
    if pending && !pending.empty?
      # Re-queue the not-yet-replayed data so it is read again later.
      @inputs.unshift(StringIO.new(pending))
      @last_scanner = false
    end
    @scanner = StringScanner.new(saved_data)
  else
    @scanner.pos = saved_pos
  end
  read_chunk if @scanner.eos?
end
No documentation available
lib/csv/parser.rb
View on GitHub
# File tmp/rubies/ruby-2.7.6/lib/csv/parser.rb, line 197
# Discards the most recent keep entry without moving the scanner.
# Returns the popped entry, or nil when there is none.
def keep_drop
@keeps.pop
end
No documentation available
#
lib/csv/parser.rb
View on GitHub
# File tmp/rubies/ruby-2.7.6/lib/csv/parser.rb, line 171
# Pops the most recent keep entry and returns the data scanned since it
# was opened.
#
# The result is the slice of the current scanner's string between the
# entry's saved offset and the current position. If the entry carries a
# buffer, that slice is appended to the buffer and the buffer itself is
# returned.
def keep_end
  from, carried = @keeps.pop
  kept = @scanner.string.byteslice(from, @scanner.pos - from)
  return kept unless carried

  # String#<< returns its receiver, i.e. the combined buffer.
  carried << kept
end
No documentation available
lib/csv/parser.rb
View on GitHub
# File tmp/rubies/ruby-2.7.6/lib/csv/parser.rb, line 167
# Opens a new keep: records the scanner's current position, with no buffer
# yet, on top of the keep stack. Returns the keep stack.
def keep_start
  @keeps << [@scanner.pos, nil]
end
No documentation available
lib/csv/parser.rb
View on GitHub
# File tmp/rubies/ruby-2.7.6/lib/csv/parser.rb, line 206
# Replaces the current scanner with one over the next piece of input.
#
# Before switching, the data covered so far by the innermost open keep is
# copied into that keep's buffer and its offset is reset to 0, so the keep
# stays valid once the current scanner is gone.
#
# A StringIO input is read in full and then removed from @inputs. Any other
# input is read with +gets(nil, @chunk_size)+; it is removed once it
# reports +eof?+ (when it responds to it) or once +gets+ returns no data.
# When an input yields no data, the scanner is set to an empty string in
# @encoding and the following input, if any, is tried.
#
# Returns true when a new chunk was loaded, false when there is nothing
# more to read. Raises InvalidEncoding when the data read is not valid in
# its encoding.
def read_chunk
  return false if @last_scanner

  unless @keeps.empty?
    innermost = @keeps.last
    from = innermost[0]
    covered = @scanner.string.byteslice(from, @scanner.pos - from)
    if covered
      if innermost[1]
        innermost[1] << covered
      else
        innermost[1] = covered.dup
      end
    end
    # The keep now continues from the start of the next scanner.
    innermost[0] = 0
  end

  source = @inputs.first

  if source.is_a?(StringIO)
    content = source.read
    raise InvalidEncoding unless content.valid_encoding?
    @scanner = StringScanner.new(content)
    @inputs.shift
    @last_scanner = @inputs.empty?
    return true
  end

  chunk = source.gets(nil, @chunk_size)
  unless chunk
    # This input is exhausted: drop it and move on to the next, if any.
    @scanner = StringScanner.new("".encode(@encoding))
    @inputs.shift
    @last_scanner = @inputs.empty?
    return false if @last_scanner
    return read_chunk
  end

  raise InvalidEncoding unless chunk.valid_encoding?
  @scanner = StringScanner.new(chunk)
  if source.respond_to?(:eof?) && source.eof?
    @inputs.shift
    @last_scanner = @inputs.empty?
  end
  true
end
No documentation available
#
lib/csv/parser.rb
View on GitHub
# File tmp/rubies/ruby-2.7.6/lib/csv/parser.rb, line 201
# Returns the part of the current scanner's string that has not been
# scanned yet, by delegating to the scanner's own +rest+. Data from inputs
# that have not been read yet is not included.
def rest
@scanner.rest
end
No documentation available
lib/csv/parser.rb
View on GitHub
# File tmp/rubies/ruby-2.7.6/lib/csv/parser.rb, line 140
# Tries to match +pattern+ at the current position of the current scanner.
#
# Returns the matched string, or nil when there is no match. After a
# successful match that consumes the rest of the current chunk, the next
# chunk is loaded, unless the current scanner is already the last one.
def scan(pattern)
  matched = @scanner.scan(pattern)
  return matched if @last_scanner || matched.nil?

  read_chunk if @scanner.eos?
  matched
end
No documentation available
lib/csv/parser.rb
View on GitHub
# File tmp/rubies/ruby-2.7.6/lib/csv/parser.rb, line 152
# Like scan, but keeps matching +pattern+ across chunk boundaries.
#
# After an initial match, as long as the current chunk has been fully
# consumed and read_chunk loads another one, +pattern+ is matched again at
# the start of the new chunk and the result is appended to the value.
#
# Returns the accumulated match, or nil when the initial match fails. When
# the current scanner is already the last one, the initial result is
# returned as is.
def scan_all(pattern)
  collected = @scanner.scan(pattern)
  return collected if @last_scanner || collected.nil?

  while @scanner.eos? && read_chunk
    piece = @scanner.scan(pattern)
    break unless piece

    collected << piece
  end
  collected
end
No documentation available