# csv.rb   [plain text]


# CSV -- module for generating/parsing CSV data.
  
# $Id: csv.rb,v 1.1.1.1 2003/10/15 10:11:48 melville Exp $
  
# This module is copyrighted free software by NAKAMURA, Hiroshi.
# You can redistribute it and/or modify it under the same term as Ruby.
  
  
class CSV
public

  # DESCRIPTION
  #   CSV::Cell -- Describes 1 cell of CSV.
  #   
  class Cell
  public

    # Datum as string.
    attr_accessor :data

    # Is this datum null?
    attr_accessor :is_null

    # SYNOPSIS
    #   cell = CSV::Cell.new(data = '', is_null = true)
    #
    # ARGS
    #   data: datum as String
    #   is_null: is this datum null?
    #
    # RETURNS
    #   cell: Created instance.
    #
    # DESCRIPTION
    #   Create instance.  If is_null is true, datum is stored in the instance
    #   created but it should be treated as 'NULL'.
    #
    def initialize(data = '', is_null = true)
      @data = data
      @is_null = is_null
    end

    # SYNOPSIS
    #   CSV::Cell#match(rhs)
    #
    # ARGS
    #   rhs: an instance of CSV::Cell to be compared.
    #
    # RETURNS
    #   true if both cells are Null, false if exactly one of them is Null,
    #   otherwise the result of comparing both data with ==.
    #
    # DESCRIPTION
    #   Compare another cell with me.  Bear in mind Null matches with Null
    #   using this method.  Use CSV::Cell#== if you want Null to never match
    #   other data, including Null.
    #
    def match(rhs)
      if @is_null && rhs.is_null
        true
      elsif @is_null || rhs.is_null
        false
      else
        @data == rhs.data
      end
    end

    # SYNOPSIS
    #   CSV::Cell#==(rhs)
    #
    # ARGS
    #   rhs: an instance of CSV::Cell to be compared.  Any object that does
    #     not respond to both 'is_null' and 'data' compares as not equal.
    #
    # RETURNS
    #   false if either side is Null or rhs is not cell-like, otherwise the
    #   result of comparing both data with ==.
    #
    # DESCRIPTION
    #   Compare another cell with me.  Bear in mind Null does not match Null
    #   using this method.  Null never matches other data, including Null.
    #   Use CSV::Cell#match if you want Null to match Null.
    #
    def ==(rhs)
      # An equality operator should answer false, not raise NoMethodError,
      # when it is handed nil, a String or any other non-cell object.
      return false unless rhs.respond_to?(:is_null) && rhs.respond_to?(:data)
      return false if @is_null || rhs.is_null
      @data == rhs.data
    end
  end
  

  # DESCRIPTION
  #   CSV::Row -- Describes a row of CSV.  Each element must be a CSV::Cell.
  #   
  class Row < Array
  public

    # SYNOPSIS
    #   CSV::Row#to_a
    #
    # RETURNS
    #   An Array of String (nil for every Null cell).
    #
    # DESCRIPTION
    #   Unwrap each CSV::Cell into its datum.  Null is converted to nil.
    #
    def to_a
      map do |cell|
        if cell.is_null
          nil
        else
          cell.data
        end
      end
    end

    # SYNOPSIS
    #   CSV::Row#match(rhs)
    #
    # ARGS
    #   rhs: an Array of cells.  Each cell is an instance of CSV::Cell.
    #     Only 'size' and '[](idx)' are called on it.
    #
    # RETURNS
    #   true when both rows have the same size and every cell matches the
    #   cell at the same position (see CSV::Cell#match); false otherwise.
    #
    # DESCRIPTION
    #   Compare another row with me.
    #
    def match(rhs)
      return false if size != rhs.size
      # Stops at the first pair of cells that does not match.
      (0...size).all? { |pos| self[pos].match(rhs[pos]) }
    end
  end


  # SYNOPSIS
  #   1. reader = CSV.open(filename, 'r')
  #
  #   2. CSV.open(filename, 'r') do |row|
  #        ...
  #      end
  #
  #   3. writer = CSV.open(filename, 'w')
  #
  #   4. CSV.open(filename, 'w') do |writer|
  #        ...
  #      end
  #
  # ARGS
  #   filename: filename to open.
  #   mode: 'r' for read (parse)
  #         'w' for write (generate)
  #   row: an Array of cells which is a parsed line.
  #   writer: Created writer instance.  See CSV::Writer#<< and
  #     CSV::Writer#add_row to know how to generate CSV string.
  #
  # RETURNS
  #   reader: Create reader instance.  To get parse result, see
  #     CSV::Reader#each.
  #   writer: Created writer instance.  See CSV::Writer#<< and
  #     CSV::Writer#add_row to know how to generate CSV string.
  #
  # DESCRIPTION
  #   Open a CSV formatted file to read or write.
  #
  # EXAMPLE 1
  #   reader = CSV.open('csvfile.csv', 'r')
  #   row1 = reader.shift
  #   row2 = reader.shift
  #   if row2.empty?
  #     p 'row2 not found.'
  #   end
  #   reader.close
  #   
  # EXAMPLE 2
  #   CSV.open('csvfile.csv', 'r') do |row|
  #     p row
  #   end
  #      
  # EXAMPLE 3
  #   writer = CSV.open('csvfile.csv', 'w')
  #   writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
  #   writer.close
  #   
  # EXAMPLE 4
  #   CSV.open('csvfile.csv', 'w') do |writer|
  #     writer << ['r1c1', 'r1c2']
  #     writer << ['r2c1', 'r2c2']
  #     writer << [nil, nil]
  #   end
  #
  def CSV.open(filename, mode, col_sep = ?,, &block)
    # Files are always opened in binary mode by the helpers, so 'r'/'rb'
    # and 'w'/'wb' are synonyms here.
    if mode == 'r' || mode == 'rb'
      return open_reader(filename, col_sep, &block)
    end
    if mode == 'w' || mode == 'wb'
      return open_writer(filename, col_sep, &block)
    end
    raise ArgumentError, "'mode' must be 'r', 'rb', 'w', or 'wb'"
  end

  # SYNOPSIS
  #   reader = CSV.parse(filename, col_sep = ?,)
  #   CSV.parse(filename, col_sep = ?,) do |row| ... end
  #
  # DESCRIPTION
  #   Open the file for reading; does the same as CSV.open with mode 'r'.
  #   With a block, each parsed row is yielded and nil is returned.
  #   Without a block, a reader is returned.
  #
  def CSV.parse(filename, col_sep = ?,, &block)
    open_reader(filename, col_sep, &block)
  end

  # SYNOPSIS
  #   writer = CSV.generate(filename, col_sep = ?,)
  #   CSV.generate(filename, col_sep = ?,) do |writer| ... end
  #
  # DESCRIPTION
  #   Open the file for writing; does the same as CSV.open with mode 'w'.
  #   With a block, the writer is yielded and nil is returned.  Without a
  #   block, the writer is returned.
  #
  def CSV.generate(filename, col_sep = ?,, &block)
    open_writer(filename, col_sep, &block)
  end

  # Private class methods.
  class << self
  private
    # Open 'filename' in binary read mode.
    # With a block: yield every parsed row, close the file, return nil.
    # Without a block: return a reader that closes the file on #close.
    def open_reader(filename, col_sep, &block)
      unless block
        io = File.open(filename, 'rb')
        reader = CSV::Reader.create(io, col_sep)
        reader.close_on_terminate
        return reader
      end
      # The block form of File.open closes the file even if parsing or the
      # caller's block raises.
      File.open(filename, 'rb') do |io|
        CSV::Reader.parse(io, col_sep) { |row| yield(row) }
      end
      nil
    end

    # Open 'filename' in binary write mode.
    # With a block: yield the writer, close the file, return nil.
    # Without a block: return a writer that closes the file on #close.
    def open_writer(filename, col_sep, &block)
      unless block
        io = File.open(filename, 'wb')
        writer = CSV::Writer.create(io, col_sep)
        writer.close_on_terminate
        return writer
      end
      # The block form of File.open closes the file even if the caller's
      # block raises.
      File.open(filename, 'wb') do |io|
        CSV::Writer.generate(io, col_sep) { |writer| yield(writer) }
      end
      nil
    end
  end


  # DESCRIPTION
  #   CSV::Reader -- CSV formatted string/stream reader.
  #   
  # EXAMPLE
  #   Read CSV lines until the first column is 'stop'.
  #
  #   CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
  #     p row
  #     break if !row[0].is_null && row[0].data == 'stop'
  #   end
  #
  class Reader
    include Enumerable
  public

    # SYNOPSIS
    #   reader = CSV::Reader.create(str_or_readable)
    #
    # ARGS
    #   str_or_readable: a CSV data to be parsed.  A String or an IO.
    #   col_sep: Column separator.  ?, by default.
    #
    # RETURNS
    #   reader: Created instance.  A CSV::StringReader for a String, a
    #     CSV::IOReader for everything else.
    #
    # DESCRIPTION
    #   Create instance.  To get parse result, see CSV::Reader#each.
    #
    def Reader.create(str_or_readable, col_sep = ?,)
      if str_or_readable.is_a?(String)
        StringReader.new(str_or_readable, col_sep)
      else
        # IO instances and any other readable object share the IO reader.
        IOReader.new(str_or_readable, col_sep)
      end
    end

    # SYNOPSIS
    #   CSV::Reader.parse(str_or_readable) do |row|
    #     ...
    #   end
    #
    # ARGS
    #   str_or_readable: a CSV data to be parsed.  A String or an IO.
    #   col_sep: Column separator.  ?, by default.
    #   row: a CSV::Row; an Array of a CSV::Cell in a line.
    #
    # RETURNS
    #   nil
    #
    # DESCRIPTION
    #   Parse CSV data and get lines.  Caller block is called for each line
    #   with an argument which is a chunk of cells in a row.
    #
    #   Rows are not cached for performance reason.  The reader is closed
    #   when parsing finishes, and also when the caller block leaves early
    #   with 'break' or raises.
    #
    def Reader.parse(str_or_readable, col_sep = ?,)
      reader = create(str_or_readable, col_sep)
      begin
        reader.each do |row|
          yield(row)
        end
      ensure
        # Without ensure, a 'break' inside the caller block would skip the
        # close below and leave the reader's buffers unreleased.
        reader.close
      end
      nil
    end

    # SYNOPSIS
    #   CSV::Reader#each do |row|
    #     ...
    #   end
    #
    # ARGS
    #   row: a CSV::Row; an Array of a CSV::Cell in a line.
    #
    # RETURNS
    #   nil
    #
    # DESCRIPTION
    #   Caller block is called for each line with an argument which is a chunk
    #   of cells in a row.  Iteration stops at the first row for which no
    #   cell could be parsed.  Rows are not cached for performance reason.
    #
    def each
      while true
        row = Row.new
        break if get_row(row) == 0
        yield(row)
      end
      nil
    end

    # SYNOPSIS
    #   row = CSV::Reader#shift
    #
    # RETURNS
    #   row: a CSV::Row; an Array of a CSV::Cell.  Empty when no cell could
    #     be parsed.
    #
    # DESCRIPTION
    #   Extract cells of next line.
    #
    def shift
      row = Row.new
      get_row(row)
      row
    end

    # SYNOPSIS
    #   CSV::Reader#close
    #
    # RETURNS
    #   Whatever the subclass's terminate returns (nil in this class).
    #
    # DESCRIPTION
    #   Close this reader.
    #
    def close
      terminate
    end

  private
    # This class is abstract; only subclasses that override initialize can
    # be instantiated.
    def initialize(dev)
      raise RuntimeError.new('Do not instanciate this class directly.')
    end

    # Fill 'row' with the cells of the next line and return how many cells
    # were parsed.  Must be defined in a derived class.
    def get_row(row)
      raise NotImplementedError.new('Method get_row must be defined in a derived class.')
    end

    # Hook invoked by #close.
    def terminate
      # Define if needed.
    end
  end
  

  # DESCRIPTION
  #   CSV::StringReader -- CSV formatted string reader.
  #   
  # EXAMPLE
  #   Read CSV lines until the first column is 'stop'.
  #
  #   CSV::Reader.parse("a,b\r\nstop,c\r\nd,e\r\n") do |row|
  #     p row
  #     break if !row[0].is_null && row[0].data == 'stop'
  #   end
  #
  class StringReader < Reader
  public

    # SYNOPSIS
    #   reader = CSV::StringReader.new(string)
    #
    # ARGS
    #   string: a CSV String to be parsed.
    #   col_sep: Column separator.  ?, by default.
    #
    # RETURNS
    #   reader: Created instance.
    #
    # DESCRIPTION
    #   Create instance.  To get parse result, see CSV::Reader#each.
    #   A leading "\xef\xbb\xbf" (UTF-8 byte order mark) is skipped.
    #
    def initialize(string, col_sep = ?,)
      @col_sep = col_sep
      @dev = string
      @idx = 0
      if @dev[0, 3] == "\xef\xbb\xbf"
        @idx += 3
      end
    end

  private
    # Parse the next line into 'row' and return the number of parsed cells.
    # Raises IllegalFormatError when nothing could be parsed although the
    # end of the string has not been reached.
    def get_row(row)
      parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep)
      if parsed_cells == 0 && next_idx == 0
        if @idx != @dev.size
          raise IllegalFormatError.new
        end
        # End of string.  CSV.parse_row reports index 0 in this case; storing
        # it would rewind the reader, so a later #shift would hand out the
        # first line again.  Keep @idx parked at the end instead.
        return 0
      end
      @idx = next_idx
      parsed_cells
    end
  end


  # DESCRIPTION
  #   CSV::IOReader -- CSV formatted stream reader.
  #   
  # EXAMPLE
  #   Read CSV lines until the first column is 'stop'.
  #
  #   CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
  #     p row
  #     break if !row[0].is_null && row[0].data == 'stop'
  #   end
  #
  class IOReader < Reader
  public

    # SYNOPSIS
    #   reader = CSV::IOReader.new(io)
    #
    # ARGS
    #   io: a CSV data to be parsed.  Must be an IO. (io#read is called.)
    #   col_sep: Column separator.  ?, by default.
    #
    # RETURNS
    #   reader: Created instance.
    #
    # DESCRIPTION
    #   Create instance.  To get parse result, see CSV::Reader#each.
    #   The IO is read through a CSV::IOBuf.  Parsing starts after the first
    #   three chars when they compare equal to 0xef, 0xbb and 0xbf (the
    #   UTF-8 byte order mark).
    #
    def initialize(io, col_sep = ?,)
      @io = io
      @col_sep = col_sep
      @close_on_terminate = false
      @dev = CSV::IOBuf.new(@io)
      starts_with_bom = (@dev[0] == 0xef && @dev[1] == 0xbb && @dev[2] == 0xbf)
      @idx = starts_with_bom ? 3 : 0
    end

    # SYNOPSIS
    #   CSV::IOReader#close_on_terminate
    #
    # RETURNS
    #   true
    #
    # DESCRIPTION
    #   Tell this reader to close the IO when terminated (Triggered by invoking
    #   CSV::IOReader#close).
    #
    def close_on_terminate
      @close_on_terminate = true
    end

  private
    # Parse the next line into 'row' and return the number of parsed cells.
    # Raises IllegalFormatError when nothing could be parsed although the
    # buffer is not at end of stream.
    def get_row(row)
      count, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep)
      nothing_parsed = (count == 0 && next_idx == 0)
      raise IllegalFormatError.new if nothing_parsed && !@dev.is_eos?
      # Discard what has been consumed and rebase the index onto the part
      # of the buffer that remains.
      @idx = next_idx - @dev.drop(next_idx)
      count
    end

    # Close the IO if requested via close_on_terminate, then close the buffer.
    def terminate
      @io.close if @close_on_terminate
      @dev.close if @dev
    end
  end


  # DESCRIPTION
  #   CSV::Writer -- CSV formatted string/stream writer.
  #   
  # EXAMPLE
  #   Write rows to 'csvout' file.
  #
  #   outfile = File.open('csvout', 'wb')
  #   CSV::Writer.generate(outfile) do |csv|
  #     csv << ['c1', nil, '', '"', "\r\n", 'c2']
  #     # or
  #     csv.add_row [
  #       CSV::Cell.new('c1', false),
  #       CSV::Cell.new('dummy', true),
  #       CSV::Cell.new('', false),
  #       CSV::Cell.new('"', false),
  #       CSV::Cell.new("\r\n", false),
  #       CSV::Cell.new('c2', false)
  #     ]
  #     ...
  #     ...
  #   end
  #
  #   outfile.close
  #
  class Writer
  public

    # SYNOPSIS
    #   writer = CSV::Writer.create(str_or_writable)
    #
    # ARGS
    #   str_or_writable: device for generated CSV string.  Must respond to
    #     '<<(string)'.
    #   col_sep: Column separator.  ?, by default.
    #
    # RETURNS
    #   writer: Created instance (a CSV::BasicWriter).
    #
    # DESCRIPTION
    #   Create instance.  To add CSV data to generate CSV string, see
    #   CSV::Writer#<< or CSV::Writer#add_row.
    #
    def Writer.create(str_or_readable, col_sep = ?,)
      BasicWriter.new(str_or_readable, col_sep)
    end

    # SYNOPSIS
    #   CSV::Writer.generate(str_or_writable) do |writer|
    #     ...
    #   end
    #
    # ARGS
    #   str_or_writable: device for generated CSV string.  Must respond to
    #     '<<(string)'.
    #   col_sep: Column separator.  ?, by default.
    #   writer: Created writer instance.  See CSV::Writer#<< and
    #     CSV::Writer#add_row to know how to generate CSV string.
    #
    # RETURNS
    #   nil
    #
    # DESCRIPTION
    #   Create writer instance.  Caller block is called with the new instance.
    #   The writer is closed when the block finishes, and also when the block
    #   leaves early with 'break' or raises.
    #
    def Writer.generate(str_or_writable, col_sep = ?,)
      writer = Writer.create(str_or_writable, col_sep)
      begin
        yield(writer)
      ensure
        # Without ensure, a 'break' or an exception inside the caller block
        # would skip the close.
        writer.close
      end
      nil
    end

    # SYNOPSIS
    #   CSV::Writer#<<(row)
    #
    # ARGS
    #   row: an Array of a String.  nil items become Null cells, CSV::Cell
    #     items are used as they are, anything else is converted with to_s.
    #
    # RETURNS
    #   self
    #
    # DESCRIPTION
    #   Dump CSV stream to the device.  Argument is an array of a String like
    #   ['c1', 'c2', 'c3'].
    #
    def <<(ary)
      row = ary.map do |item|
        if item.is_a?(Cell)
          item
        elsif item.nil?
          Cell.new('', true)
        else
          Cell.new(item.to_s, false)
        end
      end
      CSV.generate_row(row, row.size, @dev, @col_sep)
      self
    end

    # SYNOPSIS
    #   CSV::Writer#add_row(row)
    #
    # ARGS
    #   row: an Array of a CSV::Cell.
    #
    # RETURNS
    #   self
    #
    # DESCRIPTION
    #   Dump CSV stream to the device.  Argument is an array of a CSV::Cell
    #   like [CSV::Cell.new('c1', false), CSV::Cell.new('dummy', true)].
    #   (Former is 'c1' and latter is Null.)
    #
    def add_row(row)
      CSV.generate_row(row, row.size, @dev, @col_sep)
      self
    end

    # SYNOPSIS
    #   CSV::Writer#close
    #
    # RETURNS
    #   Whatever the subclass's terminate returns (nil in this class).
    #
    # DESCRIPTION
    #   Close this writer.
    #
    def close
      terminate
    end

  private
    # This class is abstract; only subclasses that override initialize can
    # be instantiated.
    def initialize(dev)
      raise RuntimeError.new('Do not instanciate this class directly.')
    end

    # Hook invoked by #close.
    def terminate
      # Define if needed.
    end
  end


  # DESCRIPTION
  #   CSV::BasicWriter -- CSV formatted string/stream writer using <<.
  #   
  class BasicWriter < Writer
  public

    # SYNOPSIS
    #   writer = CSV::BasicWriter.new(str_or_writable)
    #
    # ARGS
    #   str_or_writable: device for generated CSV string.  Must respond to
    #     '<<(string)'.
    #   col_sep: Column separator.  ?, by default.
    #
    # RETURNS
    #   writer: Created instance.
    #
    # DESCRIPTION
    #   Create instance.  The device is not closed by #close unless
    #   close_on_terminate has been called.  To add CSV data, see
    #   CSV::Writer#<< or CSV::Writer#add_row.
    #
    def initialize(str_or_writable, col_sep = ?,)
      @dev = str_or_writable
      @col_sep = col_sep
      @close_on_terminate = false
    end

    # SYNOPSIS
    #   CSV::BasicWriter#close_on_terminate
    #
    # RETURNS
    #   true
    #
    # DESCRIPTION
    #   Tell this writer to close the device when terminated (Triggered by
    #   invoking CSV::BasicWriter#close).
    #
    def close_on_terminate
      @close_on_terminate = true
    end

  private
    # Close the device, but only if close_on_terminate was requested.
    def terminate
      @dev.close if @close_on_terminate
    end
  end

  # SYNOPSIS
  #   cells = CSV.parse_line(src, col_sep = ?,)
  #
  # ARGS
  #   src: a CSV String.
  #   col_sep: Column separator.  ?, by default.  If you want to separate
  #     fields with semicolon, give ?; here.
  #
  # RETURNS
  #   cells: an Array of parsed cells in first line.  Each cell is a String.
  #
  # DESCRIPTION
  #   Parse one line from given string.  Bear in mind it parses ONE LINE.  Rest
  #   of the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the
  #   second line 'c,d' is ignored.
  #
  #   If you don't know whether a target string to parse is exactly 1 line or
  #   not, use CSV.parse_row instead of this method.
  #   
  def CSV.parse_line(src, col_sep = ?,)
    fields = Row.new
    pos = 0
    status = :DT_COLSEP
    # Keep reading cells for as long as the previous one ended at a column
    # separator; a row separator or the end of the string ends the line.
    while status.equal?(:DT_COLSEP)
      cell = Cell.new
      status, pos = parse_body(src, pos, cell, col_sep)
      fields << (cell.is_null ? nil : cell.data)
    end
    fields
  rescue IllegalFormatError
    # Malformed input yields an empty row rather than an exception.
    Row.new
  end
  

  # SYNOPSIS
  #   str = CSV.generate_line(cells, col_sep = ?,)
  #
  # ARGS
  #   cells: an Array of cell to be converted to CSV string.  Each cell must 
  #     respond to 'to_s'.
  #   col_sep: Column separator.  ?, by default.  If you want to separate
  #     fields with semicolon, give ?; here.
  #
  # RETURNS
  #   str: a String of generated CSV string.
  #
  # DESCRIPTION
  #   Create a line from cells.  Each cell is stringified by to_s.
  #   
  def CSV.generate_line(cells, col_sep = ?,)
    count = cells.size
    return '' if count == 0
    line = ''
    count.times do |pos|
      # Separators go between cells only; no row separator is appended.
      generate_separator(:DT_COLSEP, line, col_sep) if pos > 0
      item = cells[pos]
      cell = item.nil? ? Cell.new('', true) : Cell.new(item.to_s, false)
      generate_body(cell, line, col_sep)
    end
    line
  end
  
  # SYNOPSIS
  #   parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,)
  #
  # ARGS
  #   src: a CSV data to be parsed.  Must respond '[](idx)'.
  #     src[](idx) must return a char. (Not a string such as 'a', but 97).
  #     src[](idx_out_of_bounds) must return nil.  A String satisfies this
  #     requirement.
  #   idx: index of parsing location of 'src'.  0 origin.
  #   out_dev: buffer for parsed cells.  Must respond '<<(CSV::Cell)'.
  #   col_sep: Column separator.  ?, by default.  If you want to separate
  #     fields with semicolon, give ?; here.
  #
  # RETURNS
  #   parsed_cells: num of parsed cells.
  #   idx: index of next parsing location of 'src'.
  #
  # DESCRIPTION
  #   Parse a line from string.  To parse lines in CSV string, see EXAMPLE
  #   below.
  #
  # EXAMPLE
  #   src = "a,b\r\nc,d\r\ne,f"
  #   idx = 0
  #   begin
  #     parsed = []
  #     parsed_cells, idx = CSV.parse_row(src, idx, parsed)
  #     puts "Parsed #{ parsed_cells } cells."
  #     p parsed
  #   end while parsed_cells > 0
  #   
  def CSV.parse_row(src, idx, out_dev, col_sep = ?,)
    start_idx = idx
    count = 0
    status = :DT_COLSEP
    begin
      until status.equal?(:DT_ROWSEP)
        cell = Cell.new
        status, idx = parse_body(src, idx, cell, col_sep)
        if status.equal?(:DT_EOS)
          # Nothing was consumed at all: there is no row left to report.
          return 0, 0 if idx == start_idx
          # Otherwise the end of the source also ends the current row.
          status = :DT_ROWSEP
        end
        count += 1
        out_dev << cell
      end
    rescue IllegalFormatError
      # Malformed input is reported as "0 cells, index 0".  Cells pushed to
      # out_dev before the error are left there.
      return 0, 0
    end
    return count, idx
  end
  
  # SYNOPSIS
  #   parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,)
  #
  # ARGS
  #   src: an Array of CSV::Cell to be converted to CSV string.  Must respond to
  #     'size' and '[](idx)'.  src[idx] must return CSV::Cell.
  #   cells: num of cells in a line.
  #   out_dev: buffer for generated CSV string.  Must respond to '<<(string)'.
  #   col_sep: Column separator.  ?, by default.  If you want to separate
  #     fields with semicolon, give ?; here.
  #
  # RETURNS
  #   parsed_cells: num of converted cells.
  #
  # DESCRIPTION
  #   Convert a line from cells data to string.  To generate multi-row CSV
  #   string,  See EXAMPLE below.
  #
  # EXAMPLE
  #   def d(str)
  #     CSV::Cell.new(str, false)
  #   end
  #
  #   row1 = [d('a'), d('b')]
  #   row2 = [d('c'), d('d')]
  #   row3 = [d('e'), d('f')]
  #   src = [row1, row2, row3]
  #   buf = ''
  #   src.each do |row|
  #     parsed_cells = CSV.generate_row(row, 2, buf)
  #     puts "Created #{ parsed_cells } cells."
  #   end
  #   p buf
  #   
  def CSV.generate_row(src, cells, out_dev, col_sep = ?,)
    total = src.size
    if total == 0
      # An empty source still terminates the row when zero cells were asked for.
      generate_separator(:DT_ROWSEP, out_dev, col_sep) if cells == 0
      return 0
    end
    # The first cell is always written, even when 'cells' is 0.
    generate_body(src[0], out_dev, col_sep)
    written = 1
    until written >= cells || written == total
      generate_separator(:DT_COLSEP, out_dev, col_sep)
      generate_body(src[written], out_dev, col_sep)
      written += 1
    end
    # The row is terminated only when exactly 'cells' cells were written;
    # otherwise a column separator is emitted instead.
    closing = (written == cells) ? :DT_ROWSEP : :DT_COLSEP
    generate_separator(closing, out_dev, col_sep)
    written
  end
  
private
  # Raised by the parser when the input is not well-formed CSV.
  # CSV.parse_line and CSV.parse_row rescue it; the reader classes raise it
  # from get_row.
  class IllegalFormatError < RuntimeError
  end

  # Private class methods.
  class << self
  private

    def parse_body(src, idx, cell, col_sep)
      cell.is_null = false
      state = :ST_START
      quoted = false
      cr = false
      c = nil
      while (c = src[idx])
        idx += 1
        result_state = :DT_UNKNOWN
        if (c == col_sep)
          if state.equal?(:ST_DATA)
            if cr
              raise IllegalFormatError.new
            end
            if (!quoted)
              state = :ST_END
              result_state = :DT_COLSEP
            else
              cell.data << c.chr
            end
          elsif state.equal?(:ST_QUOTE)
            if cr
              raise IllegalFormatError.new
            end
            state = :ST_END
            result_state = :DT_COLSEP
          else  # :ST_START
            cell.is_null = true
            state = :ST_END
            result_state = :DT_COLSEP
          end
        elsif (c == ?")         # " for vim syntax hilighting.
          if state.equal?(:ST_DATA)
            if cr
              raise IllegalFormatError.new
            end
            if quoted
              quoted = false
              state = :ST_QUOTE
            else
              raise IllegalFormatError.new
            end
          elsif state.equal?(:ST_QUOTE)
            cell.data << c.chr
            quoted = true
            state = :ST_DATA
          else  # :ST_START
            quoted = true
            state = :ST_DATA
          end
        elsif (c == ?\r)
          if cr
            raise IllegalFormatError.new
          end
          if quoted
            cell.data << c.chr
            state = :ST_DATA
          else
            cr = true
          end
        elsif (c == ?\n)
          if state.equal?(:ST_DATA)
            if cr
              state = :ST_END
              result_state = :DT_ROWSEP
              cr = false
            else
              if quoted
                cell.data << c.chr
                state = :ST_DATA
              else
                state = :ST_END
                result_state = :DT_ROWSEP
              end
            end
          elsif state.equal?(:ST_QUOTE)
            state = :ST_END
            result_state = :DT_ROWSEP
            if cr
              cr = false
            end
          else  # :ST_START
            cell.is_null = true
            state = :ST_END
            result_state = :DT_ROWSEP
          end
        else
          if state.equal?(:ST_DATA) || state.equal?(:ST_START)
            if cr
              raise IllegalFormatError.new
            end
            cell.data << c.chr
            state = :ST_DATA
          else  # :ST_QUOTE
            raise IllegalFormatError.new
          end
        end
        if state.equal?(:ST_END)
          return result_state, idx;
        end
      end
      if state.equal?(:ST_START)
        cell.is_null = true
      elsif state.equal?(:ST_QUOTE)
        true    # dummy for coverate; only a data
      elsif quoted
        raise IllegalFormatError.new
      elsif cr
        raise IllegalFormatError.new
      end
      return :DT_EOS, idx
    end
  
    def generate_body(cells, out_dev, col_sep)
      row_data = cells.data.dup
      if (!cells.is_null)
        if (row_data.gsub!('"', '""') ||
            row_data.include?(col_sep) ||
            (/[\r\n]/ =~ row_data) || (cells.data.empty?))
          out_dev << '"' << row_data << '"'
        else
          out_dev << row_data
        end
      end
    end
    
    def generate_separator(type, out_dev, col_sep)
      case type
      when :DT_COLSEP
        out_dev << col_sep.chr
      when :DT_ROWSEP
        out_dev << "\r\n"
      end
    end
  end


  # DESCRIPTION
  #   CSV::StreamBuf -- a class for a buffered stream.
  #
  # EXAMPLE 1 -- an IO.
  #   class MyBuf < StreamBuf
  #     # Do initialize myself before a super class.  Super class might call my
  #     # method 'read'. (Could be awful for C++ user. :-)
  #     def initialize(s)
  #       @s = s
  #       super()
  #     end
  #
  #     # define my own 'read' method.
  #     # CAUTION: Returning nil means EndOfStream.
  #     def read(size)
  #       @s.read(size)
  #     end
  #
  #     # release buffers. in Ruby which has GC, you do not have to call this...
  #     def terminate
  #       @s = nil
  #       super()
  #     end
  #   end
  #
  #   buf = MyBuf.new(STDIN)
  #   my_str = ''
  #   p buf[0, 0]               # => '' (null string)
  #   p buf[0]                  # => 97 (char code of 'a')
  #   p buf[0, 1]               # => 'a'
  #   my_str = buf[0, 5]
  #   p my_str                  # => 'abcde' (5 chars)
  #   p buf[0, 6]               # => "abcde\n" (6 chars)
  #   p buf[0, 7]               # => "abcde\n" (6 chars)
  #   p buf.drop(3)             # => 3 (dropped chars)
  #   p buf.get(0, 2)           # => 'de' (2 chars)
  #   p buf.is_eos?             # => false (is not EOS here)
  #   p buf.drop(5)             # => 3 (dropped chars)
  #   p buf.is_eos?             # => true (is EOS here)
  #   p buf[0]                  # => nil (is EOS here)
  #
  # EXAMPLE 2 -- String.
  #   This is a conceptual example.  No pros with this.
  # 
  #   class StrBuf < StreamBuf
  #     def initialize(s)
  #       @str = s
  #       @idx = 0
  #       super()
  #     end
  #   
  #     def read(size)
  #       str = @str[@idx, size]
  #       @idx += str.size
  #       str
  #     end
  #   end
  #
  class StreamBuf       # pure virtual. (do not instantiate it directly)
  public

    # SYNOPSIS
    #   char/str = CSV::StreamBuf#get(idx, n = nil)
    #   char/str = CSV::StreamBuf#[idx, n = nil]
    #
    # ARGS
    #   idx: index of a string to specify a start point of a string to get.
    #     Unlike String instance, idx < 0 returns nil.
    #   n: size of a string to get.
    #
    # RETURNS
    #   char: if n == nil.  A char at idx.
    #   str: if n != nil.  A partial string, from idx to (idx + size).  At
    #     EOF, the string size might not be equal to arg n.
    #   nil: if idx < 0 or the stream is already at EOS.
    #
    # DESCRIPTION
    #   Get a char or a partial string from the stream.  Further chunks are
    #   read from the source on demand.
    #
    def [](idx, n = nil)
      return nil if idx < 0
      if idx_is_eos?(idx)
        # Like a String, 'abc'[4, 1] returns nil and
        # 'abc'[3, 1] returns '' not nil.
        return (n && (@offset + idx == buf_size(@cur_buf))) ? '' : nil
      end
      my_buf = @cur_buf
      my_offset = @offset
      next_idx = idx
      # Walk forward, chunk by chunk, until 'next_idx' falls inside 'my_buf',
      # reading another chunk whenever the last buffered one is passed.
      while my_offset + next_idx >= buf_size(my_buf)
        if my_buf == @buf_tail_idx
          break unless add_buf
        end
        next_idx = my_offset + next_idx - buf_size(my_buf)
        my_buf += 1
        my_offset = 0
      end
      loc = my_offset + next_idx
      if !n
        @buf_list[my_buf][loc]          # A single char.
      elsif loc + n - 1 < buf_size(my_buf)
        @buf_list[my_buf][loc, n]       # String, entirely inside one chunk.
      else # should loop instead of making a (tail) recursive call...
        # The request crosses the end of this chunk: take the tail of the
        # chunk and fetch the remainder recursively.
        res = @buf_list[my_buf][loc, BufSize]
        size_added = buf_size(my_buf) - loc
        if size_added > 0
          idx += size_added
          n -= size_added
          ret = self[idx, n]
          res << ret if ret
        end
        res
      end
    end
    alias get []

    # SYNOPSIS
    #   size_dropped = CSV::StreamBuf#drop(n)
    #
    # ARGS
    #   n: drop size
    #
    # RETURNS
    #   size_dropped: dropped size.  At EOF, dropped size might not be equal
    #     to arg n.  0 if n <= 0.
    #
    # DESCRIPTION
    #   Drop a string from the stream.  Once you drop the head of the stream,
    #   access to the dropped part via [] or get returns nil.
    #
    def drop(n)
      return 0 if is_eos?
      size_dropped = 0
      while n > 0
        # Everything buffered has been consumed and the source is exhausted.
        # This state is reached when an earlier look-ahead via [] already
        # hit EOS; without this exit the loop would spin forever because
        # nothing below would change 'n'.
        break if @is_eos && (@cur_buf == @buf_tail_idx)
        if @offset + n < buf_size(@cur_buf)
          # The rest of the request fits into the current chunk.
          size_dropped += n
          @offset += n
          n = 0
        else
          # Drop the remainder of the current chunk and move to the next
          # one, reading it from the source if none is buffered.
          size = buf_size(@cur_buf) - @offset
          size_dropped += size
          n -= size
          @offset = 0
          unless rel_buf
            break unless add_buf
            @cur_buf = @buf_tail_idx
          end
        end
      end
      size_dropped
    end

    # SYNOPSIS
    #   is_eos = CSV::StreamBuf#is_eos?
    #
    # RETURNS
    #   is_eos: true if end of the stream or false.
    #
    # DESCRIPTION
    #   Check EOF or not.
    #
    def is_eos?
      return idx_is_eos?(0)
    end

    # SYNOPSIS
    #   N/A
    #
    # DESCRIPTION
    #   Do not instantiate this class directly.  Define your own class which
    #   derives this class and define 'read' instance method.  The first
    #   chunk is read here, so a derived class must be ready to serve 'read'
    #   before it calls super.
    #
    def initialize
      @buf_list = []
      @cur_buf = @buf_tail_idx = -1
      @offset = 0
      @is_eos = false
      add_buf
      @cur_buf = @buf_tail_idx
    end

  protected
    # Release every buffered chunk.
    def terminate
      while (rel_buf); end
    end

    # protected method 'read' must be defined in derived classes.
    # Returning nil (or raising EOFError) tells this class that the end of
    # the stream has been reached.
    def read(size) # raise EOFError
      raise NotImplementedError.new('Method read must be defined in a derived class.')
    end

  private

    # Size of the chunk stored at position 'idx' of the chunk list.
    def buf_size(idx)
      @buf_list[idx].size
    end

    # Read the next chunk from the source and append it to the chunk list.
    # Returns true if data was added.  At end of stream an empty chunk is
    # appended, @is_eos is set and false is returned.
    def add_buf
      if @is_eos
        return false
      end
      begin
        str_read = read(BufSize)
      rescue EOFError
        str_read = nil
      rescue
        # Any other read error: release the buffers, then re-raise.
        terminate
        raise
      end
      if str_read.nil?
        @is_eos = true
        @buf_list.push('')
        @buf_tail_idx += 1
        false
      else
        @buf_list.push(str_read)
        @buf_tail_idx += 1
        true
      end
    end

    # Release the current chunk and step to the next buffered one.
    # Returns false (with @cur_buf set to -1) if the released chunk was the
    # last one, or if there was no current chunk at all.
    def rel_buf
      if (@cur_buf < 0)
        return false
      end
      @buf_list[@cur_buf] = nil
      if (@cur_buf == @buf_tail_idx)
        @cur_buf = -1
        return false
      else
        @cur_buf += 1
        return true
      end
    end

    # True once the source is exhausted and no unread chunk is left.
    # NOTE: the 'idx' argument is not used.
    def idx_is_eos?(idx)
      (@is_eos && ((@cur_buf < 0) || (@cur_buf == @buf_tail_idx)))
    end

    # Number of chars requested from 'read' per chunk.
    BufSize = 1024 * 8
  end

  # DESCRIPTION
  #   CSV::IOBuf -- a class for a buffered IO.
  #
  # EXAMPLE
  #   # File 'bigdata' could be a giga-byte size one!
  #   # CSV::IOReader wraps the given IO in a CSV::IOBuf internally.
  #   reader = CSV::Reader.create(File.open('bigdata', 'rb'))
  #   reader.each do |row|
  #     p row
  #     break if row[0].data == 'admin'
  #   end
  #
  class IOBuf < StreamBuf
  public
    # Wrap 's', which must respond to read(size).  @s is stored before
    # super() is called because StreamBuf#initialize already reads the
    # first chunk.
    def initialize(s)
      @s = s
      super()
    end

    # Release the buffered chunks.  The wrapped object itself is not closed.
    def close
      terminate
    end

  private
    # Fetch the next chunk straight from the wrapped object.
    def read(size)
      @s.read(size)
    end

    # Nothing beyond the superclass cleanup is needed here.
    def terminate
      super
    end
  end
end