module MARC::REXMLReader

The REXMLReader is the default parser, since REXML is the one XML library we can reasonably expect to be installed. It uses REXML's PullParser to handle larger documents without consuming excessive amounts of memory, but it is still REXML (read: slow), so it is a good idea to use an alternative parser if one is available. If you don't know which parser is the best one available, you can use the MagicReader or set:

MARC::XMLReader.parser=MARC::XMLReader::USE_BEST_AVAILABLE

or

MARC::XMLReader.parser="magic"

or

reader = MARC::XMLReader.new(fh, :parser=>"magic") (or pass the constant instead of the string)

which will cascade down to REXML if nothing better is found.

Public Class Methods

extended(receiver) click to toggle source
# File lib/marc/xml_parsers.rb, line 202
# Hook invoked with the object that has just been extended by this module.
#
# The REXML libraries are loaded lazily here, at extension time, and the
# receiver is then asked to set up its parser through its own +init+.
#
# receiver - the object that was extended; it must respond to +init+.
#
# Returns whatever receiver.init returns.
def self.extended(receiver)
  # Load order is kept: the document library first, then the pull parser.
  %w[rexml/document rexml/parsers/pullparser].each { |library| require library }
  receiver.init
end

Public Instance Methods

each() { |build_record| ... } click to toggle source

Loop through the MARC records in the XML document

# File lib/marc/xml_parsers.rb, line 214
# Iterates over the MARC records found in the XML document.
#
# With a block, pulls events from @parser until it is exhausted and yields
# the result of build_record each time the opening tag of a "record"
# element (namespace prefix ignored) is seen. Returns nil in that case.
#
# Without a block, returns an Enumerator over this method.
def each
  return enum_for(:each) unless block_given?

  while @parser.has_next?
    event = @parser.pull
    # Only the opening tag of a record element starts a new record.
    next unless event.start_element?
    next unless strip_ns(event[0]) == "record"

    yield build_record
  end
end
init() click to toggle source

Sets our parser

# File lib/marc/xml_parsers.rb, line 209
# Sets up the parser for this reader: wraps @handle in a REXML pull parser
# and stores it in @parser.
#
# Returns the newly created REXML::Parsers::PullParser.
def init
  source = @handle
  @parser = REXML::Parsers::PullParser.new(source)
end

Private Instance Methods

build_record() click to toggle source

Accepts parse events until a complete record has been built.

# File lib/marc/xml_parsers.rb, line 236
# Consumes parser events until one complete MARC record has been assembled.
#
# The method starts from a fresh MARC::Record and fills it from whatever
# @parser produces next, so the opening "record" tag must already have been
# consumed by the caller (#each does this for the REXML pull parser).
#
# Two event sources are handled:
#
# * a Nokogiri::XML::Reader, when Nokogiri is loaded and @parser is one;
# * otherwise a pull parser answering has_next?/pull, as REXML's does.
#
# Returns the MARC::Record once the closing "record" element is reached.
# If the REXML-style parser runs out of events first, nil is returned; in the
# Nokogiri branch the result of @parser.each is returned in that situation.
#
# Raises RuntimeError ("No datafield to add to") in the Nokogiri branch when
# a subfield appears outside of a datafield.
def build_record
  record = MARC::Record.new
  data_field = nil
  control_field = nil
  subfield = nil
  text = ""

  # Module.constants returns Symbols on Ruby >= 1.9, so the former
  # Module.constants.index("Nokogiri") check was always nil and this branch
  # could never run. defined? is safe whether or not Nokogiri is loaded.
  if defined?(Nokogiri::XML::Reader) && @parser.is_a?(Nokogiri::XML::Reader)
    datafield = nil
    cursor = nil
    open_elements = []
    @parser.each do |node|
      # A node carrying a value supplies the content for whatever element
      # was opened last (the leader, a control field or a subfield).
      if node.value? && cursor
        if cursor.is_a?(Symbol) && (cursor == :leader)
          record.leader = node.value
        else
          cursor.value = node.value
        end
        cursor = nil
      end
      next unless node.namespace_uri == @ns

      # The same element name is seen for both the opening and the closing
      # node; the second sighting is treated as the close and skipped.
      name = node.local_name.downcase
      if open_elements.index(name)
        open_elements.delete(name)
        next
      end
      open_elements << name

      case name
      when "leader"
        cursor = :leader
      when "controlfield"
        record << datafield if datafield
        datafield = nil
        control_field = MARC::ControlField.new(node.attribute("tag"))
        record << control_field
        cursor = control_field
      when "datafield"
        # A pending data field is flushed when the next field starts.
        record << datafield if datafield
        datafield = MARC::DataField.new(node.attribute("tag"), node.attribute(IND1), node.attribute(IND2))
      when "subfield"
        raise "No datafield to add to" unless datafield
        subfield = MARC::Subfield.new(node.attribute(CODE))
        datafield.append(subfield)
        cursor = subfield
      when "record"
        # The opening record tag was consumed by the caller, so the first
        # "record" node seen here is the closing one.
        record << datafield if datafield
        return record
      end
    end
  else
    while @parser.has_next?
      event = @parser.pull

      if event.text?
        # Deliberately += rather than <<: text is not reset on end tags, and
        # a value already handed to the record (e.g. the leader) must not be
        # mutated by whitespace that follows its closing tag.
        text += REXML::Text.unnormalize(event[0])
        next
      end

      if event.start_element?
        # Every opening tag starts a fresh text buffer.
        text = ""
        attrs = event[1]
        case strip_ns(event[0])
        when "controlfield"
          control_field = MARC::ControlField.new(attrs[TAG])
        when "datafield"
          data_field = MARC::DataField.new(attrs[TAG], attrs[IND1], attrs[IND2])
        when "subfield"
          subfield = MARC::Subfield.new(attrs[CODE])
        end
      end

      if event.end_element?
        case strip_ns(event[0])
        when "leader"
          record.leader = text
        when "record"
          return record
        when "controlfield"
          control_field.value = text
          record.append(control_field)
        when "datafield"
          record.append(data_field)
        when "subfield"
          subfield.value = text
          data_field.append(subfield)
        end
      end
    end
  end
end
strip_ns(str) click to toggle source
# File lib/marc/xml_parsers.rb, line 230
# Removes a namespace prefix from an element name.
#
# Everything from the start of a line up to and including the last colon on
# that line is dropped (first match only), so "marc:record" becomes "record".
# A name without a colon comes back unchanged.
#
# str - the (possibly prefixed) element name.
#
# Returns a new String; +str+ itself is not modified.
def strip_ns(str)
  prefix_pattern = /^.*:/
  str.sub(prefix_pattern, "")
end