module MARC::REXMLReader
The REXMLReader is the ‘default’ parser, since REXML is almost always available. It uses REXML’s PullParser to handle larger documents without consuming excessive amounts of memory, but it is still REXML (read: slow), so it’s a good idea to use an alternative parser if one is available. If you don’t know which parser is the best one available, you can use the MagicReader
or set:
MARC::XMLReader.parser=
MARC::XMLReader::USE_BEST_AVAILABLE
or
MARC::XMLReader.parser = "magic"
or
reader = MARC::XMLReader.new(fh, :parser => "magic") (or the constant)
which will cascade down to REXML if nothing better is found.
Public Class Methods
extended(receiver)
click to toggle source
# File lib/marc/xml_parsers.rb, line 202
# Hook run when this module is mixed into an object via `extend`.
# Loads the REXML libraries on demand and then asks the receiver to set up
# its parser by calling its `init` method.
#
# @param receiver [Object] the object being extended; must respond to `init`
def self.extended(receiver)
  # Required lazily so REXML is only loaded when this reader is actually used.
  %w[rexml/document rexml/parsers/pullparser].each { |feature| require feature }
  receiver.init
end
Public Instance Methods
each() { |build_record| ... }
click to toggle source
Loops through the MARC records in the XML document, yielding each record to the block. Returns an Enumerator when no block is given.
# File lib/marc/xml_parsers.rb, line 214
# Walks the pull-parser event stream and yields one built record for every
# opening <record> element (namespace prefix ignored).
#
# Without a block, returns an Enumerator over the same sequence.
def each
  return enum_for(:each) unless block_given?

  while @parser.has_next?
    event = @parser.pull
    # Only the start tag of a record element triggers record construction.
    next unless event.start_element?
    next unless strip_ns(event[0]) == "record"

    yield build_record
  end
end
init()
click to toggle source
Sets our parser
# File lib/marc/xml_parsers.rb, line 209
# Creates a REXML pull parser over @handle and stores it in @parser.
def init
  pull_parser = REXML::Parsers::PullParser.new(@handle)
  @parser = pull_parser
end
Private Instance Methods
build_record()
click to toggle source
Accepts parse events until a complete record has been built up.
# File lib/marc/xml_parsers.rb, line 236
# Consumes parser events until one complete record has been assembled.
#
# Two event sources are handled:
# * a Nokogiri::XML::Reader (only if Nokogiri is loaded and @parser is one),
# * otherwise a pull parser exposing has_next?/pull (the REXML case).
#
# Returns the MARC::Record as soon as the closing (REXML) or matching
# (Nokogiri) "record" element is seen. In the pull-parser branch the method
# falls through and returns nil if the events run out first.
#
# Raises RuntimeError ("No datafield to add to") in the Nokogiri branch when
# a subfield appears with no open datafield.
def build_record
  record = MARC::Record.new
  data_field = nil
  control_field = nil
  subfield = nil
  text = ""

  # `Module.constants` returns Symbols on Ruby >= 1.9, so the former
  # `Module.constants.index("Nokogiri")` check could never succeed there.
  # `defined?` works regardless of Ruby version and never raises NameError.
  if defined?(Nokogiri::XML::Reader) && @parser.is_a?(Nokogiri::XML::Reader)
    cursor = nil
    open_elements = []
    @parser.each do |node|
      # A value node supplies the content for whatever element was opened last.
      if node.value? && cursor
        if cursor.is_a?(Symbol) && (cursor == :leader)
          record.leader = node.value
        else
          cursor.value = node.value
        end
        cursor = nil
      end

      next unless node.namespace_uri == @ns

      # The same element name is seen once when it opens and once when it
      # closes; the second sighting just clears the "open" marker.
      element_name = node.local_name.downcase
      if open_elements.index(element_name)
        open_elements.delete(element_name)
        next
      else
        open_elements << element_name
      end

      case element_name
      when "leader"
        cursor = :leader
      when "controlfield"
        # Flush any datafield still being collected before moving on.
        record << data_field if data_field
        data_field = nil
        control_field = MARC::ControlField.new(node.attribute("tag"))
        record << control_field
        cursor = control_field
      when "datafield"
        record << data_field if data_field
        data_field = MARC::DataField.new(node.attribute("tag"), node.attribute(IND1), node.attribute(IND2))
      when "subfield"
        raise "No datafield to add to" unless data_field
        subfield = MARC::Subfield.new(node.attribute(CODE))
        data_field.append(subfield)
        cursor = subfield
      when "record"
        record << data_field if data_field
        return record
      end
    end
  else
    while @parser.has_next?
      event = @parser.pull

      if event.text?
        # Deliberately non-mutating (`+=`, not `<<`): the current `text`
        # object may already have been handed to record.leader= or a field's
        # value=, and appending in place could alter it afterwards.
        text += REXML::Text.unnormalize(event[0])
        next
      end

      if event.start_element?
        # Every opening tag starts a fresh text accumulator.
        text = ""
        attrs = event[1]
        case strip_ns(event[0])
        when "controlfield"
          control_field = MARC::ControlField.new(attrs[TAG])
        when "datafield"
          data_field = MARC::DataField.new(attrs[TAG], attrs[IND1], attrs[IND2])
        when "subfield"
          subfield = MARC::Subfield.new(attrs[CODE])
        end
      end

      if event.end_element?
        case strip_ns(event[0])
        when "leader"
          record.leader = text
        when "record"
          return record
        when "controlfield"
          control_field.value = text
          record.append(control_field)
        when "datafield"
          record.append(data_field)
        when "subfield"
          subfield.value = text
          data_field.append(subfield)
        end
      end
    end
  end
end
strip_ns(str)
click to toggle source
# File lib/marc/xml_parsers.rb, line 230
# Removes a namespace prefix from an element name, e.g. "marc:record"
# becomes "record". Everything up to and including the last colon on the
# first line that contains one is dropped; names without a colon are
# returned unchanged (as a new String).
def strip_ns(str)
  namespace_prefix = /^.*:/
  str.sub(namespace_prefix, "")
end