class MARC::XMLReader

The constructor accepts either a filename:

reader = MARC::XMLReader.new('/Users/edsu/marc.xml')

or a File object,

reader = MARC::XMLReader.new(File.new('/Users/edsu/marc.xml'))

or really any object that responds to read(n)

reader = MARC::XMLReader.new(StringIO.new(xml))

By default, XMLReader uses REXML’s pull parser, but you can swap that out with Nokogiri or jrexml (or let the system choose the ‘best’ one). The :parser can either be one of the defined constants or the constant’s value.

reader = MARC::XMLReader.new(fh, :parser=>'magic')

It is also possible to set the default parser at the class level so all subsequent instances will use it instead:

MARC::XMLReader.best_available
"nokogiri" # returns parser name, but doesn't set it.

Use:

MARC::XMLReader.best_available!

or

MARC::XMLReader.nokogiri!

By default, all XML parsers except REXML require the MARC namespace (http://www.loc.gov/MARC21/slim) to be included. Adding the option `ignore_namespace` to the call to `new` with a true value will allow parsing to proceed, e.g.,

reader = MARC::XMLReader.new(filename, parser: :nokogiri, ignore_namespace: true)

You can also pass in an error_handler option that will be called if there are any validation errors found when parsing a record.

reader = MARC::XMLReader.new(fh, error_handler: ->(reader, record, block) { ... })

By default, a MARC::RecordException is raised, halting all future parsing.

Constants

USE_BEST_AVAILABLE
USE_JREXML
USE_JSTAX
USE_LIBXML
USE_NOKOGIRI
USE_REXML

Attributes

error_handler[R]
parser[R]

Public Class Methods

best_available() click to toggle source

Returns the value of the best available parser

# File lib/marc/xmlreader.rb, line 117
# Picks the most capable XML parser that can actually be loaded in the
# running interpreter. Nothing is stored: the class-level default is left
# untouched.
#
# Candidates are tried in order and the first one that loads wins:
# * under JRuby: Nokogiri, the StAX API (javax.xml.stream), then jrexml
# * elsewhere:   Nokogiri, then libxml (required as "xml")
# When no candidate loads, the REXML constant is the fallback.
#
# @return the value of the USE_* constant for the selected parser
def best_available
  if defined? JRUBY_VERSION
    found = begin
      require "nokogiri"
      USE_NOKOGIRI
    rescue LoadError
      nil
    end
    found ||= begin
      # Looking the class up raises when the StAX API is not on the classpath.
      java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
      USE_JSTAX
    rescue java.lang.ClassNotFoundException
      nil
    end
    found ||= begin
      require "jrexml"
      USE_JREXML
    rescue LoadError
      nil
    end
  else
    found = begin
      require "nokogiri"
      USE_NOKOGIRI
    rescue LoadError
      nil
    end
    found ||= begin
      require "xml"
      USE_LIBXML
    rescue LoadError
      nil
    end
  end

  found || USE_REXML
end
best_available!() click to toggle source

Sets the best available parser as the default

# File lib/marc/xmlreader.rb, line 163
# Stores the parser reported by best_available in the @@parser class
# variable, making it the class-wide default.
#
# Returns the value that was stored.
def best_available!
  @@parser = best_available
end
choose_parser(p) click to toggle source
# File lib/marc/xmlreader.rb, line 182
# Validates a parser selection against the USE_* constants defined on the
# receiver.
#
# A value that equals one of the constants' values is returned unchanged.
# Any other value (for example a Symbol such as :nokogiri) is accepted when
# its string form equals a constant's value, in which case that string is
# returned.
#
# @param p [Object] the requested parser, usually a String or Symbol
# @return [Object] +p+ itself on an exact match, otherwise +p.to_s+
# @raise [ArgumentError] when no USE_* constant has a matching value
def choose_parser(p)
  known_values = constants
    .select { |const| const.to_s.start_with?("USE_") }
    .map { |const| const_get(const) }

  # Exact match first so callers get back the very object they passed in.
  return p if known_values.include?(p)

  as_string = p.to_s
  return as_string if known_values.include?(as_string)

  raise ArgumentError, "Parser '#{p}' not defined"
end
jrexml!() click to toggle source

Sets jrexml as the default parser

# File lib/marc/xmlreader.rb, line 173
# Stores USE_JREXML in the @@parser class variable, making jrexml the
# class-wide default parser.
#
# Returns the value that was stored.
def jrexml!
  @@parser = USE_JREXML
end
new(file, options = {}) click to toggle source
# File lib/marc/xmlreader.rb, line 58
def initialize(file, options = {})
  if file.is_a?(String)
    handle = File.new(file)
  elsif file.respond_to?(:read, 5)
    handle = file
  else
    raise ArgumentError, "must pass in path or File"
  end
  @handle = handle

  if options[:ignore_namespace]
    @ignore_namespace = options[:ignore_namespace]
  end

  parser = if options[:parser]
    self.class.choose_parser(options[:parser].to_s)
  else
    @@parser
  end

  case parser
  when "magic" then extend MagicReader
  when "rexml" then extend REXMLReader
  when "jrexml"
    raise ArgumentError, "jrexml only available under jruby" unless defined? JRUBY_VERSION
    extend JREXMLReader
  when "nokogiri" then extend NokogiriReader
  when "jstax"
    raise ArgumentError, "jstax only available under jruby" unless defined? JRUBY_VERSION
    extend JRubySTAXReader
  when "libxml" then extend LibXMLReader
                     raise ArgumentError, "libxml not available under jruby" if defined? JRUBY_VERSION
  end

  @error_handler = options[:error_handler]
end
nokogiri!() click to toggle source

Sets Nokogiri as the default parser

# File lib/marc/xmlreader.rb, line 168
# Stores USE_NOKOGIRI in the @@parser class variable, making Nokogiri the
# class-wide default parser.
#
# Returns the value that was stored.
def nokogiri!
  @@parser = USE_NOKOGIRI
end
parser() click to toggle source

Returns the currently set parser type

# File lib/marc/xmlreader.rb, line 97
# Reads the class-wide default parser.
#
# Returns the current value of the @@parser class variable.
def parser
  @@parser
end
parser=(p) click to toggle source

Sets the class parser

# File lib/marc/xmlreader.rb, line 112
# Sets the class-wide default parser.
#
# +p+ is passed through choose_parser and whatever that returns is stored
# in the @@parser class variable; choose_parser raises ArgumentError for a
# value it does not recognize, in which case @@parser is left unchanged.
def parser=(p)
  @@parser = choose_parser(p)
end
parsers() click to toggle source

Returns an array of all the parsers available

# File lib/marc/xmlreader.rb, line 102
# Lists the parser-selection constants defined on the receiver.
#
# @return [Array<Symbol>] the name of every constant beginning with USE_,
#   in the order +constants+ reports them
def parsers
  constants.select { |name| name.match?("^USE_") }
end
rexml!() click to toggle source

Sets REXML as the default parser

# File lib/marc/xmlreader.rb, line 178
# Stores USE_REXML in the @@parser class variable, making REXML the
# class-wide default parser.
#
# Returns the value that was stored.
def rexml!
  @@parser = USE_REXML
end