class MARC::XMLWriter
A class for writing MARC
records as MARCXML. BIG CAVEAT! XMLWriter
will not convert your MARC8 to UTF8; bug the authors to do this if you need it.
Constants
- COLLECTION_TAG
The constructor, to which you must pass a file path or an object that responds to a write message. The second argument is a hash of options, currently only supporting one option, stylesheet.
writer = XMLWriter.new('marc.xml', :stylesheet => 'style.xsl')
writer.write record
Public Class Methods
encode(record, opts = {})
click to toggle source
A static method that accepts a MARC::Record
object and returns a REXML::Element (the record element) for the XML serialization.
# File lib/marc/xmlwriter.rb, line 90
# Serializes a MARC record into a MARCXML <record> element.
#
# record - the record to serialize; must respond to #leader and #each,
#          yielding MARC::DataField and MARC::ControlField instances
#          (fields of any other class are skipped).
# opts   - options hash; when opts[:include_namespace] is truthy the
#          MARC_NS namespace is added to the <record> element.
#
# Returns the REXML::Element for the record.
def self.encode(record, opts = {})
  # The character classes below are the ones used by the MARCXML schema for
  # indicators, subfield codes and control-field tags. XSD patterns are
  # implicitly anchored but Ruby regexes are not, so \A and \z are needed to
  # validate the whole value rather than "some character somewhere in it".
  single_char = Regexp.new('\A[\da-z ]\z')
  subfield_char = Regexp.new('\A[\dA-Za-z!"#$%&\'()*+,-./:;<=>?{}_^`~\[\]\\\]\z')
  control_field_tag = Regexp.new('\A00[1-9A-Za-z]\z')

  # Right now, this writer handles input from the strict and
  # lenient MARC readers. Because it can get 'loose' MARC in, it
  # attempts to do some cleanup on data values that are not valid
  # MARCXML.

  # TODO? Perhaps the 'loose MARC' checks should be split out
  # into a tolerant MARCXMLWriter allowing the main one to skip
  # this extra work.

  # TODO: At the very least there should be some logging
  # to record our attempts to account for less than perfect MARC.

  e = REXML::Element.new("record")
  e.add_namespace(MARC_NS) if opts[:include_namespace]

  leader_element = REXML::Element.new("leader")
  leader_element.add_text(fix_leader(record.leader))
  e.add_element(leader_element)

  record.each do |field|
    if field.instance_of?(MARC::DataField)
      datafield_elem = REXML::Element.new("datafield")

      # If marc is leniently parsed, we may have some dirty data; using
      # the 'z' indicator value should help us locate these later to fix
      ind1 = field.indicator1
      ind1 = "z" if ind1.nil? || !ind1.match?(single_char)
      ind2 = field.indicator2
      ind2 = "z" if ind2.nil? || !ind2.match?(single_char)

      datafield_elem.add_attributes({
        "tag" => field.tag,
        "ind1" => ind1,
        "ind2" => ind2
      })

      field.subfields.each do |subfield|
        subfield_element = REXML::Element.new("subfield")

        # If marc is leniently parsed, we may have some dirty data; using
        # the blank subfield code should help us locate these later to fix.
        # A missing (nil) code is treated the same way as an invalid one.
        code = subfield.code
        code = " " if code.nil? || !code.match?(subfield_char)

        subfield_element.add_attribute("code", code)
        subfield_element.add_text(subfield.value)
        datafield_elem.add_element(subfield_element)
      end

      e.add_element(datafield_elem)
    elsif field.instance_of?(MARC::ControlField)
      control_element = REXML::Element.new("controlfield")

      # We need a marker for invalid tag values (we use "00z")
      tag = field.tag
      tag = "00z" unless tag.match?(control_field_tag) || MARC::ControlField.control_tag?(tag)

      control_element.add_attribute("tag", tag)
      control_element.add_text(field.value)
      e.add_element(control_element)
    end
  end

  # return xml
  e
end
fix_leader(leader)
click to toggle source
# File lib/marc/xmlwriter.rb, line 69
# Produces a cleaned-up copy of a leader string for use in MARCXML.
#
# leader - the leader String; it is not modified.
#
# Returns a new String in which unexpected characters are replaced by "Z",
# the value is space-padded to at least 24 characters, positions 20-23 are
# "4500", and position 6 is "Z" if it would otherwise be a space.
def self.fix_leader(leader)
  # NOTE(review): inside the character class "|" and "^" are literals, so
  # those two characters are left as-is along with word characters and
  # whitespace; everything else becomes "Z". Confirm this is intended.
  # ljust is a no-op for leaders that already have 24 or more characters.
  cleaned = leader.gsub(/[^\w|^\s]/, "Z").ljust(24)

  # MARCXML is particular about last four characters; ILSes aren't
  cleaned[20..23] = "4500"

  # MARCXML doesn't like a space here so we need a filler character: Z
  cleaned[6] = "Z" if cleaned[6] == " "

  cleaned
end
new(file, opts = {}, &blk)
click to toggle source
# File lib/marc/xmlwriter.rb, line 24
# Creates a writer and immediately emits the XML declaration, the optional
# stylesheet processing instruction and the opening collection tag.
#
# file - a String path (opened for writing) or any object that responds
#        to #write.
# opts - options hash; opts[:stylesheet] is used as the stylesheet
#        reference.
# blk  - optional block; it is called with the writer, and the writer is
#        closed afterwards even if the block raises.
#
# Raises ArgumentError if file is neither a String nor responds to #write.
def initialize(file, opts = {}, &blk)
  @writer = REXML::Formatters::Default.new

  # is_a? (rather than instance_of?) so String subclasses are also
  # treated as paths instead of being rejected.
  if file.is_a?(String)
    @fh = File.new(file, "w")
  elsif file.respond_to?(:write)
    @fh = file
  else
    raise ArgumentError, "must pass in file name or handle"
  end

  @stylesheet = opts[:stylesheet]

  @fh.write("<?xml version='1.0'?>\n")
  @fh.write(stylesheet_tag)
  @fh.write(COLLECTION_TAG)
  @fh.write("\n")

  return unless block_given?

  # Close in an ensure so the handle is not leaked when the block raises.
  begin
    blk.call(self)
  ensure
    close
  end
end
Public Instance Methods
close()
click to toggle source
Writes the closing collection tag and closes the underlying filehandle.
# File lib/marc/xmlwriter.rb, line 64
# Finishes the document by writing the closing collection tag, then closes
# the underlying handle.
#
# The handle is only required to respond to #write, so #close is called
# only when the handle actually supports it.
def close
  @fh.write("</collection>")
  @fh.close if @fh.respond_to?(:close)
end
stylesheet_tag()
click to toggle source
# File lib/marc/xmlwriter.rb, line 47
# Builds the xml-stylesheet processing instruction for the configured
# stylesheet.
#
# Returns the processing instruction followed by a newline, or an empty
# String when no stylesheet was configured.
def stylesheet_tag
  return "" unless @stylesheet

  # encode(xml: :attr) escapes &, <, > and " and wraps the value in double
  # quotes, so an href containing those characters cannot break the markup.
  href = @stylesheet.to_s.encode(xml: :attr)
  %(<?xml-stylesheet type="text/xsl" href=#{href}?>\n)
end
write(record)
click to toggle source
write a record to the file or handle
# File lib/marc/xmlwriter.rb, line 57
# Serializes one record and appends it to the output handle.
#
# record - the record to encode via MARC::XMLWriter.encode.
#
# The encoded element is written through the formatter, followed by a
# newline so each record starts on its own line.
def write(record)
  xml = MARC::XMLWriter.encode(record)
  @writer.write(xml, @fh)
  @fh.write("\n")
end