class Rpareia::Parser
Attributes
project[R]
Public Class Methods
new(xml)
click to toggle source
# File lib/rpareia/parser.rb, line 32
# Builds a parser for the given project description and parses it right away.
#
# xml - the raw project XML; it is kept in @xml and later handed to
#       Nokogiri::XML by parse_xml.
#
# Any exception raised by one of the parse steps propagates out of the
# constructor, so a Parser instance only exists for input that passed
# every check.
def initialize(xml)
  @xml = xml
  parse
end
Private Instance Methods
find_duplicated(arr)
click to toggle source
# File lib/rpareia/parser.rb, line 50
# Returns the first element of +arr+ (in array order) that occurs more than
# once, or nil when every element is unique (including when +arr+ is empty).
#
# Occurrences are counted in a single pass and the array is then scanned
# once, instead of running index/rindex for every element.
# Elements are compared by hash equality (eql?/hash); the visible callers
# pass arrays of strings, for which this is the same as ==.
def find_duplicated(arr)
  occurrences = Hash.new(0)
  arr.each { |element| occurrences[element] += 1 }
  arr.find { |element| occurrences[element] > 1 }
end
parse()
click to toggle source
# File lib/rpareia/parser.rb, line 163
# Runs every parse step, in order. The order matters: parse_xml replaces
# @xml with the parsed document, parse_task creates @project, and the
# remaining steps add their sections to @project.
def parse
  steps = [
    :parse_xml,
    :parse_task,
    :parse_data_sources,
    :parse_deterministic_linkage,
    :parse_output
  ]
  steps.each { |step| send(step) }
  nil
end
parse_data_sources()
click to toggle source
# File lib/rpareia/parser.rb, line 54
# Reads every /project/data-sources/data-source element and stores the
# result in @project[:data_sources] as an array of
# { id:, file:, fields: [{ name:, type: }, ...] } hashes.
#
# Checks, in this order per data source: id present, file attribute present,
# file exists on disk, type is "delimited", field-separator present, then the
# field list. After all sources are read it checks for duplicated ids and
# that the number of sources matches the task in @project[:name].
#
# Raises MissingDataSourceId, MissingDataSourceFile, InvalidDataSourceFile,
# InvalidDataSourceType, MissingFieldSeparator, FieldsElementNotFound,
# MissingFieldName, InvalidFieldType, DuplicatedFieldName,
# DuplicatedDataSourceId, InvalidNumberOfSources or InvalidTaskError.
def parse_data_sources
  @project[:data_sources] = []

  @xml.xpath("/project/data-sources/data-source").each do |data_source|
    id = data_source['id'].to_s
    raise MissingDataSourceId.exception if id.empty?

    file = data_source['file'].to_s
    raise MissingDataSourceFile.exception("Missing file attribute from data source '#{id}'") if file.empty?
    raise InvalidDataSourceFile.exception("File '#{file}' from data source '#{id}' does not exist") unless File.exist?(file)

    type = data_source['type'].to_s
    raise InvalidDataSourceType.exception("Data source type '#{type}' not supported") if type != "delimited"

    # The separator is only validated here; it is not stored in @project.
    field_separator = data_source['field-separator'].to_s
    raise MissingFieldSeparator.exception("Missing field separator from data source '#{id}'") if field_separator.empty?

    @project[:data_sources] << { id: id, file: file, fields: parse_data_source_fields(data_source, id) }
  end

  if (dup = find_duplicated(@project[:data_sources].map { |source| source[:id] }))
    raise DuplicatedDataSourceId.exception("Duplicated data source id '#{dup}'")
  end

  validate_data_source_count
end

# Reads and validates the fields/field children of one data-source element,
# returning them as an array of { name:, type: } hashes.
def parse_data_source_fields(data_source, id)
  fields = data_source.xpath("fields/field")
  raise FieldsElementNotFound.exception("Element 'field' not found on data source '#{id}'") if fields.empty?

  parsed = fields.map do |field|
    # to_s so that a missing attribute (nil) is reported as MissingFieldName
    # instead of failing with NoMethodError on nil.empty?.
    name = field['name'].to_s
    raise MissingFieldName.exception("Attribute 'name' not found on data source '#{id}'") if name.empty?

    field_type = field['type']
    unless ['int', 'string'].include?(field_type)
      raise InvalidFieldType.exception("Invalid type '#{field_type}' from field '#{name}', data source '#{id}'")
    end

    { name: name, type: field_type }
  end

  if (dup = find_duplicated(parsed.map { |f| f[:name] }))
    raise DuplicatedFieldName.exception("Duplicated field name '#{dup}' on data source '#{id}'")
  end

  parsed
end

# Checks that the number of parsed data sources matches the task name:
# two for 'linkage', one for 'deduplication'; any other task is rejected.
def validate_data_source_count
  count = @project[:data_sources].size

  case @project[:name]
  when 'linkage'
    raise InvalidNumberOfSources.exception("Linkage: expected two data-source, #{count} given") if count != 2
  when 'deduplication'
    raise InvalidNumberOfSources.exception("Deduplication: expected one data-sources, #{count} given") if count != 1
  else
    raise InvalidTaskError.exception("Invalid task: '#{@project[:name]}'")
  end
end
parse_deterministic_linkage()
click to toggle source
# File lib/rpareia/parser.rb, line 113
# Reads /project/deterministic-linkage/conjunction/part elements into
# @project[:parts] as { field_name: } hashes, then checks that every part's
# field name exists in every entry of @project[:data_sources].
#
# Raises DeterministicLinkageElementNotFound, ConjunctionElementNotFound,
# MultipleConjunctionElements, MissingPart, MissingFieldName or
# MissingPartFieldNameOnDataSource.
def parse_deterministic_linkage
  deterministic_linkage = @xml.xpath("/project/deterministic-linkage")
  raise DeterministicLinkageElementNotFound.exception("Missing deterministic-linkage element") if deterministic_linkage.empty?

  conjunction = deterministic_linkage.xpath("conjunction")
  size = conjunction.size
  raise ConjunctionElementNotFound.exception("Missing conjunction element") if size.zero?
  raise MultipleConjunctionElements.exception("Only one conjunction element is allowed, #{size} found") if size > 1

  parts = conjunction.xpath("part")
  raise MissingPart.exception("At least one part element is required") if parts.empty?

  @project[:parts] = []
  parts.each do |part|
    field_name = part['field-name'].to_s
    raise MissingFieldName.exception("Missing attribute field-name on part element") if field_name.empty?

    @project[:parts] << { field_name: field_name }
  end

  # Build each data source's list of field names once, not once per part.
  names_by_source = @project[:data_sources].map do |data_source|
    [data_source[:id], data_source[:fields].map { |field| field[:name] }]
  end

  @project[:parts].each do |part|
    names_by_source.each do |source_id, field_names|
      next if field_names.include?(part[:field_name])

      raise MissingPartFieldNameOnDataSource.exception("Field name '#{part[:field_name]}' not found on data source '#{source_id}'")
    end
  end
end
parse_output()
click to toggle source
# File lib/rpareia/parser.rb, line 148
# Reads the single /project/output element and stores its 'deterministic'
# attribute in @project[:output].
#
# The attribute is validated before it is stored, and an empty value is
# treated as missing, matching how the other attribute checks in this parser
# use to_s + empty?.
#
# Raises MissingOutputElement when there is no output element,
# MultipleOutputElement when there is more than one, and
# MissingDeterministicAttribute when the attribute is absent or empty.
def parse_output
  output = @xml.xpath("/project/output")
  size = output.size
  raise MissingOutputElement.exception("Missing output element") if size.zero?
  raise MultipleOutputElement.exception("Only one output element is allowed, #{size} found") if size > 1

  deterministic = output.first['deterministic'].to_s
  raise MissingDeterministicAttribute.exception("Missing attribute 'deterministic' on output element") if deterministic.empty?

  @project[:output] = deterministic
end
parse_task()
click to toggle source
# File lib/rpareia/parser.rb, line 46
# Creates @project with the task name taken from the 'task' attribute of the
# root project element: @project = { name: <task> }.
#
# Raises InvalidTaskError when the attribute is absent, instead of failing
# with NoMethodError on nil. Whether the task name is a supported one is
# checked later, in parse_data_sources.
def parse_task
  task = @xml.xpath("/project/@task").first
  raise InvalidTaskError.exception("Missing 'task' attribute on project element") unless task

  @project = { name: task.value }
end
parse_xml()
click to toggle source
# File lib/rpareia/parser.rb, line 40
# Replaces the raw input held in @xml with the document returned by
# Nokogiri::XML, then fails if the parser recorded any errors. The error
# messages are joined with newlines to form the exception message.
#
# NOTE(review): SyntaxError is resolved by normal constant lookup. If the
# project does not define its own SyntaxError, this is Ruby's built-in
# ::SyntaxError, which is a ScriptError and is not caught by a bare
# `rescue` - confirm which one is intended.
def parse_xml
  @xml = Nokogiri::XML(@xml)
  problems = @xml.errors
  return if problems.empty?

  raise SyntaxError.exception(problems.join("\n"))
end