class Gtin2atc::Builder
Constants
- AtcDifferent
- AtcNotInBag
- AtcNotInSwissindex
- AtcNotInSwissmedic
- CsvOutputOptions
- Mesurements
- RouteOfAdministrations
see Selection of units of WHO DDD guidelines
- SameInAll
- Strip_For_Sax_Machine
Public Class Methods
new(opts)
click to toggle source
# File lib/gtin2atc/builder.rb, line 22
# Configure logging and the compare/report switches and start with empty
# data sets.
#
# opts is read with the keys :log, :output, :compare and :full.
# Reports are only generated when both :compare and :full are set.
def initialize(opts)
  Util.set_logging(opts[:log])
  @output     = opts[:output]
  @do_compare = opts[:compare]
  # `&&` is required here: with the low-precedence `and` the assignment was
  # evaluated first and opts[:full] never influenced @gen_reports.
  @gen_reports = opts[:compare] && opts[:full]
  Util.debug_msg "Builder: opts are #{opts} @do_compare is #{@do_compare} output #{@output}"
  @data_swissmedic = {}
  @data_bag        = {}
  @data_swissindex = {}
  @bag_entries_without_gtin = 0
end
Public Instance Methods
bag_xml_extractor()
click to toggle source
# File lib/gtin2atc/builder.rb, line 127
# Download the BAG preparations XML and index its packs by GTIN.
#
# Returns a Hash mapping the integer GTIN of each pack to an item Hash with
# :atc_code, :gtin, :name and, when the pack carries them, :exfactory_price
# and :public_price.
#
# Side effects: stores the downloader in @bag and counts packs without a
# GTIN in @bag_entries_without_gtin.
def bag_xml_extractor
  data = {}
  @bag = BagDownloader.new
  xml = @bag.download
  Util.debug_msg "bag_xml_extractor xml is #{xml.size} bytes long"
  result = PreparationsEntry.parse(xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
  @bag_entries_without_gtin = 0
  result.Preparations.Preparation.each do |seq|
    atc_code = seq.AtcCode || ''
    seq.Packs.Pack.each do |pac|
      gtin = pac.GTIN
      unless gtin
        @bag_entries_without_gtin += 1
        # NOTE(review): the pack description is read from pac elsewhere in this
        # method; confirm that seq really responds to DescriptionDe.
        Util.debug_msg "run_bag_extractor skip phar #{seq.NameDe}: #{seq.DescriptionDe} without gtin."
        next
      end
      gtin = gtin.to_i
      # Build a fresh item per pack. Previously one hash was shared by all
      # packs of a preparation, so every GTIN pointed at the data of the
      # last pack (and inherited prices from earlier packs).
      item = {}
      item[:atc_code] = atc_code
      item[:gtin]     = gtin
      item[:name]     = seq.NameDe + " " + pac.DescriptionDe
      if pac.Prices
        item[:exfactory_price] = pac.Prices.ExFactoryPrice.Price if pac.Prices.ExFactoryPrice
        item[:public_price]    = pac.Prices.PublicPrice.Price if pac.Prices.PublicPrice
      end
      data[gtin] = item
      Util.debug_msg "run_bag_extractor add #{item}" if $VERBOSE
    end
  end
  Util.debug_msg "bag_xml_extractor extracted #{data.size} items. Skipped #{@bag_entries_without_gtin} entries without gtin"
  data
end
calc_checksum(str)
click to toggle source
# File lib/gtin2atc/builder.rb, line 33
# Compute the check digit for the first 12 characters of str.
#
# Characters at even positions (0-based) are weighted 1, those at odd
# positions 3. Missing or non-numeric characters count as 0. The digit that
# brings the weighted sum up to a multiple of ten is returned as a String.
def calc_checksum(str)
  digits = str.strip.split(//u)
  weighted_sum = (0...12).inject(0) do |acc, pos|
    weight = pos.odd? ? 3 : 1
    acc + weight * digits[pos].to_i
  end
  ((10 - (weighted_sum % 10)) % 10).to_s
end
check_bag()
click to toggle source
# File lib/gtin2atc/builder.rb, line 260
# Compare the ATC code of every BAG entry with swissmedic and swissindex.
#
# An entry whose ATC code is identical in all three data sets is collected
# once in the "same in all" list. Every other entry is classified twice,
# once against swissindex and once against swissmedic, as: missing, same,
# longer there, shorter there, or different (same length, other code).
# A summary is logged through Util.info and each list is passed to #report.
def check_bag
  matching_atc_codes = []

  not_in_swissmedic = []
  match_in_swissmedic = []
  shorter_in_swissmedic = []
  longer_in_swissmedic = []
  different_atc_in_swissmedic = []

  not_in_swissindex = []
  match_in_swissindex = []
  shorter_in_swissindex = []
  longer_in_swissindex = []
  different_atc_in_swissindex = []

  j = 0
  @data_bag.each do |gtin, item|
    atc_code = item[:atc_code]
    j += 1
    Util.debug_msg "#{gtin}: j #{j} checking #{atc_code} in #{item}"
    medic = @data_swissmedic[gtin]
    index = @data_swissindex[gtin]

    if medic && index && atc_code == medic[:atc_code] && atc_code == index[:atc_code]
      matching_atc_codes << "#{gtin}: matching_atc_codes swissindex #{item} #{medic[:atc_code]} and #{index[:atc_code]}"
      next
    end

    if !index
      not_in_swissindex << "#{gtin}: Not in swissindex #{item}"
    elsif atc_code == index[:atc_code]
      match_in_swissindex << "ATC code #{atc_code} for #{gtin} matches swissindex #{index[:atc_code]}"
    elsif atc_code.length < index[:atc_code].length
      longer_in_swissindex << "ATC code #{item[:atc_code]} for #{gtin} longer in swissindex #{index[:atc_code]}"
    elsif atc_code.length > index[:atc_code].length
      shorter_in_swissindex << "ATC code #{atc_code} for #{gtin} shorter in swissindex #{index[:atc_code]}"
    else
      # Was appended to matching_atc_codes, which left the "different" list
      # permanently empty and polluted the "same in all" list.
      different_atc_in_swissindex << "ATC code #{atc_code} for #{gtin} differs from swissindex #{index[:atc_code]}"
    end

    if !medic
      not_in_swissmedic << "#{gtin}: Not in swissmedic #{item}"
    elsif atc_code == medic[:atc_code]
      match_in_swissmedic << "ATC code #{atc_code} for #{gtin} matches swissmedic #{medic[:atc_code]}"
    elsif atc_code.length < medic[:atc_code].length
      longer_in_swissmedic << "ATC code #{item[:atc_code]} for #{gtin} longer in swissmedic #{medic[:atc_code]}"
    elsif atc_code.length > medic[:atc_code].length
      shorter_in_swissmedic << "ATC code #{atc_code} for #{gtin} shorter in swissmedic #{medic[:atc_code]}"
    else
      different_atc_in_swissmedic << "ATC code #{atc_code} for #{gtin} differs from swissmedic #{medic[:atc_code]}"
    end
  end

  # @bag_entries_without_gtin is a counter; calling .size on it printed the
  # byte width of the Integer instead of the number of skipped entries.
  Util.info "Result of verifing data from bag (SL): bag-data fetched from #{@bag.origin}. bag had #{@data_bag.size} entries #{@bag_entries_without_gtin} entries had no GTIN field Not in swissmedic #{not_in_swissmedic.size} Not in swissindex #{not_in_swissindex.size} "

  Util.info "Comparing ATC-Codes between bag and swissmedic"
  topic_swissmedic = 'compare_bag_to_swissmedic'
  report(topic_swissmedic, SameInAll, matching_atc_codes)
  report(topic_swissmedic, 'atc are the same in swissmedic and bag, but not in swissindex', match_in_swissmedic)
  report(topic_swissmedic, 'atc are different in swissmedic and bag', different_atc_in_swissmedic)
  report(topic_swissmedic, 'atc are shorter in swissmedic than in bag', shorter_in_swissmedic)
  report(topic_swissmedic, 'atc are longer in swissmedic than in bag', longer_in_swissmedic)

  Util.info "Comparing ATC-Codes between bag and swissindex"
  topic_swissindex = 'compare_bag_to_swissindex'
  report(topic_swissindex, SameInAll, matching_atc_codes)
  report(topic_swissindex, 'atc are the same in swissindex and bag, but not in swissmedic', match_in_swissindex)
  # NOTE(review): this label says "swissmedic" although the list holds
  # swissindex differences. It is left unchanged because #report derives the
  # report file name from it; rename once consumers are checked.
  report(topic_swissindex, 'atc are different in swissmedic and bag', different_atc_in_swissindex)
  report(topic_swissindex, 'atc are shorter in swissindex than in bag', shorter_in_swissindex)
  report(topic_swissindex, 'atc are longer in swissindex than in bag', longer_in_swissindex)
end
check_swissmedic()
click to toggle source
# File lib/gtin2atc/builder.rb, line 348
# Compare the ATC code of every swissmedic entry with bag and swissindex.
#
# Entries whose ATC code is identical in bag, swissmedic and swissindex are
# counted as matching and skipped. Entries unknown to swissindex are listed
# and skipped. All remaining entries are classified against swissindex
# (same, longer, shorter, different) and additionally listed when they are
# missing in bag. A summary is logged through Util.info and the swissindex
# comparison lists are passed to #report.
def check_swissmedic
  matching = []
  not_in_bag = []
  not_in_swissindex = []
  matching_atc_codes = []
  shorter_in_swissmedic = []
  longer_in_swissindex = []
  different_atc = []

  @data_swissmedic.each do |gtin, item|
    atc_code = item[:atc_code]
    bag   = @data_bag[gtin]
    index = @data_swissindex[gtin]

    # The original test indexed the item hashes with [1] (always nil) and
    # checked @data_bag twice instead of @data_swissindex, so nothing ever
    # counted as matching in all three sources.
    if bag && index && atc_code.eql?(bag[:atc_code]) && atc_code.eql?(index[:atc_code])
      matching << "#{gtin} #{atc_code} #{item[:atc_code]} match in bag, swissmedic and swissindex"
      next
    end

    unless index
      not_in_swissindex << "Swissmedic #{gtin}: Not in swissindex #{item}"
      next
    end

    if item[:atc_code] == index[:atc_code]
      matching_atc_codes << "ATC code #{atc_code} for #{gtin} matches swissindex #{index[:atc_code]}"
    elsif item[:atc_code].length < index[:atc_code].length
      longer_in_swissindex << "ATC code #{item[:atc_code]} for #{gtin} longer in swissindex #{index[:atc_code]}"
    elsif item[:atc_code].length > index[:atc_code].length
      shorter_in_swissmedic << "ATC code #{atc_code} for #{gtin} shorter in swissindex #{index[:atc_code]}"
    else
      different_atc << "ATC code #{atc_code} for #{gtin} differs from swissindex #{index[:atc_code]}"
    end

    not_in_bag << "#{gtin}: Not in bag #{item}" unless bag
  end

  # @bag_entries_without_gtin is a counter; .size on it printed the byte
  # width of the Integer instead of the number of skipped entries.
  Util.info "Result of verifing data from swissmedic: swissmedic had #{@data_swissmedic.size} entries. Fetched from #{@swissmedic.origin} swissindex #{@data_swissindex.size} entries. Fetched from #{@swissindex.origin} bag #{@data_bag.size} entries. #{@bag_entries_without_gtin} entries had no GTIN field. Fetched from #{@bag.origin} Matching #{matching.size} items. Not in bag #{not_in_bag.size} Not in swissindex #{not_in_swissindex.size} Comparing ATC-Codes between swissmedic and swissindex "

  topic = 'compare swissmedic to swisssindex'
  # matching_atc_codes is deliberately reported under two labels, as before.
  report(topic, 'atc match in swissindex and swissmedic', matching_atc_codes)
  report(topic, 'atc are different in swissindex and swissmedic', different_atc)
  report(topic, 'atc are the same in swissindex and swissmedic', matching_atc_codes)
  report(topic, 'atc are shorter in swissindex', shorter_in_swissmedic)
  report(topic, 'atc are longer in swissindex', longer_in_swissindex)
end
compare()
click to toggle source
# File lib/gtin2atc/builder.rb, line 402
# Compare the ATC codes of all GTINs known to bag, swissindex or swissmedic.
#
# Each GTIN lands in exactly one list: identical ATC code in all three
# sources, missing in swissmedic, missing in swissindex, missing in bag
# (checked in that order), or present everywhere with differing codes.
# A summary is logged through Util.info and every list is passed to #report.
def compare
  all_gtin = @data_bag.merge(@data_swissindex).merge(@data_swissmedic).sort
  matching_atc_codes = []
  not_in_bag = []
  not_in_swissmedic = []
  not_in_swissindex = []
  different_atc = []

  all_gtin.each do |gtin, item|
    bag   = @data_bag[gtin]
    index = @data_swissindex[gtin]
    medic = @data_swissmedic[gtin]

    # The original compared bag with swissindex twice and never looked at
    # the swissmedic code, so swissmedic mismatches counted as matches.
    if bag && index && medic &&
       bag[:atc_code] == index[:atc_code] &&
       bag[:atc_code] == medic[:atc_code]
      matching_atc_codes << "#{gtin}: ATC-Code #{bag[:atc_code]} matches in bag, swissmedic and swissindex"
    elsif !medic
      not_in_swissmedic << "#{gtin}: Not in swissmedic #{item}"
    elsif !index
      not_in_swissindex << "#{gtin}: Not in swissindex #{item}"
    elsif !bag
      not_in_bag << "#{gtin}: Not in bag #{item}"
    else
      # The swissmedic code is now part of the detail line, because the
      # difference may be in swissmedic only.
      different_atc << "#{gtin}: ATC code differs bag #{bag[:atc_code]} swissindex #{index[:atc_code]} swissmedic #{medic[:atc_code]}"
    end
  end

  # @bag_entries_without_gtin is a counter; .size on it printed the byte
  # width of the Integer instead of the number of skipped entries.
  Util.info "Comparing all GTIN-codes: Found infos about #{all_gtin.size} entries bag #{@data_bag.size} entries. #{@bag_entries_without_gtin} entries had no GTIN field. Fetched from #{@bag.origin} swissindex #{@data_swissindex.size} entries. Fetched from #{@swissindex.origin} swissmedic #{@data_swissmedic.size} entries. Fetched from #{@swissmedic.origin} "

  topic = 'compare all'
  report(topic, SameInAll, matching_atc_codes)
  report(topic, AtcNotInBag, not_in_bag)
  report(topic, AtcNotInSwissindex, not_in_swissindex)
  report(topic, AtcNotInSwissmedic, not_in_swissmedic)
  report(topic, AtcDifferent, different_atc)
end
epha_atc_extractor()
click to toggle source
# File lib/gtin2atc/builder.rb, line 43
# Download the EPHA ATC list and return a Hash mapping the first CSV column
# to the third one. Rows whose third column is empty are left out.
#
# Performs an HTTPS download; network and parse errors are not rescued here.
def epha_atc_extractor
  data = {}
  url = 'https://download.epha.ch/data/atc/atc.csv'
  read_all = lambda { |io| io.read }
  # Kernel#open no longer opens URLs on Ruby >= 3.0. URI.open is used where
  # open-uri provides it, with Kernel#open as the fallback for older Rubies.
  # The block form also closes the handle, which the original never did.
  raw = URI.respond_to?(:open) ? URI.open(url, &read_all) : open(url, &read_all)
  body = raw.force_encoding('UTF-8')
  Util.debug_msg "epha_atc_extractor is #{body.size} bytes long"
  # Options are passed as keywords: a positional Hash is rejected on Ruby 3.
  csv = CSV.new(body, :headers => false, :col_sep => ',')
  csv.each do |line|
    data[line[0]] = line[2] if line[2]
  end
  Util.debug_msg "epha_atc_extractor extracted #{data.size} items"
  data
end
get_valid_ddd(full_text)
click to toggle source
Returns [qty, unit, roa] for a simple, valid DDD. Returns [nil, nil, full_text] for structured DDDs such as “Standarddosis: 10 Tabletten oder 50 ml Mixtur”. A simple DDD is valid if it meets the following three criteria: (1) the first part is a float value; (2) the second part is a unit, e.g. ml; (3) the third part is one or several (separated by ‘,’) RouteOfAdministrations.
# File lib/gtin2atc/builder.rb, line 166
# Split a DDD text into quantity, unit and route(s) of administration.
#
# Returns [qty, unit, roa] (Float, String, String) when full_text consists
# of exactly three space-separated parts where
# * the first part parses to a non-zero float (',' accepted as decimal mark),
# * the second part is listed in Mesurements,
# * the third part is a ','-separated list whose entries are all listed in
#   RouteOfAdministrations.
# In every other case, including nil, [nil, nil, full_text] is returned.
def get_valid_ddd(full_text)
  result = [nil, nil, full_text]
  return result unless full_text
  return result if full_text.index(';') # e.g. 0,12 g O,R; 18 mg P; 60 mg SL; 3 mg TD; 50 mg TD Gel; 12 mg P bezogen auf Testosteronundecanoat
  return result if full_text.index(':') # e.g. Standarddosis: 10 Tabletten oder 50 ml Mixture

  parts = full_text.split(' ')
  # Exactly three parts are needed. Two-part texts such as "10 mg" used to
  # reach parts[2].split with nil and raised NoMethodError.
  return result unless parts.size == 3

  qty_text, unit, roa_text = parts
  qty = qty_text.sub(',', '.').to_f
  return result if qty == 0.0
  return result unless Mesurements.index(unit)

  roas = roa_text.split(',')
  return result if roas.empty?
  return result unless roas.all? { |roa| RouteOfAdministrations.index(roa) }

  [qty, unit, roa_text]
end
oddb_calc_xml_extractor()
click to toggle source
# File lib/gtin2atc/builder.rb, line 102
# Read oddb_calc.xml from the current directory and index its articles by
# GTIN.
#
# Returns a Hash mapping the integer GTIN to an item Hash with :gtin,
# :PKG_SIZE, :SELLING_UNITS, :MEASURE and, only for articles with exactly
# one component, :COMPOSITIONS. When the file does not exist a message is
# printed and an empty Hash is returned.
def oddb_calc_xml_extractor
  filename = 'oddb_calc.xml'
  data = {}
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  unless File.exist?(filename)
    puts "Unable to open #{filename}"
    return data
  end

  xml = IO.read(filename)
  Util.debug_msg "oddb_calc_xml_extractor xml is #{xml.size} bytes long"
  result = ARTICLESEntry.parse(xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
  result.ARTICLES.ARTICLE.each do |article|
    gtin = article.GTIN.to_i
    item = {}
    item[:gtin]          = gtin
    item[:PKG_SIZE]      = article.PKG_SIZE
    item[:SELLING_UNITS] = article.SELLING_UNITS
    item[:MEASURE]       = article.MEASURE
    compositions = article.COMPOSITIONS
    # Guard against articles without a COMPOSITIONS element.
    if compositions && compositions.COMPONENT && compositions.COMPONENT.size == 1
      item[:COMPOSITIONS] = compositions
    end
    data[gtin] = item
  end
  Util.debug_msg "oddb_calc_xml_extractor extracted #{data.size} items"
  data
end
report(topic, msg, details)
click to toggle source
# File lib/gtin2atc/builder.rb, line 338
# Log one result line and, when report generation is enabled, write it
# together with the sorted details into a text file.
#
# topic   - prefix of the headline
# msg     - description of the result category
# details - list of detail lines; its size is part of the headline
#
# The file name is the headline plus '.txt' with every run of ':', ' ' and
# ',' replaced by '_'. Nothing is written unless @gen_reports is set.
def report(topic, msg, details)
  headline = "#{topic}: #{details.size} #{msg}"
  Util.info " #{headline}"
  return unless @gen_reports
  target = (headline + '.txt').gsub(/[: ,]+/, '_')
  File.open(target, 'w+') do |out|
    out.puts headline
    details.sort.each { |line| out.puts line }
  end
end
run(gtins_to_parse=[], output_name=nil)
click to toggle source
# File lib/gtin2atc/builder.rb, line 191
# Extract all data sources and write the GTIN -> ATC CSV file(s).
#
# gtins_to_parse - GTINs and/or pharmacodes (as Strings) to export; an empty
#                  list exports every swissindex item
# output_name    - CSV file name inside Util.get_archive; ignored in compare
#                  mode, defaults to 'gtin2atc.csv'
#
# Always writes the swissindex based CSV and pharmacode_gtin_not_found.txt
# (the requested ids that were not exported). In compare mode (@do_compare)
# it additionally writes gtin2atc_bag.csv and gtin2atc_swissmedic.csv and
# runs check_bag, check_swissmedic and compare.
def run(gtins_to_parse=[], output_name=nil)
  Util.debug_msg("run #{gtins_to_parse}")
  Util.debug_msg("@use_swissindex true")
  @oddb_calc = oddb_calc_xml_extractor
  @data_epha_atc = epha_atc_extractor
  @data_swissindex = swissindex_xml_extractor
  @data_bag = bag_xml_extractor

  if @do_compare
    output_name = File.join(Util.get_archive, 'gtin2atc_swissindex.csv')
  else
    output_name ||= 'gtin2atc.csv'
    output_name = File.join(Util.get_archive, output_name)
  end

  # Hash lookups replace Array#index, which was evaluated once per
  # swissindex item and once per requested id.
  wanted = {}
  gtins_to_parse.each { |id| wanted[id] = true }
  emitted_ids = {}

  # NOTE(review): CsvOutputOptions is passed as a positional Hash here and
  # below; confirm this still works on the targeted Ruby/csv version.
  CSV.open(output_name,'w+', CsvOutputOptions) do |csvfile|
    csvfile << ["gtin", "ATC", 'pharmacode', 'description', 'exfactory_price', 'public_price', 'selling units', 'name', 'qty', 'unit', 'ddd:qty', 'ddd:unit', 'ddd:full_text']
    @data_swissindex.sort.each do |gtin, item|
      next unless @do_compare || gtins_to_parse.size == 0 ||
                  wanted[gtin.to_s] || wanted[item[:pharmacode].to_s]
      atc = item[:atc_code]
      ddd = get_valid_ddd(@data_epha_atc[atc])
      calc = @oddb_calc[gtin]
      bag  = @data_bag[gtin]
      selling_units   = calc ? calc[:SELLING_UNITS] : nil
      exfactory_price = bag ? bag[:exfactory_price] : nil
      public_price    = bag ? bag[:public_price] : nil
      emitted_ids[gtin.to_i] = true if gtin
      emitted_ids[item[:pharmacode].to_i] = true if item[:pharmacode]
      if calc && calc[:COMPOSITIONS]
        comp = calc[:COMPOSITIONS].COMPONENT.first
        component = [comp.NAME, comp.QTY, comp.UNIT]
      else
        component = [nil, nil, nil]
      end
      csvfile << [gtin, atc, item[:pharmacode], item[:description], exfactory_price, public_price, selling_units, component, ddd].flatten
    end
  end
  msg = "swissindex: Extracted #{gtins_to_parse.size} of #{@data_swissindex.size} items into #{output_name} for #{gtins_to_parse}"
  Util.debug_msg(msg)

  missing_ids = gtins_to_parse.reject { |id| emitted_ids[id.to_i] }
  # NOTE(review): CSV options are handed to File.open here; verify that this
  # is intended (only IO options such as :encoding are meaningful).
  File.open('pharmacode_gtin_not_found.txt', 'w+', CsvOutputOptions) { |f| f.write missing_ids.join("\n") }
  msg = "swissindex: Could not find info for #{missing_ids.size} missing ids see file pharmacode_gtin_not_found.txt"
  Util.debug_msg(msg)
  return unless @do_compare

  output_name = File.join(Util.get_archive, 'gtin2atc_bag.csv')
  CSV.open(output_name,'w+', CsvOutputOptions) do |csvfile|
    csvfile << ["gtin", "ATC", 'description']
    @data_bag.sort.each do |gtin, item|
      # bag items carry their text under :name; :description was never set,
      # so this column used to be empty.
      csvfile << [gtin, item[:atc_code], item[:name]]
    end
  end
  Util.debug_msg "bag: Extracted #{gtins_to_parse.size} of #{@data_bag.size} items into #{output_name} for #{gtins_to_parse}"

  @data_swissmedic = swissmedic_xls_extractor
  output_name = File.join(Util.get_archive, 'gtin2atc_swissmedic.csv')
  CSV.open(output_name,'w+', CsvOutputOptions) do |csvfile|
    csvfile << ["gtin", "ATC", 'description']
    @data_swissmedic.sort.each do |gtin, item|
      # Rows now match the three header columns. swissmedic items have no
      # :pharmacode or :description, only :name.
      csvfile << [gtin, item[:atc_code], item[:name]]
    end
  end
  Util.debug_msg "swissmedic: Extracted #{@data_swissmedic.size} items into #{output_name}"
  check_bag
  check_swissmedic
  compare
end
swissindex_xml_extractor()
click to toggle source
# File lib/gtin2atc/builder.rb, line 82
# Download the swissindex pharma XML and index its items by GTIN.
#
# Returns a Hash mapping the integer GTIN to an item Hash with :gtin,
# :pharmacode, :atc_code and :description. Items without a GTIN are skipped.
#
# Side effect: stores the downloader in @swissindex.
def swissindex_xml_extractor
  @swissindex = SwissindexDownloader.new
  xml = @swissindex.download
  Util.debug_msg "swissindex_xml_extractor xml is #{xml.size} bytes long"
  data = {}
  result = PharmaEntry.parse(xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
  result.PHARMA.ITEM.each do |pac|
    gtin = pac.GTIN ? pac.GTIN.to_i : nil
    # The original tested item[:gtin].to_i on the still empty item, which is
    # 0 and therefore truthy, so items without GTIN were stored under nil.
    next unless gtin
    item = {}
    item[:gtin]        = gtin
    item[:pharmacode]  = pac.PHAR || ''
    item[:atc_code]    = (code = pac.ATC) ? code.to_s : ''
    item[:description] = pac.DSCR
    data[gtin] = item
  end
  Util.debug_msg "swissindex_xml_extractor extracted #{data.size} items"
  data
end
swissmedic_xls_extractor()
click to toggle source
# File lib/gtin2atc/builder.rb, line 55
# Download the swissmedic spreadsheet and index its rows by GTIN.
#
# The GTIN is built from the prefix 7680, the 5-digit value of column 0, the
# 3-digit value of column 10 and the check digit from calc_checksum.
# Returns a Hash mapping the integer GTIN to an item Hash with :gtin,
# :atc_code (column 5) and :name (column 2). The first two rows and rows
# lacking column 0 or column 10 are skipped.
#
# Side effects: stores the downloader in @swissmedic and the first worksheet
# in @sheet.
def swissmedic_xls_extractor
  @swissmedic = SwissmedicDownloader.new
  filename = @swissmedic.download
  Util.debug_msg "swissmedic_xls_extractor xml is #{filename}"
  data = {}
  @sheet = RubyXL::Parser.parse(File.expand_path(filename)).worksheets[0]
  i_5,i_3 = 0,10 # :swissmedic_numbers
  atc = 5 # :atc_code
  name = 2
  @sheet.each_with_index do |row, i|
    next if i <= 1
    next unless row[i_5] && row[i_3]
    # Always 8 characters, so the former empty? check and ljust were no-ops.
    no8 = format('%05d', row[i_5].value.to_i) + format('%03d', row[i_3].value.to_i)
    next if no8.to_i == 0
    ean_base12 = "7680#{no8}"
    gtin = (ean_base12 + calc_checksum(ean_base12)).to_i
    item = {}
    item[:gtin]     = gtin
    item[:atc_code] = row[atc] ? row[atc].value.to_s : ''
    # Guarded like the ATC cell: an empty name cell used to raise.
    item[:name]     = row[name] ? row[name].value.to_s : ''
    data[gtin] = item
  end
  Util.debug_msg "swissmedic_xls_extractor extracted #{data.size} items"
  data
end