class Medreg::PersonImporter
Constants
- Match_qualification_with_austria
- RECIPIENTS
Public Class Methods
new(glns_to_import = [])
# File lib/medreg/person_importer.rb, line 60
def initialize(glns_to_import = [])
  @glns_to_import = glns_to_import.clone
  @glns_to_import.delete_if { |item| item.size == 0 }
  @info_to_gln = {}
  @@logInfo = []
  FileUtils.rm_f(Personen_YAML) if File.exists?(Personen_YAML)
  FileUtils.rm_f(Personen_CSV)  if File.exists?(Personen_CSV)
  FileUtils.mkdir_p(File.dirname(Personen_YAML))
  @yaml_file = File.open(Personen_YAML, 'w+')
  @csv_file  = File.open(Personen_CSV, 'w+')
  @persons_prev_import = 0
  @persons_skipped = 0
  @persons_deleted = 0
  @persons_created = 0
  @skip_to_doctor = nil
  @archive = ARCHIVE_PATH
  @@all_doctors = {}
  @@errors = []
  setup_default_agent unless setup_default_agent
end
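A minimal usage sketch, assuming the medreg gem is installed and defines the path constants (Personen_YAML, Personen_CSV, ARCHIVE_PATH) the constructor relies on; the GLNs below are invented:

  require 'medreg'   # assumption: loads Medreg::PersonImporter and its constants

  # Empty strings are dropped by the constructor; the GLNs are made up.
  importer = Medreg::PersonImporter.new(['7601000000001', '', '7601000000002'])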
Public Instance Methods
add_item(info, item)
# File lib/medreg/person_importer.rb, line 85
def add_item(info, item)
  info << item.to_s.gsub(',', ' ')
end
get_detail_info(info, doc)
# File lib/medreg/person_importer.rb, line 308
def get_detail_info(info, doc)
  text = doc.xpath('//div').text
  m = text.match(/Nationalität:\s*([Ö\w+])[^:]+:\s+(\d+)/) # Special case Österreich
  unless m and m[2] == info.gln.to_s
    File.open(File.join(LOG_PATH, 'doc_div.txt'), 'w+') { |f| f.write text }
    Medreg.log "ERROR: Id in text does not match #{info.gln} match was #{m.inspect}"
    return []
  end
  addresses = []
  nrAdresses = doc.xpath('//ol/li/div').size
  0.upto(nrAdresses - 1).each { |idx|
    lines = []
    doc.xpath('//ol/li/div')[idx].children.each { |x| lines << x.text }
    address = Address2.new
    address.fon = []
    address.fax = []
    address.type = 'at_praxis'
    address.additional_lines = []
    address.canton = info.authority
    address.name = lines[0]
    lines[1].sub!(/^[A-Z]\. /, '')
    lines[1..-1].each { |line|
      if /^Telefon: /.match(line)
        address.fon << line.split('Telefon: ')[1].gsub(/\-/, ' ')
        next
      elsif /^Fax: /.match(line)
        address.fax << line.split('Fax: ')[1].gsub(/\-/, ' ')
        next
      else
        next if line.length <= 1
        if m = line.match(/(|\w\w[-\. ])(\d{4})\s+(\S+)/)
          address.location = line
        else
          address.additional_lines << line
        end
      end
    }
    addresses << address
  }
  addresses
end
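The per-line classification above (phone, fax, a location line containing a four-digit postal code, everything else) can be shown with a standalone sketch; the sample lines are invented and Address2 is replaced by a plain Struct:

  Address = Struct.new(:fon, :fax, :location, :additional_lines)
  address = Address.new([], [], nil, [])

  ['Telefon: 044-123 45 67', 'Fax: 044-123 45 68', 'CH-8001 Zürich', 'Hintereingang'].each do |line|
    case line
    when /^Telefon: /                   then address.fon << line.split('Telefon: ')[1].gsub(/\-/, ' ')
    when /^Fax: /                       then address.fax << line.split('Fax: ')[1].gsub(/\-/, ' ')
    when /(|\w\w[-\. ])(\d{4})\s+(\S+)/ then address.location = line
    else                                     address.additional_lines << line
    end
  end
  # address.fon => ["044 123 45 67"], address.location => "CH-8001 Zürich"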
get_detail_to_glns(glns)
# File lib/medreg/person_importer.rb, line 273
def get_detail_to_glns(glns)
  max_retries = 100
  @idx = 0
  r_loop = ResilientLoop.new(File.basename(__FILE__, '.rb'))
  @skip_to_doctor ||= r_loop.state_id
  Medreg.log "get_detail_to_glns #{glns.size}. first 10 are #{glns[0..9]} state_id is #{r_loop.state_id.inspect}" if DebugImport
  glns.each { |gln|
    if r_loop.must_skip?(gln.to_s)
      Medreg.log "Skipping #{gln.inspect}. Waiting for #{r_loop.state_id.inspect}" if DebugImport
      @persons_skipped += 1
      next
    end
    @idx += 1
    nr_tries = 0
    while nr_tries < max_retries
      begin
        Medreg.log "Searching for doctor with GLN #{gln}. Created #{@persons_created}. At #{@persons_created + @persons_prev_import} of #{glns.size}.#{nr_tries > 0 ? ' nr_tries is ' + nr_tries.to_s : ''}"
        get_one_doctor(r_loop, gln)
        break
      rescue Mechanize::ResponseCodeError, Timeout::Error => e
        raise e if defined?(MiniTest)
        nr_tries += 1
        Medreg.log "rescue Mechanize::ResponseCodeError #{gln.inspect}. nr_tries #{nr_tries}"
        sleep(10 * 60) # wait 10 minutes till medreg server is back again
      rescue StandardError => e
        raise e if defined?(MiniTest)
        nr_tries += 1
        Medreg.log "rescue StandardError #{gln.inspect}. nr_tries #{nr_tries} error was #{e}"
        sleep(10 * 60) # wait 10 minutes till medreg server is back again
      end
    end
    raise "Max retries #{nr_tries} for #{gln.to_s} reached. Aborting import" if nr_tries == max_retries
  }
  r_loop.finished
end
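The loop is plain retry-with-sleep: each GLN gets up to max_retries attempts, every failure sleeps ten minutes, and only under MiniTest is the exception re-raised immediately. A self-contained sketch of the same pattern with a stubbed fetch step and a short sleep:

  require 'timeout'

  max_retries = 3
  nr_tries = 0
  # Stand-in for get_one_doctor: fails twice, then succeeds.
  fetch = lambda { nr_tries < 2 ? raise(Timeout::Error, 'medreg not reachable') : :ok }

  begin
    fetch.call
  rescue StandardError => e
    raise e if defined?(MiniTest)   # under test, fail fast instead of retrying
    nr_tries += 1
    sleep 1                         # the importer sleeps 10 * 60 seconds here
    retry if nr_tries < max_retries
    raise "Max retries #{nr_tries} reached. Aborting import"
  end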
get_latest_file()
# File lib/medreg/person_importer.rb, line 350
def get_latest_file
  agent = Mechanize.new
  target = File.join @archive, Time.now.strftime("persons_%Y.%m.%d.xlsx")
  needs_update = true
  save_for_log "get_latest_file target #{target} #{File.exist?(target)} from URL #{MedRegPerson_XLS_URL}"
  return target if File.exist?(target)
  @download = nil
  begin
    file = agent.get(MedRegPerson_XLS_URL)
    @download = file.body
  rescue Net::HTTP::Persistent::Error, Timeout::Error => e
    Medreg.log "Caught error #{e}"
    search_name = File.join @archive, Time.now.strftime("persons_%Y*.xlsx")
    candidates = Dir.glob(search_name)
    if candidates.size == 0
      save_for_log "getting file from MedRegPerson_XLS_URL failed. Could not find any prior downloads via #{search_name}"
      raise e
    end
    best = candidates.max_by { |f| File.mtime(f) }
    save_for_log "getting file from MedRegPerson_XLS_URL failed. Using #{best} #{File.mtime(best)} #{File.size(best)} bytes"
    @download = IO.read(best)
  end
  File.open(target, 'w+') { |f| f.write @download }
  @download = nil # release it
  target
end
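The method is a download-with-fallback: fetch today's Excel export, and if the server cannot be reached reuse the newest previously downloaded file from the archive. A standalone sketch of the same idea with a hypothetical URL and archive directory:

  require 'mechanize'
  require 'fileutils'

  url     = 'https://example.org/persons.xlsx'   # hypothetical export URL
  archive = '/tmp/medreg_archive'                # hypothetical archive directory
  FileUtils.mkdir_p(archive)
  target  = File.join(archive, Time.now.strftime('persons_%Y.%m.%d.xlsx'))

  begin
    download = Mechanize.new.get(url).body
  rescue StandardError => e
    candidates = Dir.glob(File.join(archive, 'persons_*.xlsx'))
    raise e if candidates.empty?                 # nothing cached: give up
    download = IO.read(candidates.max_by { |f| File.mtime(f) })
  end
  File.write(target, download)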
get_one_doctor(r_loop, gln)
# File lib/medreg/person_importer.rb, line 209
def get_one_doctor(r_loop, gln)
  maxSeconds = defined?(Minitest) ? 3600 : 120
  r_loop.try_run(gln, maxSeconds) do
    # increase timeout from default of 10 seconds. Measured 46 seconds for the first gln
    if @@all_doctors[gln.to_s]
      Medreg.log "ERROR: Skip search GLN #{gln} as already found"
      next
    end
    info = @info_to_gln[gln.to_s]
    unless info
      msg = "ERROR: could not find info for GLN #{gln}"
      @@errors << msg
      Medreg.log msg
      next
    end
    url = MedRegOmURL + "de/Suche/Detail/?gln=#{gln}&vorname=#{info.first_name.gsub(/ /, '+')}&name=#{info.family_name.gsub(/ /, '+')}"
    page_1 = @agent.get(url)
    data_2 = [
      ['Name', info.family_name],
      ['Vorname', info.first_name],
      ['Gln', gln.to_s],
      ['AutomatischeSuche', 'True'],
    ]
    page_2 = @agent.post(MedRegOmURL + 'Suche/GetSearchCount', data_2)
    data_3 = [
      ['currentpage', '1'],
      ['pagesize', '10'],
      ['sortfield', ''],
      ['sortorder', 'Ascending'],
      ['pageraction', ''],
      ['filter', ''],
    ]
    page_3 = @agent.post(MedRegOmURL + 'Suche/GetSearchData', data_3)
    data_4 = [
      ['Name', info.family_name],
      ['Vorname', info.first_name],
      ['Gln', gln.to_s],
      ['AutomatischeSuche', 'True'],
      ['currentpage', '1'],
      ['pagesize', '10'],
      ['sortfield', ''],
      ['sortorder', 'Ascending'],
      ['pageraction', ''],
      ['filter', ''],
    ]
    page_4 = @agent.post(MedRegOmURL + 'Suche/GetSearchData', data_4)
    regExp = /id"\:(\d\d+)/i
    unless page_4.body.match(regExp)
      File.open(File.join(LOG_PATH, 'page_4.body'), 'w+') { |f| f.write page_4.body }
      msg = "ERROR: Could not find a GLN #{gln} via url #{url}"
      @@errors << msg
      Medreg.log msg
      next
    end
    medregId = page_4.body.match(regExp)[1]
    page_5 = @agent.get(MedRegOmURL + "de/Detail/Detail?pid=#{medregId}")
    File.open(File.join(LOG_PATH, "#{gln}.html"), 'w+') { |f| f.write page_5.content } if DebugImport
    doc_hash = parse_details(Nokogiri::HTML(page_5.content), gln, info)
    store_doctor(doc_hash)
    @persons_created += 1
    @@all_doctors[gln.to_s] = doc_hash
  end
end
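The internal medreg id is pulled out of the JSON-like search response with the regexp /id"\:(\d\d+)/i; a tiny sketch with an invented response body:

  body = '{"rows":[{"Id":12345,"Name":"Muster"}]}'   # invented search response
  if m = body.match(/id"\:(\d\d+)/i)
    medreg_id = m[1]                                 # => "12345"
  end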
parse_details(doc, gln, info)
# File lib/medreg/person_importer.rb, line 167
def parse_details(doc, gln, info)
  unless doc.xpath("//tr") and doc.xpath("//tr").size > 3
    Medreg.log "ERROR: Could not find a table with info for #{gln}"
    return nil
  end
  doc_hash = Hash.new
  doc_hash[:ean13] = gln.to_s.clone
  doc_hash[:name] = info.family_name
  doc_hash[:firstname] = info.first_name
  doc_hash[:may_dispense_narcotics] = (info.may_dispense_narcotics && info.may_dispense_narcotics.match(/ja/i)) ? true : false
  doc_hash[:may_sell_drugs] = (info.may_sell_drugs && info.may_sell_drugs.match(/ja/i)) ? true : false
  doc_hash[:remark_sell_drugs] = info.remark_sell_drugs
  idx_beruf = nil
  0.upto(doc.xpath("//tr").size) { |j| if doc.xpath("//tr")[j].text.match(/^\s*Beruf\r\n/) then idx_beruf = j; break; end }
  idx_titel = nil
  0.upto(doc.xpath("//tr").size) { |j| if doc.xpath("//tr")[j].text.match(/^\s*Weiterbildungstitel/) then idx_titel = j; break; end }
  idx_privat = nil
  0.upto(doc.xpath("//tr").size) { |j| if doc.xpath("//tr")[j].text.match(/^\s*Weitere Qualifikationen/) then idx_privat = j; break; end }
  # doc_hash[:exam] = doc.xpath("//tr")[idx_beruf+1].text.strip.split(/\r\n|\n/)[1].to_i # year of the state examination is not displayed
  specialities = []
  (idx_titel + 1).upto(idx_privat - 1).each { |j|
    line = doc.xpath("//tr")[j].text
    unless line.match(/Keine Angaben vorhanden/)
      line = line.gsub("\r\n", '')
      specialities << string_to_qualification(line, gln)
    end
  }
  doc_hash[:specialities] = specialities
  capabilities = []
  (idx_privat + 1).upto(99).each { |j|
    next unless doc.xpath("//tr")[j]
    line = doc.xpath("//tr")[j].text
    unless line.match(/Keine Angaben vorhanden/)
      capabilities << string_to_qualification(line, gln)
    end
  }
  doc_hash[:capabilities] = capabilities
  addresses = get_detail_info(info, doc)
  doc_hash[:addresses] = addresses
  doc_hash
end
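The detail page is one large table; parse_details locates the 'Beruf', 'Weiterbildungstitel' and 'Weitere Qualifikationen' header rows by index and treats the rows between them as specialities or capabilities. A simplified sketch over an invented list of row texts:

  rows = ['Beruf', 'Ärztin/Arzt, 2004, Schweiz',
          'Weiterbildungstitel', 'Allgemeine Innere Medizin, 2010, Schweiz',
          'Weitere Qualifikationen', 'Keine Angaben vorhanden']   # invented row texts

  idx_titel  = rows.index { |r| r.match(/^\s*Weiterbildungstitel/) }
  idx_privat = rows.index { |r| r.match(/^\s*Weitere Qualifikationen/) }

  specialities = rows[(idx_titel + 1)..(idx_privat - 1)].reject { |r| r.match(/Keine Angaben vorhanden/) }
  # => ["Allgemeine Innere Medizin, 2010, Schweiz"]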
parse_xls(path)
# File lib/medreg/person_importer.rb, line 430
def parse_xls(path)
  Medreg.log "parsing #{path}"
  workbook = RubyXL::Parser.parse(path)
  positions = []
  rows = 0
  workbook[0].each do |row|
    next unless row and row[COL[:gln]]
    rows += 1
    if rows > 1
      info = PersonInfo.new
      [:gln, :family_name, :first_name, :authority, :diploma,
       :may_dispense_narcotics, :may_sell_drugs, :remark_sell_drugs].each { |field|
        cmd = "info.#{field} = row[COL[#{field.inspect}]] ? row[COL[#{field.inspect}]].value : nil"
        eval(cmd)
      }
      @info_to_gln[row[COL[:gln]].value] = info
    end
  end
  @glns_to_import = @info_to_gln.keys.sort.uniq
end
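The per-field copy is built as a string and passed to eval. Assuming PersonInfo exposes plain writer methods for each field, the same loop can be written with public_send, as in this sketch (PersonInfo, COL and the row are stubbed):

  PersonInfo = Struct.new(:gln, :family_name, :first_name)   # stub with a few fields
  COL = { gln: 0, family_name: 1, first_name: 2 }            # stub column map
  row = ['7601000000001', 'Muster', 'Hans']                  # invented spreadsheet row

  info = PersonInfo.new
  [:gln, :family_name, :first_name].each do |field|
    value = row[COL[field]]   # in the importer: row[COL[field]] && row[COL[field]].value
    info.public_send("#{field}=", value)
  end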
report()
# File lib/medreg/person_importer.rb, line 376
def report
  report = "Persons update\n\n"
  report << "Skipped doctors: #{@persons_skipped}#{@skip_to_doctor ? '. Waited for ' + @skip_to_doctor.to_s : ''}" << "\n"
  report << "New doctors: " << @persons_created.to_s << "\n"
  report << "Doctors from previous imports: " << @persons_prev_import.to_s << "\n"
  report << "Deleted doctors: " << @persons_deleted.to_s << "\n"
  if @@errors.size > 0
    report << "\n\nFound following errors/warnings:\n\n"
    report << @@errors.join("\n")
    report << "\n"
  end
  report
end
save_for_log(msg)
# File lib/medreg/person_importer.rb, line 55
def save_for_log(msg)
  Medreg.log(msg)
  withTimeStamp = "#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}: #{msg}" unless defined?(MiniTest)
  @@logInfo << withTimeStamp
end
save_import_to_csv(filename)
# File lib/medreg/person_importer.rb, line 84
def save_import_to_csv(filename)
  def add_item(info, item)
    info << item.to_s.gsub(',', ' ')
  end
  field_names = ["ean13", "name", "firstname", "specialities", "capabilities",
                 "may_dispense_narcotics", "remark_sell_drugs",
                 "address_additional_lines", "address_canton", "address_fax",
                 "address_fon", "address_location", "address_type",
                ]
  CSV.open(filename, "wb") do |csv|
    csv << field_names
    @@all_doctors.each { |gln, doctor|
      maxlines = 1
      maxlines = doctor[:specialities].size if doctor[:specialities].size > maxlines
      maxlines = doctor[:capabilities].size if doctor[:capabilities].size > maxlines
      maxlines = doctor[:addresses].size    if doctor[:addresses].size > maxlines
      0.upto(maxlines - 1).each { |idx|
        info = []
        field_names[0..2].each { |name| add_item(info, eval("doctor[:#{name}]")) }
        add_item(info, doctor[:specialities][idx])
        add_item(info, doctor[:capabilities][idx])
        add_item(info, doctor[:may_dispense_narcotics] ? 1 : 0)
        add_item(info, doctor[:remark_sell_drugs])
        address = doctor[:addresses][idx]
        field_names[7..-1].each { |name|
          add_item(info, eval("x = address.#{name.sub('address_', '')}; x.is_a?(Array) ? x.join(\"\n\") : x"))
        } if address
        csv << info
      }
    }
  end
end
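Each doctor is expanded into as many CSV rows as the largest of its specialities, capabilities and addresses lists; a sketch of the maxlines calculation with invented sizes:

  doctor = { specialities: %w[a b], capabilities: %w[c], addresses: [:adr1, :adr2, :adr3] }   # invented
  maxlines = [1, doctor[:specialities].size, doctor[:capabilities].size, doctor[:addresses].size].max
  # => 3 CSV rows will be written for this doctor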
save_import_to_yaml(filename)
# File lib/medreg/person_importer.rb, line 80
def save_import_to_yaml(filename)
  File.open(filename, 'w+') { |f| f.write(@@all_doctors.to_yaml) }
  save_for_log "Saved #{@@all_doctors.size} doctors in #{filename}"
end
setup_default_agent()
# File lib/medreg/person_importer.rb, line 154
def setup_default_agent
  @agent = Mechanize.new
  @agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.1.0'
  @agent.redirect_ok = :all
  @agent.follow_meta_refresh_self = true
  @agent.follow_meta_refresh = :everwhere
  @agent.redirection_limit = 55
  @agent.follow_meta_refresh = true
  @agent.ignore_bad_chunking = true
  if defined?(MiniTest) then @agent.log = Logger.new Mechanize_Log end
  @agent
end
store_doctor(hash)
# File lib/medreg/person_importer.rb, line 389
def store_doctor(hash)
  return unless hash
  action = nil
  pointer = nil
  doctor = Person.new
  doctor.ean13 = hash[:ean13]
  extract = [
    :ean13,
    # :exam,
    :email,
    :firstname,
    :language,
    :name,
    :praxis,
    :salutation,
    :specialities,
    :capabilities,
    :title,
    :addresses,
    :may_dispense_narcotics,
    :may_sell_drugs,
    :remark_sell_drugs,
  ]
  doc_hash = {}
  extract.each { |key|
    if (value = hash[key])
      case key
      when :praxis
        value = (value == 'Ja')
      when :specialities, :capabilities
        if (value.is_a?(String))
          value = [value]
        elsif (value.is_a?(Array))
          value = value
        end
      end
      doc_hash.store(key, value)
    end
  }
end
update()
# File lib/medreg/person_importer.rb, line 131
def update
  saved = @glns_to_import.clone
  r_loop = ResilientLoop.new(File.basename(__FILE__, '.rb'))
  @state_yaml = r_loop.state_file.sub('.state', '.yaml')
  if File.exist?(@state_yaml)
    @@all_doctors = YAML.load_file(@state_yaml)
    @persons_prev_import = @@all_doctors.size
    puts "Got #{@persons_prev_import} items from previous import saved in #{@state_yaml}"
  end
  latest = get_latest_file
  save_for_log "parse_xls #{latest} specified GLN glns #{saved.inspect}"
  parse_xls(latest)
  @info_to_gln.keys
  get_detail_to_glns(saved.size > 0 ? saved : @glns_to_import)
  save_import_to_yaml(Personen_YAML)
  save_import_to_csv(Personen_CSV)
  return @persons_created, @persons_prev_import, @persons_deleted, @persons_skipped
ensure
  if @persons_created > 0
    save_import_to_yaml(@state_yaml)
    save_import_to_csv(@state_yaml.sub('.yaml', '.csv'))
  end
end
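A hedged end-to-end sketch of a typical run; the require path, output filename and GLN are assumptions, not taken from the library:

  require 'medreg'   # assumption: provides Medreg::PersonImporter

  importer = Medreg::PersonImporter.new(['7601000000001'])   # invented GLN
  created, prev_import, deleted, skipped = importer.update
  puts importer.report
  importer.save_import_to_csv('persons_export.csv')          # hypothetical extra export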
Private Instance Methods
string_to_qualification(line, gln)
# File lib/medreg/person_importer.rb, line 457
def string_to_qualification(line, gln)
  return nil if line.match(/Weiterbildungstitel|Weitere Qualifikationen|Beruf.*Jahr.*Land/im)
  m = line.match(Match_qualification_with_austria)
  if m
    infos = m[1..3].join(',').gsub("\r", "").gsub(/\s\s+/, ' ').strip.split(/ ,|,/)
    # infos[1] = infos[1].to_i # transform year into an integer
    return infos.join(', ')
  else
    msg = "PROBLEM: could not find speciality for GLN #{gln} in line '#{line}'"
    @@errors << msg
    Medreg.log msg
  end
  nil
end
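Match_qualification_with_austria is not shown here, but once it matches, the three capture groups are joined, whitespace-normalised and re-joined with ', '. A sketch of that post-processing with invented capture groups:

  captures = ['Praktischer Arzt', '2010', 'Österreich']   # invented m[1..3]
  infos = captures.join(',').gsub("\r", '').gsub(/\s\s+/, ' ').strip.split(/ ,|,/)
  infos.join(', ')   # => "Praktischer Arzt, 2010, Österreich"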