class Lobbyliste::Factories::ListFactory
This class builds a Lobbyliste::List from the raw text and HTML data.
Attributes
data[R]
Public Class Methods
build(text_data,html_data)
click to toggle source
@return [Lobbyliste::List]
# File lib/lobbyliste/factories/list_factory.rb, line 8
# Creates a factory for the given raw data and assembles the list from the
# factory's organisations, tags, abbreviations and last update date.
#
# @param text_data raw text data, handed to the constructor
# @param html_data raw HTML data, handed to the constructor
# @return [Lobbyliste::List]
def self.build(text_data, html_data)
  list_factory = new(text_data, html_data)

  # Collected in the same order the List constructor receives them.
  list_parts = [
    list_factory.organisations,
    list_factory.tags,
    list_factory.abbreviations,
    list_factory.last_update
  ]

  ::Lobbyliste::List.new(*list_parts)
end
new(text_data,html_data)
click to toggle source
# File lib/lobbyliste/factories/list_factory.rb, line 18
# Keeps both raw inputs, splits the text into lines without their trailing
# line endings, and clears the memoized results.
#
# @param text_data [#each_line] raw text data
# @param html_data raw HTML data
def initialize(text_data, html_data)
  @text_data, @html_data = text_data, html_data
  @lines = text_data.each_line.map { |raw_line| raw_line.chomp }

  # Memoization slots, filled lazily by the reader methods.
  @organisations = nil
  @tags = nil
  @abbreviations = nil
  @names = nil
end
Public Instance Methods
abbreviations()
click to toggle source
# File lib/lobbyliste/factories/list_factory.rb, line 68
# Memoized hash mapping each abbreviation heading to the list of ids that
# follow it in the abbreviation section.
#
# @return [Hash{String => Array<Integer>}]
def abbreviations
  return @abbreviations if @abbreviations

  ids_by_heading = Hash.new { |hash, key| hash[key] = [] }
  heading = "A"

  extract_abbreviation_data.each do |line|
    # A heading is a single word whose first letter equals the current
    # heading's first letter or its successor.
    is_heading = line.match(/^[A-ZÄÖÜ][A-ZÄÖÜa-zäöüß]+$/) &&
                 [heading[0], heading[0].next].include?(line[0])

    if is_heading
      heading = line
      next
    end

    # Reference lines look like "– 123"; the number is the referenced id.
    reference = line.match(/^\– (\d+)/)
    ids_by_heading[heading] << reference[1].to_i if reference
  end

  @abbreviations = ids_by_heading
end
last_update()
click to toggle source
# File lib/lobbyliste/factories/list_factory.rb, line 84
# Reads the "Stand: DD.MM.YYYY" stamp from the text data.
#
# @return [Date] the date given in the stamp
# @raise [ArgumentError] if the text contains no such stamp, or the stamp is
#   not a valid calendar date
def last_update
  stamp = @text_data.match(/^Stand: (\d\d\.\d\d\.\d\d\d\d)/)
  # Fail with a descriptive error instead of a NoMethodError on nil.
  raise ArgumentError, "no 'Stand: DD.MM.YYYY' date found in text data" unless stamp

  # The regexp fixes the layout, so parse with an explicit format rather
  # than relying on Date.parse heuristics.
  Date.strptime(stamp[1], "%d.%m.%Y")
end
names()
click to toggle source
# File lib/lobbyliste/factories/list_factory.rb, line 42
# Returns the memoized names, extracting them on first use.
def names
  return @names if @names

  extract_names
  @names
end
organisations()
click to toggle source
# File lib/lobbyliste/factories/list_factory.rb, line 30
# Builds (once) one organisation per raw organisation chunk, combining the
# chunk with the name, tags and abbreviations looked up by its id.
def organisations
  @organisations ||= organisations_data.map do |raw_organisation|
    # The first line of every chunk is the organisation id.
    organisation_id = raw_organisation[0].to_i

    ::Lobbyliste::Factories::OrganisationFactory.build(
      names[organisation_id],
      raw_organisation,
      tags_for_organisation(organisation_id),
      abbreviations_for_organisation(organisation_id)
    )
  end
end
Private Instance Methods
abbreviations_for_organisation(organisation_id)
click to toggle source
# File lib/lobbyliste/factories/list_factory.rb, line 138
# Collects every abbreviation whose id list contains the given organisation id.
#
# @param organisation_id id to look for in the abbreviation id lists
# @return [Array] the matching abbreviation keys
def abbreviations_for_organisation(organisation_id)
  abbreviations.each_with_object([]) do |(abbreviation, organisation_ids), matching|
    matching << abbreviation if organisation_ids.include?(organisation_id)
  end
end
begin_organisation?(line)
click to toggle source
# File lib/lobbyliste/factories/list_factory.rb, line 174
# Tells whether the line is the letter-spaced heading that opens an
# organisation entry.
#
# @param line [String, nil] line to test; nil (e.g. the position after the
#   last line) yields false
# @return [Boolean]
def begin_organisation?(line)
  # match? returns a real boolean, accepts nil and does not set $~.
  /^N a m e u n d S i t z \, 1 \. A d r e s s e$/.match?(line)
end
extract_abbreviation_data()
click to toggle source
# File lib/lobbyliste/factories/list_factory.rb, line 124
# Returns every non-ignored line that follows the
# "Verzeichnis der anderen Namensformen" heading.
def extract_abbreviation_data
  heading_index = @lines.index("Verzeichnis der anderen Namensformen")
  section = @lines.drop(heading_index + 1)
  section.reject { |line| ignored_line?(line) }
end
extract_names()
click to toggle source
# File lib/lobbyliste/factories/list_factory.rb, line 145
# Scans the HTML data for organisation headers and stores a hash of
# organisation id => name in @names.
#
# The name is the bold text that follows the letter-spaced heading; line
# breaks inside it become spaces and HTML entities are unescaped.
#
# @return [Hash{Integer => String}] the extracted names (also kept in @names)
def extract_names
  names = {}
  header = /<p><b>(\d+)\n<\/b>N a m e u n d S i t z \, 1 \. A d r e s s e\n<\/p>\n<p><b>(.*?)\n<\/b>/m

  # The block form of scan yields the capture groups directly.
  @html_data.scan(header) do |id, raw_name|
    names[id.to_i] = CGI.unescape_html(raw_name.gsub("\n", " "))
  end

  @names = names
end
extract_tag_data()
click to toggle source
# File lib/lobbyliste/factories/list_factory.rb, line 116
# Returns the non-ignored lines between the "Stichwortverzeichnis" heading
# and the "Verzeichnis der anderen Namensformen" heading.
def extract_tag_data
  heading_index = @lines.index("Stichwortverzeichnis")
  after_heading = @lines.drop(heading_index + 1)
  section = after_heading.take_while { |line| line != "Verzeichnis der anderen Namensformen" }
  section.reject { |line| ignored_line?(line) }
end
ignored_line?(line)
click to toggle source
# File lib/lobbyliste/factories/list_factory.rb, line 159
# Patterns of lines that carry no list data. Built once instead of on every
# call.
# NOTE(review): the last pattern only matches a string containing a newline;
# the constructor chomps every line, so it presumably never matches those —
# confirm before changing it.
IGNORED_LINE_PATTERNS = [
  /^– \d+ –$/,
  /^Aktuelle Fassung der öffentlichen Liste/,
  /^Die Zahlen verweisen auf die fortlaufenden Nummern im Hauptteil/,
  /^\n$/
].freeze

# Tells whether the line matches one of the ignored-line patterns.
#
# @param line [String, nil] line to test
# @return [Boolean]
def ignored_line?(line)
  # match? avoids allocating MatchData for every tested line.
  IGNORED_LINE_PATTERNS.any? { |pattern| pattern.match?(line) }
end
organisations_data()
click to toggle source
# File lib/lobbyliste/factories/list_factory.rb, line 92
# Splits the lines before the "Stichwortverzeichnis" heading into one chunk
# per organisation.
#
# A chunk starts at a line that looks like an organisation id and is directly
# followed by the organisation heading; it runs up to the next such start.
# The last chunk ends just before the "Stichwortverzeichnis" heading, or at
# the end of the text when that heading is missing. Ignored lines are removed
# from every chunk.
#
# @return [Array<Array<String>>] one array of lines per organisation; empty
#   when no organisation start is found
def organisations_data
  start_lines = []
  # -1 addresses the last line, so a missing heading means "until the end"
  # on every Ruby version instead of relying on a nil range bound.
  end_line = -1

  @lines.each_with_index do |line, i|
    if possible_organisation_id?(line) && begin_organisation?(@lines[i + 1])
      start_lines << i
    elsif line == "Stichwortverzeichnis"
      end_line = i - 1
      break
    end
  end

  # Without any start there is nothing to slice; slicing from a nil start
  # would either raise or return unrelated preamble lines.
  return [] if start_lines.empty?

  chunks = start_lines.each_cons(2).map { |from, to| @lines[from...to] }
  chunks << @lines[start_lines.last..end_line]
  chunks.map { |chunk| chunk.reject { |line| ignored_line?(line) } }
end
possible_organisation_id?(line)
click to toggle source
# File lib/lobbyliste/factories/list_factory.rb, line 170
# Tells whether the line consists of digits only, i.e. could be an
# organisation id.
#
# @param line [String, nil] line to test; nil yields false
# @return [Boolean]
def possible_organisation_id?(line)
  # match? returns a real boolean, accepts nil and does not set $~.
  /^\d+$/.match?(line)
end