class SJCBusSchedule::Crawler
Constants
- BASE_URL
- FILTERS
Public Class Methods
new(http: HTTParty, query: { number: "" })
click to toggle source
# File lib/sjc_bus_schedule/crawler.rb, line 14
# Stores the HTTP client and prepares the search URL from the first
# filter/term pair of +query+.
#
# http  - object used as the HTTP client (defaults to HTTParty).
# query - Hash whose first pair is { filter => search term }.
#
# Raises ArgumentError (via validate_filter) when the filter is not a key
# of FILTERS.
def initialize(http: HTTParty, query: { number: "" })
  @http = http

  filter = query.keys.first
  validate_filter(filter)

  # query.first is the [filter, term] pair; splat it into set_url's arguments.
  set_url(*query.first)
end
Public Instance Methods
process()
click to toggle source
# File lib/sjc_bus_schedule/crawler.rb, line 20
# Fetches the search-results page stored in @url, follows every schedule
# link found on it, and builds one bus object per linked page.
#
# Every request goes through @http, the client received by the constructor,
# so a substitute client (for example a test double) is actually honoured
# instead of the global HTTParty being called unconditionally.
#
# Returns an Array holding one build_buses result per schedule link.
def process
  listing = Nokogiri::HTML(@http.get(@url))

  schedule_links(listing).map do |schedule_link|
    bus_url = "http://servicos2.sjc.sp.gov.br#{schedule_link["href"]}"
    # The same parsed page feeds both parsers.
    bus_page = Nokogiri::HTML(@http.get(bus_url))

    build_buses(
      bus_parser: SJCBusSchedule::Parser::Bus.new(doc: bus_page),
      schedules_parser: SJCBusSchedule::Parser::Schedules.new(doc: bus_page)
    )
  end
end
Private Instance Methods
build_buses(bus_parser:, schedules_parser:)
click to toggle source
# File lib/sjc_bus_schedule/crawler.rb, line 52
# Assembles a SJCBusSchedule::Bus from the values exposed by the two parsers.
#
# bus_parser       - object responding to number, name, direction,
#                    itinerary and note.
# schedules_parser - object handed to build_schedules to produce the
#                    bus schedules.
#
# Returns the new SJCBusSchedule::Bus.
def build_buses(bus_parser:, schedules_parser:)
  attributes = {
    number: bus_parser.number,
    name: bus_parser.name,
    direction: bus_parser.direction,
    itinerary: bus_parser.itinerary,
    note: bus_parser.note,
    schedules: build_schedules(schedules_parser: schedules_parser)
  }

  SJCBusSchedule::Bus.new(**attributes)
end
build_schedules(schedules_parser:)
click to toggle source
# File lib/sjc_bus_schedule/crawler.rb, line 63
# Converts every entry returned by schedules_parser.schedules into a
# SJCBusSchedule::Schedule.
#
# schedules_parser - object responding to schedules (a collection of
#                    entries indexable by :period and :hours) and references.
#
# Returns an Array of SJCBusSchedule::Schedule, one per parsed entry.
def build_schedules(schedules_parser:)
  schedules_parser.schedules.map do |entry|
    attributes = {
      period: entry[:period],
      hours: entry[:hours],
      # Queried once per entry, exactly as each schedule is built.
      references: schedules_parser.references
    }

    SJCBusSchedule::Schedule.new(**attributes)
  end
end
schedule_links(doc)
click to toggle source
# File lib/sjc_bus_schedule/crawler.rb, line 48
# Selects the anchors whose href contains the schedule-detail path.
#
# doc - object responding to css (the parsed results page).
#
# Returns whatever doc.css returns for the selector.
def schedule_links(doc)
  detail_link_selector = "a[href*='/servicos/horario-e-itinerario.aspx?acao=d&id_linha=']"
  doc.css(detail_link_selector)
end
set_url(filter, query)
click to toggle source
# File lib/sjc_bus_schedule/crawler.rb, line 44
# Builds the search URL for the given filter and term and stores it in @url.
#
# filter - key looked up in FILTERS to obtain the "opcao" parameter value.
# query  - search term; converted with to_s and escaped for the "txt"
#          parameter.
#
# The term is escaped with URI.encode_www_form_component because URI.encode
# is obsolete (removed in Ruby 3.0) and left reserved characters such as
# "&" and "=" untouched, letting a term inject extra query parameters.
# Note that spaces are emitted as "+", the form-encoding convention.
#
# Returns the URL String that was assigned to @url.
def set_url(filter, query)
  escaped_term = URI.encode_www_form_component(query.to_s)
  @url = "#{BASE_URL}?acao=p&opcao=#{FILTERS[filter]}&txt=#{escaped_term}"
end
validate_filter(filter)
click to toggle source
# File lib/sjc_bus_schedule/crawler.rb, line 40
# Ensures the given filter has an entry in FILTERS.
#
# filter - key to look up in FILTERS.
#
# Returns nil when the filter is known.
# Raises ArgumentError when FILTERS has no value for the filter.
def validate_filter(filter)
  return unless FILTERS[filter].nil?

  raise ArgumentError, "valid filters: number, name, and itinerary"
end