class VbulletinScraper::V4::PostScraper
Public Class Methods
new(input)
click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 6
# Builds the Nokogiri document held in @data from either a URL or a raw
# HTML string.
#
# @param input [String] a URL (anything starting with "http" or "www") or
#   the HTML markup of a post
def initialize(input)
  # String#start_with? accepts several prefixes. The previous form,
  # `start_with? "http" || "www"`, evaluated `"http" || "www"` to "http"
  # first, so the "www" prefix was never actually checked.
  if input.start_with?('http', 'www')
    # A bare "www..." string has no scheme, so `open` would treat it as a
    # local file path; give it one so it is fetched as a URL.
    url = input.start_with?('www') ? "http://#{input}" : input
    # NOTE(review): opening a URL through `open` with :allow_redirections
    # presumably relies on open-uri plus the open_uri_redirections gem being
    # loaded elsewhere - confirm.
    @data = Nokogiri::HTML(open(url, allow_redirections: :all))
  else
    @data = Nokogiri::HTML(input)
  end
  @data.encoding = 'UTF-8'
end
Public Instance Methods
get_post_content()
click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 41
# Returns the text of the '.content blockquote' markup after every <div> and
# every comment node has been removed from it, passed through get_raw_text.
# Returns '' when the selector lookup yields nil.
#
# NOTE(review): the original local was named "postContentNoQuotes", so the
# removed <div>s are presumably the quoted posts - confirm against real markup.
def get_post_content
  matched = get_items_by_selector('.content blockquote')
  return '' if matched.nil?

  # Work on a detached fragment built from the matched markup.
  fragment = Nokogiri::HTML.fragment(matched.inner_html)
  ['div', 'comment()'].each { |query| fragment.search(query).remove }
  get_raw_text(fragment.to_s)
end
get_post_content_raw()
click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 33
# Returns the string form (to_s) of whatever get_item_by_selector finds for
# '.content blockquote', or '' when nothing is found.
def get_post_content_raw
  node = get_item_by_selector('.content blockquote')
  node.nil? ? '' : node.to_s
end
get_post_permalink()
click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 89
# Returns the 'href' attribute looked up via the '.postcounter' selector,
# passed through get_raw_text, or '' when the lookup yields nil.
def get_post_permalink
  href = get_item_by_selector_with_attribute('.postcounter', 'href')
  return '' if href.nil?

  get_raw_text(href)
end
get_post_submit_datetime()
click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 52
# Parses the text of the '.date' element into a DateTime.
#
# The text is expected in the form "MM-DD-YYYY, HH:MM am/pm". The words
# "Yesterday" and "Today" are replaced by the corresponding calendar date
# before parsing.
#
# @return [DateTime, nil] the parsed timestamp; nil when the '.date' lookup
#   yields nil or the text does not match the expected format
def get_post_submit_datetime
  date_format = '%m-%d-%Y'
  date_time_format = "#{date_format}, %I:%M %P"

  date_node = get_item_by_selector('.date')
  return nil if date_node.nil?

  raw = get_raw_text(date_node.text)
  begin
    today = Date.today
    # Both relative days are derived from Date.today using only the standard
    # library. The previous Date.yesterday is not part of stdlib Date, and a
    # NoMethodError from it would not be caught by the rescue below.
    # "Yesterday" is checked first, matching the original branch order.
    label, day = { 'Yesterday' => today - 1, 'Today' => today }.find { |word, _| raw.include?(word) }
    raw = day.strftime(date_format) + raw.gsub(label, '') if label
    DateTime.strptime(raw, date_time_format)
  rescue ArgumentError
    # strptime signals an unparseable string with ArgumentError.
    nil
  end
end
get_quotes()
click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 80
# Returns whatever get_items_by_selector finds for '.bbcode_container', or an
# empty array when the lookup yields nil.
#
# NOTE(review): the method name suggests these containers are quoted posts -
# confirm against real markup.
def get_quotes
  # Query once and reuse the result; the previous version ran the same
  # selector a second time just to return it.
  quotes = get_items_by_selector('.bbcode_container')
  quotes.nil? ? [] : quotes
end
get_vbulletin_post_id()
click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 17
# Returns the 'name' attribute looked up via the '.nodecontrols a' selector
# with every occurrence of "post" removed, passed through get_raw_text.
# Returns '' when the lookup yields nil.
def get_vbulletin_post_id
  anchor_name = get_item_by_selector_with_attribute('.nodecontrols a', 'name')
  return '' if anchor_name.nil?

  stripped = anchor_name.gsub('post', '')
  get_raw_text(stripped)
end