class VbulletinScraper::V4::PostScraper

Public Class Methods

new(input) click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 6
# Builds a scraper around the markup of a single post.
#
# input - String. When it starts with "http" or "www" it is treated as an
#         address and fetched via `open`; any other string is parsed
#         directly as HTML.
#
# The parsed Nokogiri document is stored in @data with its encoding set to
# UTF-8.
def initialize(input)
    # The previous check, `start_with? "http" || "www"`, only ever tested
    # "http" because `"http" || "www"` evaluates to "http". start_with?
    # accepts several prefixes, so both are passed explicitly.
    if input.start_with?('http', 'www')
        # A bare "www..." address carries no scheme; one is added so the
        # fetch is attempted as a URL (assumes `open` is open-uri's).
        url = input.start_with?('www') ? "http://#{input}" : input
        @data = Nokogiri::HTML(open(url, :allow_redirections => :all))
    else
        @data = Nokogiri::HTML(input)
    end
    @data.encoding = "UTF-8"
end

Public Instance Methods

get_post_author() click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 25
# Returns the post author's name as a String.
#
# The name is the text of whatever get_item_by_selector finds for
# '.username', passed through get_raw_text. Returns '' when the lookup
# yields nil.
def get_post_author
    author_node = get_item_by_selector('.username')
    return '' if author_node.nil?

    get_raw_text(author_node.text)
end
get_post_content() click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 41
# Returns the post body with nested <div> elements and HTML comments
# stripped, passed through get_raw_text. Returns '' when the
# '.content blockquote' lookup yields nil.
#
# The inner HTML of the matched items is re-parsed as a fragment so the
# removals do not touch @data. (Presumably quoted replies are wrapped in
# divs, which is why they are dropped here; confirm against real markup.)
def get_post_content
    blockquotes = get_items_by_selector('.content blockquote')
    return '' if blockquotes.nil?

    fragment = Nokogiri::HTML.fragment(blockquotes.inner_html)
    # Same order as before: divs first, then comment nodes.
    ['div', 'comment()'].each { |query| fragment.search(query).remove }
    get_raw_text(fragment.to_s)
end
get_post_content_raw() click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 33
# Returns the unprocessed markup of the post body: the string form of
# whatever get_item_by_selector finds for '.content blockquote'.
# Returns '' when the lookup yields nil.
def get_post_content_raw
    blockquote = get_item_by_selector('.content blockquote')
    blockquote.nil? ? '' : blockquote.to_s
end
get_post_permalink() click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 89
# Returns the post's permalink: the 'href' attribute obtained for
# '.postcounter', passed through get_raw_text. Returns '' when the
# attribute lookup yields nil.
def get_post_permalink
    href = get_item_by_selector_with_attribute('.postcounter', 'href')
    return '' unless href != nil

    get_raw_text(href)
end
get_post_submit_datetime() click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 52
# Returns the post's submission time as a DateTime, or nil.
#
# The text of the '.date' element (after get_raw_text) is expected to look
# like "MM-DD-YYYY, HH:MM AM/PM". The words "Yesterday" and "Today" are
# accepted in place of the date and are replaced with the matching
# calendar date before parsing.
#
# Returns nil when no '.date' element is found or when the text does not
# match the expected format.
def get_post_submit_datetime
    date_format = '%m-%d-%Y'
    time_format = '%I:%M %P'
    date_time_format = "#{date_format}, #{time_format}"

    date_node = get_item_by_selector('.date')
    return nil if date_node.nil?

    raw_text = get_raw_text(date_node.text)

    # Both relative words are resolved from Date.today so they share one
    # clock; Date.yesterday is not part of the stdlib `date` library.
    today = Date.today
    relative_days = { 'Yesterday' => today - 1, 'Today' => today }
    relative_days.each do |word, date|
        next unless raw_text.include?(word)

        # Removing the word leaves ", HH:MM AM/PM", which the date is
        # prefixed onto to form a string matching date_time_format.
        raw_text = date.strftime(date_format) + raw_text.gsub(word, '')
        break
    end

    begin
        DateTime.strptime(raw_text, date_time_format)
    rescue ArgumentError
        # Unparseable timestamps are reported as "unknown", not raised.
        nil
    end
end
get_quotes() click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 80
# Returns the items found for '.bbcode_container', or an empty Array when
# the lookup yields nil.
#
# The selector is queried once and that result is returned; previously the
# same lookup was run a second time just to produce the return value.
def get_quotes
    quotes = get_items_by_selector('.bbcode_container')
    quotes.nil? ? [] : quotes
end
get_vbulletin_post_id() click to toggle source
# File lib/vbulletin_scraper/V4/post_scraper.rb, line 17
# Returns the vBulletin post id as a String.
#
# The id is taken from the 'name' attribute obtained for '.nodecontrols a',
# with every occurrence of "post" removed and the remainder passed through
# get_raw_text. Returns '' when the attribute lookup yields nil.
def get_vbulletin_post_id
    anchor_name = get_item_by_selector_with_attribute('.nodecontrols a', 'name')
    return '' if anchor_name.nil?

    stripped = anchor_name.gsub('post', '')
    get_raw_text(stripped)
end