class FindArticles
Public Class Methods
at_random(count: 100)
click to toggle source
# File lib/wikipedia_twitterbot/find_articles.rb, line 26
# Builds articles from randomly chosen page ids.
#
# Draws `count` random integers below 60_000_000 and hands them to `by_ids`,
# returning whatever `by_ids` returns for them.
#
# @param count [Integer] how many random page ids to draw (default 100)
def self.at_random(count: 100)
  # Note carried over from the original: as of December 2015, recently
  # created articles have page ids under 50_000_000. The bound used here
  # is 60_000_000.
  by_ids(Array.new(count) { Random.rand(60_000_000) })
end
by_ids(ids)
click to toggle source
Entry points #
# File lib/wikipedia_twitterbot/find_articles.rb, line 8
# Builds (unsaved) Article objects for the given page ids.
#
# Ids that already belong to an Article are removed before querying. Of the
# pages returned by `get_pages`, only those with 'ns' equal to 0 and a
# 'pageid' not already taken by an Article are kept.
#
# @param ids [Array] page ids to look up
# @return [Array<Article>] new Article instances built from each page's
#   first listed revision
def self.by_ids(ids)
  known_ids = Article.all.pluck(:id)
  candidate_ids = ids - known_ids

  fresh_pages = get_pages(candidate_ids).select do |page|
    page['ns'] == 0 && known_ids.exclude?(page['pageid'])
  end

  fresh_pages.map do |page|
    first_revision = page['revisions'].first
    Article.new(id: page['pageid'],
                title: page['title'],
                latest_revision: first_revision['revid'],
                latest_revision_datetime: first_revision['timestamp'])
  end
end
by_title(title)
click to toggle source
# File lib/wikipedia_twitterbot/find_articles.rb, line 33
# Finds or builds the Article for a page title.
#
# Returns the Article already found by `Article.find_by(title:)` when there
# is one. Otherwise queries the wiki for the page's info and builds a new
# Article from the first page in the response. When that page is flagged as
# a redirect, the lookup is repeated with the article's first wikilink.
#
# @param title [String] the page title to look up
# @return [Article] the existing or newly built article
def self.by_title(title)
  stored = Article.find_by(title: title)
  return stored if stored.present?

  response = Wiki.query(title_info_query(title))
  info = response.data['pages'].values.first
  candidate = Article.new(id: info['pageid'],
                          title: info['title'],
                          latest_revision: info['lastrevid'],
                          latest_revision_datetime: info['touched'])

  if info['redirect']
    # If it's a redirect, return the redirect target instead.
    by_title(candidate.wikilinks.first)
  else
    candidate
  end
end
get_pages(article_ids)
click to toggle source
# File lib/wikipedia_twitterbot/find_articles.rb, line 71 def self.get_pages(article_ids) pages = {} threads = article_ids.in_groups(10, false).each_with_index.map do |group_of_ids, i| Thread.new(i) do pages = {} group_of_ids.each_slice(50) do |fifty_ids| rev_query = revisions_query(fifty_ids) rev_response = Wiki.query rev_query pages.merge! rev_response.data['pages'] end end end threads.each(&:join) pages.values end
revisions_query(article_ids)
click to toggle source
# File lib/wikipedia_twitterbot/find_articles.rb, line 65
# Builds the query parameters requesting revisions for pages given by id.
#
# @param article_ids [Array] page ids, passed through unchanged as :pageids
# @return [Hash] parameters with :prop, :pageids and :rvprop
def self.revisions_query(article_ids)
  {
    prop: 'revisions',
    pageids: article_ids,
    rvprop: 'userid|ids|timestamp'
  }
end
title_info_query(title)
click to toggle source
# File lib/wikipedia_twitterbot/find_articles.rb, line 60
# Builds the query parameters requesting page info for a title.
#
# @param title [String] page title, passed through unchanged as :titles
# @return [Hash] parameters with :prop and :titles
def self.title_info_query(title)
  {
    prop: 'info',
    titles: title
  }
end
title_revisions_query(title)
click to toggle source
Internal methods #
# File lib/wikipedia_twitterbot/find_articles.rb, line 54
# Builds the query parameters requesting revisions for a page given by title.
#
# @param title [String] page title, passed through unchanged as :titles
# @return [Hash] parameters with :prop, :titles and :rvprop
def self.title_revisions_query(title)
  {
    prop: 'revisions',
    titles: title,
    rvprop: 'userid|ids|timestamp'
  }
end