class Rumors::Api::Client::Base
Constants
- DATA_HOST
- SIMILARITY
Public Class Methods
new(text)
click to toggle source
# File lib/rumors/api/client/base.rb, line 8
# Builds a client for one piece of message text.
#
# Stores the whitespace-stripped text in @text and every URI found in it
# (parsed into URI objects) in @urls.
#
# text - the String to look up; must respond to #strip.
def initialize(text)
  @text = text.strip
  # URI.escape was removed in Ruby 3.0. The parser-level #escape performs the
  # same escaping and is available on both old and current Rubies.
  @urls = URI.extract(@text).map { |url| URI.parse(URI::DEFAULT_PARSER.escape(url)) }
end
Public Instance Methods
list_articles()
click to toggle source
# File lib/rumors/api/client/base.rb, line 20
# Builds the 'list_articles' request body for @text and posts it.
#
# Returns whatever post_request returns.
def list_articles
  post_request(build_body('list_articles', @text))
end
search()
click to toggle source
# File lib/rumors/api/client/base.rb, line 13
# Fetches the article list into @articles and, when the response code is
# 200, returns the result of return_article.
#
# Returns nil for any other response code.
def search
  @articles = list_articles
  return_article if @articles.code == 200
end
Private Instance Methods
build_body(util, argument)
click to toggle source
# File lib/rumors/api/client/base.rb, line 132
# Builds the request payload for one query helper.
#
# The helper class is looked up by classifying
# "rumors/api/client/utils/<util>" and is instantiated with argument.
#
# util     - name of the helper, e.g. 'list_articles'.
# argument - value passed to the helper's constructor.
#
# Returns a Hash with :query and :variables taken from the helper.
def build_body(util, argument)
  helper_class_name = "rumors/api/client/utils/#{util}".classify
  helper = Object.const_get(helper_class_name).new(argument)

  {
    query: helper.purify_gql_query,
    variables: helper.variables
  }
end
build_headers()
click to toggle source
# File lib/rumors/api/client/base.rb, line 126
# Returns the headers sent with every request: a single JSON content type
# entry, keyed by the Symbol :'Content-Type'.
def build_headers
  headers = {}
  headers[:'Content-Type'] = 'application/json'
  headers
end
calculate_similarity(contents)
click to toggle source
# File lib/rumors/api/client/base.rb, line 89
# Finds the article whose document scores highest against @text.
#
# NOTE: https://github.com/jpmckinney/tf-idf-similarity
#
# contents - Array of Hashes whose first pair is article id => document.
#
# Returns { article_id:, score: }. When no article scores above 0 the result
# is { article_id: '', score: 0 }. On ties the earliest article wins because
# only a strictly greater score replaces the current best.
def calculate_similarity(contents)
  query_document = TfIdfSimilarity::Document.new(@text)
  corpus = [query_document] + contents.map { |entry| entry.values.first }
  model = TfIdfSimilarity::TfIdfModel.new(corpus)
  matrix = model.similarity_matrix

  contents.reduce({ article_id: '', score: 0 }) do |best, entry|
    article_id, document = entry.first
    score = matrix[model.document_index(query_document), model.document_index(document)]
    score > best[:score] ? { article_id: article_id, score: score } : best
  end
end
compare_urls(contents)
click to toggle source
# File lib/rumors/api/client/base.rb, line 58
# Returns the first key of the first content Hash whose 'urls' entry
# satisfies exist_same_url?, or nil when none does.
#
# contents - Array of Hashes as produced by parse_content.
def compare_urls(contents)
  matched = contents.find { |content| exist_same_url?(content['urls']) }
  matched.keys.first if matched
end
equal_arrays?(this_one, another)
click to toggle source
# File lib/rumors/api/client/base.rb, line 85
# Tells whether two arrays hold the same elements, ignoring order.
#
# Elements are counted rather than intersected: Array#& removes duplicates,
# so an intersection-based check reports arrays with repeated elements
# (e.g. the segments of "/a/a") as different from themselves.
#
# this_one - first Array.
# another  - second Array.
#
# Returns true when both arrays contain each element the same number of
# times, false otherwise.
def equal_arrays?(this_one, another)
  return false unless this_one.size == another.size

  counts = Hash.new(0)
  this_one.each { |item| counts[item] += 1 }
  another.each { |item| counts[item] -= 1 }
  counts.each_value.all?(&:zero?)
end
exist_same_url?(response_urls)
click to toggle source
# File lib/rumors/api/client/base.rb, line 65
# Tells whether any URL in response_urls points at the same resource as any
# URL in @urls.
#
# Two URLs match when their hosts are equal, their non-empty path segments
# are equal per equal_arrays?, and either the URL from @urls has no query or
# both queries split on '&' are equal per equal_arrays?.
#
# response_urls - Array of URI objects, or nil.
#
# Returns nil when response_urls is nil, true on the first match, false
# otherwise.
def exist_same_url?(response_urls)
  return unless response_urls

  response_urls.each do |response_url|
    @urls.each do |url|
      next unless response_url.host == url.host
      # Opaque URIs (e.g. mailto:) carry no host or path, so compare their
      # opaque part; it is nil on both sides for ordinary hierarchical URLs.
      next unless response_url.opaque == url.opaque

      # path is nil for opaque URIs; to_s keeps the split from raising.
      response_uris = response_url.path.to_s.split('/').reject(&:empty?)
      uris = url.path.to_s.split('/').reject(&:empty?)
      next unless equal_arrays?(response_uris, uris)
      return true if url.query.nil?

      querys = url.query.split('&')
      # The response URL may have no query even though ours does; treat that
      # as an empty parameter list instead of calling split on nil.
      response_querys = response_url.query.to_s.split('&')
      return true if equal_arrays?(response_querys, querys)
    end
  end
  false
end
find_article(article_id)
click to toggle source
# File lib/rumors/api/client/base.rb, line 43
# Looks up an article node by id in the fetched article list.
#
# article_id - id to match against each edge's node 'id'.
#
# Returns the matching edge's 'node', or nil when no edge has that id.
def find_article(article_id)
  edges = @articles['data']['ListArticles']['edges']
  # find stops at the first hit; the guard keeps an unknown id from raising.
  edge = edges.find { |candidate| candidate['node']['id'] == article_id }
  edge && edge['node']
end
parse_content()
click to toggle source
# File lib/rumors/api/client/base.rb, line 47
# Converts the JSON body of @articles into comparable content entries.
#
# Returns an Array with one Hash per edge:
#   { node id => TfIdfSimilarity::Document(node text), 'urls' => [URI] or nil }
# 'urls' is nil when the node has no 'hyperlinks'. Returns [] when the body
# carries no 'data' / 'ListArticles' / 'edges'.
def parse_content
  parsed_articles = JSON.parse(@articles.body)
  # Walk the nesting defensively so a body such as {"data": null} yields an
  # empty list instead of raising on nil.
  data = parsed_articles['data'] || {}
  edges = (data['ListArticles'] || {})['edges'] || []

  edges.map do |article|
    node = article['node']
    content = { node['id'] => TfIdfSimilarity::Document.new(node['text']) }
    links = node['hyperlinks']
    # URI.escape was removed in Ruby 3.0; the parser-level #escape performs
    # the same escaping.
    content['urls'] = links && links.map { |link| URI.parse(URI::DEFAULT_PARSER.escape(link['url'])) }
    content
  end
end
post_request(body)
click to toggle source
# File lib/rumors/api/client/base.rb, line 118
# POSTs the body, serialized as JSON, to DATA_HOST with the headers from
# build_headers.
#
# body - object responding to #to_json.
#
# Returns the value of HTTParty.post.
def post_request(body)
  options = { body: body.to_json, headers: build_headers }
  HTTParty.post(DATA_HOST, options)
end
return_article()
click to toggle source
# File lib/rumors/api/client/base.rb, line 27
# Picks the article that best matches the input and returns it via
# find_article.
#
# When @urls is non-empty the article is chosen by compare_urls only;
# otherwise it is the calculate_similarity result, accepted only if its
# score is strictly greater than SIMILARITY.
#
# Returns nil when there is no parsed content or no article qualifies.
def return_article
  contents = parse_content
  return if contents.nil? || contents.empty?

  article_id =
    if @urls.any?
      compare_urls(contents)
    else
      most_like = calculate_similarity(contents)
      most_like[:article_id] if most_like[:score] > SIMILARITY
    end
  return unless article_id

  find_article(article_id)
end