class Rumors::Api::Client::Base

Constants

DATA_HOST
SIMILARITY

Public Class Methods

new(text) click to toggle source
# File lib/rumors/api/client/base.rb, line 8
# Stores the stripped input text and every URI found in it.
#
# `URI.escape` was removed in Ruby 3.0; `URI::DEFAULT_PARSER.escape`
# performs the same percent-encoding and is still available.
#
# @param text [String] raw message text; surrounding whitespace is stripped
def initialize(text)
  @text = text.strip
  @urls = URI.extract(@text).map { |url| URI.parse(URI::DEFAULT_PARSER.escape(url)) }
end

Public Instance Methods

list_articles() click to toggle source
# File lib/rumors/api/client/base.rb, line 20
# Posts the 'list_articles' request built from the stored text.
#
# @return [Object] whatever post_request returns for that body
def list_articles
  post_request(build_body('list_articles', @text))
end

Private Instance Methods

build_body(util, argument) click to toggle source
# File lib/rumors/api/client/base.rb, line 132
# Builds the request payload for the named utility.
#
# The utility class name is derived from "rumors/api/client/utils/<util>"
# via String#classify and resolved with Object.const_get.
#
# @param util [String] utility name appended to the utils path
# @param argument [Object] value handed to the utility's constructor
# @return [Hash] :query and :variables read from the utility instance
def build_body(util, argument)
  util_class_name = "rumors/api/client/utils/#{util}".classify
  util_instance = Object.const_get(util_class_name).new(argument)
  query = util_instance.purify_gql_query
  { query: query, variables: util_instance.variables }
end
build_headers() click to toggle source
# File lib/rumors/api/client/base.rb, line 126
# Request headers: declares a JSON content type.
#
# @return [Hash] single-entry hash keyed by the symbol :"Content-Type"
def build_headers
  { 'Content-Type': 'application/json' }
end
calculate_similarity(contents) click to toggle source
# File lib/rumors/api/client/base.rb, line 89
# Finds the entry in +contents+ whose document is most similar to @text.
#
# Uses the tf-idf-similarity gem:
# https://github.com/jpmckinney/tf-idf-similarity
#
# @param contents [Array<Hash>] entries whose first value is a
#   TfIdfSimilarity::Document and whose first key is the article id
# @return [Hash] :article_id and :score of the best match; stays at
#   '' and 0 when no entry scores above 0
def calculate_similarity(contents)
  query_document = TfIdfSimilarity::Document.new(@text)

  # The query document goes first, followed by every candidate document.
  candidate_documents = contents.map { |entry| entry.values.first }
  model = TfIdfSimilarity::TfIdfModel.new([query_document] + candidate_documents)
  matrix = model.similarity_matrix

  best = { article_id: '', score: 0 }
  contents.each do |entry|
    candidate_id, document = entry.to_a.flatten
    score = matrix[model.document_index(query_document), model.document_index(document)]
    # Strict comparison: on a tie the earlier entry is kept.
    if score > best[:score]
      best[:article_id] = candidate_id
      best[:score] = score
    end
  end

  best
end
compare_urls(contents) click to toggle source
# File lib/rumors/api/client/base.rb, line 58
# Returns the first key of the first entry whose 'urls' value matches one
# of the stored URLs, as decided by exist_same_url?.
#
# @param contents [Array<Hash>] entries carrying a 'urls' value
# @return [Object, nil] first key of the matching entry, or nil if none match
def compare_urls(contents)
  matching = contents.find { |content| exist_same_url?(content['urls']) }
  matching && matching.keys.first
end
equal_arrays?(this_one, another) click to toggle source
# File lib/rumors/api/client/base.rb, line 85
# Order-insensitive array equality that respects duplicates.
#
# The previous `(this_one & another) == this_one` check de-duplicated the
# intersection, so two identical arrays with a repeated element (for
# example the segments of "/a/a") were reported as different. Comparing
# per-element occurrence counts keeps every previously-true result and
# fixes that case.
#
# @param this_one [Array]
# @param another [Array]
# @return [Boolean] true when both hold the same elements the same number of times
def equal_arrays?(this_one, another)
  return false unless this_one.size == another.size

  occurrences = lambda do |items|
    items.each_with_object(Hash.new(0)) { |item, counts| counts[item] += 1 }
  end
  occurrences.call(this_one) == occurrences.call(another)
end
exist_same_url?(response_urls) click to toggle source
# File lib/rumors/api/client/base.rb, line 65
# Tells whether any URI in +response_urls+ matches one of the URIs in @urls.
#
# Two URIs match when their hosts are equal, their non-empty path segments
# are equal according to equal_arrays?, and either the stored URI has no
# query string or both query strings split on '&' are equal according to
# equal_arrays?.
#
# A missing path or query is treated as empty; previously a response URI
# without a query raised NoMethodError when the stored URI had one.
#
# @param response_urls [Array<URI>, nil] URIs to check against @urls
# @return [Boolean] false when +response_urls+ is nil or nothing matches
def exist_same_url?(response_urls)
  return false unless response_urls

  segments_of = ->(uri) { uri.path.to_s.split('/').reject(&:empty?) }
  params_of = ->(uri) { uri.query.to_s.split('&') }

  response_urls.any? do |response_url|
    @urls.any? do |url|
      next false unless response_url.host == url.host
      next false unless equal_arrays?(segments_of.call(response_url), segments_of.call(url))

      # A stored URI without a query matches whatever query the response URI has.
      url.query.nil? || equal_arrays?(params_of.call(response_url), params_of.call(url))
    end
  end
end
find_article(article_id) click to toggle source
# File lib/rumors/api/client/base.rb, line 43
# Looks up the article node with the given id in the stored response.
#
# @param article_id [Object] id compared against each edge's node 'id'
# @return [Hash, nil] the matching node, or nil when no edge has that id
def find_article(article_id)
  edges = @articles['data']['ListArticles']['edges']
  # `find` stops at the first match; a miss now yields nil instead of
  # raising NoMethodError on `nil['node']`.
  edge = edges.find { |candidate| candidate['node']['id'] == article_id }
  edge && edge['node']
end
parse_content() click to toggle source
# File lib/rumors/api/client/base.rb, line 47
# Parses the body of the stored response into one hash per article.
#
# Each entry has the shape
#   { article_id => TfIdfSimilarity::Document, 'urls' => [URI, ...] or nil }
#
# `URI.escape` was removed in Ruby 3.0, so links are escaped with
# `URI::DEFAULT_PARSER.escape`. A response that lacks the
# data/ListArticles/edges list now yields nil rather than raising.
#
# @return [Array<Hash>, nil] parsed entries, or nil when no edge list is present
def parse_content
  parsed_articles = JSON.parse(@articles.body)
  # Walk down key by key, giving up with nil as soon as a level is not a Hash.
  edges = %w[data ListArticles edges].reduce(parsed_articles) do |level, key|
    level.is_a?(Hash) ? level[key] : nil
  end
  return unless edges

  edges.map do |article|
    node = article['node']
    links = node['hyperlinks']
    {
      node['id'] => TfIdfSimilarity::Document.new(node['text']),
      'urls' => links && links.map { |link| URI.parse(URI::DEFAULT_PARSER.escape(link['url'])) }
    }
  end
end
post_request(body) click to toggle source
# File lib/rumors/api/client/base.rb, line 118
# Sends +body+, serialised with to_json, to DATA_HOST via HTTParty.post,
# using the headers from build_headers.
#
# @param body [#to_json] request payload
# @return [Object] the value returned by HTTParty.post
def post_request(body)
  payload = body.to_json
  HTTParty.post(DATA_HOST, body: payload, headers: build_headers)
end
return_article() click to toggle source
# File lib/rumors/api/client/base.rb, line 27
# Picks the article that best matches the stored input.
#
# When the input contained URLs, only URL comparison is used; otherwise
# the most similar article is taken, provided its score exceeds SIMILARITY.
#
# @return [Object, nil] result of find_article for the chosen id, or nil
#   when there is no content or no article qualifies
def return_article
  contents = parse_content
  return if contents.nil? || contents.empty?

  article_id =
    if @urls.any?
      compare_urls(contents)
    else
      most_like = calculate_similarity(contents)
      most_like[:article_id] if most_like[:score] > SIMILARITY
    end

  find_article(article_id) if article_id
end