module CurateTumblr::Tumblr::ExtractLinks

Constants

REGEXS_EXTRACTS_TUMBLR

REGEX_EXTRACT_TUMBLR = "href=.*.tumblr.com"

REGEX_EXTERNALS_FLICKR = [ “".flickr.com/photos/."” ]

REGEX_TUMBLR_GLOBALKEY_REBLOG_URL
REGEX_TUMBLR_KEY_REBLOG_URL
REGEX_TUMBLR_KEY_REDIRECT_REBLOG_URL
REGEX_TUMBLR_POST_SLUG_URL
REGEX_TUMBLR_POST_URL
REGEX_TUMBLR_REBLOG_URL
REGEX_TUMBLR_URL
REGEX_TUMBLR_URL_REBLOG_URL
REGEX_TUMBLR_URL_REBLOG_URL_SPEC

Attributes

Public Class Methods

get_external_urls_from_text( text ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 146
# Collects the de-duplicated external (non-Tumblr) links found in +text+.
#
# The text is tagged as UTF-8, scanned with REGEX_EXTERNALS_LINKS, filtered
# through get_urls_not_tumblr and formatted by CurateTumblr.get_format_ar_urls.
#
# text - String to scan. An unfrozen String is re-tagged as UTF-8 in place
#        (as before); a frozen String is copied first so that re-tagging it
#        does not raise FrozenError.
#
# Returns an Array without duplicate entries.
# Raises RuntimeError when +text+ is not a String.
def get_external_urls_from_text( text )
  raise "text #{text.class} is not a String" if !text.is_a? String
  # force_encoding mutates its receiver, which is impossible on a frozen String.
  text = text.dup if text.frozen?
  text.force_encoding 'utf-8'
  ar_urls = get_links_from_regexs_caption( REGEX_EXTERNALS_LINKS, text )
  ar_urls = get_urls_not_tumblr( ar_urls )
  ar_urls = CurateTumblr.get_format_ar_urls( ar_urls )
  # Passing through a Set drops duplicates while keeping first-seen order.
  Set.new( ar_urls ).to_a
end
get_post_id_from_post_url( url ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 41
# Extracts the post id from a Tumblr post URL.
#
# The slug form (REGEX_TUMBLR_POST_SLUG_URL) is tried before the plain form
# (REGEX_TUMBLR_POST_URL). The first match has its 'post/' prefix removed and
# is then passed to CurateTumblr.format_post_id.
#
# url - the URL to inspect.
#
# Returns the value of CurateTumblr.format_post_id, or false when +url+ is not
# a Tumblr post URL or neither pattern matches.
def get_post_id_from_post_url( url )
  return false unless tumblr_post_url?( url )
  [ REGEX_TUMBLR_POST_SLUG_URL, REGEX_TUMBLR_POST_URL ].each do |pattern|
    next unless /#{pattern}/ =~ url
    matched = url.scan( /#{pattern}/ ).first
    return CurateTumblr.format_post_id( matched.gsub( 'post/', '' ) )
  end
  false
end
get_post_id_from_reblog_url( url ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 48
# Extracts the post id from a Tumblr reblog URL.
#
# The first REGEX_TUMBLR_REBLOG_URL match has its 'reblog/' prefix removed and
# is then passed to CurateTumblr.format_post_id.
#
# url - the URL to inspect.
#
# Returns the value of CurateTumblr.format_post_id, or false when +url+ is not
# a Tumblr reblog URL.
def get_post_id_from_reblog_url( url )
  return false unless tumblr_reblog_url?( url )
  reblog_part = url.scan( /#{REGEX_TUMBLR_REBLOG_URL}/ ).first
  raw_id = reblog_part.gsub( 'reblog/', '' )
  CurateTumblr.format_post_id( raw_id )
end
get_reblog_key_from_reblog_url( url ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 53
# Extracts the reblog key from a Tumblr reblog URL.
#
# The "global key" is the first REGEX_TUMBLR_GLOBALKEY_REBLOG_URL match with
# its 'reblog/' prefix removed. The key is then taken from it, trying the
# redirect form (REGEX_TUMBLR_KEY_REDIRECT_REBLOG_URL) before the plain form
# (REGEX_TUMBLR_KEY_REBLOG_URL), and passed to
# CurateTumblr.format_post_reblog_key.
#
# url - the URL to inspect.
#
# Returns the value of CurateTumblr.format_post_reblog_key, or false when
# +url+ is not a reblog URL, carries no global key, or matches no key pattern.
def get_reblog_key_from_reblog_url( url )
  return false if !tumblr_reblog_url?( url )
  raw_global_key = url.scan(/#{REGEX_TUMBLR_GLOBALKEY_REBLOG_URL}/).first
  # A URL can pass the reblog check without matching the global-key pattern;
  # report that as "no key" instead of calling gsub on nil.
  return false if raw_global_key.nil?
  global_key = raw_global_key.gsub('reblog/', '')
  return false if global_key.empty?
  return CurateTumblr.format_post_reblog_key( global_key.scan( /#{REGEX_TUMBLR_KEY_REDIRECT_REBLOG_URL}/ ).first ) if /#{REGEX_TUMBLR_KEY_REDIRECT_REBLOG_URL}/ =~ global_key
  return CurateTumblr.format_post_reblog_key( global_key.scan( /#{REGEX_TUMBLR_KEY_REBLOG_URL}/ ).first ) if /#{REGEX_TUMBLR_KEY_REBLOG_URL}/ =~ global_key
  false
end
get_tumblr_from_reblog_url( reblog_url ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 28
# Extracts the blog URL from a Tumblr reblog URL.
#
# REGEX_TUMBLR_URL_REBLOG_URL_SPEC is tried before
# REGEX_TUMBLR_URL_REBLOG_URL; when neither matches an empty String is used.
# The result is handed to CurateTumblr.format_tumblr_url! before being returned.
#
# reblog_url - the reblog URL to inspect.
#
# Returns the extracted String (possibly empty), or false when +reblog_url+ is
# not a Tumblr reblog URL.
def get_tumblr_from_reblog_url( reblog_url )
  return false unless tumblr_reblog_url?( reblog_url )
  tumblr_url =
    if /#{REGEX_TUMBLR_URL_REBLOG_URL_SPEC}/ =~ reblog_url
      reblog_url.scan( /#{REGEX_TUMBLR_URL_REBLOG_URL_SPEC}/ ).first
    elsif /#{REGEX_TUMBLR_URL_REBLOG_URL}/ =~ reblog_url
      reblog_url.scan( /#{REGEX_TUMBLR_URL_REBLOG_URL}/ ).first
    else
      ""
    end
  CurateTumblr.format_tumblr_url!( tumblr_url )
  tumblr_url
end
get_tumblr_url( url ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 21
# Extracts the blog URL from +url+.
#
# The first REGEX_TUMBLR_URL match is handed to
# CurateTumblr.format_tumblr_url! and then returned.
#
# url - the URL to inspect.
#
# Returns the extracted String, or false when tumblr_url? rejects +url+.
def get_tumblr_url( url )
  return false unless tumblr_url?( url )
  url.scan( /#{REGEX_TUMBLR_URL}/ ).first.tap do |tumblr_url|
    CurateTumblr.format_tumblr_url!( tumblr_url )
  end
end
get_tumblr_urls_from_text( text ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 138
# Collects the de-duplicated Tumblr links found in +text+.
#
# The text is scanned with REGEXS_EXTRACTS_TUMBLR, filtered through
# get_urls_only_tumblr and formatted by CurateTumblr.get_format_ar_tumblr_urls.
#
# text - String to scan.
#
# Returns an Array without duplicate entries.
# Raises RuntimeError when +text+ is not a String.
def get_tumblr_urls_from_text( text )
  unless text.is_a?( String )
    raise "text #{text.class} is not a String"
  end
  candidates = get_tumblr_links_from_regexs_caption( REGEXS_EXTRACTS_TUMBLR, text )
  tumblr_only = get_urls_only_tumblr( candidates )
  formatted = CurateTumblr.get_format_ar_tumblr_urls( tumblr_only )
  # Passing through a Set drops duplicates while keeping first-seen order.
  Set.new( formatted ).to_a
end
get_urls_not_tumblr( ar_urls ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 130
# Keeps the entries of +ar_urls+ that simple_tumblr_url? rejects.
#
# ar_urls - collection of URLs to filter.
#
# Returns a new Array holding the non-Tumblr entries in their original order.
def get_urls_not_tumblr( ar_urls )
  ar_urls.each_with_object( [] ) do |candidate, kept|
    kept << candidate unless simple_tumblr_url?( candidate )
  end
end
get_urls_only_tumblr( ar_urls ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 122
# Keeps the entries of +ar_urls+ that tumblr_url? accepts.
#
# ar_urls - collection of URLs to filter.
#
# Returns a new Array holding the accepted entries in their original order.
def get_urls_only_tumblr( ar_urls )
  ar_urls.each_with_object( [] ) do |candidate, kept|
    kept << candidate if tumblr_url?( candidate )
  end
end
simple_tumblr_url?( url ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 62
# Tells whether +url+ matches REGEX_TUMBLR_URL anywhere.
#
# url - the value to test.
#
# Returns true on a match, false otherwise.
def simple_tumblr_url?( url )
  match_position = ( /#{REGEX_TUMBLR_URL}/ =~ url )
  !match_position.nil?
end
tumblr_post_url?( url ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 73
# Tells whether +url+ is a Tumblr URL that points at a post.
#
# The URL must pass tumblr_url? and match either REGEX_TUMBLR_POST_SLUG_URL or
# REGEX_TUMBLR_POST_URL.
#
# url - the URL to test.
#
# Returns true or false.
def tumblr_post_url?( url )
  return false unless tumblr_url?( url )
  if /#{REGEX_TUMBLR_POST_SLUG_URL}/ =~ url
    true
  elsif /#{REGEX_TUMBLR_POST_URL}/ =~ url
    true
  else
    false
  end
end
tumblr_reblog_url?( url ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 80
# Tells whether +url+ is a Tumblr URL that matches REGEX_TUMBLR_REBLOG_URL.
#
# url - the URL to test.
#
# Returns true or false.
def tumblr_reblog_url?( url )
  return false unless tumblr_url?( url )
  match_position = ( /#{REGEX_TUMBLR_REBLOG_URL}/ =~ url )
  !match_position.nil?
end
tumblr_url?( url ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 67
# Tells whether +url+ is a bare Tumblr URL rather than an HTML anchor fragment.
#
# The value must pass simple_tumblr_url? and contain neither "<a " nor "</a>".
#
# url - the value to test.
#
# Returns true or false.
def tumblr_url?( url )
  return false unless simple_tumblr_url?( url )
  [ "<a ", "</a>" ].none? { |marker| url.index( marker ) }
end
valid_tumblr_url?( tumblr_url ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 86
# Tells whether +tumblr_url+ matches every pattern in REGEXS_EXTRACTS_TUMBLR.
#
# The previous version had its `true` inside the `each` block, so a successful
# check returned the pattern list itself instead of true. The predicate now
# returns a real boolean; the falsy/truthy outcome for any input is unchanged.
#
# tumblr_url - the value to test.
#
# Returns true when all patterns match (or the list is empty), false otherwise.
def valid_tumblr_url?( tumblr_url )
  REGEXS_EXTRACTS_TUMBLR.all? { |regex| /#{regex}/ =~ tumblr_url }
end

Public Instance Methods

extract_links_caption_from_post( hash_post ) click to toggle source
# File lib/curate_tumblr/tumblr/extract_links.rb, line 175
# Feeds a post's caption to the two link collectors.
#
# When +hash_post+ has a "caption" key, the caption is passed to
# add_tofollow_tumblr_links_from_caption (together with the value of
# CurateTumblr.get_source_from_hash_post) and then to
# add_external_links_from_caption.
#
# hash_post - Hash-like post data.
#
# Returns true when a caption was processed, false when the key is absent.
def extract_links_caption_from_post( hash_post )
  if hash_post.has_key?( "caption" )
    caption = hash_post["caption"]
    source = CurateTumblr.get_source_from_hash_post( hash_post )
    add_tofollow_tumblr_links_from_caption( caption, source )
    add_external_links_from_caption( hash_post["caption"] )
    true
  else
    false
  end
end
get_reblog_key_from_post( tumblr_url, post_id ) click to toggle source

— utils —

# File lib/curate_tumblr/tumblr/extract_links.rb, line 184
# Looks up the reblog key of a post.
#
# The post is fetched with get_hash_post and handed to
# CurateTumblr.get_reblog_key_from_hash_post.
#
# tumblr_url - blog to query; when empty, @tumblr_name is used instead.
# post_id    - id of the post, passed through to get_hash_post.
#
# Returns whatever CurateTumblr.get_reblog_key_from_hash_post returns.
def get_reblog_key_from_post( tumblr_url, post_id )
  blog = tumblr_url.empty? ? @tumblr_name : tumblr_url
  hash_post = get_hash_post( blog, post_id )
  CurateTumblr.get_reblog_key_from_hash_post( hash_post )
end