module TextCommon

Constants

VERSION

Public Class Methods

clean_bytes(string) click to toggle source
# File lib/text_common.rb, line 15
def self.clean_bytes(string)
  string.gsub("\u0000", '').gsub("\xE2",'').gsub("\xE3",'')    
end
clean_null_bytes(string) click to toggle source

stackoverflow.com/questions/29320369/coping-with-string-contains-null-byte-sent-from-users

# File lib/text_common.rb, line 11
# Returns a copy of +string+ with every NUL character ("\u0000") removed.
# The string passed in is left untouched.
def self.clean_null_bytes(string)
  string.delete("\u0000")
end
clean_utf16(string) click to toggle source

TODO

# File lib/text_common.rb, line 20
# TODO: not implemented yet. The argument is currently ignored and the
# method always returns nil.
def self.clean_utf16(string)
  nil
end
convert_k_to_number(text) click to toggle source
# File lib/text_common.rb, line 55
# Expands a number abbreviated with a "k" (thousand) or "m" (million) suffix
# into an Integer, e.g. "2.9k" => 2900 and "1.5M" => 1500000.
#
# The suffix is honoured only when it directly follows a leading number
# (optional sign, optional decimal part, optional whitespace before the
# suffix) and is not the beginning of a longer word. This keeps a stray "k"
# or "m" elsewhere in the text from triggering a conversion: "unknown" and
# "3 members" are returned untouched, and "1.2M monthly clicks" is read as
# millions even though the text also contains a "k".
#
# text - the value to convert, normally a String.
#
# Returns an Integer when +text+ starts with "<number>k" or "<number>m"
# (case-insensitive). In every other case, including nil and non-String
# values, +text+ is returned unchanged.
def self.convert_k_to_number(text)
  return text unless text.is_a?(String)

  # Group 1 is the numeric part, group 2 is the suffix letter.
  abbreviated = text.match(/\A\s*([-+]?(?:\d+(?:\.\d*)?|\.\d+))\s*([km])\b/i)
  return text unless abbreviated

  multiplier = abbreviated[2].downcase == 'k' ? 1_000 : 1_000_000
  (abbreviated[1].to_f * multiplier).round
end
convert_utf16_to_utf8(string) click to toggle source

www.justinweiss.com/articles/3-steps-to-fix-encoding-problems-in-ruby/

# File lib/text_common.rb, line 29
# Transcodes +string+ to UTF-8 and returns the new string.
#
# The source encoding is whatever +string+ is already tagged with; nothing
# in this method forces it to be UTF-16. Byte sequences that are invalid in
# the source encoding, or that have no UTF-8 equivalent, are replaced
# instead of raising.
def self.convert_utf16_to_utf8(string)
  replacement_options = { invalid: :replace, undef: :replace }
  string.encode('UTF-8', **replacement_options)
end
count_words(text) click to toggle source

Tested. TODO: strip HTML and then count words. TODO: nick readability python sublime.

# File lib/text_common.rb, line 44
# Counts the whitespace-separated tokens in +text+.
#
# Splitting on " " makes String#split treat any run of whitespace as a
# single separator and ignore leading whitespace, so blank input yields 0.
# No markup is stripped: tags and punctuation count as part of a token.
def self.count_words(text)
  text.split(" ").length
end
create_category(text) click to toggle source
# File lib/text_common.rb, line 98
# TODO: not implemented yet. The argument is currently ignored and the
# method always returns nil.
def self.create_category(text)
  nil
end
create_hash_tags(text) click to toggle source
# File lib/text_common.rb, line 95
# TODO: not implemented yet. The argument is currently ignored and the
# method always returns nil.
def self.create_hash_tags(text)
  nil
end
is_notable?(text) click to toggle source
Returns true or false depending on whether a post is "NOTABLE", i.e. actually significant.
look at gauging the economic amount / ECONOMIC VALUE of work that went into it: 
 * a lot of links are notable
 * a lot of words are notable
 * rich amounts of tag elements are notable

 * a short bit of content likely isn't

distinguish between a reblogged thing or a link blog item
# File lib/text_common.rb, line 90
# TODO: not implemented yet. The argument is currently ignored and the
# method always returns nil (which is falsy), not true or false.
def self.is_notable?(text)
  nil
end
present_key(str) click to toggle source
# File lib/text_common.rb, line 49
# Turns an underscore-separated key into a display label, e.g.
# "first_name" => "First Name".
#
# Each underscore-delimited segment is passed through String#capitalize
# (first letter upper-cased, the rest lower-cased) and the segments are
# joined with single spaces. The string passed in is not modified.
def self.present_key(str)
  str.split("_").map(&:capitalize).join(' ')
end
regex_to_string(regex) click to toggle source
# File lib/text_common.rb, line 23
# Derives an upper-case, underscore-separated label from a regular
# expression, e.g. /foo.+bar?/ => "FOO_BAR".
#
# Steps, in order:
# 1. Take the text after the last ":" of regex.to_s and drop one trailing ")".
# 2. Upper-case it.
# 3. Remove every "?".
# 4. Replace the literal sequences ".+" and "[ \-]", and every space, with "_".
# 5. Strip surrounding whitespace.
def self.regex_to_string(regex)
  pattern_source = regex.to_s.split(':').last
  label = pattern_source.sub(/\)$/, '').upcase
  label = label.gsub(/\?/, '')
  # These are plain-string patterns, so they are matched literally.
  ['.+', '[ \-]', ' '].each { |token| label = label.gsub(token, '_') }
  label.strip
end
sha(text) click to toggle source

TESTED

# File lib/text_common.rb, line 36
# Returns the SHA-1 digest of +text+ as a hexadecimal String.
#
# The argument is converted with #to_s first, so nil hashes the same as the
# empty string and non-String values hash their string form.
def self.sha(text)
  stringified = text.to_s
  Digest::SHA1.hexdigest(stringified)
end
sha_it(text) click to toggle source
# File lib/text_common.rb, line 72
# Returns the SHA-1 digest of +text+ as a hexadecimal String.
#
# Hashes and Arrays are serialised with #to_json before hashing; every other
# value is hashed via its #to_s form, so nil hashes like the empty string.
def self.sha_it(text)
  payload =
    case text
    when Hash, Array then text.to_json
    else text
    end
  Digest::SHA1.hexdigest(payload.to_s)
end
strip_breaks(text) click to toggle source
# File lib/text_common.rb, line 68
# Returns a copy of +text+ with every line-feed ("\n") and carriage-return
# ("\r") character removed. Nothing is inserted in their place, so the text
# on either side of a break is joined directly.
def self.strip_breaks(text)
  text.gsub(/[\n\r]/, '')
end
suggest_hash_tags(text) click to toggle source
# File lib/text_common.rb, line 77
# TODO: not implemented yet. The argument is currently ignored and the
# method always returns nil.
def self.suggest_hash_tags(text)
  nil
end