module TextCommon
Constants
- VERSION
Public Class Methods
clean_bytes(string)
click to toggle source
# File lib/text_common.rb, line 15 def self.clean_bytes(string) string.gsub("\u0000", '').gsub("\xE2",'').gsub("\xE3",'') end
clean_null_bytes(string)
click to toggle source
stackoverflow.com/questions/29320369/coping-with-string-contains-null-byte-sent-from-users
# File lib/text_common.rb, line 11
# Returns a copy of +string+ with every NUL character ("\u0000") removed.
def self.clean_null_bytes(string)
  null_byte = "\u0000"
  string.delete(null_byte)
end
clean_utf16(string)
click to toggle source
TODO
# File lib/text_common.rb, line 20
# TODO: not implemented yet. Ignores +string+ and returns nil.
def self.clean_utf16(string)
  nil
end
convert_k_to_number(text)
click to toggle source
# File lib/text_common.rb, line 55
# Expands an abbreviated count such as "2.9k" or "1.5m" into an Integer.
#
# The first number that is directly followed by a standalone "k" (thousands)
# or "m" (millions) suffix, case-insensitively, is scaled and rounded to the
# nearest integer. Tying the suffix to a number keeps ordinary words that
# merely contain those letters ("milk", "links") from being mistaken for a
# multiplier, and stops a later "k" from overriding an "m" suffix.
#
# text - the value to convert; anything that is not a String is returned as is.
#
# Returns an Integer when a suffixed number is found, otherwise +text+
# unchanged.
def self.convert_k_to_number(text)
  return text unless text.is_a?(String)

  multipliers = { 'k' => 1_000, 'm' => 1_000_000 }
  # Group 1: optionally signed decimal ("3", "2.9", ".5"); group 2: the suffix.
  # The trailing \b rejects suffix letters that start a longer word ("5 km").
  match = text.match(/(-?(?:\d+\.?\d*|\.\d+))\s*([km])\b/i)
  return text unless match

  (match[1].to_f * multipliers.fetch(match[2].downcase)).round
end
convert_utf16_to_utf8(string)
click to toggle source
www.justinweiss.com/articles/3-steps-to-fix-encoding-problems-in-ruby/
# File lib/text_common.rb, line 29
# Transcodes +string+ to UTF-8, substituting a replacement character for
# invalid byte sequences and for characters undefined in the target.
#
# NOTE(review): no source encoding is passed, so the conversion starts from
# whatever encoding +string+ is already tagged with -- confirm callers tag
# their input as UTF-16 first.
def self.convert_utf16_to_utf8(string)
  replacement_options = { invalid: :replace, undef: :replace }
  string.encode('UTF-8', **replacement_options)
end
count_words(text)
click to toggle source
Tested. TODO: strip HTML and then count words. TODO: nick readability python sublime.
# File lib/text_common.rb, line 44
# Counts the whitespace-separated tokens in +text+.
def self.count_words(text)
  words = text.split(" ")
  words.length
end
create_category(text)
click to toggle source
# File lib/text_common.rb, line 98
# Unimplemented stub: ignores +text+ and returns nil.
def self.create_category(text)
  nil
end
is_notable?(text)
click to toggle source
Return true or false depending on whether a post is "NOTABLE", i.e. actually significant. Look at gauging the economic amount / ECONOMIC VALUE of work that went into it:
* a lot of links are notable
* a lot of words are notable
* rich amounts of tag elements are notable
* a short bit of content likely isn't
Distinguish between a reblogged thing or a link blog item.
# File lib/text_common.rb, line 90
# Unimplemented stub: the body is empty, so it ignores +text+ and returns
# nil (falsy) for every input.
def self.is_notable?(text)
  nil
end
present_key(str)
click to toggle source
# File lib/text_common.rb, line 49
# Turns an underscore-separated key into a space-separated title, e.g.
# "user_id" becomes "User Id". Each segment is capitalized, which also
# lower-cases the rest of that segment.
def self.present_key(str)
  str.split("_").map { |segment| segment.capitalize }.join(' ')
end
regex_to_string(regex)
click to toggle source
# File lib/text_common.rb, line 23
# Derives an upper-case, underscore-separated label from the string form of
# +regex+.
#
# Takes the text after the last ':' of +regex.to_s+, drops one trailing ")",
# upcases it, removes every "?", then replaces the literal sequences ".+",
# "[ \-]" and " " (in that order) with "_" before stripping surrounding
# whitespace.
def self.regex_to_string(regex)
  pattern_body = regex.to_s.split(':').last
  label = pattern_body.sub(/\)$/, '').upcase
  label = label.gsub(/\?/, '')
  ['.+', '[ \-]', ' '].each { |token| label = label.gsub(token, '_') }
  label.strip
end
sha(text)
click to toggle source
TESTED
# File lib/text_common.rb, line 36
# Returns the SHA-1 hex digest of +text+ after converting it with +to_s+,
# so nil is hashed as the empty string.
def self.sha(text)
  stringified = text.to_s
  Digest::SHA1.hexdigest(stringified)
end
sha_it(text)
click to toggle source
# File lib/text_common.rb, line 72
# Returns the SHA-1 hex digest of +text+. Hashes and Arrays are serialized
# with +to_json+ first; every other value is converted with +to_s+.
def self.sha_it(text)
  payload =
    case text
    when Hash, Array then text.to_json
    else text
    end
  Digest::SHA1.hexdigest(payload.to_s)
end
strip_breaks(text)
click to toggle source
# File lib/text_common.rb, line 68
# Returns a copy of +text+ with all line-feed and carriage-return
# characters removed (nothing is inserted in their place).
def self.strip_breaks(text)
  without_newlines = text.gsub(/\n/, '')
  without_newlines.gsub(/\r/, '')
end