class String

Public Instance Methods

basename() click to toggle source
# File lib/nlp_toolz/helpers/string_extended.rb, line 17
# Returns the final "/"-separated segment of the string.
#
# Trailing slashes are ignored because String#split drops trailing empty
# fields. Returns nil when the split yields no segments (e.g. "" or "/").
def basename
  segments = split("/")
  segments[-1]
end
clean_up() click to toggle source

TODO: check abbreviations against a list of ...

# File lib/nlp_toolz/helpers/string_extended.rb, line 6
# Returns a UTF-8 copy of the string with quotation marks removed and
# spaces inserted around brackets and before punctuation.
#
# The substitutions are applied in the order listed; each one operates on
# the output of the previous one.
def clean_up
  utf8_text = encode('UTF-8', :invalid => :replace, :undef => :replace)

  substitutions = [
    [/[\p{Pi}\p{Pf}"'„“‘’“”«»‹›]/, ''],      # strip quotation marks
    [/\b\/\b/, ' '],                          # slash between word characters -> space
    [/(\p{Ps})(.)/, '\1 \2'],                 # space after an opening bracket
    [/(.)(\p{Pe})/, '\1 \2'],                 # space before a closing bracket
    [/([\w]{3,})([\.])/, '\1 \2'],            # space before a period following 3+ word chars (abbreviation?)
    [/(.)([,;:!?]+)/, '\1 \2']                # space before a run of punctuation
  ]

  substitutions.reduce(utf8_text) do |text, (pattern, replacement)|
    text.gsub(pattern, replacement)
  end
end