class Persian::Text

Persian Text class

Dynamic methods

Class text

Persian Text class Digest Persian texts

Persian Text class Digest Persian texts

Constants

AR_FA_CHAR
BRACKETS
END_VOWEL
EN_FA_KEYBOARD_CHAR

Exchange Standard QWERTY Keyboard layout

HAREKATS
SIGNS

Public Class Methods

add_zwnj(text, point) click to toggle source
# File lib/persian/text/text.rb, line 108
# Insert a zero-width non-joiner (U+200C) after the first +point+ characters
# of +text+ and return the result as a new string.
#
# +text+ is returned untouched when +point+ is negative or when there is no
# character after the split position.
def self.add_zwnj(text, point)
  return text unless point.between?(0, text.length - 1)

  # Slice by character index rather than scanning with `^` and `.`: the scan
  # restarted on every line and dropped newline characters from the result.
  "#{text[0, point]}\u200C#{text[point..-1]}"
end
ast(text) click to toggle source

Replace « است» with «ست» when the character before the space is ا (e.g. «دانا است» becomes «داناست»).

# File lib/persian/text/text.rb, line 63
# Join a word ending in «ا» to a following «است»: the whitespace and the
# leading alef of «است» are dropped (e.g. «دانا است» becomes «داناست»).
# Modifies +text+ in place and returns it.
def self.ast(text)
  alef = 'ا'
  full_form = 'است'
  short_form = 'ست'

  text.gsub!(/(#{alef})\s(#{full_form})/, "\\1#{short_form}")
  text
end
character(text) click to toggle source

Replace Arabic characters with Persian characters.

# File lib/persian/text/text.rb, line 8
# Swap every Arabic character listed in AR_FA_CHAR for its Persian counterpart.
# Modifies +text+ in place and returns it.
def self.character(text)
  AR_FA_CHAR.each do |arabic, persian|
    text.gsub!(arabic, persian)
  end
  text
end
constant?(const_name) click to toggle source
# File lib/persian/dynamic.rb, line 29
# Tell whether +const_name+ names a constant that Persian.const_defined? can see.
#
# Returns false instead of raising NameError when +const_name+ is not a
# syntactically valid constant name (for example the empty string that a bare
# `remove_` call produces in method_missing).
def self.constant?(const_name)
  Persian.const_defined?(const_name)
rescue NameError
  false
end
english_to_persian_char(text) click to toggle source

Replace English characters with the Persian characters found on the same keys of the standard Persian keyboard. For now only the QWERTY layout is supported.

# File lib/persian/text/keyboard.rb, line 9
# Replace each key of EN_FA_KEYBOARD_CHAR found in +text+ with its mapped value
# (English character to Persian character). Modifies +text+ in place and
# returns it.
def self.english_to_persian_char(text)
  EN_FA_KEYBOARD_CHAR.each do |english, persian|
    text.gsub!(english, persian)
  end
  text
end
fix_y_after_vowel(text) click to toggle source

Add '‌ی' after names that end with ه, ا, و

# File lib/persian/text/text.rb, line 49
# Append a zero-width non-joiner followed by «ی» when the last character of
# +text+ is listed in END_VOWEL; otherwise hand +text+ back as is.
# The argument itself is never modified.
def self.fix_y_after_vowel(text)
  return text unless END_VOWEL.include?(text[-1])

  text + '‌ی'
end
general_brackets(text, left = '«', right = '»') click to toggle source

Replace general brackets (double quotes, square brackets, braces and parentheses) with a single bracket type. Default: « and » (0xAB and 0xBB).

# File lib/persian/text/text.rb, line 40
# Rewrite "…", […], {…} and (…) pairs so that they all use the +left+ and
# +right+ delimiters (« and » by default). Pairs are matched non-greedily and
# processed in that order. Modifies +text+ in place and returns it.
def self.general_brackets(text, left = '«', right = '»')
  replacement = left + '\1' + right
  pair_patterns = [/"(.*?)"/, /\[(.*?)\]/, /\{(.*?)\}/, /\((.*?)\)/]

  pair_patterns.each { |pattern| text.gsub!(pattern, replacement) }
  text
end
get_constant(const_name) click to toggle source
# File lib/persian/dynamic.rb, line 33
# Look up +const_name+ through Persian.const_get and return its value.
# Any error raised by const_get (e.g. for an unknown name) propagates to the caller.
def self.get_constant(const_name)
  Persian.const_get(const_name)
end
keshide(text) click to toggle source

Remove keshide from text

# File lib/persian/text/text.rb, line 73
# Strip every keshide (tatweel, «ـ») character from +text+.
# Modifies +text+ in place and returns it.
def self.keshide(text)
  text.delete!('ـ')
  text
end
method_missing(method, *arg, &block) click to toggle source
Calls superclass method
# File lib/persian/dynamic.rb, line 6
# Dynamic `remove_<name>` dispatcher.
#
# For a call such as `remove_foo(text)` the part after `remove_` is upcased
# and, when constant? accepts it, the value of that constant is removed from
# the first argument via Persian.rm_char. Every other call falls through to
# the default method_missing behaviour.
def self.method_missing(method, *arg, &block)
  name = method.to_s
  return super unless name =~ /^remove_\w*/

  # Constant name is whatever follows the remove_ prefix, upcased.
  const_name = name.gsub(/^remove_(\w*)/, '\1').upcase
  return super unless constant? const_name

  Persian.rm_char(arg[0], get_constant(const_name))
end
persian_to_english_char(text) click to toggle source

Replace standard Persian keyboard characters with the English characters found on the same keys. For now only the QWERTY layout is supported.

# File lib/persian/text/keyboard.rb, line 16
# Reverse lookup over EN_FA_KEYBOARD_CHAR: each mapped value found in +text+
# is replaced with its key (Persian character back to English character).
# Modifies +text+ in place and returns it.
def self.persian_to_english_char(text)
  EN_FA_KEYBOARD_CHAR.each do |english, persian|
    text.gsub!(persian, english)
  end
  text
end
remove_brackets(text) click to toggle source

Remove all brackets

# File lib/persian/text/text.rb, line 27
# Delete every occurrence of each BRACKETS entry from +text+.
# Modifies +text+ in place and returns it.
def self.remove_brackets(text)
  BRACKETS.each do |bracket|
    text.gsub!(bracket, '')
  end
  text
end
remove_extra_question_mark(text) click to toggle source
# File lib/persian/text/text.rb, line 102
# Collapse each run of two or more Persian question marks («؟») into one.
# Modifies +text+ in place and returns it.
def self.remove_extra_question_mark(text)
  text.squeeze!('؟')
  text
end
remove_extra_spaces(text) click to toggle source

Remove extra spaces in text

# File lib/persian/text/text.rb, line 14
# Normalise spacing in +text+ and return the result as a new string:
# * leading/trailing whitespace is removed and inner whitespace runs become
#   a single space;
# * runs of zero-width non-joiners (U+200C) become a single one and trailing
#   ones are removed.
def self.remove_extra_spaces(text)
  zwnj = "\u200C"

  # split/join on a literal ZWNJ only dropped trailing joiners and left
  # repeated ones in place, so squeeze the runs explicitly.
  text.split.join(' ').squeeze(zwnj).sub(/#{zwnj}+\z/, '')
end
remove_harekats(text) click to toggle source

Remove Arabic harekats from text

# File lib/persian/text/text.rb, line 21
# Delete every occurrence of each HAREKATS entry from +text+.
# Modifies +text+ in place and returns it.
def self.remove_harekats(text)
  HAREKATS.each do |harekat|
    text.gsub!(harekat, '')
  end
  text
end
remove_noghtevirgool_baz_start(text) click to toggle source
# File lib/persian/text/text.rb, line 152
# Drop a Persian semicolon («؛») that directly follows an opening «(», «[»
# or ««», together with any spaces / zero-width non-joiners between them.
# Modifies +text+ in place and returns it.
def self.remove_noghtevirgool_baz_start(text)
  semicolon = '؛'
  after_opening = /([\(\[«])[ ‌]*[#{semicolon}]/

  text.tap { |str| str.gsub!(after_opening, '\1') }
end
remove_noghtevirgool_para_end(text) click to toggle source
# File lib/persian/text/text.rb, line 146
# Turn a Persian semicolon («؛») that ends a line into a full stop.
# Modifies +text+ in place and returns it.
def self.remove_noghtevirgool_para_end(text)
  semicolon = '؛'
  text.tap { |str| str.gsub!(/#{semicolon}(\n|$)/, '.\1') }
end
remove_question_exclamation(text) click to toggle source
# File lib/persian/text/text.rb, line 113
# Reduce a run of Persian question marks followed by a run of exclamation
# marks to exactly one of each («؟!»).
# Modifies +text+ in place and returns it.
def self.remove_question_exclamation(text)
  q_mark = '؟'
  bang = '!'

  text.tap { |str| str.gsub!(/(#{q_mark})+(#{bang})+/, '\1\2') }
end
remove_signs(text, with = '') click to toggle source

Remove Persian signs

# File lib/persian/text/text.rb, line 33
# Replace every occurrence of each SIGNS entry in +text+ with +with+
# (the empty string by default, i.e. the sign is removed).
# Modifies +text+ in place and returns it.
def self.remove_signs(text, with = '')
  SIGNS.each do |sign|
    text.gsub!(sign, with)
  end
  text
end
remove_signs_after_noghtevirgool(text) click to toggle source
# File lib/persian/text/text.rb, line 133
# Remove any run of punctuation signs (. ، ؛ : ! ؟ - …) that immediately
# follows a Persian semicolon («؛»), keeping the semicolon itself.
# Modifies +text+ in place and returns it.
def self.remove_signs_after_noghtevirgool(text)
  semicolon = '؛'
  sign_class = '[\.،؛:!؟\-…]'

  text.gsub!(/(#{semicolon})[#{sign_class}]+/, '\1')
  text
end
remove_signs_after_virgool(text) click to toggle source
# File lib/persian/text/text.rb, line 167
# Remove punctuation that follows a Persian comma («،»), optionally separated
# from it by spaces / zero-width non-joiners. The comma and the separator are
# kept. A dot is only removed when it is not followed by another dot.
# Modifies +text+ in place and returns it.
def self.remove_signs_after_virgool(text)
  text.tap do |str|
    str.gsub!(/(،)([ ‌]+)?([،؛:!؟\-][\.،؛:!؟\-]*|\.(?!\.))/, '\1\2')
  end
end
remove_space_before_virgool(text) click to toggle source
# File lib/persian/text/text.rb, line 160
# Remove whitespace that precedes a Persian comma («،»).
# Modifies +text+ in place and returns it.
def self.remove_space_before_virgool(text)
  comma = '،'
  text.tap { |str| str.gsub!(/\s+(#{comma})/, '\1') }
end
remove_space_noghtevirgool(text) click to toggle source
# File lib/persian/text/text.rb, line 127
# Remove whitespace that precedes a Persian semicolon («؛»).
# Modifies +text+ in place and returns it.
def self.remove_space_noghtevirgool(text)
  semicolon = '؛'
  text.tap { |str| str.gsub!(/\s+(#{semicolon})/, '\1') }
end
remove_stopwords(text) click to toggle source
# File lib/persian/text/text.rb, line 120
# Return +text+ with stopwords removed and the remaining words joined by a
# single space. The argument is not modified.
#
# text      - the String to filter; words are runs of non-whitespace.
# stopwords - optional list of words to drop. Defaults to the built-in
#             Persian list, so existing one-argument calls behave as before.
def self.remove_stopwords(text, stopwords = %w[و در به این با از که است را])
  text.scan(/\S+/).reject { |word| stopwords.include?(word) }.join(' ')
end
replace_e_y(text) click to toggle source

Use ی instead of ئ if the next character is ی. Example: پائیز => پاییز

# File lib/persian/text/text.rb, line 80
# Replace «ئ» with «ی» wherever it is directly followed by «ی»
# (e.g. «پائیز» becomes «پاییز»).
# Modifies +text+ in place and returns it.
def self.replace_e_y(text)
  hamza_ye = 'ئ'
  plain_ye = 'ی'

  text.tap { |str| str.gsub!(/#{hamza_ye}(#{plain_ye})/, '\1\1') }
end
replace_zwnj_mi(text) click to toggle source

Replace the space after می and نمی with a zero-width non-joiner

# File lib/persian/text/text.rb, line 55
# Attach the prefixes «می» / «نمی» to the following word with a zero-width
# non-joiner instead of whitespace. The prefix must stand at the start of a
# line or after whitespace. Modifies +text+ in place and returns it.
def self.replace_zwnj_mi(text)
  positive = 'می'
  negative = 'نمی'
  prefixed_verb = /(^|\s)(#{positive}|#{negative})\s(\S+)/

  text.gsub!(prefixed_verb, '\1\2‌\3')
  text
end
respond_to_missing?(method, include_private = false) click to toggle source
Calls superclass method
# File lib/persian/dynamic.rb, line 25
# Report dynamic `remove_<name>` methods to respond_to?.
#
# Only names that method_missing would actually handle are reported: the part
# after `remove_` is upcased and must be accepted by constant?. Previously
# every `remove_*` name was reported, including ones that end in NoMethodError.
def self.respond_to_missing?(method, include_private = false)
  name = method.to_s
  if name.start_with?('remove_')
    # Same derivation as method_missing: strip the prefix, upcase the rest.
    const_name = name.sub(/^remove_/, '').upcase
    begin
      return true if constant?(const_name)
    rescue NameError
      # Not a valid constant name, so method_missing cannot handle it either.
    end
  end
  super
end
rm_char(text, char) click to toggle source
# File lib/persian/text/text.rb, line 181
# Remove every match of +char+ from +text+. Modifies +text+ in place and
# returns it.
#
# text - the String to clean.
# char - a String or Regexp that is interpolated into a pattern as is (so a
#        String may be a regexp fragment such as 'a|b'), or an Array of
#        Strings/Regexps whose members are all removed.
def self.rm_char(text, char)
  # Interpolating an Array would embed its inspect form (`["a", "b"]`) and
  # build a character class that also strips quotes, commas and spaces.
  pattern = char.is_a?(Array) ? Regexp.union(char) : /(#{char})/
  text.gsub!(pattern, '')
  text
end
rm_virgool_in_end(text) click to toggle source
# File lib/persian/text/text.rb, line 186
# Replace a Persian comma («،») that ends a line with a full stop. Trailing
# spaces, zero-width non-joiners or newlines after the comma are preserved.
# Modifies +text+ in place and returns it.
def self.rm_virgool_in_end(text)
  trailing_comma = /(،)([ ‌\n]+)?$/
  text.tap { |str| str.gsub!(trailing_comma, '.\2') }
end
space_after_dot(text) click to toggle source
# File lib/persian/text/text.rb, line 191
# Insert a space after a dot that is directly followed by a non-space
# character. Modifies +text+ in place and returns it.
#
# A dot followed by another dot is left alone, so runs such as "..." stay
# intact instead of being broken up into ". ..".
def self.space_after_dot(text)
  text.gsub!(/(\.)([^\s.])/, '\1 \2')
  text
end
space_after_noghtevirgool(text) click to toggle source
# File lib/persian/text/text.rb, line 140
# Insert a space after a Persian semicolon («؛») that is directly followed by
# a non-space character. Modifies +text+ in place and returns it.
def self.space_after_noghtevirgool(text)
  semicolon = '؛'
  text.tap { |str| str.gsub!(/(#{semicolon})(\S)/, '\1 \2') }
end
space_after_virgool(text) click to toggle source
# File lib/persian/text/text.rb, line 174
# Insert a space after a Persian comma («،») that is directly followed by a
# non-space character. Modifies +text+ in place and returns it.
def self.space_after_virgool(text)
  comma = '،'
  text.tap { |str| str.gsub!(/(#{comma})(\S)/, '\1 \2') }
end
suffix(text) click to toggle source
# File lib/persian/text/text.rb, line 92
# Attach the detached suffixes «تر», «تری», «ترین», «ها» and «های» to the
# preceding word with a zero-width non-joiner (U+200C) in place of the
# whitespace. Modifies +text+ in place and returns it.
#
# The suffix must be a whole word: it is only joined when no letter follows
# it, so words that merely start with «تر» or «ها» are left untouched.
def self.suffix(text)
  tar = 'تر(?:ین?)?' # تر, تری, ترین
  ha = 'های?'        # ها, های

  # Both alternatives live inside ONE capture group. Previously the «ها»
  # branch sat outside group 1, so the match (suffix plus trailing space)
  # was replaced by a bare ZWNJ and the suffix was deleted.
  text.gsub!(/\s+(#{tar}|#{ha})(?!\p{L})/, "\u200C\\1")
  text
end
three_dots(text) click to toggle source
# File lib/persian/text/text.rb, line 87
# Replace every run of three or more dots with a single ellipsis character («…»).
# Modifies +text+ in place and returns it.
def self.three_dots(text)
  dot_run = /\.{3,}/
  text.tap { |str| str.gsub!(dot_run, '…') }
end