class Persian::Text
Persian
Text
class-
Dynamic methods
Class text
Constants
- AR_FA_CHAR
- BRACKETS
- END_VOWEL
- EN_FA_KEYBOARD_CHAR
Exchange Standard QWERTY Keyboard layout
- HAREKATS
- SIGNS
Public Class Methods
# File lib/persian/text/text.rb, line 108
# Insert a zero-width non-joiner (ZWNJ, U+200C) into +text+ after its
# first +point+ characters.
#
# text  - String to process; it is not modified.
# point - Integer count of leading characters that precede the ZWNJ.
#
# Returns a new String containing the ZWNJ, or +text+ itself when +point+
# does not fall strictly inside the string (nothing to separate).
def self.add_zwnj(text, point)
  return text unless point > 0 && point < text.length

  # The ZWNJ is written as an escape so the invisible character cannot be
  # silently dropped by editors or copy/paste.
  "#{text[0, point]}\u200C#{text[point..-1]}"
end
Replace است with ست if the last character before it is ا
# File lib/persian/text/text.rb, line 63
# Contract the standalone word "است" onto a preceding word that ends in
# "ا": "<...ا> است" becomes "<...ا>ست".
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.ast(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  # The negative lookahead keeps longer words that merely start with
  # "است" (for example "استاد") from being contracted.
  text.gsub!(/(ا)\s(است)(?![\p{L}\p{M}])/, '\1ست')
  text
end
Replace Arabic characters with Persian
characters.
# File lib/persian/text/text.rb, line 8
# Replace Arabic characters with their Persian counterparts, as listed in
# AR_FA_CHAR (iterated as pattern/replacement pairs).
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.character(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  AR_FA_CHAR.each { |arabic, persian| text.gsub!(arabic, persian) }
  text
end
# File lib/persian/dynamic.rb, line 29
# Tell whether +const_name+ names a constant reachable from the Persian
# namespace.
#
# const_name - String or Symbol to look up.
#
# Returns true or false. Names that are not legal constant names (empty,
# lower-case, containing punctuation) yield false instead of raising.
def self.constant?(const_name)
  # Resolve the namespace outside the rescue so a missing Persian module
  # still surfaces as an error rather than as "false".
  namespace = Persian
  begin
    namespace.const_defined?(const_name)
  rescue NameError
    # const_defined? raises "wrong constant name" for malformed names.
    false
  end
end
Replace English characters with their corresponding Persian characters on the standard Persian keyboard. For now only the QWERTY layout is supported.
# File lib/persian/text/keyboard.rb, line 9
# Map characters typed on an English layout to the Persian characters on
# the same keys, using the pairs in EN_FA_KEYBOARD_CHAR (English first,
# Persian second).
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.english_to_persian_char(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  EN_FA_KEYBOARD_CHAR.each { |english, persian| text.gsub!(english, persian) }
  text
end
Add 'ی' after names that end with ه, ا, و
# File lib/persian/text/text.rb, line 49
# Append 'ی' to +text+ when its last character is listed in END_VOWEL.
#
# text - String to check; it is never modified.
#
# Returns a new String with 'ی' appended, or +text+ itself when the last
# character is not an end vowel.
def self.fix_y_after_vowel(text)
  return text unless END_VOWEL.include?(text[-1])

  text + 'ی'
end
Replace general brackets with a single bracket type. Default: « (0xAB) and » (0xBB)
# File lib/persian/text/text.rb, line 40
# Rewrite text enclosed in "...", [...], {...} or (...) so that it is
# enclosed in one pair of delimiters.
#
# text  - String to process. An unfrozen String is edited in place; a
#         frozen one is copied first.
# left  - opening delimiter to use (default '«').
# right - closing delimiter to use (default '»').
#
# Returns the processed String.
def self.general_brackets(text, left = '«', right = '»')
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  replacement = left + '\1' + right
  # Order matters when left/right are themselves one of these delimiters,
  # so the patterns are applied one after another in the original order.
  [/"(.*?)"/, /\[(.*?)\]/, /\{(.*?)\}/, /\((.*?)\)/].each do |enclosed|
    text.gsub!(enclosed, replacement)
  end
  text
end
# File lib/persian/dynamic.rb, line 33
# Fetch the value of the constant called +const_name+ from the Persian
# namespace.
#
# const_name - String or Symbol naming the constant.
#
# Returns the constant's value; Module#const_get raises NameError when the
# constant cannot be found.
def self.get_constant(const_name)
  namespace = Persian
  namespace.const_get(const_name)
end
Remove keshide from text
# File lib/persian/text/text.rb, line 73
# Remove every keshide character ('ـ') from +text+.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.keshide(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  # delete! returns nil when nothing was removed, so return text explicitly.
  text.delete!('ـ')
  text
end
# File lib/persian/dynamic.rb, line 6
# Provide dynamic remove_<name> methods: when <NAME> (upper-cased) is a
# known constant, the characters it holds are stripped from the first
# argument through Persian.rm_char.
#
# method - Symbol name of the missing method.
# arg    - arguments of the call; only the first one (the text) is used.
# block  - block of the call; only forwarded to super.
#
# Returns whatever Persian.rm_char returns.
# Raises NoMethodError (via super) for every name that is not
# remove_<known constant>.
def self.method_missing(method, *arg, &block)
  # Anchor the whole name so that things like "remove_" or "remove_x?"
  # never reach the constant lookup with an illegal constant name.
  matched = /\Aremove_(\w+)\z/.match(method.to_s)
  return super unless matched

  const_name = matched[1].upcase
  return super unless constant?(const_name)

  Persian.rm_char(arg[0], get_constant(const_name))
end
Replace standard Persian keyboard characters with their corresponding English characters on the English keyboard. For now only the QWERTY layout is supported.
# File lib/persian/text/keyboard.rb, line 16
# Map Persian keyboard characters back to the English characters on the
# same keys. EN_FA_KEYBOARD_CHAR is iterated as (English, Persian) pairs
# and applied in the reverse direction.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.persian_to_english_char(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  EN_FA_KEYBOARD_CHAR.each { |english, persian| text.gsub!(persian, english) }
  text
end
Remove all brackets
# File lib/persian/text/text.rb, line 27
# Remove every occurrence of each entry of BRACKETS from +text+.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.remove_brackets(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  BRACKETS.each { |bracket| text.gsub!(bracket, '') }
  text
end
# File lib/persian/text/text.rb, line 102
# Collapse every run of two or more Persian question marks ('؟') into a
# single one.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.remove_extra_question_mark(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  # squeeze! returns nil when nothing changed, so return text explicitly.
  text.squeeze!('؟')
  text
end
Remove extra spaces in text
# File lib/persian/text/text.rb, line 14
# Normalise spacing in +text+: leading/trailing whitespace is dropped and
# every whitespace run becomes a single space. Zero-width non-joiners
# (U+200C) left dangling at the very end are dropped as well.
#
# text - String to process; it is not modified.
#
# Returns a new String.
def self.remove_extra_spaces(text)
  # Splitting on an empty string only breaks the text into characters and
  # glues them back unchanged, so the delimiter is spelled out as an
  # explicit ZWNJ escape. NOTE(review): presumably the delimiter was meant
  # to be a ZWNJ that got lost as an invisible character; confirm.
  zwnj = "\u200C"
  single_spaced = text.split.join(' ')
  # split discards trailing empty fields, which removes trailing ZWNJs.
  single_spaced.split(zwnj).join(zwnj)
end
Remove Arabic harekats from text
# File lib/persian/text/text.rb, line 21
# Remove every occurrence of each entry of HAREKATS from +text+.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.remove_harekats(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  HAREKATS.each { |harekat| text.gsub!(harekat, '') }
  text
end
# File lib/persian/text/text.rb, line 152
# Remove a Persian semicolon ('؛'), together with any spaces in front of
# it, when it directly follows an opening '(', '[' or '«'.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.remove_noghtevirgool_baz_start(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  # Group 1 is the opening bracket, which is the only part kept.
  text.gsub!(/([\(\[«])[ ]*[؛]/, '\1')
  text
end
# File lib/persian/text/text.rb, line 146
# Turn a Persian semicolon ('؛') that ends a line into a full stop.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.remove_noghtevirgool_para_end(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  # Group 1 is the newline (or nothing at end of line) and is preserved.
  text.gsub!(/؛(\n|$)/, '.\1')
  text
end
# File lib/persian/text/text.rb, line 113
# Reduce a run of Persian question marks followed by a run of exclamation
# marks to exactly one of each ("؟؟!!" becomes "؟!").
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.remove_question_exclamation(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  text.gsub!(/(؟)+(!)+/, '\1\2')
  text
end
Remove Persian
signs
# File lib/persian/text/text.rb, line 33
# Replace every occurrence of each entry of SIGNS in +text+.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
# with - replacement for each sign (default '', i.e. remove it).
#
# Returns the processed String.
def self.remove_signs(text, with = '')
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  SIGNS.each { |sign| text.gsub!(sign, with) }
  text
end
# File lib/persian/text/text.rb, line 133
# Remove punctuation that directly follows a Persian semicolon ('؛').
# The characters removed are: . ، ؛ : ! ؟ - …
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.remove_signs_after_noghtevirgool(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  # A single character class; wrapping the set in a second pair of
  # brackets is not needed and matches the same characters.
  text.gsub!(/(؛)[\.،؛:!؟\-…]+/, '\1')
  text
end
# File lib/persian/text/text.rb, line 167
# Remove punctuation that follows a Persian comma ('،'), keeping the comma
# and any spaces right after it. A single '.' is removed, but a '.' that
# is followed by another '.' is left alone.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.remove_signs_after_virgool(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  # Group 1: the comma. Group 2: optional spaces. Group 3 (dropped): a run
  # of signs that starts with a non-dot sign, or one dot not followed by
  # another dot.
  trailing_signs = /(،)([ ]+)?([،؛:!؟\-][\.،؛:!؟\-]*|\.(?!\.))/
  text.gsub!(trailing_signs, '\1\2')
  text
end
# File lib/persian/text/text.rb, line 160
# Remove whitespace that precedes a Persian comma ('،').
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.remove_space_before_virgool(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  text.gsub!(/\s+(،)/, '\1')
  text
end
# File lib/persian/text/text.rb, line 127
# Remove whitespace that precedes a Persian semicolon ('؛').
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.remove_space_noghtevirgool(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  text.gsub!(/\s+(؛)/, '\1')
  text
end
# File lib/persian/text/text.rb, line 120
# Drop a fixed list of Persian stop words from +text+.
#
# text - String to process; it is not modified.
#
# Returns a new String made of the remaining whitespace-separated words,
# joined by single spaces.
def self.remove_stopwords(text)
  stopwords = %w[و در به این با از که است را]
  text.scan(/\S+/).reject { |word| stopwords.include?(word) }.join(' ')
end
Use ی instead of ئ if next char is ی Example پائیز => پاییز
# File lib/persian/text/text.rb, line 80
# Replace 'ئ' with 'ی' wherever it is directly followed by 'ی'
# (for example پائیز becomes پاییز).
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.replace_e_y(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  text.gsub!(/ئ(ی)/, '\1\1')
  text
end
Replace the space after می and نمی with a zero-width non-joiner
# File lib/persian/text/text.rb, line 55
# Replace the single whitespace character between a standalone "می" or
# "نمی" and the word after it with a zero-width non-joiner (U+200C).
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.replace_zwnj_mi(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  # Group 1: line start or preceding whitespace. Group 2: the prefix.
  # Group 3: the following word. The ZWNJ is written as an escape so the
  # invisible character cannot be silently lost from the source.
  text.gsub!(/(^|\s)(می|نمی)\s(\S+)/, "\\1\\2\u200C\\3")
  text
end
# File lib/persian/dynamic.rb, line 25
# Report which dynamic remove_<name> methods are available: only those
# whose upper-cased <NAME> is accepted by constant?.
#
# method          - Symbol or String method name being queried.
# include_private - passed through to super.
#
# Returns true for remove_<known constant>, otherwise whatever super
# returns.
def self.respond_to_missing?(method, include_private = false)
  name = method.to_s
  return super unless name.start_with?('remove_')

  begin
    # Answering true for every remove_* name would advertise methods that
    # then fail with NoMethodError when called.
    constant?(name.sub(/\Aremove_/, '').upcase) || super
  rescue NameError
    # The remainder is not a legal constant name (e.g. "remove_" alone).
    super
  end
end
# File lib/persian/text/text.rb, line 181
# Remove from +text+ everything matched by +char+.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
# char - what to remove: a Regexp (used as is), an Array of Strings and/or
#        Regexps (each entry is removed; Strings are matched literally),
#        or any other object, whose to_s is used as regular-expression
#        source.
#
# Returns the processed String.
def self.rm_char(text, char)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  pattern =
    case char
    when Regexp then char
    # Interpolating an Array into a regexp inserts its inspect text
    # (["a", "b"]), which is a character class that also matches quotes,
    # commas and spaces; build a proper alternation instead.
    when Array then Regexp.union(char)
    else /(#{char})/
    end
  text.gsub!(pattern, '')
  text
end
# File lib/persian/text/text.rb, line 186
# Replace a Persian comma ('،') that sits at the end of a line, optionally
# followed by spaces/newlines, with a full stop. The trailing spaces and
# newlines are kept.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.rm_virgool_in_end(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  # Group 2 holds the trailing blanks so they survive the replacement.
  text.gsub!(/(،)([ \n]+)?$/, '.\2')
  text
end
# File lib/persian/text/text.rb, line 191
# Insert a space between a '.' and a non-whitespace character that
# directly follows it.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.space_after_dot(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  text.gsub!(/(\.)(\S)/, '\1 \2')
  text
end
# File lib/persian/text/text.rb, line 140
# Insert a space between a Persian semicolon ('؛') and a non-whitespace
# character that directly follows it.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.space_after_noghtevirgool(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  text.gsub!(/(؛)(\S)/, '\1 \2')
  text
end
# File lib/persian/text/text.rb, line 174
# Insert a space between a Persian comma ('،') and a non-whitespace
# character that directly follows it.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.space_after_virgool(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  text.gsub!(/(،)(\S)/, '\1 \2')
  text
end
# File lib/persian/text/text.rb, line 92
# Attach the detached suffixes تر / تری / ترین and ها / های to the word in
# front of them by removing the whitespace that separates them.
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.suffix(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  # Both suffix families sit inside group 1, so the replacement always
  # re-inserts the suffix. With the alternation at the top level, the
  # ها/های branch matched with group 1 empty and the suffix was deleted.
  # The lookahead requires the suffix to be a whole word, so words that
  # merely start with these letters (e.g. "ترس") are left alone.
  detached_suffix = /\s+(تر(?:ی(?:ن)?)?|ها(?:ی)?)(?![\p{L}\p{M}])/
  text.gsub!(detached_suffix, '\1')
  text
end
# File lib/persian/text/text.rb, line 87
# Replace every run of three or more dots with a single ellipsis
# character ('…').
#
# text - String to process. An unfrozen String is edited in place; a
#        frozen one is copied first.
#
# Returns the processed String.
def self.three_dots(text)
  # Frozen input cannot be edited in place; work on a copy instead.
  text = text.dup if text.frozen?
  text.gsub!(/\.{3,}/, '…')
  text
end