class BankStatementParser::Utils
Utilities
Public Class Methods
ascii_filter(text)
click to toggle source
Filter the specified text, re-encoding to ASCII
# File lib/bank_statement_parser/utils.rb, line 24
#
# Filter the specified text, re-encoding to ASCII.
#
# The input string is never modified (so frozen strings are accepted); a new
# US-ASCII encoded string is returned.
#
# @param text [String] the text to filter
# @return [String] a new US-ASCII string in which:
#   * Unicode space separators ({Zs}) and connector punctuation ({Pc},
#     which includes "_") are replaced by a plain space,
#   * soft hyphens (U+00AD) are replaced by "-",
#   * U+0A0C is replaced by a space,
#   * any remaining character with no ASCII equivalent is dropped,
#   * line endings are normalised to "\n",
#   * form feeds are replaced by "\n".
def self.ascii_filter(text)
  # Squash some Unicode character categories
  #
  # {Zs} necessary to match statement date line
  # {Pc} necessary to match statement record lines
  #
  # The non-destructive gsub is used here on purpose: it yields a fresh
  # string, so the in-place edits below never touch the caller's object.
  rv = text.gsub(/[\p{Zs}\p{Pc}]/, " ")

  # Replace Unicode soft hyphens
  rv.gsub!(/\u00ad/, "-")

  # Replace U+0A0C (purpose of this character in the input is unknown;
  # kept from the original implementation)
  rv.gsub!(/\u0a0c/, " ")

  # Re-encode to ASCII
  encoding_options = {
    invalid: :replace,       # Replace invalid byte sequences
    undef: :replace,         # Replace anything not defined in ASCII
    replace: '',             # Use a blank for those replacements
    universal_newline: true  # Always break lines with \n
  }
  # Options must be passed as keywords (**): a positional hash would be
  # taken as the source-encoding argument on Ruby 3.0+.
  rv = rv.encode(Encoding.find('US-ASCII'), **encoding_options)

  # Replace ASCII form feed characters
  rv.gsub!(/\f/, "\n")

  rv
end