class Replace
Attributes
Public Class Methods
# File lib/replace.rb, line 8
# Wraps the text that the instance methods of this class operate on.
#
# @param string [String] the text to process. It is stored as-is (not
#   copied); the gsub!/sub!-based methods of this class edit it in place.
def initialize(string)
  @string = string
end
Public Instance Methods
增加一些必要的分行
# File lib/replace.rb, line 228
# Adds spacing and line breaks around punctuation that sits between Han
# characters:
#   * Han + [:,] + Han          -> a single space after the punctuation
#   * Han + [。.!?;] + Han       -> a newline after the punctuation
#   * Han + opening bracket     -> a single space before the bracket
#   * closing bracket + Han     -> a single space after the bracket
#
# @return [Replace] self, so calls can be chained
def add_line_break
  replace(@string) do
    # The Han character after the punctuation is only looked at (lookahead),
    # not consumed. Consuming it made gsub! skip every other separator in
    # runs such as "甲,乙,丙", because the consumed character could not start
    # the next match.
    s(/(\p{Han})[[:blank:]]*([:,])[[:blank:]]*(?=\p{Han})/, '\1\2 ')
    s(/(\p{Han})[[:blank:]]*([。.!?;])[[:blank:]]*(?=\p{Han})/, "\\1\\2\n")
    s(/(\p{Han})[[:blank:]]*(\p{Ps})/, '\1 \2')
    s(/(\p{Pe})[[:blank:]]*(\p{Han})/, '\1 \2')
  end
  self
end
# File lib/replace.rb, line 365
# Removes the "_古诗文网" marker and every "作者:..." line (through its line
# ending) from the text, then strips leading blanks via del_head_blank.
#
# @return the result of del_head_blank
def ancient_literature
  removals = [
    /_古诗文网/,
    /作者:.*\r?\n/
  ]
  replace(@string) do
    removals.each { |pattern| s(pattern, '') }
  end
  del_head_blank
end
双字节 ASCII 字符转为单字节字符 (通过验证, 危险等级: 0) !"#$%&'()*+,-./ 0123456789:;<=>? @ABCDEFGHIJKLMNO PQRSTUVWXYZ[\]^_ `abcdefghijklmno pqrstuvwxyz{|}~ !“#$%&‘()*+,-./ 0123456789:;<=>? @ABCDEFGHIJKLMNO PQRSTUVWXYZ^_ `abcdefghijklmno pqrstuvwxyz{|}~
# File lib/replace.rb, line 203
# Converts fullwidth ASCII variants (U+FF01..U+FF5E) into the corresponding
# single-byte ASCII characters (U+0021..U+007E).
#
# The two ranges are parallel, so the ASCII code point is the fullwidth code
# point minus 0xFEE0. This replaces the previous arithmetic on the raw UTF-8
# bytes, which yields the same characters but depends on the three-byte
# UTF-8 layout of the matched character.
#
# @return [Replace] self, so calls can be chained
def ascii2
  replace(@string) do
    s(/[\u{FF01}-\u{FF5E}]/) { |wide| (wide.ord - 0xFEE0).chr }
  end
  self
end
批量逐个替换第一个匹配项
# File lib/replace.rb, line 49
# For each entry of +regexps+, replaces the FIRST occurrence of the key in the
# text with " ^[value] " (one substitution per entry, applied in hash order).
#
# @param regexps [Hash] maps a regexp source (it is interpolated into
#   Regexp.new, so it must already be escaped where needed) to the text that
#   is inserted between "^[" and "]".
# @return [Replace] self, so calls can be chained
def batch_replace(regexps = {})
  regexps.each do |key, value|
    # \G(.*?) keeps everything before the first occurrence as group 1;
    # MULTILINE lets "." cross line boundaries.
    pattern = Regexp.new("\\G(.*?)#{key}", Regexp::MULTILINE)
    replace(@string) do
      # Block form: the value is inserted literally. With a replacement
      # *string*, backslash sequences inside the value (e.g. "\1" or "\\")
      # would be interpreted as back-references and corrupt the output.
      sub!(pattern) { "#{Regexp.last_match(1)} ^[#{value}] " }
    end
  end
  self
end
删除汉字之间的空格 (通过验证, 危险等级: 3) 添加汉字与数字、英文之间的空格 del_head_blank.del_blank_line
# File lib/replace.rb, line 241 def blank replace(@string) do # 删除汉字之间的空格, "无 法 处 理 这 种 情 况" s /(\p{Han})[[:blank:]]+(\p{Han})/, '\1\2' # 添加汉字与数字、英文之间的空格 s /(\p{Han})(\w)/, '\1 \2' s /(\w)(\p{Han})/, '\1 \2' end del_head_blank.del_blank_line end
判定章节标题 (通过验证, 危险等级: 0)
# File lib/replace.rb, line 382
# Replaces Chinese-numbered heading prefixes at the start of a line with
# heading markers. The matched prefix itself is dropped:
#   第N卷 / 第N部 / 第N篇 -> "PART: "
#   第N章                -> "# "
#   第N节                -> "## "
#   N、                  -> "### "
#   (N)                  -> "#### "
# N is one or more of the characters 一..十.
#
# @return [Replace] self, so calls can be chained
def chapter
  heading_rules = [
    [/^第[一二三四五六七八九十]+[卷部篇]/, 'PART: '],
    [/^第[一二三四五六七八九十]+[章]/, '# '],
    [/^第[一二三四五六七八九十]+[节]/, '## '],
    [/^[一二三四五六七八九十]+、/, '### '],
    [/^\([一二三四五六七八九十]+\)/, '#### ']
  ]
  replace(@string) do
    heading_rules.each { |pattern, marker| s(pattern, marker) }
  end
  self
end
行内代码两边各留一个空格 (未通过验证, 危险等级: 4) jekyll_code
# File lib/replace.rb, line 300
# Puts one space on each side of an inline code span (`...`) that is directly
# surrounded by alphanumeric characters, then converts Jekyll highlight tags
# via jekyll_code.
#
# NOTE(review): the pattern pairs backticks purely by position, so text with
# unbalanced backticks can still be mis-paired; the source marks this method
# as not yet validated.
#
# @return the result of jekyll_code
def code
  replace(@string) do
    # The character after the closing backtick is only looked at, not
    # consumed, so it can be the leading character of the next span
    # ("a`x`b`y`c" previously left the second span untouched).
    s(/([[:alnum:]])`([^`]+?)`(?=[[:alnum:]])/, '\1 `\2` ')
  end
  jekyll_code
end
删除多余的空行 (通过验证, 危险等级: 0) del_tail_blank
# File lib/replace.rb, line 272
# Collapses every run of two or more consecutive lines that contain only
# blanks into a single "\n", then strips trailing blanks via del_tail_blank.
#
# @return the result of del_tail_blank
def del_blank_line
  blank_line_run = /(^[[:blank:]]*\r?\n){2,}/
  replace(@string) { s(blank_line_run, "\n") }
  del_tail_blank
end
删除行首的空白 (通过验证, 危险等级: 3, 可能是 Markdown 缩进) 将看上去像空白的行转化为真正的空白行
# File lib/replace.rb, line 254
# Removes blanks (spaces/tabs) at the beginning of every line. Lines that
# consisted only of blanks become empty lines as a result.
#
# @return [Replace] self, so calls can be chained
def del_head_blank
  leading_blanks = /^[[:blank:]]+/
  replace(@string) { s(leading_blanks, '') }
  self
end
删除加粗斜体样式 (通过验证, 危险等级: 3, 可能是 Markdown 加粗斜体)
# File lib/replace.rb, line 345
# Strips Markdown emphasis markers while keeping the emphasised text:
# first the double markers (** or __), then the single ones (* or _).
# In both passes the marker must be preceded by a non-word character, an
# underscore or a line start, and followed by the same kind of boundary.
#
# @return [Replace] self, so calls can be chained
def del_italics_and_bold
  emphasis_patterns = [
    /([\W_]|^)(\*\*|__)(?=\S)([^\r]*?\S[\*_]*)\2([\W_]|$)/,
    /([\W_]|^)(\*|_)(?=\S)([^\r\*_]*?\S)\2([\W_]|$)/
  ]
  replace(@string) do
    # Group 2 (the marker) is dropped; the boundaries and the text are kept.
    emphasis_patterns.each { |pattern| s(pattern, '\1\3\4') }
  end
  self
end
删除一些没必要的分行
# File lib/replace.rb, line 217
# Removes line breaks that split running Chinese text:
#   1. a line break between two Han characters;
#   2. a line break between a Han character and a punctuation character;
#   3. a run of three or more "…" followed by one or more line breaks
#      (the whole run is deleted).
#
# @return [Replace] self, so calls can be chained
def del_line_break
  replace(@string) do
    # The Han character after the break is matched with a lookahead so it
    # can also be the character before the next break. Consuming it only
    # joined every other line in input such as "无\n法\n处\n理".
    s(/(\p{Han})\r?\n(?=\p{Han})/, '\1')
    s(/(\p{Han})\r?\n([[:punct:]])/, '\1\2')
    s(/…{3,}(\r?\n)+/, '')
  end
  self
end
删除行尾的空白 (通过验证, 危险等级: 0) 将看上去像空白的行转化为真正的空白行
# File lib/replace.rb, line 263
# Removes blanks (spaces/tabs) directly before a line ending and normalises
# that line ending to "\n".
#
# @return [Replace] self, so calls can be chained
def del_tail_blank
  trailing_blanks = /[[:blank:]]+\r?\n/
  replace(@string) { s(trailing_blanks, "\n") }
  self
end
# File lib/replace.rb, line 58
# Builds the note dictionary with scan_note and applies it to the text with
# batch_replace.
#
# @return the result of batch_replace
def footnote
  notes = scan_note
  batch_replace(notes)
end
# File lib/replace.rb, line 353
# Cleans up a plain-text book dump, in this order:
#   1. every line ending (with preceding whitespace) becomes a blank line;
#   2. a run of four or more "$" becomes the heading marker "#### ";
#   3. ideographic spaces (U+3000) and SUB control characters (U+001A) are
#      deleted;
#   4. "# <digits><any char>" becomes "## ";
#   5. "#### 第…卷/部 <title>" becomes "PART: <title>";
#   6. "#### 第…章 <title>"    becomes "# <title>".
# Finishes with del_head_blank.
#
# @return the result of del_head_blank
def foreign_literature
  replace(@string) do
    s(/\s*\n/, "\n\n")
    s(/\${4,}\s*/, '#### ')
    # NOTE(review): the visible source had an ASCII space in this class.
    # That deletes every space in the text, including the one the previous
    # rule just inserted after "####", so none of the following rules could
    # ever match. The ideographic space is assumed to be the intended
    # character -- confirm against the original file.
    s(/[\u{3000}\u{001A}]/, '')
    s(/# [0-9]+.\s*/, '## ')
    s(/#### 第[^\r\n]+[卷部]\s*(.*)\s*\n/, "PART: \\1\n\n")
    s(/#### 第[^\r\n]+[章]\s*(.*)\s*\n/, "# \\1\n\n")
  end
  del_head_blank
end
# File lib/replace.rb, line 401
# Round-trips the text through the two converters of this class: Markdown to
# HTML (markdown2html), then HTML back to Markdown (html2markdown).
#
# @return the result of html2markdown
def format_markdown
  as_html = markdown2html
  as_html.html2markdown
end
删除页眉页脚
# File lib/replace.rb, line 289
# Cuts the page header and footer off the text:
#   * header: the first 11 lines plus any whitespace that follows them;
#   * footer: everything from the first line starting with "[«" to the end.
#
# @return [Replace] self, so calls can be chained
def head_foot
  header = /\A(^[^\r\n]*\r?\n){11}\s*/m
  footer = /^\[«.*?\z/m
  replace(@string) do
    s(header, '')
    s(footer, '')
    # Disabled alternative from the source (drop the last four lines):
    # s /(^.*?\r?\n){4}\z/, ''
  end
  self
end
# File lib/replace.rb, line 12
# Collects, for every "def <name>" found in the text, the block of
# consecutive lines containing "#" that immediately precedes it.
#
# The text is only scanned. The previous implementation used gsub! to walk
# the matches, which as a side effect replaced each "comment + def <name>"
# span with the comment alone and so rewrote @string.
#
# @return [Hash{Symbol => String}] method name => preceding comment text
#   ("" when the definition has no comment lines directly above it)
def help
  method_comments = {}
  # Groups: 1 = whole comment block, 2 = its last line (unused), 3 = name.
  @string.scan(/((.*#.*\r?\n)*)\s*def\s+(\w+)/) do |comments, _last_line, name|
    method_comments[name.to_sym] = comments
  end
  method_comments
end
# File lib/replace.rb, line 411
# Converts the text from HTML to Markdown with PandocRuby, passing the
# options 'chapters', 'atx-headers', 'normalize' and 'no-wrap'.
#
# Unlike the gsub!-based methods, this assigns a new object to @string
# instead of editing the existing string in place.
#
# @return [Replace] self, so calls can be chained
def html2markdown
  pandoc = PandocRuby.new(@string, from: :html, to: :markdown)
  markdown = pandoc.convert('chapters', 'atx-headers', 'normalize', 'no-wrap')
  @string = markdown
  self
end
处理插图路径 (通过验证, 危险等级: 0)
# File lib/replace.rb, line 280 def image replace(@string) do s /Insert\s(18333fig\d+)\.png\s*\n.*?\d{1,2}-\d{1,2}\. (.*)/, '' s /!\[(.*?)\]\(\S*\/(\S*?)( ".*")?\)/, '' end self end
Jekyll 代码格式转为 Fenced 代码格式 (通过验证, 危险等级: 0)
# File lib/replace.rb, line 309
# Converts Jekyll highlight tags into fenced code blocks:
#   {% highlight LANG %} -> blank line, then a fence opening with {.LANG}
#   {% endhighlight %}   -> a closing fence, then a blank line
# Whitespace around each tag is absorbed by the replacement.
#
# @return [Replace] self, so calls can be chained
def jekyll_code
  fence_rules = [
    [/\s*\{%\s*highlight\s+(\w+)\s*%\}\s*/, "\n\n```{.\\1}\n"],
    [/\s*\{%\s*endhighlight\s*%\}\s*/, "\n```\n\n"]
  ]
  replace(@string) do
    fence_rules.each { |tag, fence| s(tag, fence) }
  end
  self
end
# File lib/replace.rb, line 393
# Normalises list markers at the start of a line so that the marker is
# followed by exactly one tab:
#   * a digit plus the one character after it (the "." in the pattern is
#     unescaped, so it matches any character) keeps its text;
#   * a "●" bullet is replaced by "-".
#
# @return [Replace] self, so calls can be chained
def list
  numbered = /^(\d.)\s*/
  bulleted = /^[●]\s*/
  replace(@string) do
    s(numbered, "\\1\t")
    s(bulleted, "-\t")
  end
  self
end
# File lib/replace.rb, line 405
# Converts the text from Markdown to HTML with PandocRuby, passing the
# option 'chapters' and 'indented-code-classes' => 'sourceCode'.
#
# This assigns a new object to @string instead of editing the existing
# string in place.
#
# @return [Replace] self, so calls can be chained
def markdown2html
  pandoc = PandocRuby.new(@string, from: :markdown, to: :html)
  html = pandoc.convert('chapters', 'indented-code-classes' => 'sourceCode')
  @string = html
  self
end
判定段落的起始 (通过验证, 危险等级: 0)
# File lib/replace.rb, line 374
# Marks paragraph starts: two or more blanks at the beginning of a line are
# replaced by a newline, which puts an empty line before that line.
#
# @return [Replace] self, so calls can be chained
def paragraph
  paragraph_indent = /^[[:blank:]]{2,}/
  replace(@string) { s(paragraph_indent, "\n") }
  self
end
处理 pdftotext 的转换结果 (未通过验证, 危险等级: 4) paragraph.blank.del_line_break.chapter.list.punct2.add_line_break
# File lib/replace.rb, line 112
# Post-processes pdftotext output: drops lines that contain only a number
# (page numbers), then runs
# paragraph -> blank -> del_line_break -> chapter -> list -> punct2 ->
# add_line_break.
#
# @return the result of the final add_line_break call
def pdftotext
  page_number_line = /^[[:blank:]]*[0-9]+[[:blank:]]*\r?\n/
  replace(@string) { s(page_number_line, '') }

  paragraph
    .blank
    .del_line_break
    .chapter
    .list
    .punct2
    .add_line_break
end
# File lib/replace.rb, line 94
# Adjusts pandoc's LaTeX output:
#   1. {verbatim} -> {Verbatim};
#   2. a centred \rule (pandoc's horizontal rule) -> \newpage;
#   3. whitespace around \footnote{...} is removed;
#   4. \footnote{term: text} -> 〔{\kaishu term: text}〕, with either a
#      fullwidth or an ASCII colon as separator.
# Finishes with theorem.
#
# @return the result of theorem
def post_pandoc_for_latex
  replace(@string) do
    s(/\{verbatim\}/, '{Verbatim}')
    s(/\\begin\{center\}\\rule\{(.*?)\}\{(.*?)\}\\end\{center\}/, '\newpage')
    s(/\s*\\footnote\{(.*?)\}\s*/, '\footnote{\1}')
    # The visible source listed the ASCII colon twice in this class. The
    # fullwidth colon (U+FF1A) is restored as the second alternative; this
    # is a superset of the ASCII-only behaviour.
    s(/\\footnote\{(.*?)[\u{FF1A}:]\s*(.*?)\}/, '〔{\kaishu \1: \2}〕')
  end
  theorem
end
# File lib/replace.rb, line 90
# Preparation step before the text is handed to pandoc for LaTeX output.
# Currently this only delegates to title.
#
# @return the result of title
def pre_pandoc_for_latex
  title
end
中文标点转为英文标点
# File lib/replace.rb, line 121
# Converts Chinese (fullwidth) punctuation to ASCII punctuation and starts a
# new line after each sentence-level mark:
#   * fullwidth comma               -> ", "
#   * fullwidth : ; ? ! and "。"    -> ASCII mark followed by "\n"
#   * fullwidth parentheses         -> " (" and ") "
#   * ") " directly before , or .   -> the space is removed again
#
# The fullwidth characters are written as \u{...} escapes. In the visible
# source they had been folded to ASCII, which left invalid regexps
# (/?(...)/, /(/, /)/) and rules that replaced ASCII marks with themselves.
#
# @return [Replace] self, so calls can be chained
def punct1
  # fullwidth mark => ASCII mark
  sentence_marks = {
    "\u{FF1A}" => ':',
    "\u{FF1B}" => ';',
    "\u{3002}" => '.',
    "\u{FF1F}" => '?',
    "\u{FF01}" => '!'
  }
  replace(@string) do
    s(/\u{FF0C}/, ', ')
    # Mark followed by more text on the same line: break the line after it.
    sentence_marks.each do |wide, ascii|
      s(/#{wide}([^\r\n])/, "#{ascii}\n\\1")
    end
    # Mark already at the end of a line: convert it, normalise the ending.
    sentence_marks.each do |wide, ascii|
      s(/#{wide}\r?\n/, "#{ascii}\n")
    end
    s(/\u{FF08}/, ' (')
    s(/\u{FF09}/, ') ')
    s(/\) ([,.])/, ')\1')
  end
  self
end
中文标点转为英文标点 (通过验证, 危险等级: 3, 可能需要用中文标点) 保留部分中文符号: 、《》〈〉【】〖〗〔〕 ascii2: ?!,;:()
# File lib/replace.rb, line 144
# Converts a small set of Chinese punctuation marks to ASCII and then hands
# over to ascii2 for the fullwidth ASCII variants:
#   "。" -> "."    curly double quotes -> '"'    curly single quotes -> "'"
#   "──" -> "---"  "—" -> "--"
#
# Reference table of punctuation kept from the source:
#   ‐‑‒–—―‖‗‘’‚‛“”„‟
#   †‡•‣․‥…‧
#   ‰‱′″‴‵‶‷‸‹›※‼‽‾‿
#   ⁀⁁⁂⁃
#   ⁅⁆⁇⁈⁉⁊⁋⁌⁍⁎⁏
#   ⁐⁑
#   ⁓⁔⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞
#   ⁽⁾
#   、。〃
#   〈〉《》「」『』
#   【】
#   〔〕〖〗〘〙〚〛〜〝〞〟
#   〰
#   〽
#   \p{S}:  $+<=>^`|~⁄⁒
#   \p{Sm}: +<=>|~⁄⁒
#   \p{Sc}: $
#   \p{Sk}: ^`
#   \p{Pi}: ‘‛“‟
#   \p{Pf}: ’”
#   sentence-final marks: .!?;:
#   paired/other marks:   `$()''""
#   mid-sentence marks:   ,、
#
# @return the result of ascii2
def punct2
  ascii_for = [
    [/。/, '.'],
    [/[“”]/, '"'],
    [/[‘’]/, "'"],
    [/──/, '---'],
    [/—/, '--']
  ]
  replace(@string) do
    ascii_for.each { |pattern, ascii| s(pattern, ascii) }
  end
  ascii2
end
# File lib/replace.rb, line 80 def rename replace(@string) do s /!\[\]\(image(\d+).jpg\)/ do i = $1.to_i - 1 "" % i end end self end
# File lib/replace.rb, line 30
# Finds the target of every Markdown image reference ![alt](target "title")
# in the text, stores the list in @scan and returns it.
#
# @return [Array<Array<String>>] one single-element array per image,
#   holding the target without the optional title
def scan_image
  image_reference = /!\[.*?\]\(([^\s]+?)(?:\s+.*?)?\)/
  targets = @string.scan(image_reference)
  @scan = targets
end
扫描注释列表生成替换字典
# File lib/replace.rb, line 35
# Scans the note list in the text and builds the replacement dictionary that
# batch_replace expects.
#
# A note line has the form "<key>〔<term>〕<text>" and must end with a line
# break. For each such line the dictionary gets
#   regexp source for "^<key>^"  =>  "<term>: <text>"
#
# Leading blanks are stripped first (del_head_blank) so that indented note
# lines are recognised.
#
# @return [Hash{String => String}] regexp source => note text
def scan_note
  del_head_blank
  note = {}
  @string.scan(/^(.*?)〔(.*?〕.*?)\r?\n/) do |key, value|
    # The dictionary key is later compiled with Regexp.new, so the scanned
    # text is escaped; an unescaped key such as "C++" or "(1)" would change
    # the meaning of the pattern or fail to compile.
    marker = "\\^#{Regexp.escape(key)}\\^"
    note[marker] = value.sub(/〕/, ': ')
  end
  note
end
# File lib/replace.rb, line 22
# Collects every run of word characters (\w+) in the text, stores the list
# in @scan and returns it.
#
# @return [Array<String>] the words found, in order of appearance
def scan_test
  words = @string.scan(/\w+/)
  @scan = words
end
# File lib/replace.rb, line 26
# Collects the value of every href='...' / href="..." attribute in the text,
# stores the list in @scan and returns it.
#
# @return [Array<Array<String>>] one single-element array per href
def scan_url
  href_value = /href=['"](.*?)['"]/
  links = @string.scan(href_value)
  @scan = links
end
# File lib/replace.rb, line 62
# Minimal example of the replace DSL: rewrites "cc" to "dd", then "aa" to
# "bb".
#
# @return [Replace] self, so calls can be chained
def simple
  swaps = [
    [/cc/, 'dd'],
    [/aa/, 'bb']
  ]
  replace(@string) do
    swaps.each { |from, to| s(from, to) }
  end
  self
end
标准化 Markdown 文件, 处理 HTML 文件的转换结果 (未通过验证, 危险等级: 4) blank.del_line_break.punct2.code.add_line_break.format_markdown
# File lib/replace.rb, line 106
# Full normalisation pipeline for a Markdown document. Runs, in order:
# blank -> del_line_break -> punct2 -> code -> add_line_break ->
# format_markdown.
#
# @return the result of the final format_markdown call
def standard
  blank
    .del_line_break
    .punct2
    .code
    .add_line_break
    .format_markdown
end
台湾标点转大陆标点 (通过验证, 危险等级: 0) ascii2
# File lib/replace.rb, line 180
# Converts corner-bracket quotation marks to curly quotation marks
# (「」 -> ‘’ and 『』 -> “”), then hands over to ascii2.
#
# @return the result of ascii2
def taiwan
  quote_pairs = [
    [/「/, '‘'],
    [/」/, '’'],
    [/『/, '“'],
    [/』/, '”']
  ]
  replace(@string) do
    quote_pairs.each { |corner, curly| s(corner, curly) }
  end
  ascii2
end
定理环境, LaTeX 命令 (未通过验证, 危险等级: 2)
# File lib/replace.rb, line 318
# Turns labelled paragraphs into LaTeX constructs.
#
# A paragraph that starts with one of the known labels (case-insensitive)
# followed by "." or ":" becomes a LaTeX environment named after the
# lower-cased label; the paragraph runs up to the next blank line or the end
# of the text. A paragraph labelled PART becomes the command \part{...}.
#
# @return [Replace] self, so calls can be chained
def theorem
  environment_paragraph = /^(ASSUMPTION|DEFINITION|CONCLUSION|ALGORITHM|EXPERIMENT|EXAMPLE|REMARK|NNOTE|THEOREM|AXIOM|LEMMA|PROPERTY|COROLLARY|PROPOSITION|CLAIM|PROBLEM|QUESTION|CONJECTURE|PROOF|SOLUTION|ANSWER|ANALYSIS)[.:](.*?)(\n(?=\n)|\Z)/mi
  part_paragraph = /^(PART)[.:](.*?)(\n(?=\n)|\Z)/mi

  replace(@string) do
    s(environment_paragraph) do
      environment = Regexp.last_match(1).downcase
      content = Regexp.last_match(2).strip
      "\\begin{#{environment}}\n#{content}\n\\end{#{environment}}\n"
    end
  end

  replace(@string) do
    s(part_paragraph) do
      command = Regexp.last_match(1).downcase
      "\\#{command}{#{Regexp.last_match(2).strip}}\n"
    end
  end

  self
end
转换 YAML 标题信息 (通过验证, 危险等级: 0)
# File lib/replace.rb, line 334
# Replaces a YAML front-matter block at the very start of the text (delimited
# by lines of three or more dashes) with a level-1 Markdown heading built
# from its "title" entry. Front matter without a title is simply removed.
#
# Front matter that is empty or does not parse to a mapping (YAML.load then
# returns false, nil, a String, an Array, ...) is also just removed;
# previously this raised when "title" was looked up on a non-Hash.
#
# @return [Replace] self, so calls can be chained
def title
  replace(@string) do
    s(/\A^-{3,}\r?\n(.*?)^-{3,}\r?\n/m) do
      # NOTE(review): YAML.load can instantiate arbitrary objects on older
      # Psych versions; consider YAML.safe_load if the text is untrusted.
      doc = YAML.load(Regexp.last_match(1))
      heading = doc['title'] if doc.is_a?(Hash)
      heading ? "# #{heading}\n\n" : ''
    end
  end
  self
end
处理 Shell 命令 tree 的输出 (通过验证, 危险等级: 0)
# File lib/replace.rb, line 71
# Converts the box-drawing characters printed by the shell command `tree`
# to ASCII: "│" and "├" become "|", "└" becomes a backslash, "─" becomes "-".
#
# @return [Replace] self, so calls can be chained
def tree
  ascii_art = [
    [/[│├]/, '|'],
    [/[└]/, '\\'],
    [/[─]/, '-']
  ]
  replace(@string) do
    ascii_art.each { |box_char, ascii| s(box_char, ascii) }
  end
  self
end
台湾正体到简体 brew install opencc sudo gem install ropencc
# File lib/replace.rb, line 420
# Converts the text with Ropencc using the 'tw2s.json' configuration
# (Taiwan Traditional Chinese to Simplified Chinese, per the method's
# description). Assigns the converted text to @string as a new object.
#
# @return [Replace] self, so calls can be chained
def tw2s
  opencc = Ropencc.open('tw2s.json')
  simplified = opencc.convert(@string)
  @string = simplified
  self
end
Private Instance Methods
# File lib/replace.rb, line 428
# Runs +block+ with +string+ as self and with String#gsub! available under
# the short name +s+, so that a list of substitutions can be written as
#   s /pattern/, 'replacement'
# The alias is defined on the string's singleton class only. All edits
# happen in place.
#
# @param string [String] the text to edit (must not be frozen)
# @yield the substitution rules, evaluated in the context of +string+
# @return [String] the same string object, after the edits
def replace(string, &block)
  string.singleton_class.send(:alias_method, :s, :gsub!)
  string.instance_eval(&block)
  string
end