module ZhSieve::Converter
Public Instance Methods
answer_to_markdown(string_contents,question_id,answer_id)
click to toggle source
# File lib/zhSieve/html2md.rb, line 22
# Builds an Answer from the HTML of a question page and renders it.
#
# The answer node is located through its data-aid attribute; avatar, author
# link, bio and body are looked up inside that node by their exact class
# attribute and converted with parse_element.
#
# @param string_contents [String] HTML source of the page
# @param question_id [#to_s] id interpolated into the answer link
# @param answer_id [#to_s] id used for the data-aid lookup and the answer link
# @return [Object] whatever Answer#format_markdown returns
# @raise [NoContents] if string_contents is nil
def answer_to_markdown(string_contents, question_id, answer_id)
  raise NoContents if string_contents.nil?

  # Nokogiri::HTML takes (input, url, encoding): the encoding is the third
  # positional argument, the second one is the document URL.
  doc = Nokogiri::HTML(string_contents, nil, 'UTF-8')
  answer_node = doc.search(%([data-aid="#{answer_id}"]))

  # search and set question information
  avatar_raw  = answer_node.search('[class="zm-list-avatar avatar"]')
  author_raw  = answer_node.search('[class="author-link"]')
  bio_raw     = answer_node.search('[class="bio"]')
  content_raw = answer_node.search('[class="zm-editable-content clearfix"]')

  search_answer = Answer.new
  search_answer.question = doc.title.strip
  # NOTE(review): the path segment is "people" although the value is a
  # question id -- confirm this is the intended URL shape.
  search_answer.link = "https://www.zhihu.com/people/#{question_id}#answer-#{answer_id}"
  search_answer.avatar = parse_element(avatar_raw.first)
  search_answer.bio = parse_element(bio_raw.first)
  search_answer.content = parse_element(content_raw.first)

  # Assuming the author node is an <a>, it is rendered as "[name](href)";
  # splitting on brackets and parentheses yields ["", name, "", href].
  author_info = parse_element(author_raw.first).split(/[\[\]\(\)]/)
  search_answer.author = author_info[1]
  search_answer.author_link = "https://www.zhihu.com" + author_info[3]

  search_answer.format_markdown
end
article_to_markdown(string_contents,article_id)
click to toggle source
# File lib/zhSieve/html2md.rb, line 43
# Builds an Article from a JSON payload and renders it.
#
# @param string_contents [String] JSON document; the keys read are "title",
#   "publishedTime", "content" and "author" ("name", "bio", "profileUrl")
# @param article_id [#to_s] id interpolated into the article link
# @return [Object] whatever Article#format_markdown returns
# @raise [NoContents] if string_contents is nil
# @raise [JSON::ParserError] if string_contents is not valid JSON
def article_to_markdown(string_contents, article_id)
  raise NoContents if string_contents.nil?

  doc_hash = JSON.parse(string_contents)

  search_article = Article.new
  search_article.title = doc_hash["title"]
  search_article.title_image = ""
  search_article.published_time = doc_hash['publishedTime']
  search_article.link = "https://zhuanlan.zhihu.com/p/#{article_id}"
  search_article.content = doc_hash["content"]

  author = doc_hash["author"]
  search_article.author = author["name"]
  search_article.bio = author["bio"] || "No Bio!"
  search_article.author_link = author["profileUrl"]
  # The avatar is deliberately left blank, as before. The previous code also
  # expanded author.avatar's "template"/"id" into a URL and then discarded it,
  # which only added a crash when the payload had no avatar data.
  # NOTE(review): if the avatar should be rendered, build that URL here.
  search_article.avatar = ""

  search_article.format_markdown
end
debug() { || ... }
click to toggle source
# File lib/zhSieve/html2md.rb, line 126
# Prints the value produced by the given block between two separator lines.
#
# @yieldreturn [Object] value handed to puts
# @return [nil] the result of the final puts
def debug
  separator = '----------------------------------'
  puts separator
  puts yield
  puts separator
end
method_missing(name,*args,&block)
click to toggle source
Defines a custom node processor: calling an undefined method `name` with a block registers that block as `parse_name`.
# File lib/zhSieve/html2md.rb, line 120
# Registers a custom node processor.
#
# Calling an undefined method +name+ with a block defines an instance method
# "parse_<name>" on the receiver's class; that method takes (node, contents)
# and forwards both to the block.
#
# Calls without a block are passed to super, so a mistyped method name still
# raises NoMethodError instead of silently defining a parser that would later
# fail on nil.call.
#
# @param name [Symbol] name of the missing method
# @param args [Array] ignored when a block is given
# @return [Object] the result of define_method
# @raise [NoMethodError] via super when no block is given
def method_missing(name, *args, &block)
  return super unless block

  self.class.send(:define_method, "parse_#{name}") do |node, contents|
    block.call(node, contents)
  end
end
parse_element(ele)
click to toggle source
# File lib/zhSieve/html2md.rb, line 62
# Recursively converts a parsed node into text.
#
# Text nodes become their text followed by a newline. Any other node has its
# children converted and joined (or, without children, its own text taken)
# and the result is passed to wrap_node together with the node.
#
# @param ele [Nokogiri::XML::Node, nil] node to convert; nil is accepted
#   because callers pass the first match of a search, which is nil when
#   nothing matched
# @return [String] converted text, '' for nil
def parse_element(ele)
  return '' if ele.nil?
  return "#{ele.text}\n" if ele.is_a?(Nokogiri::XML::Text)

  children = ele.children
  inner =
    if children.count > 0
      # Distinct block variable: the original shadowed the method argument.
      children.map { |child| parse_element(child) }.join
    else
      ele.text
    end
  wrap_node(ele, inner)
end
people_to_markdown(string_contents)
click to toggle source
# File lib/zhSieve/html2md.rb, line 15
# Placeholder conversion for a user page: the HTML is parsed but nothing is
# extracted from it yet.
#
# @param string_contents [String] HTML source of the page
# @return [People] a freshly created People instance
# @raise [NoContents] if string_contents is nil
def people_to_markdown(string_contents)
  raise NoContents if string_contents.nil?

  # Nokogiri::HTML takes (input, url, encoding): the encoding is the third
  # positional argument, the second one is the document URL.
  _doc = Nokogiri::HTML(string_contents, nil, 'UTF-8')
  # TODO: set people info from the parsed document
  People.new
end
to_markdown(string_contents)
click to toggle source
# File lib/zhSieve/html2md.rb, line 9
# Converts an HTML document by running parse_element over every top-level
# node of the parsed document and concatenating the results.
#
# @param string_contents [String] HTML source
# @return [String] concatenated output of parse_element
# @raise [NoContents] if string_contents is nil
def to_markdown(string_contents)
  raise NoContents if string_contents.nil?

  # Nokogiri::HTML takes (input, url, encoding): the encoding is the third
  # positional argument, the second one is the document URL.
  doc = Nokogiri::HTML(string_contents, nil, 'UTF-8')
  doc.children.map { |node| parse_element(node) }.join
end
wrap_node(node,contents=nil)
click to toggle source
Wraps a node's already-converted contents in the Markdown syntax for its tag.
# File lib/zhSieve/html2md.rb, line 75
# Wraps the converted contents of a node in the Markdown for the node's tag.
#
# Lookup order:
# 1. a custom processor "parse_<node.name>", if the receiver responds to it;
# 2. nodes whose style attribute contains "display: none" produce '';
# 3. the built-in tag table below; unknown tags pass their contents through.
#
# @param node [#name, #[]] element being converted; node['style'], node['alt'],
#   node['src'] and node['href'] are read
# @param contents [String, nil] converted inner text; surrounding whitespace
#   is ignored and the caller's string is left untouched
# @return [String] Markdown fragment, or the custom processor's return value
def wrap_node(node, contents = nil)
  # Strip a copy: strip! mutated the caller's string and raised on frozen input.
  contents = contents.strip unless contents.nil?

  # check if a custom parser exists
  custom_parser = "parse_#{node.name}"
  return send(custom_parser, node, contents) if respond_to?(custom_parser)

  # skip hidden node
  style = node['style']
  return '' if style && style =~ /display:\s*none/

  # default parse
  case node.name.downcase
  when 'i', 'script', 'style'
    ''
  when 'li'
    # A list marker must be followed by a space to start a list item.
    "* #{contents}\n"
  when 'blockquote'
    # Prefix every line. The original split on the two-character literal '\n'
    # and appended the whole contents rather than the current line.
    contents.to_s.split("\n").map { |line| ">#{line}\n" }.join
  when 'p'
    "\n#{contents}\n"
  when 'strong'
    "**#{contents}**\n"
  when 'h1'
    "# #{contents}\n"
  when 'h2'
    "## #{contents}\n"
  when 'h3'
    "### #{contents}\n"
  when 'hr'
    "****\n"
  when 'br'
    "\n"
  when 'img'
    "![#{node['alt']}](#{node['src']})"
  when 'a'
    "[#{contents}](#{node['href']})"
  else
    contents.to_s
  end
end