class Ronn::RoffFilter
Filter for converting HTML to ROFF
Constants
- HTML_ROFF_ENTITIES
Public Class Methods
Source
# File lib/ronn/roff.rb
# Build the roff output for an HTML fragment. The output chunks are
# accumulated in @buf.
#
# html_fragment - HTML fragment (not a complete document) to convert.
# name, section, tagline, manual, version, date - passed straight through to
#   title_heading to emit the leading comments and the TH macro.
def initialize(html_fragment, name, section, tagline, manual = nil,
               version = nil, date = nil)
  @buf = []
  title_heading(name, section, tagline, manual, version, date)

  fragment = Nokogiri::HTML.fragment(html_fragment)
  remove_extraneous_elements!(fragment)
  normalize_whitespace!(fragment)
  block_filter(fragment)

  # Terminate the output with a newline.
  write("\n")
end
Convert Ronn HTML to roff.
The HTML input is an HTML fragment, not a complete document.
Public Instance Methods
Protected Instance Methods
Source
# File lib/ronn/roff.rb
# Walk a node (or a collection of nodes) in block context and emit the
# corresponding roff macros, delegating inline content to inline_filter.
#
# node - nil, an Array / Nokogiri::XML::NodeSet of nodes, or a single node
#        (document, fragment, text, element, DTD or comment).
#
# Raises RuntimeError for an <li> outside <ol>/<ul>, for a stray <th>, and
# for node kinds that are not handled at all. Unrecognized element names only
# produce a warning.
def block_filter(node)
  return if node.nil?

  if node.is_a?(Array) || node.is_a?(Nokogiri::XML::NodeSet)
    node.each { |ch| block_filter(ch) }

  elsif node.document? || node.fragment?
    block_filter(node.children)

  elsif node.text?
    # This hack is necessary to support mixed-child-type dd's
    inline_filter(node)

  elsif node.elem?
    case node.name
    when 'html', 'body', 'div'
      block_filter(node.children)
    when 'h1'
      # discard
      nop
    when 'h2'
      macro 'SH', quote(escape(node.inner_html))
    when 'h3'
      macro 'SS', quote(escape(node.inner_html))
    when 'h4', 'h5', 'h6'
      # Ronn discourages use of this many heading levels, but if they are used,
      # we should make them legible instead of ignoring them.
      macro 'SS', quote(escape(node.inner_html))

    when 'p'
      prev = previous(node)
      if prev && %w[dd li blockquote].include?(node.parent.name)
        macro 'IP'
      elsif prev && !%w[h1 h2 h3].include?(prev.name)
        macro 'P'
      elsif node.previous&.text?
        macro 'IP'
      end
      inline_filter(node.children)

    when 'blockquote'
      prev = previous(node)
      indent = prev.nil? || !%w[h1 h2 h3].include?(prev.name)
      macro 'IP', %w["" 4] if indent
      block_filter(node.children)
      macro 'IP', %w["" 0] if indent

    when 'pre'
      prev = previous(node)
      indent = prev.nil? || !%w[h1 h2 h3].include?(prev.name)
      macro 'IP', %w["" 4] if indent
      macro 'nf'
      # HACK: strip an initial \n to avoid extra spacing.
      # An empty <pre> has no first child, so guard against nil here.
      first_child = node.children.first
      if first_child&.text?
        text = first_child.to_s
        first_child.replace(text[1..]) if text.start_with? "\n"
      end
      inline_filter(node.children)
      macro 'fi'
      macro 'IP', %w["" 0] if indent

    when 'dl'
      macro 'TP'
      block_filter(node.children)
    when 'dt'
      prev = previous(node)
      # The enclosing <dl> already emitted .TP for the first term.
      macro 'TP' unless prev.nil?
      inline_filter(node.children)
      write "\n"
    when 'dd'
      if node.at('p')
        block_filter(node.children)
      else
        inline_filter(node.children)
      end
      write "\n"

    when 'ol', 'ul'
      block_filter(node.children)
      macro 'IP', %w["" 0]
    when 'li'
      case node.parent.name
      when 'ol'
        macro 'IP', %W["#{node.parent.children.index(node) + 1}." 4]
      when 'ul'
        macro 'IP', ['"\(bu"', '4']
      else
        raise "List element found as a child of non-list parent element: #{node.inspect}"
      end
      if node.at('p,ol,ul,dl,div')
        block_filter(node.children)
      else
        inline_filter(node.children)
      end
      write "\n"

    when 'span', 'code', 'b', 'strong', 'kbd', 'samp', 'var', 'em', 'i',
         'u', 'br', 'a'
      inline_filter(node)

    when 'table'
      macro 'TS'
      write "allbox;\n"
      block_filter(node.children)
      macro 'TE'
    when 'thead'
      # Convert to format section and first row
      tr = node.children[0]
      header_contents = []
      cell_formats = []
      tr.children.each do |th|
        style = th['style']
        cell_format = case style
                      when 'text-align:left;'
                        'l'
                      when 'text-align:right;'
                        'r'
                      when 'text-align:center;'
                        'c'
                      else
                        'l'
                      end
        header_contents << th.inner_html
        cell_formats << cell_format
      end
      write cell_formats.join(' ') + ".\n"
      write header_contents.join("\t") + "\n"
    when 'th'
      raise 'internal error: unexpected <th> element'
    when 'tbody'
      # Let the 'tr' handle it
      block_filter(node.children)
    when 'tr'
      # Convert to a table data row
      node.children.each do |child|
        block_filter(child)
        write "\t"
      end
      write "\n"
    when 'td'
      inline_filter(node.children)

    else
      warn 'unrecognized block tag: %p', node.name
    end

  elsif node.is_a?(Nokogiri::XML::DTD)
    # Ignore
    nop
  elsif node.is_a?(Nokogiri::XML::Comment)
    # Ignore
    nop
  else
    raise "unexpected node: #{node.inspect}"
  end
end
Source
# File lib/ronn/roff.rb
# Emit a roff comment line (`.\" text`) on its own line.
def comment(text)
  writeln ".\\\" #{text}"
end
Source
# File lib/ronn/roff.rb
# Escape a chunk of HTML text for roff output.
#
# text - String of HTML text (may be nil or empty).
#
# Returns a new String; nil yields "". Processing order matters:
#   1. numeric character references are decoded to UTF-8 characters,
#   2. backslashes, ellipses, dots and dashes are roff-escaped,
#   3. named entities are translated via HTML_ROFF_ENTITIES,
#   4. "&amp;" is decoded last so an escaped entity such as "&amp;lt;" is
#      not decoded twice.
def escape(text)
  return text.to_s if text.nil? || text.empty?

  # Integer#chr without an encoding raises RangeError above 255, so decode
  # explicitly as UTF-8. References that are not valid code points are left
  # untouched instead of aborting the conversion.
  decode = lambda do |entity, codepoint|
    begin
      codepoint.chr(Encoding::UTF_8)
    rescue RangeError
      entity
    end
  end

  text = text.dup
  text.gsub!(/&#x([0-9A-Fa-f]+);/) do # hex entities
    decode.call(Regexp.last_match(0), Regexp.last_match(1).to_i(16))
  end
  text.gsub!(/&#(\d+);/) do # dec entities
    decode.call(Regexp.last_match(0), Regexp.last_match(1).to_i)
  end
  text.gsub!('\\', '\e')                 # backslash
  text.gsub!('...', '\|.\|.\|.')         # ellipses
  text.gsub!(/[.-]/) { |m| "\\#{m}" }    # control chars
  HTML_ROFF_ENTITIES.each do |key, val|
    text.gsub!(key, val)
  end
  text.gsub!('&amp;', '&')               # amps
  text
end
Source
# File lib/ronn/roff.rb
# Walk a node (or a collection of nodes) in inline context and write the
# corresponding roff text and font escapes.
#
# node - nil, an Array / Nokogiri::XML::NodeSet of nodes, or a single text,
#        comment or element node.
#
# Raises RuntimeError for node kinds that are not handled. Unrecognized
# element names only produce a warning.
def inline_filter(node)
  return unless node # is an empty node

  if node.is_a?(Array) || node.is_a?(Nokogiri::XML::NodeSet)
    node.each { |ch| inline_filter(ch) }

  elsif node.text?
    write escape(node.to_html)

  elsif node.comment?
    # ignore HTML comments

  elsif node.elem?
    case node.name
    when 'span'
      inline_filter(node.children)

    when 'code'
      if child_of?(node, 'pre')
        inline_filter(node.children)
      else
        write '\fB'
        inline_filter(node.children)
        write '\fR'
      end

    when 'b', 'strong', 'kbd', 'samp'
      write '\fB'
      inline_filter(node.children)
      write '\fR'

    when 'var', 'em', 'i', 'u'
      write '\fI'
      inline_filter(node.children)
      write '\fR'

    when 'br'
      macro 'br'

    when 'a'
      if node.classes.include?('man-ref')
        inline_filter(node.children)
      elsif node.has_attribute?('data-bare-link')
        write '\fI'
        inline_filter(node.children)
        write '\fR'
      else
        inline_filter(node.children)
        # An anchor without an href (e.g. a named anchor) has no URL to
        # print, so only its content is rendered.
        href = node['href']
        unless href.nil?
          write ' '
          write '\fI'
          write escape(href)
          write '\fR'
        end
      end

    when 'sup'
      # This superscript equivalent is a big ugly hack.
      write '^('
      inline_filter(node.children)
      write ')'

    else
      warn 'unrecognized inline tag: %p', node.name
    end

  else
    raise "unexpected node: #{node.inspect}"
  end
end
Source
# File lib/ronn/roff.rb
# Emit a roff request/macro line such as `.SH "NAME"`.
#
# name  - macro name without the leading dot.
# value - optional argument String, or an Array of arguments that is joined
#         with single spaces.
def macro(name, value = nil)
  maybe_new_line
  request = [name, value].compact.join(' ')
  writeln ".#{request}"
end
Source
# File lib/ronn/roff.rb
# Write a newline unless the buffer is empty or its last chunk already ends
# with one.
def maybe_new_line
  tail = @buf.last
  return unless tail

  write "\n" unless tail[-1] == "\n"
end
Source
# File lib/ronn/roff.rb
# Collapse runs of newlines/spaces in text nodes to a single space, trim
# text that sits next to a block element, a <br> or the edge of its parent,
# and drop text nodes that end up empty. <pre> subtrees are left untouched.
#
# node - an Array / Nokogiri::XML::NodeSet of nodes, or a single node.
def normalize_whitespace!(node)
  if node.is_a?(Array) || node.is_a?(Nokogiri::XML::NodeSet)
    # Iterate over a copy: text nodes may be removed while we walk.
    node.to_a.dup.each { |child| normalize_whitespace!(child) }
  elsif node.text?
    before = node.previous
    after = node.next
    collapsed = node.content.gsub(/[\n ]+/m, ' ')

    trim_left = before.nil? || block_element?(before.name) || before.name == 'br'
    collapsed.lstrip! if trim_left

    trim_right = after.nil? || block_element?(after.name) || after.name == 'br'
    collapsed.rstrip! if trim_right

    if collapsed.empty?
      node.remove
    else
      node.content = collapsed
    end
  elsif node.elem? && node.name == 'pre'
    # stop traversing
  elsif node.elem? && node.children
    normalize_whitespace!(node.children)
  elsif node.elem?
    # element has no children
  elsif node.document? || node.fragment?
    normalize_whitespace!(node.children)
  elsif node.is_a?(Nokogiri::XML::DTD) || node.is_a?(Nokogiri::XML::Comment)
    # ignore
    nop
  else
    warn 'unexpected node during whitespace normalization: %p', node
  end
end
Source
# File lib/ronn/roff.rb
# Find the nearest preceding sibling that is an element, skipping any
# non-element siblings in between.
#
# Returns the element node, or nil when there is none or when node does not
# respond to #previous.
def previous(node)
  return unless node.respond_to?(:previous)

  sibling = node.previous
  sibling = sibling.previous while sibling && !sibling.elem?
  sibling
end
Source
# File lib/ronn/roff.rb
# Wrap text in double quotes so it can be passed as one roff macro argument.
#
# Embedded double quotes are emitted as the `\(dq` glyph escape. A
# backslash-quote sequence cannot be used for this: in roff `\"` begins a
# comment, which would silently truncate the argument.
def quote(text)
  # Block form keeps the replacement literal (no backreference parsing).
  escaped = text.gsub('"') { '\(dq' }
  %("#{escaped}")
end
Source
# File lib/ronn/roff.rb
# Strip HTML comment nodes out of the document tree.
#
# The comments are collected first and unlinked afterwards so the tree is not
# modified while it is being traversed. Each node is unlinked with #remove;
# deleting it from `parent.children` would only change a temporary node
# collection and leave the document itself untouched.
def remove_extraneous_elements!(doc)
  comments = []
  doc.traverse { |node| comments << node if node.comment? }
  comments.each(&:remove)
end
Source
# File lib/ronn/roff.rb
# Emit the generator comments and, when a name is given, the `.TH` title
# line.
#
# name     - page name; upcased and escaped. When nil, no TH line is written.
# section  - manual section.
# _tagline - unused.
# manual   - manual name; appended as a fifth TH argument only when given.
# version  - version string (nil renders as an empty argument).
# date     - object responding to #strftime, or nil (renders as an empty
#            argument instead of raising).
def title_heading(name, section, _tagline, manual, version, date)
  comment "generated with Ronn-NG/v#{Ronn.version}"
  comment "http://github.com/apjanke/ronn-ng/tree/#{Ronn.revision}"
  return if name.nil?

  fields = [escape(name.upcase), section, date&.strftime('%B %Y'), version]
  fields << manual if manual
  macro 'TH', fields.map { |field| %("#{field}") }.join(' ')
end
Source
# File lib/ronn/roff.rb
# Print a "warn: "-prefixed warning through Kernel.warn.
#
# text - format string (see Kernel#format).
# args - values for the format specifiers in text. They are splatted into
#        format so each specifier receives its own argument; passing the
#        array itself would make `%p` print the whole array and would raise
#        for format strings with more than one specifier.
def warn(text, *args)
  Kernel.warn format("warn: #{text}", *args)
end
Source
# File lib/ronn/roff.rb
# Append text to the output buffer, protecting lines that would otherwise be
# parsed as roff control lines.
#
# A line must not start with "." (which arrives here already escaped as "\.")
# or "'", so the zero-width `\&` escape is inserted in front of such lines.
# nil and empty text are ignored.
def write(text)
  return if text.nil? || text.empty?

  # Block-form gsub keeps the replacement literal. As a replacement *string*,
  # "\&" and "\'" would be read as the match / post-match backreferences and
  # corrupt the text.
  text = text.gsub("\n\\.") { "\n\\&\\." }
  text = text.gsub("\n'") { "\n\\&'" }

  # The same protection applies when this chunk itself begins a new line.
  at_line_start = @buf.last && @buf.last[-1] == "\n"
  @buf << '\&' if at_line_start && text.start_with?('\.', "'")
  @buf << text
end
Write text to the output buffer.
Source
# File lib/ronn/roff.rb
# Write text as a line of its own: start a fresh line if the buffer does not
# already end with a newline, then emit the text followed by a newline.
def writeln(text)
  maybe_new_line
  write(text)
  write("\n")
end
Write text to the output buffer on a new line.