class HTMLValidationResult
Attributes
Public Class Methods
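Takes a file path prefix, reads the previously saved "<filepath>.resource.txt" and "<filepath>.html.txt" files, and loads their contents into a new HTMLValidationResult object.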
# File lib/html_validation/html_validation_result.rb, line 34
# Builds a result from files saved under the +filepath+ prefix.
#
# Reads "<filepath>.resource.txt" and "<filepath>.html.txt" and passes their
# contents, plus +filepath+ itself as the data path, to
# HTMLValidationResult.new. File.read is used so each file handle is closed
# as soon as its contents have been read.
#
# Raises Errno::ENOENT when either file does not exist.
def self.load_from_files(filepath)
  resource = File.read("#{filepath}.resource.txt")
  html = File.read("#{filepath}.html.txt")
  HTMLValidationResult.new(resource, html, filepath)
end
tidy_flags ex: tidy_flags = ['--show-warnings false'] (options is a Hash, ex: options = {ignore_proprietary: true})
# File lib/html_validation/html_validation_result.rb, line 16
# Stores the resource, its html and the data path, merges +tidy_flags+ into
# HTMLValidation.default_tidy_flags (duplicates removed), keeps +options+,
# and then runs valid? once.
def initialize(resource, html, datapath, tidy_flags = [], options = {})
  @resource, @html, @datapath = resource, html, datapath
  @exceptions = ''
  combined_flags = HTMLValidation.default_tidy_flags + tidy_flags
  @tidy_flags = combined_flags.uniq
  @options = options
  valid?
end
Public Instance Methods
Saves the exception string for the given url or file path. When next run, if the exception string is identical, valid? will return true. Note that exceptions
will still list the exception string, though, even if it is an accepted exception string.
# File lib/html_validation/html_validation_result.rb, line 56
# Writes the current exception string (@exceptions) to the "accepted" data
# file, replacing any previous content.
def accept!
  File.write(data_path("accepted"), @exceptions)
end
# File lib/html_validation/html_validation_result.rb, line 60
# Removes the "accepted" data file, if there is one, so that a previously
# accepted exception string no longer counts as accepted.
#
# Returns nil when no accepted file exists.
def reject!
  accepted_path = data_path("accepted")
  # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
  File.delete(accepted_path) if File.exist?(accepted_path)
end
Validates an html string using html tidy. If there are no warnings or exceptions, or there is a previously accepted exception string that matches exactly, valid? returns true. Line numbers of exceptions are likely to change with any edit, so our validation compares the exception strings with the lines and columns removed. Name can be a filename, file system path, or url, so long as it is uniquely associated with the passed in html.
# File lib/html_validation/html_validation_result.rb, line 45
# Runs the validation and reports whether the html is considered valid.
#
# Stores the raw output of validate in @exceptions. The result is true when
# the filtered output is empty or when accepted? reports the output as
# previously accepted. The html, resource and exceptions are saved on every
# call via save_html_and_exceptions.
#
# Returns true or false.
def valid?
  @exceptions = validate
  accepted_path = data_path("accepted")
  # A run with no output at all removes any stored accepted exception string.
  # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
  File.delete(accepted_path) if @exceptions == '' && File.exist?(accepted_path)
  valid = filter(@exceptions) == '' || accepted?(@exceptions)
  save_html_and_exceptions
  valid
end
Private Instance Methods
have we previously accepted this exact string for this path?
# File lib/html_validation/html_validation_result.rb, line 91
# Tells whether +exception_str+ matches the stored accepted exception string.
#
# Both strings are passed through filter before they are compared, so the
# comparison is made on the filtered text. Returns false when no "accepted"
# data file exists.
def accepted?(exception_str)
  filtered = filter(exception_str)
  accepted_path = data_path('accepted')
  # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
  return false unless File.exist?(accepted_path)

  # File.read closes the handle once the content has been read.
  filter(File.read(accepted_path)) == filtered
end
get the filename for storing a type of data
# File lib/html_validation/html_validation_result.rb, line 80
# Builds the file name used to store one kind of data:
# "<@datapath>.<filetype>.txt".
def data_path(filetype)
  suffix = ".#{filetype}.txt"
  "#{@datapath}#{suffix}"
end
Line numbers of exceptions are likely to change with any minor edit, so our validation compares the result strings with the lines and columns removed. This means that if the errors change position in the file (up or down b/c you add or remove code), accepted exception strings will remain valid.
# File lib/html_validation/html_validation_result.rb, line 100
# Reduces tidy output to the part that is compared between runs.
#
# Whole lines are dropped when they are "trimming empty" messages, or when
# the matching option asks for it (:ignore_proprietary, :ignored_tag_errors,
# :ignored_attribute_errors, :ignored_errors). Finally every
# "line N column M -" prefix is removed, so a message that merely moved
# within the file still produces the same string.
def filter(str)
  # The "trimming empty" messages are overzealous and do not mean invalid html.
  remaining = str.gsub(/^line.*trimming empty.*\n/, '')

  # Proprietary attributes (IE-only ones such as wrap or spellcheck, or other
  # things not in the standard) are only ignored on request.
  if options[:ignore_proprietary]
    remaining = remaining.gsub(/^line.*proprietary.*\n/, '')
  end

  tag_names = options[:ignored_tag_errors]
  if tag_names && tag_names.any?
    remaining = remaining.gsub(/^line.*(?:Error|Warning):.*<\/?(?:#{tag_names.join('|')})>.*\n/, '')
  end

  attribute_names = options[:ignored_attribute_errors]
  if attribute_names && attribute_names.any?
    remaining = remaining.gsub(/^line.*(?:Error|Warning):.* attribute \"(?:#{attribute_names.join('|')})\".*\n/, '')
  end

  error_patterns = options[:ignored_errors]
  if error_patterns && error_patterns.any?
    # Test the message text only, with the "line ... Error:/Warning:" prefix
    # removed, before stripping the matching lines.
    messages_only = remaining.gsub(/^line.*(?:Error|Warning):/, '')
    if messages_only =~ ignored_errors_regex
      whole_line = Regexp.new(/^line.*(?:Error|Warning):/.source + '.*' + ignored_errors_regex.source + '.*' + /\n/.source)
      remaining = remaining.gsub(whole_line, '')
    end
  end

  # e.g. "line 1 column 1 - Warning: missing <!DOCTYPE> declaration"
  remaining.gsub(/line [0-9]+ column [0-9]+ -/, '')
end
# File lib/html_validation/html_validation_result.rb, line 112
# Builds one regular expression that matches any of the patterns listed in
# options[:ignored_errors], joined as alternatives of a non-capturing group.
def ignored_errors_regex
  alternatives = options[:ignored_errors].join('|')
  Regexp.new("(?:#{alternatives})")
end
# File lib/html_validation/html_validation_result.rb, line 84
# Writes @html, @resource and @exceptions to their "html", "resource" and
# "exceptions" data files, replacing any previous content.
def save_html_and_exceptions
  File.write(data_path("html"), @html)
  File.write(data_path("resource"), @resource)
  File.write(data_path("exceptions"), @exceptions)
end
We used to specifically prefer /usr/bin/tidy by default on *nix, as there is another “tidy” program that could end up earlier on the path. Tidy was installed at this location for me by default. The norm is now to custom install the tidy fork for HTML 5, though, and respecting the PATH is better philosophically. Now we expect the PATH to be correct, i.e., if which tidy is being used is an issue, put the right tidy first on the PATH.
# File lib/html_validation/html_validation_result.rb, line 72
# Builds the command line used to run tidy: the executable name
# ("tidy.exe" when host_os matches mswin, mingw or cygwin, otherwise "tidy")
# followed by the space-joined @tidy_flags. The executable is given without
# a directory, so it is resolved through the PATH.
def tidy_command
  executable =
    if RbConfig::CONFIG['host_os'] =~ /mswin|mingw|cygwin/
      'tidy.exe'
    else
      'tidy'
    end
  "#{executable} #{@tidy_flags.join(' ')}"
end
# File lib/html_validation/html_validation_result.rb, line 116
# Pipes @html through the command built by tidy_command and returns
# everything the command wrote to stderr.
#
# @html is converted to UTF-8 in place before it is sent. Open3.capture3
# feeds stdin and drains stdout and stderr concurrently, so large documents
# or long diagnostics cannot stall on a full pipe, and it closes the pipes
# and waits for the process even when an error is raised.
#
# Returns the stderr output as a String; the command's stdout is discarded.
def validate
  input = @html.encode!("UTF-8", invalid: :replace, undef: :replace).force_encoding("utf-8")
  # The command always receives newline-terminated input; += builds a new
  # string, so @html itself is not given the extra newline.
  input += "\n" unless input.end_with?("\n")
  _tidied_html, diagnostics, _status = Open3.capture3(tidy_command, stdin_data: input)
  diagnostics
end