class ValidateWebsite::Validator
Validates a document against its DTD or XSD (HTML5 documents are checked with tidy by default, or with the Nu webservice)
Constants
- XHTML_SCHEMA_PATH
Attributes
html5_validator_service_url[RW]
body[R]
doc[R]
dtd[R]
html5_validator[R]
namespace[R]
original_doc[R]
Public Class Methods
new(original_doc, body, ignore: nil, html5_validator: :tidy)
click to toggle source
@param [Nokogiri::HTML::Document] original_doc
@param [String] body The raw HTTP response body of the page
@param [Regexp] ignore Errors to ignore
@param [Symbol] html5_validator
default offline :tidy
fallback webservice :nu
# File lib/validate_website/validator.rb, line 44
# Sets up a validator for one fetched page.
#
# @param original_doc [Nokogiri::HTML::Document] parsed page; its internal
#   subset is stored as the DTD
# @param body [String] raw HTTP response body of the page
# @param ignore [Regexp, nil] pattern of error messages that #errors filters out
# @param html5_validator [Symbol] validator used for HTML5 documents
#   (:tidy or :nu; anything else falls through to Nokogiri::HTML5)
def initialize(original_doc, body, ignore: nil, html5_validator: :tidy)
  @original_doc = original_doc
  @body = body
  @ignore = ignore
  @html5_validator = html5_validator
  @errors = []
  @document = nil
  # Cleared before find_namespace runs, because that helper assigns @dtd_uri
  # when the DTD carries a usable system id.
  @dtd_uri = nil
  @dtd = original_doc.internal_subset
  @namespace = find_namespace(@dtd)
end
schema(namespace)
click to toggle source
# File lib/validate_website/validator.rb, line 23
# Loads the XSD schema whose file name is "<namespace>.xsd" inside
# XHTML_SCHEMA_PATH. The lookup runs under @mutex and inside a Dir.chdir
# block (presumably so relative references inside the XSD resolve - confirm).
#
# @param namespace [String] base name of the schema file, without extension
# @return [Nokogiri::XML::Schema, nil] the parsed schema, or nil when no
#   matching .xsd file exists
def schema(namespace)
  xsd_file = "#{namespace}.xsd"
  @mutex.synchronize do
    Dir.chdir(XHTML_SCHEMA_PATH) do
      next unless File.exist?(xsd_file)

      Nokogiri::XML::Schema(File.read(xsd_file))
    end
  end
end
Also aliased as: xsd
Public Instance Methods
errors()
click to toggle source
@return [Array] of errors
# File lib/validate_website/validator.rb, line 63
# Returns the collected validation errors as strings.
#
# The stored errors are converted to strings in place. When an ignore
# pattern was given, a new array without the matching messages is returned;
# otherwise the stored array itself is returned.
#
# @return [Array<String>] error messages, minus those matching the ignore pattern
def errors
  @errors.map!(&:to_s)
  return @errors unless @ignore

  # match? answers the yes/no question without building MatchData or
  # touching the regexp globals for every message.
  @errors.reject { |message| @ignore.match?(message) }
end
valid?()
click to toggle source
@return [Boolean]
# File lib/validate_website/validator.rb, line 57
# Runs the validation and reports whether any (non-ignored) errors remain.
#
# @return [Boolean] true when #errors is empty after validating
def valid?
  find_errors
  remaining = errors
  remaining.empty?
end
Private Instance Methods
document()
click to toggle source
# File lib/validate_website/validator.rb, line 81
# Returns the markup to validate, memoized in @document.
#
# When the body contains the DTD URI found in the DOCTYPE, its first
# occurrence is replaced by "<namespace>.dtd"; otherwise the body is
# returned unchanged.
#
# @return [String] the body, with the DTD URI rewritten when present
def document
  return @document if @document

  dtd_location = @dtd_uri.to_s
  @document =
    # include? compares literally, like the sub below. String#match would
    # compile the URI into a regexp, so characters such as "+", "?" or "("
    # in the URI would stop it from matching (or raise).
    if @dtd_uri && @body.include?(dtd_location)
      @body.sub(dtd_location, "#{@namespace}.dtd")
    else
      @body
    end
end
find_errors()
click to toggle source
nokogiri.org/tutorials/ensuring_well_formed_markup.html
# File lib/validate_website/validator.rb, line 106
# Runs #validate and stores its result in @errors. A
# Nokogiri::XML::SyntaxError raised during validation is appended to the
# existing @errors instead of propagating.
#
# @return [Array] the error list
def find_errors
  @errors = validate
rescue Nokogiri::XML::SyntaxError => syntax_error
  @errors.push(syntax_error)
end
find_namespace(dtd)
click to toggle source
# File lib/validate_website/validator.rb, line 71
# Derives the namespace (DTD file base name) from a DTD's system id and
# remembers the parsed URI in @dtd_uri.
#
# @param dtd [#system_id, nil] the document's internal subset; nil when the
#   document has no DOCTYPE
# @return [String, nil] base name of the DTD path without ".dtd", or nil
#   when there is no DTD, no system id, or the URI has no path
def find_namespace(dtd)
  # Safe navigation: a page without a DOCTYPE has no internal subset, and
  # calling system_id on nil would raise NoMethodError.
  system_id = dtd&.system_id
  return unless system_id

  dtd_uri = URI.parse(system_id)
  return unless dtd_uri.path

  @dtd_uri = dtd_uri
  File.basename(dtd_uri.path, '.dtd')
end
html5_validate()
click to toggle source
# File lib/validate_website/validator.rb, line 112
# Validates an HTML5 document with the configured validator.
#
# :tidy is used only when the class reports a tidy implementation; :nu uses
# the Nu validator; every other case falls through to Nokogiri::HTML5.
#
# @return [Array] the errors reported by the chosen validator
def html5_validate
  engine = html5_validator.to_sym
  return tidy_validate if engine == :tidy && self.class.tidy
  return nu_validate if engine == :nu

  Nokogiri::HTML5(document, max_errors: -1).errors
end
nu_validate()
click to toggle source
# File lib/validate_website/validator.rb, line 131
# Validates the document text with W3CValidators::NuValidator, configured
# with the class-level validator_uri, and appends the reported errors to
# the array returned by #errors.
#
# @return [Array] the error list after appending the validator's errors
def nu_validate
  service = W3CValidators::NuValidator.new(validator_uri: self.class.validator_uri)
  response = service.validate_text(document)
  errors.concat(response.errors)
end
tidy_validate()
click to toggle source
# File lib/validate_website/validator.rb, line 122
# Validates the document with the class-level tidy implementation.
#
# The tidy report is split on newlines, one error per line, and appended to
# the array returned by #errors.
#
# @return [Array] the error list, or an empty array when tidy reports nothing
def tidy_validate
  report = self.class.tidy.new(document)
  return [] unless report.errors

  errors.concat(report.errors.split("\n"))
end
validate()
click to toggle source
@return [Array] the resulting validation errors
# File lib/validate_website/validator.rb, line 92
# Chooses a validation strategy for the document and runs it.
#
# - a document with a line starting with "<!DOCTYPE html>" goes to #html5_validate
# - otherwise, when an XSD exists for the namespace, the document is
#   validated against that schema
# - otherwise it is parsed with Nokogiri::HTML and the parse errors returned
#
# @return [Array] the errors produced by the selected strategy
def validate
  return html5_validate if document =~ /^\<!DOCTYPE html\>/i

  # Look the schema up once: each lookup takes the class mutex, changes
  # directory and re-reads/re-parses the XSD file.
  xsd = self.class.schema(@namespace)
  return xsd.validate(xhtml_doc) if xsd

  # No XSD available: fall back to DTD-based parsing.
  Dir.chdir(XHTML_SCHEMA_PATH) do
    Nokogiri::HTML.parse(document)
  end.errors
end
xhtml_doc()
click to toggle source
# File lib/validate_website/validator.rb, line 139
# Parses the document as XML from within XHTML_SCHEMA_PATH, with the
# nonoent, dtdload, dtdvalid and nonet parse options applied.
#
# @return [Nokogiri::XML::Document] the parsed document
def xhtml_doc
  Dir.chdir(XHTML_SCHEMA_PATH) do
    Nokogiri::XML(document) do |parse_options|
      parse_options.nonoent.dtdload.dtdvalid.nonet
    end
  end
end