class ValidateWebsite::Validator

Validates a document against a DTD or an XSD schema. HTML5 documents are validated with tidy, the Nu validator web service, or Nokogiri's HTML5 parser.

Constants

XHTML_SCHEMA_PATH

Attributes

html5_validator_service_url[RW]
body[R]
doc[R]
dtd[R]
html5_validator[R]
namespace[R]
original_doc[R]

Public Class Methods

new(original_doc, body, ignore: nil, html5_validator: :tidy) click to toggle source

@param [Nokogiri::HTML::Document] original_doc the document to validate @param [String] body the raw HTTP response body of the page @param [Regexp] ignore errors matching this pattern are ignored @param [Symbol] html5_validator the HTML5 validator to use; defaults to the offline :tidy

The fallback web service is :nu
# File lib/validate_website/validator.rb, line 44
# Builds a validator for a single fetched page.
#
# @param original_doc [Nokogiri::HTML::Document] document whose internal
#   subset (DOCTYPE) is inspected to determine the namespace
# @param body [String] raw HTTP response body of the page
# @param ignore [Regexp, nil] error messages matching this are dropped by #errors
# @param html5_validator [Symbol] validator used for HTML5 pages (default :tidy)
def initialize(original_doc, body, ignore: nil, html5_validator: :tidy)
  @original_doc = original_doc
  @body = body
  @ignore = ignore
  @html5_validator = html5_validator
  @errors = []
  @document = nil
  @dtd_uri = nil
  # find_namespace may assign @dtd_uri, so it must run after the reset above.
  @dtd = @original_doc.internal_subset
  @namespace = find_namespace(@dtd)
end
schema(namespace) click to toggle source

www.w3.org/TR/xhtml1-schema/

# File lib/validate_website/validator.rb, line 23
# Loads the XSD schema named after +namespace+ from XHTML_SCHEMA_PATH.
#
# The lookup runs inside Dir.chdir, guarded by @mutex, so the file is
# resolved relative to the schema directory.
#
# @param namespace [String, nil] base name of the schema file (without ".xsd")
# @return [Nokogiri::XML::Schema, nil] the parsed schema, or nil when no
#   "<namespace>.xsd" file exists
def schema(namespace)
  xsd_file = "#{namespace}.xsd"

  @mutex.synchronize do
    Dir.chdir(XHTML_SCHEMA_PATH) do
      next unless File.exist?(xsd_file)

      Nokogiri::XML::Schema(File.read(xsd_file))
    end
  end
end
Also aliased as: xsd
xsd(namespace)
Alias for: schema

Public Instance Methods

errors() click to toggle source

@return [Array] of errors

# File lib/validate_website/validator.rb, line 63
# Lists the collected errors as strings.
#
# The stored errors are converted to strings in place; when an ignore
# pattern was given, matching messages are left out of the returned list
# (the stored list itself keeps them).
#
# @return [Array<String>] error messages
def errors
  @errors.map!(&:to_s)
  return @errors unless @ignore

  @errors.reject { |message| @ignore =~ message }
end
valid?() click to toggle source

@return [Boolean]

# File lib/validate_website/validator.rb, line 57
# Runs validation and reports whether any (non-ignored) error remains.
#
# @return [Boolean] true when #errors is empty after validating
def valid?
  find_errors
  remaining = errors
  remaining.empty?
end

Private Instance Methods

document() click to toggle source
# File lib/validate_website/validator.rb, line 81
# Returns the body to validate, memoized in @document.
#
# When a DTD URI was found and it occurs in the body, its first occurrence
# is replaced by "<namespace>.dtd"; otherwise the body is returned as is.
#
# @return [String] the (possibly rewritten) response body
def document
  @document ||= begin
    dtd_location = @dtd_uri.to_s
    # Compare literally: String#match would compile the URI as a regexp, so
    # characters such as "?" or "+" in it would prevent the rewrite, even
    # though String#sub below treats the same string literally.
    if @dtd_uri && @body.include?(dtd_location)
      @body.sub(dtd_location, "#{@namespace}.dtd")
    else
      @body
    end
  end
end
find_errors() click to toggle source

nokogiri.org/tutorials/ensuring_well_formed_markup.html

# File lib/validate_website/validator.rb, line 106
# Runs #validate and stores its result in @errors.
#
# A Nokogiri::XML::SyntaxError raised during validation is not propagated;
# it is appended to the error list already held in @errors.
#
# @return [Array] the stored error list
def find_errors
  @errors = validate
rescue Nokogiri::XML::SyntaxError => syntax_error
  @errors.push(syntax_error)
end
find_namespace(dtd) click to toggle source

www.w3.org/TR/xhtml1/#dtds

# File lib/validate_website/validator.rb, line 71
# Derives the namespace (the DTD file's base name) from a DOCTYPE.
#
# On success the parsed system identifier is also stored in @dtd_uri.
#
# @param dtd [#system_id, nil] the document's internal subset; nil is
#   accepted and treated like a DOCTYPE without a system identifier
# @return [String, nil] base name of the DTD path without ".dtd", or nil
#   when there is no DTD, no system identifier, or the URI has no path
def find_namespace(dtd)
  # Safe navigation: a document may have no internal subset at all.
  system_id = dtd&.system_id
  return unless system_id

  dtd_uri = URI.parse(system_id)
  return unless dtd_uri.path

  @dtd_uri = dtd_uri
  File.basename(dtd_uri.path, '.dtd')
end
html5_validate() click to toggle source
# File lib/validate_website/validator.rb, line 112
# Validates an HTML5 document with the configured validator.
#
# :tidy is used only when the class reports a tidy implementation; :nu
# selects the Nu validator; anything else falls back to the errors
# reported by Nokogiri's HTML5 parser.
#
# @return [Array] validation errors
def html5_validate
  mode = html5_validator.to_sym
  return tidy_validate if mode == :tidy && self.class.tidy
  return nu_validate if mode == :nu

  Nokogiri::HTML5(document, max_errors: -1).errors
end
nu_validate() click to toggle source
# File lib/validate_website/validator.rb, line 131
# Validates the document text with W3CValidators::NuValidator, using the
# validator URI configured on the class, and appends the reported errors
# to the list returned by #errors.
#
# @return [Array] the error list with the validator's errors appended
def nu_validate
  service = W3CValidators::NuValidator.new(validator_uri: self.class.validator_uri)
  report = service.validate_text(document)
  errors.concat(report.errors)
end
tidy_validate() click to toggle source
# File lib/validate_website/validator.rb, line 122
# Validates the document with the class-level tidy implementation.
#
# Tidy's error output is split on newlines and appended to the list
# returned by #errors.
#
# @return [Array] the error list with tidy's messages appended, or an
#   empty array when tidy reports no errors
def tidy_validate
  report = self.class.tidy.new(document)
  return [] unless report.errors

  errors.concat(report.errors.split("\n"))
end
validate() click to toggle source

@return [Array] the errors found during validation

# File lib/validate_website/validator.rb, line 92
# Validates the document with the strategy that fits its DOCTYPE.
#
# * a line starting with "<!DOCTYPE html>" -> HTML5 validation
# * an XSD exists for the namespace        -> schema validation
# * otherwise                              -> errors from Nokogiri's HTML
#   parser, run from XHTML_SCHEMA_PATH
#
# @return [Array] the resulting errors
def validate
  return html5_validate if document =~ /^\<!DOCTYPE html\>/i

  # Load the schema once: each lookup takes the class mutex and re-reads
  # and re-parses the XSD file.
  xsd = self.class.schema(@namespace)
  return xsd.validate(xhtml_doc) if xsd

  # No XSD available: fall back to DTD-based parsing.
  # NOTE(review): this chdir is not guarded by the mutex that the schema
  # lookup uses - confirm whether validators run concurrently.
  Dir.chdir(XHTML_SCHEMA_PATH) do
    Nokogiri::HTML.parse(document)
  end.errors
end
xhtml_doc() click to toggle source
# File lib/validate_website/validator.rb, line 139
# Parses the document as XML from inside XHTML_SCHEMA_PATH, with the
# nonoent, dtdload, dtdvalid and nonet parse options set.
#
# @return [Nokogiri::XML::Document] the parsed document
def xhtml_doc
  Dir.chdir(XHTML_SCHEMA_PATH) do
    Nokogiri::XML(document) do |options|
      options.nonoent.dtdload.dtdvalid.nonet
    end
  end
end