module JustFlow

Public Instance Methods

convert(url) click to toggle source
# File lib/justflow.rb, line 9
# Downloads the page at +url+ together with its scripts, stylesheets and
# images into a directory named after the URL, and writes the rewritten
# markup to +index.html+ inside that directory.
#
# The directory name is host + path + query with characters that are not
# filename-safe replaced by underscores.
#
# NOTE: this changes the process working directory to the target directory
# and does not change it back.
#
# @param url [String] address of the page to convert
def convert(url)
  puts "Converting".yellow + " #{url}..."
  @url = url
  url_parsed = URI.parse(url)
  url_parsed2 = url_parsed.host.to_s + url_parsed.path.to_s + url_parsed.query.to_s
  target_dir = url_parsed2.gsub(/[\x00\/\\:\*\?\"<>\|]/, '_')
  ensure_mkdir(target_dir)
  Dir.chdir(target_dir)

  # Go through URI#open (provided by open-uri for http/https/ftp URIs) rather
  # than Kernel#open: Kernel#open no longer fetches URLs on Ruby 3+ and would
  # run a shell command for input starting with "|". Anything that is not a
  # fetchable URI is treated as a plain file path.
  source = url_parsed.respond_to?(:open) ? url_parsed.open : File.open(url)
  begin
    @doc = Nokogiri::HTML(source)
  ensure
    # Release the response handle once parsing is done.
    source.close
  end

  get_scripts()
  get_css()
  get_images()

  File.open('index.html', 'wb') { |file|
    file.write(@doc)
  }
  puts "We done.".green
end
download_resource(selector, source_attr, out_path) click to toggle source
# File lib/justflow.rb, line 86
# Downloads every resource referenced by the elements matching +selector+
# and points the element at the local copy.
#
# For each matching node the value of +source_attr+ is made absolute against
# the page URL, fetched, saved under +out_path+ (query string dropped from
# the file name), and the attribute is overwritten with the saved path.
# Failures are reported and the node is left pointing at its original value.
#
# @param selector [String] selector passed to @doc.search
# @param source_attr [String] attribute holding the resource address
# @param out_path [String] directory the downloads are written to
def download_resource(selector, source_attr, out_path)
  @doc.search(selector).each do |resource|
    resource_uri = resource[source_attr]

    # An element without the attribute (or with a blank one) has nothing to
    # download; without this guard File.basename(nil) below aborts the run.
    next if resource_uri.nil? || resource_uri.strip.empty?

    begin
      resource_uri = fix_uri(@url, resource_uri)
    rescue StandardError
      puts "URI is funky. Going for it anyway... #{resource_uri}".red
    end

    save_path = remove_args(File.join(out_path, File.basename(resource_uri)))

    begin
      puts "✓".green + " Downloading ... " + resource_uri
      resp = get_contents(resource_uri)
      ensure_mkdir(out_path)
      # save_contents may pick a different name when the file already exists.
      save_path = save_contents(resp, save_path)
      resource[source_attr] = save_path
    rescue StandardError => ex
      # StandardError rather than Exception so Ctrl-C and exit still work.
      puts "✗".red + " FAIL. Couldn't do it: #{ex}"
    end
  end
end
ensure_mkdir(dirname) click to toggle source
# File lib/justflow.rb, line 61
# Creates +dirname+ unless a directory with that name is already present.
# Only the last path component is created (plain Dir.mkdir).
#
# @param dirname [String] directory to create
def ensure_mkdir(dirname)
  return if File.directory?(dirname)

  Dir.mkdir(dirname)
end
fix_uri(url, uri) click to toggle source
# File lib/justflow.rb, line 38
# Turns a reference found in a document into an absolute address.
#
# Surrounding whitespace is stripped. Protocol-relative references ("//host/x")
# take the scheme of +url+ when that is http or https, and "http" otherwise.
# Everything else is resolved against +url+ with URI.join; if that fails the
# stripped reference is returned unchanged after printing a notice.
#
# @param url [String] address of the document containing the reference
# @param uri [String] the reference to resolve
# @return [String] the resolved (or, on failure, merely stripped) reference
def fix_uri(url, uri)
  uri = uri.strip
  if uri.start_with?('//')
    base_scheme = begin
      URI.parse(url.to_s).scheme.to_s.downcase
    rescue URI::InvalidURIError
      ''
    end
    # Keep https pages on https; fall back to http when the base is unusable.
    base_scheme = 'http' unless %w[http https].include?(base_scheme)
    uri = "#{base_scheme}:#{uri}"
  else # relative or absolute
    begin
      uri = URI.join(url, uri).to_s
    rescue StandardError => ex
      puts "x".red + " Will try to download anyway. #{ex}"
    end
  end
  uri
end
get_contents(uri) click to toggle source
# File lib/justflow.rb, line 56
# Performs an HTTP GET for +uri+ and hands back the response object as
# returned by Net::HTTP.get_response (no status check is done here).
#
# @param uri [String] absolute address to fetch
def get_contents(uri)
  Net::HTTP.get_response(URI.parse(uri))
end
get_css() click to toggle source
# File lib/justflow.rb, line 158
# Downloads every linked stylesheet into "css" and then rewrites the url(...)
# references inside each downloaded file.
#
# The original href of each link is recorded before downloading, because
# download_resource overwrites href with the local path while
# process_css_urls needs the original address of the stylesheet.
def get_css()
  selector = 'link[rel=stylesheet]'

  # Recording the hrefs is enough; the rest of the document is not needed.
  original_hrefs = @doc.search(selector).map { |link| link['href'] }
  download_resource(selector, 'href', 'css')

  @doc.search(selector).each_with_index do |link, idx|
    local_css_path = link['href']

    # Links without an href, or whose download failed (href still remote),
    # have no local file to post-process. File.exist? replaces File.exists?,
    # which no longer exists on current Ruby versions.
    next if local_css_path.nil? || !File.exist?(local_css_path)

    rewritten = process_css_urls(File.read(local_css_path), original_hrefs[idx])
    File.open(local_css_path, 'w') { |file| file.write(rewritten) }
  end
end
get_images() click to toggle source
# File lib/justflow.rb, line 150
# Fetches every <img> that carries a src attribute into the "img" directory
# by delegating to download_resource.
def get_images
  download_resource('img[src]', 'src', 'img')
end
get_scripts() click to toggle source
# File lib/justflow.rb, line 154
# Fetches every <script> that carries a src attribute into the "js" directory
# by delegating to download_resource.
def get_scripts
  download_resource('script[src]', 'src', 'js')
end
is_font?(extension) click to toggle source
# File lib/justflow.rb, line 34
# Reports whether +extension+ is treated as a font. This is simply the
# negation of is_img?: anything that is not a known image extension counts.
#
# @param extension [String] file extension including the leading dot
# @return [Boolean]
def is_font?(extension)
  !is_img?(extension)
end
is_img?(extension) click to toggle source
# File lib/justflow.rb, line 29
# Reports whether +extension+ (leading dot included, any letter case) is one
# of the image extensions this module recognises.
#
# @param extension [String] e.g. ".png"
# @return [Boolean]
def is_img?(extension)
  case extension.downcase
  when '.png', '.jpg', '.jpeg', '.gif', '.svg', '.tif', '.tiff'
    true
  else
    false
  end
end
process_css_urls(css_source, original_css_url) click to toggle source
# File lib/justflow.rb, line 113
# Downloads the assets referenced through url(...) in a stylesheet and
# returns the stylesheet text with those references pointing at the local
# copies.
#
# Image extensions (see is_img?) are stored under "img", everything else
# under "fonts". Rewritten references are prefixed with "../", i.e. they are
# written relative to a stylesheet that sits one directory below the output
# root. References that carry no downloadable address (empty, "data:" URIs,
# "#fragment"-only) are left exactly as they were.
#
# @param css_source [String] stylesheet text
# @param original_css_url [String] address the stylesheet was linked with
# @return [String] stylesheet text with rewritten url(...) references
def process_css_urls(css_source, original_css_url)
  url_regex = /url\(['"]?(.*?)['"]?\)/i

  # Anything without an explicit scheme ("//host/x", "/x", "./x", "css/x")
  # has to be resolved against the page before it can serve as a base.
  unless original_css_url =~ /\A[a-z][a-z0-9+.\-]*:/i
    original_css_url = fix_uri(@url, original_css_url)
  end

  puts ">".yellow + " Parsing css ... #{original_css_url}"

  css_source.gsub(url_regex) do
    # Capture match data first; later regex work would overwrite it.
    whole_match = Regexp.last_match(0)
    original_item_url = Regexp.last_match(1)

    reference = original_item_url.strip
    next whole_match if reference.empty? || reference =~ /\A(?:data:|#)/i

    absolute_item_url = fix_uri(original_css_url, original_item_url)
    local_name = remove_args(original_item_url)

    extension = File.extname(local_name)
    basename  = File.basename(local_name)

    out_dir = is_img?(extension) ? 'img' : 'fonts'
    out_path = File.join(out_dir, basename)

    begin
      resp = get_contents(absolute_item_url)
      ensure_mkdir(out_dir)
      save_contents(resp, out_path)
    rescue StandardError => ex
      # Best effort: report and still rewrite the reference below.
      puts "Failed. Couldnt download from CSS: #{ex}".red
    end

    "url('#{File.join('..', out_path)}')"
  end
end
remove_args(url) click to toggle source
# File lib/justflow.rb, line 82
# Returns the first run of characters in +url+ that contains no "?", which
# for an ordinary address is everything before the query string. Returns nil
# when +url+ has no such run (e.g. it is empty).
#
# @param url [String]
# @return [String, nil]
def remove_args(url)
  found = url.match(/[^\?]+/)
  found ? found[0] : nil
end
save_contents(resp, save_path) click to toggle source
# File lib/justflow.rb, line 67
# Writes the body of +resp+ to +save_path+ and returns the path written.
#
# When a file already exists at +save_path+ and its extension is not an
# image extension (see is_img?), the data goes to a sibling file whose name
# has "_<unix timestamp>" inserted before the extension. Existing images are
# overwritten in place.
#
# @param resp [#body] response whose body is written in binary mode
# @param save_path [String] desired destination
# @return [String] the path the data was actually written to
def save_contents(resp, save_path)
  if File.file?(save_path)
    extension = File.extname(save_path)
    unless is_img?(extension)
      basename = File.basename(save_path)
      filename = File.basename(save_path, extension)
      renamed = "#{filename}_#{Time.now.to_i}#{extension}"
      # Replace only the trailing file name. Substituting the stem anywhere in
      # the path would also rename a matching directory ("css/css.css") or a
      # matching extension. The block form keeps the replacement literal.
      save_path = save_path.sub(/#{Regexp.escape(basename)}\z/) { renamed }
    end
  end
  File.open(save_path, 'wb') { |file| file.write(resp.body) }
  save_path
end
valid_uri_scheme?(uri) click to toggle source
# File lib/justflow.rb, line 52
# Reports whether +uri+ begins with "http" or "https". This is a plain prefix
# test, so any string starting with "http" passes.
#
# @param uri [String]
# @return [Boolean]
def valid_uri_scheme?(uri)
  %w[http https].any? { |prefix| uri.start_with?(prefix) }
end