class Pedophile::OfflineTree
Constants
- FIX_RELATIVE_PATH
- TMP_CHANGES_PATH
- TMP_STRUCTURE_PATH
Attributes
downloader[R]
files[R]
Public Class Methods
new(downloader)
click to toggle source
# File lib/pedophile/offline_tree.rb, line 10 def initialize(downloader) @downloader = downloader @files = Array.new @changes = Array.new end
Public Instance Methods
after_process()
click to toggle source
Desctructive part
# File lib/pedophile/offline_tree.rb, line 44 def after_process load_processed remove_bad_suffix rename_files end
analyze()
click to toggle source
# File lib/pedophile/offline_tree.rb, line 55 def analyze # because I don't want to read all wget options... glob_path = "#{path}/**/**" puts "offline path #{path.to_s.cyan}" Dir.glob(glob_path) do |item| next if item == '.' or item == '..' or File.directory?(item) puts "analyze file #{item.to_s.yellow}" h = Hash.new h[:path] = item mime = `file --mime #{item}` if mime =~ /(\w+\/\w+);/ mime = $1 else mime = nil end h[:mime] = mime if mime == 'text/html' or mime == 'text/plain' h[:inside] = analyze_file(item) end @files << h end save_analyzed end
analyze_file(file)
click to toggle source
# File lib/pedophile/offline_tree.rb, line 103 def analyze_file(file) s = File.read(file) possible_paths_regexp = /"([^"]+)"/ possible_paths = s.scan(possible_paths_regexp).flatten.uniq possible_paths_regexp = /'([^']+)'/ possible_paths += s.scan(possible_paths_regexp).flatten.uniq relative_file_path = File.dirname(file) paths = Array.new possible_paths.each do |pp| if is_path_ok?(pp) h = Hash.new f = File.join(relative_file_path, pp) h[:exists] = File.exists?(f) h[:is_file] = File.file?(f) h[:path] = pp paths << h if should_add_path?(h) end end paths end
base_path()
click to toggle source
# File lib/pedophile/offline_tree.rb, line 142 def base_path @base_path ||= self.downloader.wget.offline_path @base_path end
is_path_ok?(pp)
click to toggle source
TODO - check if this string is correct unix path
# File lib/pedophile/offline_tree.rb, line 131 def is_path_ok?(pp) # pp =~ /\A(?:[0-9a-zA-Z\_\-]+\/?)+\z/ pp.size < 200 end
load_analyzed()
click to toggle source
# File lib/pedophile/offline_tree.rb, line 99 def load_analyzed @files = YAML.load_file(TMP_STRUCTURE_PATH) end
make_it_so()
click to toggle source
# File lib/pedophile/offline_tree.rb, line 18 def make_it_so analyze load_analyzed process_bad_suffix1 process_bad_suffix2 process_bad_filenames save_analyzed save_changes end
path()
click to toggle source
# File lib/pedophile/offline_tree.rb, line 50 def path @path ||= self.downloader.wget.offline_path @path end
process_bad_filenames()
click to toggle source
# File lib/pedophile/offline_tree.rb, line 188 def process_bad_filenames @files.each do |f| old_file = f[:path] new_file = old_file.gsub(/[^0-9A-Za-z.\-\/:]/, '_') if not new_file == old_file process_rename_file(old_file, new_file) end if f[:inside] f[:inside].each do |fi| old_file = fi[:path] if File.exists?(old_file) new_file = old_file.gsub(/[^0-9A-Za-z.\-\/:]/, '_') if not new_file == old_file process_rename_file(old_file, new_file) end end end end end end
process_bad_suffix1()
click to toggle source
# File lib/pedophile/offline_tree.rb, line 161 def process_bad_suffix1 @files.each do |f| old_file = f[:path] new_file = old_file.gsub(/\?\d+/, '').gsub(/\%3F\d+/, '') if not new_file == old_file process_rename_file(old_file, new_file) end if f[:inside] f[:inside].each do |fi| old_file = fi[:path] if File.exists?(old_file) new_file = old_file.gsub(/\?\d+/, '').gsub(/\%3F\d+/, '') if not new_file == old_file process_rename_file(old_file, new_file) end end end end end process_massive_gsub(/\%3F\d+/, "", false) end
process_bad_suffix2()
click to toggle source
PROCESSING
# File lib/pedophile/offline_tree.rb, line 148 def process_bad_suffix2 @files.each do |f| old_file = f[:path] new_file = old_file.gsub(/\?body=1/, '') if not new_file == old_file process_rename_file(old_file, new_file) end end process_massive_gsub("%3Fbody=1", "", false) end
process_massive_gsub(from, to, check_paths = false)
click to toggle source
# File lib/pedophile/offline_tree.rb, line 262 def process_massive_gsub(from, to, check_paths = false) puts "massive gsub #{from.to_s.blue} to #{to.to_s.green}" @files.each do |f| # must be proper mime before, so not needed to check if f[:inside] file_path = f[:path].clone puts " open #{file_path.to_s.red}" old_from = from.to_s old_to = to.to_s # relative path fix if check_paths and FIX_RELATIVE_PATH absolute_path = File.absolute_path(File.dirname(file_path)) first = Pathname.new(absolute_path) to_path = File.join(path, to) second = Pathname.new(File.absolute_path(to_path)) to = second.relative_path_from(first).to_s end exists = File.exists?(file_path) if exists j = File.open(file_path) s = j.read j.close # logs if s.index(from) @changes << { gsub: { old: from, new: to, file: file_path, old_from: old_from, old_to: old_to } } end s.gsub!(from, to) j = File.open(file_path, "w") j.puts(s) j.close f[:inside].each do |fi| fi[:path].gsub!(from, to) end puts " done #{file_path.to_s.red}" else raise "file #{file_path} not found" end end end end
process_rename_file(old_file_path, new_file_path)
click to toggle source
def process_bad_filenames_links
process_massive_gsub(/\%3F/, "_", false)
end
# File lib/pedophile/offline_tree.rb, line 216 def process_rename_file(old_file_path, new_file_path) puts "rename from #{old_file_path.to_s.blue} to #{new_file_path.to_s.green}" # clone to not allow modify of @files old_file = old_file_path.clone new_file = new_file_path.clone # this will be with full path old_file_with_path = old_file_path.clone old_file.gsub!(base_path, '') new_file.gsub!(base_path, '') # ignore slashes old_file.gsub!(/^\//, '') new_file.gsub!(/^\//, '') # 1. rename 1 file new_file_path = old_file_with_path.gsub(old_file, new_file) File.rename(old_file_with_path, new_file_path) # internal log-like @changes << { rename: { old: old_file_with_path, new: new_file_path } } # 2. rename in @files @files.each do |f| if f[:path] == old_file_with_path f[:path] = new_file_path end if f[:inside] f[:inside].each do |fi| if fi[:path] == old_file_with_path fi[:path] = new_file_path end end end end # 3. gsub all files # gsub files after renaming process_massive_gsub(old_file, new_file, true) process_massive_gsub(old_file.gsub("?", "%3F"), new_file, true) puts "RENAMED #{old_file.to_s.blue} to #{new_file.to_s.green}" end
save_analyzed()
click to toggle source
# File lib/pedophile/offline_tree.rb, line 87 def save_analyzed f = File.new(TMP_STRUCTURE_PATH, "w") f.puts @files.to_yaml f.close end
save_changes()
click to toggle source
# File lib/pedophile/offline_tree.rb, line 93 def save_changes f = File.new(TMP_CHANGES_PATH, "w") f.puts @changes.to_yaml f.close end
should_add_path?(h)
click to toggle source
TODO
# File lib/pedophile/offline_tree.rb, line 137 def should_add_path?(h) return true #return h[:is_file] end
zip(output_file = 'site.zip')
click to toggle source
# File lib/pedophile/offline_tree.rb, line 29 def zip(output_file = 'site.zip') command = "cd #{Wget::TMP_OFFLINE_PATH}; zip -r #{output_file} #{self.downloader.wget.site_last_path}" puts command `#{command}` end
zip_with_custom_dir(output_path_zip, output_directory_name)
click to toggle source
# File lib/pedophile/offline_tree.rb, line 35 def zip_with_custom_dir(output_path_zip, output_directory_name) command = "cd #{Wget::TMP_PATH}; cd #{Wget::TMP_SITE_DIRECTORY}; mv \"#{self.downloader.wget.site_last_path}\" \"#{output_directory_name}\";" command += " zip -r #{output_path_zip} #{output_directory_name}" puts command `#{command}` end