class P3
Constants
- REFERENCE_REGEXP
- REFERENCE_START_REGEXP
Public Class Methods
convertSingleColPdf(file_name)
click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 54
# Runs `k2pdfopt -dev kpw` on +file_name+ inside a pseudo-terminal, answers the
# interactive menu prompt with a newline, echoes the tool's output, and returns
# the path extracted from the first output line that contains "written".
#
# The command is handed to PTY.spawn as an argument vector rather than an
# interpolated command line, so a file name containing spaces or shell
# metacharacters reaches k2pdfopt as one literal argument.
#
# NOTE(review): reading from a PTY after the child has exited can raise
# Errno::EIO on some platforms; that case is not handled here.
#
# @param file_name [#to_s] path of the PDF handed to k2pdfopt
# @return [String, nil] the text matched by the path pattern ("" when the
#   "written" line has no match); nil when the output ends without such a line
def self.convertSingleColPdf(file_name)
  PTY.spawn('k2pdfopt', '-dev', 'kpw', file_name.to_s) do |i, o|
    o.sync = true
    # Wait (at most 10 seconds) for the menu prompt, then send a bare newline.
    i.expect(/\S.*Enter option above \(h=help, q=quit\):/, 10) do
      o.puts "\n"
      o.flush
    end
    until i.eof?
      res = i.gets
      print res
      next unless res.include?('written')

      # NOTE(review): inside the character class `\a-z` is a range that starts
      # at the BEL character, so the class is far wider than letters and
      # digits. It is kept unchanged because that width is what allows "/"
      # inside a multi-directory path to match.
      return res.match(/\/[\a-zA-Z0-9_]+.pdf/).to_s
    end
  end
end
fetchFromPdfPath(path, work_dir=true, use_dir=true)
click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 106
# Converts the PDF at +path+ with convertSingleColPdf, extracts its reference
# entries with fetchReference, then removes the working files of the current
# job.
#
# Cleanup is keyed on @job_id. In the code visible here that variable is only
# assigned by fetchFromPdfUrl, so when this method is called directly there is
# no job to clean up and the cleanup step is skipped. With a nil id the
# directory form would expand to "<work_dir>/" and remove the whole working
# directory, and the file form would try to delete "<work_dir>/-output.pdf".
#
# NOTE(review): the default work_dir of true is not a usable path; callers are
# presumably expected to pass a directory.
#
# @param path [String] PDF to process
# @param work_dir [String] base directory that holds the job's files
# @param use_dir [Boolean, nil] true or nil: the job lives in its own
#   directory (cleaned with removeDir); false: the job is a pair of prefixed
#   files (cleaned with removeFile)
# @return the value returned by fetchReference
def self.fetchFromPdfPath(path, work_dir=true, use_dir=true)
  executed_pdf = convertSingleColPdf(path)
  references = fetchReference(executed_pdf)
  unless @job_id.nil?
    if use_dir || use_dir.nil?
      removeDir(@job_id, work_dir)
    else
      removeFile(@job_id, work_dir)
    end
  end
  references
end
Also aliased as: parse
fetchFromPdfUrl(pdfUrl, work_dir=true, use_dir=true)
click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 117
# Starts a new job: generates a job id (stored in @job_id), creates the job
# directory when the directory layout is in use, downloads +pdfUrl+ to the
# job's input file, and hands that file to parse.
#
# @param pdfUrl [String] location of the PDF to download
# @param work_dir [String] base directory for the job's files
# @param use_dir [Boolean, nil] true or nil: create and use a per-job
#   directory; false: use prefixed files directly under work_dir
# @return the value returned by parse
def self.fetchFromPdfUrl(pdfUrl, work_dir=true, use_dir=true)
  @job_id = makeId

  # nil is treated the same as true.
  own_directory = use_dir || use_dir.nil?
  makeDir(@job_id, work_dir) if own_directory

  local_pdf = makeFile(@job_id, work_dir, use_dir)
  fetchPdfFile(pdfUrl, local_pdf)
  self.parse(local_pdf, work_dir, use_dir)
end
fetchPdfFile(pdfUrl, file_name)
click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 46
# Copies the content found at +pdfUrl+ into +file_name+, written in binary
# mode.
#
# The source is opened with URI.open (open-uri): since Ruby 3.0 a bare
# Kernel#open no longer opens URLs. The destination is opened with File.open
# so it is always treated as a plain file path.
#
# NOTE(review): URI.open falls back to Kernel#open for strings that are not
# URLs, so a +pdfUrl+ beginning with "|" would still be run as a command.
# Validate the URL first if it can come from untrusted input.
#
# @param pdfUrl [String] URL (or local path) to read from
# @param file_name [String] path of the file to create or overwrite
# @return [Integer] number of bytes written
def self.fetchPdfFile(pdfUrl, file_name)
  File.open(file_name, 'wb') do |o|
    URI.open(pdfUrl) do |data|
      o.write(data.read)
    end
  end
end
fetchReference(file_name)
click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 72
# Extracts reference entries from the PDF +file_name+.
#
# Steps:
# 1. Print the file name and open the file with PDF::Reader.
# 2. Find the lowest page number whose text matches REFERENCE_START_REGEXP.
# 3. Take the text of every page from that number on, collapsing blank-line
#    runs and space runs and joining words hyphenated across a line break.
# 4. Drop the first of those pages, join the rest, put a newline in front of
#    every REFERENCE_REGEXP match, split on blank lines, and keep entries
#    longer than 15 characters.
#
# When no page matches REFERENCE_START_REGEXP an empty array is returned
# (previously the nil page number made the `>=` comparison raise).
#
# @param file_name [String] path of the PDF to read
# @return [Array<String>] one string per detected reference entry
def self.fetchReference(file_name)
  puts file_name
  reader = PDF::Reader.new(file_name)
  page_no = reader.
    pages.
    select { |page| page.text.index(REFERENCE_START_REGEXP) }.
    map(&:number).
    min
  return [] if page_no.nil?

  ref_page = reader.
    pages.
    select { |page| page.number >= page_no }.
    map { |page| page.text.gsub(/\n\n+/, "\n").gsub(/ +/, ' ').gsub(/-\n +/, '') }
  # NOTE(review): this discards the first selected page, which (if pages come
  # in ascending order) is the page where the start marker was found. Confirm
  # that skipping it is intended.
  ref_page.shift
  ref_page.
    join(' ').
    gsub(REFERENCE_REGEXP, "\n\\1").
    split(/\n *\n/).
    map { |entry| entry.gsub("\n", '') }.
    select { |entry| entry.length > 15 }
end
getK2Pdf(id, work_dir, use_dir)
click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 33
# Builds the path of the k2pdfopt output file for job +id+.
#
# A nil +use_dir+ is treated like true, matching how makeFile and the fetch
# methods interpret the flag; previously nil selected the flat-file layout
# here while selecting the directory layout everywhere else.
#
# @param id [String] job id
# @param work_dir [String] base directory for job files
# @param use_dir [Boolean, nil] true or nil: "<work_dir>/<id>/output_k2opt.pdf";
#   false: "<work_dir>/<id>-output_k2opt.pdf"
# @return [String] the output path
def self.getK2Pdf(id, work_dir, use_dir)
  if use_dir || use_dir.nil?
    "#{work_dir}/#{id}/output_k2opt.pdf"
  else
    "#{work_dir}/#{id}-output_k2opt.pdf"
  end
end
makeDir(id, work_dir)
click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 17
# Creates the directory "<work_dir>/<id>" for a job.
#
# @param id [String] job id, used as the directory name
# @param work_dir [String] parent directory
# @return the value returned by Dir.mkdir
def self.makeDir(id, work_dir)
  job_dir = "#{work_dir}/#{id}"
  Dir.mkdir(job_dir)
end
makeFile(id, work_dir, use_dir)
click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 25
# Builds the path of the downloaded input PDF for job +id+.
#
# @param id [String] job id
# @param work_dir [String] base directory for job files
# @param use_dir [Boolean, nil] true or nil: "<work_dir>/<id>/output.pdf";
#   false: "<work_dir>/<id>-output.pdf"
# @return [String] the input file path
def self.makeFile(id, work_dir, use_dir)
  # Only an explicit false selects the flat-file layout; nil counts as true.
  flat_layout = !use_dir && !use_dir.nil?
  return "#{work_dir}/#{id}-output.pdf" if flat_layout

  "#{work_dir}/#{id}/output.pdf"
end
makeId()
click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 13
# Generates a job id: the SHA-256 hex digest of the current timestamp combined
# with a random seed.
#
# The timestamp only has one-second resolution, so on its own it gave the same
# id to every job started within the same second; the random component keeps
# ids distinct.
#
# @return [String] 64-character lowercase hexadecimal id
def self.makeId
  timestamp = Time.now.strftime('%F %H:%M:%S')
  Digest::SHA256.hexdigest("#{timestamp} #{Random.new_seed}")
end
removeDir(id, work_dir)
click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 21
# Recursively removes the job directory "<work_dir>/<id>".
#
# @param id [String] job id, used as the directory name
# @param work_dir [String] parent directory
# @return the value returned by FileUtils.rm_rf
def self.removeDir(id, work_dir)
  job_dir = "#{work_dir}/#{id}"
  FileUtils.rm_rf(job_dir)
end
removeFile(id, work_dir)
click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 41
# Deletes the two flat-layout files of a job: the downloaded input PDF and the
# k2pdfopt output PDF, in that order.
#
# @param id [String] job id, used as the file-name prefix
# @param work_dir [String] directory containing the files
# @return the value returned by the second File.delete call
def self.removeFile(id, work_dir)
  input_pdf = "#{work_dir}/#{id}-output.pdf"
  converted_pdf = "#{work_dir}/#{id}-output_k2opt.pdf"
  File.delete(input_pdf)
  File.delete(converted_pdf)
end