class P3

Constants

REFERENCE_REGEXP
REFERENCE_START_REGEXP

Public Class Methods

convertSingleColPdf(file_name) click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 54
# Runs the external `k2pdfopt` tool on +file_name+ and returns the path that
# the tool reports as written.
#
# The tool's output is echoed to stdout line by line. The first line that
# contains the word "written" is searched for a ".pdf" path, and that match
# (as a String, possibly empty) is returned immediately.
#
# @param file_name [#to_s] path of the PDF to convert
# @return [String, nil] the matched output path, or nil when the output ends
#   without any line containing "written"
def self.convertSingleColPdf(file_name)
  # Pass an argument vector instead of one interpolated command string so the
  # path is handed to k2pdfopt verbatim (spaces and shell metacharacters in
  # the path are no longer interpreted by a shell).
  PTY.spawn('k2pdfopt', '-dev', 'kpw', file_name.to_s) do |i, o|
    o.sync = true
    # Wait (up to 10 seconds) for the interactive prompt, then send a newline.
    i.expect(/\S.*Enter option above \(h=help, q=quit\):/, 10) {
      o.puts "\n"
      o.flush
    }
    until i.eof?
      res = i.gets
      print res
      next unless res.include?('written')

      # NOTE(review): inside the character class `\a-z` appears to be a range
      # starting at the bell character rather than the letters a-z, which is
      # what lets this match whole paths containing "/" and ".". The pattern is
      # intentionally left unchanged — confirm before tightening it.
      return res.match(/\/[\a-zA-Z0-9_]+.pdf/).to_s
    end
  end
end
fetchFromPdfPath(path, work_dir=true, use_dir=true) click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 106
# Converts the PDF at +path+ with convertSingleColPdf, extracts its reference
# list with fetchReference, and then removes the temporary job artefacts.
#
# Cleanup only happens when a job id has been recorded in @job_id (this is
# set by fetchFromPdfUrl). Without that guard a direct call would build the
# path "#{work_dir}/" from a nil id and recursively delete the whole working
# directory. The id is cleared afterwards so a later direct call does not try
# to remove the same artefacts again.
#
# @param path [String] path of the PDF to parse
# @param work_dir [Object] base location of the job artefacts (interpolated
#   into paths by removeDir / removeFile)
# @param use_dir [Boolean, nil] true or nil: the job used its own directory;
#   false: the job used flat "<id>-output*.pdf" files
# @return [Object] whatever fetchReference returns for the converted file
def self.fetchFromPdfPath(path, work_dir=true, use_dir=true)
  executed_pdf = convertSingleColPdf(path)
  references = fetchReference(executed_pdf)
  if @job_id
    if use_dir || use_dir.nil?
      removeDir(@job_id, work_dir)
    else
      removeFile(@job_id, work_dir)
    end
    @job_id = nil
  end
  references
end
Also aliased as: parse
fetchFromPdfUrl(pdfUrl, work_dir=true, use_dir=true) click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 117
# Downloads the PDF at +pdfUrl+ into a freshly named job location and parses it.
#
# A new job id is generated and stored in @job_id. When +use_dir+ is true or
# nil a per-job directory is created first. The download target comes from
# makeFile, and the downloaded file is then handed to parse.
#
# @param pdfUrl [String] location of the PDF to download
# @param work_dir [Object] base location passed through to the helpers
# @param use_dir [Boolean, nil] true or nil: per-job directory; false: flat files
# @return [Object] the result of parse for the downloaded file
def self.fetchFromPdfUrl(pdfUrl, work_dir=true, use_dir=true)
  @job_id = makeId
  wants_directory = use_dir.nil? || use_dir
  makeDir(@job_id, work_dir) if wants_directory

  download_target = makeFile(@job_id, work_dir, use_dir)
  fetchPdfFile(pdfUrl, download_target)
  parse(download_target, work_dir, use_dir)
end
fetchPdfFile(pdfUrl, file_name) click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 46
# Copies the content found at +pdfUrl+ into the local file +file_name+,
# writing in binary mode.
#
# The source is opened with URI.open rather than the bare Kernel#open: since
# Ruby 3.0 Kernel#open no longer opens URLs, while URI.open handles URLs and
# still falls back to opening a plain local path. URI.open needs Ruby 2.5+.
# The body is streamed to disk instead of being read into memory in one piece.
#
# @param pdfUrl [String] URL (or local path) of the PDF to fetch
# @param file_name [String] destination path; created or truncated
# @return [Integer] number of bytes copied
def self.fetchPdfFile(pdfUrl, file_name)
  require 'open-uri' # provides URI.open; a no-op when already loaded
  File.open(file_name, 'wb') do |out|
    URI.open(pdfUrl) do |data|
      IO.copy_stream(data, out)
    end
  end
end
fetchReference(file_name) click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 72
# Extracts reference entries from the PDF at +file_name+.
#
# Steps, in order:
# 1. Print +file_name+ to stdout.
# 2. Find the lowest page number whose text matches REFERENCE_START_REGEXP.
# 3. Take the text of every page from that page onwards, collapsing runs of
#    blank lines and spaces and joining words hyphenated across a line break.
# 4. Discard the first of those pages.
# 5. Join the remaining pages, put a newline in front of every
#    REFERENCE_REGEXP match, split on blank lines, strip newlines from each
#    piece and keep the pieces longer than 15 characters.
#
# @param file_name [String] path of the PDF to read
# @return [Array<String>] the extracted entries; empty when no page matches
#   REFERENCE_START_REGEXP (previously this case raised while comparing page
#   numbers against nil)
def self.fetchReference(file_name)
  puts file_name
  reader = PDF::Reader.new(file_name)

  start_page = reader.pages
                     .reject { |page| page.text.index(REFERENCE_START_REGEXP).nil? }
                     .map(&:number)
                     .min
  # No page matched the start pattern: there is nothing to extract.
  return [] if start_page.nil?

  page_texts = reader.pages
                     .select { |page| page.number >= start_page }
                     .map { |page| page.text.gsub(/\n\n+/, "\n").gsub(/ +/, ' ').gsub(/-\n +/, '') }

  # NOTE(review): this drops the first selected page, i.e. the page on which
  # the start pattern was found — confirm that this is intended.
  page_texts.shift

  page_texts.join(' ')
            .gsub(REFERENCE_REGEXP, "\n\\1")
            .split(/\n *\n/)
            .map { |entry| entry.gsub("\n", '') }
            .select { |entry| entry.length > 15 }
end
getK2Pdf(id, work_dir, use_dir) click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 33
# Builds the path of the k2pdfopt output file for job +id+.
#
# A nil +use_dir+ is treated like true, matching how makeFile and the fetch
# methods interpret the flag. Previously nil selected the flat-file layout
# here while makeFile selected the directory layout.
#
# @param id [#to_s] job identifier
# @param work_dir [#to_s] base location of the job artefacts
# @param use_dir [Boolean, nil] true or nil: "<work_dir>/<id>/output_k2opt.pdf";
#   false: "<work_dir>/<id>-output_k2opt.pdf"
# @return [String] the output file path
def self.getK2Pdf(id, work_dir, use_dir)
  if use_dir || use_dir.nil?
    "#{work_dir}/#{id}/output_k2opt.pdf"
  else
    "#{work_dir}/#{id}-output_k2opt.pdf"
  end
end
makeDir(id, work_dir) click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 17
# Creates the directory "<work_dir>/<id>" for a job.
#
# @param id [#to_s] job identifier, used as the directory name
# @param work_dir [#to_s] parent location of the new directory
# @return [Integer] the value returned by Dir.mkdir
def self.makeDir(id, work_dir)
  job_directory = "#{work_dir}/#{id}"
  Dir.mkdir(job_directory)
end
makeFile(id, work_dir, use_dir) click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 25
# Builds the path of the downloaded PDF for job +id+.
#
# @param id [#to_s] job identifier
# @param work_dir [#to_s] base location of the job artefacts
# @param use_dir [Boolean, nil] true or nil: "<work_dir>/<id>/output.pdf";
#   false: "<work_dir>/<id>-output.pdf"
# @return [String] the file path
def self.makeFile(id, work_dir, use_dir)
  # The two layouts differ only in the character between the id and the name.
  separator = (use_dir.nil? || use_dir) ? '/' : '-'
  "#{work_dir}/#{id}#{separator}output.pdf"
end
makeId() click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 13
# Generates a job identifier: the SHA-256 hex digest of a seed string.
#
# The seed combines the current time down to nanoseconds, the process id and
# a random number. The previous seed was the timestamp at one-second
# resolution only, so two jobs started within the same second received the
# same id and therefore the same files or directory.
#
# @return [String] 64 lowercase hexadecimal characters
def self.makeId
  seed = "#{Time.now.strftime('%F %H:%M:%S.%N')}-#{Process.pid}-#{rand}"
  Digest::SHA256.hexdigest(seed)
end
parse(path, work_dir=true, use_dir=true)
Alias for: fetchFromPdfPath
removeDir(id, work_dir) click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 21
# Recursively removes the job directory "<work_dir>/<id>".
#
# A nil or empty +id+ is ignored: the path would otherwise resolve to
# "<work_dir>/" and the whole working directory would be deleted.
#
# @param id [#to_s, nil] job identifier
# @param work_dir [#to_s] parent location of the job directory
# @return [Array<String>, nil] the result of FileUtils.rm_rf, or nil when
#   nothing was attempted because +id+ was blank
def self.removeDir(id, work_dir)
  return if id.nil? || id.to_s.empty?

  FileUtils.rm_rf("#{work_dir}/#{id}")
end
removeFile(id, work_dir) click to toggle source
# File lib/paper/pdf/parser/P3.rb, line 41
# Deletes the two flat-layout artefacts of job +id+: the downloaded PDF
# ("<id>-output.pdf") and its k2pdfopt counterpart ("<id>-output_k2opt.pdf").
#
# @param id [#to_s] job identifier
# @param work_dir [#to_s] location of the files
# @return [Integer] the value returned by the last File.delete call
def self.removeFile(id, work_dir)
  stem = "#{work_dir}/#{id}-output"
  File.delete("#{stem}.pdf")
  File.delete("#{stem}_k2opt.pdf")
end