class OnlyofficePdfParser::PdfStructure

Class for working with and parsing PDF files

Constants

PAGE_SIZE_FOR_PDF

@return [Hash] default page size names mapped to their page dimensions in points

Attributes

file_path[RW]

@return [String] full path to file

pages[RW]

@return [Array, Pages] array of pages

pages_in_bmp[R]

@return [Array<String>] binary BMP data, one entry per page image

Public Class Methods

new(pages: [], file_path: nil) click to toggle source
# File lib/onlyoffice_pdf_parser/pdf_structure.rb, line 21
# Build a structure without touching the file system.
#
# @param pages [Array] pages to start with
# @param file_path [String, nil] path to the PDF file
def initialize(pages: [], file_path: nil)
  # Always starts empty; nothing in this method fills it.
  @pages_in_bmp = []
  @pages = pages
  @file_path = file_path
end
parse(filename) click to toggle source

Parse a PDF file. @param filename [String] path to the file @return [PdfStructure] result of parsing

# File lib/onlyoffice_pdf_parser/pdf_structure.rb, line 94
# Build a PdfStructure for +filename+ and run every parsing step on it.
#
# @param filename [String] path to file
# @return [PdfStructure] the populated structure
def self.parse(filename)
  PdfStructure.new(pages: [], file_path: filename).tap do |structure|
    structure.pdf_reader_parse
    structure.fetch_bmp_binary
    # Called for its side effect: page_size stores the name in @page_size.
    structure.page_size
  end
end

Public Instance Methods

[](parameter) click to toggle source

Hash-like accessor for attributes @param parameter [Symbol] attribute name @return [Object] value of attribute

# File lib/onlyoffice_pdf_parser/pdf_structure.rb, line 30
# Hash-style read access to a fixed set of attributes.
#
# @param parameter [Symbol] attribute name, either :pages or :page_size
# @return [Object] value of the matching instance variable
# @raise [RuntimeError] for any other parameter
def [](parameter)
  return @pages if parameter == :pages
  return @page_size if parameter == :page_size

  raise "Unknown instance variable - #{parameter}."
end
contain_pattern?(path_to_patter) click to toggle source

@return [true, false] whether the PDF file contains the graphic pattern

# File lib/onlyoffice_pdf_parser/pdf_structure.rb, line 42
# Look for a graphic pattern on each entry of pages_in_bmp.
#
# @param path_to_patter [String] passed as-is to BmpImage#get_sub_image_array
#   (presumably a path to the pattern image - confirm against BmpImage)
# @return [true, false] true as soon as one page gives a non-empty result
def contain_pattern?(path_to_patter)
  # any? stops at the first page with a match, so later pages are not scanned.
  pages_in_bmp.any? do |page_bmp|
    matches = BmpImage.new(page_bmp).get_sub_image_array(path_to_patter)
    !matches.empty?
  end
end
page_size() click to toggle source

@return [String, nil] name of page size

# File lib/onlyoffice_pdf_parser/pdf_structure.rb, line 71
# Resolve the page size name from the page dimensions in points.
#
# The dimensions are looked up in PAGE_SIZE_FOR_PDF as given, then with
# width and height swapped. The result is also stored in @page_size.
#
# @return [String, nil] size name, "Landscape <name>" when only the swapped
#   dimensions match, or nil when neither orientation is a known size
def page_size
  points = page_size_points
  @page_size = PAGE_SIZE_FOR_PDF.key(points)
  return @page_size if @page_size

  rotated_name = PAGE_SIZE_FOR_PDF.key(points.reverse)
  # Interpolating a nil name would produce the bogus value "Landscape ",
  # so an unknown size is reported as nil instead.
  @page_size = rotated_name ? "Landscape #{rotated_name}" : nil
end
page_size_points() click to toggle source

@return [Array<Integer>] page size of the PDF in points

# File lib/onlyoffice_pdf_parser/pdf_structure.rb, line 62
# Read the page size from the output of the `pdfinfo` command.
#
# The result is memoized in @page_size_points, so `pdfinfo` runs at most once
# per instance.
#
# @return [Array<Integer>] the two page dimensions, rounded to whole points
# @raise [RuntimeError] if the pdfinfo output has no "Page size:" entry
def page_size_points
  return @page_size_points if @page_size_points

  # The argv form bypasses the shell, so quotes, `$` or backticks in the
  # path cannot break or inject into the command.
  pdfinfo = IO.popen(['pdfinfo', @file_path.to_s], &:read)
  size_section = pdfinfo.split('Page size:')[1]
  raise "Cannot find 'Page size:' in pdfinfo output for #{@file_path}" unless size_section

  # Keep the text before "pts", then split it into its two numbers on " x ".
  page_size_fraction = size_section.split('pts').first.strip.split(', ').first.split(' x ')
  @page_size_points = page_size_fraction.map { |size| size.to_f.round }
end
pdf_reader_parse() click to toggle source

Parse file using `pdf-reader` gem

# File lib/onlyoffice_pdf_parser/pdf_structure.rb, line 52
# Read the file with the `pdf-reader` gem and append one entry per page
# to @pages.
#
# Each entry is a Hash with :text (from page.text) and :fonts (from
# parse_font(page)).
def pdf_reader_parse
  PDF::Reader.open(file_path.to_s) do |document|
    document.pages.each do |pdf_page|
      page_text = pdf_page.text
      page_fonts = parse_font(pdf_page)
      @pages << { text: page_text, fonts: page_fonts }
    end
  end
end