class HexaPDF::Font::TrueType::Table::CmapSubtable

Generic base class for all cmap subtables.

cmap format 8.0 is currently not implemented because use of the format is discouraged in the specification and no font with a format 8.0 cmap subtable was available for testing.

The preferred cmap format is 12.0 because it supports all of Unicode and allows for fast and memory efficient code-to-gid as well as gid-to-code mappings.

See: the "cmap" table chapter of the OpenType specification.

Constants

PLATFORM_MICROSOFT

The platform identifier for Microsoft.

PLATFORM_UNICODE

The platform identifier for Unicode.

Attributes

code_map[RW]

The complete code map.

For existing fonts, the code map is only fully initialized once a mapping is first accessed via [].

encoding_id[RW]

The platform-specific encoding identifier.

format[R]

The cmap format or nil if the subtable wasn't read from a file.

gid_map[RW]

The complete gid map.

For existing fonts, the gid map is only fully initialized once a mapping is first accessed via gid_to_code.

language[RW]

The language code.

platform_id[RW]

The platform identifier.

Public Class Methods

new(platform_id, encoding_id) click to toggle source

Creates a new subtable.

# File lib/hexapdf/font/true_type/table/cmap_subtable.rb, line 86
# Sets up an empty subtable identified by +platform_id+ and +encoding_id+.
#
# The subtable starts without any mappings, with language code 0 and without a format
# (the format stays nil until it is assigned elsewhere, e.g. when a subtable is parsed).
def initialize(platform_id, encoding_id)
  # Mappings: two separate, initially empty hashes.
  @gid_map = {}
  @code_map = {}

  # Header values of a subtable that has not been read from a file.
  @language = 0
  @format = nil

  # NOTE(review): @supported is not read by any code visible here - confirm it is still used.
  @supported = true

  # Identification of the subtable.
  @encoding_id = encoding_id
  @platform_id = platform_id
end

Public Instance Methods

[](code) click to toggle source

Returns the glyph index for the given character code or nil if the character code is not mapped.

# File lib/hexapdf/font/true_type/table/cmap_subtable.rb, line 104
# Looks up +code+ in the code map and returns whatever the map yields for it.
#
# The lookup deliberately uses only the #[] call form: #parse stores a lambda in
# @code_map that loads the real mapping on first use, and #[] is answered by both a Hash
# and a lambda, so this line works before and after the mapping has been loaded.
def [](code)
  @code_map[code]
end
gid_to_code(gid) click to toggle source

Returns a character code for the given glyph index or nil if the given glyph index does not exist or is not mapped to a character code.

Note that some fonts map multiple character codes to the same glyph (e.g. hyphen and minus), i.e. the code-to-glyph mapping is not injective (and, since a glyph may be unmapped, not necessarily surjective either). In such a case one of the available character codes is returned.

# File lib/hexapdf/font/true_type/table/cmap_subtable.rb, line 114
# Looks up +gid+ in the gid map and returns whatever the map yields for it.
#
# The lookup deliberately uses only the #[] call form: #parse stores a lambda in
# @gid_map that loads the real mapping on first use, and #[] is answered by both a Hash
# and a lambda, so this line works before and after the mapping has been loaded.
def gid_to_code(gid)
  @gid_map[gid]
end
parse(io, offset) → true or false click to toggle source

Parses the cmap subtable from the IO at the given offset.

If the subtable format is supported, the information is used to populate this object and true is returned. Otherwise no mappings are set up and false is returned; only the format (and, for the unsupported format 8, the language) is recorded.

# File lib/hexapdf/font/true_type/table/cmap_subtable.rb, line 125
# Reads the subtable header at +offset+ in +io+ and arranges for the mappings to be
# loaded on demand.
#
# The 16-bit format number is read first. Formats 8, 10 and 12 continue with two skipped
# bytes followed by 32-bit length and language fields; formats 0, 2, 4 and 6 continue with
# 16-bit length and language fields. Any other format has no further bytes read.
#
# Returns +true+ for the formats 0, 2, 4, 6, 10 and 12 and +false+ for everything else
# (including format 8, whose header is still consumed).
def parse(io, offset)
  io.pos = offset
  @format = io.read(2).unpack1('n')

  case @format
  when 8, 10, 12
    io.pos += 2 # two bytes are skipped before the 32-bit fields
    length, @language = io.read(8).unpack('N2')
  when 0, 2, 4, 6
    length, @language = io.read(4).unpack('n2')
  end
  return false unless [0, 2, 4, 6, 10, 12].include?(@format)

  # The mapping data starts right behind the header that was just read.
  mapping_start = io.pos

  # Both placeholders do the same work on first use: parse_mapping replaces @code_map and
  # @gid_map with the real maps, after which the lookup is repeated on the replacement.
  load_maps = -> { parse_mapping(io, mapping_start, length) }
  @code_map = lambda do |code|
    load_maps.call
    @code_map[code]
  end
  @gid_map = lambda do |gid|
    load_maps.call
    @gid_map[gid]
  end

  true
end
unicode?() click to toggle source

Returns true if this subtable contains a Unicode cmap.

# File lib/hexapdf/font/true_type/table/cmap_subtable.rb, line 97
# Tells whether this subtable holds a Unicode cmap.
#
# That is the case for every subtable of the Unicode platform and for subtables of the
# Microsoft platform whose encoding identifier is 1 or 10.
def unicode?
  return true if platform_id == PLATFORM_UNICODE

  platform_id == PLATFORM_MICROSOFT && [1, 10].include?(encoding_id)
end

Private Instance Methods

parse_mapping(io, offset, length) click to toggle source
# File lib/hexapdf/font/true_type/table/cmap_subtable.rb, line 148
# Positions +io+ at +offset+ and lets the parser module matching @format read the
# mapping data, passing it +io+ and +length+.
#
# The parser's result is destructured into @code_map and @gid_map. For a format without a
# parser module both maps are set to nil.
def parse_mapping(io, offset, length)
  io.pos = offset

  format_parser = case @format
                  when 0 then Format0
                  when 2 then Format2
                  when 4 then Format4
                  when 6 then Format6
                  when 10 then Format10
                  when 12 then Format12
                  end

  @code_map, @gid_map = format_parser&.parse(io, length)
end