class CDMDEXER::Transformer
Attributes
cache_klass[R]
cdm_records[R]
field_mappings[R]
oai_endpoint[R]
oai_request_klass[R]
record_transformer[R]
Public Class Methods
new(cdm_records: [], oai_endpoint: :MISSING_OAI_ENDPOINT, field_mappings: false, record_transformer: RecordTransformer, cache_klass: ::Rails, oai_request_klass: OaiRequest)
click to toggle source
# File lib/cdmdexer/transformer.rb, line 13
# Builds a transformer over a batch of CONTENTdm records.
#
# cdm_records::        records to transform (defaults to an empty list)
# oai_endpoint::       endpoint URL later handed to +oai_request_klass+
# field_mappings::     mapping configs; any falsy value selects
#                      +default_field_mappings+
# record_transformer:: class instantiated once per record in +to_solr+
# cache_klass::        object whose +cache+ is used by +sets+
# oai_request_klass::  class used by +sets+ to look up OAI sets
def initialize(cdm_records: [],
               oai_endpoint: :MISSING_OAI_ENDPOINT,
               field_mappings: false,
               record_transformer: RecordTransformer,
               cache_klass: ::Rails,
               oai_request_klass: OaiRequest)
  @cdm_records        = cdm_records
  @oai_endpoint       = oai_endpoint
  # Fall back to the built-in table when no mappings were supplied.
  @field_mappings     = field_mappings || default_field_mappings
  @record_transformer = record_transformer
  @cache_klass        = cache_klass
  @oai_request_klass  = oai_request_klass
end
Public Instance Methods
records()
click to toggle source
# File lib/cdmdexer/transformer.rb, line 27
# Transforms every record in +cdm_records+ with +to_solr+ and returns the
# results in order, leaving out records for which +to_solr+ returned nil.
def records
  cdm_records.each_with_object([]) do |record, documents|
    document = to_solr(record)
    documents << document unless document.nil?
  end
end
Private Instance Methods
default_field_mappings()
click to toggle source
# Built-in field mapping table, returned as an Array of Hashes.
#
# The constructor falls back to this table when no +field_mappings+ argument
# is supplied, and +mappings+ wraps each entry in FieldMapping.new(config: ...).
#
# Every entry has a +dest_path+ and an +origin_path+; most also carry a
# +formatters+ list. The digsp* entries omit +formatters+ entirely and
# 'child_index' passes an empty list.
#
# NOTE(review): several entries use the origin_path '/', presumably meaning
# "the whole record" -- confirm against FieldMapping / the record transformer.
# NOTE(review): the dest_path suffixes (_ssi, _tesi, _ssim, ...) look like Solr
# dynamic-field suffixes -- confirm against the Solr schema.
# NOTE(review): the formatter constant is spelled `Titlieze` throughout;
# presumably that is the constant's real name -- verify before "correcting" it.
# File lib/cdmdexer/transformer.rb, line 55 def default_field_mappings [ {dest_path: 'location_llsi', origin_path: '/', formatters: [LocationFormatter]}, {dest_path: 'id', origin_path: 'id', formatters: [StripFormatter]}, {dest_path: 'setspec_ssi', origin_path: '/', formatters: [AddSetSpecFormatter, SetSpecFormatter]}, {dest_path: 'collection_name_ssi', origin_path: '/', formatters: [AddSetSpecFormatter, CollectionNameFormatter]}, {dest_path: 'collection_name_tei', origin_path: '/', formatters: [AddSetSpecFormatter, CollectionNameFormatter]}, {dest_path: 'collection_description_tesi', origin_path: '/', formatters: [AddSetSpecFormatter, CollectionDescriptionFormatter, FilterBadCollections]}, {dest_path: 'parent_collection_name_ssi', origin_path: 'par', formatters: [StripFormatter]}, {dest_path: 'parent_collection_name_tei', origin_path: 'par', formatters: [StripFormatter]}, {dest_path: 'parent_collection_description_tei', origin_path: 'par', formatters: [StripFormatter]}, {dest_path: 'title_tesi', origin_path: 'title', formatters: [StripFormatter]}, {dest_path: 'title_ssi', origin_path: 'title', formatters: [StripFormatter]}, {dest_path: 'title_sort', origin_path: 'title', formatters: [StripFormatter]}, {dest_path: 'title_unstem_search', origin_path: 'title', formatters: [StripFormatter]}, {dest_path: 'contributor_teim', origin_path: 'contri', formatters: [StripFormatter]}, {dest_path: 'contributor_unstem_search', origin_path: 'contri', formatters: [StripFormatter]}, {dest_path: 'contributor_ssim', origin_path: 'contri', formatters: [SplitFormatter, StripFormatter]}, {dest_path: 'creator_tesi', origin_path: 'photog', formatters: [JoinFormatter, StripFormatter]}, {dest_path: 'creator_unstem_search', origin_path: 'photog', formatters: [StripFormatter]}, {dest_path: 'creator_ssim', origin_path: 'photog', formatters: [SplitFormatter, StripFormatter]}, {dest_path: 'creator_sort', origin_path: 'photog', formatters: [StripFormatter]}, {dest_path: 'description_tei', 
origin_path: 'descri', formatters: [StripFormatter]}, {dest_path: 'description_ts', origin_path: 'descri', formatters: [StripFormatter]}, {dest_path: 'dat_ssi', origin_path: 'dat', formatters: [StripFormatter]}, {dest_path: 'dat_tesi', origin_path: 'dat', formatters: [StripFormatter]}, {dest_path: 'dat_sort', origin_path: 'dat', formatters: [StripFormatter]}, {dest_path: 'publishing_agency_tei', origin_path: 'publia', formatters: [StripFormatter]}, {dest_path: 'publishing_agency_unstem_search', origin_path: 'publia', formatters: [StripFormatter]}, {dest_path: 'publishing_agency_ssi', origin_path: 'publia', formatters: [StripFormatter]}, {dest_path: 'dimensions_ssi', origin_path: 'dimens', formatters: [StripFormatter]}, {dest_path: 'topic_teim', origin_path: 'genera', formatters: [StripFormatter, SplitFormatter, StripFormatter]}, {dest_path: 'topic_ssim', origin_path: 'genera', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]}, {dest_path: 'topic_unstem_search', origin_path: 'genera', formatters: [StripSemicolonFormatter, StripFormatter]}, {dest_path: 'type_ssi', origin_path: 'type', formatters: [Titlieze, StripSemicolonFormatter, StripFormatter]}, {dest_path: 'type_tesi', origin_path: 'type', formatters: [Titlieze, StripSemicolonFormatter, StripFormatter]}, {dest_path: 'physical_format_ssi', origin_path: 'physic', formatters: [StripSemicolonFormatter]}, {dest_path: 'physical_format_tesi', origin_path: 'physic', formatters: [StripSemicolonFormatter]}, {dest_path: 'formal_subject_unstem_search', origin_path: 'specif', formatters: [StripFormatter]}, {dest_path: 'formal_subject_ssim', origin_path: 'specif', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]}, {dest_path: 'formal_subject_teim', origin_path: 'specif', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]}, {dest_path: 'subject_unstem_search', origin_path: 'subjec', formatters: [StripFormatter]}, {dest_path: 'subject_teim', origin_path: 'subjec', 
formatters: [StripFormatter, SplitFormatter, StripFormatter]}, {dest_path: 'subject_ssim', origin_path: 'subjec', formatters: [StripFormatter, SplitFormatter, StripFormatter]}, {dest_path: 'keyword_unstem_search', origin_path: '/', formatters: [KeywordFormatter, Titlieze, UniqueFormatter, StripFormatter]}, {dest_path: 'keyword_tesi', origin_path: '/', formatters: [KeywordFormatter, Titlieze, UniqueFormatter, JoinFormatter, StripFormatter]}, {dest_path: 'keyword_ssim', origin_path: '/', formatters: [KeywordFormatter, Titlieze, UniqueFormatter, StripFormatter]}, {dest_path: 'city_ssim', origin_path: 'city', formatters: [StripFormatter, SplitFormatter, StripFormatter]}, {dest_path: 'city_unstem_search', origin_path: 'city', formatters: [StripFormatter]}, {dest_path: 'district_ssi', origin_path: 'distri', formatters: [StripFormatter]}, {dest_path: 'district_unstem_search', origin_path: 'distri', formatters: [StripFormatter]}, {dest_path: 'county_ssim', origin_path: 'county', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]}, {dest_path: 'county_unstem_search', origin_path: 'county', formatters: [StripFormatter]}, {dest_path: 'state_ssi', origin_path: 'state', formatters: [StripFormatter]}, {dest_path: 'state_unstem_search', origin_path: 'state', formatters: [StripFormatter]}, {dest_path: 'country_ssi', origin_path: 'countr', formatters: [StripFormatter]}, {dest_path: 'country_unstem_search', origin_path: 'countr', formatters: [StripFormatter]}, {dest_path: 'language_ssi', origin_path: 'langua', formatters: [StripFormatter]}, {dest_path: 'language_unstem_search', origin_path: 'langua', formatters: [StripFormatter]}, {dest_path: 'contributing_unstem_search', origin_path: 'contra', formatters: [StripFormatter]}, {dest_path: 'contributing_organization_tesi', origin_path: 'contra', formatters: [StripFormatter]}, {dest_path: 'contributing_organization_ssi', origin_path: 'contra', formatters: [Titlieze, StripFormatter]}, {dest_path: 
'contact_information_ssi', origin_path: 'contac', formatters: [StripFormatter]}, {dest_path: 'rights_ssi', origin_path: 'righta', formatters: [StripFormatter]}, {dest_path: 'local_identifier_ssi', origin_path: 'identi', formatters: [StripFormatter]}, {dest_path: 'identifier_ssi', origin_path: 'resour', formatters: [StripFormatter]}, {dest_path: 'project_ssi', origin_path: 'projec', formatters: [StripFormatter]}, {dest_path: 'fiscal_sponsor_ssi', origin_path: 'fiscal', formatters: [StripFormatter]}, {dest_path: 'publisher_ssi', origin_path: 'publis', formatters: [StripFormatter]}, {dest_path: 'date_ssi', origin_path: 'date', formatters: [StripFormatter]}, {dest_path: 'format_tesi', origin_path: 'format', formatters: [StripFormatter]}, {dest_path: 'digspa_ssi', origin_path: 'digspa'}, {dest_path: 'digspb_ssi', origin_path: 'digspb'}, {dest_path: 'digspc_ssi', origin_path: 'digspc'}, {dest_path: 'digspd_ssi', origin_path: 'digspd'}, {dest_path: 'digspe_ssi', origin_path: 'digspe'}, {dest_path: 'digspf_ssi', origin_path: 'digspf'}, {dest_path: 'digspg_ssi', origin_path: 'digspg'}, {dest_path: 'digsph_ssi', origin_path: 'digsph'}, {dest_path: 'digspi_ssi', origin_path: 'digspi'}, {dest_path: 'digspj_ssi', origin_path: 'digspj'}, {dest_path: 'digspk_ssi', origin_path: 'digspk'}, {dest_path: 'transcription_tesi', origin_path: 'transc', formatters: [StripFormatter]}, {dest_path: 'translation_tesi', origin_path: 'transl', formatters: [StripFormatter]}, {dest_path: 'fullrs_tesi', origin_path: 'fullrs', formatters: [StripFormatter]}, {dest_path: 'find_ssi', origin_path: 'find', formatters: [StripFormatter]}, {dest_path: 'dmaccess_ssi', origin_path: 'dmaccess', formatters: [StripFormatter]}, {dest_path: 'dmimage_ssi', origin_path: 'dmimage', formatters: [StripFormatter]}, {dest_path: 'dmcreated_ssi', origin_path: 'dmcreated', formatters: [StripFormatter]}, {dest_path: 'dmmodified_ssi', origin_path: 'dmmodified', formatters: [StripFormatter]}, {dest_path: 'dmoclcno_ssi', 
origin_path: 'dmoclcno', formatters: [StripFormatter]}, {dest_path: 'restriction_code_ssi', origin_path: 'restrictionCode', formatters: [StripFormatter]}, {dest_path: 'cdmfilesize_ssi', origin_path: 'cdmfilesize', formatters: [StripFormatter]}, {dest_path: 'cdmfilesizeformatted_ssi', origin_path: 'cdmfilesizeformatted', formatters: [StripFormatter]}, {dest_path: 'cdmprintpdf_is', origin_path: 'cdmprintpdf', formatters: [ToIFormatter]}, {dest_path: 'cdmhasocr_is', origin_path: 'cdmhasocr', formatters: [ToIFormatter]}, {dest_path: 'cdmisnewspaper_is', origin_path: 'cdmisnewspaper', formatters: [ToIFormatter]}, {dest_path: 'image_uri_ssi', origin_path: 'image_uri', formatters: [StripFormatter]}, {dest_path: 'record_type_ssi', origin_path: 'record_type', formatters: [StripFormatter]}, {dest_path: 'geographic_feature_ssim', origin_path: 'geogra', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]}, {dest_path: 'geographic_feature_teim', origin_path: 'geogra', formatters: [StripFormatter]}, {dest_path: 'geographic_feature_unstem_search', origin_path: 'geogra', formatters: [StripFormatter]}, {dest_path: 'geonam_ssi', origin_path: 'geonam', formatters: [StripFormatter]}, {dest_path: 'kaltura_audio_ssi', origin_path: 'audio', formatters: [StripFormatter]}, {dest_path: 'kaltura_audio_playlist_ssi', origin_path: 'audioa', formatters: [StripFormatter]}, {dest_path: 'kaltura_video_ssi', origin_path: 'video', formatters: [StripFormatter]}, {dest_path: 'kaltura_video_playlist_ssi', origin_path: 'videoa', formatters: [StripFormatter]}, {dest_path: 'coordinates_llsi', origin_path: 'geonam', formatters: [GeoNameID, GeoNameIDToJson, GeoNameToLocation]}, {dest_path: 'placename_ssim', origin_path: 'geonam', formatters: [GeoNameID, GeoNameIDToJson, GeoNameToPlaceName]}, {dest_path: 'placename_unstem_search', origin_path: 'geonam', formatters: [GeoNameID, GeoNameIDToJson, GeoNameToPlaceName]}, {dest_path: 'table_ssim', origin_path: 'table', formatters: 
[StripFormatter, SplitFormatter, StripFormatter]}, {dest_path: 'umedia_ssi', origin_path: 'umedia', formatters: [StripFormatter]}, {dest_path: 'child_index', origin_path: 'child_index', formatters: []} ] end
mappings()
click to toggle source
# File lib/cdmdexer/transformer.rb, line 40
# Wraps each raw config hash from +field_mappings+ in a FieldMapping.
#
# The list is memoized. +to_solr+ calls this method once per record, while
# +field_mappings+ is assigned only in the constructor, so rebuilding every
# wrapper for every record is repeated, loop-invariant work.
#
# NOTE(review): with memoization every record transformer receives the same
# FieldMapping instances. This assumes they are treated as read-only and that
# the +field_mappings+ array is not mutated after construction -- confirm.
#
# Returns an Array with one FieldMapping per entry in +field_mappings+.
def mappings
  @mappings ||= field_mappings.map { |config| FieldMapping.new(config: config) }
end
sets()
click to toggle source
# File lib/cdmdexer/transformer.rb, line 33
# Returns the OAI set lookup for +oai_endpoint+.
#
# The value is memoized on this instance. On first use it is fetched through
# +cache_klass.cache+ under the key "cdmdexer_sets" (passing
# +expires_in: 10.minutes+), and the block builds it with +oai_request_klass+
# when the cache has to compute it.
def sets
  return @oai_request if @oai_request

  store = cache_klass.cache
  @oai_request = store.fetch("cdmdexer_sets", expires_in: 10.minutes) do
    oai_request_klass.new(endpoint_url: oai_endpoint).set_lookup
  end
end
to_solr(record)
click to toggle source
# File lib/cdmdexer/transformer.rb, line 44
# Transforms a single record, or returns nil for an empty one.
#
# A record counts as empty when it holds nothing but its 'id' key. Any other
# record is merged with the OAI sets under 'oai_sets' and handed, together
# with the field mappings, to a new +record_transformer+ whose +transform!+
# result is returned.
def to_solr(record)
  # Remove empty records (move this behavior to the CONTENTdm API gem) and
  # bail early on the transformation process.
  id_only = { 'id' => record['id'] }
  return nil if id_only == record

  record_with_sets = record.merge('oai_sets' => sets)
  transformer = record_transformer.new(record: record_with_sets, field_mappings: mappings)
  transformer.transform!
end