class SequenceServer::Sequence::Retriever
Retrieves sequences from BLAST databases.
Attributes
database_ids[R]
in_file[R]
sequence_ids[R]
sequences[R]
Public Class Methods
new(sequence_ids, database_ids, in_file = false)
click to toggle source
# File lib/sequenceserver/sequence.rb, line 178
# Set up a retriever and immediately attempt the retrieval.
#
# sequence_ids and database_ids are coerced with Array(), so a single value
# or a list may be given. in_file is stored as-is and consulted by #run.
#
# The three steps below run in order; a falsy result from one step stops the
# remaining steps from running.
def initialize(sequence_ids, database_ids, in_file = false)
  @sequence_ids = Array(sequence_ids)
  @database_ids = Array(database_ids)
  @in_file = in_file

  return unless validate
  return unless create_entry_batch_file

  run
end
Public Instance Methods
to_json(*_args)
click to toggle source
# File lib/sequenceserver/sequence.rb, line 188
# Serialise the retrieval result as JSON.
#
# The payload has two keys: error_msgs (the value of #error_msgs) and
# sequences (the result of calling #info on each retrieved sequence).
# Any arguments are accepted and ignored.
def to_json(*_args)
  payload = {
    error_msgs: error_msgs,
    sequences: sequences.map { |sequence| sequence.info }
  }
  payload.to_json
end
Private Instance Methods
create_entry_batch_file()
click to toggle source
Create a temporary file containing sequence ids to fetch.
# File lib/sequenceserver/sequence.rb, line 251
# Write the sequence ids to fetch, one per line, to a temporary file and
# remember that file in @batch_file.
#
# The timestamp used in the file name is formatted with digits only. The
# default Time#to_s form contains spaces and colons; those would end up in a
# path that is later interpolated into a shell command, and colons are not
# valid in file names on every platform.
#
# Returns the Tempfile (flushed, still open).
def create_entry_batch_file
  timestamp = Time.now.strftime('%Y%m%d%H%M%S')
  @batch_file = Tempfile.new("#{timestamp}_batch").tap do |f|
    f.write(sequence_ids.join("\n"))
    # Flush so that another process opening the path sees the ids.
    f.flush
  end
end
database_names()
click to toggle source
# File lib/sequenceserver/sequence.rb, line 215
# Look up the selected databases via Database[] and return the value of
# #name for each of them.
def database_names
  selected = Database[database_ids]
  selected.map { |database| database.name }
end
database_titles()
click to toggle source
# File lib/sequenceserver/sequence.rb, line 219
# Look up the selected databases via Database[] and return the value of
# #title for each of them.
def database_titles
  selected = Database[database_ids]
  selected.map { |database| database.title }
end
error_msgs()
click to toggle source
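Error messages to report when the number of sequences retrieved differs from the number requested. (The source carries a `rubocop:disable Metrics/MethodLength` directive above this method.)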
# File lib/sequenceserver/sequence.rb, line 259
# Build the error messages to show alongside the retrieved sequences.
#
# Returns an empty array when as many sequences were retrieved as were
# requested. Otherwise returns an array holding a single [title, body] pair
# that lists the requested ids, the database titles and the number of
# sequences actually found.
#
# NOTE(review): the line breaks inside the message body were reconstructed;
# confirm the layout against the source file.
def error_msgs
  return [] if sequences.length == sequence_ids.length

  [
    ['ERROR: incorrect number of sequences found.',
     <<~MSG
       You requested #{sequence_ids.length} sequence(s) with the following
       identifiers:
         #{sequence_ids.join(', ')}
       from the following databases:
         #{database_titles.join(', ')}
       but we found #{sequences.length} sequence(s).

       This is likely due to a problem with how databases are formatted.
       Please share this text with the person managing this website.

       If you are the admin and are confident that your databases are
       correctly formatted, you have likely encountered a weird bug.
       In this case, please raise an issue at:
       https://github.com/wurmlab/sequenceserver/issues

       If any sequences were retrieved, you can find them below
       (but some may be incorrect, so be careful!)
     MSG
    ]
  ]
end
run()
click to toggle source
# File lib/sequenceserver/sequence.rb, line 197
# Run blastdbcmd against the selected databases for the ids listed in
# @batch_file and store one Sequence per output line in @sequences.
#
# Output fields are separated by tabs rather than spaces: the title field
# (%t) can itself contain spaces, so splitting on a space would produce the
# wrong number of fields for Sequence.new.
# NOTE(review): the rendered source shows single spaces as the delimiter;
# literal tabs may have been lost in rendering - confirm against the file.
#
# The batch file is removed as soon as the command has finished, including
# when the command raises, so a failed run does not leave it behind.
#
# When in_file is truthy the object is extended with IO and #write is called.
def run
  command = "blastdbcmd -outfmt '%g\t%i\t%a\t%t\t%s'" \
            " -db '#{database_names.join(' ')}'" \
            " -entry_batch '#{@batch_file.path}'"

  begin
    out, = sys(command, path: config[:bin])
  ensure
    @batch_file.unlink
  end

  @sequences = out.each_line.map do |line|
    # Stop codons in amino acid sequence databases show up as invalid
    # UTF-8 characters in the output and cause the subsequent call to
    # `split` to fail. We replace invalid UTF-8 characters with X.
    line = line.encode('UTF-8', invalid: :replace, replace: 'X')
    Sequence.new(*line.chomp.split("\t"))
  end

  extend(IO) && write if in_file
end
validate()
click to toggle source
# File lib/sequenceserver/sequence.rb, line 223
# Check the requested database ids and sequence ids before retrieval.
#
# Raises DatabaseUnreachableError unless database_ids is a non-empty Array
# whose intersection with Database.ids has the same length as database_ids
# itself. Raises InvalidSequenceIdError, listing the offending ids, when any
# sequence id does not match SequenceServer::BLAST::VALID_SEQUENCE_ID.
#
# Returns true when both checks pass.
def validate
  known_ids = Database.ids

  databases_ok = database_ids.is_a?(Array) &&
                 !database_ids.empty? &&
                 (known_ids & database_ids).length == database_ids.length
  unless databases_ok
    raise DatabaseUnreachableError,
          "Database id should be one of: #{known_ids.join("\n")}"
  end

  rejected = sequence_ids.select do |id|
    id !~ SequenceServer::BLAST::VALID_SEQUENCE_ID
  end
  return true if rejected.empty?

  raise InvalidSequenceIdError,
        "Invalid sequence id(s): #{rejected.join(', ')}"
end