class Diarize::Audio
Attributes
file[R]
path[R]
uri[R]
Public Class Methods
new(uri_url_or_file_name)
click to toggle source
# File lib/diarize/audio.rb, line 5
# Builds an Audio from a URI object, a URL string or a local file name.
#
# A String whose scheme is http, https or file is used as-is; any other
# String is treated as a local file name and turned into an absolute
# file:/// URI. When the resulting URI is not a file URI, its content is
# downloaded to '/tmp/<MD5 hex digest of the URI>'.
#
# Sets @uri, @path (local path of the audio) and @file (an open File on it).
#
# @param uri_url_or_file_name [URI, String] location of the audio
# @raise [ArgumentError] if the argument is neither a URI nor a String
# @raise [RuntimeError] if nothing exists at the resolved local path
def initialize(uri_url_or_file_name)
  @uri =
    case uri_url_or_file_name
    when URI
      uri_url_or_file_name
    when String # url or file name
      parsed = URI.parse(uri_url_or_file_name)
      if %w[http https file].include?(parsed.scheme)
        parsed # url or file:/// uri, use it unchanged
      else
        URI.join('file:///', File.expand_path(uri_url_or_file_name))
      end
    else
      # Previously this fell through with @uri == nil and failed later with
      # an unrelated NoMethodError.
      raise ArgumentError,
            "Expected a URI or a String, got #{uri_url_or_file_name.class}"
    end

  if @uri.scheme == 'file'
    @path = @uri.path
  else
    # Remote file, we download it locally.
    @path = "/tmp/#{Digest::MD5.hexdigest(@uri.to_s)}"
    # URI.open replaces Kernel#open, which no longer accepts URIs since
    # Ruby 3.0. The block form closes the remote stream, and the local file
    # is only created once the download has succeeded.
    # NOTE(review): certificate verification is disabled (VERIFY_NONE), as
    # in the original code; confirm this is still wanted.
    URI.open(@uri, ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE) do |remote|
      File.open(@path, 'wb') { |local| local << remote.read }
    end
  end

  raise "Unable to locate '#{@path}' from '#{@uri.inspect}'." unless File.exist?(@path)

  @file = File.new(@path)
end
Public Instance Methods
analyze!(train_speaker_models = true)
click to toggle source
# File lib/diarize/audio.rb, line 31
# Runs the LIUM speaker diarization (via ester2) on the file at @path and
# stores the resulting clusters in @clusters and the segments, sorted by
# start, in @segments.
#
# @param train_speaker_models [Boolean] when truthy, train_speaker_gmms is
#   called once the segments are available
# @return the result of train_speaker_gmms, or nil when training is skipped
def analyze!(train_speaker_models = true)
  # 12 MFCC + Energy
  # 1: static coefficients are present in the file
  # 1: energy coefficient is present in the file
  # 0: delta coefficients are not present in the file
  # 0: delta energy coefficient is not present in the file
  # 0: delta delta coefficients are not present in the file
  # 0: delta delta energy coefficient is not present in the file
  # 13: total size of a feature vector in the mfcc file
  # 0:0:0: no feature normalization
  features_description = 'audio2sphinx,1:1:0:0:0:0,13,0:0:0:0'

  # parameter = fr.lium.spkDiarization.parameter.Parameter.new
  parameter = Rjb::import('fr.lium.spkDiarization.parameter.Parameter').new
  parameter.show = show
  parameter.parameterInputFeature.setFeaturesDescription(features_description)
  # parameter.parameterDiarization.cEClustering = true # We use CE clustering by default
  parameter.parameterInputFeature.setFeatureMask(@path)

  @clusters = ester2(parameter)
  unsorted_segments = Segmentation.from_clusters(self, @clusters)
  @segments = unsorted_segments.sort_by { |segment| segment.start }

  return unless train_speaker_models

  train_speaker_gmms
end
base_uri()
click to toggle source
# File lib/diarize/audio.rb, line 97
# Returns a copy of uri with its fragment cleared; uri itself is not modified.
def base_uri
  # Remove the fragment if there is one
  uri.clone.tap { |copy| copy.fragment = nil }
end
clean!()
click to toggle source
# File lib/diarize/audio.rb, line 52
# Removes the local copy of the audio, unless the audio was initialised
# from a local file URI (that file is never deleted).
#
# The open @file handle is closed first so its descriptor is released, and
# the deletion is skipped when the file is already gone, so calling clean!
# more than once no longer raises Errno::ENOENT.
#
# @return [Integer, nil] the value of File.delete when a file was removed,
#   nil otherwise
def clean!
  return if @uri.scheme == 'file' # Don't delete local file if initialised from local URI

  @file.close if @file && !@file.closed?
  File.delete(@path) if File.exist?(@path)
end
duration_by_speaker(speaker)
click to toggle source
# File lib/diarize/audio.rb, line 71
# Total duration of all the segments attributed to the given speaker.
#
# @param speaker the speaker to look up; nil/false short-circuits
# @return [Float, nil] the summed segment durations (0.0 when the speaker
#   has no segment), or nil when no speaker is given
def duration_by_speaker(speaker)
  return unless speaker

  # The 0.0 seed keeps the result a Float even when there is no segment.
  segments_by_speaker(speaker).sum(0.0, &:duration)
end
namespaces()
click to toggle source
Calls superclass method
# File lib/diarize/audio.rb, line 85
# Namespaces of the superclass, extended with the 'ws' and 'mo' prefixes.
# On a key clash the entries defined here win (Hash#merge semantics).
def namespaces
  audio_namespaces = {
    'ws' => 'http://wsarchive.prototype0.net/ontology/',
    'mo' => 'http://purl.org/ontology/mo/'
  }
  super.merge(audio_namespaces)
end
rdf_mapping()
click to toggle source
# File lib/diarize/audio.rb, line 112
# Maps the 'ws:segment' property to the segments of this audio.
# Goes through the segments reader, so it is subject to whatever that
# reader does when no analysis has been run.
def rdf_mapping
  mapping = {}
  mapping['ws:segment'] = segments
  mapping
end
segments()
click to toggle source
# File lib/diarize/audio.rb, line 57
# Segments stored in @segments.
#
# @raise [RuntimeError] when @segments has not been set yet
def segments
  return @segments if @segments

  raise RuntimeError, "You need to run analyze! before being able to access the analysis results"
end
segments_by_speaker(speaker)
click to toggle source
# File lib/diarize/audio.rb, line 67
# Segments whose speaker equals (==) the given speaker, in their original order.
def segments_by_speaker(speaker)
  segments.select do |candidate|
    candidate.speaker == speaker
  end
end
show()
click to toggle source
# File lib/diarize/audio.rb, line 116
# The LIUM show name: the file name of @path without directory, cut at its
# first dot (so 'a.b.wav' gives 'a').
def show
  file_name = File.expand_path(@path).split('/').last
  file_name.split('.').first
end
speakers()
click to toggle source
# File lib/diarize/audio.rb, line 62
# Distinct speakers of the segments, in order of first appearance.
# The list is computed once and memoized in @speakers.
def speakers
  @speakers ||= segments.map(&:speaker).uniq
end
top_speakers()
click to toggle source
# File lib/diarize/audio.rb, line 79
# Speakers ordered from the longest to the shortest total speaking duration.
#
# sort_by evaluates duration_by_speaker once per speaker instead of on
# every comparison. The relative order of speakers with equal durations is
# unspecified, as it already was with sort.
def top_speakers
  speakers.sort_by { |speaker| duration_by_speaker(speaker) }.reverse
end
type_uri()
click to toggle source
# File lib/diarize/audio.rb, line 104
# Type URI of this audio; falls back to 'mo:AudioFile' when @type_uri is unset.
def type_uri
  return @type_uri if @type_uri

  'mo:AudioFile'
end
type_uri=(type_uri)
click to toggle source
# File lib/diarize/audio.rb, line 108
# Stores the given value in @type_uri, overriding the default type.
#
# @param type_uri the new type URI
def type_uri=(type_uri)
  @type_uri = type_uri
end
uri=(uri)
click to toggle source
# File lib/diarize/audio.rb, line 93
# Replaces @uri with the given value. Nothing else is updated: @path and
# @file keep whatever they held before.
#
# @param uri the new URI
def uri=(uri)
  @uri = uri
end
Protected Instance Methods
ester2(parameter)
click to toggle source
# File lib/diarize/audio.rb, line 162
# Drives the LIUM Diarization Java class through Rjb: sanityCheck,
# segmentation, clusteringLinear, clustering, decode, speech and gender,
# followed by speakerClustering when CE clustering is enabled on the
# parameter.
#
# @param parameter an Rjb instance of fr.lium.spkDiarization.parameter.Parameter
# @return [Rjb::JavaObjectWrapper] wrapper around the resulting clusters
def ester2(parameter)
  diarization_class_name = 'fr.lium.spkDiarization.system.Diarization'

  # diarization = fr.lium.spkDiarization.system.Diarization.new
  diarization = Rjb::import(diarization_class_name).new
  diarization_parameter = parameter.parameterDiarization

  # cluster_set = diarization.initialize__method(parameter)
  cluster_set = diarization.initialize(parameter)

  # feature_set = fr.lium.spkDiarization.system.Diarization.load_feature(parameter, cluster_set, parameter.parameterInputFeature.getFeaturesDescString())
  feature_set = Rjb::import(diarization_class_name).load_feature(
    parameter,
    cluster_set,
    parameter.parameterInputFeature.getFeaturesDescString()
  )
  feature_set.setCurrentShow(parameter.show)

  feature_count = feature_set.getNumberOfFeatures
  unless parameter.parameterDiarization.isLoadInputSegmentation
    cluster_set.getFirstCluster().firstSegment().setLength(feature_count)
  end

  initial_clusters = diarization.sanityCheck(cluster_set, feature_set, parameter)
  segmented_clusters = diarization.segmentation("GLR", "FULL", initial_clusters, feature_set, parameter)
  linear_clusters = diarization.clusteringLinear(
    diarization_parameter.getThreshold("l"), segmented_clusters, feature_set, parameter
  )
  h_clusters = diarization.clustering(
    diarization_parameter.getThreshold("h"), linear_clusters, feature_set, parameter
  )
  decoded_clusters = diarization.decode(
    8, diarization_parameter.getThreshold("d"), h_clusters, feature_set, parameter
  )
  speech_clusters = diarization.speech(
    "10,10,50", cluster_set, initial_clusters, decoded_clusters, feature_set, parameter
  )
  clusters = diarization.gender(cluster_set, speech_clusters, feature_set, parameter)

  if parameter.parameterDiarization.isCEClustering
    # If true, the program computes the NCLR/CE clustering at the end.
    # The diarization error rate is minimized.
    # If this option is not set, the program stops right after the detection of the gender
    # and the resulting segmentation is sufficient for a transcription system.
    clusters = diarization.speakerClustering(
      diarization_parameter.getThreshold("c"), "ce", cluster_set, clusters, feature_set, parameter
    )
  end

  Rjb::JavaObjectWrapper.new(clusters)
end
train_speaker_gmms()
click to toggle source
# File lib/diarize/audio.rb, line 123
# Trains one GMM per speaker and assigns it to the speaker's model.
#
# Models are first initialised with MTrainInit (init method 'copy', model
# mask pointing at the 'ubm.gmm' file next to this source file), then
# adapted with MTrainMAP. The i-th resulting model is assigned to speakers[i].
#
# @raise [RuntimeError] through the segments reader when no analysis has been run
def train_speaker_gmms
  segments # Making sure we have pre-computed segments and clusters

  # Would be nice to reuse GMMs computed as part of the segmentation process
  # but not sure how to access them without changing the Java API

  parameter_class_name = "fr.lium.spkDiarization.parameter.Parameter"
  features_description = 'audio2sphinx,1:3:2:0:0:0,13,1:1:300:4'

  # Start by copying models from the universal background model, one per speaker, using MTrainInit
  # init_parameter = fr.lium.spkDiarization.parameter.Parameter.new
  init_parameter = Rjb::import(parameter_class_name).new
  init_parameter.parameterInputFeature.setFeaturesDescription(features_description)
  init_parameter.parameterInputFeature.setFeatureMask(@path)
  init_parameter.parameterInitializationEM.setModelInitMethod('copy')
  ubm_path = File.expand_path('ubm.gmm', File.dirname(__FILE__))
  init_parameter.parameterModelSetInputFile.setMask(ubm_path)

  # features = fr.lium.spkDiarization.lib.MainTools.readFeatureSet(init_parameter, @clusters)
  features = Rjb::import("fr.lium.spkDiarization.lib.MainTools")
                .readFeatureSet(init_parameter, @clusters.java_object)

  # init_vect = java.util.ArrayList.new(@clusters.cluster_get_size)
  init_vect = Rjb::JavaObjectWrapper.new("java.util.ArrayList", @clusters.java_object.cluster_get_size)

  # fr.lium.spkDiarization.programs.MTrainInit.make(features, @clusters, init_vect, init_parameter)
  Rjb::import("fr.lium.spkDiarization.programs.MTrainInit")
     .make(features, @clusters.java_object, init_vect.java_object, init_parameter)

  # Adapt models to individual speakers detected in the audio, using MTrainMap
  # map_parameter = fr.lium.spkDiarization.parameter.Parameter.new
  map_parameter = Rjb::import(parameter_class_name).new
  map_parameter.parameterInputFeature.setFeaturesDescription(features_description)
  map_parameter.parameterInputFeature.setFeatureMask(@path)
  map_parameter.parameterEM.setEMControl('1,5,0.01')
  map_parameter.parameterVarianceControl.setVarianceControl('0.01,10.0')
  map_parameter.show = show
  features.setCurrentShow(map_parameter.show)

  # gmm_vect = java.util.ArrayList.new
  gmm_vect = Rjb::JavaObjectWrapper.new("java.util.ArrayList")

  # fr.lium.spkDiarization.programs.MTrainMAP.make(features, @clusters, init_vect, gmm_vect, map_parameter)
  Rjb::import("fr.lium.spkDiarization.programs.MTrainMAP")
     .make(features, @clusters.java_object, init_vect.java_object, gmm_vect.java_object, map_parameter)

  # Populating the speakers with their GMMs
  gmm_vect.each_with_index do |speaker_model, index|
    speakers[index].model = speaker_model.java_object
  end
end