class ManMerger

CHANGELOG

2173 Master file: change sp16,17,18 to *_rev

Constants

EPOCH_LENGTH

T_DRIVE_DIR = "/home/pwm4/Windows/tdrive/IPM/Modafinil_FD_42.85h/"

LIST_DIR
T_DRIVE_DIRS

Public Instance Methods

get_subject_year(file_list) click to toggle source
# File lib/man_merger.rb, line 279
# Works out the single four-digit year shared by a subject's files.
#
# Every entry's :pattern is scanned for three two-digit groups enclosed by
# underscores ("_ddDDdd_"); the third group is taken as the two-digit year.
# Entries without such a group are ignored.
#
# file_list - Array of Hashes, each read through its :pattern key.
#
# Returns an Integer year: two-digit values above 30 map to 19xx, all others
# to 20xx. When no entry carries a date the two-digit value is 0, i.e. 2000.
# Raises StandardError if the entries disagree on the year.
def get_subject_year(file_list)
  date_re = /_(\d\d)(\d\d)(\d\d)_/

  distinct_years = file_list
    .map { |entry| (found = date_re.match(entry[:pattern])) && found[3] }
    .compact
    .uniq

  if distinct_years.length > 1
    raise StandardError, "More than one unique year found in files: #{distinct_years}"
  end

  # nil.to_i is 0, so an empty list falls through to the 20xx branch.
  two_digit = distinct_years.first.to_i
  century = two_digit > 30 ? 1900 : 2000
  century + two_digit
end
load_subject_list() click to toggle source
# File lib/man_merger.rb, line 213
# Builds the per-subject list of .man files to merge from the CSV listings
# found in LIST_DIR.
#
# Only regular files whose name matches "<subject code>SLEEP.csv"
# (case-insensitive) are read; everything else in the directory is skipped
# without being opened. The first line of each listing is treated as a header.
#
# Every usable row produces a Hash with these keys:
#   :pattern           - row[0] with its ".man" extension removed
#   :type              - :sleep or :wake, taken from an "_s"/"_sp" or
#                        "_w"/"_wp" marker followed by a digit in the pattern
#   :start_time        - {hour:, min:, sec:} parsed from row[1]
#   :start_labtime     - row[2] converted with to_f
#   :last_line_number  - row[3] converted with to_i
#   :last_line_time    - {hour:, min:, sec:} parsed from row[4]
#   :last_line_labtime - row[5] converted with to_f
#
# Rows lacking a valid start or end time are logged as errors and skipped;
# rows whose first column is not a ".man" name are logged (info) and skipped.
#
# Returns a Hash of upper-cased subject code => Array of row Hashes.
# Raises StandardError when a pattern carries neither, or both, of the
# sleep/wake markers.
def load_subject_list
  subject_info = {}

  # Turns "HH:MM:SS:mmm" into {hour:, min:, sec:}; nil when absent/malformed.
  clock_time = lambda do |text|
    m = /(\d\d):(\d\d):(\d\d):(\d\d\d)/.match(text)
    m && { hour: m[1].to_i, min: m[2].to_i, sec: m[3].to_i }
  end

  Dir.foreach(LIST_DIR) do |file|
    # Validate the name before touching the file so unrelated entries
    # ('.', '..', sub-directories, other files) are never opened.
    matched_sc = /(.*)SLEEP\.csv/i.match(file)
    next unless matched_sc

    # File.join tolerates LIST_DIR with or without a trailing separator.
    path = File.join(LIST_DIR, file)
    next unless File.file?(path)

    subject_code = matched_sc[1].upcase
    subject_info[subject_code] = []

    # CSV.foreach closes the file when iteration ends (or raises), and the
    # keyword form of `headers:` works on both Ruby 2.x and 3.x.
    CSV.foreach(path, headers: true) do |row|
      start_time = clock_time.call(row[1])
      unless start_time
        MY_LOG.error "No Valid Start Time Found: #{row}"
        next
      end

      last_line_time = clock_time.call(row[4])
      unless last_line_time
        MY_LOG.error "No Valid End Time Found: #{row}"
        next
      end

      pattern = /(.*)\.man/i.match(row[0])
      unless pattern
        MY_LOG.info "No Valid File Name Found: #{row}"
        next
      end

      # Determine if sleep or wake file; exactly one marker must be present.
      name = pattern[1]
      sleep_marker = !(/_sp?\d/i =~ name).nil?
      wake_marker = !(/_wp?\d/i =~ name).nil?
      raise StandardError, "CAN'T DETERMINE SP/WP (none match): #{name}" unless sleep_marker || wake_marker
      raise StandardError, "CAN'T DETERMINE SP/WP (both match): #{name}" if sleep_marker && wake_marker

      subject_info[subject_code] << {
        start_time: start_time,
        last_line_time: last_line_time,
        start_labtime: row[2].to_f,
        last_line_number: row[3].to_i,
        last_line_labtime: row[5].to_f,
        pattern: name,
        type: sleep_marker ? :sleep : :wake
      }
    end
  end

  subject_info
end
merge_files() click to toggle source
# File lib/man_merger.rb, line 12
  # Merges all scored .man files of every subject into one CSV per subject.
  #
  # For each subject returned by load_subject_list, a
  # "<subject_code>_merged.csv" file is written with the columns
  # SUBJECT_CODE, LABTIME, SLEEP_STAGE, SLEEP_PERIOD, SEM_FLAG. Each listed
  # .man file is located under T_DRIVE_DIRS, cross-checked against the values
  # recorded in the listing (first/last clock time, line count, labtimes), and
  # then emitted one output row per input line. Mismatches are logged through
  # MY_LOG and do not stop the merge.
  #
  # Raises StandardError when a listed file cannot be matched to exactly one
  # file on disk, when a file has no parseable first/last timestamp or line,
  # when the labtime seconds disagree with the clock seconds, or when a line
  # code cannot be mapped to a sleep stage.
  def merge_files
    clock_re = /(\d\d):(\d\d):(\d\d):(\d\d\d)/

    subject_list = load_subject_list
    subject_list.each do |subject_code, file_list|
      merged_path = "/usr/local/htdocs/access/lib/data/etl/klerman_merge_man_files/merged_files/#{subject_code}_merged.csv"

      # Block form closes the output file even if a validation error is raised
      # part-way through the subject.
      CSV.open(merged_path, "wb") do |merged_file|
        merged_file << %w(SUBJECT_CODE LABTIME SLEEP_STAGE SLEEP_PERIOD SEM_FLAG)
        MY_LOG.info "---- #{subject_code}"

        previous_first_labtime = nil
        previous_last_labtime = nil
        subject_year = get_subject_year(file_list)

        file_list.each do |file_hash|
          # Primary location first; fall back to the second drive layout only
          # when the first does not give exactly one hit.
          matched_files = Dir.glob("#{T_DRIVE_DIRS[0]}#{subject_code}/PSG/SCORED/**/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD)
          matched_files = Dir.glob("#{T_DRIVE_DIRS[1]}#{subject_code}/Sleep/#{file_hash[:pattern]}.man", File::FNM_CASEFOLD) if matched_files.length != 1

          ## Validate File List
          if matched_files.length != 1
            raise StandardError, "None or more than one matched file. #{file_hash[:pattern]} #{matched_files} #{matched_files.length} #{subject_code}"
          end
          man_file_path = matched_files[0]

          LOADER_LOGGER.info "--- Loading #{man_file_path}"
          file_info = {}

          ## Ignore Corrupted Files
          #next if tasci_file_path == "/home/pwm4/Windows/tdrive/IPM/AFOSR9_Slp_Restrict//24B7GXT3/PSG/TASCI_SEM/24b7gxt3_082907_wp19ap1_PID_24B7GXT3_082907_WP19AP1_RID_0_SEM.TASCI"

          # Date from file name (groups are used as month, day, two-digit year)
          matched_date = /_(\d\d)(\d\d)(\d\d)_/.match(man_file_path)
          file_info[:fn_date] = (matched_date ? Time.zone.local((matched_date[3].to_i > 30 ? matched_date[3].to_i + 1900 : matched_date[3].to_i + 2000), matched_date[1].to_i, matched_date[2].to_i) : nil)

          # read file; records are separated by carriage returns. The block
          # form closes the handle as soon as the lines are in memory.
          lines = File.open(man_file_path) { |man_file| man_file.readlines("\r") }
          # delete possible empty last line
          lines.pop if lines.last.blank?

          # get file first and last times
          first_match = clock_re.match(lines.first)
          last_match = clock_re.match(lines.last)
          if first_match.nil? || last_match.nil?
            raise StandardError, "No valid first/last time found in #{man_file_path}"
          end
          file_info[:first_time] = {hour: first_match[1].to_i, min: first_match[2].to_i, sec: first_match[3].to_i}
          file_info[:last_time] = {hour: last_match[1].to_i, min: last_match[2].to_i, sec: last_match[3].to_i}

          # validate first/last times
          if file_hash[:start_time] != file_info[:first_time]
            MY_LOG.error "---- FIRST TIME MISMATCH ---\n#{man_file_path}\n#{file_hash[:start_time]} #{file_info[:first_time]}\n\n"
          end
          if file_hash[:last_line_time] != file_info[:last_time]
            MY_LOG.error "---- LAST TIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_time]} #{file_info[:last_time]}\n\n"
          end
          if file_hash[:last_line_number] != lines.length
            MY_LOG.error "---- LINE COUNT MISMATCH ----\n#{man_file_path}\n#{file_hash[:last_line_number]} #{lines.length}\n\n"
          end

          ##
          # VALIDATION
          # The decimal labtimes from the listing are replaced in place by
          # Labtime objects.
          file_hash[:start_labtime] = Labtime.from_decimal(file_hash[:start_labtime], subject_year)
          file_hash[:last_line_labtime] = Labtime.from_decimal(file_hash[:last_line_labtime], subject_year)

          start_realtime = file_hash[:start_labtime].to_time
          last_line_realtime = file_hash[:last_line_labtime].to_time

          # Combine the calendar date implied by the listing's labtime with the
          # clock time read from the file itself.
          first_realtime = file_hash[:start_labtime].time_zone.local(start_realtime.year, start_realtime.month, start_realtime.day, file_info[:first_time][:hour], file_info[:first_time][:min], file_info[:first_time][:sec])
          last_realtime = file_hash[:last_line_labtime].time_zone.local(last_line_realtime.year, last_line_realtime.month, last_line_realtime.day, file_info[:last_time][:hour], file_info[:last_time][:min], file_info[:last_time][:sec])

          file_info[:first_labtime] = Labtime.parse(first_realtime)
          file_info[:last_labtime] = Labtime.parse(last_realtime)
          # NOTE(review): 30 is presumably the epoch length in seconds, while
          # the per-line labtime below uses EPOCH_LENGTH - confirm they agree.
          predicted_last_labtime = Labtime.parse(file_info[:first_labtime].to_time + ((lines.length - 1) * 30).seconds)

          sep = false
          if (file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds).abs > 2
            MY_LOG.error "---- FIRST LABTIME MISMATCH ----\n#{man_file_path}\n#{file_hash[:start_labtime].time_in_seconds - file_info[:first_labtime].time_in_seconds} | #{file_hash[:start_labtime].to_time}\n#{file_hash[:start_labtime]} | #{file_info[:first_labtime]}\n"
            sep = true
          end

          # These checks fail if DST TRANSITION HAPPENS
          if last_line_realtime.dst? == start_realtime.dst?
            if (file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds).abs > 2
              MY_LOG.error "---- LAST LABTIME MISMATCH  ----\n#{man_file_path}\n#{file_hash[:last_line_labtime].time_in_seconds - file_info[:last_labtime].time_in_seconds} | #{file_hash[:last_line_labtime].to_time}\n#{file_hash[:last_line_labtime]} | #{file_info[:last_labtime]}\n"
              sep = true
            end
            if (file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 0
              MY_LOG.error "---- PRED LABTIME MISMATCH  ----\n#{man_file_path}\n#{(file_info[:last_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_info[:last_labtime]} | #{predicted_last_labtime}\n"
              sep = true
            end
          end

          if (file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds).abs > 2
            # Logs the listing's labtime (file_hash), which is the value being
            # compared; file_info never has a :last_line_labtime key.
            MY_LOG.error "---- !PRED LABTIME MISMATCH ----\n#{man_file_path}\n#{(file_hash[:last_line_labtime].time_in_seconds - predicted_last_labtime.time_in_seconds)} | #{predicted_last_labtime.to_time}\nl: #{file_hash[:last_line_labtime]} | #{predicted_last_labtime}\n"
            sep = true
          end

          unless previous_first_labtime.nil? || previous_last_labtime.nil?
            MY_LOG.error "Start time is before previous end labtime for #{man_file_path}" if file_info[:first_labtime] < previous_last_labtime
          end

          raise StandardError, "AHHHHH" if file_info[:first_labtime].sec != first_realtime.sec
          raise StandardError, "AHHHHH" if file_info[:last_labtime].sec != last_realtime.sec

          MY_LOG.info "-----------------------------------\n\n" if sep

          last_labtime = nil
          # Toggles on each code 8 in a sleep file: first is lights off,
          # the next is lights on, and so on.
          ibob_flag = 0

          lines.each_with_index do |line, line_number|
            # Input codes in a sleep .man file:
            #   0 undef/unscored, 1-4 stage 1-4, 5 wake, 6 REM, 7 MVT,
            #   8 LOff and LOn
            # Input codes in a wake .man file:
            #   0 undef/unscored, 1-4 stage 1-4, 5 wake, 6 REM, 7 MVT,
            #   8 SEM
            line_labtime = file_info[:first_labtime].add_seconds(EPOCH_LENGTH * line_number)

            code_match = /(\d)\s\d\d:\d\d:\d\d:\d\d\d/.match(line)
            if code_match.nil?
              raise StandardError, "Cannot parse line #{line_number + 1} of #{man_file_path}: #{line.inspect}"
            end
            line_code = code_match[1].to_i

            # Sleep Period Coding:
            # 1      Sleep Onset (Lights Off) (IN BED)
            # 2      Sleep Offset (Lights On) (OUT OF BED)
            if file_hash[:type] == :sleep && line_code == 8
              if ibob_flag == 0
                sleep_period = 1
                ibob_flag = 1
              else
                sleep_period = 2
                ibob_flag = 0
              end
            else
              sleep_period = nil
            end

            # Sleep Stage Coding (output):
            # 1-4    stage 1-4
            # 5      REM
            # 6      MT
            # 7      Undef
            # 9      Wake
            line_event =
              case line_code
              when 1..4 then line_code
              when 0 then 7
              when 5, 8 then 9
              when 6 then 5
              when 7 then 6
              else raise StandardError, "Cannot map the following event: #{line_code}"
              end

            # SEM Event Coding:
            # 1      Slow Eye Movement
            # 0      No Slow Eye Movement
            sem_event = (file_hash[:type] == :wake && line_code == 8) ? 1 : 0

            last_labtime = line_labtime

            merged_file << [subject_code.upcase, line_labtime.to_decimal, line_event, sleep_period, sem_event]
          end

          previous_first_labtime = file_info[:first_labtime]
          previous_last_labtime = last_labtime
        end
      end

      MY_LOG.info "---- end #{subject_code}\n\n"
    end
  end