class FuzzyDate

Constants

DATE_SEPARATOR
VERSION

Attributes

circa[R]
day[R]
era[R]
fixed[R]
full[R]
long[R]
month[R]
month_name[R]
original[R]
short[R]
year[R]

Public Class Methods

new( date, euro = false ) click to toggle source
# File lib/fuzzy-date/fuzzy-date.rb, line 3
# Builds a new instance: prepares the lookup tables and default state via
# setup, then parses the given date via analyze.
#
# date - the date value to parse.
# euro - forwarded to analyze; when true, ambiguous all-numeric dates are
#        read day-first rather than month-first.
def initialize( date, euro = false )
  setup
  analyze( date, euro )
end
parse( date, euro = false ) click to toggle source
# File lib/fuzzy-date.rb, line 8
# Convenience constructor: returns a FuzzyDate built from the given date.
#
# date - the date value to parse.
# euro - passed straight through to FuzzyDate.new (defaults to false).
def self.parse( date, euro = false )
  FuzzyDate.new( date, euro )
end

Public Instance Methods

to_hash() click to toggle source
# File lib/fuzzy-date/fuzzy-date.rb, line 8
# Returns the parsed date as a Hash keyed by symbol, one entry per public
# attribute, in alphabetical key order.
def to_hash
  fields = [
    :circa, :day, :era, :fixed, :full, :long,
    :month, :month_name, :original, :short, :year
  ]

  fields.each_with_object( {} ) do |field, result|
    result[ field ] = instance_variable_get( "@#{ field }" )
  end
end

Private Instance Methods

analyze( date, euro ) click to toggle source

Note: This is only for single dates - not ranges.

Possible incoming date formats:

  • YYYY-MM-DD - starts with 3 or 4 digit year, and month and day may be 1 or 2 digits

  • YYYY-MM - 3 or 4 digit year, then 1 or 2 digit month

  • YYYY - 1 to 4 digit year

  • MM-DD-YYYY - 1 or 2 digit month, then 1 or 2 digit day, then 1 to 4 digit year

  • DD-MM-YYYY - 1 or 2 digit day, then 1 or 2 digit month, then 1 to 4 digit year (recognized only when euro is true)

  • MM-YYYY - 1 or 2 digit month, then 1 to 4 digit year

  • DD-MMM - 1 or 2 digit day, then month name or abbreviation

  • DD-MMM-YYYY - 1 or 2 digit day, then month name or abbreviation, then 1 to 4 digit year

  • MMM-YYYY - month name or abbreviation, then 1 to 4 digit year

  • MMM-DD-YYYY - month name or abbreviation, then 1 or 2 digit day, then 1 to 4 digit year

  • YYYY-MMM - 1 to 4 digit year, then month name or abbreviation

Notes:

  • Commas are optional.

  • Delimiters can be almost any non-alphanumeric character.

  • All dates may be suffixed with the era (AD, BC, CE, BCE). AD is assumed.

  • Dates may be prefixed by circa words (Circa, About, Abt).

# File lib/fuzzy-date/analyze.rb, line 30
# Parses a single date (not a range) into year, month and day parts and
# builds the short, long and full display strings.
#
# date - the value to parse. It is cleaned with clean_parameter and
#        normalized with massage before being matched against
#        @date_patterns.
# euro - when true, a numeric date that fits both the day-first and the
#        month-first pattern is read as day-month-year.
#
# Sets @original, @fixed, @year, @month, @day, @month_name, @short, @long
# and @full.
#
# Raises ArgumentError when no pattern matches or when the month or day is
# out of range.
def analyze( date, euro )

  date = clean_parameter date
  @original = date

  date = massage date
  @fixed = date

  @year = @month = @day = nil

  #- Turns a captured month abbreviation (e.g. "Jan") into its month number
  month_number_for = lambda { |text| @month_names.key( @month_abbreviations[ text ] ) }
  month_text = nil

  if date =~ @date_patterns[ :yyyy ]
    @year  = $1.to_i

  elsif date =~ @date_patterns[ :yyyy_mm_dd_and_yyyy_mm ]
    @year  = $1.to_i
    @month = $2.to_i unless $2.nil?
    @day   = $3.to_i unless $3.nil?

  elsif ( date =~ @date_patterns[ :dd_mm_yyyy ] ) && euro
    @day   = $1.to_i
    @month = $2.to_i
    @year  = $3.to_i

  elsif date =~ @date_patterns[ :mm_dd_yyyy ]
    @month = $1.to_i
    @day   = $2.to_i
    @year  = $3.to_i

  elsif date =~ @date_patterns[ :mm_yyyy ]
    @month = $1.to_i
    @year  = $2.to_i

  elsif date =~ @date_patterns[ :dd_mmm_yyyy_and_dd_mmm ]
    month_text  = $2.to_s.capitalize
    @month      = month_number_for.call( month_text )
    @day        = $1.to_i
    @year       = $3.to_i unless $3.nil?

  elsif date =~ @date_patterns[ :mmm_dd_yyyy ]
    month_text  = $1.to_s.capitalize
    @month      = month_number_for.call( month_text )
    @day        = $2.to_i
    @year       = $3.to_i unless $3.nil?

  elsif date =~ @date_patterns[ :mmm_yyyy_and_mmm ]
    month_text  = $1.to_s.capitalize
    @month      = month_number_for.call( month_text )
    @year       = $2.to_i unless $2.nil?

  elsif date =~ @date_patterns[ :yyyy_mmm_dd ]
    @year       = $1.to_i unless $1.nil?
    month_text  = $2.to_s.capitalize
    @month      = month_number_for.call( month_text )
    @day        = $3.to_i

  elsif date =~ @date_patterns[ :yyyy_mmm ]
    @year       = $1.to_i unless $1.nil?
    month_text  = $2.to_s.capitalize
    @month      = month_number_for.call( month_text )

  else
    raise ArgumentError, 'Cannot parse date.'
  end

  #- Make sure the dates make sense. The month range is checked first so
  #- that the days-in-month lookup below always finds an entry.
  if @month
    raise ArgumentError, 'Month cannot be less than 1.' if @month < 1
    raise ArgumentError, 'Month cannot be greater than 12.' if @month > 12
  end

  if @day
    raise ArgumentError, 'Day cannot be less than 1.' if @day < 1

    if @month && @day > @days_in_month[ @month ]
      #- February 29 is only accepted when the year is known to be a leap year
      leap_day = @month == 2 && @day == 29 && @year && Date.leap?( @year )
      raise ArgumentError, 'Too many days in this month.' unless leap_day
    end
  end

  @month_name = @month_names[ @month ]

  # ----------------------------------------------------------------------

  #- The era is only shown for BC/BCE dates; AD/CE is implied
  show_era    = @eras[ @era ] == :bce ? ' ' + @era : ''
  show_circa  = @circa ? 'About ' : ''

  if @year && @month && @day
    #- Date.parse is given a zero-padded, 4 digit year
    padded_year   = @year.to_s.rjust( 4, "0" )
    modified_long = "#{ show_circa }#{ @month_name } #{ @day }, #{ padded_year }#{ show_era }"
    weekday       = Date.parse( modified_long ).strftime( '%A,' )
    month_day_year = Date.parse( "#{ @day } #{ @month_name } #{ padded_year }" ).strftime( ' %B %-1d, %Y' )

    @short = "#{ show_circa }#{ @month }/#{ @day }/#{ @year }#{ show_era }"
    @long  = "#{ show_circa }#{ @month_name } #{ @day }, #{ @year }#{ show_era }"
    @full  = show_circa + weekday + month_day_year + show_era
  elsif @year && @month
    @short = "#{ show_circa }#{ @month }/#{ @year }#{ show_era }"
    @long  = "#{ show_circa }#{ @month_name }, #{ @year }#{ show_era }"
    @full  = @long
  elsif @month && @day
    month_text = @month_abbreviations.key( month_text ) || month_text
    @short = "#{ show_circa }#{ @day }-#{ month_text }"
    @long  = "#{ show_circa }#{ @day } #{ @month_name }"
    @full  = @long
  elsif @year
    @short  = "#{ show_circa }#{ @year }#{ show_era }"
    @long   = @short
    @full   = @long
  end
  # NOTE(review): a month-only input (matched by :mmm_yyyy_and_mmm without a
  # year) reaches none of the branches above, so @short, @long and @full are
  # not assigned here - confirm whether that is intended.

end
clean_parameter( date ) click to toggle source
# File lib/fuzzy-date/analyze.rb, line 137
# Converts the incoming value to a String and trims surrounding whitespace.
#
# Returns nil when the value does not respond to to_s.
def clean_parameter( date )
  return nil unless date.respond_to?( :to_s )

  text = date.to_s
  text.strip
end
massage( date ) click to toggle source
# File lib/fuzzy-date/analyze.rb, line 141
# Normalizes a raw date string: splits it on DATE_SEPARATOR, removes a
# leading circa word and a trailing era, and joins what is left with '-'.
#
# Side effects: sets @circa to true when the first part is one of
# @circa_words, and sets @era to the upper-cased last part when that part is
# one of the keys of @eras.
#
# date - the String to normalize.
#
# Returns the remaining parts joined with '-'. The result is an empty String
# when nothing is left (empty input, or input made only of a circa word
# and/or an era).
def massage( date )

  date_separator = Regexp.new DATE_SEPARATOR, Regexp::IGNORECASE

  #- Split the string, dropping the empty pieces left by adjacent separators
  date_in_parts = date.to_s.split( date_separator ).reject { |part| part.to_s.empty? }

  #- Whole-part, escaped matches only: an unanchored /AD|BC|CE|BCE/i also
  #- matches inside "December" and would swallow the month as the era.
  circa_pattern = /\A(?:#{ Regexp.union( @circa_words ).source })\z/i
  era_pattern   = /\A(?:#{ Regexp.union( @eras.keys ).source })\z/i

  #- to_s guards against an empty list, where first/last return nil
  if date_in_parts.first.to_s =~ circa_pattern
    @circa = true
    date_in_parts.shift
  end

  if date_in_parts.last.to_s =~ era_pattern
    @era = date_in_parts.pop.upcase.strip
  end

  date_in_parts.join '-'
end
set_up_date_parts() click to toggle source
# File lib/fuzzy-date/variables.rb, line 98
# Resets the per-date state to its defaults: no original text, no year,
# month or day, not circa, and an era of 'AD'.
def set_up_date_parts
  @original = @year = @month = @day = nil
  @circa = false
  @era = 'AD'
end
setup() click to toggle source
# File lib/fuzzy-date/variables.rb, line 6
# Initializes the instance: resets the date parts, then builds the lookup
# tables (month names, abbreviations, days per month, range/circa words,
# eras) and the regular expressions used to recognize each date format.
def setup

  set_up_date_parts

  full_names = %w[
    January February March April May June
    July August September October November December
  ]

  #- Month number => full name
  @month_names = {}
  full_names.each_with_index { |name, index| @month_names[ index + 1 ] = name }

  #- Three letter abbreviation => full name (own String objects per table)
  @month_abbreviations = {}
  full_names.each { |name| @month_abbreviations[ name[ 0, 3 ] ] = name.dup }

  #- Month number => number of days (February as in a non-leap year)
  @days_in_month = {}
  [ 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 ].each_with_index do |days, index|
    @days_in_month[ index + 1 ] = days
  end

  @range_words = %w[ Between Bet Bet. From ]

  #- '-' is left out because it is more commonly used as a delimiter
  @middle_range_words = %w[ To And ]

  @circa_words = %w[ Circa About Abt Abt. ~ ]

  @eras = { 'AD' => :ce, 'BC' => :bce, 'CE' => :ce, 'BCE' => :bce }

  #- Alternation of the month abbreviations, shared by the patterns below
  months = @month_abbreviations.keys.join( '|' )

  @date_patterns = {
    yyyy:                   /^(\d{1,4})$/,
    yyyy_mmm:               /^(\d{1,4})-(#{ months }).*?$/i,
    yyyy_mmm_dd:            /^(\d{1,4})-(#{ months }).*?-(\d{1,2})$/i,
    yyyy_mm_dd_and_yyyy_mm: /^(\d{3,4})(?:-(\d{1,2})(?:-(\d{1,2}))?)?$/,
    dd_mm_yyyy:             /^(\d{1,2})-(\d{1,2})-(\d{1,4})$/,
    mm_dd_yyyy:             /^(\d{1,2})-(\d{1,2})-(\d{1,4})$/,
    mm_yyyy:                /^(\d{1,2})-(\d{1,4})?$/,
    dd_mmm_yyyy_and_dd_mmm: /^(\d{1,2})(?:-(#{ months }).*?(?:-(\d{1,4}))?)?$/i,
    mmm_dd_yyyy:            /^(#{ months }).*?-(\d{1,2})-(\d{1,4})$/i,
    mmm_yyyy_and_mmm:       /^(#{ months }).*?(?:-(\d{1,4}))?$/i
  }

end