Table of Contents - pragmatic_segmenter-0.3.24 Documentation
Classes and Modules
- PragmaticSegmenter
- PragmaticSegmenter::AbbreviationReplacer
- PragmaticSegmenter::BetweenPunctuation
- PragmaticSegmenter::Cleaner
- PragmaticSegmenter::Cleaner::Rules
- PragmaticSegmenter::Cleaner::Rules::HTML
- PragmaticSegmenter::Cleaner::Rules::PDF
- PragmaticSegmenter::ExclamationWords
- PragmaticSegmenter::Languages
- PragmaticSegmenter::Languages::Amharic
- PragmaticSegmenter::Languages::Amharic::AbbreviationReplacer
- PragmaticSegmenter::Languages::Arabic
- PragmaticSegmenter::Languages::Arabic::Abbreviation
- PragmaticSegmenter::Languages::Arabic::AbbreviationReplacer
- PragmaticSegmenter::Languages::Armenian
- PragmaticSegmenter::Languages::Armenian::AbbreviationReplacer
- PragmaticSegmenter::Languages::Bulgarian
- PragmaticSegmenter::Languages::Bulgarian::Abbreviation
- PragmaticSegmenter::Languages::Bulgarian::AbbreviationReplacer
- PragmaticSegmenter::Languages::Burmese
- PragmaticSegmenter::Languages::Burmese::AbbreviationReplacer
- PragmaticSegmenter::Languages::Chinese
- PragmaticSegmenter::Languages::Chinese::AbbreviationReplacer
- PragmaticSegmenter::Languages::Chinese::BetweenPunctuation
- PragmaticSegmenter::Languages::Common
- PragmaticSegmenter::Languages::Common::Abbreviation
- PragmaticSegmenter::Languages::Common::AbbreviationReplacer
- PragmaticSegmenter::Languages::Common::Abbreviations
- PragmaticSegmenter::Languages::Common::AmPmRules
- PragmaticSegmenter::Languages::Common::DoublePunctuationRules
- PragmaticSegmenter::Languages::Common::EllipsisRules
- PragmaticSegmenter::Languages::Common::ExclamationPointRules
- PragmaticSegmenter::Languages::Common::Numbers
- PragmaticSegmenter::Languages::Common::ReinsertEllipsisRules
- PragmaticSegmenter::Languages::Common::SingleLetterAbbreviationRules
- PragmaticSegmenter::Languages::Common::SubSymbolsRules
- PragmaticSegmenter::Languages::Danish
- PragmaticSegmenter::Languages::Danish::Abbreviation
- PragmaticSegmenter::Languages::Danish::AbbreviationReplacer
- PragmaticSegmenter::Languages::Danish::Cleaner
- PragmaticSegmenter::Languages::Danish::Numbers
- PragmaticSegmenter::Languages::Deutsch
- PragmaticSegmenter::Languages::Deutsch::Abbreviation
- PragmaticSegmenter::Languages::Deutsch::AbbreviationReplacer
- PragmaticSegmenter::Languages::Deutsch::BetweenPunctuation
- PragmaticSegmenter::Languages::Deutsch::Numbers
- PragmaticSegmenter::Languages::Deutsch::Processor
- PragmaticSegmenter::Languages::Dutch
- PragmaticSegmenter::Languages::Dutch::Abbreviation
- PragmaticSegmenter::Languages::English
- PragmaticSegmenter::Languages::English::AbbreviationReplacer
- PragmaticSegmenter::Languages::English::Cleaner
- PragmaticSegmenter::Languages::French
- PragmaticSegmenter::Languages::French::Abbreviation
- PragmaticSegmenter::Languages::French::AbbreviationReplacer
- PragmaticSegmenter::Languages::Greek
- PragmaticSegmenter::Languages::Greek::AbbreviationReplacer
- PragmaticSegmenter::Languages::Hindi
- PragmaticSegmenter::Languages::Hindi::AbbreviationReplacer
- PragmaticSegmenter::Languages::Italian
- PragmaticSegmenter::Languages::Italian::Abbreviation
- PragmaticSegmenter::Languages::Italian::AbbreviationReplacer
- PragmaticSegmenter::Languages::Japanese
- PragmaticSegmenter::Languages::Japanese::AbbreviationReplacer
- PragmaticSegmenter::Languages::Japanese::BetweenPunctuation
- PragmaticSegmenter::Languages::Japanese::Cleaner
- PragmaticSegmenter::Languages::Kazakh
- PragmaticSegmenter::Languages::Kazakh::Abbreviation
- PragmaticSegmenter::Languages::Kazakh::AbbreviationReplacer
- PragmaticSegmenter::Languages::Kazakh::Processor
- PragmaticSegmenter::Languages::Persian
- PragmaticSegmenter::Languages::Persian::AbbreviationReplacer
- PragmaticSegmenter::Languages::Polish
- PragmaticSegmenter::Languages::Polish::Abbreviation
- PragmaticSegmenter::Languages::Polish::AbbreviationReplacer
- PragmaticSegmenter::Languages::Russian
- PragmaticSegmenter::Languages::Russian::Abbreviation
- PragmaticSegmenter::Languages::Russian::AbbreviationReplacer
- PragmaticSegmenter::Languages::Spanish
- PragmaticSegmenter::Languages::Spanish::Abbreviation
- PragmaticSegmenter::Languages::Spanish::AbbreviationReplacer
- PragmaticSegmenter::Languages::Urdu
- PragmaticSegmenter::Languages::Urdu::AbbreviationReplacer
- PragmaticSegmenter::List
- PragmaticSegmenter::Processor
- PragmaticSegmenter::PunctuationReplacer
- PragmaticSegmenter::PunctuationReplacer::Rules
- PragmaticSegmenter::PunctuationReplacer::Rules::EscapeRegexReservedCharacters
- PragmaticSegmenter::PunctuationReplacer::Rules::SubEscapedRegexReservedCharacters
- PragmaticSegmenter::Rule
- PragmaticSegmenter::Segmenter
- Unicode
Methods
- ::apply — PragmaticSegmenter::Rule
- ::apply_rules — PragmaticSegmenter::ExclamationWords
- ::downcase — Unicode
- ::get_language_by_code — PragmaticSegmenter::Languages
- ::new — PragmaticSegmenter::AbbreviationReplacer
- ::new — PragmaticSegmenter::BetweenPunctuation
- ::new — PragmaticSegmenter::Cleaner
- ::new — PragmaticSegmenter::List
- ::new — PragmaticSegmenter::Processor
- ::new — PragmaticSegmenter::PunctuationReplacer
- ::new — PragmaticSegmenter::Segmenter
- #abbreviations — PragmaticSegmenter::Cleaner
- #abbreviations — PragmaticSegmenter::Languages::Danish::Cleaner
- #abbreviations — PragmaticSegmenter::Languages::English::Cleaner
- #abbreviations_replacer — PragmaticSegmenter::Processor
- #add_line_break — PragmaticSegmenter::List
- #add_line_breaks_for_alphabetical_list_with_parens — PragmaticSegmenter::List
- #add_line_breaks_for_alphabetical_list_with_periods — PragmaticSegmenter::List
- #add_line_breaks_for_numbered_list_with_parens — PragmaticSegmenter::List
- #add_line_breaks_for_numbered_list_with_periods — PragmaticSegmenter::List
- #between_punctuation — PragmaticSegmenter::Languages::Kazakh::Processor
- #between_punctuation — PragmaticSegmenter::Processor
- #between_punctuation_processor — PragmaticSegmenter::Processor
- #btwn_dbl_quote — PragmaticSegmenter::BetweenPunctuation
- #btwn_dbl_quote — PragmaticSegmenter::Languages::Deutsch::BetweenPunctuation
- #check_for_no_space_in_between_sentences — PragmaticSegmenter::Cleaner
- #check_for_parens_between_quotes — PragmaticSegmenter::Processor
- #check_for_punctuation — PragmaticSegmenter::Processor
- #clean — PragmaticSegmenter::Cleaner
- #clean — PragmaticSegmenter::Languages::Danish::Cleaner
- #clean — PragmaticSegmenter::Languages::English::Cleaner
- #clean — PragmaticSegmenter::Languages::Japanese::Cleaner
- #clean_consecutive_characters — PragmaticSegmenter::Cleaner
- #clean_quotations — PragmaticSegmenter::Cleaner
- #clean_quotations — PragmaticSegmenter::Languages::Danish::Cleaner
- #clean_quotations — PragmaticSegmenter::Languages::English::Cleaner
- #clean_table_of_contents — PragmaticSegmenter::Cleaner
- #cleaner — PragmaticSegmenter::Segmenter
- #consecutive_underscore? — PragmaticSegmenter::Processor
- #format_alphabetical_lists — PragmaticSegmenter::List
- #format_numbered_list_with_parens — PragmaticSegmenter::List
- #format_numbered_list_with_periods — PragmaticSegmenter::List
- #format_roman_numeral_lists — PragmaticSegmenter::List
- #iterate_alphabet_array — PragmaticSegmenter::List
- #last_array_item_replacement — PragmaticSegmenter::List
- #other_items_replacement — PragmaticSegmenter::List
- #post_process_segments — PragmaticSegmenter::Processor
- #process — PragmaticSegmenter::Processor
- #process_text — PragmaticSegmenter::Processor
- #processor — PragmaticSegmenter::Segmenter
- #remove_all_newlines — PragmaticSegmenter::Cleaner
- #remove_newline_in_middle_of_sentence — PragmaticSegmenter::Cleaner
- #remove_newline_in_middle_of_word — PragmaticSegmenter::Cleaner
- #remove_newline_in_middle_of_word — PragmaticSegmenter::Languages::Japanese::Cleaner
- #remove_pdf_line_breaks — PragmaticSegmenter::Cleaner
- #replace — PragmaticSegmenter::AbbreviationReplacer
- #replace — PragmaticSegmenter::BetweenPunctuation
- #replace — PragmaticSegmenter::Languages::Deutsch::AbbreviationReplacer
- #replace — PragmaticSegmenter::Languages::Kazakh::AbbreviationReplacer
- #replace — PragmaticSegmenter::PunctuationReplacer
- #replace_abbreviation_as_sentence_boundary — PragmaticSegmenter::AbbreviationReplacer
- #replace_abbreviation_as_sentence_boundary — PragmaticSegmenter::Languages::Danish::AbbreviationReplacer
- #replace_abbreviations — PragmaticSegmenter::Processor
- #replace_alphabet_list — PragmaticSegmenter::List
- #replace_alphabet_list_parens — PragmaticSegmenter::List
- #replace_continuous_punctuation — PragmaticSegmenter::Processor
- #replace_correct_alphabet_list — PragmaticSegmenter::List
- #replace_double_newlines — PragmaticSegmenter::Cleaner
- #replace_escaped_newlines — PragmaticSegmenter::Cleaner
- #replace_multi_period_abbreviations — PragmaticSegmenter::AbbreviationReplacer
- #replace_newlines — PragmaticSegmenter::Cleaner
- #replace_numbers — PragmaticSegmenter::Languages::Deutsch::Processor
- #replace_numbers — PragmaticSegmenter::Processor
- #replace_parens — PragmaticSegmenter::List
- #replace_parens_in_numbered_list — PragmaticSegmenter::List
- #replace_period_in_deutsch_dates — PragmaticSegmenter::Languages::Deutsch::Processor
- #replace_period_of_abbr — PragmaticSegmenter::AbbreviationReplacer
- #replace_period_of_abbr — PragmaticSegmenter::Languages::Bulgarian::AbbreviationReplacer
- #replace_period_of_abbr — PragmaticSegmenter::Languages::Russian::AbbreviationReplacer
- #replace_periods_before_numeric_references — PragmaticSegmenter::Processor
- #replace_periods_in_numbered_list — PragmaticSegmenter::List
- #replace_possessive_abbreviations — PragmaticSegmenter::AbbreviationReplacer
- #replace_pre_number_abbr — PragmaticSegmenter::AbbreviationReplacer
- #replace_prepositive_abbr — PragmaticSegmenter::AbbreviationReplacer
- #replace_punctuation — PragmaticSegmenter::PunctuationReplacer
- #replace_punctuation_in_brackets — PragmaticSegmenter::Cleaner
- #scan_for_replacements — PragmaticSegmenter::AbbreviationReplacer
- #scan_for_replacements — PragmaticSegmenter::Languages::Arabic::AbbreviationReplacer
- #scan_for_replacements — PragmaticSegmenter::Languages::Deutsch::AbbreviationReplacer
- #scan_for_replacements — PragmaticSegmenter::Languages::Persian::AbbreviationReplacer
- #scan_lists — PragmaticSegmenter::List
- #search_for_abbreviations_in_string — PragmaticSegmenter::AbbreviationReplacer
- #search_for_connected_sentences — PragmaticSegmenter::Cleaner
- #segment — PragmaticSegmenter::Segmenter
- #sentence_boundary_punctuation — PragmaticSegmenter::Processor
- #split_into_segments — PragmaticSegmenter::Processor
- #sub_characters — PragmaticSegmenter::PunctuationReplacer
- #sub_punctuation_between_double_angled_quotation_marks — PragmaticSegmenter::Languages::Chinese::BetweenPunctuation
- #sub_punctuation_between_double_quotes — PragmaticSegmenter::BetweenPunctuation
- #sub_punctuation_between_em_dashes — PragmaticSegmenter::BetweenPunctuation
- #sub_punctuation_between_l_bracket — PragmaticSegmenter::Languages::Chinese::BetweenPunctuation
- #sub_punctuation_between_parens — PragmaticSegmenter::BetweenPunctuation
- #sub_punctuation_between_parens_ja — PragmaticSegmenter::Languages::Japanese::BetweenPunctuation
- #sub_punctuation_between_quotes_and_parens — PragmaticSegmenter::BetweenPunctuation
- #sub_punctuation_between_quotes_and_parens — PragmaticSegmenter::Languages::Chinese::BetweenPunctuation
- #sub_punctuation_between_quotes_and_parens — PragmaticSegmenter::Languages::Japanese::BetweenPunctuation
- #sub_punctuation_between_quotes_arrow — PragmaticSegmenter::BetweenPunctuation
- #sub_punctuation_between_quotes_ja — PragmaticSegmenter::Languages::Japanese::BetweenPunctuation
- #sub_punctuation_between_quotes_slanted — PragmaticSegmenter::BetweenPunctuation
- #sub_punctuation_between_single_quote_slanted — PragmaticSegmenter::BetweenPunctuation
- #sub_punctuation_between_single_quotes — PragmaticSegmenter::BetweenPunctuation
- #sub_punctuation_between_square_brackets — PragmaticSegmenter::BetweenPunctuation
- #substitute_found_list_items — PragmaticSegmenter::List