<?xml version=“1.0” encoding=“UTF-8”?> <srx xmlns=“www.lisa.org/srx20” xmlns:okpsrx=“okapi.sf.net/srx-extensions” version=“2.0”> <header segmentsubflows=“yes” cascade=“yes”> <formathandle type=“start” include=“no”></formathandle> <formathandle type=“end” include=“yes”></formathandle> <formathandle type=“isolated” include=“no”></formathandle> <okpsrx:options oneSegmentIncludesAll=“no” trimLeadingWhitespaces=“no” trimTrailingWhitespaces=“no” useJavaRegex=“yes” useIcu4JBreakRules=“no” treatIsolatedCodesAsWhitespace=“no”></okpsrx:options> <okpsrx:sample language=“sr” useMappedRules=“yes”>Поштовани господине одн. госпођо. Видео сам </okpsrx:sample> <okpsrx:rangeRule></okpsrx:rangeRule> </header> <body> <languagerules> <languagerule languagerulename=“Greek”> <!–κ.λπ. - και λοιπά–> <rule break=“no”> <beforebreak>bκ.λπ.s</beforebreak> <afterbreak></afterbreak> </rule> <!–π.χ. - παραδείγματος χάριν–> <rule break=“no”> <beforebreak>bπ.χ.s</beforebreak> <afterbreak></afterbreak> </rule> <!–months–> <rule break=“no”> <beforebreak>b(Ιαν|Φεβ|Μα|Απρ|Ιου|Αυγ|Σεπ|Οκτ|Νοε|Δεκ).s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“yes”> <beforebreak>[u00BBu2019u201Du203A“'pPe}u0002]*s [.!;…][‘”u00BBu2019u201Du203Ap{Pe}u0002]* p{Lu</afterbreak> </rule> <rule break=“yes”> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} b[Aa]dw?.s bafr.s bakad.s b[Aa]l.s bam.s bamer.s barch.s b[Aa]rt.s bartyst.s bastr.s baustr.s bbałt.s bbdb.s bbł.s bbm.s bbr.p{Pe}?s [^p{Lu}] bbry[gt].s bcentr.s bces.s bchem.s bchiń.s bchir.s bc.k.s bc.o.s bcyg.s bcyw.s bcyt.s bczes.s bczw?.s b[Cc]d.s bczyt.s bćw.s bćwicz.s bdaw.s bdcn.s bdekl.s bdemokr.s bdet.s bdiec.s bdł.s bdn.s bdo[tlp].s [^p{Lu}] bdost.s bdosł.s bh.c.s bds.s bdst.s bduszp.s bdypl.s begz.s bekol.s bekon.s belektr.s bem.s bew.s bfab.s bfarm.s bfot.s bfr.s bgat.s bgastr.s bgeogr.s bgeol.s bgimn.s bgłęb.s bgm.s bgodz.s bgórn.s bgosp.s bgr.s bgram.s bhist.s bhiszp.s b[Hh]r.s bhot.s bid.s bin.s bim.s biron.s bjn.s bkard.s bkat.s bkatol.s 
bk.k.s bkk.s bko?l.s bk.p.a.s bkpc.s bk.p.c.s bkpt.s bkr.s bk.r.s bkrak.s bk.r.o.s bkryt.s bkult.s blaic.s błac.s bniem.s bwoj.s b[Nn][bp].s bpo[lw].s bm.in.s b[Pp][ts].s bcdn.s bjw.s b[Rr]y[cs].s btj.s b[Tt]zw.s btzn.s b[Zz]ob.s bsłow.s [^s].pl.s p{Lu}p{Ll}+ bp[wnl].s bang.s bu[lb].s? bal.s bk.s bn.s b[Oo]k.s [p{Ll}d] b[d,.]*ty[sś].p{Pe}?s [p{Ll}d]+ b[Nn]r.s d bw[wł].s bur.s bzm.s bżyd.s bżarg.s bżyw.s bwy[łdm].s b[bu]p.s bwyst.s b(?iu)mazełstow.s b[Tt]ow.s bo.s b([sS]p|st|[Ss]półdz|społ|spółgł|[Ss]to[łw]).s bzn.s bzew.s bzewn.s bzdr.s bzazw.s bzast.s bzaw.s bzał.s bzal.s bzam.s bzak.s bzakł.s bzagr.s bzach.s b[Aa]dw.s b[Ll]ek.s bmed.s b[Mm]ec.s b[Dd]oc.s b[Dd]y[wr].s b[Ii]nż.s b(?iu)mgr.s b[dD][hr].s b[pP].s p{Lu}p{Ll}+ b[Rr]ed.)?s b(?iu)pro[fk].s bhab.s b[Pp]łk.s b([Nn]a|[Pp]o)dkom.s b[kK]s.s b(?iu)gen.s b(?iu)por.s b[Rr]eż.s b[Pp]rzyp.s bp.n.e.s bdyr.smuz.s b[śŚ][pwW].s bw.s ((?:X{0,2})(?:V?I{0,3}|I[VX]))(?<=[XVI]+) bII?społ.s [IVX]+ betc.s p{Ll} bit[dp].s [„”“]?p{Ll} bproc.)?s p{Ll}|p{Lu}p{Lu}+ b[rwn].s p{Ll} bit[dp].s p{Ll} bcdn.s p{Ll} b[Ss]zer.s bjw.s p{Ll} bn.e.s p{Ll} bw.s p{Ll} bn[tn].s p{Ll} b[dm]l.s p{Ll} bdag.s p{Ll} b[cd]?m.s p{Ll} b[Zz][Łł].s p{Ll} b[Gg][rR].s p{Ll} b[d]?kg.s p{Ll} bml[nd].s p{Ll} bnpl.s p{Ll} bpkt.s [p{Ll}d] bstr.s [p{Ll}d] b[Tt]ab.s [p{Ll}d] btel.s b[ptw]g.s p{Ll} bcos.s p{Ll} bcosec.s p{Ll} bsec.s p{Ll} bsin.s p{Ll} brkm.s p{Ll} bust.s d bpar.s d blit.s p{Ll}p{Pe}?p{P}?s b[Pp]on.s p{Ll} b[Ss]ob.s p{Ll} bba!s p{Ll} bpo[zk].s bop.s cit. b[Oo][Oo].s b([CDSR]z|Ch).s p{Ll} b[rls].s [1-9]+ [d-–]+sl.s p{Ll} ((?:od|do|w)s[1-9]d*sr.s d+sr.s [^p{Lu}] bn.e.s [^p{Lu}] bt.s d+ b[,uoi]ss.s b[Nn]ajśw.s b[Nn]asos.s barab.s [^x00-x80]+ bT. Loveb bpl. Open[oO]ffice bha.s [p{Ll}] bmin.s [p{Ll}d] bWsd{4}sr.s [p{Lu}-–—„”] bd+.s p{Ll}|p{Lu}{2,} bp{L}.s p{L}.s bp{L}. p{L}. 
.p{L}+.s p{Ll} [s([][p{L}&&[^rwn]].s+ p{Ll}p{Ll}|p{Lu</afterbreak> </rule> <rule break=“no”> <beforebreak>[.!?…]['“”]s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=”no“> <beforebreak>s*</beforebreak> <afterbreak>s*pLl} [!?]+p{Pe} p{Ll} [p{Ps}][!?]+[p{Pe}] [[(]*...[])]* p{Ll} [[(]*…[])]* p{Ll} bp{Lu}.sp{Lu}.s bp{Lu}.p{Lu}.s bp{Lu}p{Ll}.s? p{Lu</afterbreak> </rule> <rule break=”no“> <beforebreak>.pLu}p{Ll}.s? p{Lu</afterbreak> </rule> <rule break=”no“> <beforebreak>s(?<!.)?[A-Z].s</beforebreak> <afterbreak>pLl}|p{Lu}p{Ll}p{Ll}+ ^[„”]?[A-ZŚĆŻŹ].s p{Ll}|p{Lu}p{Ll}p{Ll}+ (p{Lu}?p{Ll}+.s s[A-Z].s (d bDz.s?U.s [p{Ll}&&[^aeouiyęąó]][p{Ll}&&[^aeouiyęąó]]+.s p{Ll}+ [.!?…]+[’“p{Pe}u00BBu2019u201Du203Au0002]*s [p{Ps}-–—]s?p{Ll} [.!?…][‘”u00BBu2019u201Du203Au0002]*p{Pe}s p{Ll} [.!?…][’“p{Pe}u00BBu2019u201Du203Au0002¹²³]*s [.!?…][‘»””p{Pe}]* p{Lu</afterbreak> </rule> <rule break=“yes”> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} : [—-–] p{Lu} [a-zA-Z][!?]s )s[a-zA-Z] Yahoo!s p{Ll} [A-Z].[A-Z]. [A-Z]b bA. Ib bL. Ab bU. [SK]b b(https?|ftp|file|chrome|chromium|android|(chrome|moz)-extension):///?[A-Za-z0-9-]+. [A-Za-z0-9-]+(.|b) b[A-Za-z0-9-]+. [A-Za-z0-9-]+.(com|net|org|info|de|es|edu|co|eu|nl|io|cn|uk|gov|biz|ca|tk|ru|br|jp|pl|be|dev|co|fr|dk|se)(.|b) b[nN]o.s p{N} bP[Hh].s? D.? b([Ee]d|pp|[Vv]iz|i.?s*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Mm]in|max|[Gg]ovt|lb|lbf|ft|c.?s*f|vs|dia|lbs|d+-(:?oz|kc|in|h[rp]|ml)|M?sec).s [^p{Lu}]|I b(hr).s [^p{Lu}]|I b([Vv]ol|[Ff]ig|[Dd]ef|[Ee]q|[Ll]em|[Pp]rop|[Tt]hm)s?.s p{N}|[IXV]+ b([Ff]ig|[Dd]ef|[Ee]q|[Ll]em|[Pp]rop|[Tt]hm)s?.s (p{N}) (…|...)s?)s [^p{P}] (…|...)s??)s [^p{P}] be.g.s bvs.s be[sx]p.s b[Ee]tc.s [^p{Lu}] b([Bb]tw|BTW).s bJan.s bFeb.s bMar.s bApr.s bJu[nl].s bAug.s bSept?.s bOct.s bNov.s bDec.s (?i)FRITZ! (?i)Box ID. 3|Buzz|Crozz bP[Hh].?s?[Dd].s b(P[hH][dD]|BSc|BEng|BComp|BArch|MSc|MEng|MComp).s bLL.s?[BM].s b[BM].s? Eng.? bLL.s? 
[BM].? b[BM].s? Sc.? b[BM].s? Comp?.? b[BM].s? Arch.? b[BM].?s?(Sc|Eng|Comp|Arch).s betbsbal.s b(a(?:bbrev|uth|bl|bsol|bstr|cc|ccus|dv|dvb|dvs|gst|lt|phet|pp|ppos|cc|dj|djs|rch|rt|ttrib)|A(?:bbrev|uth|bd|berd|berdeensh|bol|borig|bp|br|bridg|bridgem|bsol|bst|bstr|cad|cc|ccept|ccomm|ccompl|ccs|cct|ccts|chievem|dd|ddit|ddr|dm|dmin|dmir|dmon|dmonit|dv|dvancem|dvert|dvoc|dvt|dvts|erodynam|eronaut|ff|ffect|fr|gric|lch|lg|lleg|llit|lm|lph|mer|nal|nalyt|nat|nc|necd|ng|ngl|nim|nn|nniv|nnot|nsw|nt|nthrop|nthropol|ntiq|poc|pol|pp|ppl|pplic|rch|rchaeol|rchipel|rchit|rgt|rith|rithm|rrangem|rtic|rtific|rtill|ssemb|ssoc|ssyriol|str|strol|stron|stronaut|tt|ttrib|ustral|uth|utobiog|utobiogr|yrsh|rab)|B(?:acteriol|edford|edfordsh|elg|erks|erksh|erw|erwicksh|ibliogr|iochem|iog|iogr|iol|ks|ord|ot|raz|rit|ucks|uild|ull|ur)|b(?:ef|etw)|c(?:ent|ollect|olloq|ompar|ompl|onc|oncr|omp|onj|ons|onst|ontempt|orresp|pd|ontr)|C(?:ontradict|ontrib|ontrov|onv|onvent|onversat|onvoc|ornw|oron|orr|orresp|ounc|ourtsh|raniol|raniom|rim|rit|rt|rts|ryptogr|rystallogr|umb|umberld|umbld|ycl|ytol|ollect|onn|al|alc|alend|alif|alligr|amb|ambr|ampanol|anad|anterb|artogr|atal|atech|ath|ent|eram|ert|ertif|hamb|har|harac|has|hem|hesh|hr|hron|hronol|hrons|inematogr|irc|lass|lassif|limatol|lin|oll|olloq|om|omb|ombs|omm|ommandm|ommend|ommerc|ommiss|ommonw|ommunic|omp|ompan|ompar|ompend|ompl|ompos|onc|onch|oncl|onf|onfid|onfl|onfut|ongr|ongreg|ongress|onsc|onsecr|onsid|onsol|onstit|onstr|ontemp|ontempl|ontend|ontent|ontin)|d(?:at|em|ial|im|yslog|ef|eriv|erog)|D(?:au|eb|eclar|ed|ef|eliv|emonstr|ep|epred|epredat|erbysh|escr|evel|evonsh|ial|ict|iffic|irect|is|isc|iscipl|iscov|iscrim|iscuss|iss|istemp|istill|istrib|iv|ivers|oc|octr|omest|urh)|e(?:tym|tymol|uphem|xc|ast|llipt|mph|rron)|E(?:val|vang|ven|vid|vol|xalt|xam|xch|xec|xerc|xhib|xped|xper|xplan|xplic|xplor|xpos|tymol|ccl|ccles|col|con|din|dinb|duc|dw|gypt|gyptol|lectr|lectro-magn|lectro-physiol|lem|liz|lizab|mb|mbryol|ncycl|ng|ngin|nglishw|nq|nt|nthus|ntom|ntomol|nz
ymol|pil|pisc|pist|pit|quip|ss|ssent|stabl|thnol)|f(?:em|req|ut|am|amil)|F(?:ifesh|ootpr|orfarsh|ortif|ortn|ound|ragm|ratern|riendsh|und|urnit|ab|am|arew)|G(?:ard|astron|az|eo|eog|eogr|eol|eom|eomorphol|er|lac|lasg|los|loss|louc|loucestersh|osp|ram|ynaecol)|g(?:erund|en)|H(?:aematol|ampsh|andbk|ants|eb|en|er|erb|eref|ereford|erefordsh|ertfordsh|ierogl|ist|istol|om|orol|ort|osp|ouseh|ousek|usb|ydraul|ydrol)|hist|I(?:nd|ndustr|nfl|nnoc|norg|nq|nst|ntell|ntellect|nterc|nterl|nternat|nterpr|chth|cthyol|deol|dol|llustr|mag|mpr|naug|nclos|nd|nstr|tal|ntro|ntrod|nv|nvent|nvertebr|nvestig|nvestm|nvoc|rel|mmunol)|i(?:nt|nterj|nterrog|ntr|ntrans|mp|mperf|mpers|mpf|mprop|nstr|nd|ndef|ndic|ndir|nfin|nfl|ron|rreg|mit)|J(?:ahrb|ap|as|rnl|rnls|urisd|urisdict|urispr|ustif|ustific)|joc|K(?:ent|ingd|nowl|pr)|L(?:ab|anc|ancash|ancs|ang|angs|at|d|ds|ect|eechd|eg|eicest|eicester|eicestersh|eics|et|ett|ex|ibr|imnol|incolnsh|incs|ing|inn|it|ithogr|ithol|iturg|ond)|m(?:asc|ed|etaphor|idl|ispr|od)|M(?:ach|ag|agn|an|anagem|anch|anip|anuf|ath|eas|easurem|ech|ed|edit|em|erc|erch|etall|etallif|etallogr|etamorph|etaph|eteorol|eth|etrop|ex|ich|icrobiol|icrosc|il|ilit|in|ineral|isc|iscell|od|onum|orphol|SS|tg|unic|unif|unim|us|yst|yth|ythol)|n(?:once-wd|orth|om)|N(?:arr|arrat|at|aut|av|avig|eighb|erv|eurol|eurosurg|ewc|ewspr|onconf|orf|orthamptonsh|orthants|orthumb|orthumbld|orthumbr|orw|orweg|otts|ucl|umism|on-conf)|o(?:ccas|pp|rig|bj|bl|bs)|O(?:bs|bserv|bstet|bstetr|ccas|ccup|ccurr|ceanogr|ff|ffic|kla|nt|phthalm|phthalmol|ppress|pt|rac|rd|rg|rig|rkn|rnith|rnithol|rthogr|utl|xf|xfordsh|xon|bed|bj)|p(?:ass|erf|ers|ersonif|honet|hr|op|lur|oet|ref|rep|riv|rob|oss|pl|ple|ples|rec|red|redic|ron|ronunc|rop|rov|ropr|seudo-arch|seudo-dial|seudo-Sc|erh|res)|P(?:eriodontol|redict|rerog|sych|sychoanal|sychoanalyt|sychol|sychopathol|ubl|urg|erf|alaeobot|alaeogr|alaeont|alaeontol|araphr|arasitol|arl|arnass|ath|athol|eculat|enins|ers|ersec|erthsh|etrogr|etrol|harm|harmaceut|harmacol|hil|hilad|hilol|hilos|hoen|
honol|hotog|hotogr|hrenol|hys|hysiogr|hysiol|ict|oet|ol|olit|olytechn|op|orc|ort|osth|ostm|ott|ract|ref|reh|rehist|resb|reserv|rim|rinc|rint|robab|robl|roc|rod|rol|rov|rovid|rovinc|rovis|ronunc|rop|ros)|Qld|q(?:uot|uots)|r(?:edupl|eg|epr|het|efash|efl|el)|R(?:adiol|eas|eb|ebell|ec|eclam|ecoll|edempt|ef|efl|efus|efut|eg|egic|egist|egr|el|elig|eminisc|emonstr|enfrewsh|eprod|ept|epub|es|esid|et|etrosp|evol|het|ich|om|oxb|oy|udim|uss)|s(?:ing|outh|pec|tr|ubj|ubjunct|ubord|ubseq|ubst|uff|uperl|yll)|S(?:ubj|uff|ubscr|ubscript|uppl|upplic|uppress|urg|urv|ymmetr|ymp|yst|pan|ask|at|ax|cand|ch|ci|cot|cotl|cript|culpt|eismol|el|elect|er|erm|ess|ettlem|ev|hakes|haks|heph|hetl|hropsh|oc|ociol|om|onn|pec|pecif|pecim|pectrosc|taff|tafford|taffordsh|taffs|tand|tat|tatist|tratigr|truct|tud)|t(?:echn|rans|ransf|ransl)|T(?:ransl|ransubstant|rav|reas|reat|reatm|rib|rig|rigonom|rop|roub|roubl|ypog|ypogr|axon|rans|echn|echnol|el|elecomm|elegr|eleph|eratol|erminol|errestr|est|extbk|heat|heatr|heol|heoret|hermonucl|hes|opogr|rag)|U(?:niv|rin)|u(?:nkn|nstr|lt|su)|U(?:nnat|noffic|tilit)|V(?:ac|aledict)|v(?:ar|arr|ars|bl|bs|ulg)|V(?:eg|enet|ertebr|et|ic|ict|ind|indic|irg|irol|oc|ocab|ol|oy|ulg)|W(?:estm|estmld|estmorld|estmrld|ill|ilts|iltsh|is|isd|kly|ks|onderf|orc|orcestersh|orcs|rit|arwicksh)|west|Y(?:earbk|ng|orks|orksh|rs)|Z(?:eitschr|oogeogr|ool)).b b(Atty|Sg?t|[SG]en|Ft|Gov|Hon|Prof|Mr?s|Mt|[DMJS]r|Col|Maj|L(ieu)?t|Brig|Capt|Cmdr|Cmnd|Revd?|Rep).s b(Atty|Sg?t|[SG]en|Gov|Hon|Prof|Mr?s|[DMJS]r|Col|Maj|L(ieu)?t|Brig|Capt|Cmdr|Cmnd|Revd?|Rep).s[A-Z].s b(Drs|Messrs|Mmes).s (ands)|p{Lu}p{Ll}+ bcf.s bI(nc|NC).s bCorp.s bBros.s bDist.s bCo.s bo’clocks bfo’c’sles bLtd.s p{Ll}+ [[(]*…[])]* p{Ll} p{Ps+pPe} [.!?…]+p{Pe} p{Ll} [“”‘’]s* s*p{Ll} [’”„][.!?…][‘“”]s bp{L}.s p{L}.s bp{L}. p{L}. 
p{Lu}p{L}+sv.s p{Lu}p{L}+ [^,][s]p{L}{2}.s p{N}+)s bOK.s p{Ll}+ [.s](?!(on|it|of|to|be|by|at|he|we|so|do|if|up|my|me|us|go|am))p{L}{1,2}.s [p{N}p{Ll}] [[(]*...[])]* [^p{Lu}] bp{Lu}.sp{Lu}.s bp{Lu}.p{Lu}.s [^.]s[A-Z].s b(:?Blvd|Ave|Mts?).s p{Ll}+ b(?:Kan|Ill|M[ai]ss).s p{Ll}+ (p{Ll}+.s i.e.s [.!?…][u00BBu2019u201Du203A”’p{Pe}u0002¹²³]*s [.!?…][‘“u00BBu2019u201Du203Ap{Pe}u0002]* p{Lu</afterbreak> </rule> <rule break=”yes“> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} bd+.s p{Ll}|p{Lu}{2,} [””’]s* s*p{Ll} [‘“„][.!?…][’””]s bp{L}.s p{L}.s bp{L}. p{L}. [.s]p{L}{1,2}.s [p{N}p{Ll}] [!?]+p{Pe} p{Ll} [p{Ps}][!?]+[p{Pe}] [[(]*...[])]* [^p{Lu}] b(etc|șamd).s [A-Z] b(pag|leg|art).s b(ian|febr?|mart?|apr|iu[nl]|aug|sept?|oct|nov|dec).s [^p{Lu}] bdpdv.s b(etc|șamd).s b(M). Ap.N.s b(M).Ap. N.s b([Dd]l|[Dd]-na|[Dd]vs|[Pp]t).s b([Dd]l|[Dd]-na|[Dd]vs|[Pp]t).s[A-Z].s [[(]*...[])]* p{Ll} [[(]*…[])]* p{Ll} [.!?…][‘“u00BBu2019u201Du203Au0002¹²³]*s [.!?…][’»””p{Pe}]* p{Lu</afterbreak> </rule> <rule break=“yes”> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} [.?!][’‘“] [a-z] b(https?|ftp|file|chrome|chromium|android|(chrome|moz)-extension):///?[A-Za-z0-9-]+. [A-Za-z0-9-]+(.|b) b[A-Za-z0-9-]+. 
[A-Za-z0-9-]+.(com|net|org|info|de|es|edu|co|eu|nl|io|cn|uk|gov|biz|ca|tk|ru|br|jp|pl)(.|b) b(Drs|Art|Afr|Am|Ar|Br|Cie|Comp|Dhr|([Pp]rof.)?[Dd]r|Em|Fa|Kon|Bros).s b(Mej|Mevr|Mgr|Mw|Ndl|Ned|Nl|No|Prof|Secr|Chr|Jac).s b(Sr|St|Ued|Vz|aanh|aanw|aardew|aardr).s b(abs|abstr|adj|adm|afb|[Aa]fd|afk|afl|milj|zgn|plv|bvb|bv|afm|evt|exp).s b(al|ald|alg|amb|ambt|anat|antrop|apoth).s b(arch|archeol|art|bc|betr|bez|bibl|bijl|bijv).s b(bijz|blz|bw|ca|cat|centr|cf|cfr|cmpl).s b(conf|ct|dal|derg|dhr|dir|div|dra|drs|ds).s b(ed|em|enz|etc|ev|excl|fa|fam|fig|fl|fr.).s b(geb|get|gld|id|incl|ing|intern|ir|jhr|jkvr).s b(jl|jr|kr|kt|lab|lic|ll|lt|lw|max|mi|min|mld).s b(mln|mr|mw|nl|no|nr|nrs|ob|obl|ong|onov|o.a).s b(opm|org|ov|pag|par|penn|plm|plv).s b(prov|pseud|qty|red|ref|resp|soc|st|tab|tel|temp|tk).s b([A-Z]|Adr|Chr|Fr|Fred|IJ|Jac|Joh|Ph|St|Th|Tj|v|v.(s)?d).(s)? [A-Z] b[vn].s Chr b(uitsl|ver|vgl|vnl|vnw|voorz|ww|zat|zg).s b(mm|cm|km|mg|kg|h|kW|mW).s p{Ll}|p{Lu}{2,} b(mm|cm|km|ml|kg|kW|h|mg).s [[(]*…[])]* p{Ll} p{Ps+pPe} [.!?…]+p{Pe} p{Ll} [””’]s* s*p{Ll} [‘“„][.!?…][’””]s bp{L}.s p{L}.s bp{L}. p{L}. [.s]p{L}{1,2}.s [p{N}p{Ll}] [[(]*...[])]* [^p{Lu}] bp{Lu}.sp{Lu}.s bp{Lu}.p{Lu}.s [^.]s[A-Z].s bp{Lu}p{Ll}.s? p{Lu</afterbreak> </rule> <rule break=“no”> <beforebreak>.pLu}p{Ll}.s? p{Lu</afterbreak> </rule> <!–a number with a dot before a lowercase char–> <rule break=“no”> <beforebreak>bd+.s</beforebreak> <afterbreak>pLl}|p{Lu}{2,} beensprof.s [^p{Ll}] bprof.s [.!?…][’‘“]s [a-z] [.][.]s [a-z] SP[.] A Warner Bros. [a-z] [.!?…][’’”u00BBu2019u201Du203Au00ABp{Pe}u0002¹²³]*s [.!?…][’‘“u00BBu2019u201Du203Au00ABp{Pe}u0002]* p{Lu</afterbreak> </rule> <rule break=”yes“> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} p{L}! [^ ] ) . . ) ). [A-Z] .) 
[A-Z] bmax.s p{Ll} [?!.][’”u00BBu2019u201Du203Au00ABp{Pe}u0002]s [A-Z][a-z] [?!.]s [‘“u00BBu2019u201Du203Au00ABp{Pe}u0002][A-Z][a-z] b(Bc|Mgr|RNDr|PharmDr|PhDr|JUDr|PaedDr|ThDr|Ing|MUDr|MDDr|MVDr|Dr|ThLic|PhD|ArtD|ThDr|Dr|DrSc|CSs|prof).s b([Oo]br|[Čč]).s p{N} babl.s babsol.s badj.s badmin.s b[Aa]dr.s badv.s badvok.s bafr.s bak.s bakad.s bakc.s bakuz.s betbsbal.s bal.s balch.s bamer.s banat.s b[Aa]ngl.s banglosas.s banorg.s bap.s bapod.s barch.s barcheol.s barchit.s barg.s bart.s bastr.s bastrol.s bastron.s batp.s batď.s b[Aa]ustr.s baut.s b[Bb]elg.s b[Bb]ibl.s bbiol.s bbot.s bbud.s bbás.s bbýv.s bcest.s bchem.s bcirk.s bcsl.s b[Čč]s.s bdat.s bdep.s bdet.s bdial.s bdiaľ.s bdipl.s bdistrib.s bdokl.s bdosl.s bdopr.s bdram.s bduš.s bdv.s bdvojčl.s bdór.s bekol.s bekon.s bel.s belektr.s belektrotech.s benerget.s bepic.s best.s betc.s betonym.s beufem.s b[Ee]uróp.s bev.s bevid.s bexpr.s bfa.s bfam.s bfarm.s bfem.s bfeud.s bfil.s bfilat.s bfiloz.s bfi.s bfon.s bform.s bfot.s b[Ff]r.s b[Ff]ranc.s bfraz.s bfut.s bfyz.s bfyziol.s bgarb.s bgen.s bgenet.s bgenpor.s bgeod.s bgeogr.s bgeol.s bgeom.s bgerm.s b[Gg]r.s b[Gg]réc.s bgréckokat.s bhebr.s bherald.s bhist.s bhl.s bhlav.s bhosp.s bhromad.s bhud.s bhypok.s bident.s bi.?e.s bident.s bimp.s bimpf.s bindoeur.s binf.s binform.s binstr.s bint.s binterj.s binšt.s binštr.s biron.s b[Jj]ap.s bjaz.s bjedn.s bjuhoamer.s bjuhových.s bjuhozáp.s bjuž.s b[Kk]anad.s bkanc.s bkapit.s bkpt.s bkart.s bkatastr.s bknih.s bkniž.s bkomp.s bkonj.s bkonkr.s bkozmet.s bkrajč.s bkresť.s bkt.s bkuch.s blat.s blatinskoamer.s blek.s blex.s blingv.s blit.s blitur.s blog.s blok.s b[Mm]ax.s b[Mm]aď.s bmedzinár.s bmest.s bmetr.s b[Mm]il.s b[Mm]in.s bminer.s bml.s bmld.s bmn.s bmod.s bmytol.s bnapr.s b[Nn]ar.s bnasl.s bnedok.s bneg.s bnegat.s bneklas.s b[Nn]em.s bneodb.s bneos.s bneskl.s bnesklon.s bnespis.s bnespráv.s bneved.s bnež.s bniekt.s bniž.s bnom.s bnáb.s bnákl.s bnámor.s bnár.s bobch.s bobj.s bobv.s bobyč.s bobč.s bobčian.s bodb.s bodd.s bods.s 
bojed.s b[Oo]kr.s bopt.s bopyt.s borg.s bos.s bosob.s bot.s bovoc.s bpar.s bpart.s bpejor.s bpers.s b(pf|Pf|P.f|p.f).s bpl.s bPlk.s bpod.s bpodst.s bpokl.s bpolit.s bpolitol.s bpolygr.s bpomn.s bpopl.s bpor.s bporad.s bporov.s bposch.s bpotrav.s bpouž.s bpoz.s bpozit.s bpoľ.s bpoľno.s bpoľnohosp.s bpoľov.s bpošt.s bpož.s bprac.s bpredl.s bpren.s bprep.s bpreuk.s b[Pp]riezv.s bprivl.s bprof.s bpráv.s bpríd.s bpríj.s bprík.s bpríp.s bprír.s bprísl.s bpríslov.s bpríč.s bpsych.s bpubl.s bpís.s bpísm.s bpôv.s brefl.s breg.s brep.s bresp.s brozk.s brozlič.s brozpráv.s b[Rr]oč.s bryb.s brádiotech.s brím.s bsamohl.s bsemest.s bsev.s bseveroamer.s bseverových.s bseverozáp.s bsg.s bskr.s bskup.s bsl.s bSloven.s bsoc.s bsoch.s bsociol.s bsp.s b[Ss]pol.s bspoloč.s bspoluhl.s bspráv.s bspôs.s bst.s bstar.s bstarogréc.s bstarorím.s bs.r.o.s bstol.s bstor.s bstr.s bstredoamer.s bstredoškol.s bsubj.s bsubst.s bsuperl.s bsv.s bsz.s bsúkr.s bsúp.s bsúvzť.s b[Tt]al.s btech.s b[Tt]el.s btelef.s bteles.s btelev.s bteol.s btrans.s bturist.s btuzem.s btypogr.s btzn.s btzv.s bukaz.s b[Uu]l.s bumel.s buniv.s bust.s bved.s bvedľ.s bverb.s bveter.s bvin.s bviď.s bvl.s bvod.s bvodohosp.s bp?nl.s bvulg.s bvyj.s bvys.s bvysokoškol.s bvzťaž.s bvôb.s bvých.s bvýd.s bvýrob.s bvýsk.s bvýsl.s bvýtv.s bvýtvar.s bvýzn.s bvčel.s bvš.s bvšeob.s bzahr.s bzar.s bzariad.s bzast.s bzastar.s bzastaráv.s bzb.s bzdravot.s bzdruž.s bzjemn.s bzlat.s b[Zz]n.s bzool.s bzr.s bzried.s bzv.s bzáhr.s bzák.s bzákl.s bzám.s bzáp.s bzápadoeur.s bzázn.s bázij.s búzem.s búčt.s bčast.s b[Čč]es.s bčl.s bčísl.s bživ.s bpr.s bfak.s b[Ss]lov.s p{Ll} bKr.s bp.n.l.s [^p{Lu}] b[Jj]r.s b(P.s?S|p.s?s|P.s?s).s bd+.s p{Ll}|p{Lu}{2,} p{Ps+pPe} [[(]*…[])]* p{Ll} [.!?…]+p{Pe} p{Ll} [””’]s* s*p{Ll} [‘“„][.!?…][’””]s bp{L}.s p{L}.s bp{L}. p{L}. [.s]p{L}{1,2}.s [p{N}p{Ll}] [[(]*...[])]* [^p{Lu}] bp{Lu}.sp{Lu}.s bp{Lu}.p{Lu}.s [^.]s[A-Z].s bp{Lu}p{Ll}.s? 
p{Lu</afterbreak> </rule> <rule break=“no”> <beforebreak>pL}{1,2}.s [p{N}p{Ll}] [.!?…][u00BBu2019u201Du203A“‘p{Pe}u0002¹²³]*s [.!?…][’”u00BBu2019u201Du203Ap{Pe}u0002]* p{Lu</afterbreak> </rule> <rule break=“yes”> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} b[nN]o.s p{N} b[nN][rR].s p{N} bp{N}+.s b(janúar|febrúar|mars|apríl|maí|júní|júlí|ágúst|september|október|nóvember|desember) bp{N}+.s b(jan|feb|mar|apr|maí|jún|júl|ágú|sep|sept|okt|nóv|des) b[a-z[^íáóæ]].s bp{L}. p{L}. bab.fn.s ba.fn.s bafs.s bal.s balm.s balg.s bandh.s bath.s baths.s batr.s bao.s bau.s baukaf.s báfn.s báhrl.s.s báhrs.s bákv.gr.s bákv.s bbh.s bbls.s bdr.s be.Kr.s bet.s bef.s befn.s bennfr.s beink.s bend.s be.st.s berl.s bfél.s bfskj.s bfh.s bf.hl.s bfísl.s b[A-ZÁ-Þ][a-zá-þ]+fj.s bfl.s bfn.s bfo.s bforl.s bfrb.s bfrl.s bfrh.s bfrt.s bfsl.s bfsh.s bfs.s bfsk.s bfst.s bf.Kr.s bft.s bfv.s bfyrrn.s bfyrrv.s bgerm.s bgm.s bgr.s bhdl.s bhdr.s bhf.s bhl.s bhlsk.s bhljsk.s bhljv.s bhljóðv.s bhr.s b[A-ZÁ-Þ][a-zá-þ]+hr.s bhv.s bhvk.s bholl.s bHos.s bhöf.s bhk.s bhrl.s bísl.s bkaf.s bkap.s bKhöfn.s bkk.s bkg.s bkk.s bkm.s bkl.s bklst.s bkr.s bkt.s bkgúrsk.s bkvk.s bleturbr.s blh.s blh.nt.s blh.þt.s blo.s bltr.s bmlja.s bmljó.s bmillj.s bmm.s bmms.s bm.fl.s bmiðm.s bmgr.s bmst.s bmín.s bnf.s bnh.s bnhm.s bnl.s bnk.s bnmgr.s bno.s bnúv.s bnt.s bo.áfr.s bo.m.fl.s bohf.s bo.fl.s bo.s.frv.s bófn.s bób.s bóákv.gr.s bóákv.s bpfn.s bPR.s bpr.s bRitstj.s bRvík.s bRvk.s bsamb.s bsamhlj.s bsamn.s bsamn.s bsbr.s bsek.s bsérn.s bsf.s bsfn.s bsh.s bsfn.s bsh.s bs.hl.s bsk.s bskv.s bsl.s bsn.s bso.s bss.us.s bs.st.s bsamþ.s bsbr.s bshlj.s bsign.s bskál.s bst.s bst.s.s bstk.s bsþ.s bteg.s btbl.s btfn.s btl.s btvíhlj.s btvt.s btill.s bto.s bumr.s buh.s bus.s buppl.s bútg.s bvb.s bVf.s bvh.s bvkf.s bVl.s bvl.s bvlf.s bvmf.s b8vo.s bvsk.s bvth.s bþt.s bþf.s bþjs.s bþgf.s bþlt.s bþolm.s bþm.s bþml.s bþýð.s [.!?…][u00BBu2019u201Du203A“‘p{Pe}u0002¹²³]*s [.!?…][’”u00BBu2019u201Du203Ap{Pe}u0002]* p{Lu</afterbreak> </rule> 
<rule break=“yes”> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} bd+.s p{Ll}|p{Lu}{2,} b[А-ЯЁ].s b[A-Z].s b[А-ЯЁ]. [А-ЯЁ][а-яё]+ b[А-ЯЁ].[А-ЯЁ]. [А-ЯЁ][а-яё]+ bp{L}. p{L}. b[0-9]+(гг|г).s b[XVILMC]+(в|вв).s b[0-9]+(.|:)[0-9][0-9]s b[0-9]+(.|:)[0-9][0-9](.|:)[0-9][0-9]s b[0-9]+(м|мм|см|дм|л|км|га|кг|т|г|мг).s p{Ll} b[0-9]+(руб|Руб|тыс|Тыс|трлн|млн|млрд).s b[0-9]+ b(бульв|г|д|доп|др|е|зам|Зам|и|им|инд|исп|Исп).s b(англ|в|вв|га|гг|гл|гос|грн|дм|долл|е|ед).s p{Ll} b(к|кап|кав|кв|кл|кол|комн|куб|л|лиц|лл|м|макс).s b(кг|км|коп|л|лл|м|мг|мин|мл|млн|Млн|млрд|Млрд|мм).s p{Ll} b(н|наб|нач|неуд|нем|ном|о|обл|обр|общ|ок|ост|отл|п|пер|Пер|перераб|пл|пос|пр|пром|просп|Просп|проф|Проф).s b(р|ред|Рис|рус|с|сб|св|См|см|сов|соч|соц|спец|ср|ст|стр|т|тел|Тел|тех|тов|тт|туп).s b(руб|Руб|тыс|Тыс|трлн).s p{Ll} b(уд|ул|уч|физ|х|хор|э).s b(ч|чел|шт|экз).s p{Ll} [‘“„“][.!?…][’””]s [u00AB][.!?…][u00BB]s [[(]*...[])]* p{Ll} [[(]*…[])]* p{Ll} [“”‘u00BB]s* s*p{Ll} [.!?…][’”u00BBu2019u201Du203Au0002¹²³]*s u005Du005Ds [.!?…][‘»“”p{Pe}]* p{Lu</afterbreak> </rule> <rule break=”yes“> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} p{Lu00A0+s+</beforebreak> <afterbreak>pLu}p{Ll} p{Ls+u00A0+</beforebreak> <afterbreak>pLu}p{Ll} u2029 <0} {0> d{2}:d{2}:d{2},d{3}r?n r?n r?ns*r?n[t]* [.!?]u00A0r?n b[dD]r.s bitd.s bitn.s b[šŠ]t.s p{N} [[(]*...[])]* p{Ll} [[(]*…[])]* p{Ll} [’”„][.!?…][‘“”]s bd.s b[jJ]an.s b[fF]eb.s b[mM]ar.s b[aA]pr.s b[jJ]u[ln].s b[aA]vg.s b[sS]ept?.s b[oO]kt.s b[nN]ov.s b[dD]ec.s b[tT]j.s b[nN]pr.s b[sS]l.s b[oO]p.s b[gG]l.s b[oO]z.s bprev.s bdipl.s bing.s b[pP]rim.s b[cC]f.s b[0-9]+(.|:)[0-9][0-9](.|:)[0-9][0-9]s b[0-3][0-9]+(.|:)[0-9][0-9](.|:)[0-9][0-9]s b[0-9]+.s b[XVILMC]+.s b[gG]l.s [.!?…][u00BBu2019u201Du203A”’p{Pe}u0002¹²³]*s [.!?…][‘“u00BBu2019u201Du203Ap{Pe}u0002]* p{Lu</afterbreak> </rule> <rule break=”yes“> <beforebreak>spLs</beforebreak> <afterbreak>p{Lu}p{Ll}</afterbreak> </rule> </languagerule> <languagerule languagerulename=”Catalan“> <rule break=”no“> 
<beforebreak>Yahoo!s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <rule break=”yes“> <beforebreak>w[nNtT].s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=”yes“> <beforebreak>.s</beforebreak> <afterbreak></afterbreak> </rule> <!– initials: A. C. Jones. Problem: […] d'Alfons I. Ell era […] –> <rule break=”no“> <beforebreak>b.s</beforebreak> <afterbreak></afterbreak> </rule> <!– Abbreviations that cannot finish sentences–> <rule break=”no“> <beforebreak>b(dc|(?iu)(n|Mr|C|Dr|Dra|Dra. Ma|Sta. Ma|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd)).s</beforebreak> <afterbreak></afterbreak> </rule> <!– Abbreviations that can finish sentences –> <rule break=”no“> <beforebreak>b(s|ca).s</beforebreak> <afterbreak>+b</afterbreak> </rule> <rule break=”no“> <beforebreak>b(min|m|ca).s</beforebreak> <afterbreak>+b</afterbreak> </rule> <rule break=”no“> <beforebreak>b([Cc]ap|[Aa]rts?|pp|ol).s</beforebreak> <afterbreak>+b</afterbreak> </rule> <rule break=”no“> 
<beforebreak>b([Ee]ds?|[Cc]oords?|d+(r|n|t|è|é|ns|es)|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|gr|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d?).[p{Pe}p{Pf}p{Pd}”']*s</beforebreak> <afterbreak>*p{Ll}</afterbreak> </rule> <!– Any word in acronyms like U.S.A.F or F. B. I. or C. or c.s.p. or p. e. –> <rule break=“no”> <beforebreak>b(p{L}.)+[p{Pe}p{Pf}p{Pd}“']*s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!– Any word in acronyms like EE.UU. or BB. DD. –> <rule break=”no“> <beforebreak>b({2}.)+[p{Pe}p{Pf}p{Pd}”']*s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <rule break=“no”> <beforebreak>bEE.s?</beforebreak> <afterbreak>UU</afterbreak> </rule> <rule break=“no”> <beforebreak>EE.s?UU.s?</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!– max min etc –> <rule break=“no”> <beforebreak>b([Ee]tc|m[aáà]x|m[ií]n|aprox|d+o).*s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!– Composed abbrev. –> <rule break=“no”> <beforebreak>bet al.*s</beforebreak> <afterbreak></afterbreak> </rule> <!– Units –> <rule break=“no”> <beforebreak>b([Pp]ta?|K[s]|?[gmls]|(rs)?).*s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!– Ellipsis: … lowercase –> <rule break=“no”> <beforebreak>(Q…E|…)s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!– (enum…) –> <rule break=“no”> <beforebreak>b(Q…E|…)s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!– pero ¡ah! no estaba <rule break=“no”> <beforebreak>b¡p{L}+!s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule>
-->
<rule break=“yes”> <beforebreak>[u00BBu2019u201Du203A“'u0002]*s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=”yes“> <beforebreak>b+[pPf}p{Pe}u00BBu2019u201Du203A”‘u0002]*[.:!?…]+s [¡¿«»“’u2018u201C”p{Ps}]*p{Lu}p{L}* [.:!?…»]+s »[^s.:!?…] Yahoo!s p{Ll} .[d+]s b[A-ZÀÉÈÍÓÒÚ].s [^s](Q…E|…)s p{Ll} b(Q…E|…)[p{Pe}»“’”]s p{Ll} b(s|ca).s [XIV]+b b(min|m|ca).s [0-9]+b b([Cc]ap|[Aa]rts?|pp|[Vv]ol|p|[Pp][aá]gs?|ps).s [XIVd]+b b(d+(r|er|n|ero|era|mo|ma|vo|va|no|na|to|ta|do|da|h|hr|gr|grs|o|a)s?|g|kg|m|km|cm|ha|u|h|hrs|H|HR|HRS|s|ss|alt|cant|cast|cert|com|dir|gr|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?).[p{Pe}p{Pf}p{Pd}”‘]*s [-¡¿«»“’u2018u201Cp{Ps}u2012u2013u2014u2015u2053]*p{Ll} b(https?|ftp|file|chrome|chromium|android|(chrome|moz)-extension):///?[A-Za-z0-9-]+. [A-Za-z0-9-]+(.|b) b[A-Za-z0-9-]+. [A-Za-z0-9-]+.(com|net|org|info|de|es|edu|co|eu|nl|io|cn|uk|gov|biz|ca|tk|ru|br|jp|pl)(.|b) b(dc|(?iu)(n|[Aa]yto|Mr|C|Dr|Dra|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Sras|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|
x|àt|íd)).s b([Aa]vda|[Pp][ol]|Pl?za|[Aa]dm|[Dd]pto|Sr|Mr|Srta|ej).s b(Dña|Dr[a]?|Sra|Sto|S(ri)?ta|Ldo|Ing|Prof|Excmo|Ilmo|Mgfco|admdor|admdora).s b([Aa]rt|[Cc]ód|[Ss]ecc|[Tt]ít).s b([Ee]d(it)?|[Nn]o|n|[Nn]úm|[Pp]ág|p|c|d+er)|[V.]gr.s b([Ee]ds?|[Cc]oords?|grs?|Sr|Jr|Admón|Inc|Co|Hnos|Vda|[VU]d[s]?).[p{Pe}p{Pf}p{Pd}”‘]*s [-¡¿«»“’u2018u201Cp{Ps}u2012u2013u2014u2015u2053]*p{Ll} b(p{L}.)+[p{Pe}p{Pf}p{Pd}”‘]*s p{Ll} b([p{Lu}]{2}.)+[p{Pe}p{Pf}p{Pd}“’]*s p{Ll} bEE.s? UU EE.s?UU.s? p{Ll} b([Ee]tc|m[aá]x|m[ií]n|aprox|d+o).[p{Pe}p{Pf}p{Pd}”‘]*s p{Ll} bet al.[p{Pe}p{Pf}p{Pd}“’]*s b([Pp]ta[s]?|K[gm][s]|[mc]?[gmls]|[Hh](rs)?).[p{Pe}p{Pf}p{Pd}”‘]*s p{Ll} [.…][u00BBu2019u201Du203A“’u0002]*s b[p{L}‘’·-]+[p{Pf}p{Pe}u00BBu2019u201Du203A”’u0002]*[.:!?…]+s [¡¿«»“‘u2018u201C”p{Ps}]*p{Lu}p{L}* [.:!?…»]+s »[^s.:!?…] b(https?|ftp|file|chrome|chromium|android|(chrome|moz)-extension):///?[A-Za-z0-9-]+. [A-Za-z0-9-]+(.|b) b[A-Za-z0-9-]+. [A-Za-z0-9-]+.(com|net|org|info|de|es|edu|co|eu|nl|io|cn|uk|gov|biz|ca|tk|ru|br|jp|pl)(.|b) r?ns*[-*]+s d+[a-z].s p{Lu} [^-p{L}’’/]p{L['|“|“|«|)|]|}]?s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=”no“> <beforebreak>Ust.</beforebreak><!– needed for German rule UST_ID –> <afterbreak>Id</afterbreak> </rule> <rule break=”no“> <beforebreak>Prof.</beforebreak><!– needed for German rule ABKUERZUNG_LEERZEICHEN –> <afterbreak>Dr</afterbreak> </rule> <rule break=”no“> <beforebreak>Dr.</beforebreak><!– needed for German rule ABKUERZUNG_LEERZEICHEN –> <afterbreak>iur|med|oec|phil|rer|theol</afterbreak> </rule> <rule break=”no“> <beforebreak>(?i)FRITZ!</beforebreak> <afterbreak>(?i)Box</afterbreak> </rule> <rule break=”no“><!– de.wikipedia.org/wiki/VW_ID.3 –> <beforebreak>ID.</beforebreak> <afterbreak>3|Buzz|Crozz</afterbreak> </rule> <rule break=”no“> <beforebreak>.s</beforebreak> <afterbreak>Liga|Bundesliga|Fußball(-B|b)undesliga</afterbreak> </rule> <rule break=”no“> <beforebreak>bP.</beforebreak> <afterbreak>D.</afterbreak> </rule> <!– Don't split at 
e.g. ”U.S.A.“ –> <rule break=”no“> <beforebreak>bp{L}.</beforebreak> <afterbreak></afterbreak> </rule> <!– Don't split after a white-space followed by a single letter followed
by a dot followed by another whitespace. e.g. " p. " -->
<rule break=“no”> <beforebreak>sp{L}.s</beforebreak> <afterbreak>p{L}.</afterbreak> </rule> <!– Don't split at “bla bla… yada yada” –> <rule break=“no”> <beforebreak>[?...[])]?s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!– Don't split [.?!] when they're quoted –> <rule break=“no”> <beforebreak>[.!?…]['““]s</beforebreak> <afterbreak></afterbreak> </rule> <!– Don't break after quote unless there's a capital letter
e.g.: "That's right!" he said. -->
<rule break=“no”> <beforebreak>s</beforebreak> <afterbreak>pLl} s([.!?]{1,3}|…)[‘|“|“|«|)|]|}]?s bd+.s p{Ll}|p{Lu}{2,} [([][!?]{1,3)]s</beforebreak> <afterbreak></afterbreak> </rule> <!– z.B. ”Das hier ist (genau!) ein Satz.“ –> <rule break=”no“> <beforebreak>[!?]{1,3}]s</beforebreak> <afterbreak></afterbreak> </rule> <!– z.B. ”bla (…) blubb“ -> kein Satzende –> <rule break=”no“> <beforebreak>[()]s</beforebreak> <afterbreak></afterbreak> </rule> <!– don't split at cases like ”Friedrich II. wird auch…“ –> <rule break=”no“> <beforebreak>[s ][IVX]+.s</beforebreak> <afterbreak>+</afterbreak> </rule> <!– don't split at cases like ”im 13. oder 14. Jahrhundert“ –> <rule break=”no“> <beforebreak>d+.s</beforebreak> <afterbreak>(und|oder|bis)s</afterbreak> </rule> <!– einige deutsche Monate, vor denen eine Zahl erscheinen kann,
ohne dass eine Satzgrenze erkannt wird
(z.B. "am 13. Dezember" -> keine Satzgrenze) -->
<rule break=“no”> <beforebreak>d+.s</beforebreak> <afterbreak>Januar|Jänner|Februar|März|Merz|April|Mai|Jui|August|September|Oktober|November|Dezember</afterbreak> </rule> <rule break=“no”> <beforebreak>d+.s</beforebreak> <afterbreak>J[aä]n|Febr?|Mär|Apr|Mai|Ju|Aug|Sept?|Okt|Nov|Dez</afterbreak> </rule> <rule break=“no”> <beforebreak>(Jan|Jän|Febr?|Mär|Apr|Mai|Ju|Aug|Sept?|Okt|Nov|Dez).s</beforebreak> <afterbreak>dd(dd)?</afterbreak> </rule> <!– ähnliche Fälle außerhalb der Monatsnamen –> <rule break=“no”> <beforebreak>d+.s</beforebreak> <afterbreak>Amtsperiode|Breitengrads?|Breitengrades|Jubiläum|Jhd?|Jhdts?|Konferenz|(Jahres|Partei)(-K|k)onferenz|Längengrade?s?|Tags?|Tages|(Jahres|Spiel|Partei|Geburts)tag|(Jahres|Spiel|Partei|Geburts)tages|(Jahres|Spiel|Partei|Geburts)tags|Jahrhunderts?|Jahrtausend|Platz|Platzes|Lebensjahrs?|Lebensjahres|Lochs?|Loches|Grads|Grades|Obergeschoss|Stock(werk)?s?|Etage|Klasse|Runde|Bezirk|Etappe|Staffel|Sinfonie</afterbreak> </rule> <!– English abbreviations - but these work globally for all languages –> <rule break=“no”> <beforebreak>b(Mrs?|No|pp|St|no|Sr|Jr|Bros|etc|[Bb]tw|vs|esp|[Ff]ig|Jan|Feb|Mar|Apr|Ju|Aug|Sept?|Ot|Nov|Dec|PhD|BSc|BEng|BComp|BArch|al|cf|Inc|Ms|MEng|MSc|MComp|Gen|Sen|Prof|Corp|Co|co|Ltd).s</beforebreak> <afterbreak></afterbreak> </rule> <!– Latin abbreviations - but these work globally for all languages –> <rule break=“no”> <beforebreak>b(spp?).s</beforebreak> <afterbreak></afterbreak> </rule> <!– German abbreviations –> <rule break=“no”> <beforebreak>b(ggü|Mag|mtl|versch|d|Übers|usw|Bzw|bzw|Ab|abzgl|Abzw|ahd|Akk|aktual|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|autom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw).s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(cts?|Ca|ca|chem|chin|Chr|cresc|dat|Dat|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|dt|ebd|Ed|eigtl|Eigtl|Engl|engl|Erg|al|et|Etw|ev(tl)?|Evtl|exkl|Expl|Exz).s</beforebreak> 
<afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>bDipl.-[a-z]2,4}.s b(ff|Fa|fachspr|fam|fem|Fem|Fr|franz|frz?|frdl|Frl|Fut|Gd|gebr?|Gebr|geh|geleg|gen|Gen|germ|gesch|ges|get|ggf|Ggf|Ggs|ggT|Gr|[Gg]rds|griech).s b(hebr|hg|hl|Hrsg|Hg|hist|hochd|hochspr|Hptst|Hr|hrsg|Allg|IdNr|ill|inkl|incl|Ind|Inf|Ing|ital|Tr|jap|Jb|Jg|Jhd?|Jhdts?|jmd[mns]?|jur|Kap|kart|kath|kfm|kaufm|Kfm|kgl|Kl|Konj|königl|Krs?|Kto).s b(lat|lfd|Lit|lt|Lz|Mask|mask|max|Mrd|mdal|me[dt]|phil|mhd|Mio?|mind?|Mo|mod|nachm|nördlBr|neutr|Nhd|Nom|Nrn?|Num|Obj|od|dgl|offz).s b(Part|Per[fs]|Pfd|Pl(ur)?|pl|Plusq|Pos|pp|Prä[ps]|Prät|Pro[vf]|rd|reg|resp|Rhld|rit|Sa|südl|Br|se[ln]|Sept|Sing|sign|So|sog|Sp|Std?|stacc|Str|stud|Subst|sva|svw|sZ).s b(Tel|teilw|Temp|trans|Tsd|übertr|übl|ff|überarb|ugs|univ|unveränd|urspr|USt|UST|USt-IdNr|sw|vgl|vlt|Vlt|vllt|Vllt|Vgl|Vol|vollst|vorm|Vp|Vs|vs|wesentl|wg|Whg|Hd|Ztr|zus|Zus|zzt?|zzgl|zB|zb|Zz|Zt|zw|Min|Bzgl|bzgl|bezügl|Frhr|ggfs|insb|autom|Mw[sS]t).s [.!?…][u0002|‘|“|“|«|‹|)|]|}¹²³]?s+ [.!?…][’”“p{Pe}u00BBu201D]? p{Lu</afterbreak> </rule> <rule break=“yes”> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} [.!?][”“] [“„] ‘p{Ls</beforebreak> <afterbreak>pLu} sno[.!?…]s p{Lu} [ap].m.s p{Lu} [^-p{L}]p{L['|“|«|)|]|}]?s</beforebreak> <afterbreak></afterbreak> </rule> <!– Don't split at e.g. ”U.S.A.“ –> <rule break=”no“> <beforebreak>bp{L}.</beforebreak> <afterbreak></afterbreak> </rule> <!– Don't split after a white-space followed by a single letter followed
by a dot followed by another whitespace. e.g. " p. " -->
<rule break=“no”> <beforebreak>sp{L}.s</beforebreak> <afterbreak>p{L}.</afterbreak> </rule> <!– Don't split at “bla bla… yada yada” –> <rule break=“no”> <beforebreak>[?...[])]?s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!– Don't split [.?!] when they're quoted –> <rule break=“no”> <beforebreak>[.!?…]['“]s</beforebreak> <afterbreak></afterbreak> </rule> <!– Don't break after quote unless there's a capital letter
e.g.: "That's right!" he said. -->
<rule break=“no”> <beforebreak>s</beforebreak> <afterbreak>pLl} bd+.s p{Ll}|p{Lu}{2,} [([][!?]{1,3)]s</beforebreak> <afterbreak></afterbreak> </rule> <!– f.eks. “Dette er (nøjagtig!) en sætning.” –> <rule break=“no”> <beforebreak>[!?]{1,3}]s</beforebreak> <afterbreak></afterbreak> </rule> <!– f.eks. “bla (…) blubb” -> ingen sætning –> <rule break=“no”> <beforebreak>[()]s</beforebreak> <afterbreak></afterbreak> </rule> <!– don't split at cases like “Fra den 13. til 14. i måneden.” –> <rule break=“no”> <beforebreak>d+.s</beforebreak> <afterbreak>(og|eller|til)s</afterbreak> </rule> <rule break=“no”> <beforebreak>bp{N}+.s</beforebreak> <afterbreak>januar|februar|marts|april|maj|jui|august|september|oktober|november|december</afterbreak> </rule> <!– einige deutsche Monate, vor denen eine Zahl erscheinen kann,
ohne dass eine Satzgrenze erkannt wird
(z.B. "am 13. Dezember" -> keine Satzgrenze) -->
<rule break=“no”> <beforebreak>d+.s</beforebreak> <afterbreak>januar|februar|marts|april|maj|jui|august|september|oktober|november|december</afterbreak> </rule> <!– English abbreviations - but these work globally for all languages –> <rule break=“no”> <beforebreak>b(Mrs?|No|pp|St|no|Sr|Jr|Bros|vs|esp|[Ff]ig|Jan|Feb|Mar|Apr|Ju|Aug|Sep|Sept|Oct|Okt|Nov|Dec|PhD|al|cf|Inc|Ms|Gen|Sen|Prof|Corp|Co).s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”><!– Ph.D. –> <beforebreak>bP.s?</beforebreak> <afterbreak>D.?</afterbreak> </rule> <rule break=“no”><!– B.Eng. (Bachelor of Engineering) –> <beforebreak>b.s?</beforebreak> <afterbreak>Eng.?</afterbreak> </rule> <rule break=“no”><!– LL.B. (Bachelor of Laws) –> <beforebreak>bLL.s?</beforebreak> <afterbreak>.?</afterbreak> </rule> <rule break=“no”><!– B.Sc. (Bachelor of Science) –> <beforebreak>b.s?</beforebreak> <afterbreak>Sc.?</afterbreak> </rule> <rule break=“no”><!– B.Comp. (Bachelor of Computing) –> <beforebreak>b.s?</beforebreak> <afterbreak>Comp?.?</afterbreak> </rule> <rule break=“no”><!– B.Arch. (Bachelor of Architecture) –> <beforebreak>b.s?</beforebreak> <afterbreak>Arch.?</afterbreak> </rule> <!– Danish abbreviations - Word Boundary b abbreviation dot . 
–> <rule break=“no”> <beforebreak>b(abs|abstr|adj|adm|adr|adv|afd|afg|afl|afs|afvig|agro|akad|akk|allr|alm|amer|anat|ang|anm|anv|apot|appos|apr|arab|arb|arkais|arkæol|arp|arr|art|ass|astr|att|attrib|aud|aug|aut).s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(bag|barb|barnespr|bd|bdt|beg|besl|best|bet|bhk|biavl|bibet|bibl|bibliot|billard|billedl|biol|bjergv|bk|bl|bogb|bogh|bogtr|bornh|bot|br|bryg|bto|bygn|bødk).s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(ca|cand|Chr|cirk|cit|co|d|da|dagl|dans|dat|dec|def|demonstr|dep|dial|diam|dim|dipl|disp|distr|distrib|dobb|dok|dr|dvs|e|egl|ejd|eks|eksam|ekskl|eksp|ekspl|el|ell|ellipt|emb|endv|eng|enk|ent|etnogr|eufem|eur|event|evt).s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(f|fagl|fakt|farv|feb|ff|fhv|fig|filos|fin|fisk|fk|fl|flg|flt|flyv|fmd|fon|foragt|forb|foreg|forf|forsikr|fors|forsk|forst|forv|foræld|fot|fr|fre|fris|frk|fsv|fuldm|fx|fys|fysiol|fægt|gart|gartn|garv|gdr|gen|genopt|geogr|geol|geom|germ|gl|glarm|glda|gldgs|glholl|glno|gns|got|gr|gradbøjn|graf|gram|gross|grundbet|græc|guldsm|gym).s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(hat|hd?|hebr|henh|hensobj|herald|hhv|hist|hj|holl|hovedbet|hr|hty|højtid|haandarb|haandv|if?|iflg|ifm|ift|iht|imp|incl|indb|indik|inf|ing|Inkl|inkl|insp|instr|interj|intk|intr|iron|isl|ital|jan|jarg|jernb|jf|jnr|jr|jul|jun|jur|jy|jæg|jærnb|jød).s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(Kbh|kbh|kem|kgl|kirk|kl|kld|knsp|kog|koll|komm|komp|konj|konkr|kons|Kr|kr|kurv|kvt|køkkenspr|l|landbr|landmaaling|lat|lb|lic|lign|litt|Ll|log|Loll|loll|lrs|lør).s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> 
<beforebreak>b(m|maj|maks|mal|man|mar|mark|mat|mdl|mdr|med|medl|meng|merc|meteorol|meton|metr|mf|mfl|mht|mia|mil|min|mineral|mio|ml|mlat|mm|mnt|mods|modsætn|modt|mr|mrk|mur|mvh|mytol|møl|mønt|n|naturv|ndf|Ndr|nedsæt|nht|no|nom|nov|nr|nt|num|nyda|nydann|nylat|naal).s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(obj|obl|oblik|obs|odont|oecon|oeng|ofl|ogs|oht|okt|oldfr|oldfris|oldn|olgn|omg|omkr|omtr|ons|opr|ordspr|org|osax|ovenst|overf|overs|ovf|p|pag|parl|part|pass|pat|pct|perf|pers|pga|ph|pharm|phil|pk|pkt|pl|plur|poet|pol|polit|pop|port|poss|post|pott|pr|prod|pron|propr|prov|præd|præp|præs|præt|psych|pt|pæd|paavirkn).s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(reb|ref|refl|regn|rekl|relat|relig|resp|retor|rev|rid|rigsspr|run|russ|s|sa|sanskr|scient|sdjy|sdr|sek|sen|sep|sept|shetl|sj|sjæll|skibsbygn|sko|skol|skr|skriftspr|skræd|Skt|slagt|slutn|smed|sml|smsat|smst|snedk|soc|soldat|sp|spec|sport|spot|spr|sprogv|spøg|ssg|ssgr|st|stat|stk|str|straf|stud|subj|subst|superl|sv|sætn|søfors|søn).s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(talem|talespr|tandl|td|tdl|teat|techn|telef|telegr|telekom|teol|th|theol|tir|tirs|tlf|told|tor|tors|trans|tsk|ty|tyrk|tøm|u|ubesl|ubest|udd|uddan|udenl|udg|udtr|uegl|ugtl|ult|underbet|undt|univ|upers|ur|urnord).s</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(vs?|var|varem|vbs|vedk|vedl|vedr|vejl|verb|vet|vha|vol|vsa|vulg|væv|zool|æ|æda|ænht|ænyd|æstet|ø|økon|å|årg|årh).s</beforebreak> <afterbreak></afterbreak> </rule> <!– Danish abbreviations - These can appear at the end of sentence, so watch out for the next letter: it shouldn't be uppercase. –> <rule break=“no”> <beforebreak>b(etc|mv|osv).s</beforebreak> <afterbreak>pLl} [.!?…][u0002|‘|“|«|)|]|}¹²³]?s+ [.!?…][’”p{Pe}u00BBu201D]? 
p{Lu</afterbreak> </rule> <rule break=“yes”> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} b(div|[Ee]kz|h|[Ii]nkl|p|[Rr]ed|[Rr]im|ktp).s p{Ll} b([Bb]v|[Ĉĉ]|i.a|k.[acs]|[Tt].[ne]|k.t.p|n.b|P.S).s b[Ll]ernu!s p{Ll} [.!?…][u0002|‘|“|«|)|]|}¹²³]?s+ [.!?…][’”p{Pe}u00BBu201D]? p{Lu</afterbreak> </rule> <rule break=“yes”> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} Yahoo!s p{Ll} .[d+]s b(https?|ftp|file|chrome|chromium|android|(chrome|moz)-extension):///?[A-Za-z0-9-]+. [A-Za-z0-9-]+(.|b) b[A-Za-z0-9-]+. [A-Za-z0-9-]+.(fr|com|net|org|info|de|es|edu|co|eu|nl|io|cn|uk|gov|biz|ca|tk|ru|br|jp|pl)(.|b) b[A-Za-z0-9-]+. [A-Za-z]{2,5}(.|b) b((?iu)J.-C|art|app|cf|chap|e(nv|tc)|fém|fig|masc|p|sing|suiv|suppl|tél|op).s p{Ll} b(etc).)s b(apr|ave?|boul|Mr?|Mrs|MM?|Mlle).s [[(]*…[])]* p{Ll} p{Ps+pPe} [.!?…]+p{Pe} p{Ll} [“”‘’]s* s*p{Ll} [’”„][.!?…][‘“”]s bp{L}.s p{L}.s bp{L}. p{L}. (…|...)s?)s [^p{P}] (…|...)s??)s [^p{P}] p{Lu}p{L}+sv.s p{Lu}p{L}+ [^,][s]p{L}{2}.s p{N}+)s [.s]p{L}{1,2}.s [p{N}p{Ll}] [[(]*...[])]* [^p{Lu}] bp{Lu}.sp{Lu}.s bp{Lu}.p{Lu}.s [^.]s[A-Z].s b(:?Blvd|Ave|Mts?).s p{Ll}+ b(?:Kan|Ill|M[ai]ss).s p{Ll}+ (p{Ll}+.s i.e.s [A-Z].[A-Z]. [A-Z]b bL. Ab bU. [SK]b b[nN]o.s p{N} bP[Hh].s? D.? be.g.s bvs.s b[Ee]tc.s [^p{Lu}] b([Bb]tw|BTW).s (?i)FRITZ! (?i)Box ID. 3|Buzz|Crozz bP[Hh].?s?[Dd].s b(P[hH][dD]|BSc|BEng|BComp|BArch|MSc|MEng|MComp).s bLL.s?[BM].s b[BM].s? Eng.? bLL.s? [BM].? b[BM].s? Sc.? b[BM].s? Comp?.? b[BM].s? Arch.? b[BM].?s?(Sc|Eng|Comp|Arch).s bI(nc|NC).s bCorp.s bBros.s bLtd.s p{Ll}+ bCo.s [.!?…][u0002|’|”|«|)|]|}¹²³]?s+ [.!?…][‘“p{Pe}u00BBu201D]? p{Lu</afterbreak> </rule> <rule break=”yes“> <beforebreak>spLs</beforebreak> <afterbreak>p{Lu}p{Ll}</afterbreak> </rule> </languagerule>
<languagerule languagerulename=“Ukrainian”> <!– when sentence starts with ellipsis: …Мазій і Юхим теж. –> <rule break=“no”> <beforebreak>(^|)(...|…)</beforebreak> <afterbreak>p{Lu}</afterbreak> </rule> <!– Наші в… Лос-Анджелесі –> <rule break=“no”> <beforebreak>b(в|у|на|за|з|із|зі|зо)(...|…)*</beforebreak> <afterbreak>p{Lu}</afterbreak> </rule> <rule break=“no”> <beforebreak>[h]+</beforebreak> <afterbreak>*([«“„“(]|[‐-―-])p{Ll}</afterbreak> </rule> <rule break=”yes“> <beforebreak>v*</beforebreak> <afterbreak>(...|…)</afterbreak> </rule> <!– Digit as a point number: 1. перший пункт –> <rule break=”no“> <beforebreak>bd{1,3}.[h]+</beforebreak> <afterbreak>p{Ll}|p{Lu}{2,}</afterbreak> </rule> <!– various punctuation between lowercase letters –> <rule break=”no“> <beforebreak>bp{Ll}+[.!?][hv]*</beforebreak> <afterbreak>h*((|[[‐-―-]*)?p{Ll})</afterbreak> </rule> <rule break=”no“> <beforebreak>([*[])]*|...|…)[hv]+</beforebreak> <afterbreak>*p{Ll}</afterbreak> </rule> <!– lowercase letter abbreviations together: н.е., кв.м. –> <rule break=”no“> <beforebreak>bp{L}{1,2}.</beforebreak> <afterbreak>p{L}{1,2}.</afterbreak> </rule> <!– latin capital char abbreviations A. B. C. –> <rule break=”no“> <beforebreak>b?[A-Z].?</beforebreak> <afterbreak>[a-zA-Z’’.-]|.</afterbreak> </rule> <!– capital char abbreviations А. Б. В. –> <rule break=”no“> <beforebreak>(^*|([hv]*||(b[А-ЯІЇЄҐACEIHOPX].-))[А-ЯІЇЄҐA-Z].*</beforebreak> <afterbreak></afterbreak> </rule> <!– І. В. Коваль, Т. 2, C. 202 –> <!– Іван Ч. (1914 р. н.) –> <rule break=“no”> <beforebreak>[А-ЯІЇЄҐ].*</beforebreak> <afterbreak>.|[0-9]|*,|[hv]*|([0-9]{4}</afterbreak> </rule> <!– І. Коваль –> <rule break=“no”> <beforebreak>([А-ЯІЇЄҐACEIHOPX].-)?(?<!°)(?<!(Куан+Ю|(Петр|Олександр)(|ові|ом)?+[IІ]+)).*</beforebreak> <afterbreak>(?!Від|Але)[а-яіїєґА-ЯІЇЄҐ'’ʼ]{2}</afterbreak> </rule> <!– Ів. Франко (але Ів Бутільє) –> <rule break=“no”> <beforebreak>(^|)(Ів|Дж).+</beforebreak> <afterbreak></afterbreak> </rule> <!– Year: 2000 р.:
виробила у 2018 р. 8,1 млн декалітрів
від 26 квітня 2017 р. №35
а до лютого 2020 р. — затвердити
–> <rule break=“no”> < (?<!d[h]*)bр.[hv]* [h]*(?!(На|Але|Так?)[hv]+)[А-ЯІЇЄҐA-Z][^h] [А-ЯІЇЄҐ][а-яіїєґ‘’-]*([h]+[а-яіїєґ’’-]+)?[h](d{4}[‐-―-“>beforebreak>b([0-9]{2}|[0-9]{4})[hv]+р.)*d4*р.*</beforebreak> <afterbreak>*(?!(На|Але|Так?)+)[А-ЯІЇЄҐA-Z]</afterbreak> </rule> <!– 15 вересня 1995 р. Україною було підписно –> <rule break=”no“> <beforebreak>d1,2+[а-яіїєґ]+d4*р.*</beforebreak> <afterbreak>*(?!(На|Але|Так?)+)[А-ЯІЇЄҐA-Z]</afterbreak> </rule> <!– Years: рр. –> <rule break=”no“> <beforebreak>b([0-9]0|{3}0)(-)?рр.*</beforebreak> <afterbreak></afterbreak> </rule> <!– млн./млрд./грн. — frequent mistake –> <rule break=”no“> <beforebreak>b(тис|млн|млрд|грн).*</beforebreak> <afterbreak>*(d|[КМ]Вт)</afterbreak> </rule> <!– усталені скорочення, що не збігаються з нескороченими словами –> <rule break=”no“> <!– unfortunately b ignores u0301 –> <beforebreak>b(укр|рос|англ|амер|італ|ісп|нім|фр(анц)?|лат|грец(ьк)).*</beforebreak> <afterbreak></afterbreak> </rule> <rule break=”no“> <!– unfortunately b ignores u0301 –> <beforebreak>b(абз|арк|ауд|бл|буд|бульв|вул|держ|дод|зав|зб|зв|зовн|екон|к|кв|канд|кн|напр|нац|обл|оп|пл|пол|поч|пп|пор|просп|розд|стор|табл|]ел|ч|част).*</beforebreak> <afterbreak></afterbreak> </rule> <rule break=”no“> <!– unfortunately b ignores u0301 –> <beforebreak>bст.</beforebreak> <afterbreak>*(?!([АВУОІЄ]|На|Але|Так?))</afterbreak> </rule> <rule break=”no“> <!– no break only for дол. США –> <beforebreak>bдол.*</beforebreak> <afterbreak>США</afterbreak> </rule> <!– п. 10 від 11.10.1933 –> <rule break=”no“> <beforebreak>(?<!т.?)bп.[hv]*</beforebreak> <afterbreak></afterbreak> </rule> <!– усталені скорочення, що збігаються з нескороченими словами –> <rule break=”no“> <beforebreak>b(див).</beforebreak> <afterbreak>*[^А-ЯІЇЄҐ]</afterbreak> </rule> <!– Верховний орган, див. Африканський національний конгрес –> <rule break=”no“> <beforebreak>[hv]*(див).*</beforebreak> <afterbreak></afterbreak> </rule> <!– скорочення в дужках:
України (див. Зимові походи)
–> <rule break=“no”> <beforebreak>((*|[[^]]*|,[hv]*)b(див).*</beforebreak> <afterbreak></afterbreak> </rule> <!– abbreviation with proper noun: проф. Грицько, о. Лісове –> <rule break=“no”> <beforebreak>b([Аа]кад|[Пп]роф|[Дд]оц|[Аа]сист|[Рр]еж|[Аа]рх|[Сс]вв?|о|ім|упоряд|чл.-кор).*</beforebreak> <afterbreak>*[А-ЯІЇЄҐA-Z]</afterbreak> </rule> <!– смерть гр. Болтаровича –> <rule break=“no”> <beforebreak>[hv]+[Гг]р.*</beforebreak> <afterbreak>*[А-ЯІЇЄҐA-Z]</afterbreak> </rule> <!– арт. - артикул –> <!– TODO: арт. - артист –> <rule break=“no”> <beforebreak>bарт.*</beforebreak> <afterbreak>*[0-9]</afterbreak> </rule> <!– ХІІ р., 3-6 арт. –> <rule break=“no”> <beforebreak>[hv]+арт.*</beforebreak> <afterbreak></afterbreak> </rule> <!– місто, але принаймні з парою літер в назві бо є ще метри (м) –> <!– але розбиваємо «всього 20 м. Почалося» –> <rule break=“no”> <beforebreak>(?<!d*)bм.[hv]*</beforebreak> <afterbreak>[а-яіїєґ]</afterbreak> </rule> <!– село/сторінка/місто, але щоб не збігалося з секундами/метрами –> <rule break=“no”> <beforebreak>([«(][см]|[^0-9/. ][hv]+).[hv]+</beforebreak> <afterbreak></afterbreak> </rule> <!– (реж. Емманюель –> <rule break=“no”> <beforebreak>[(«“„“”[а-яіїєґ]+.+</beforebreak> <afterbreak></afterbreak> </rule> <!– оренда кабінетів коштувала (!) 22,36 млн грн. –> <rule break=”no“> <beforebreak>[”«„“”[.!?…]1,3][hv]+</beforebreak> <afterbreak></afterbreak> </rule> <!– статус правових держав. — Авт.). –> <rule break=“no”> <beforebreak></beforebreak> <afterbreak>*[‐-―-]*([Рр]ед|[Аа]вт)*.[)]]</afterbreak> </rule> <!– force the break –> <!– часто зустрічається крапка+U+202F+пробіл, який srx чомусь не розбиває на речення –> <!– але лишаємо ініціали: С.u202F Шелухин –> <rule break=“yes”> <beforebreak>(?<!h)[.!?…]1,3}u202F[hv]+ [.!?…][‘»“„“”]›u0002]*[hv]+ [.!?…][’»”„“”)]›]* p{Lu</afterbreak> </rule> <!– “Слон” (2008 р.) 
У минулому харків’янка –> <rule break=“yes”> <beforebreak>['»“„“”)]›]?[hv]+</beforebreak> <afterbreak>([hv]*)?pLu</afterbreak> </rule> </languagerule>
<languagerule languagerulename=“Belarusian”> <rule break=“no”> <beforebreak>bd+.s</beforebreak> <afterbreak>pLl}|p{Lu}{2,} b[А-ЯЁ].s bЎ.s b[A-Z].s bp{L}. p{L}. b[0-9]+(г).s b[XVILMC]+(ст).s b[0-9]+(.|:)[0-9][0-9]s b[0-9]+(.|:)[0-9][0-9](.|:)[0-9][0-9]s b[0-9]+(г|гг|грн|млн|млрд|руб|тыс).s b(в|вв|г|гг|грн|млн|млрд|руб|ст|р|тыс).s [‘“„][.!?…][’””]s [u00AB][.!?…][u00BB]s [[(]*...[])]* p{Ll} [[(]*…[])]* p{Ll} [“”‘u00BB]s* s*p{Ll} [.!?…][’”u00BBu2019u201Du203Au0002¹²³]*s u005Du005Ds [.!?…][‘»“”p{Pe}]* p{Lu</afterbreak> </rule> <rule break=”yes“> <beforebreak>spLs</beforebreak> <afterbreak>p{Lu}p{Ll} bs([eé]c)?.s? [IVXVDMCL]+ b[Ee]tc.[p{Pe}p{Pf}p{Pd}”’]*s p{Ll} b(m[aá]x|m[ií]n|[aA]prox).[p{Pe}p{Pf}p{Pd}“‘]*s [p{Ll}p{N}] b([aA]pt?do|[aA]sdo|[aA]vd?a?|[Cc]ód|[Dd]e?pto|[Ff]ac|[Ii]nst).s b(S.A.[IRS].|S.R.M.|A.R.|S.[ME].)s p{Lu} b([p{Ll}p{Lu}].)+[p{Pe}p{Pf}p{Pd}”’]*s p{Ll} bp{L}.s? ((p{L}.s?)+|p{Ll}) bp{Lu}{2}.s? (p{Lu}{2}.?s?|p{Ll}) b([Aa]fm[oa]s?|Emcia|Ilt?m[ao]s?|Iltres?|MM|Exc?m[ao]s?|Magf[oa]|D(na)?|Sra?|Sr[ea]s|Srta|Dra?|Dr[ea]s?|Rm[ao]|Rev|Revm[ao]|Mons|Emmo|Rv?d[ao]|[Ll]icd[oa]|[Ll]ic|[Ll]d[oa]|[pP]rofs?).s b([Nn][úu]ms?|[fF]igs?|[Pp][aá]xs?|pp?|cc?a).s p{N} b([Vv]ols?|[Cc]aps?).s p{N}|[IVXVDMCL]+ b([Aa]dmóns?|[aA]d(mt)?v[oa]s?).s p{Ll} b([pP]pal|[Vv]des?|[Ii]b[íi]d|[Rr]efs?|[Cc]it|[Aa]brevs?|[Aa]bl|[Aa]dx|[Aa]dv|[Aa]cr|[Cc]onx|[Ll]oc|[Pp]rep|[Ss]ubst).s [^p{Lu}] b([vV]id|[Cc]fr?).s b([Cc]oord|[Ee]d)s?.s b([Cc]t[ae]|[Tt]e?lf|[Tt]fno|[Tt]el[eé]f|[Tt]el)s?.s [^p{Lu}] b([oO]p|[lL]oc).s? cit. bet [aá]l.s? 
[p{Ll}p{N}] [‘“(][.!?…][’”)]s bvs.s besp.s (…|...)s p{Ll} [.!?…]s [-–—],?sp{Ll}.+ [.…][u00BBu2019u201Du203A“‘u0002]*s sp{L}+[p{Pf}p{Pe}u00BBu2019u201Du203A”’u0002]*[.:!?…]+s* [¡¿«»“‘p{Ps}]*p{Lu}p{L}* [^s]:s [’”«¡¿p{Ps}p{Pi}]?p{Lu}p{Ll}* [:]+[p{Pe}p{Pf}p{Po}“-[u002Cu003Au003Bu055Du060Cu061Bu0703u0704u0705u0706u0707u0708u0709u07F8u1363u1364u1365u1366u1802u1804u1808u204Fu205Du3001uA60DuFE10uFE11uFE13uFE14uFE50uFE51uFE54uFE55uFF0CuFF1AuFF1BuFF64]]* s+P{Lu} [:]+[p{Pe}p{Pf}p{Po}”-[u002Cu003Au003Bu055Du060Cu061Bu0703u0704u0705u0706u0707u0708u0709u07F8u1363u1364u1365u1366u1802u1804u1808u204Fu205Du3001uA60DuFE10uFE11uFE13uFE14uFE50uFE51uFE54uFE55uFF0CuFF1AuFF1BuFF64]]* s [。.!?…]+ . ... s+P{Lu} ^s*p{Nd}+[p{Nd}.)]]\s
p{Lu} [.?!]+ s [.!?…][u00BBu2019u201Du203A“‘p{Pe}u0002¹²³]*s sp{L}[.!?…]s</beforebreak> <afterbreak>pLu}p{Ll} b[dD].l.e.? [.!?…][’””»p{Pe}u00BBu201D]?s* p{Lu</afterbreak> </rule> <!– Break rules –> <rule break=“yes”> <beforebreak>[u0002|'|“|«|)|]|}¹²³]?s+</beforebreak> <afterbreak></afterbreak> </rule> <rule break=”yes“> <beforebreak>['”pPe}u00BBu201D]? p{Lu</afterbreak> </rule> <rule break=“yes”> <beforebreak>spLs</beforebreak> <afterbreak>p{Lu}p{Ll}</afterbreak> </rule> </languagerule> <languagerule languagerulename=“Portuguese”> <rule break=“no”><!– URLs without “www.”–> <beforebreak>b(https?|ftp|file|chrome|chromium|android|(chrome|moz)-extension):///?+.</beforebreak> <afterbreak>+(.|b)</afterbreak> </rule> <rule break=“no”><!– Subdomains without “www.” (e.g. foo.MyDomain.com)–> <beforebreak>b+.</beforebreak> <afterbreak>+.(com|net|org|info|de|es|edu|co|eu|nl|io|cn|uk|gov|biz|ca|tk|ru|br|jp|pl)(.|b)</afterbreak> </rule> <!– Abbreviations that cannot finish sentences–> <rule break=“no”> <beforebreak>b(a|Ab|abrev|absol|acad|Açor|A. 
?D|add|adj|adv|advers|Aeron|afér|Agric|Álg|aprox|rts?|Artilh|auxil|av|Av).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(Bot|barb|B.el|Bibl|Biol|Bioquím|burl).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(ca|card|cat|caus|cf|cit|cód|comp|compar|conj|contr|coord|cop).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(D|def|dem|deprec|deriv|det|disj|ra?s?).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(Ecol|Econ|ed|elem|Eng|erud|estrang|ex|Ex).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(etc).s?</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <rule break=“no”> <beforebreak>b(f|fam|Farm|fem|fig|fin|fl|fr|frac).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(gén|geog|Geogr|Geol|Geom|gír|gloss|Gram).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(hab|hist|Hort).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(Ibid|id|i.e|incompat|indef|inf|infant|Inform|integr|interj|interr|intr|inv).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(Jorn|Jur).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(lat|Lat|Lda|Ling|Lit|liv|loc|log|Lóg|long).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(m|masc|Mat|máx|Mecân|ed|Mil|mín|mult|Mús).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(n|N|Náut|N.B|neg|neol|num|núm).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(ord).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(pág|págs|Paleont|part|pass|[Pp]edag|pejor|pess|Pesc|p|Pe|p.f|pl|pleb|p.m|poét|[Pp]olít|pop|pov|poss|p.p|p.p.m|pp|pref|prep|rof|pron|P.S).s?</beforebreak> 
<afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(q.b|q.do|Q.E|Q.I|ql).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(R|rel|Relig|Rev).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(S|S.A|símb|S. ?M|[Ss]ra?s?|rta|suf|superl).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(t|tip|Tip|tít|top|opogr|tr|trad|Trás-os-M|trim).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(Univ).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(v|V|vd|vid|voc|vol|V.S|vs|vulg).s?</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“no”> <beforebreak>b(Zool).s?</beforebreak> <afterbreak></afterbreak> </rule> <!– s. XIX; s.IX; sec. XX; séc. XX –> <rule break=“no”> <beforebreak>bs(c)?.s?</beforebreak> <afterbreak>+</afterbreak> </rule> <!– English abbreviations - but these work globally for all languages –> <rule break=“no”> <beforebreak>b(Mr|Mrs|No|pp|St|Jr|Bros|etc|vs|esp|ig|PhD|al|cf|Inc|Ms|Gen|Sen|Prof|Corp|Co|Ltd).s?</beforebreak> <afterbreak></afterbreak> </rule> <!– Latin abbreviations - but these work globally for all languages –> <rule break=“no”> <beforebreak>b(sp|spp).s?</beforebreak> <afterbreak></afterbreak> </rule> <!– initials: A. C. Jones. 
–> <rule break=“no”> <beforebreak>b.s?</beforebreak> <afterbreak></afterbreak> </rule> <!– Two final stop abbreviations –> <rule break=“no”> <beforebreak>b.s?</beforebreak> <afterbreak>C.</afterbreak> </rule> <rule break=“no”> <beforebreak>p.s?</beforebreak> <afterbreak>d.</afterbreak> </rule> <rule break=“no”> <beforebreak>p.s?</beforebreak> <afterbreak>ex.</afterbreak> </rule> <rule break=“no”> <beforebreak>P.s?</beforebreak> <afterbreak>S.</afterbreak> </rule> <rule break=“no”> <beforebreak>bP.s?</beforebreak> <afterbreak>D.?</afterbreak> </rule> <rule break=“no”> <beforebreak>V.s?</beforebreak> <afterbreak>Rev.</afterbreak> </rule> <rule break=“no”> <beforebreak>v.s?</beforebreak> <afterbreak>g.</afterbreak> </rule> <!– Don't split after a white-space followed by a single letter followed
by a dot followed by another whitespace. e.g. " p. " -->
<rule break=“no”> <beforebreak>sp{L}.s?</beforebreak> <afterbreak>p{L}.</afterbreak> </rule> <!– Any word in acronyms like E.U.A. or P.S.P. or C. or c.s.p. or p. e. –> <rule break=“no”> <beforebreak>b(p{L}.)+[p{Pe}p{Pf}p{Pd}“”']*s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!– Any word in acronyms like EE.UU. or BB. DD. –> <rule break=”no“> <beforebreak>b({2}.)+[p{Pe}p{Pf}p{Pd}””']*s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!– máx. mín. etc. –> <rule break=“no”> <beforebreak>b([Ee]tc|m[aá]x|m[ií]n|aprox|d+o).*s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!– Composed abbrev. e.g. et al. –> <rule break=“no”> <beforebreak>bet al.*s</beforebreak> <afterbreak></afterbreak> </rule> <!– Units –> <rule break=“no”> <beforebreak>b([Ee]sc|K[gm]s?|?[gml]s]|(rs)?).*s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!– Split at e.g. “1a. There is…” –> <rule break=“yes”> <beforebreak>d+.s?</beforebreak> <afterbreak>p{Lu}</afterbreak> </rule> <!– Don't split at cases like “in 13. or 14. paragraphs” –> <rule break=“no”> <beforebreak>d+.s?</beforebreak> <afterbreak>(e|ou|até)s</afterbreak> </rule> <!– Don't split [.?!] when they're quoted –> <rule break=“no”> <beforebreak>[.!?…]['“”]s</beforebreak> <afterbreak></afterbreak> </rule> <!– Not break for ellipses (…) –> <rule break=”no“> <beforebreak>(Q…E|…)s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!– z.B. ”bla (…) blubb“ -> without ending sentence –> <rule break=”no“> <beforebreak>[()]s</beforebreak> <afterbreak></afterbreak> </rule> <!– Don't break after quote unless there's a capital letter
e.g.: "That's right!" he said. -->
<rule break=“no”> <beforebreak>s</beforebreak> <afterbreak>pLl} [([][!?]{1,3)]s</beforebreak> <afterbreak></afterbreak> </rule> <!– z.B. “This here is an (awesome!) phrase.” –> <rule break=“no”> <beforebreak>[!?]{1,3}]s</beforebreak> <afterbreak></afterbreak> </rule> <!– narrator comments in dialogs –> <rule break=“no”> <beforebreak>s</beforebreak> <afterbreak>,?spLl}.+ b(etc).s? p{Lu}p{Ll}* [.!?][u0002|‘|“|“|«|)|]|}¹²³]?s+ [.!?…][’””»p{Pe}u00BBu201D]?s* p{Lu</afterbreak> </rule> <rule break=“yes”> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll}* [^s]:s [‘“«¡¿p{Ps}p{Pi}]p{Lu}p{Ll}* r?n p{Lu}p{Ll}* b(a.c|a.C|ad es|all|Amn|Arch|Avv|Bcc|Cav|c.a|C.A.P|Cc|banc|post|c.c.p|c.m|Co|c.p|C.P|C.p.r|corr|c.s|c.v).s b(Chia.mo|C.so|Circ.ne)s b(d.C|Dott|Dr|ecc|Egr|e.p.c|fatt|FF.AA|FF.SS|Geom|Gen|g|gg|Id|Ing|int|lett).s b(Dott.ssa|Egr.i|Egr.ia|F.lli|Gent.mo|Gent.mi|Gent.ma|Gent.me|Ill.mo|L.go)s b(Mo|Mons|N.B|n|ogg|On|p|pag|par|pp|p.c|p.c.c|p.es|p.f|p.r|P.S|p.v|P.T|Prof).s b(P.zza|P.le|Preg.mo|Prof.ssa)s b(R|racc|Rag|Rev|ric|Rif|R.P|R.S.V.P|S.A|S. 
acc|S.B.F|seg|sgg|ss|S|Ss|Sig|Sigg|s.n.c|Soc|S.p.A|Spett|S.P.M|S.r.l).s b(Sig.na|Sig.ra|Stim.mo)s b(tel|u.s|V|V.P|v.r|v.s).s b(V.le)s b(abbr|acron|agg|art|avv|card|compar|conf|cong|det|dim|f|fonosimb|ger|impers|indef|indet|inter|intr|inv|lat|loc|m|n|num|ord|p|pers|pl|pass|pres|pref|prep|pron|ponom|rel|s|sost|simb|suff|ter|tr|v|var).s [.!?…][u00BBu2019u201Du203A”’p{Pe}u0002¹²³]*s [.!?…][‘“u00BBu2019u201Du203Ap{Pe}u0002]* p{Lu</afterbreak> </rule> <rule break=”yes“> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} bஎ.கா.s b(ஜன|பிப்|மார்|ஏப்|ஆக|செப்|அக்|நவ|டிச).s b(ரூ|ரி.ம|பக்).s p{N} b(கி.பி|கி.மு).s p{N} b(ஐ.நா|தி.மு.க|அ.இ.அ.தி.மு.க|அ.தி.மு.க|ம.தி.மு.க|ம.இ.கா|இ.ஆ.ப|ஐ.ஏ.எஸ்|எம்.பி|எம்.எல்.ஏ|எம்.ஜி.ஆர்|டி.எம்.எஸ்).s bபி.கு.:?s b(பி.இ|பி.ஏ|ஏம்.பி.பி.எஸ்|பி.ஏ.பி.எல்|எம்.ஏ|எம்.எஸ்.சி|எம்.இ|எம்.லிட்|பி.எச்.டி).s [.!?…][u00BBu2019u201Du203A”’p{Pe}u0002]}¹²³]*s [.!?…][u00BBu2019u201Du203A“‘p{Pe}u0002¹²³]*s [.!?…][’”u00BBu2019u201Du203Ap{Pe}u0002]* p{Lu</afterbreak> </rule> <rule break=“yes”> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} b(نه|بله)!s p{N} [[(]*…[])]* p{Ll} p{Ps+pPe} [.!?؟…]+p{Pe} p{Ll} [«»“”‘]s* s*p{Ll} [«’”„][.!?؟…][‘“”»]s bp{L}.s p{L}.s bp{L}. p{L}. [^,،][s]p{L}{2}.s p{N}+)s [.s]p{L}{1,2}.s [p{N}p{Ll}] [[(]*...[])]* [^p{Lu}] bp{Lu}.sp{Lu}.s bp{Lu}.p{Lu}.s [^.]s[ضصثقفغعهخحجچشسیبلاتنمکگظطزرذدپوًٌٍَُِّْA-Z].s (p{Ll}+.s [.!?؟…][«»u00BBu2019u201Du203A”’p{Pe}u0002¹²³]*s [.!?؟…][«»‘“u00BBu2019u201Du203Ap{Pe}u0002]* p{Lu</afterbreak> </rule> <rule break=”yes“> <beforebreak>spLs</beforebreak> <afterbreak>pLu}p{Ll} [^-p{L}’’]p{L['|”|“|«|)|]|}]?s</beforebreak> <afterbreak></afterbreak> </rule> <!–Не раздвајати код на пр. “U.S.A.” –> <rule break=“no”> <beforebreak>bp{L}.</beforebreak> <afterbreak></afterbreak> </rule> <!– Don't split after a white-space followed by a single letter followed
by a dot followed by another whitespace. e.g. " p. " -->
<rule break=“no”> <beforebreak>sp{L}.s</beforebreak> <afterbreak>p{L}.</afterbreak> </rule> <!–Не раздвајати код “бла бла… трућ трућ”.–> <rule break=“no”> <beforebreak>[?...[])]?s</beforebreak> <afterbreak>p{Ll}</afterbreak> </rule> <!–Не раздвајати [.?!] када се налазе унутар знакова навода, једноструких или двоструких. –> <rule break=“no”> <beforebreak>[.!?…]['““]s</beforebreak> <afterbreak></afterbreak> </rule> <!– Don't break after quote unless there's a capital letter
e.g.: "That's right!" he said.
Не раздвајати после наводника осим ако нису праћени великим словом. На пример:
"Тако је!", рече он.-->
<rule break=“no”> <beforebreak>,s</beforebreak> <afterbreak>pLl} s([.!?]{1,3}|…)[‘|“|“|«|)|]|}]?s bd+.s p{Ll}|p{Lu}{2,} [([][!?]{1,3)]s</beforebreak> <afterbreak></afterbreak> </rule> <!–На пр.: ”Ово овде је (такође!) једна реченица.“–> <rule break=”no“> <beforebreak>[!?]{1,3}]s</beforebreak> <afterbreak></afterbreak> </rule> <!–Не раздвајај у случају као на пр.: ”Петар I дошао је …“–> <rule break=”no“> <beforebreak>[s ][IVX]+s</beforebreak> <afterbreak>+</afterbreak> </rule> <!–Не раздвајај у случају као ”од 13. до 14. века“–> <rule break=”no“> <beforebreak>d+.s</beforebreak> <afterbreak>(и|или|до)s</afterbreak> </rule> <!–Не раздвајај у случају датума писаног мешовито - дан арапским цифрама, а назив месеца словима: ”Дне 28. јуна“–> <rule break=”no“> <beforebreak>d+.s</beforebreak> <afterbreak>јануар|јануара|фебруар|фебруара|март|марта|април|априла|мај|маја|јун|јуна|јул|јула|август|августа|септембар|септембра|октобар|октобра|новембар|новембра|децембар|децембра</afterbreak> </rule> <!–Не раздвајај у случајевима као на пр.: ”у 1. степену сродства“.–> <rule break=”no“> <beforebreak>d+.s</beforebreak> <afterbreak>степен(у)</afterbreak> </rule> <!– German abbreviations –> <rule break=”no“> <beforebreak>b(versch|d|Übers|usw|Ab|ahd|Akk|aktual|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|Az|Bat|bayr|Bd|Bde|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw|bzw).s</beforebreak> <afterbreak></afterbreak> </rule> <!–Српске скраћенице–> <rule break=”no“> <beforebreak>b(одн|тј).s+</beforebreak> <afterbreak></afterbreak> </rule> <!–Раздвој после ових знакова, ако су праћени једним или већим бројем размака.–> <rule break=”yes“> <beforebreak>[u0002|'|”|“|)|]|}¹²³]?s+</beforebreak> <afterbreak></afterbreak> </rule> <rule break=“yes”> <beforebreak>['““pPe}u00BBu201D]? 
p{Lu</afterbreak> </rule> <rule break=”yes“> <beforebreak>spLs</beforebreak> <afterbreak>p{Lu}p{Ll}</afterbreak> </rule> </languagerule> <languagerule languagerulename=”Irish“> <rule break=”no“> <beforebreak>bPh.</beforebreak> <afterbreak>D.</afterbreak> </rule> <!– Don't split at e.g. ”U.S.A.“ –> <rule break=”no“> <beforebreak>bp{L}.</beforebreak> <afterbreak></afterbreak> </rule> <!– Don't split after a white-space followed by a single letter followed
by a dot followed by another whitespace. e.g. " p. " -->
<rule break=“no”> <beforebreak>sp{L}.s</beforebreak> <afterbreak>p{L}.</afterbreak> </rule> <!– Don't split [.?!] when they're quoted –> <rule break=“no”> <beforebreak>[.!?u0085]s</beforebreak> <afterbreak></afterbreak> </rule> <!– Don't break after quote unless there's a capital letter
e.g.: "That's right!" he said. -->
<rule break=“no”> <beforebreak>s</beforebreak> <afterbreak>pLl} (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec).s dd(dd)? (Ean|Fea|Már|Aib|Bea|Mei|Iúl|Lún|M.?Fr|D.?Fr|Sam|Nol).s dd(dd)? b(Mr|Mrs|Ms|No|pp|St|no|Sr|Jr|Bros|etc|vs|esp|[Ff]ig|Jan|Feb|Mar|Apr|Jun|Jul|Aug|Sep|Sept|Oct|Okt|Nov|Dec|PhD|al|cf|Inc|Ms|Gen|Sen|Prof|Corp|Co|Ltd).s b(sp|spp).s ([Ll]ch|[Ll]gh|[Uu]imh).s d .i.s Msc.s Uas.s Teo.s m.sh.s [.!?u0085][u0002|‘|“|«|)|]|}¹²³]?s+ [.!?u0085][’”p{Pe}u00BBu201D]? p{Lu</afterbreak> </rule> <rule break=“yes”> <beforebreak>spLs</beforebreak> <afterbreak>p{Lu}p{Ll}</afterbreak> </rule> </languagerule> <languagerule languagerulename=“GeneralImportant”> <!–Don't split up URLs.–> <rule break=“no”> <beforebreak>bwww.</beforebreak> <afterbreak>w</afterbreak> </rule> <!–Don't split up e-mail addresses.–> <rule break=“no”> <beforebreak></beforebreak> <afterbreak>S*@</afterbreak> </rule> </languagerule> <languagerule languagerulename=“Arabic”>
<!-- No break after "www." when a word character follows, so that a web
     address is not split at its first dot. -->
<rule break="no">
<beforebreak>\bwww\.</beforebreak>
<afterbreak>\w</afterbreak>
</rule>
<!-- Exception: no break after the ellipsis character (optionally preceded by opening and
     followed by closing brackets or parentheses) and one space when a lowercase letter follows.
     The space before the closing beforebreak tag is a literal part of the pattern. -->
<rule break="no">
  <beforebreak>[\[\(]*…[\]\)]* </beforebreak>
  <afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- Exception: no break after one or more of ! ? ؟ enclosed in an opening and a closing
     punctuation character (for example "(!)") and followed by one space.
     The space before the closing beforebreak tag is a literal part of the pattern. -->
<rule break="no">
  <beforebreak>\p{Ps}[!?؟]+\p{Pe} </beforebreak>
  <afterbreak></afterbreak>
</rule>
<!-- Exception: no break after sentence punctuation (. ! ? ؟ …) that is followed by a closing
     punctuation character and one space when the text continues with a lowercase letter.
     The space before the closing beforebreak tag is a literal part of the pattern. -->
<rule break="no">
  <beforebreak>[\.!?؟…]+\p{Pe} </beforebreak>
  <afterbreak>\p{Ll}</afterbreak>
</rule>
<!-- Exception: no break after a quotation mark (« » " ” ') and optional whitespace when,
     after further optional whitespace, a lowercase letter follows. -->
<rule break="no">
  <beforebreak>[«»"”']\s*</beforebreak>
  <afterbreak>\s*\p{Ll}</afterbreak>
</rule>
<!-- Exception: no break after a single sentence-punctuation character that is wrapped in
     quotation marks (for example "?") and followed by one whitespace character. -->
<rule break="no">
  <beforebreak>[«'"„][\.!?؟…]['"”»]\s</beforebreak>
  <afterbreak></afterbreak>
</rule>
<!-- Exception: no break inside a whitespace-separated run of single-letter abbreviations:
     a letter, a period and whitespace, followed by another letter, period and whitespace. -->
<rule break="no">
  <beforebreak>\b\p{L}\.\s</beforebreak>
  <afterbreak>\p{L}\.\s</afterbreak>
</rule>
<!-- Exception: no break inside a run of single-letter abbreviations written without
     whitespace: a letter and a period directly followed by another letter and period. -->
<rule break="no">
  <beforebreak>\b\p{L}\.</beforebreak>
  <afterbreak>\p{L}\.</afterbreak>
</rule>
<!-- Break after a two-letter word ending in a period and followed by whitespace when the next
     text is a number, a closing parenthesis and whitespace (for example "1) "). The character
     before the whitespace that precedes the word must not be a Latin or Arabic comma. -->
<rule break="yes">
  <beforebreak>[^,،][\s]\p{L}{2}\.\s</beforebreak>
  <afterbreak>\p{N}+\)\s</afterbreak>
</rule>
<!-- Exception: no break after a one- or two-letter abbreviation (preceded by a period or
     whitespace, followed by a period and whitespace) when a digit or a lowercase letter
     comes next. -->
<rule break="no">
  <beforebreak>[\.\s]\p{L}{1,2}\.\s</beforebreak>
  <afterbreak>[\p{N}\p{Ll}]</afterbreak>
</rule>
<!-- Exception: no break after three consecutive periods (optionally preceded by opening and
     followed by closing brackets or parentheses) and one space, unless the next character is
     an uppercase letter.
     The space before the closing beforebreak tag is a literal part of the pattern. -->
<rule break="no">
  <beforebreak>[\[\(]*\.\.\.[\]\)]* </beforebreak>
  <afterbreak>[^\p{Lu}]</afterbreak>
</rule>
<!-- Exception: no break after two uppercase initials that are each followed by a period
     and whitespace (for example "A. B. "). -->
<rule break="no">
  <beforebreak>\b\p{Lu}\.\s\p{Lu}\.\s</beforebreak>
  <afterbreak></afterbreak>
</rule>
<!-- Exception: no break after two uppercase initials written without whitespace between
     them and followed by whitespace (for example "A.B. "). -->
<rule break="no">
  <beforebreak>\b\p{Lu}\.\p{Lu}\.\s</beforebreak>
  <afterbreak></afterbreak>
</rule>
<!-- Exception: no break after a stand-alone single Arabic letter, Arabic-Indic digit or Latin
     capital (A-Z) that is followed by a period and whitespace, provided the character before
     the preceding whitespace is not a period.
     The letter class used to list ق twice and had no ث at its alphabetical position between
     ت and ج; the first ق is replaced by ث so that this letter is covered as well.
     NOTE(review): ر ز س ش ط ظ are also absent from the class; confirm whether that is
     intentional before adding them. -->
<rule break="no">
  <beforebreak>[^\.]\s[ابتثجحخدذصضعغفقكلمنهوىيءةأ١٢٣٤٥٦٧٨٩٠A-Z]\.\s</beforebreak>
  <afterbreak></afterbreak>
</rule>
<!-- Exception: no break after a stand-alone character from U+064B through U+0656 (Arabic
     combining marks) or U+0640 (tatweel) that is followed by a period and whitespace, provided
     the character before the preceding whitespace is not a period. -->
<rule break="no">
  <beforebreak>[^\.]\s[\u064B\u064C\u064D\u064E\u064F\u0650\u0651\u0652\u0653\u0654\u0655\u0656\u0640]\.\s</beforebreak>
  <afterbreak></afterbreak>
</rule>
<!-- Exception: no break after a lowercase word that directly follows an opening parenthesis
     and ends with a period and whitespace (for example "(abbr. "). -->
<rule break="no">
  <beforebreak>\(\p{Ll}+\.\s</beforebreak>
  <afterbreak></afterbreak>
</rule>
<!-- Break after sentence punctuation (. ! ? ؟ …), any number of trailing quotation marks,
     closing punctuation characters, superscript digits ¹²³ or U+0002 characters, and one
     whitespace character. Nothing is required of the text that follows. -->
<rule break="yes">
  <beforebreak>[\.!?؟…][«»\u00BB\u2019\u201D\u203A"'\p{Pe}\u0002¹²³]*\s</beforebreak>
  <afterbreak></afterbreak>
</rule>
<!-- Break directly after sentence punctuation (. ! ? ؟ …) and any trailing quotation marks,
     closing punctuation characters or U+0002 characters, without requiring whitespace, when
     the next text is an uppercase letter followed by a character that is not uppercase. -->
<rule break="yes">
  <beforebreak>[\.!?؟…][«»'"\u00BB\u2019\u201D\u203A\p{Pe}\u0002]*</beforebreak>
  <afterbreak>\p{Lu}[^\p{Lu}]</afterbreak>
</rule>
<!-- Break after a single letter that is preceded by whitespace and followed by sentence
     punctuation (. ! ? ؟ …) and whitespace, when the next text starts with an uppercase
     letter followed by a lowercase letter. -->
<rule break="yes">
  <beforebreak>\s\p{L}[\.!?؟…]\s</beforebreak>
  <afterbreak>\p{Lu}\p{Ll}</afterbreak>
</rule>
</languagerule>
</languagerules>
<!-- Language map: each entry pairs a regular expression that is matched against the language
     code with the name of a rule set. The header declares cascade="yes"; under SRX 2.0 that
     means every matching entry contributes its rules, in the order listed here.
     Attribute values are delimited with straight double quotes, as XML requires.
     NOTE(review): the "_one" and "_two" patterns had lost their leading token and began with a
     bare "{2,3}" quantifier, which is not a valid regular expression. "[a-z]" is a
     reconstruction; confirm it against the upstream rule file. -->
<maprules>
  <languagemap languagepattern=".*" languagerulename="GeneralImportant"></languagemap>
  <languagemap languagepattern="[a-z]{2,3}_one" languagerulename="ByLineBreak"></languagemap>
  <languagemap languagepattern="[a-z]{2,3}_two" languagerulename="ByTwoLineBreaks"></languagemap>
  <languagemap languagepattern="(EL|el).*" languagerulename="Greek"></languagemap>
  <languagemap languagepattern="(PL|pl).*" languagerulename="Polish"></languagemap>
  <languagemap languagepattern="(EN|en).*" languagerulename="English"></languagemap>
  <languagemap languagepattern="(NL|nl).*" languagerulename="Dutch"></languagemap>
  <languagemap languagepattern="(RO|ro).*" languagerulename="Romanian"></languagemap>
  <languagemap languagepattern="(SK|sk).*" languagerulename="Slovak"></languagemap>
  <languagemap languagepattern="(IS|is).*" languagerulename="Icelandic"></languagemap>
  <languagemap languagepattern="(RU|ru).*" languagerulename="Russian"></languagemap>
  <languagemap languagepattern="(SL|sl).*" languagerulename="Slovenian"></languagemap>
  <languagemap languagepattern="(CA|ca).*" languagerulename="Catalan"></languagemap>
  <languagemap languagepattern="(ES|es).*" languagerulename="Spanish"></languagemap>
  <languagemap languagepattern="(DE|de).*" languagerulename="German"></languagemap>
  <languagemap languagepattern="(DA|da).*" languagerulename="Danish"></languagemap>
  <languagemap languagepattern="(EO|eo).*" languagerulename="Esperanto"></languagemap>
  <languagemap languagepattern="(FR|fr).*" languagerulename="French"></languagemap>
  <languagemap languagepattern="(UK|uk).*" languagerulename="Ukrainian"></languagemap>
  <languagemap languagepattern="(BE|be).*" languagerulename="Belarusian"></languagemap>
  <languagemap languagepattern="(GL|gl).*" languagerulename="Galician"></languagemap>
  <languagemap languagepattern="(JA|ja).*" languagerulename="Ideographic"></languagemap>
  <languagemap languagepattern="(ZH|zh).*" languagerulename="Ideographic"></languagemap>
  <languagemap languagepattern="(BR|br).*" languagerulename="Breton"></languagemap>
  <languagemap languagepattern="(PT|pt).*" languagerulename="Portuguese"></languagemap>
  <languagemap languagepattern="(IT|it).*" languagerulename="Italian"></languagemap>
  <languagemap languagepattern="(TA|ta).*" languagerulename="Tamil"></languagemap>
  <languagemap languagepattern="(FA|fa).*" languagerulename="Persian"></languagemap>
  <languagemap languagepattern="(GA|ga).*" languagerulename="Irish"></languagemap>
  <languagemap languagepattern="(SR|sr).*" languagerulename="Serbian"></languagemap>
  <languagemap languagepattern="(AR|ar).*" languagerulename="Arabic"></languagemap>
  <languagemap languagepattern="(SV|sv).*" languagerulename="Generic"></languagemap>
  <languagemap languagepattern="(LT|lt).*" languagerulename="Generic"></languagemap>
  <languagemap languagepattern="(ML|ml).*" languagerulename="Generic"></languagemap>
  <languagemap languagepattern="(TL|tl).*" languagerulename="Generic"></languagemap>
  <languagemap languagepattern="(AST|ast).*" languagerulename="Generic"></languagemap>
  <languagemap languagepattern=".*" languagerulename="Default"></languagemap>
</maprules>
</body>
</srx>