module Sequence::StringLike
Constants
- FFS_4BITTABLE
Public Instance Methods
_anchor(str,backwards=false,cache=true)
click to toggle source
add an anchor to a Regexp-string. normally,
# File lib/sequence/stringlike.rb, line 216 def _anchor(str,backwards=false,cache=true) cache and result=@@anchor_cache[[str,backwards]] and return result result=backwards ? "(?:#{str})\\Z" : "\\A(?:#{str})" cache and return @@anchor_cache[[str,backwards]]||=Regexp.new( result, 0, 'n' ) #is it really correct to force regexp to be binary encoding here? not sure #maybe encoding of str should be detected and reused? return result end
data_class()
click to toggle source
# File lib/sequence/stringlike.rb, line 12 def data_class; String end
ffs()
click to toggle source
# File lib/sequence/stringlike.rb, line 18 def ffs holding{ begin! zeros=read_til_charset(/[^\0]/) byte=read1 lo=byte&0xF rem=FFS_4BITTABLE[lo]||FFS_4BITTABLE[byte>>4]+4 return zeros.size<<3+rem } end
fixup_match_result(matchdata,addedgroups,pos_adjust,namelet,&body)
click to toggle source
# File lib/sequence/stringlike.rb, line 226 def fixup_match_result(matchdata,addedgroups,pos_adjust,namelet,&body) #remove extra capture results from () we inserted from MatchData #..first extract groups, begin and end idxs from old groups=matchdata.to_a begins=[] ends=[] matchdata.to_a.each_with_index{|substr,i| if substr begins<<matchdata.begin(i)+pos_adjust ends<<matchdata.end(i)+pos_adjust else begins<<nil ends<<nil end } #..remove data at group indexes we added above addedgroups.reverse_each{|groupidx| [groups,begins,ends].each{|arr| arr.delete_at groupidx } } #..now change matchdata to use fixed-up arrays result=CorrectedMatchData.new result.begins=begins result.ends=ends result.groups=groups if namelet==:pre result.set_pre_match_body( &body) result.set_post_match_body {matchdata.post_match} else result.set_pre_match_body {matchdata.pre_match} result.set_post_match_body( &body) end result.pos=pos_adjust result end
fns(bitnum)
click to toggle source
# File lib/sequence/stringlike.rb, line 30 def fns(bitnum) holding{ goto bitnum>>3 bitnum&=0x7 byte=read1 byte&=~((1<<(bitnum+1))-1) if byte.nonzero? zeros_size=0 else zeros_size=read_til_charset(/[^\0]/).size byte=read1 end lo=byte&0xF rem=FFS_4BITTABLE[lo]||FFS_4BITTABLE[byte>>4]+4 return zeros_size<<3+rem } end
group_anchors(rex,backwards,anchored=false)
click to toggle source
# File lib/sequence/stringlike.rb, line 166 def group_anchors(rex,backwards,anchored=false) @@fs_cache||={} result=@@fs_cache[[rex,backwards,anchored]] and return result if backwards caret,dollar,buffanchor='^',nil,'A' else caret,dollar,buffanchor=nil,'$','Z' end newrex=(anchored ? _anchor(rex,backwards,false) : rex.to_s) rewritten=incclass=false groupnum=0 addedgroups=[] result='' (frags=newrex.split( /((?:[^\\(\[\]$^]+|\\(?:[CM]-)*[^CMZA])*)/ )).each_index{|i| frag=frags[i] case frag when "\\" if !incclass and frags[i+1][0,1]==buffanchor frags[i+1].slice! 0 frag='(?!)' rewritten=true end when caret unless incclass addedgroups<<(groupnum+=1) frag="(^)" rewritten=true end when dollar unless incclass frag="(?=\n)" rewritten=true end when "("; incclass or frags[i+1][0]==?? or groupnum+=1 when "["; incclass=true #ignore stuff til ] when "]"; incclass=false #stop ignoring stuff end result<<frag } newrex=rewritten ? Regexp.new(result) : rex @@fs_cache[[rex,backwards,anchored]]=[newrex,addedgroups] end
index(pat,pos=0)
click to toggle source
# File lib/sequence/stringlike.rb, line 427 def index pat,pos=0 posi= self.begin() until_buffer_len=4*maxmatchlen(false) if Regexp===pat until_step_len=3*maxmatchlen(false) until posi.eof? if m=posi.match(pat,false,until_buffer_len) range=0...m.begin(0) pre=subseq(range) m.set_pre_match_body { pre } self.last_match=Thread.current[:last_match]=m return m.begin(0) #return match and what preceded it end posi.move until_step_len end #elsif SetOfChar===pat; ... else until_step_len=until_buffer_len String===pat and until_step_len-=pat.size-1 until posi.eof? buf=posi.readahead(until_buffer_len) if i=buf.index( pat) result=posi.pos+i return result end posi.move until_step_len end end return nil ensure posi.close end
like()
click to toggle source
# File lib/sequence/stringlike.rb, line 14 def like; StringLike end
match(rex,anchored=true, len=maxmatchlen(false))
click to toggle source
# File lib/sequence/stringlike.rb, line 130 def match(rex,anchored=true, len=maxmatchlen(false)) newrex=nearend(len)? rex : group_anchors(rex,false,false).first #do the match against what input we have matchdata=match_fast(newrex,false,len) if !matchdata or anchored && matchdata.begin(0).nonzero? self.last_match=Thread.current[:last_match]=nil return end posi=position;posi.move matchdata.end(0) result=fixup_match_result(matchdata,[],pos,:post) { posi.subseq(posi.pos..-1) } #note: post_match is a SubSeq #rex.last_match= self.last_match=Thread.current[:last_match]=result end
match_fast(rex,backwards=false,len=maxmatchlen(backwards))
click to toggle source
# File lib/sequence/stringlike.rb, line 76 def match_fast(rex,backwards=false,len=maxmatchlen(backwards)) str=send backwards ? :readbehind : :readahead, len if result=rex.match(str) if backwards def result.pre_match; end else def result.post_match ; end end end return result end
matchback(rex,anchored=true, len=maxmatchlen(true))
click to toggle source
# File lib/sequence/stringlike.rb, line 92 def matchback(rex,anchored=true, len=maxmatchlen(true)) nearbegin=nearbegin(len) newrex,addedgroups= if nearbegin && !anchored [rex,[]] else group_anchors(rex,:back,anchored) end #do the match against what input we have matchdata=match_fast(newrex,true,len) #fail if any ^ or \A matched at begin of buffer, #but buffer isn't begin of file if !matchdata or #not actually a match addedgroups.find{|i| matchdata.end(i)==0 } && !nearbegin self.last_match=Thread.current[:last_match]=nil return end matchpos=pos-len matchpos>=0 or matchpos=0 assert(matchpos>=0) match1st=position matchpos+matchdata.begin(0) result=fixup_match_result(matchdata,addedgroups,matchpos,:pre) do result=SubSeq.new(self,0,match1st.pos) result.pos=match1st.pos result end #note: pre_match is a subseq. #rex.last_match= self.last_match=Thread.current[:last_match]=result end
new_data()
click to toggle source
Return an empty object used for returning a sequence of elements. The only method required of this object is << (append to the sequence).
usually [] or “”
# File lib/sequence/stringlike.rb, line 10 def new_data; '' end
push(str)
click to toggle source
# File lib/sequence/stringlike.rb, line 417 def push(str) Integer===str and str=str.chr insert size, str end
read_til_charset(charrex,blocksize=16)
click to toggle source
# File lib/sequence/stringlike.rb, line 51 def read_til_charset(charrex,blocksize=16) blocks=[] m=nil until eof? block=read blocksize #if near eof, less than a full block may have been read if m=charrex .match(block) self.pos-=m.post_match.length+1 #'self.' shouldn't be needed... but is blocks<<m.pre_match if m.pre_match.length>0 break end blocks<<block end return blocks.join end
rindex(pat,pos=size-1)
click to toggle source
# File lib/sequence/stringlike.rb, line 460 def rindex pat,pos=size-1 posi= self.end() until_buffer_len=4*maxmatchlen(false) if Regexp===pat until_step_len=3*maxmatchlen(false) until posi.pos.zero? if m=posi.matchback(pat,false,until_buffer_len) range=m.end(0)+1..-1 post=subseq(range) m.set_post_match_body { post } self.last_match=Thread.current[:last_match]=m posi.close return m.begin(0) #return match and what preceded it end posi.move( -until_step_len ) end #elsif SetOfChar===pat; ... else until_step_len=until_buffer_len String===pat and until_step_len-=pat.size-1 until posi.pos.zero? buf=posi.readbehind(until_buffer_len) if i=buf.rindex( pat) result=posi.pos-until_buffer_len+i posi.close return result end posi.move( -until_step_len ) end end return nil ensure posi.close end
scan(pat)
click to toggle source
# File lib/sequence/stringlike.rb, line 311 def scan(pat) holding? {case pat when Integer pat==read1 and pat.chr #when SetOfChar; ... when String pat==read(pat.size) and pat when Regexp if m=match(pat,true) goto m.end(0) m.to_s end else raise ArgumentError.new("bad scan pattern for Sequence::StringLike") end} end
scan_until(pat)
click to toggle source
# File lib/sequence/stringlike.rb, line 343 def scan_until(pat) at=index( pat,pos) or return newpos=case pat when Regexp m=last_match s=slice(pos...m.begin(0)) m.set_pre_match_body{s} m.end(0) when String; at+pat.size when Integer; at+1 #when SetOfChar; huh else raise ArgumentError end return( read newpos-pos) =begin holding? { if Regexp===pat until_buffer_len=4*maxmatchlen(false) until_step_len=3*maxmatchlen(false) holding_position{|posi| until posi.eof? if m=posi.match(pat,false,until_buffer_len) pre=read(posi.pos-pos)+m.pre_match m.set_prematch_body {pre} #readjust matchdata to include data between my own pos and posi goto m.end(0) #advance my own position to end of match return m.pre_match+m.to_s #return match and what preceded it end posi.move until_step_len end nil } #elsif SetOfChar===pat; ... else #string or integer i=index(pat,pos) result=read(i-pos)<<pat move(pat.is_a?( Integer ) ? 1 : pat.size) result end } =end end
scanback(pat)
click to toggle source
# File lib/sequence/stringlike.rb, line 327 def scanback(pat) holding? {case pat when Integer pat==readback1 and pat.chr #when SetOfChar; ... when String pat==readback(pat.size) and pat when Regexp if m=matchback(pat,true) goto m.begin(0) m.to_s end else raise ArgumentError.new("bad scan pattern for Sequence::StringLike") end} end
scanback_until(pat)
click to toggle source
# File lib/sequence/stringlike.rb, line 386 def scanback_until(pat) at=rindex( pat,pos) or return newpos= if Regexp===pat m=last_match s=slice(m.end(0)+1..pos) m.set_post_match_body{s} m.begin(0) else at end assert(newpos<=pos) return( readback pos-newpos) =begin holding? { if Regexp===pat huh #need to scan til eof, like #scan_until does m=matchback(pat,false) or break goto= m.begin(0) m.to_s+m.post_match #elsif SetOfChar===pat; ... else #string or integer i=rindex(pat,pos) result=readback(pos-i-pat.size)<<pat move( -(pat.is_a? Integer ? 1 : pat.size)) result end } =end end
unshift(str)
click to toggle source
# File lib/sequence/stringlike.rb, line 422 def unshift(str) Integer===str and str=str.chr insert 0, str end