#!/usr/bin/env python3
"""Extract spaced shingles from binary files and dump them as text.

A random 0/1 pattern is slid over a stream of tokens; for every window
position the tokens under the pattern's 1-positions form one "shingle".
When run as a script, ``./green.rgb`` and ``./white.rgb`` are read in
3-byte chunks (presumably one RGB pixel each -- confirm against the data
producer) and every shingle is written as one line of integers to
``./wg.rgb``.
"""
import os
import random
import sys


class pattern():
    """Random 0/1 pattern whose first and last positions are always 1.

    Args:
        Weight: requested number of 1-positions, including the two fixed
            end positions. Values below 2 still yield the two end
            positions, so the effective weight is never less than 2.
        DontCare: number of 0-positions placed between the two ends.
    """

    def __init__(self, Weight, DontCare):
        self._PW = Weight
        self._PDc = DontCare
        # Only the interior is shuffled; both ends are pinned to 1 so the
        # pattern always spans its full length.
        interior = [1] * (self._PW - 2) + [0] * self._PDc
        random.shuffle(interior)
        self._Pattern = [1] + interior + [1]
        self._Weights = [
            pos for pos, bit in enumerate(self._Pattern) if bit == 1
        ]

    def __repr__(self):
        return repr(self._Pattern)

    def __str__(self):
        return str(self._Pattern)

    def __len__(self):
        return len(self._Pattern)

    def __getitem__(self, idx):
        return self._Pattern[idx]

    def weight(self):
        """Return the number of 1-positions in the pattern."""
        return len(self._Weights)

    def weights(self):
        """Return the indices of the 1-positions, in increasing order."""
        return self._Weights


class stream_shingle():
    """Incrementally build shingles from a token stream.

    One partial shingle is kept per window start position in a ring of
    ``len(Pattern)`` buffers. A shingle is moved to the output list as
    soon as the token under the pattern's last position has arrived.

    Args:
        Pattern: object providing ``len()`` and ``weights()`` (the indices
            of the positions to keep), e.g. a ``pattern`` instance.
    """

    def __init__(self, Pattern):
        self.reset(Pattern)

    def __repr__(self):
        return repr(self._Words)

    def __str__(self):
        return str(self._Words)

    @staticmethod
    def _tokens(Stream):
        """Return Stream as an iterable of tokens.

        Lists and strings are consumed element by element; anything else
        (e.g. a ``bytes`` chunk) counts as one single token.
        """
        if isinstance(Stream, (list, str)):
            return Stream
        return [Stream]

    def add(self, Stream):
        """Feed one token, or a list/str of tokens, into the stream.

        The first ``len(Pattern)`` tokens are only collected; once they
        are all present the ring buffers are primed and the first shingle
        is emitted. Every later token completes exactly one more shingle.
        """
        for token in self._tokens(Stream):
            if self._Primed:
                self._push(token)
                continue
            self._Pending.append(token)
            self._TextCtr += 1
            if len(self._Pending) == len(self._Pattern):
                self._prime()

    def stream_add(self, Stream):
        """Backward-compatible alias of :meth:`add`."""
        self.add(Stream)

    def _prime(self):
        """Distribute the first full window of tokens over the buffers."""
        size = len(self._Pattern)
        for offset in self._Pattern.weights():
            # The token at position idx sits under pattern position
            # `offset` of the window that starts at idx - offset.
            for idx in range(offset, size):
                self._Buffer[idx - offset].append(self._Pending[idx])
        self._Pending = []
        self._Primed = True
        # The window starting at position 0 is complete at this point.
        self._emit(0)

    def _push(self, token):
        """Add one token in steady state and emit the window it completes."""
        size = len(self._Pattern)
        slot = self._TextCtr % size
        for offset in self._Pattern.weights():
            # slot - offset may be negative; Python's negative indexing
            # wraps it to the ring slot of the window `offset` tokens back.
            self._Buffer[slot - offset].append(token)
        self._TextCtr += 1
        # The window that started size - 1 tokens ago just received its
        # last position; its slot is reused by the window starting next.
        self._emit(self._TextCtr % size)

    def _emit(self, slot):
        """Move the finished shingle in ring slot `slot` to the output."""
        self._Words.append(self._Buffer[slot])
        self._Buffer[slot] = []

    def reset(self, Pattern):
        """Install a new pattern and discard all stream state."""
        self._Pattern = Pattern
        self.clear()

    def clear(self):
        """Discard buffered tokens and shingles; restart with priming."""
        self._Buffer = [[] for _ in range(len(self._Pattern))]
        self._Words = []
        self._TextCtr = 0
        self._Pending = []
        self._Primed = False

    def shingles(self):
        """Return the live list of finished, not yet extracted shingles."""
        return self._Words

    def extract_shingles(self):
        """Return the finished shingles and remove them from the stream."""
        Words = list(self._Words)
        self._Words = []
        return Words


PW = 20
PDc = 10
WordBuffer = 1000
FileOut = "./wg.rgb"


def _format_word(word):
    """Render one shingle of bytes tokens as a line of integers."""
    numbers = [
        str(int.from_bytes(token, byteorder=sys.byteorder)) for token in word
    ]
    # Every number, including the last, is followed by a single space.
    return "".join(number + " " for number in numbers) + "\n"


def shingle_file(in_path, out_file, shingle_pattern,
                 word_buffer=WordBuffer, chunk_size=3):
    """Shingle the binary file at in_path and write the result as text.

    Args:
        in_path: path of the binary input file.
        out_file: writable text file object receiving one line per shingle.
        shingle_pattern: pattern handed to ``stream_shingle``.
        word_buffer: finished shingles are flushed to out_file (and a
            progress percentage is printed) once more than this many have
            accumulated.
        chunk_size: number of bytes read per token.

    Returns:
        The number of shingles written.
    """
    total = os.path.getsize(in_path)
    consumed = 0
    written = 0
    stream = stream_shingle(shingle_pattern)
    with open(in_path, "rb") as in_file:
        # Read until EOF rather than a precomputed number of times, so no
        # empty chunks are fed into the stream after the data ends.
        for chunk in iter(lambda: in_file.read(chunk_size), b""):
            stream.add(chunk)
            consumed += len(chunk)
            if len(stream.shingles()) > word_buffer:
                percent = 100 * consumed / total if total else 100.0
                print("\r" + "{0:.2f}".format(percent) + "%", end="")
                words = stream.extract_shingles()
                out_file.writelines(_format_word(word) for word in words)
                written += len(words)
    # Flush whatever is still buffered after the last full batch.
    words = stream.extract_shingles()
    out_file.writelines(_format_word(word) for word in words)
    written += len(words)
    return written


def main():
    """Shingle the green and white input files into FileOut."""
    shingle_pattern = pattern(PW, PDc)
    print(repr(shingle_pattern))
    with open(FileOut, "w") as out_file:
        for label, in_path in (("Green", "./green.rgb"),
                               ("White", "./white.rgb")):
            count = shingle_file(in_path, out_file, shingle_pattern)
            print("\r" + " " * 80 + "\rDone. " + label + " File: "
                  + repr(count))


if __name__ == "__main__":
    main()