diff --git a/pyth/plugins/rtf15/reader.py b/pyth/plugins/rtf15/reader.py index 9829f14..07d312c 100644 --- a/pyth/plugins/rtf15/reader.py +++ b/pyth/plugins/rtf15/reader.py @@ -152,6 +152,14 @@ def parse(self): elif nextbyte == b'\\': control, digits = self.getControl() self.group.handle(control, digits) + + # '\binN #BDATA' is considered a single character as per spec. + # It also must skip decoding, so it's best to handle it here + if control == b'bin': + self.group.flushChars() + self.group.content.append(self.source.read(int(digits))) + continue + else: self.group.char(nextbyte) # within-group text @@ -237,8 +245,9 @@ def flushRun(self): if self.isImage: self.block.content.append( - document.Image(self.propStack[-1].copy(), - [b"".join(self.run)])) + document.Image({prop: value for prop, value in self.propStack[-1].items() + if prop not in document.Text.validProperties}, + [b"".join(bdata for bdata in self.run if bdata)])) self.isImage = False else: self.block.content.append( @@ -312,6 +321,11 @@ def handle_str(self, bit): self.run.append(bit) + def handle_bytes(self, bit): + # Binary data in Python 3 + self.run.append(bit) + + def handle_Push(self, _): self.propStack.append(self.propStack[-1].copy()) @@ -421,7 +435,7 @@ def handle(self, control, digits): b'picw', b'pich', b'picwgoal', b'pichgoal', b'picscalex', b'picscaley', b'picscaled', b'piccropt', b'piccropb', b'piccropr', b'piccropl', b'picbmp', b'picbpp', b'bin', b'blipupi', b'blipuid', b'bliptag', b'wbitmap']: - self.content.append(ImageMarker(control, digits)) + self.content.append(ImageMarker(control.decode('ascii'), digits)) return handler = getattr(self, 'handle_%s' % control.decode('ascii'), None)