diff options
author | 2022-11-13 23:46:45 +0530 | |
---|---|---|
committer | 2022-11-13 23:46:45 +0530 | |
commit | 9468226a9e2e2ab8cdd599f1d8538e860ca86120 (patch) | |
tree | 0a77ada226d6db80639f96b438bf83e4e756edb5 /env/lib/python3.10/site-packages/pikepdf/codec.py | |
download | idcard-9468226a9e2e2ab8cdd599f1d8538e860ca86120.tar.gz idcard-9468226a9e2e2ab8cdd599f1d8538e860ca86120.tar.bz2 idcard-9468226a9e2e2ab8cdd599f1d8538e860ca86120.zip |
id card generator
Diffstat (limited to 'env/lib/python3.10/site-packages/pikepdf/codec.py')
-rw-r--r-- | env/lib/python3.10/site-packages/pikepdf/codec.py | 170 |
1 files changed, 170 insertions, 0 deletions
# SPDX-FileCopyrightText: 2022 James R. Barlow
# SPDX-License-Identifier: MPL-2.0
#
# Reconstructed from a flattened diff that adds
# env/lib/python3.10/site-packages/pikepdf/codec.py (new file, blob 4290b91).

"""Implement pdfdoc codec."""

from __future__ import annotations

import codecs
from typing import Container

from ._qpdf import pdf_doc_to_utf8, utf8_to_pdf_doc

# pylint: disable=redefined-builtin

# See PDF Reference Manual 1.7, Table D.2.
# The following generates set of all Unicode code points that can be encoded in
# pdfdoc. Since pdfdoc is 8-bit, the vast majority of code points cannot be.

# Due to a bug, QPDF <= 10.5 and pikepdf < 5 had some inconsistencies around
# PdfDocEncoding.
PDFDOC_ENCODABLE = frozenset(
    list(range(0x00, 0x17 + 1))
    + list(range(0x20, 0x7E + 1))
    + [
        0x2022,
        0x2020,
        0x2021,
        0x2026,
        0x2014,
        0x2013,
        0x0192,
        0x2044,
        0x2039,
        0x203A,
        0x2212,
        0x2030,
        0x201E,
        0x201C,
        0x201D,
        0x2018,
        0x2019,
        0x201A,
        0x2122,
        0xFB01,
        0xFB02,
        0x0141,
        0x0152,
        0x0160,
        0x0178,
        0x017D,
        0x0131,
        0x0142,
        0x0153,
        0x0161,
        0x017E,
        0x20AC,
    ]
    + [0x02D8, 0x02C7, 0x02C6, 0x02D9, 0x02DD, 0x02DB, 0x02DA, 0x02DC]
    + list(range(0xA1, 0xAC + 1))
    + list(range(0xAE, 0xFF + 1))
)


def _find_first_index(s: str, ordinals: Container[int]) -> int:
    """Return the index of the first character of *s* not in *ordinals*.

    Args:
        s: The string to scan.
        ordinals: Container of acceptable Unicode code points.

    Raises:
        ValueError: If every character's code point is in *ordinals*.
    """
    for n, char in enumerate(s):
        if ord(char) not in ordinals:
            return n
    raise ValueError("couldn't find the unencodable character")  # pragma: no cover


def pdfdoc_encode(input: str, errors: str = 'strict') -> tuple[bytes, int]:
    """Encode *input* as pdfdoc bytes.

    Args:
        input: The text to encode.
        errors: Error policy: ``'strict'``, ``'ignore'`` or ``'replace'``.

    Returns:
        A ``(encoded_bytes, characters_consumed)`` tuple; the second item is
        always ``len(input)``.

    Raises:
        UnicodeEncodeError: With ``errors='strict'`` when *input* cannot be
            encoded.
        LookupError: When encoding fails and *errors* names an unsupported
            policy.
    """
    # 0xAD is absent from PDFDOC_ENCODABLE, so it is presumably never produced
    # for a valid character and can serve as a marker that 'ignore' strips.
    error_marker = b'?' if errors == 'replace' else b'\xad'
    success, pdfdoc = utf8_to_pdf_doc(input, error_marker)
    if success:
        return pdfdoc, len(input)

    if errors == 'ignore':
        pdfdoc = pdfdoc.replace(b'\xad', b'')
        return pdfdoc, len(input)
    if errors == 'replace':
        return pdfdoc, len(input)
    if errors == 'strict':
        if input.startswith(('\xfe\xff', '\xff\xfe')):
            raise UnicodeEncodeError(
                'pdfdoc',
                input,
                0,
                2,
                "strings beginning with byte order marks cannot be encoded in pdfdoc",
            )

        # libqpdf doesn't return what character caused the error, and Python
        # needs this, so make an educated guess and raise an exception based
        # on that.
        offending_index = _find_first_index(input, PDFDOC_ENCODABLE)
        raise UnicodeEncodeError(
            'pdfdoc',
            input,
            offending_index,
            offending_index + 1,
            "character cannot be represented in pdfdoc encoding",
        )
    raise LookupError(errors)


def pdfdoc_decode(input: bytes, errors: str = 'strict') -> tuple[str, int]:
    """Decode pdfdoc *input* bytes to a string.

    Args:
        input: The bytes to decode; a ``memoryview`` is converted to bytes.
        errors: Error policy. Only ``'strict'`` changes behaviour: it raises
            if the decoded text contains U+FFFD.

    Returns:
        A ``(decoded_text, bytes_consumed)`` tuple; the second item is always
        ``len(input)``.

    Raises:
        UnicodeDecodeError: With ``errors='strict'`` when the decoded text
            contains U+FFFD.
    """
    if isinstance(input, memoryview):
        input = input.tobytes()
    s = pdf_doc_to_utf8(input)
    if errors == 'strict':
        # The position in the decoded text is reported as the byte offset.
        idx = s.find('\ufffd')
        if idx >= 0:
            raise UnicodeDecodeError(
                'pdfdoc',
                input,
                idx,
                idx + 1,
                "no Unicode mapping is defined for this character",
            )

    return s, len(input)


class PdfDocCodec(codecs.Codec):
    """Implements PdfDocEncoding character map used inside PDFs."""

    def encode(self, input: str, errors: str = 'strict') -> tuple[bytes, int]:
        """Encode *input* to pdfdoc; see :func:`pdfdoc_encode`."""
        return pdfdoc_encode(input, errors)

    def decode(self, input: bytes, errors: str = 'strict') -> tuple[str, int]:
        """Decode pdfdoc *input*; see :func:`pdfdoc_decode`."""
        return pdfdoc_decode(input, errors)


class PdfDocStreamWriter(PdfDocCodec, codecs.StreamWriter):
    """Stream writer that encodes with :class:`PdfDocCodec`."""


class PdfDocStreamReader(PdfDocCodec, codecs.StreamReader):
    """Stream reader that decodes with :class:`PdfDocCodec`."""

    def decode(self, input: bytes, errors: str = 'strict') -> tuple[str, int]:
        """Decode pdfdoc *input* by delegating to :class:`PdfDocCodec`."""
        return PdfDocCodec.decode(self, input, errors)


class PdfDocIncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder for pdfdoc."""

    def encode(self, input: str, final: bool = False) -> bytes:
        """Encode *input* using the error policy given at construction."""
        # codecs.IncrementalEncoder.__init__ stores the policy on self.errors
        # (default 'strict'); honour it instead of forcing 'strict'.
        return pdfdoc_encode(input, self.errors)[0]


class PdfDocIncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder for pdfdoc."""

    def decode(self, input: bytes, final: bool = False) -> str:
        """Decode *input* using the error policy given at construction."""
        # codecs.IncrementalDecoder.__init__ stores the policy on self.errors
        # (default 'strict'); honour it instead of forcing 'strict'.
        return pdfdoc_decode(input, self.errors)[0]


def find_pdfdoc(encoding: str) -> codecs.CodecInfo | None:
    """Codec search function for the ``pdfdoc`` encoding.

    Args:
        encoding: The encoding name being looked up.

    Returns:
        A ``CodecInfo`` when *encoding* is ``'pdfdoc'`` or
        ``'pdfdoc_pikepdf'``, otherwise ``None``.
    """
    if encoding in ('pdfdoc', 'pdfdoc_pikepdf'):
        codec = PdfDocCodec()
        return codecs.CodecInfo(
            name=encoding,
            encode=codec.encode,
            decode=codec.decode,
            streamwriter=PdfDocStreamWriter,
            streamreader=PdfDocStreamReader,
            incrementalencoder=PdfDocIncrementalEncoder,
            incrementaldecoder=PdfDocIncrementalDecoder,
        )
    return None  # pragma: no cover


# Importing this module registers the search function with the codecs
# machinery.
codecs.register(find_pdfdoc)

__all__ = ['utf8_to_pdf_doc', 'pdf_doc_to_utf8']