aboutsummaryrefslogtreecommitdiffstats
path: root/env/lib/python3.10/site-packages/pikepdf/codec.py
diff options
context:
space:
mode:
Diffstat (limited to 'env/lib/python3.10/site-packages/pikepdf/codec.py')
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/codec.py170
1 files changed, 0 insertions, 170 deletions
diff --git a/env/lib/python3.10/site-packages/pikepdf/codec.py b/env/lib/python3.10/site-packages/pikepdf/codec.py
deleted file mode 100644
index 4290b91..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/codec.py
+++ /dev/null
@@ -1,170 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""Implement pdfdoc codec."""
-
-from __future__ import annotations
-
-import codecs
-from typing import Container
-
-from ._qpdf import pdf_doc_to_utf8, utf8_to_pdf_doc
-
-# pylint: disable=redefined-builtin
-
-# See PDF Reference Manual 1.7, Table D.2.
-# The following generates set of all Unicode code points that can be encoded in
-# pdfdoc. Since pdfdoc is 8-bit, the vast majority of code points cannot be.
-
-# Due to a bug, QPDF <= 10.5 and pikepdf < 5 had some inconsistencies around
-# PdfDocEncoding.
-PDFDOC_ENCODABLE = frozenset(
- list(range(0x00, 0x17 + 1))
- + list(range(0x20, 0x7E + 1))
- + [
- 0x2022,
- 0x2020,
- 0x2021,
- 0x2026,
- 0x2014,
- 0x2013,
- 0x0192,
- 0x2044,
- 0x2039,
- 0x203A,
- 0x2212,
- 0x2030,
- 0x201E,
- 0x201C,
- 0x201D,
- 0x2018,
- 0x2019,
- 0x201A,
- 0x2122,
- 0xFB01,
- 0xFB02,
- 0x0141,
- 0x0152,
- 0x0160,
- 0x0178,
- 0x017D,
- 0x0131,
- 0x0142,
- 0x0153,
- 0x0161,
- 0x017E,
- 0x20AC,
- ]
- + [0x02D8, 0x02C7, 0x02C6, 0x02D9, 0x02DD, 0x02DB, 0x02DA, 0x02DC]
- + list(range(0xA1, 0xAC + 1))
- + list(range(0xAE, 0xFF + 1))
-)
-
-
-def _find_first_index(s: str, ordinals: Container[int]) -> int:
- for n, char in enumerate(s):
- if ord(char) not in ordinals:
- return n
- raise ValueError("couldn't find the unencodable character") # pragma: no cover
-
-
-def pdfdoc_encode(input: str, errors: str = 'strict') -> tuple[bytes, int]:
- error_marker = b'?' if errors == 'replace' else b'\xad'
- success, pdfdoc = utf8_to_pdf_doc(input, error_marker)
- if success:
- return pdfdoc, len(input)
-
- if errors == 'ignore':
- pdfdoc = pdfdoc.replace(b'\xad', b'')
- return pdfdoc, len(input)
- if errors == 'replace':
- return pdfdoc, len(input)
- if errors == 'strict':
- if input.startswith('\xfe\xff') or input.startswith('\xff\xfe'):
- raise UnicodeEncodeError(
- 'pdfdoc',
- input,
- 0,
- 2,
- "strings beginning with byte order marks cannot be encoded in pdfdoc",
- )
-
- # libqpdf doesn't return what character caused the error, and Python
- # needs this, so make an educated guess and raise an exception based
- # on that.
- offending_index = _find_first_index(input, PDFDOC_ENCODABLE)
- raise UnicodeEncodeError(
- 'pdfdoc',
- input,
- offending_index,
- offending_index + 1,
- "character cannot be represented in pdfdoc encoding",
- )
- raise LookupError(errors)
-
-
-def pdfdoc_decode(input: bytes, errors: str = 'strict') -> tuple[str, int]:
- if isinstance(input, memoryview):
- input = input.tobytes()
- s = pdf_doc_to_utf8(input)
- if errors == 'strict':
- idx = s.find('\ufffd')
- if idx >= 0:
- raise UnicodeDecodeError(
- 'pdfdoc',
- input,
- idx,
- idx + 1,
- "no Unicode mapping is defined for this character",
- )
-
- return s, len(input)
-
-
-class PdfDocCodec(codecs.Codec):
- """Implements PdfDocEncoding character map used inside PDFs."""
-
- def encode(self, input: str, errors: str = 'strict') -> tuple[bytes, int]:
- return pdfdoc_encode(input, errors)
-
- def decode(self, input: bytes, errors: str = 'strict') -> tuple[str, int]:
- return pdfdoc_decode(input, errors)
-
-
-class PdfDocStreamWriter(PdfDocCodec, codecs.StreamWriter):
- pass
-
-
-class PdfDocStreamReader(PdfDocCodec, codecs.StreamReader):
- def decode(self, input: bytes, errors: str = 'strict') -> tuple[str, int]:
- return PdfDocCodec.decode(self, input, errors)
-
-
-class PdfDocIncrementalEncoder(codecs.IncrementalEncoder):
- def encode(self, input: str, final: bool = False) -> bytes:
- return pdfdoc_encode(input, 'strict')[0]
-
-
-class PdfDocIncrementalDecoder(codecs.IncrementalDecoder):
- def decode(self, input: bytes, final: bool = False) -> str:
- return pdfdoc_decode(input, 'strict')[0]
-
-
-def find_pdfdoc(encoding: str) -> codecs.CodecInfo | None:
- if encoding in ('pdfdoc', 'pdfdoc_pikepdf'):
- codec = PdfDocCodec()
- return codecs.CodecInfo(
- name=encoding,
- encode=codec.encode,
- decode=codec.decode,
- streamwriter=PdfDocStreamWriter,
- streamreader=PdfDocStreamReader,
- incrementalencoder=PdfDocIncrementalEncoder,
- incrementaldecoder=PdfDocIncrementalDecoder,
- )
- return None # pragma: no cover
-
-
-codecs.register(find_pdfdoc)
-
-__all__ = ['utf8_to_pdf_doc', 'pdf_doc_to_utf8']