aboutsummaryrefslogtreecommitdiffstats
path: root/env/lib/python3.10/site-packages/pikepdf
diff options
context:
space:
mode:
authorLibravatarLibravatar Biswakalyan Bhuyan <biswa@surgot.in> 2022-11-14 16:43:12 +0530
committerLibravatarLibravatar Biswakalyan Bhuyan <biswa@surgot.in> 2022-11-14 16:43:12 +0530
commitd47f8b48935d258f4c5c3e2267911753bebd5214 (patch)
tree3ed04e75bc3fc7c8e4ce618f527565da1df630a1 /env/lib/python3.10/site-packages/pikepdf
parent9468226a9e2e2ab8cdd599f1d8538e860ca86120 (diff)
downloadidcard-d47f8b48935d258f4c5c3e2267911753bebd5214.tar.gz
idcard-d47f8b48935d258f4c5c3e2267911753bebd5214.tar.bz2
idcard-d47f8b48935d258f4c5c3e2267911753bebd5214.zip
id card
Diffstat (limited to 'env/lib/python3.10/site-packages/pikepdf')
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/__init__.py100
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/__pycache__/__init__.cpython-310.pycbin1990 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/__pycache__/_augments.cpython-310.pycbin4686 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/__pycache__/_cpphelpers.cpython-310.pycbin3215 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/__pycache__/_exceptions.cpython-310.pycbin472 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/__pycache__/_methods.cpython-310.pycbin53055 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/__pycache__/_version.cpython-310.pycbin409 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/__pycache__/_xml.cpython-310.pycbin1193 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/__pycache__/codec.cpython-310.pycbin4480 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/__pycache__/jbig2.cpython-310.pycbin3765 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/__pycache__/objects.cpython-310.pycbin10859 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/__pycache__/settings.cpython-310.pycbin396 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/_augments.py151
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/_cpphelpers.py104
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/_exceptions.py8
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/_methods.py1340
-rwxr-xr-xenv/lib/python3.10/site-packages/pikepdf/_qpdf.cpython-310-x86_64-linux-gnu.sobin2169224 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/_qpdf.pyi762
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/_version.py13
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/_xml.py28
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/codec.py170
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/jbig2.py108
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/__init__.py25
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/__pycache__/__init__.cpython-310.pycbin918 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/__pycache__/_content_stream.cpython-310.pycbin4907 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/__pycache__/_transcoding.cpython-310.pycbin8037 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/__pycache__/encryption.cpython-310.pycbin5098 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/__pycache__/image.cpython-310.pycbin32029 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/__pycache__/matrix.cpython-310.pycbin5540 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/__pycache__/metadata.cpython-310.pycbin26845 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/__pycache__/outlines.cpython-310.pycbin12353 -> 0 bytes
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/_content_stream.py136
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/_transcoding.py243
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/encryption.py176
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/image.py991
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/matrix.py145
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/metadata.py866
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/models/outlines.py421
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/objects.py300
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/py.typed3
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/settings.py18
41 files changed, 0 insertions, 6108 deletions
diff --git a/env/lib/python3.10/site-packages/pikepdf/__init__.py b/env/lib/python3.10/site-packages/pikepdf/__init__.py
deleted file mode 100644
index bca1e92..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/__init__.py
+++ /dev/null
@@ -1,100 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""A library for manipulating PDFs.
-
-isort:skip_file
-"""
-
-try:
- from . import _qpdf
-except ImportError as _e: # pragma: no cover
- _msg = "pikepdf's extension library failed to import"
- raise ImportError(_msg) from _e
-
-try:
- from ._version import __version__
-except ImportError as _e: # pragma: no cover
- raise ImportError("Failed to determine version") from _e
-
-from ._qpdf import (
- AccessMode,
- Annotation,
- AttachedFileSpec,
- ContentStreamInlineImage,
- ContentStreamInstruction,
- DataDecodingError,
- ForeignObjectError,
- Job,
- JobUsageError,
- NameTree,
- NumberTree,
- ObjectHelper,
- ObjectStreamMode,
- Page,
- PasswordError,
- Pdf,
- PdfError,
- Rectangle,
- StreamDecodeLevel,
- Token,
- TokenFilter,
- TokenType,
-)
-
-from .objects import (
- Array,
- Dictionary,
- Name,
- Object,
- ObjectType,
- Operator,
- Stream,
- String,
-)
-
-from .models import (
- Encryption,
- Outline,
- OutlineItem,
- OutlineStructureError,
- PageLocation,
- PdfImage,
- PdfInlineImage,
- PdfMatrix,
- Permissions,
- UnsupportedImageTypeError,
- make_page_destination,
- parse_content_stream,
- unparse_content_stream,
-)
-
-from . import settings
-
-# Importing these will monkeypatch classes defined in C++ and register a new
-# pdfdoc codec
-from . import _methods, codec
-
-# While _cpphelpers is intended to be called from our C++ code only, explicitly
-# importing helps introspection tools like PyInstaller figure out that the module
-# is necessary.
-from . import _cpphelpers
-
-__libqpdf_version__ = _qpdf.qpdf_version()
-
-
-# Provide pikepdf.{open, new} -> pikepdf.Pdf.{open, new}
-open = Pdf.open # pylint: disable=redefined-builtin
-new = Pdf.new
-
-# Exclude .open, .new here from to make sure from pikepdf import * does not clobber
-# builtins.open()
-# Exclude codec, objects, jbig2 because we import the interesting bits from them
-# directly to here.
-_exclude_from__all__ = {'open', 'new', 'codec', 'objects', 'jbig2'}
-
-__all__ = [
- k
- for k in locals().keys()
- if not k.startswith('_') and k not in _exclude_from__all__
-]
diff --git a/env/lib/python3.10/site-packages/pikepdf/__pycache__/__init__.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/__pycache__/__init__.cpython-310.pyc
deleted file mode 100644
index 71dd313..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/__pycache__/__init__.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/__pycache__/_augments.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/__pycache__/_augments.cpython-310.pyc
deleted file mode 100644
index 4158830..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/__pycache__/_augments.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/__pycache__/_cpphelpers.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/__pycache__/_cpphelpers.cpython-310.pyc
deleted file mode 100644
index b4fa25a..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/__pycache__/_cpphelpers.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/__pycache__/_exceptions.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/__pycache__/_exceptions.cpython-310.pyc
deleted file mode 100644
index e8622ca..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/__pycache__/_exceptions.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/__pycache__/_methods.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/__pycache__/_methods.cpython-310.pyc
deleted file mode 100644
index 9c55237..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/__pycache__/_methods.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/__pycache__/_version.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/__pycache__/_version.cpython-310.pyc
deleted file mode 100644
index fdfc76d..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/__pycache__/_version.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/__pycache__/_xml.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/__pycache__/_xml.cpython-310.pyc
deleted file mode 100644
index 6999039..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/__pycache__/_xml.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/__pycache__/codec.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/__pycache__/codec.cpython-310.pyc
deleted file mode 100644
index d61f814..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/__pycache__/codec.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/__pycache__/jbig2.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/__pycache__/jbig2.cpython-310.pyc
deleted file mode 100644
index d8356e2..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/__pycache__/jbig2.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/__pycache__/objects.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/__pycache__/objects.cpython-310.pyc
deleted file mode 100644
index 52714f4..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/__pycache__/objects.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/__pycache__/settings.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/__pycache__/settings.cpython-310.pyc
deleted file mode 100644
index 655e576..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/__pycache__/settings.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/_augments.py b/env/lib/python3.10/site-packages/pikepdf/_augments.py
deleted file mode 100644
index 88fc6e5..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/_augments.py
+++ /dev/null
@@ -1,151 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""A peculiar method of monkeypatching C++ binding classes with Python methods."""
-
-from __future__ import annotations
-
-import inspect
-import platform
-import sys
-from typing import Any, Callable, TypeVar
-
-if sys.version_info >= (3, 8):
- from typing import Protocol
-else:
- from typing_extensions import Protocol # pragma: no cover
-
-
-class AugmentedCallable(Protocol):
- """Protocol for any method, with attached booleans."""
-
- _augment_override_cpp: bool
- _augment_if_no_cpp: bool
-
- def __call__(self, *args, **kwargs) -> Any:
- """Any function.""" # pragma: no cover
-
-
-def augment_override_cpp(fn: AugmentedCallable) -> AugmentedCallable:
- """Replace the C++ implementation, if there is one."""
- fn._augment_override_cpp = True
- return fn
-
-
-def augment_if_no_cpp(fn: AugmentedCallable) -> AugmentedCallable:
- """Provide a Python implementation if no C++ implementation exists."""
- fn._augment_if_no_cpp = True
- return fn
-
-
-def _is_inherited_method(meth: Callable) -> bool:
- # Augmenting a C++ with a method that cls inherits from the Python
- # object is never what we want.
- return meth.__qualname__.startswith('object.')
-
-
-def _is_augmentable(m: Any) -> bool:
- return (
- inspect.isfunction(m) and not _is_inherited_method(m)
- ) or inspect.isdatadescriptor(m)
-
-
-Tcpp = TypeVar('Tcpp')
-T = TypeVar('T')
-
-
-def augments(cls_cpp: type[Tcpp]):
- """Attach methods of a Python support class to an existing class.
-
- This monkeypatches all methods defined in the support class onto an
- existing class. Example:
-
- .. code-block:: python
-
- @augments(ClassDefinedInCpp)
- class SupportClass:
- def foo(self):
- pass
-
- The Python method 'foo' will be monkeypatched on ClassDefinedInCpp. SupportClass
- has no meaning on its own and should not be used, but gets returned from
- this function so IDE code inspection doesn't get too confused.
-
- We don't subclass because it's much more convenient to monkeypatch Python
- methods onto the existing Python binding of the C++ class. For one thing,
- this allows the implementation to be moved from Python to C++ or vice
- versa. It saves having to implement an intermediate Python subclass and then
- ensures that the C++ superclass never 'leaks' to pikepdf users. Finally,
- wrapper classes and subclasses can become problematic if the call stack
- crosses the C++/Python boundary multiple times.
-
- Any existing methods may be used, regardless of whether they are defined
- elsewhere in the support class or in the target class.
-
- For data fields to work, the target class must be
- tagged ``py::dynamic_attr`` in pybind11.
-
- Strictly, the target class does not have to be C++ or derived from pybind11.
- This works on pure Python classes too.
-
- THIS DOES NOT work for class methods.
-
- (Alternative ideas: https://github.com/pybind/pybind11/issues/1074)
- """
- OVERRIDE_WHITELIST = {'__eq__', '__hash__', '__repr__'}
- if platform.python_implementation() == 'PyPy':
- # Either PyPy or pybind11's interface to PyPy automatically adds a __getattr__
- OVERRIDE_WHITELIST |= {'__getattr__'} # pragma: no cover
-
- def class_augment(cls: type[T], cls_cpp: type[Tcpp] = cls_cpp) -> type[T]:
-
- # inspect.getmembers has different behavior on PyPy - in particular it seems
- # that a typical PyPy class like cls will have more methods that it considers
- # methods than CPython does. Our predicate should take care of this.
- for name, member in inspect.getmembers(cls, predicate=_is_augmentable):
- if name == '__weakref__':
- continue
- if (
- hasattr(cls_cpp, name)
- and hasattr(cls, name)
- and name not in getattr(cls, '__abstractmethods__', set())
- and name not in OVERRIDE_WHITELIST
- and not getattr(getattr(cls, name), '_augment_override_cpp', False)
- ):
- if getattr(getattr(cls, name), '_augment_if_no_cpp', False):
- # If tagged as "augment if no C++", we only want the binding to be
- # applied when the primary class does not provide a C++
- # implementation. Usually this would be a function that not is
- # provided by pybind11 in some template.
- continue
-
- # If the original C++ class and Python support class both define the
- # same name, we generally have a conflict, because this is augmentation
- # not inheritance. However, if the method provided by the support class
- # is an abstract method, then we can consider the C++ version the
- # implementation. Also, pybind11 provides defaults for __eq__,
- # __hash__ and __repr__ that we often do want to override directly.
-
- raise RuntimeError(
- f"C++ {cls_cpp} and Python {cls} both define the same "
- f"non-abstract method {name}: "
- f"{getattr(cls_cpp, name, '')!r}, "
- f"{getattr(cls, name, '')!r}"
- )
- if inspect.isfunction(member):
- setattr(cls_cpp, name, member)
- installed_member = getattr(cls_cpp, name)
- installed_member.__qualname__ = member.__qualname__.replace(
- cls.__name__, cls_cpp.__name__
- )
- elif inspect.isdatadescriptor(member):
- setattr(cls_cpp, name, member)
-
- def disable_init(self):
- # Prevent initialization of the support class
- raise NotImplementedError(self.__class__.__name__ + '.__init__')
-
- cls.__init__ = disable_init # type: ignore
- return cls
-
- return class_augment
diff --git a/env/lib/python3.10/site-packages/pikepdf/_cpphelpers.py b/env/lib/python3.10/site-packages/pikepdf/_cpphelpers.py
deleted file mode 100644
index 4dff072..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/_cpphelpers.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""Support functions called by the C++ library binding layer.
-
-Not intended to be called from Python, and subject to change at any time.
-"""
-
-from __future__ import annotations
-
-from typing import Callable
-from warnings import warn
-
-from pikepdf import Dictionary, Name, Pdf
-
-
-def update_xmp_pdfversion(pdf: Pdf, version: str) -> None:
- """Update XMP metadata to specified PDF version."""
- if Name.Metadata not in pdf.Root:
- return # Don't create an empty XMP object just to store the version
-
- with pdf.open_metadata(set_pikepdf_as_editor=False, update_docinfo=False) as meta:
- if 'pdf:PDFVersion' in meta:
- meta['pdf:PDFVersion'] = version
-
-
-def _alpha(n: int) -> str:
- """Excel-style column numbering A..Z, AA..AZ..BA..ZZ.., AAA."""
- if n < 1:
- raise ValueError(f"Can't represent {n} in alphabetic numbering")
- p = []
- while n > 0:
- n, r = divmod(n - 1, 26)
- p.append(r)
- base = ord('A')
- ords = [(base + v) for v in reversed(p)]
- return ''.join(chr(o) for o in ords)
-
-
-def _roman(n: int) -> str:
- """Convert integer n to Roman numeral representation as a string."""
- if not (1 <= n <= 5000):
- raise ValueError(f"Can't represent {n} in Roman numerals")
- roman_numerals = (
- (1000, 'M'),
- (900, 'CM'),
- (500, 'D'),
- (400, 'CD'),
- (100, 'C'),
- (90, 'XC'),
- (50, 'L'),
- (40, 'XL'),
- (10, 'X'),
- (9, 'IX'),
- (5, 'V'),
- (4, 'IV'),
- (1, 'I'),
- )
- roman = ""
- for value, numeral in roman_numerals:
- while n >= value:
- roman += numeral
- n -= value
- return roman
-
-
-LABEL_STYLE_MAP: dict[Name, Callable[[int], str]] = {
- Name.D: str,
- Name.A: _alpha,
- Name.a: lambda x: _alpha(x).lower(),
- Name.R: _roman,
- Name.r: lambda x: _roman(x).lower(),
-}
-
-
-def label_from_label_dict(label_dict: int | Dictionary) -> str:
- """Convert a label dictionary returned by QPDF into a text string."""
- if isinstance(label_dict, int):
- return str(label_dict)
-
- label = ''
- if Name.P in label_dict:
- prefix = label_dict[Name.P]
- label += str(prefix)
-
- # If there is no S, return only the P portion
- if Name.S in label_dict:
- # St defaults to 1
- numeric_value = label_dict[Name.St] if Name.St in label_dict else 1
- if not isinstance(numeric_value, int):
- warn(
- "Page label dictionary has invalid non-integer start value", UserWarning
- )
- numeric_value = 1
-
- style = label_dict[Name.S]
- if isinstance(style, Name):
- style_fn = LABEL_STYLE_MAP[style]
- value = style_fn(numeric_value)
- label += value
- else:
- warn("Page label dictionary has invalid page label style", UserWarning)
-
- return label
diff --git a/env/lib/python3.10/site-packages/pikepdf/_exceptions.py b/env/lib/python3.10/site-packages/pikepdf/_exceptions.py
deleted file mode 100644
index 8f2412f..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/_exceptions.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-from __future__ import annotations
-
-
-class DependencyError(Exception):
- """A third party dependency is needed to extract streams of this type."""
diff --git a/env/lib/python3.10/site-packages/pikepdf/_methods.py b/env/lib/python3.10/site-packages/pikepdf/_methods.py
deleted file mode 100644
index 25e1d95..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/_methods.py
+++ /dev/null
@@ -1,1340 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""Implement some features in Python and monkey-patch them onto C++ classes.
-
-In several cases the implementation of some higher levels features might as
-well be in Python. Fortunately we can attach Python methods to C++ class
-bindings after the fact.
-
-We can also move the implementation to C++ if desired.
-"""
-
-from __future__ import annotations
-
-import datetime
-import mimetypes
-import shutil
-from collections.abc import KeysView, MutableMapping
-from decimal import Decimal
-from io import BytesIO
-from pathlib import Path
-from subprocess import run
-from tempfile import NamedTemporaryFile
-from typing import BinaryIO, Callable, ItemsView, Iterator, TypeVar, ValuesView
-from warnings import warn
-
-from . import Array, Dictionary, Name, Object, Page, Pdf, Stream
-from ._augments import augment_override_cpp, augments
-from ._qpdf import (
- AccessMode,
- AttachedFile,
- AttachedFileSpec,
- Attachments,
- NameTree,
- NumberTree,
- ObjectStreamMode,
- Rectangle,
- StreamDecodeLevel,
- StreamParser,
- Token,
- _ObjectMapping,
-)
-from .models import Encryption, EncryptionInfo, Outline, PdfMetadata, Permissions
-from .models.metadata import decode_pdf_date, encode_pdf_date
-
-# pylint: disable=no-member,unsupported-membership-test,unsubscriptable-object
-# mypy: ignore-errors
-
-__all__ = []
-
-Numeric = TypeVar('Numeric', int, float, Decimal)
-
-
-def _single_page_pdf(page) -> bytes:
- """Construct a single page PDF from the provided page in memory."""
- pdf = Pdf.new()
- pdf.pages.append(page)
- bio = BytesIO()
- pdf.save(bio)
- bio.seek(0)
- return bio.read()
-
-
-def _mudraw(buffer, fmt) -> bytes:
- """Use mupdf draw to rasterize the PDF in the memory buffer."""
- # mudraw cannot read from stdin so NamedTemporaryFile is required
- with NamedTemporaryFile(suffix='.pdf') as tmp_in:
- tmp_in.write(buffer)
- tmp_in.seek(0)
- tmp_in.flush()
-
- proc = run(
- ['mudraw', '-F', fmt, '-o', '-', tmp_in.name],
- capture_output=True,
- check=True,
- )
- return proc.stdout
-
-
-@augments(Object)
-class Extend_Object:
- def _ipython_key_completions_(self):
- if isinstance(self, (Dictionary, Stream)):
- return self.keys()
- return None
-
- def emplace(self, other: Object, retain=(Name.Parent,)):
- """Copy all items from other without making a new object.
-
- Particularly when working with pages, it may be desirable to remove all
- of the existing page's contents and emplace (insert) a new page on top
- of it, in a way that preserves all links and references to the original
- page. (Or similarly, for other Dictionary objects in a PDF.)
-
- Any Dictionary keys in the iterable *retain* are preserved. By default,
- /Parent is retained.
-
- When a page is assigned (``pdf.pages[0] = new_page``), only the
- application knows if references to the original the original page are
- still valid. For example, a PDF optimizer might restructure a page
- object into another visually similar one, and references would be valid;
- but for a program that reorganizes page contents such as a N-up
- compositor, references may not be valid anymore.
-
- This method takes precautions to ensure that child objects in common
- with ``self`` and ``other`` are not inadvertently deleted.
-
- Example:
- >>> pdf.pages[0].objgen
- (16, 0)
- >>> pdf.pages[0].emplace(pdf.pages[1])
- >>> pdf.pages[0].objgen
- (16, 0) # Same object
-
- .. versionchanged:: 2.11.1
- Added the *retain* argument.
- """
- if not self.same_owner_as(other):
- raise TypeError("Objects must have the same owner for emplace()")
-
- # .keys() returns strings, so make all strings
- retain = {str(k) for k in retain}
- self_keys = set(self.keys())
- other_keys = set(other.keys())
-
- assert all(isinstance(k, str) for k in (retain | self_keys | other_keys))
-
- del_keys = self_keys - other_keys - retain
- for k in (k for k in other_keys if k not in retain):
- self[k] = other[k] # pylint: disable=unsupported-assignment-operation
- for k in del_keys:
- del self[k] # pylint: disable=unsupported-delete-operation
-
- def _type_check_write(self, filter_, decode_parms):
- if isinstance(filter_, list):
- filter_ = Array(filter_)
- filter_ = filter_.wrap_in_array()
-
- if isinstance(decode_parms, list):
- decode_parms = Array(decode_parms)
- elif decode_parms is None:
- decode_parms = Array([])
- else:
- decode_parms = decode_parms.wrap_in_array()
-
- if not all(isinstance(item, Name) for item in filter_):
- raise TypeError(
- "filter must be: pikepdf.Name or pikepdf.Array([pikepdf.Name])"
- )
- if not all(
- (isinstance(item, Dictionary) or item is None) for item in decode_parms
- ):
- raise TypeError(
- "decode_parms must be: pikepdf.Dictionary or "
- "pikepdf.Array([pikepdf.Dictionary])"
- )
- if len(decode_parms) != 0 and len(filter_) != len(decode_parms):
- raise ValueError(
- f"filter ({repr(filter_)}) and decode_parms "
- f"({repr(decode_parms)}) must be arrays of same length"
- )
- if len(filter_) == 1:
- filter_ = filter_[0]
- if len(decode_parms) == 0:
- decode_parms = None
- elif len(decode_parms) == 1:
- decode_parms = decode_parms[0]
- return filter_, decode_parms
-
- def write(
- self,
- data: bytes,
- *,
- filter: Name | Array | None = None,
- decode_parms: Dictionary | Array | None = None,
- type_check: bool = True,
- ): # pylint: disable=redefined-builtin
- """
- Replace stream object's data with new (possibly compressed) `data`.
-
- `filter` and `decode_parms` describe any compression that is already
- present on the input `data`. For example, if your data is already
- compressed with the Deflate algorithm, you would set
- ``filter=Name.FlateDecode``.
-
- When writing the PDF in :meth:`pikepdf.Pdf.save`,
- pikepdf may change the compression or apply compression to data that was
- not compressed, depending on the parameters given to that function. It
- will never change lossless to lossy encoding.
-
- PNG and TIFF images, even if compressed, cannot be directly inserted
- into a PDF and displayed as images.
-
- Args:
- data: the new data to use for replacement
- filter: The filter(s) with which the
- data is (already) encoded
- decode_parms: Parameters for the
- filters with which the object is encode
- type_check: Check arguments; use False only if you want to
- intentionally create malformed PDFs.
-
- If only one `filter` is specified, it may be a name such as
- `Name('/FlateDecode')`. If there are multiple filters, then array
- of names should be given.
-
- If there is only one filter, `decode_parms` is a Dictionary of
- parameters for that filter. If there are multiple filters, then
- `decode_parms` is an Array of Dictionary, where each array index
- is corresponds to the filter.
- """
- if type_check and filter is not None:
- filter, decode_parms = self._type_check_write(filter, decode_parms)
-
- self._write(data, filter=filter, decode_parms=decode_parms)
-
-
-@augments(Pdf)
-class Extend_Pdf:
- def _repr_mimebundle_(
- self, include=None, exclude=None
- ): # pylint: disable=unused-argument
- """
- Present options to IPython or Jupyter for rich display of this object.
-
- See https://ipython.readthedocs.io/en/stable/config/integrating.html#rich-display
- """
- bio = BytesIO()
- self.save(bio)
- bio.seek(0)
-
- data = {'application/pdf': bio.read()}
- return data
-
- @property
- def docinfo(self) -> Dictionary:
- """
- Access the (deprecated) document information dictionary.
-
- The document information dictionary is a brief metadata record that can
- store some information about the origin of a PDF. It is deprecated and
- removed in the PDF 2.0 specification (not deprecated from the
- perspective of pikepdf). Use the ``.open_metadata()`` API instead, which
- will edit the modern (and unfortunately, more complicated) XMP metadata
- object and synchronize changes to the document information dictionary.
-
- This property simplifies access to the actual document information
- dictionary and ensures that it is created correctly if it needs to be
- created.
-
- A new, empty dictionary will be created if this property is accessed
- and dictionary does not exist. (This is to ensure that convenient code
- like ``pdf.docinfo[Name.Title] = "Title"`` will work when the dictionary
- does not exist at all.)
-
- You can delete the document information dictionary by deleting this property,
- ``del pdf.docinfo``. Note that accessing the property after deleting it
- will re-create with a new, empty dictionary.
-
- .. versionchanged: 2.4
- Added support for ``del pdf.docinfo``.
- """
- if Name.Info not in self.trailer:
- self.trailer.Info = self.make_indirect(Dictionary())
- return self.trailer.Info
-
- @docinfo.setter
- def docinfo(self, new_docinfo: Dictionary):
- if not new_docinfo.is_indirect:
- raise ValueError(
- "docinfo must be an indirect object - use Pdf.make_indirect"
- )
- self.trailer.Info = new_docinfo
-
- @docinfo.deleter
- def docinfo(self):
- if Name.Info in self.trailer:
- del self.trailer.Info
-
- def open_metadata(
- self,
- set_pikepdf_as_editor: bool = True,
- update_docinfo: bool = True,
- strict: bool = False,
- ) -> PdfMetadata:
- """
- Open the PDF's XMP metadata for editing.
-
- There is no ``.close()`` function on the metadata object, since this is
- intended to be used inside a ``with`` block only.
-
- For historical reasons, certain parts of PDF metadata are stored in
- two different locations and formats. This feature coordinates edits so
- that both types of metadata are updated consistently and "atomically"
- (assuming single threaded access). It operates on the ``Pdf`` in memory,
- not any file on disk. To persist metadata changes, you must still use
- ``Pdf.save()``.
-
- Example:
- >>> with pdf.open_metadata() as meta:
- meta['dc:title'] = 'Set the Dublic Core Title'
- meta['dc:description'] = 'Put the Abstract here'
-
- Args:
- set_pikepdf_as_editor: Automatically update the metadata ``pdf:Producer``
- to show that this version of pikepdf is the most recent software to
- modify the metadata, and ``xmp:MetadataDate`` to timestamp the update.
- Recommended, except for testing.
-
- update_docinfo: Update the standard fields of DocumentInfo
- (the old PDF metadata dictionary) to match the corresponding
- XMP fields. The mapping is described in
- :attr:`PdfMetadata.DOCINFO_MAPPING`. Nonstandard DocumentInfo
- fields and XMP metadata fields with no DocumentInfo equivalent
- are ignored.
-
- strict: If ``False`` (the default), we aggressively attempt
- to recover from any parse errors in XMP, and if that fails we
- overwrite the XMP with an empty XMP record. If ``True``, raise
- errors when either metadata bytes are not valid and well-formed
- XMP (and thus, XML). Some trivial cases that are equivalent to
- empty or incomplete "XMP skeletons" are never treated as errors,
- and always replaced with a proper empty XMP block. Certain
- errors may be logged.
- """
- return PdfMetadata(
- self,
- pikepdf_mark=set_pikepdf_as_editor,
- sync_docinfo=update_docinfo,
- overwrite_invalid_xml=not strict,
- )
-
- def open_outline(self, max_depth: int = 15, strict: bool = False) -> Outline:
- """
- Open the PDF outline ("bookmarks") for editing.
-
- Recommend for use in a ``with`` block. Changes are committed to the
- PDF when the block exits. (The ``Pdf`` must still be opened.)
-
- Example:
- >>> with pdf.open_outline() as outline:
- outline.root.insert(0, OutlineItem('Intro', 0))
-
- Args:
- max_depth: Maximum recursion depth of the outline to be
- imported and re-written to the document. ``0`` means only
- considering the root level, ``1`` the first-level
- sub-outline of each root element, and so on. Items beyond
- this depth will be silently ignored. Default is ``15``.
- strict: With the default behavior (set to ``False``),
- structural errors (e.g. reference loops) in the PDF document
- will only cancel processing further nodes on that particular
- level, recovering the valid parts of the document outline
- without raising an exception. When set to ``True``, any such
- error will raise an ``OutlineStructureError``, leaving the
- invalid parts in place.
- Similarly, outline objects that have been accidentally
- duplicated in the ``Outline`` container will be silently
- fixed (i.e. reproduced as new objects) or raise an
- ``OutlineStructureError``.
- """
- return Outline(self, max_depth=max_depth, strict=strict)
-
- def make_stream(self, data: bytes, d=None, **kwargs) -> Stream:
- """
- Create a new pikepdf.Stream object that is attached to this PDF.
-
- See:
- :meth:`pikepdf.Stream.__new__`
-
- """
- return Stream(self, data, d, **kwargs)
-
- def add_blank_page(
- self, *, page_size: tuple[Numeric, Numeric] = (612.0, 792.0)
- ) -> Page:
- """
- Add a blank page to this PDF.
-
- If pages already exist, the page will be added to the end. Pages may be
- reordered using ``Pdf.pages``.
-
- The caller may add content to the page by modifying its objects after creating
- it.
-
- Args:
- page_size (tuple): The size of the page in PDF units (1/72 inch or 0.35mm).
- Default size is set to a US Letter 8.5" x 11" page.
- """
- for dim in page_size:
- if not (3 <= dim <= 14400):
- raise ValueError('Page size must be between 3 and 14400 PDF units')
-
- page_dict = Dictionary(
- Type=Name.Page,
- MediaBox=Array([0, 0, page_size[0], page_size[1]]),
- Contents=self.make_stream(b''),
- Resources=Dictionary(),
- )
- page_obj = self.make_indirect(page_dict)
- self._add_page(page_obj, first=False)
- return Page(page_obj)
-
- def close(self) -> None:
- """
- Close a ``Pdf`` object and release resources acquired by pikepdf.
-
- If pikepdf opened the file handle it will close it (e.g. when opened with a file
- path). If the caller opened the file for pikepdf, the caller must close the file.
- ``with`` blocks will call close when they exit.
-
- pikepdf lazily loads data from PDFs, so some :class:`pikepdf.Object` may
- implicitly depend on the :class:`pikepdf.Pdf` being open. This is always the
- case for :class:`pikepdf.Stream` but can be true for any object. Do not close
- the `Pdf` object if you might still be accessing content from it.
-
- When an ``Object`` is copied from one ``Pdf`` to another, the ``Object`` is copied into
- the destination ``Pdf`` immediately, so after accessing all desired information
- from the source ``Pdf`` it may be closed.
-
- .. versionchanged:: 3.0
- In pikepdf 2.x, this function actually worked by resetting to a very short
- empty PDF. Code that relied on this quirk may not function correctly.
- """
- self._close()
- if getattr(self, '_tmp_stream', None):
- self._tmp_stream.close()
-
- def __enter__(self):
- return self
-
- def __exit__(self, exc_type, exc_value, traceback):
- self.close()
-
- @property
- def allow(self) -> Permissions:
- """
- Report permissions associated with this PDF.
-
- By default these permissions will be replicated when the PDF is
- saved. Permissions may also only be changed when a PDF is being saved,
- and are only available for encrypted PDFs. If a PDF is not encrypted,
- all operations are reported as allowed.
-
- pikepdf has no way of enforcing permissions.
- """
- results = {}
- for field in Permissions._fields:
- results[field] = getattr(self, '_allow_' + field)
- return Permissions(**results)
-
- @property
- def encryption(self) -> EncryptionInfo:
- """
- Report encryption information for this PDF.
-
- Encryption settings may only be changed when a PDF is saved.
- """
- return EncryptionInfo(self._encryption_data)
-
- def check(self) -> list[str]:
- """
- Check if PDF is well-formed.
-
- Similar to ``qpdf --check``.
- """
-
- class DiscardingParser(StreamParser):
- def __init__(self): # pylint: disable=useless-super-delegation
- super().__init__() # required for C++
-
- def handle_object(self, *_args):
- pass
-
- def handle_eof(self):
- pass
-
- problems: list[str] = []
-
- self._decode_all_streams_and_discard()
-
- discarding_parser = DiscardingParser()
- for page in self.pages:
- page.parse_contents(discarding_parser)
-
- for warning in self.get_warnings():
- problems.append("WARNING: " + warning)
-
- return problems
-
- def save(
- self,
- filename_or_stream: Path | str | BinaryIO | None = None,
- *,
- static_id: bool = False,
- preserve_pdfa: bool = True,
- min_version: str | tuple[str, int] = "",
- force_version: str | tuple[str, int] = "",
- fix_metadata_version: bool = True,
- compress_streams: bool = True,
- stream_decode_level: StreamDecodeLevel | None = None,
- object_stream_mode: ObjectStreamMode = ObjectStreamMode.preserve,
- normalize_content: bool = False,
- linearize: bool = False,
- qdf: bool = False,
- progress: Callable[[int], None] = None,
- encryption: Encryption | bool | None = None,
- recompress_flate: bool = False,
- deterministic_id: bool = False,
- ) -> None:
- """
- Save all modifications to this :class:`pikepdf.Pdf`.
-
- Args:
- filename_or_stream: Where to write the output. If a file
- exists in this location it will be overwritten.
- If the file was opened with ``allow_overwriting_input=True``,
- then it is permitted to overwrite the original file, and
- this parameter may be omitted to implicitly use the original
- filename. Otherwise, the filename may not be the same as the
- input file, as overwriting the input file would corrupt data
- since pikepdf uses lazy loading.
-
- static_id: Indicates that the ``/ID`` metadata, normally
- calculated as a hash of certain PDF contents and metadata
- including the current time, should instead be set to a static
- value. Only use this for debugging and testing. Use
- ``deterministic_id`` if you want to get the same ``/ID`` for
- the same document contents.
- preserve_pdfa: Ensures that the file is generated in a
- manner compliant with PDF/A and other stricter variants.
- This should be True, the default, in most cases.
-
- min_version: Sets the minimum version of PDF
- specification that should be required. If left alone QPDF
- will decide. If a tuple, the second element is an integer, the
- extension level. If the version number is not a valid format,
- QPDF will decide what to do.
- force_version: Override the version recommended by QPDF,
- potentially creating an invalid file that does not display
- in old versions. See QPDF manual for details. If a tuple, the
- second element is an integer, the extension level.
- fix_metadata_version: If ``True`` (default) and the XMP metadata
- contains the optional PDF version field, ensure the version in
- metadata is correct. If the XMP metadata does not contain a PDF
- version field, none will be added. To ensure that the field is
- added, edit the metadata and insert a placeholder value in
- ``pdf:PDFVersion``. If XMP metadata does not exist, it will
- not be created regardless of the value of this argument.
-
- object_stream_mode:
- ``disable`` prevents the use of object streams.
- ``preserve`` keeps object streams from the input file.
- ``generate`` uses object streams wherever possible,
- creating the smallest files but requiring PDF 1.5+.
-
- compress_streams: Enables or disables the compression of
- stream objects in the PDF that are created without specifying
- any compression setting. Metadata is never compressed.
- By default this is set to ``True``, and should be except
- for debugging. Existing streams in the PDF will not
- be modified. To decompress existing streams, you must set
- both ``compress_streams=False`` and ``stream_decode_level``
- to the desired decode level (e.g. ``.generalized`` will
- decompress most non-image content).
-
- stream_decode_level: Specifies how
- to encode stream objects. See documentation for
- :class:`pikepdf.StreamDecodeLevel`.
-
- recompress_flate: When disabled (the default), qpdf does not
- uncompress and recompress streams compressed with the Flate
- compression algorithm. If True, pikepdf will instruct qpdf to
- do this, which may be useful if recompressing streams to a
- higher compression level.
-
- normalize_content: Enables parsing and reformatting the
- content stream within PDFs. This may make debugging PDFs easier.
-
- linearize: Enables creating linear or "fast web view",
- where the file's contents are organized sequentially so that
- a viewer can begin rendering before it has the whole file.
- As a drawback, it tends to make files larger.
-
- qdf: Save output in QDF mode. QDF mode is a special output
- mode in QPDF to allow editing of PDFs in a text editor. Use
- the program ``fix-qdf`` to convert back to a standard
- PDF.
-
- progress: Specify a callback function that is called
- as the PDF is written. The function will be called with an
- integer between 0-100 as the sole parameter, the progress
- percentage. This function may not access or modify the PDF
- while it is being written, or data corruption will almost
- certainly occur.
-
- encryption: If ``False``
- or omitted, existing encryption will be removed. If ``True``
- encryption settings are copied from the originating PDF.
- Alternately, an ``Encryption`` object may be provided that
- sets the parameters for new encryption.
-
- deterministic_id: Indicates that the ``/ID`` metadata, normally
- calculated as a hash of certain PDF contents and metadata
- including the current time, should instead be computed using
- only deterministic data like the file contents. At a small
- runtime cost, this enables generation of the same ``/ID`` if
- the same inputs are converted in the same way multiple times.
- Does not work for encrypted files.
-
- Raises:
- PdfError
- ForeignObjectError
- ValueError
-
- You may call ``.save()`` multiple times with different parameters
- to generate different versions of a file, and you *may* continue
- to modify the file after saving it. ``.save()`` does not modify
- the ``Pdf`` object in memory, except possibly by updating the XMP
- metadata version with ``fix_metadata_version``.
-
- .. note::
-
- :meth:`pikepdf.Pdf.remove_unreferenced_resources` before saving
- may eliminate unnecessary resources from the output file if there
- are any objects (such as images) that are referenced in a page's
- Resources dictionary but never called in the page's content stream.
-
- .. note::
-
- pikepdf can read PDFs with incremental updates, but always
- coalesces any incremental updates into a single non-incremental
- PDF file when saving.
-
- .. versionchanged:: 2.7
- Added *recompress_flate*.
-
- .. versionchanged:: 3.0
- Keyword arguments now mandatory for everything except the first
- argument.
- """
- if not filename_or_stream and getattr(self, '_original_filename', None):
- filename_or_stream = self._original_filename
- if not filename_or_stream:
- raise ValueError(
- "Cannot save to original filename because the original file was "
- "not opening using Pdf.open(..., allow_overwriting_input=True). "
- "Either specify a new destination filename/file stream or open "
- "with allow_overwriting_input=True. If this Pdf was created using "
- "Pdf.new(), you must specify a destination object since there is "
- "no original filename to save to."
- )
- self._save(
- filename_or_stream,
- static_id=static_id,
- preserve_pdfa=preserve_pdfa,
- min_version=min_version,
- force_version=force_version,
- fix_metadata_version=fix_metadata_version,
- compress_streams=compress_streams,
- stream_decode_level=stream_decode_level,
- object_stream_mode=object_stream_mode,
- normalize_content=normalize_content,
- linearize=linearize,
- qdf=qdf,
- progress=progress,
- encryption=encryption,
- samefile_check=getattr(self, '_tmp_stream', None) is None,
- recompress_flate=recompress_flate,
- deterministic_id=deterministic_id,
- )
-
- @staticmethod
- def open(
- filename_or_stream: Path | str | BinaryIO,
- *,
- password: str | bytes = "",
- hex_password: bool = False,
- ignore_xref_streams: bool = False,
- suppress_warnings: bool = True,
- attempt_recovery: bool = True,
- inherit_page_attributes: bool = True,
- access_mode: AccessMode = AccessMode.default,
- allow_overwriting_input: bool = False,
- ) -> Pdf:
- """
- Open an existing file at *filename_or_stream*.
-
- If *filename_or_stream* is path-like, the file will be opened for reading.
- The file should not be modified by another process while it is open in
- pikepdf, or undefined behavior may occur. This is because the file may be
- lazily loaded. Despite this restriction, pikepdf does not try to use any OS
- services to obtain an exclusive lock on the file. Some applications may
- want to attempt this or copy the file to a temporary location before
- editing. This behaviour changes if *allow_overwriting_input* is set: the whole
- file is then read and copied to memory, so that pikepdf can overwrite it
- when calling ``.save()``.
-
- When this function is called with a stream-like object, you must ensure
- that the data it returns cannot be modified, or undefined behavior will
- occur.
-
- Any changes to the file must be persisted by using ``.save()``.
-
- If *filename_or_stream* has ``.read()`` and ``.seek()`` methods, the file
- will be accessed as a readable binary stream. pikepdf will read the
- entire stream into a private buffer.
-
- ``.open()`` may be used in a ``with``-block; ``.close()`` will be called when
- the block exits, if applicable.
-
- Whenever pikepdf opens a file, it will close it. If you open the file
- for pikepdf or give it a stream-like object to read from, you must
- release that object when appropriate.
-
- Examples:
- >>> with Pdf.open("test.pdf") as pdf:
- ...
-
- >>> pdf = Pdf.open("test.pdf", password="rosebud")
-
- Args:
- filename_or_stream: Filename or Python readable and seekable file
- stream of PDF to open.
- password: User or owner password to open an
- encrypted PDF. If the type of this parameter is ``str``
- it will be encoded as UTF-8. If the type is ``bytes`` it will
- be saved verbatim. Passwords are always padded or
- truncated to 32 bytes internally. Use ASCII passwords for
- maximum compatibility.
- hex_password: If True, interpret the password as a
- hex-encoded version of the exact encryption key to use, without
- performing the normal key computation. Useful in forensics.
- ignore_xref_streams: If True, ignore cross-reference
- streams. See qpdf documentation.
- suppress_warnings: If True (default), warnings are not
- printed to stderr. Use :meth:`pikepdf.Pdf.get_warnings()` to
- retrieve warnings.
- attempt_recovery: If True (default), attempt to recover
- from PDF parsing errors.
- inherit_page_attributes: If True (default), push attributes
- set on a group of pages to individual pages
- access_mode: If ``.default``, pikepdf will
- decide how to access the file. Currently, it will always
- select stream access. To attempt memory mapping and fall back
- to stream if memory mapping failed, use ``.mmap``. Use
- ``.mmap_only`` to require memory mapping or fail
- (this is expected to only be useful for testing). Applications
- should be prepared to handle the SIGBUS signal on POSIX in
- the event that the file is successfully mapped but later goes
- away.
- allow_overwriting_input: If True, allows calling ``.save()``
- to overwrite the input file. This is performed by loading the
- entire input file into memory at open time; this will use more
- memory and may reduce performance, especially when the opened
- file will not be modified.
-
- Raises:
- pikepdf.PasswordError: If the password failed to open the
- file.
- pikepdf.PdfError: If for other reasons we could not open
- the file.
- TypeError: If the type of ``filename_or_stream`` is not
- usable.
- FileNotFoundError: If the file was not found.
-
- Note:
- When *filename_or_stream* is a stream and the stream is located on a
- network, pikepdf assumes that the stream uses buffering and read caches
- to achieve reasonable performance. Streams that fetch data over a network
- in response to every read or seek request, no matter how small, will
- perform poorly. It may be easier to download a PDF from network to
- temporary local storage (such as ``io.BytesIO``), manipulate it, and
- then re-upload it.
-
- .. versionchanged:: 3.0
- Keyword arguments now mandatory for everything except the first
- argument.
- """
- if isinstance(filename_or_stream, bytes) and filename_or_stream.startswith(
- b'%PDF-'
- ):
- warn(
- "It looks like you called with Pdf.open(data) with a bytes-like object "
- "containing a PDF. This will probably fail because this function "
- "expects a filename or opened file-like object. Instead, please use "
- "Pdf.open(BytesIO(data))."
- )
-
- tmp_stream, original_filename = None, False
- if allow_overwriting_input:
- try:
- Path(filename_or_stream)
- except TypeError as error:
- raise ValueError(
- '"allow_overwriting_input=True" requires "open" first argument '
- 'to be a file path'
- ) from error
- original_filename = Path(filename_or_stream)
- with open(original_filename, 'rb') as pdf_file:
- tmp_stream = BytesIO()
- shutil.copyfileobj(pdf_file, tmp_stream)
- pdf = Pdf._open(
- tmp_stream or filename_or_stream,
- password=password,
- hex_password=hex_password,
- ignore_xref_streams=ignore_xref_streams,
- suppress_warnings=suppress_warnings,
- attempt_recovery=attempt_recovery,
- inherit_page_attributes=inherit_page_attributes,
- access_mode=access_mode,
- )
- pdf._tmp_stream = tmp_stream
- pdf._original_filename = original_filename
- return pdf
-
-
-@augments(_ObjectMapping)
-class Extend_ObjectMapping:
- def get(self, key, default=None) -> Object:
- try:
- return self[key]
- except KeyError:
- return default
-
-
-def check_is_box(obj) -> None:
- try:
- if obj.is_rectangle:
- return
- except AttributeError:
- pass
-
- try:
- pdfobj = Array(obj)
- if pdfobj.is_rectangle:
- return
- except Exception as e:
- raise ValueError("object is not a rectangle") from e
-
- raise ValueError("object is not a rectangle")
-
-
-@augments(Page)
-class Extend_Page:
- @property
- def mediabox(self):
- """Return page's /MediaBox, in PDF units."""
- return self._get_mediabox(True)
-
- @mediabox.setter
- def mediabox(self, value):
- check_is_box(value)
- self.obj['/MediaBox'] = value
-
- @property
- def cropbox(self):
- """Return page's effective /CropBox, in PDF units.
-
- If the /CropBox is not defined, the /MediaBox is returned.
- """
- return self._get_cropbox(True, False)
-
- @cropbox.setter
- def cropbox(self, value):
- check_is_box(value)
- self.obj['/CropBox'] = value
-
- @property
- def trimbox(self):
- """Return page's effective /TrimBox, in PDF units.
-
- If the /TrimBox is not defined, the /CropBox is returned (and if
- /CropBox is not defined, /MediaBox is returned).
- """
- return self._get_trimbox(True, False)
-
- @trimbox.setter
- def trimbox(self, value):
- check_is_box(value)
- self.obj['/TrimBox'] = value
-
- @property
- def images(self) -> _ObjectMapping:
- """Return all regular images associated with this page.
-
- This method does not recurse into Form XObjects and does not
- attempt to find inline images.
- """
- return self._images
-
- @property
- def resources(self) -> Dictionary:
- """Return this page's resources dictionary."""
- return self.obj['/Resources']
-
- def add_resource(
- self,
- res: Object,
- res_type: Name,
- name: Name | None = None,
- *,
- prefix: str = '',
- replace_existing: bool = True,
- ) -> Name:
- """Add a new resource to the page's Resources dictionary.
-
- If the Resources dictionaries do not exist, they will be created.
-
- Args:
- self: The page whose resources dictionary the object is added to.
- res: The dictionary object to insert into the resources
- dictionary.
- res_type: Should be one of the following Resource dictionary types:
- ExtGState, ColorSpace, Pattern, Shading, XObject, Font, Properties.
- name: The name of the object. If omitted, a random name will be
- generated with enough randomness to be globally unique.
- prefix: A prefix for the name of the object. Allows conveniently
- namespacing when using random names, e.g. prefix="Im" for images.
- Mutually exclusive with name parameter.
- replace_existing: If the name already exists in one of the resource
- dictionaries, remove it.
-
- Example:
- >>> resource_name = pdf.pages[0].add_resource(formxobj, Name.XObject)
-
- .. versionadded:: 2.3
-
- .. versionchanged:: 2.14
- If *res* does not belong to the same `Pdf` that owns this page,
- a copy of *res* is automatically created and added instead. In previous
- versions, it was necessary to handle this case manually.
-
- .. versionchanged:: 4.3.0
- Returns the name of the overlay in the resources dictionary instead
- of returning None.
- """
- if Name.Resources not in self.obj:
- self.obj.Resources = Dictionary()
- elif not isinstance(self.obj.Resources, Dictionary):
- raise TypeError("Page /Resources exists but is not a dictionary")
- resources = self.obj.Resources
-
- if res_type not in resources:
- resources[res_type] = Dictionary()
-
- if name is not None and prefix:
- raise ValueError("Must specify one of name= or prefix=")
- if name is None:
- name = Name.random(prefix=prefix)
-
- for res_dict in resources.as_dict().values():
- if not isinstance(res_dict, Dictionary):
- continue
- if name in res_dict:
- if replace_existing:
- del res_dict[name]
- else:
- raise ValueError(f"Name {name} already exists in page /Resources")
-
- resources[res_type][name] = res.with_same_owner_as(self.obj)
- return name
-
- def _over_underlay(
- self,
- other,
- rect: Rectangle | None,
- under: bool,
- push_stack: bool,
- shrink: bool,
- expand: bool,
- ) -> Name:
- formx = None
- if isinstance(other, Page):
- formx = other.as_form_xobject()
- elif isinstance(other, Dictionary) and other.get(Name.Type) == Name.Page:
- formx = Page(other).as_form_xobject()
- elif (
- isinstance(other, Stream)
- and other.get(Name.Type) == Name.XObject
- and other.get(Name.Subtype) == Name.Form
- ):
- formx = other
-
- if formx is None:
- raise TypeError(
- "other object is not something we can convert to Form XObject"
- )
-
- if rect is None:
- rect = Rectangle(self.trimbox)
-
- formx_placed_name = self.add_resource(formx, Name.XObject)
- cs = self.calc_form_xobject_placement(
- formx, formx_placed_name, rect, allow_shrink=shrink, allow_expand=expand
- )
-
- if push_stack:
- self.contents_add(b'q\n', prepend=True) # prepend q
- self.contents_add(b'Q\n', prepend=False) # i.e. append Q
-
- self.contents_add(cs, prepend=under)
- self.contents_coalesce()
- return formx_placed_name
-
- def add_overlay(
- self,
- other: Object | Page,
- rect: Rectangle | None = None,
- *,
- push_stack: bool = True,
- shrink: bool = True,
- expand: bool = True,
- ) -> Name:
- """Overlay another object on this page.
-
- Overlays will be drawn after all previous content, potentially drawing on top
- of existing content.
-
- Args:
- other: A Page or Form XObject to render as an overlay on top of this
- page.
- rect: The PDF rectangle (in PDF units) in which to draw the overlay.
- If omitted, this page's trimbox, cropbox or mediabox (in that order)
- will be used.
- push_stack: If True (default), push the graphics stack of the existing
- content stream to ensure that the overlay is rendered correctly.
- Officially PDF limits the graphics stack depth to 32. Most
- viewers will tolerate more, but excessive pushes may cause problems.
- Multiple content streams may also be coalesced into a single content
- stream where this parameter is True, since the PDF specification
- permits PDF writers to coalesce streams as they see fit.
- shrink: If True (default), allow the object to shrink to fit inside the
- rectangle. The aspect ratio will be preserved.
- expand: If True (default), allow the object to expand to fit inside the
- rectangle. The aspect ratio will be preserved.
-
- Returns:
- The name of the Form XObject that contains the overlay.
-
- .. versionadded:: 2.14
-
- .. versionchanged:: 4.0.0
- Added the *push_stack* parameter. Previously, this method behaved
- as if *push_stack* were False.
-
- .. versionchanged:: 4.2.0
- Added the *shrink* and *expand* parameters. Previously, this method
- behaved as if ``shrink=True, expand=False``.
-
- .. versionchanged:: 4.3.0
- Returns the name of the overlay in the resources dictionary instead
- of returning None.
- """
- return self._over_underlay(
- other,
- rect,
- under=False,
- push_stack=push_stack,
- expand=expand,
- shrink=shrink,
- )
-
- def add_underlay(
- self,
- other: Object | Page,
- rect: Rectangle | None = None,
- *,
- shrink: bool = True,
- expand: bool = True,
- ) -> Name:
- """Underlay another object beneath this page.
-
- Underlays will be drawn before all other content, so they may be overdrawn
- partially or completely.
-
- There is no *push_stack* parameter for this function, since adding an
- underlay can be done without manipulating the graphics stack.
-
- Args:
- other: A Page or Form XObject to render as an underlay underneath this
- page.
- rect: The PDF rectangle (in PDF units) in which to draw the underlay.
- If omitted, this page's trimbox, cropbox or mediabox (in that order)
- will be used.
- shrink: If True (default), allow the object to shrink to fit inside the
- rectangle. The aspect ratio will be preserved.
- expand: If True (default), allow the object to expand to fit inside the
- rectangle. The aspect ratio will be preserved.
-
- Returns:
- The name of the Form XObject that contains the underlay.
-
- .. versionadded:: 2.14
-
- .. versionchanged:: 4.2.0
- Added the *shrink* and *expand* parameters. Previously, this method
- behaved as if ``shrink=True, expand=False``. Fixed issue with wrong
- page rect being selected.
- """
- return self._over_underlay(
- other, rect, under=True, push_stack=False, expand=expand, shrink=shrink
- )
-
- def contents_add(self, contents: Stream | bytes, *, prepend: bool = False):
- """Append or prepend to an existing page's content stream.
-
- Args:
- contents: An existing content stream to append or prepend.
- prepend: Prepend if true, append if false (default).
-
- .. versionadded:: 2.14
- """
- return self._contents_add(contents, prepend=prepend)
-
- def __getattr__(self, name):
- return getattr(self.obj, name)
-
- @augment_override_cpp
- def __setattr__(self, name, value):
- if hasattr(self.__class__, name):
- object.__setattr__(self, name, value)
- else:
- setattr(self.obj, name, value)
-
- @augment_override_cpp
- def __delattr__(self, name):
- if hasattr(self.__class__, name):
- object.__delattr__(self, name)
- else:
- delattr(self.obj, name)
-
- def __getitem__(self, key):
- return self.obj[key]
-
- def __setitem__(self, key, value):
- self.obj[key] = value
-
- def __delitem__(self, key):
- del self.obj[key]
-
- def __contains__(self, key):
- return key in self.obj
-
- def get(self, key, default=None):
- try:
- return self[key]
- except KeyError:
- return default
-
- def emplace(self, other: Page, retain=(Name.Parent,)):
- return self.obj.emplace(other.obj, retain=retain)
-
- def __repr__(self):
- return (
- repr(self.obj)
- .replace('Dictionary', 'Page', 1)
- .replace('(Type="/Page")', '', 1)
- )
-
- def _repr_mimebundle_(self, include=None, exclude=None):
- data = {}
- bundle = {'application/pdf', 'image/png'}
- if include:
- bundle = {k for k in bundle if k in include}
- if exclude:
- bundle = {k for k in bundle if k not in exclude}
- pagedata = _single_page_pdf(self.obj)
- if 'application/pdf' in bundle:
- data['application/pdf'] = pagedata
- if 'image/png' in bundle:
- try:
- data['image/png'] = _mudraw(pagedata, 'png')
- except (FileNotFoundError, RuntimeError):
- pass
- return data
-
-
-@augments(Token)
-class Extend_Token:
- def __repr__(self):
- return f'pikepdf.Token({self.type_}, {self.raw_value})'
-
-
-@augments(Rectangle)
-class Extend_Rectangle:
- def __repr__(self):
- return f'pikepdf.Rectangle({self.llx}, {self.lly}, {self.urx}, {self.ury})'
-
- def __hash__(self):
- return hash((self.llx, self.lly, self.urx, self.ury))
-
-
-@augments(Attachments)
-class Extend_Attachments(MutableMapping):
- def __getitem__(self, k: str) -> AttachedFileSpec:
- filespec = self._get_filespec(k)
- if filespec is None:
- raise KeyError(k)
- return filespec
-
- def __setitem__(self, k: str, v: AttachedFileSpec) -> None:
- if not v.filename:
- v.filename = k
- return self._add_replace_filespec(k, v)
-
- def __delitem__(self, k: str) -> None:
- return self._remove_filespec(k)
-
- def __len__(self):
- return len(self._get_all_filespecs())
-
- def __iter__(self) -> Iterator[str]:
- yield from self._get_all_filespecs()
-
- def __repr__(self):
- return f"<pikepdf._qpdf.Attachments with {len(self)} attached files>"
-
-
-@augments(AttachedFileSpec)
-class Extend_AttachedFileSpec:
- @staticmethod
- def from_filepath(pdf: Pdf, path: Path | str, *, description: str = ''):
- """Construct a file specification from a file path.
-
- This function will automatically add a creation and modified date
- using the file system, and a MIME type inferred from the file's extension.
-
- If the data required for the attachment is in memory, use
- :meth:`pikepdf.AttachedFileSpec` instead.
-
- Args:
- pdf: The Pdf to attach this file specification to.
- path: A file path for the file to attach to this Pdf.
- description: An optional description. May be shown to the user in
- PDF viewers.
- """
- mime, _ = mimetypes.guess_type(str(path))
- if mime is None:
- mime = ''
- if not isinstance(path, Path):
- path = Path(path)
-
- stat = path.stat()
- return AttachedFileSpec(
- pdf,
- path.read_bytes(),
- description=description,
- filename=str(path.name),
- mime_type=mime,
- creation_date=encode_pdf_date(
- datetime.datetime.fromtimestamp(stat.st_ctime)
- ),
- mod_date=encode_pdf_date(datetime.datetime.fromtimestamp(stat.st_mtime)),
- )
-
- def __repr__(self):
- if self.filename:
- return (
- f"<pikepdf._qpdf.AttachedFileSpec for {self.filename!r}, "
- f"description {self.description!r}>"
- )
- return f"<pikepdf._qpdf.AttachedFileSpec description {self.description!r}>"
-
-
-@augments(AttachedFile)
-class Extend_AttachedFile:
- @property
- def creation_date(self) -> datetime.datetime | None:
- if not self._creation_date:
- return None
- return decode_pdf_date(self._creation_date)
-
- @creation_date.setter
- def creation_date(self, value: datetime.datetime):
- self._creation_date = encode_pdf_date(value)
-
- @property
- def mod_date(self) -> datetime.datetime | None:
- if not self._mod_date:
- return None
- return decode_pdf_date(self._mod_date)
-
- @mod_date.setter
- def mod_date(self, value: datetime.datetime):
- self._mod_date = encode_pdf_date(value)
-
- def read_bytes(self) -> bytes:
- return self.obj.read_bytes()
-
- def __repr__(self):
- return (
- f'<pikepdf._qpdf.AttachedFile objid={self.obj.objgen} size={self.size} '
- f'mime_type={self.mime_type} creation_date={self.creation_date} '
- f'mod_date={self.mod_date}>'
- )
-
-
-@augments(NameTree)
-class Extend_NameTree:
- def keys(self):
- return KeysView(self._as_map())
-
- def values(self):
- return ValuesView(self._as_map())
-
- def items(self):
- return ItemsView(self._as_map())
-
- get = MutableMapping.get
- pop = MutableMapping.pop
- popitem = MutableMapping.popitem
- clear = MutableMapping.clear
- update = MutableMapping.update
- setdefault = MutableMapping.setdefault
-
-
-MutableMapping.register(NameTree)
-
-
-@augments(NumberTree)
-class Extend_NumberTree:
- def keys(self):
- return KeysView(self._as_map())
-
- def values(self):
- return ValuesView(self._as_map())
-
- def items(self):
- return ItemsView(self._as_map())
-
- get = MutableMapping.get
- pop = MutableMapping.pop
- popitem = MutableMapping.popitem
- clear = MutableMapping.clear
- update = MutableMapping.update
- setdefault = MutableMapping.setdefault
-
-
-MutableMapping.register(NumberTree)
diff --git a/env/lib/python3.10/site-packages/pikepdf/_qpdf.cpython-310-x86_64-linux-gnu.so b/env/lib/python3.10/site-packages/pikepdf/_qpdf.cpython-310-x86_64-linux-gnu.so
deleted file mode 100755
index 31165aa..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/_qpdf.cpython-310-x86_64-linux-gnu.so
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/_qpdf.pyi b/env/lib/python3.10/site-packages/pikepdf/_qpdf.pyi
deleted file mode 100644
index 828891a..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/_qpdf.pyi
+++ /dev/null
@@ -1,762 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-from __future__ import annotations
-
-# pybind11 does not generate type annotations yet, and mypy doesn't understand
-# the way we're augmenting C++ classes with Python methods as in
-# pikepdf/_methods.py. Thus, we need to manually spell out the resulting types
-# after augmenting.
-import datetime
-import sys
-from abc import abstractmethod
-from decimal import Decimal
-from enum import Enum
-from pathlib import Path
-from typing import (
- Any,
- BinaryIO,
- Callable,
- ClassVar,
- Collection,
- Iterable,
- Iterator,
- KeysView,
- Mapping,
- MutableMapping,
- Sequence,
- TypeVar,
- overload,
-)
-
-if sys.version_info >= (3, 8):
- from typing import Literal
-else:
- from typing_extensions import Literal # pragma: no cover
-
-from pikepdf.models.encryption import Encryption, EncryptionInfo, Permissions
-from pikepdf.models.image import PdfInlineImage
-from pikepdf.models.metadata import PdfMetadata
-from pikepdf.models.outlines import Outline
-from pikepdf.objects import Array, Dictionary, Name, Stream, String
-
-# This is the whole point of stub files, but apparently we have to do this...
-# pylint: disable=no-method-argument,unused-argument,no-self-use,too-many-public-methods
-
-T = TypeVar('T', bound='Object')
-Numeric = TypeVar('Numeric', int, float, Decimal)
-
-class Buffer: ...
-
-# Exceptions
-
-class DataDecodingError(Exception): ...
-class JobUsageError(Exception): ...
-class PasswordError(Exception): ...
-class PdfError(Exception): ...
-class ForeignObjectError(Exception): ...
-
-# Enums
-class AccessMode(Enum):
- default: int = ...
- mmap: int = ...
- mmap_only: int = ...
- stream: int = ...
-
-class EncryptionMethod(Enum):
- none: int = ...
- unknown: int = ...
- rc4: int = ...
- aes: int = ...
- aesv3: int = ...
-
-class ObjectStreamMode(Enum):
- disable: int = ...
- generate: int = ...
- preserve: int = ...
-
-class ObjectType(Enum):
- array: int = ...
- boolean: int = ...
- dictionary: int = ...
- inlineimage: int = ...
- integer: int = ...
- name_: int = ...
- null: int = ...
- operator: int = ...
- real: int = ...
- reserved: int = ...
- stream: int = ...
- string: int = ...
- uninitialized: int = ...
-
-class StreamDecodeLevel(Enum):
- all: int = ...
- generalized: int = ...
- none: int = ...
- specialized: int = ...
-
-class TokenType(Enum):
- array_close: int = ...
- array_open: int = ...
- bad: int = ...
- bool: int = ...
- brace_close: int = ...
- brace_open: int = ...
- comment: int = ...
- dict_close: int = ...
- dict_open: int = ...
- eof: int = ...
- inline_image: int = ...
- integer: int = ...
- name_: int = ...
- null: int = ...
- real: int = ...
- space: int = ...
- string: int = ...
- word: int = ...
-
-class Object:
- def _ipython_key_completions_(self) -> KeysView | None: ...
- def _inline_image_raw_bytes(self) -> bytes: ...
- def _parse_page_contents(self, callbacks: Callable) -> None: ...
- def _parse_page_contents_grouped(
- self, whitelist: str
- ) -> list[tuple[Collection[Object | PdfInlineImage], Operator]]: ...
- @staticmethod
- def _parse_stream(stream: Object, parser: StreamParser) -> list: ...
- @staticmethod
- def _parse_stream_grouped(stream: Object, whitelist: str) -> list: ...
- def _repr_mimebundle_(self, include=None, exclude=None) -> dict | None: ...
- def _write(
- self,
- data: bytes,
- filter: Object, # pylint: disable=redefined-builtin
- decode_parms: Object,
- ) -> None: ...
- def append(self, pyitem: Any) -> None: ...
- def as_dict(self) -> _ObjectMapping: ...
- def as_list(self) -> _ObjectList: ...
- def emplace(self, other: Object, retain: Iterable[Name] = ...) -> None: ...
- def extend(self, arg0: Iterable[Object]) -> None: ...
- @overload
- def get(self, key: str, default: T | None = ...) -> Object | T | None: ...
- @overload
- def get(self, key: Name, default: T | None = ...) -> Object | T | None: ...
- def get_raw_stream_buffer(self) -> Buffer: ...
- def get_stream_buffer(self, decode_level: StreamDecodeLevel = ...) -> Buffer: ...
- def is_owned_by(self, possible_owner: Pdf) -> bool: ...
- def items(self) -> Iterable[tuple[str, Object]]: ...
- def keys(self) -> set[str]: ...
- @staticmethod
- def parse(stream: bytes, description: str = ...) -> Object: ...
- def read_bytes(self, decode_level: StreamDecodeLevel = ...) -> bytes: ...
- def read_raw_bytes(self) -> bytes: ...
- def same_owner_as(self, other: Object) -> bool: ...
- def to_json(self, dereference: bool = ...) -> bytes: ...
- def unparse(self, resolved: bool = ...) -> bytes: ...
- def with_same_owner_as(self, arg0: Object) -> Object: ...
- def wrap_in_array(self) -> Object: ...
- def write(
- self,
- data: bytes,
- *,
- filter: Name | Array | None = ..., # pylint: disable=redefined-builtin
- decode_parms: Dictionary | Array | None = ...,
- type_check: bool = ...,
- ) -> None: ...
- def __bytes__(self) -> bytes: ...
- @overload
- def __contains__(self, arg0: Object) -> bool: ...
- @overload
- def __contains__(self, arg0: str) -> bool: ...
- def __copy__(self) -> Object: ...
- def __delattr__(self, arg0: str) -> None: ...
- @overload
- def __delitem__(self, arg0: str) -> None: ...
- @overload
- def __delitem__(self, arg0: Object) -> None: ...
- @overload
- def __delitem__(self, arg0: int) -> None: ...
- def __dir__(self) -> list: ...
- def __eq__(self, other: Any) -> bool: ...
- def __getattr__(self, arg0: str) -> Object: ...
- @overload
- def __getitem__(self, arg0: str) -> Object: ...
- @overload
- def __getitem__(self, arg0: Object) -> Object: ...
- @overload
- def __getitem__(self, arg0: int) -> Object: ...
- def __hash__(self) -> int: ...
- def __iter__(self) -> Iterable[Object]: ...
- def __len__(self) -> int: ...
- def __setattr__(self, arg0: str, arg1: object) -> None: ...
- @overload
- def __setitem__(self, arg0: str, arg1: Object) -> None: ...
- @overload
- def __setitem__(self, arg0: Object, arg1: Object) -> None: ...
- @overload
- def __setitem__(self, arg0: str, arg1: object) -> None: ...
- @overload
- def __setitem__(self, arg0: Object, arg1: object) -> None: ...
- @overload
- def __setitem__(self, arg0: int, arg1: Object) -> None: ...
- @overload
- def __setitem__(self, arg0: int, arg1: object) -> None: ...
- @property
- def _objgen(self) -> tuple[int, int]: ...
- @property
- def _type_code(self) -> ObjectType: ...
- @property
- def _type_name(self) -> str: ...
- @property
- def images(self) -> _ObjectMapping: ...
- @property
- def is_indirect(self) -> bool: ...
- @property
- def is_rectangle(self) -> bool: ...
- @property
- def objgen(self) -> tuple[int, int]: ...
- @property
- def stream_dict(self) -> Object: ...
- @stream_dict.setter
- def stream_dict(self, val: Object) -> None: ...
-
-class ObjectHelper:
- def __eq__(self, other: Any) -> bool: ...
- @property
- def obj(self) -> Object: ...
-
-class _ObjectList:
- @overload
- def __init__(self) -> None: ...
- @overload
- def __init__(self, arg0: _ObjectList) -> None: ...
- @overload
- def __init__(self, arg0: Iterable) -> None: ...
- @overload
- def __init__(*args, **kwargs) -> None: ...
- def append(self, x: Object) -> None: ...
- def clear(self) -> None: ...
- def count(self, x: Object) -> int: ...
- @overload
- def extend(self, L: _ObjectList) -> None: ...
- @overload
- def extend(self, L: Iterable[Object]) -> None: ...
- def insert(self, i: int, x: Object) -> None: ...
- @overload
- def pop(self) -> Object: ...
- @overload
- def pop(self, i: int) -> Object: ...
- @overload
- def pop(*args, **kwargs) -> Any: ...
- def remove(self, x: Object) -> None: ...
- def __bool__(self) -> bool: ...
- def __contains__(self, x: Object) -> bool: ...
- @overload
- def __delitem__(self, arg0: int) -> None: ...
- @overload
- def __delitem__(self, arg0: slice) -> None: ...
- @overload
- def __delitem__(*args, **kwargs) -> Any: ...
- def __eq__(self, other: Any) -> bool: ...
- @overload
- def __getitem__(self, s: slice) -> _ObjectList: ...
- @overload
- def __getitem__(self, arg0: int) -> Object: ...
- @overload
- def __getitem__(*args, **kwargs) -> Any: ...
- def __iter__(self) -> Iterator[Object]: ...
- def __len__(self) -> int: ...
- def __ne__(self, other: Any) -> bool: ...
- @overload
- def __setitem__(self, arg0: int, arg1: Object) -> None: ...
- @overload
- def __setitem__(self, arg0: slice, arg1: _ObjectList) -> None: ...
- @overload
- def __setitem__(*args, **kwargs) -> Any: ...
-
-class _ObjectMapping:
- get: Any = ...
- keys: Any = ...
- values: Any = ...
- __contains__: Any = ...
- def __init__(self) -> None: ...
- def items(self) -> Iterator: ...
- def __bool__(self) -> bool: ...
- def __delitem__(self, arg0: str) -> None: ...
- def __getitem__(self, arg0: str) -> Object: ...
- def __iter__(self) -> Iterator: ...
- def __len__(self) -> int: ...
- def __setitem__(self, arg0: str, arg1: Object) -> None: ...
-
-class Operator(Object): ...
-
-class Annotation:
- def __init__(self, arg0: Object) -> None: ...
- @overload
- def get_appearance_stream(self, which: Object) -> Object: ...
- @overload
- def get_appearance_stream(self, which: Object, state: Object) -> Object: ...
- def get_page_content_for_appearance(
- self,
- name: Object,
- rotate: int,
- required_flags: int = ...,
- forbidden_flags: int = ...,
- ) -> bytes: ...
- @property
- def appearance_dict(self) -> Object: ...
- @property
- def appearance_state(self) -> Object: ...
- @property
- def flags(self) -> int: ...
- @property
- def obj(self) -> Object: ...
- @property
- def subtype(self) -> str: ...
-
-class AttachedFile:
- _creation_date: str
- _mod_date: str
- creation_date: datetime.datetime | None
- mime_type: str
- mod_date: datetime.datetime | None
- @property
- def md5(self) -> bytes: ...
- @property
- def obj(self) -> Object: ...
- def read_bytes(self) -> bytes: ...
- @property
- def size(self) -> int: ...
-
-class AttachedFileSpec:
- description: str
- filename: str
- def __init__(
- self,
- data: bytes,
- *,
- description: str,
- filename: str,
- mime_type: str,
- creation_date: str,
- mod_date: str,
- ) -> None: ...
- def get_all_filenames(self) -> dict: ...
- @overload
- def get_file(self) -> AttachedFile: ...
- @overload
- def get_file(self, name: Name) -> AttachedFile: ...
- @property
- def obj(self) -> Object: ...
- @staticmethod
- def from_filepath(
- pdf: Pdf, path: Path | str, *, description: str = ''
- ) -> AttachedFileSpec: ...
-
-class Attachments(MutableMapping[str, AttachedFileSpec]):
- def __contains__(self, k: object) -> bool: ...
- def __delitem__(self, k: str) -> None: ...
- def __eq__(self, other: Any) -> bool: ...
- def __getitem__(self, k: str) -> AttachedFileSpec: ...
- def __iter__(self) -> Iterator[str]: ...
- def __len__(self) -> int: ...
- def __setitem__(self, k: str, v: AttachedFileSpec): ...
- def __init__(self, *args, **kwargs) -> None: ...
- def _add_replace_filespec(self, arg0: str, arg1: AttachedFileSpec) -> None: ...
- def _get_all_filespecs(self) -> dict[str, AttachedFileSpec]: ...
- def _get_filespec(self, arg0: str) -> AttachedFileSpec: ...
- def _remove_filespec(self, arg0: str) -> bool: ...
- @property
- def _has_embedded_files(self) -> bool: ...
-
-class Token:
- def __init__(self, arg0: TokenType, arg1: bytes) -> None: ...
- def __eq__(self, other: Any) -> bool: ...
- @property
- def error_msg(self) -> str: ...
- @property
- def raw_value(self) -> bytes: ...
- @property
- def type_(self) -> TokenType: ...
- @property
- def value(self) -> str: ...
-
-class _QPDFTokenFilter: ...
-
-class TokenFilter(_QPDFTokenFilter):
- def __init__(self) -> None: ...
- def handle_token(self, token: Token = ...) -> None | list | Token: ...
-
-class StreamParser:
- def __init__(self) -> None: ...
- @abstractmethod
- def handle_eof(self) -> None: ...
- @abstractmethod
- def handle_object(self, obj: Object, offset: int, length: int) -> None: ...
-
-class Page:
- _repr_mimebundle_: Any = ...
- @overload
- def __init__(self, arg0: Object) -> None: ...
- @overload
- def __init__(self, arg0: Page) -> None: ...
- def __contains__(self, key: Any) -> bool: ...
- def __delattr__(self, name: Any) -> None: ...
- def __eq__(self, other: Any) -> bool: ...
- def __getattr__(self, name: Any) -> Object: ...
- def __getitem__(self, name: Any) -> Object: ...
- def __setattr__(self, name: Any, value: Any): ...
- def __setitem__(self, name: Any, value: Any): ...
- def _get_cropbox(self, arg0: bool, arg1: bool) -> Object: ...
- def _get_mediabox(self, arg0: bool) -> Object: ...
- def _get_trimbox(self, arg0: bool, arg1: bool) -> Object: ...
- def add_content_token_filter(self, tf: TokenFilter) -> None: ...
- def add_overlay(
- self,
- other: Object | Page,
- rect: Rectangle | None,
- *,
- push_stack: bool | None = ...,
- ): ...
- def add_underlay(self, other: Object | Page, rect: Rectangle | None): ...
- def as_form_xobject(self, handle_transformations: bool = ...) -> Object: ...
- def calc_form_xobject_placement(
- self,
- formx: Object,
- name: Name,
- rec: Rectangle,
- *,
- invert_transformations: bool,
- allow_shrink: bool,
- allow_expand: bool,
- ) -> bytes: ...
- def contents_add(self, contents: Stream | bytes, *, prepend: bool) -> None: ...
- def contents_coalesce(self) -> None: ...
- def emplace(self, other: Page, retain: Iterable[Name]) -> None: ...
- def externalize_inline_images(self, min_size: int = ...) -> None: ...
- def get(self, key: str | Name, default: T | None = ...) -> T | None | Object: ...
- def get_filtered_contents(self, tf: TokenFilter) -> bytes: ...
- def index(self) -> int: ...
- def label(self) -> str: ...
- def parse_contents(self, arg0: StreamParser) -> None: ...
- def remove_unreferenced_resources(self) -> None: ...
- def rotate(self, angle: int, relative: bool) -> None: ...
- @property
- def images(self) -> _ObjectMapping: ...
- @property
- def cropbox(self) -> Array: ...
- @cropbox.setter
- def cropbox(self, val: Array) -> None: ...
- @property
- def mediabox(self) -> Array: ...
- @mediabox.setter
- def mediabox(self, val: Array) -> None: ...
- @property
- def obj(self) -> Dictionary: ...
- @property
- def trimbox(self) -> Array: ...
- @trimbox.setter
- def trimbox(self, val: Array) -> None: ...
- @property
- def resources(self) -> Dictionary: ...
- def add_resource(
- self,
- res: Object,
- res_type: Name,
- name: Name | None = None,
- *,
- prefix: str = '',
- replace_existing: bool = True,
- ) -> Name: ...
-
-class PageList:
- def __init__(self, *args, **kwargs) -> None: ...
- def append(self, page: Page) -> None: ...
- @overload
- def extend(self, other: PageList) -> None: ...
- @overload
- def extend(self, iterable: Iterable[Page]) -> None: ...
- def insert(self, index: int, obj: Page) -> None: ...
- def p(self, pnum: int) -> Page: ...
- def remove(self, **kwargs) -> None: ...
- def reverse(self) -> None: ...
- @overload
- def __delitem__(self, arg0: int) -> None: ...
- @overload
- def __delitem__(self, arg0: slice) -> None: ...
- @overload
- def __getitem__(self, arg0: int) -> Page: ...
- @overload
- def __getitem__(self, arg0: slice) -> list[Page]: ...
- def __iter__(self) -> PageList: ...
- def __len__(self) -> int: ...
- def __next__(self) -> Page: ...
- @overload
- def __setitem__(self, arg0: int, arg1: Page) -> None: ...
- @overload
- def __setitem__(self, arg0: slice, arg1: Iterable[Page]) -> None: ...
-
-class Pdf:
- _repr_mimebundle_: Any = ...
- def add_blank_page(self, *, page_size: tuple[Numeric, Numeric] = ...) -> Page: ...
- def __enter__(self) -> Pdf: ...
- def __exit__(self, exc_type, exc_value, traceback) -> None: ...
- def __init__(self, *args, **kwargs) -> None: ...
- def _add_page(self, page: Object, first: bool = ...) -> None: ...
- def _decode_all_streams_and_discard(self) -> None: ...
- def _get_object_id(self, arg0: int, arg1: int) -> Object: ...
- def _process(self, arg0: str, arg1: bytes) -> None: ...
- def _remove_page(self, arg0: Object) -> None: ...
- def _replace_object(self, arg0: tuple[int, int], arg1: Object) -> None: ...
- def _swap_objects(self, arg0: tuple[int, int], arg1: tuple[int, int]) -> None: ...
- def check(self) -> list[str]: ...
- def check_linearization(self, stream: object = ...) -> bool: ...
- def close(self) -> None: ...
- def copy_foreign(self, h: Object) -> Object: ...
- @overload
- def get_object(self, objgen: tuple[int, int]) -> Object: ...
- @overload
- def get_object(self, objid: int, gen: int) -> Object: ...
- def get_warnings(self) -> list: ...
- @overload
- def make_indirect(self, h: T) -> T: ...
- @overload
- def make_indirect(self, obj: Any) -> Object: ...
- def make_stream(self, data: bytes, d=None, **kwargs) -> Stream: ...
- @classmethod
- def new(cls) -> Pdf: ...
- @staticmethod
- def open(
- filename_or_stream: Path | str | BinaryIO,
- *,
- password: str | bytes = "",
- hex_password: bool = False,
- ignore_xref_streams: bool = False,
- suppress_warnings: bool = True,
- attempt_recovery: bool = True,
- inherit_page_attributes: bool = True,
- access_mode: AccessMode = AccessMode.default,
- allow_overwriting_input: bool = False,
- ) -> Pdf: ...
- def open_metadata(
- self,
- set_pikepdf_as_editor: bool = True,
- update_docinfo: bool = True,
- strict: bool = False,
- ) -> PdfMetadata: ...
- def open_outline(self, max_depth: int = 15, strict: bool = False) -> Outline: ...
- def remove_unreferenced_resources(self) -> None: ...
- def save(
- self,
- filename_or_stream: Path | str | BinaryIO | None = None,
- *,
- static_id: bool = False,
- preserve_pdfa: bool = True,
- min_version: str | tuple[str, int] = "",
- force_version: str | tuple[str, int] = "",
- fix_metadata_version: bool = True,
- compress_streams: bool = True,
- stream_decode_level: StreamDecodeLevel | None = None,
- object_stream_mode: ObjectStreamMode = ObjectStreamMode.preserve,
- normalize_content: bool = False,
- linearize: bool = False,
- qdf: bool = False,
- progress: Callable[[int], None] = None,
- encryption: Encryption | bool | None = None,
- recompress_flate: bool = False,
- deterministic_id: bool = False,
- ) -> None: ...
- def show_xref_table(self) -> None: ...
- @property
- def Root(self) -> Object: ...
- @property
- def _allow_accessibility(self) -> bool: ...
- @property
- def _allow_extract(self) -> bool: ...
- @property
- def _allow_modify_all(self) -> bool: ...
- @property
- def _allow_modify_annotation(self) -> bool: ...
- @property
- def _allow_modify_assembly(self) -> bool: ...
- @property
- def _allow_modify_form(self) -> bool: ...
- @property
- def _allow_modify_other(self) -> bool: ...
- @property
- def _allow_print_highres(self) -> bool: ...
- @property
- def _allow_print_lowres(self) -> bool: ...
- @property
- def _encryption_data(self) -> dict: ...
- @property
- def _pages(self) -> Any: ...
- @property
- def allow(self) -> Permissions: ...
- @property
- def docinfo(self) -> Object: ...
- @docinfo.setter
- def docinfo(self, val: Object) -> None: ...
- @property
- def encryption(self) -> EncryptionInfo: ...
- @property
- def extension_level(self) -> int: ...
- @property
- def filename(self) -> str: ...
- @property
- def is_encrypted(self) -> bool: ...
- @property
- def is_linearized(self) -> bool: ...
- @property
- def objects(self) -> Any: ...
- @property
- def pages(self) -> PageList: ...
- @property
- def pdf_version(self) -> str: ...
- @property
- def root(self) -> Object: ...
- @property
- def trailer(self) -> Object: ...
- @property
- def user_password_matched(self) -> bool: ...
- @property
- def owner_password_matched(self) -> bool: ...
- def generate_appearance_streams(self) -> None: ...
- def flatten_annotations(self, mode: str) -> None: ...
- @property
- def attachments(self) -> Attachments: ...
-
-class Rectangle:
- llx: float = ...
- lly: float = ...
- urx: float = ...
- ury: float = ...
- @overload
- def __init__(self, llx: float, lly: float, urx: float, ury: float) -> None: ...
- @overload
- def __init__(self, a: Array) -> None: ...
- @property
- def width(self) -> float: ...
- @property
- def height(self) -> float: ...
- @property
- def lower_left(self) -> tuple[float, float]: ...
- @property
- def lower_right(self) -> tuple[float, float]: ...
- @property
- def upper_left(self) -> tuple[float, float]: ...
- @property
- def upper_right(self) -> tuple[float, float]: ...
- def as_array(self) -> Array: ...
-
-class NameTree(MutableMapping[str | bytes, Object]):
- @staticmethod
- def new(pdf: Pdf, auto_repair: bool = True) -> NameTree: ...
- def __contains__(self, name: object) -> bool: ...
- def __delitem__(self, name: str | bytes) -> None: ...
- def __eq__(self, other: Any) -> bool: ...
- def __getitem__(self, name: str | bytes) -> Object: ...
- def __iter__(self) -> Iterator[bytes]: ...
- def __len__(self) -> int: ...
- def __setitem__(self, name: str | bytes, o: Object) -> None: ...
- def __init__(self, obj: Object, *, auto_repair: bool = ...) -> None: ...
- def _as_map(self) -> _ObjectMapping: ...
- @property
- def obj(self) -> Object: ...
-
-class NumberTree(MutableMapping[int, Object]):
- @staticmethod
- def new(pdf: Pdf, auto_repair: bool = True) -> NumberTree: ...
- def __contains__(self, key: object) -> bool: ...
- def __delitem__(self, key: int) -> None: ...
- def __eq__(self, other: Any) -> bool: ...
- def __getitem__(self, key: int) -> Object: ...
- def __iter__(self) -> Iterator[int]: ...
- def __len__(self) -> int: ...
- def __setitem__(self, key: int, o: Object) -> None: ...
- def __init__(self, obj: Object, pdf: Pdf, *, auto_repair: bool = ...) -> None: ...
- def _as_map(self) -> _ObjectMapping: ...
- @property
- def obj(self) -> Object: ...
-
-class ContentStreamInstruction:
- @property
- def operands(self) -> _ObjectList: ...
- @property
- def operator(self) -> Operator: ...
- def __getitem__(self, index: int) -> _ObjectList | Operator: ...
- def __len__(self) -> int: ...
-
-class ContentStreamInlineImage:
- @property
- def operands(self) -> _ObjectList: ...
- @property
- def operator(self) -> Operator: ...
- def __getitem__(self, index: int) -> _ObjectList | Operator: ...
- def __len__(self) -> int: ...
- @property
- def iimage(self) -> PdfInlineImage: ...
-
-class Job:
- EXIT_ERROR: ClassVar[int] = 2
- EXIT_WARNING: ClassVar[int] = 3
- EXIT_IS_NOT_ENCRYPTED: ClassVar[int] = 2
- EXIT_CORRECT_PASSWORD: ClassVar[int] = 3
- LATEST_JOB_JSON: ClassVar[int]
- LATEST_JSON: ClassVar[int]
-
- @staticmethod
- def json_out_schema(*, schema: int) -> str: ...
- @staticmethod
- def job_json_schema(*, schema: int) -> str: ...
- @overload
- def __init__(self, json: str) -> None: ...
- @overload
- def __init__(self, json_dict: Mapping) -> None: ...
- @overload
- def __init__(
- self, args: Sequence[str | bytes], *, progname: str = "pikepdf"
- ) -> None: ...
- def check_configuration(self) -> None: ...
- @property
- def creates_output(self) -> bool: ...
- @property
- def message_prefix(self) -> str: ...
- def run(self) -> None: ...
- @property
- def has_warnings(self) -> bool: ...
- @property
- def exit_code(self) -> int: ...
- @property
- def encryption_status(self) -> dict[str, bool]: ...
-
-def _Null() -> Any: ...
-def _encode(handle: Any) -> Object: ...
-def _new_array(arg0: Iterable) -> Array: ...
-def _new_boolean(arg0: bool) -> Object: ...
-def _new_dictionary(arg0: Mapping[Any, Any]) -> Dictionary: ...
-def _new_integer(arg0: int) -> Object: ...
-def _new_name(arg0: str) -> Name: ...
-def _new_operator(op: str) -> Operator: ...
-@overload
-def _new_real(arg0: str) -> Object: ...
-@overload
-def _new_real(value: float, places: int = ...) -> Object: ...
-def _new_stream(arg0: Pdf, arg1: bytes) -> Stream: ...
-def _new_string(s: str | bytes) -> String: ...
-def _new_string_utf8(s: str) -> String: ...
-def _test_file_not_found(*args, **kwargs) -> Any: ...
-def _translate_qpdf_logic_error(arg0: str) -> str: ...
-def get_decimal_precision() -> int: ...
-def pdf_doc_to_utf8(pdfdoc: bytes) -> str: ...
-def qpdf_version() -> str: ...
-def set_access_default_mmap(mmap: bool) -> bool: ...
-def set_decimal_precision(prec: int) -> int: ...
-def unparse(obj: Any) -> bytes: ...
-def utf8_to_pdf_doc(utf8: str, unknown: bytes) -> tuple[bool, bytes]: ...
-def _unparse_content_stream(contentstream: Iterable[Any]) -> bytes: ...
-def set_flate_compression_level(
- level: Literal[-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
-) -> None: ...
diff --git a/env/lib/python3.10/site-packages/pikepdf/_version.py b/env/lib/python3.10/site-packages/pikepdf/_version.py
deleted file mode 100644
index 9a084d9..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/_version.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-from __future__ import annotations
-
-try:
- from importlib_metadata import version as _package_version # type: ignore
-except ImportError:
- from importlib.metadata import version as _package_version
-
-__version__ = _package_version('pikepdf')
-
-__all__ = ['__version__']
diff --git a/env/lib/python3.10/site-packages/pikepdf/_xml.py b/env/lib/python3.10/site-packages/pikepdf/_xml.py
deleted file mode 100644
index edf811c..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/_xml.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-from __future__ import annotations
-
-from typing import IO, Any, AnyStr
-
-from lxml.etree import XMLParser as _UnsafeXMLParser
-from lxml.etree import _ElementTree
-from lxml.etree import parse as _parse
-
-
-class _XMLParser(_UnsafeXMLParser):
- def __init__(self, *args: Any, **kwargs: Any):
- # Prevent XXE attacks
- # https://rules.sonarsource.com/python/type/Vulnerability/RSPEC-2755
- kwargs['resolve_entities'] = False
- kwargs['no_network'] = True
- super().__init__(*args, **kwargs)
-
-
-def parse_xml(source: AnyStr | IO[Any], recover: bool = False) -> _ElementTree:
- """Wrap lxml's parse to provide protection against XXE attacks."""
- parser = _XMLParser(recover=recover, remove_pis=False)
- return _parse(source, parser=parser)
-
-
-__all__ = ['parse_xml']
diff --git a/env/lib/python3.10/site-packages/pikepdf/codec.py b/env/lib/python3.10/site-packages/pikepdf/codec.py
deleted file mode 100644
index 4290b91..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/codec.py
+++ /dev/null
@@ -1,170 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""Implement pdfdoc codec."""
-
-from __future__ import annotations
-
-import codecs
-from typing import Container
-
-from ._qpdf import pdf_doc_to_utf8, utf8_to_pdf_doc
-
-# pylint: disable=redefined-builtin
-
-# See PDF Reference Manual 1.7, Table D.2.
-# The following generates set of all Unicode code points that can be encoded in
-# pdfdoc. Since pdfdoc is 8-bit, the vast majority of code points cannot be.
-
-# Due to a bug, QPDF <= 10.5 and pikepdf < 5 had some inconsistencies around
-# PdfDocEncoding.
-PDFDOC_ENCODABLE = frozenset(
- list(range(0x00, 0x17 + 1))
- + list(range(0x20, 0x7E + 1))
- + [
- 0x2022,
- 0x2020,
- 0x2021,
- 0x2026,
- 0x2014,
- 0x2013,
- 0x0192,
- 0x2044,
- 0x2039,
- 0x203A,
- 0x2212,
- 0x2030,
- 0x201E,
- 0x201C,
- 0x201D,
- 0x2018,
- 0x2019,
- 0x201A,
- 0x2122,
- 0xFB01,
- 0xFB02,
- 0x0141,
- 0x0152,
- 0x0160,
- 0x0178,
- 0x017D,
- 0x0131,
- 0x0142,
- 0x0153,
- 0x0161,
- 0x017E,
- 0x20AC,
- ]
- + [0x02D8, 0x02C7, 0x02C6, 0x02D9, 0x02DD, 0x02DB, 0x02DA, 0x02DC]
- + list(range(0xA1, 0xAC + 1))
- + list(range(0xAE, 0xFF + 1))
-)
-
-
-def _find_first_index(s: str, ordinals: Container[int]) -> int:
- for n, char in enumerate(s):
- if ord(char) not in ordinals:
- return n
- raise ValueError("couldn't find the unencodable character") # pragma: no cover
-
-
-def pdfdoc_encode(input: str, errors: str = 'strict') -> tuple[bytes, int]:
- error_marker = b'?' if errors == 'replace' else b'\xad'
- success, pdfdoc = utf8_to_pdf_doc(input, error_marker)
- if success:
- return pdfdoc, len(input)
-
- if errors == 'ignore':
- pdfdoc = pdfdoc.replace(b'\xad', b'')
- return pdfdoc, len(input)
- if errors == 'replace':
- return pdfdoc, len(input)
- if errors == 'strict':
- if input.startswith('\xfe\xff') or input.startswith('\xff\xfe'):
- raise UnicodeEncodeError(
- 'pdfdoc',
- input,
- 0,
- 2,
- "strings beginning with byte order marks cannot be encoded in pdfdoc",
- )
-
- # libqpdf doesn't return what character caused the error, and Python
- # needs this, so make an educated guess and raise an exception based
- # on that.
- offending_index = _find_first_index(input, PDFDOC_ENCODABLE)
- raise UnicodeEncodeError(
- 'pdfdoc',
- input,
- offending_index,
- offending_index + 1,
- "character cannot be represented in pdfdoc encoding",
- )
- raise LookupError(errors)
-
-
-def pdfdoc_decode(input: bytes, errors: str = 'strict') -> tuple[str, int]:
- if isinstance(input, memoryview):
- input = input.tobytes()
- s = pdf_doc_to_utf8(input)
- if errors == 'strict':
- idx = s.find('\ufffd')
- if idx >= 0:
- raise UnicodeDecodeError(
- 'pdfdoc',
- input,
- idx,
- idx + 1,
- "no Unicode mapping is defined for this character",
- )
-
- return s, len(input)
-
-
-class PdfDocCodec(codecs.Codec):
- """Implements PdfDocEncoding character map used inside PDFs."""
-
- def encode(self, input: str, errors: str = 'strict') -> tuple[bytes, int]:
- return pdfdoc_encode(input, errors)
-
- def decode(self, input: bytes, errors: str = 'strict') -> tuple[str, int]:
- return pdfdoc_decode(input, errors)
-
-
-class PdfDocStreamWriter(PdfDocCodec, codecs.StreamWriter):
- pass
-
-
-class PdfDocStreamReader(PdfDocCodec, codecs.StreamReader):
- def decode(self, input: bytes, errors: str = 'strict') -> tuple[str, int]:
- return PdfDocCodec.decode(self, input, errors)
-
-
-class PdfDocIncrementalEncoder(codecs.IncrementalEncoder):
- def encode(self, input: str, final: bool = False) -> bytes:
- return pdfdoc_encode(input, 'strict')[0]
-
-
-class PdfDocIncrementalDecoder(codecs.IncrementalDecoder):
- def decode(self, input: bytes, final: bool = False) -> str:
- return pdfdoc_decode(input, 'strict')[0]
-
-
-def find_pdfdoc(encoding: str) -> codecs.CodecInfo | None:
- if encoding in ('pdfdoc', 'pdfdoc_pikepdf'):
- codec = PdfDocCodec()
- return codecs.CodecInfo(
- name=encoding,
- encode=codec.encode,
- decode=codec.decode,
- streamwriter=PdfDocStreamWriter,
- streamreader=PdfDocStreamReader,
- incrementalencoder=PdfDocIncrementalEncoder,
- incrementaldecoder=PdfDocIncrementalDecoder,
- )
- return None # pragma: no cover
-
-
-codecs.register(find_pdfdoc)
-
-__all__ = ['utf8_to_pdf_doc', 'pdf_doc_to_utf8']
diff --git a/env/lib/python3.10/site-packages/pikepdf/jbig2.py b/env/lib/python3.10/site-packages/pikepdf/jbig2.py
deleted file mode 100644
index 28c596b..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/jbig2.py
+++ /dev/null
@@ -1,108 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""Integrate JBIG2 image decoding.
-
-Requires third-party JBIG2 decoder in the form of an external program, like
-jbig2dec.
-"""
-
-from __future__ import annotations
-
-import os
-from abc import ABC, abstractmethod
-from pathlib import Path
-from subprocess import DEVNULL, PIPE, CalledProcessError, run
-from tempfile import TemporaryDirectory
-
-from packaging.version import Version
-from PIL import Image
-
-from pikepdf._exceptions import DependencyError
-
-
-def _extract_jbig2_bytes(jbig2: bytes, jbig2_globals: bytes) -> bytes:
- with TemporaryDirectory(prefix='pikepdf-', suffix='.jbig2') as tmpdir:
- image_path = Path(tmpdir) / "image"
- global_path = Path(tmpdir) / "global"
- output_path = Path(tmpdir) / "outfile"
-
- args = [
- "jbig2dec",
- "--embedded",
- "--format",
- "png",
- "--output",
- os.fspath(output_path),
- ]
-
- # Get the raw stream, because we can't decode im_obj - that is why we are here
- # (Strictly speaking we should remove any non-JBIG2 filters if double encoded)
- image_path.write_bytes(jbig2)
-
- if len(jbig2_globals) > 0:
- global_path.write_bytes(jbig2_globals)
- args.append(os.fspath(global_path))
-
- args.append(os.fspath(image_path))
-
- run(args, stdout=DEVNULL, check=True)
- with Image.open(output_path) as im:
- return im.tobytes()
-
-
-class JBIG2DecoderInterface(ABC):
- """pikepdf's C++ expects this Python interface to be available for JBIG2."""
-
- @abstractmethod
- def check_available(self) -> None:
- """Check if decoder is available. Throws DependencyError if not."""
-
- @abstractmethod
- def decode_jbig2(self, jbig2: bytes, jbig2_globals: bytes) -> bytes:
- """Decode JBIG2 from jbig2 and globals, returning decoded bytes."""
-
- def available(self) -> bool:
- """Return True if decoder is available."""
- try:
- self.check_available()
- except DependencyError:
- return False
- else:
- return True
-
-
-class JBIG2Decoder(JBIG2DecoderInterface):
- """JBIG2 decoder implementation."""
-
- def check_available(self) -> None:
- """Check if jbig2dec is installed and usable."""
- version = self._version()
- if version < Version('0.15'):
- raise DependencyError("jbig2dec is too old (older than version 0.15)")
-
- def decode_jbig2(self, jbig2: bytes, jbig2_globals: bytes) -> bytes:
- """Decode JBIG2 from binary data, returning decode bytes."""
- return _extract_jbig2_bytes(jbig2, jbig2_globals)
-
- def _version(self) -> Version:
- try:
- proc = run(
- ['jbig2dec', '--version'], stdout=PIPE, check=True, encoding='ascii'
- )
- except (CalledProcessError, FileNotFoundError) as e:
- raise DependencyError("jbig2dec - not installed or not found") from e
- else:
- result = proc.stdout
- version_str = result.replace(
- 'jbig2dec', ''
- ).strip() # returns "jbig2dec 0.xx"
- return Version(version_str)
-
-
-_jbig2_decoder = JBIG2Decoder()
-
-
-def get_decoder() -> JBIG2DecoderInterface:
- """Return an instance of a JBIG2 decoder."""
- return _jbig2_decoder
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/__init__.py b/env/lib/python3.10/site-packages/pikepdf/models/__init__.py
deleted file mode 100644
index e2e73ba..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/__init__.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""Python implementation of higher level PDF constructs."""
-
-from __future__ import annotations
-
-from ._content_stream import (
- ContentStreamInstructions,
- PdfParsingError,
- UnparseableContentStreamInstructions,
- parse_content_stream,
- unparse_content_stream,
-)
-from .encryption import Encryption, EncryptionInfo, Permissions
-from .image import PdfImage, PdfInlineImage, UnsupportedImageTypeError
-from .matrix import PdfMatrix
-from .metadata import PdfMetadata
-from .outlines import (
- Outline,
- OutlineItem,
- OutlineStructureError,
- PageLocation,
- make_page_destination,
-)
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/__init__.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/__init__.cpython-310.pyc
deleted file mode 100644
index b07eefb..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/__init__.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/_content_stream.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/_content_stream.cpython-310.pyc
deleted file mode 100644
index 6e1c1c9..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/_content_stream.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/_transcoding.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/_transcoding.cpython-310.pyc
deleted file mode 100644
index f9ad743..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/_transcoding.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/encryption.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/encryption.cpython-310.pyc
deleted file mode 100644
index 32e8098..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/encryption.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/image.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/image.cpython-310.pyc
deleted file mode 100644
index 0de94e9..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/image.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/matrix.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/matrix.cpython-310.pyc
deleted file mode 100644
index ee96c86..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/matrix.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/metadata.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/metadata.cpython-310.pyc
deleted file mode 100644
index 4b97e11..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/metadata.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/outlines.cpython-310.pyc b/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/outlines.cpython-310.pyc
deleted file mode 100644
index 18dbd1d..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/__pycache__/outlines.cpython-310.pyc
+++ /dev/null
Binary files differ
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/_content_stream.py b/env/lib/python3.10/site-packages/pikepdf/models/_content_stream.py
deleted file mode 100644
index 8976c4c..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/_content_stream.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""Content stream parsing."""
-
-from __future__ import annotations
-
-from typing import TYPE_CHECKING, Collection, List, Tuple, Union, cast
-
-from pikepdf import Object, ObjectType, Operator, Page, PdfError, _qpdf
-
-if TYPE_CHECKING:
- from pikepdf.models.image import PdfInlineImage
-
-# Operands, Operator
-_OldContentStreamOperands = Collection[Union[Object, 'PdfInlineImage']]
-_OldContentStreamInstructions = Tuple[_OldContentStreamOperands, Operator]
-
-ContentStreamInstructions = Union[
- _qpdf.ContentStreamInstruction, _qpdf.ContentStreamInlineImage
-]
-
-UnparseableContentStreamInstructions = Union[
- ContentStreamInstructions, _OldContentStreamInstructions
-]
-
-
-class PdfParsingError(Exception):
- """Error when parsing a PDF content stream."""
-
- def __init__(self, message=None, line=None):
- if not message:
- message = f"Error encoding content stream at line {line}"
- super().__init__(message)
- self.line = line
-
-
-def parse_content_stream(
- page_or_stream: Object | Page, operators: str = ''
-) -> list[ContentStreamInstructions]:
- """Parse a PDF content stream into a sequence of instructions.
-
- A PDF content stream is list of instructions that describe where to render
- the text and graphics in a PDF. This is the starting point for analyzing
- PDFs.
-
- If the input is a page and page.Contents is an array, then the content
- stream is automatically treated as one coalesced stream.
-
- Each instruction contains at least one operator and zero or more operands.
-
- This function does not have anything to do with opening a PDF file itself or
- processing data from a whole PDF. It is for processing a specific object inside
- a PDF that is already opened.
-
- Args:
- page_or_stream: A page object, or the content
- stream attached to another object such as a Form XObject.
- operators: A space-separated string of operators to whitelist.
- For example 'q Q cm Do' will return only operators
- that pertain to drawing images. Use 'BI ID EI' for inline images.
- All other operators and associated tokens are ignored. If blank,
- all tokens are accepted.
-
- Example:
- >>> with pikepdf.Pdf.open(input_pdf) as pdf:
- >>> page = pdf.pages[0]
- >>> for operands, command in parse_content_stream(page):
- >>> print(command)
-
- .. versionchanged:: 3.0
- Returns a list of ``ContentStreamInstructions`` instead of a list
- of (operand, operator) tuples. The returned items are duck-type compatible
- with the previous returned items.
- """
- if not isinstance(page_or_stream, (Object, Page)):
- raise TypeError("stream must be a pikepdf.Object or pikepdf.Page")
-
- if (
- isinstance(page_or_stream, Object)
- and page_or_stream._type_code != ObjectType.stream
- and page_or_stream.get('/Type') != '/Page'
- ):
- raise TypeError("parse_content_stream called on page or stream object")
-
- if isinstance(page_or_stream, Page):
- page_or_stream = page_or_stream.obj
-
- try:
- if page_or_stream.get('/Type') == '/Page':
- page = page_or_stream
- instructions = cast(
- List[ContentStreamInstructions],
- page._parse_page_contents_grouped(operators),
- )
- else:
- stream = page_or_stream
- instructions = cast(
- List[ContentStreamInstructions],
- Object._parse_stream_grouped(stream, operators),
- )
- except PdfError as e:
- if 'supposed to be a stream or an array' in str(e):
- raise TypeError("parse_content_stream called on non-stream Object") from e
- raise e from e
-
- return instructions
-
-
-def unparse_content_stream(
- instructions: Collection[UnparseableContentStreamInstructions],
-) -> bytes:
- """Convert collection of instructions to bytes suitable for storing in PDF.
-
- Given a parsed list of instructions/operand-operators, convert to bytes suitable
- for embedding in a PDF. In PDF the operator always follows the operands.
-
- Args:
- instructions: collection of instructions such as is returned
- by :func:`parse_content_stream()`
-
- Returns:
- A binary content stream, suitable for attaching to a Pdf.
- To attach to a Pdf, use :meth:`Pdf.make_stream()``.
-
- .. versionchanged:: 3.0
- Now accept collections that contain any mixture of
- ``ContentStreamInstruction``, ``ContentStreamInlineImage``, and the older
- operand-operator tuples from pikepdf 2.x.
- """
- try:
- return _qpdf._unparse_content_stream(instructions)
- except (ValueError, TypeError, RuntimeError) as e:
- raise PdfParsingError(
- "While unparsing a content stream, an error occurred"
- ) from e
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/_transcoding.py b/env/lib/python3.10/site-packages/pikepdf/models/_transcoding.py
deleted file mode 100644
index e54facf..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/_transcoding.py
+++ /dev/null
@@ -1,243 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-from __future__ import annotations
-
-import struct
-from typing import Any, Callable, NamedTuple, Union
-
-from PIL import Image
-from PIL.TiffTags import TAGS_V2 as TIFF_TAGS
-
-BytesLike = Union[bytes, memoryview]
-MutableBytesLike = Union[bytearray, memoryview]
-
-
-def _next_multiple(n: int, k: int) -> int:
- """Return the multiple of k that is greater than or equal n.
-
- >>> _next_multiple(101, 4)
- 104
- >>> _next_multiple(100, 4)
- 100
- """
- div, mod = divmod(n, k)
- if mod > 0:
- div += 1
- return div * k
-
-
-def unpack_subbyte_pixels(
- packed: BytesLike, size: tuple[int, int], bits: int, scale: int = 0
-) -> tuple[BytesLike, int]:
- """Unpack subbyte *bits* pixels into full bytes and rescale.
-
- When scale is 0, the appropriate scale is calculated.
- e.g. for 2-bit, the scale is adjusted so that
- 0b00 = 0.00 = 0x00
- 0b01 = 0.33 = 0x55
- 0b10 = 0.66 = 0xaa
- 0b11 = 1.00 = 0xff
- When scale is 1, no scaling is applied, appropriate when
- the bytes are palette indexes.
- """
- width, height = size
- bits_per_byte = 8 // bits
- stride = _next_multiple(width, bits_per_byte)
- buffer = bytearray(bits_per_byte * stride * height)
- max_read = len(buffer) // bits_per_byte
- if scale == 0:
- scale = 255 / ((2**bits) - 1)
- if bits == 4:
- _4bit_inner_loop(packed[:max_read], buffer, scale)
- elif bits == 2:
- _2bit_inner_loop(packed[:max_read], buffer, scale)
- # elif bits == 1:
- # _1bit_inner_loop(packed[:max_read], buffer, scale)
- else:
- raise NotImplementedError(bits)
- return memoryview(buffer), stride
-
-
-# def _1bit_inner_loop(in_: BytesLike, out: MutableBytesLike, scale: int) -> None:
-# """Unpack 1-bit values to their 8-bit equivalents.
-
-# Thus *out* must be 8x at long as *in*.
-# """
-# for n, val in enumerate(in_):
-# out[8 * n + 0] = int((val >> 7) & 0b1) * scale
-# out[8 * n + 1] = int((val >> 6) & 0b1) * scale
-# out[8 * n + 2] = int((val >> 5) & 0b1) * scale
-# out[8 * n + 3] = int((val >> 4) & 0b1) * scale
-# out[8 * n + 4] = int((val >> 3) & 0b1) * scale
-# out[8 * n + 5] = int((val >> 2) & 0b1) * scale
-# out[8 * n + 6] = int((val >> 1) & 0b1) * scale
-# out[8 * n + 7] = int((val >> 0) & 0b1) * scale
-
-
-def _2bit_inner_loop(in_: BytesLike, out: MutableBytesLike, scale: int) -> None:
- """Unpack 2-bit values to their 8-bit equivalents.
-
- Thus *out* must be 4x at long as *in*.
- """
- for n, val in enumerate(in_):
- out[4 * n] = int((val >> 6) * scale)
- out[4 * n + 1] = int(((val >> 4) & 0b11) * scale)
- out[4 * n + 2] = int(((val >> 2) & 0b11) * scale)
- out[4 * n + 3] = int((val & 0b11) * scale)
-
-
-def _4bit_inner_loop(in_: BytesLike, out: MutableBytesLike, scale: int) -> None:
- """Unpack 4-bit values to their 8-bit equivalents.
-
- Thus *out* must be 2x at long as *in*.
- """
- for n, val in enumerate(in_):
- out[2 * n] = int((val >> 4) * scale)
- out[2 * n + 1] = int((val & 0b1111) * scale)
-
-
-def image_from_byte_buffer(buffer: BytesLike, size: tuple[int, int], stride: int):
- """Use Pillow to create one-component image from a byte buffer.
-
- *stride* is the number of bytes per row, and is essential for packed bits
- with odd image widths.
- """
- ystep = 1 # image is top to bottom in memory
- return Image.frombuffer('L', size, buffer, "raw", 'L', stride, ystep)
-
-
-def _make_rgb_palette(gray_palette: bytes) -> bytes:
- palette = b''
- for entry in gray_palette:
- palette += bytes([entry]) * 3
- return palette
-
-
-def _depalettize_cmyk(buffer: BytesLike, palette: BytesLike):
- with memoryview(buffer) as mv:
- output = bytearray(4 * len(mv))
- for n, pal_idx in enumerate(mv):
- output[4 * n : 4 * (n + 1)] = palette[4 * pal_idx : 4 * (pal_idx + 1)]
- return output
-
-
-def image_from_buffer_and_palette(
- buffer: BytesLike,
- size: tuple[int, int],
- stride: int,
- base_mode: str,
- palette: BytesLike,
-) -> Image.Image:
- """Construct an image from a byte buffer and apply the palette.
-
- 1/2/4-bit images must be unpacked (no scaling!) to byte buffers first, such
- that every 8-bit integer is an index into the palette.
- """
- # Reminder Pillow palette byte order unintentionally changed in 8.3.0
- # https://github.com/python-pillow/Pillow/issues/5595
- # 8.2.0: all aligned by channel (very nonstandard)
- # 8.3.0: all channels for one color followed by the next color (e.g. RGBRGBRGB)
-
- if base_mode == 'RGB':
- im = image_from_byte_buffer(buffer, size, stride)
- im.putpalette(palette, rawmode=base_mode)
- elif base_mode == 'L':
- # Pillow does not fully support palettes with rawmode='L'.
- # Convert to RGB palette.
- gray_palette = _make_rgb_palette(palette)
- im = image_from_byte_buffer(buffer, size, stride)
- im.putpalette(gray_palette, rawmode='RGB')
- elif base_mode == 'CMYK':
- # Pillow does not support CMYK with palettes; convert manually
- output = _depalettize_cmyk(buffer, palette)
- im = Image.frombuffer('CMYK', size, data=output, decoder_name='raw')
- else:
- raise NotImplementedError(f'palette with {base_mode}')
- return im
-
-
-def fix_1bit_palette_image(
- im: Image.Image, base_mode: str, palette: BytesLike
-) -> Image.Image:
- """Apply palettes to 1-bit images."""
- im = im.convert('P')
- if base_mode == 'RGB' and len(palette) == 6:
- # rgbrgb -> rgb000000...rgb
- palette = palette[0:3] + (b'\x00\x00\x00' * (256 - 2)) + palette[3:6]
- im.putpalette(palette, rawmode='RGB')
- elif base_mode == 'L':
- try:
- im.putpalette(palette, rawmode='L')
- except ValueError as e:
- if 'unrecognized raw mode' in str(e):
- rgb_palette = _make_rgb_palette(palette)
- im.putpalette(rgb_palette, rawmode='RGB')
- return im
-
-
-def generate_ccitt_header(
- size: tuple[int, int],
- data_length: int,
- ccitt_group: int,
- photometry: int,
- icc: bytes,
-) -> bytes:
- """Generate binary CCITT header for image with given parameters."""
- tiff_header_struct = '<' + '2s' + 'H' + 'L' + 'H'
-
- tag_keys = {tag.name: key for key, tag in TIFF_TAGS.items()} # type: ignore
- ifd_struct = '<HHLL'
-
- class IFD(NamedTuple):
- key: int
- typecode: Any
- count_: int
- data: int | Callable[[], int | None]
-
- ifds: list[IFD] = []
-
- def header_length(ifd_count) -> int:
- return (
- struct.calcsize(tiff_header_struct)
- + struct.calcsize(ifd_struct) * ifd_count
- + 4
- )
-
- def add_ifd(tag_name: str, data: int | Callable[[], int | None], count: int = 1):
- key = tag_keys[tag_name]
- typecode = TIFF_TAGS[key].type # type: ignore
- ifds.append(IFD(key, typecode, count, data))
-
- image_offset = None
- width, height = size
- add_ifd('ImageWidth', width)
- add_ifd('ImageLength', height)
- add_ifd('BitsPerSample', 1)
- add_ifd('Compression', ccitt_group)
- add_ifd('PhotometricInterpretation', int(photometry))
- add_ifd('StripOffsets', lambda: image_offset)
- add_ifd('RowsPerStrip', height)
- add_ifd('StripByteCounts', data_length)
-
- icc_offset = 0
- if icc:
- add_ifd('ICCProfile', lambda: icc_offset, count=len(icc))
-
- icc_offset = header_length(len(ifds))
- image_offset = icc_offset + len(icc)
-
- ifd_args = [(arg() if callable(arg) else arg) for ifd in ifds for arg in ifd]
- tiff_header = struct.pack(
- (tiff_header_struct + ifd_struct[1:] * len(ifds) + 'L'),
- b'II', # Byte order indication: Little endian
- 42, # Version number (always 42)
- 8, # Offset to first IFD
- len(ifds), # Number of tags in IFD
- *ifd_args,
- 0, # Last IFD
- )
-
- if icc:
- tiff_header += icc
- return tiff_header
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/encryption.py b/env/lib/python3.10/site-packages/pikepdf/models/encryption.py
deleted file mode 100644
index d6b5036..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/encryption.py
+++ /dev/null
@@ -1,176 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""For managing PDF encryption."""
-
-from __future__ import annotations
-
-import sys
-from typing import TYPE_CHECKING, Any, NamedTuple, cast
-
-if sys.version_info >= (3, 8):
- from typing import Literal
-else:
- from typing_extensions import Literal # pragma: no cover
-
-if TYPE_CHECKING:
- from pikepdf._qpdf import EncryptionMethod
-
-
-class Permissions(NamedTuple):
- """
- Stores the user-level permissions for an encrypted PDF.
-
- A compliant PDF reader/writer should enforce these restrictions on people
- who have the user password and not the owner password. In practice, either
- password is sufficient to decrypt all document contents. A person who has
- the owner password should be allowed to modify the document in any way.
- pikepdf does not enforce the restrictions in any way; it is up to application
- developers to enforce them as they see fit.
-
- Unencrypted PDFs implicitly have all permissions allowed. Permissions can
- only be changed when a PDF is saved.
- """
-
- accessibility: bool = True
- """Can users use screen readers and accessibility tools to read the PDF?"""
-
- extract: bool = True
- """Can users extract contents?"""
-
- modify_annotation: bool = True
- """Can users modify annotations?"""
-
- modify_assembly: bool = False
- """Can users arrange document contents?"""
-
- modify_form: bool = True
- """Can users fill out forms?"""
-
- modify_other: bool = True
- """Can users modify the document?"""
-
- print_lowres: bool = True
- """Can users print the document at low resolution?"""
-
- print_highres: bool = True
- """Can users print the document at high resolution?"""
-
-
-DEFAULT_PERMISSIONS = Permissions()
-
-
-class EncryptionInfo:
- """
- Reports encryption information for an encrypted PDF.
-
- This information may not be changed, except when a PDF is saved.
- This object is not used to specify the encryption settings to save
- a PDF, due to non-overlapping information requirements.
- """
-
- def __init__(self, encdict: dict[str, Any]):
- """
- Initialize EncryptionInfo.
-
- Generally pikepdf will initialize and return it.
-
- Args:
- encdict: Python dictionary containing encryption settings.
- """
- self._encdict = encdict
-
- @property
- def R(self) -> int:
- """Revision number of the security handler."""
- return int(self._encdict['R'])
-
- @property
- def V(self) -> int:
- """Version of PDF password algorithm."""
- return int(self._encdict['V'])
-
- @property
- def P(self) -> int:
- """Return encoded permission bits.
-
- See :meth:`Pdf.allow` instead.
- """
- return int(self._encdict['P'])
-
- @property
- def stream_method(self) -> EncryptionMethod:
- """Encryption method used to encode streams."""
- return cast('EncryptionMethod', self._encdict['stream'])
-
- @property
- def string_method(self) -> EncryptionMethod:
- """Encryption method used to encode strings."""
- return cast('EncryptionMethod', self._encdict['string'])
-
- @property
- def file_method(self) -> EncryptionMethod:
- """Encryption method used to encode the whole file."""
- return cast('EncryptionMethod', self._encdict['file'])
-
- @property
- def user_password(self) -> bytes:
- """If possible, return the user password.
-
- The user password can only be retrieved when a PDF is opened
- with the owner password and when older versions of the
- encryption algorithm are used.
-
- The password is always returned as ``bytes`` even if it has
- a clear Unicode representation.
- """
- return bytes(self._encdict['user_passwd'])
-
- @property
- def encryption_key(self) -> bytes:
- """Return the RC4 or AES encryption key used for this file."""
- return bytes(self._encdict['encryption_key'])
-
- @property
- def bits(self) -> int:
- """Return the number of bits in the encryption algorithm.
-
- e.g. if the algorithm is AES-256, this returns 256.
- """
- return len(self._encdict['encryption_key']) * 8
-
-
-class Encryption(NamedTuple):
- """Specify the encryption settings to apply when a PDF is saved."""
-
- owner: str = ''
- """The owner password to use. This allows full control
- of the file. If blank, the PDF will be encrypted and
- present as "(SECURED)" in PDF viewers. If the owner password
- is blank, the user password should be as well."""
-
- user: str = ''
- """The user password to use. With this password, some
- restrictions will be imposed by a typical PDF reader.
- If blank, the PDF can be opened by anyone, but only modified
- as allowed by the permissions in ``allow``."""
-
- R: Literal[2, 3, 4, 5, 6] = 6
- """Select the security handler algorithm to use. Choose from:
- ``2``, ``3``, ``4`` or ``6``. By default, the highest version of
- is selected (``6``). ``5`` is a deprecated algorithm that should
- not be used."""
-
- allow: Permissions = DEFAULT_PERMISSIONS
- """The permissions to set.
- If omitted, all permissions are granted to the user."""
-
- aes: bool = True
- """If True, request the AES algorithm. If False, use RC4.
- If omitted, AES is selected whenever possible (R >= 4)."""
-
- metadata: bool = True
- """If True, also encrypt the PDF metadata. If False,
- metadata is not encrypted. Reading document metadata without
- decryption may be desirable in some cases. Requires ``aes=True``.
- If omitted, metadata is encrypted whenever possible."""
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/image.py b/env/lib/python3.10/site-packages/pikepdf/models/image.py
deleted file mode 100644
index 5981a8e..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/image.py
+++ /dev/null
@@ -1,991 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""Extract images embedded in PDF."""
-
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-from decimal import Decimal
-from io import BytesIO
-from itertools import zip_longest
-from pathlib import Path
-from shutil import copyfileobj
-from typing import Any, BinaryIO, Callable, NamedTuple, Sequence, TypeVar, cast
-
-from PIL import Image
-from PIL.ImageCms import ImageCmsProfile
-
-from pikepdf import (
- Array,
- Dictionary,
- Name,
- Object,
- Pdf,
- PdfError,
- Stream,
- StreamDecodeLevel,
- String,
- jbig2,
-)
-from pikepdf._exceptions import DependencyError
-from pikepdf._qpdf import Buffer
-from pikepdf._version import __version__
-from pikepdf.models import _transcoding
-
-T = TypeVar('T')
-
-
-class UnsupportedImageTypeError(Exception):
- """This image is formatted in a way pikepdf does not supported."""
-
-
-class NotExtractableError(Exception):
- """Indicates that an image cannot be directly extracted."""
-
-
-class HifiPrintImageNotTranscodableError(NotExtractableError):
- """Image contains high fidelity printing information and cannot be extracted."""
-
-
-class InvalidPdfImageError(Exception):
- """This image is not valid according to the PDF 1.7 specification."""
-
-
-def _array_str(value: Object | str | list):
- """Simplify pikepdf objects to array of str. Keep Streams and dictionaries intact."""
-
- def _convert(item):
- if isinstance(item, (list, Array)):
- return [_convert(subitem) for subitem in item]
- if isinstance(item, (Stream, Dictionary, bytes, int)):
- return item
- if isinstance(item, (Name, str)):
- return str(item)
- if isinstance(item, (String)):
- return bytes(item)
- raise NotImplementedError(value)
-
- result = _convert(value)
- if not isinstance(result, list):
- result = [result]
- return result
-
-
-def _ensure_list(value: list[Object] | Dictionary | Array) -> list[Object]:
- """Ensure value is a list of pikepdf.Object, if it was not already.
-
- To support DecodeParms which can be present as either an array of dicts or a single
- dict. It's easier to convert to an array of one dict.
- """
- if isinstance(value, list):
- return value
- return list(value.wrap_in_array().as_list())
-
-
-def _metadata_from_obj(
- obj: Dictionary | Stream, name: str, type_: Callable[[Any], T], default: T
-) -> T | None:
- """Retrieve metadata from a dictionary or stream, and ensure it is the expected type."""
- val = getattr(obj, name, default)
- try:
- return type_(val)
- except TypeError:
- if val is None:
- return None
- raise NotImplementedError('Metadata access for ' + name)
-
-
-class PaletteData(NamedTuple):
- """Returns the color space and binary representation of the palette.
-
- ``base_colorspace`` is typically ``"RGB"`` or ``"L"`` (for grayscale).
-
- ``palette`` is typically 256 or 256*3=768 bytes, for grayscale and RGB color
- respectively, with each unit/triplet being the grayscale/RGB triplet values.
- """
-
- base_colorspace: str
- palette: bytes
-
-
-class PdfImageBase(ABC):
- """Abstract base class for images."""
-
- SIMPLE_COLORSPACES = {'/DeviceRGB', '/DeviceGray', '/CalRGB', '/CalGray'}
- MAIN_COLORSPACES = SIMPLE_COLORSPACES | {'/DeviceCMYK', '/CalCMYK', '/ICCBased'}
- PRINT_COLORSPACES = {'/Separation', '/DeviceN'}
-
- @abstractmethod
- def _metadata(self, name: str, type_: Callable[[Any], T], default: T) -> T:
- """Get metadata for this image type."""
-
- @property
- def width(self) -> int:
- """Width of the image data in pixels."""
- return self._metadata('Width', int, 0)
-
- @property
- def height(self) -> int:
- """Height of the image data in pixels."""
- return self._metadata('Height', int, 0)
-
- @property
- def image_mask(self) -> bool:
- """Return ``True`` if this is an image mask."""
- return self._metadata('ImageMask', bool, False)
-
- @property
- def _bpc(self) -> int | None:
- """Bits per component for this image (low-level)."""
- return self._metadata('BitsPerComponent', int, 0)
-
- @property
- def _colorspaces(self):
- """Colorspace (low-level)."""
- return self._metadata('ColorSpace', _array_str, [])
-
- @property
- def filters(self):
- """List of names of the filters that we applied to encode this image."""
- return self._metadata('Filter', _array_str, [])
-
- @property
- def decode_parms(self):
- """List of the /DecodeParms, arguments to filters."""
- return self._metadata('DecodeParms', _ensure_list, [])
-
- @property
- def colorspace(self) -> str | None:
- """PDF name of the colorspace that best describes this image."""
- if self.image_mask:
- return None # Undefined for image masks
- if self._colorspaces:
- if self._colorspaces[0] in self.MAIN_COLORSPACES:
- return self._colorspaces[0]
- if self._colorspaces[0] == '/Indexed':
- subspace = self._colorspaces[1]
- if isinstance(subspace, str) and subspace in self.MAIN_COLORSPACES:
- return subspace
- if isinstance(subspace, list) and subspace[0] in (
- '/ICCBased',
- '/DeviceN',
- ):
- return subspace[0]
- if self._colorspaces[0] == '/DeviceN':
- return '/DeviceN'
-
- raise NotImplementedError(
- "not sure how to get colorspace: " + repr(self._colorspaces)
- )
-
- @property
- def bits_per_component(self) -> int:
- """Bits per component of this image."""
- if self._bpc is None or self._bpc == 0:
- return 1 if self.image_mask else 8
- return self._bpc
-
- @property
- @abstractmethod
- def icc(self) -> ImageCmsProfile | None:
- """Return ICC profile for this image if one is defined."""
-
- @property
- def indexed(self) -> bool:
- """Check if the image has a defined color palette."""
- return '/Indexed' in self._colorspaces
-
- def _colorspace_has_name(self, name):
- try:
- cs = self._colorspaces
- if cs[0] == '/Indexed' and cs[1][0] == name:
- return True
- if cs[0] == name:
- return True
- except (IndexError, AttributeError, KeyError):
- pass
- return False
-
- @property
- def is_device_n(self) -> bool:
- """Check if image has a /DeviceN (complex printing) colorspace."""
- return self._colorspace_has_name('/DeviceN')
-
- @property
- def is_separation(self) -> bool:
- """Check if image has a /DeviceN (complex printing) colorspace."""
- return self._colorspace_has_name('/Separation')
-
- @property
- def size(self) -> tuple[int, int]:
- """Size of image as (width, height)."""
- return self.width, self.height
-
- def _approx_mode_from_icc(self):
- if self.indexed:
- icc_profile = self._colorspaces[1][1]
- else:
- icc_profile = self._colorspaces[1]
- icc_profile_nchannels = int(icc_profile['/N'])
-
- if icc_profile_nchannels == 1:
- return 'L'
-
- # Multiple channels, need to open the profile and look
- mode_from_xcolor_space = {'RGB ': 'RGB', 'CMYK': 'CMYK'}
- xcolor_space = self.icc.profile.xcolor_space
- return mode_from_xcolor_space.get(xcolor_space, '')
-
- @property
- def mode(self) -> str:
- """``PIL.Image.mode`` equivalent for this image, where possible.
-
- If an ICC profile is attached to the image, we still attempt to resolve a Pillow
- mode.
- """
- m = ''
- if self.is_device_n:
- m = 'DeviceN'
- elif self.is_separation:
- m = 'Separation'
- elif self.indexed:
- m = 'P'
- elif self.colorspace == '/DeviceGray' and self.bits_per_component == 1:
- m = '1'
- elif self.colorspace == '/DeviceGray' and self.bits_per_component > 1:
- m = 'L'
- elif self.colorspace == '/DeviceRGB':
- m = 'RGB'
- elif self.colorspace == '/DeviceCMYK':
- m = 'CMYK'
- elif self.colorspace == '/ICCBased':
- try:
- m = self._approx_mode_from_icc()
- except (ValueError, TypeError) as e:
- raise NotImplementedError(
- "Not sure how to handle PDF image of this type"
- ) from e
- if m == '':
- raise NotImplementedError(
- "Not sure how to handle PDF image of this type"
- ) from None
- return m
-
- @property
- def filter_decodeparms(self):
- """Return normalized the Filter and DecodeParms data.
-
- PDF has a lot of possible data structures concerning /Filter and
- /DecodeParms. /Filter can be absent or a name or an array, /DecodeParms
- can be absent or a dictionary (if /Filter is a name) or an array (if
- /Filter is an array). When both are arrays the lengths match.
-
- Normalize this into:
- [(/FilterName, {/DecodeParmName: Value, ...}), ...]
-
- The order of /Filter matters as indicates the encoding/decoding sequence.
- """
- return list(zip_longest(self.filters, self.decode_parms, fillvalue={}))
-
- @property
- def palette(self) -> PaletteData | None:
- """Retrieve the color palette for this image if applicable."""
- if not self.indexed:
- return None
- try:
- _idx, base, _hival, lookup = self._colorspaces
- except ValueError as e:
- raise ValueError('Not sure how to interpret this palette') from e
- if self.icc or self.is_device_n or self.is_separation:
- base = str(base[0])
- else:
- base = str(base)
- lookup = bytes(lookup)
- if base not in self.MAIN_COLORSPACES and base not in self.PRINT_COLORSPACES:
- raise NotImplementedError(f"not sure how to interpret this palette: {base}")
- if base == '/DeviceRGB':
- base = 'RGB'
- elif base == '/DeviceGray':
- base = 'L'
- elif base == '/DeviceCMYK':
- base = 'CMYK'
- elif base == '/DeviceN':
- base = 'DeviceN'
- elif base == '/Separation':
- base = 'Separation'
- elif base == '/ICCBased':
- base = self._approx_mode_from_icc()
- return PaletteData(base, lookup)
-
- @abstractmethod
- def as_pil_image(self) -> Image.Image:
- """Convert this PDF image to a Python PIL (Pillow) image."""
-
- @staticmethod
- def _remove_simple_filters(obj: Stream, filters: Sequence[str]):
- """Remove simple lossless compression where it appears.
-
- Args:
- obj: the compressed object
- filters: all files on the data
- """
- COMPLEX_FILTERS = {
- '/DCTDecode',
- '/JPXDecode',
- '/JBIG2Decode',
- '/CCITTFaxDecode',
- }
-
- idx = [n for n, item in enumerate(filters) if item in COMPLEX_FILTERS]
- if idx:
- if len(idx) > 1:
- raise NotImplementedError(
- f"Object {obj.objgen} has compound complex filters: {filters}. "
- "We cannot decompress this."
- )
- simple_filters = filters[: idx[0]]
- complex_filters = filters[idx[0] :]
- else:
- simple_filters = filters
- complex_filters = []
-
- if not simple_filters:
- return obj.read_raw_bytes(), complex_filters
-
- original_filters = obj.Filter
- try:
- obj.Filter = Array([Name(s) for s in simple_filters])
- data = obj.read_bytes(StreamDecodeLevel.specialized)
- finally:
- obj.Filter = original_filters
-
- return data, complex_filters
-
-
-class PdfImage(PdfImageBase):
- """Support class to provide a consistent API for manipulating PDF images.
-
- The data structure for images inside PDFs is irregular and complex,
- making it difficult to use without introducing errors for less
- typical cases. This class addresses these difficulties by providing a
- regular, Pythonic API similar in spirit (and convertible to) the Python
- Pillow imaging library.
- """
-
- obj: Stream
- _icc: ImageCmsProfile | None
-
- def __new__(cls, obj):
- """Construct a PdfImage... or a PdfJpxImage if that is what we really are."""
- instance = super().__new__(cls)
- instance.__init__(obj)
- if '/JPXDecode' in instance.filters:
- instance = super().__new__(PdfJpxImage)
- instance.__init__(obj)
- return instance
-
- def __init__(self, obj: Stream):
- """Construct a PDF image from a Image XObject inside a PDF.
-
- ``pim = PdfImage(page.Resources.XObject['/ImageNN'])``
-
- Args:
- obj: an Image XObject
- """
- if isinstance(obj, Stream) and obj.stream_dict.get("/Subtype") != "/Image":
- raise TypeError("can't construct PdfImage from non-image")
- self.obj = obj
- self._icc = None
-
- def __eq__(self, other):
- if not isinstance(other, PdfImageBase):
- return NotImplemented
- return self.obj == other.obj
-
- @classmethod
- def _from_pil_image(cls, *, pdf, page, name, image): # pragma: no cover
- """Insert a PIL image into a PDF (rudimentary).
-
- Args:
- pdf (pikepdf.Pdf): the PDF to attach the image to
- page (pikepdf.Object): the page to attach the image to
- name (str or pikepdf.Name): the name to set the image
- image (PIL.Image.Image): the image to insert
- """
- data = image.tobytes()
-
- imstream = Stream(pdf, data)
- imstream.Type = Name('/XObject')
- imstream.Subtype = Name('/Image')
- if image.mode == 'RGB':
- imstream.ColorSpace = Name('/DeviceRGB')
- elif image.mode in ('1', 'L'):
- imstream.ColorSpace = Name('/DeviceGray')
- imstream.BitsPerComponent = 1 if image.mode == '1' else 8
- imstream.Width = image.width
- imstream.Height = image.height
-
- page.Resources.XObject[name] = imstream
-
- return cls(imstream)
-
- def _metadata(self, name, type_, default):
- return _metadata_from_obj(self.obj, name, type_, default)
-
- @property
- def _iccstream(self):
- if self.colorspace == '/ICCBased':
- if not self.indexed:
- return self._colorspaces[1]
- assert isinstance(self._colorspaces[1], list)
- return self._colorspaces[1][1]
- raise NotImplementedError("Don't know how to find ICC stream for image")
-
- @property
- def icc(self) -> ImageCmsProfile | None:
- """If an ICC profile is attached, return a Pillow object that describe it.
-
- Most of the information may be found in ``icc.profile``.
- """
- if self.colorspace not in ('/ICCBased', '/Indexed'):
- return None
- if not self._icc:
- iccstream = self._iccstream
- iccbuffer = iccstream.get_stream_buffer()
- iccbytesio = BytesIO(iccbuffer)
- try:
- self._icc = ImageCmsProfile(iccbytesio)
- except OSError as e:
- if str(e) == 'cannot open profile from string':
- # ICC profile is corrupt
- raise UnsupportedImageTypeError(
- "ICC profile corrupt or not readable"
- ) from e
- return self._icc
-
- def _extract_direct(self, *, stream: BinaryIO) -> str:
- """Attempt to extract the image directly to a usable image file.
-
- If there is no way to extract the image without decompressing or
- transcoding then raise an exception. The type and format of image
- generated will vary.
-
- Args:
- stream: Writable file stream to write data to, e.g. an open file
- """
-
- def normal_dct_rgb() -> bool:
- # Normal DCTDecode RGB images have the default value of
- # /ColorTransform 1 and are actually in YUV. Such a file can be
- # saved as a standard JPEG. RGB JPEGs without YUV conversion can't
- # be saved as JPEGs, and are probably bugs. Some software in the
- # wild actually produces RGB JPEGs in PDFs (probably a bug).
- DEFAULT_CT_RGB = 1
- ct = self.filter_decodeparms[0][1].get('/ColorTransform', DEFAULT_CT_RGB)
- return self.mode == 'RGB' and ct == DEFAULT_CT_RGB
-
- def normal_dct_cmyk() -> bool:
- # Normal DCTDecode CMYKs have /ColorTransform 0 and can be saved.
- # There is a YUVK colorspace but CMYK JPEGs don't generally use it
- DEFAULT_CT_CMYK = 0
- ct = self.filter_decodeparms[0][1].get('/ColorTransform', DEFAULT_CT_CMYK)
- return self.mode == 'CMYK' and ct == DEFAULT_CT_CMYK
-
- data, filters = self._remove_simple_filters(self.obj, self.filters)
-
- if filters == ['/CCITTFaxDecode']:
- if self.colorspace == '/ICCBased':
- icc = self._iccstream.read_bytes()
- else:
- icc = None
- stream.write(self._generate_ccitt_header(data, icc=icc))
- stream.write(data)
- return '.tif'
- if filters == ['/DCTDecode'] and (
- self.mode == 'L' or normal_dct_rgb() or normal_dct_cmyk()
- ):
- stream.write(data)
- return '.jpg'
-
- raise NotExtractableError()
-
- def _extract_transcoded_1248bits(self) -> Image.Image:
- """Extract an image when there are 1/2/4/8 bits packed in byte data."""
- stride = 0 # tell Pillow to calculate stride from line width
- scale = 0 if self.mode == 'L' else 1
- if self.bits_per_component in (2, 4):
- buffer, stride = _transcoding.unpack_subbyte_pixels(
- self.read_bytes(), self.size, self.bits_per_component, scale
- )
- elif self.bits_per_component == 8:
- buffer = cast(memoryview, self.get_stream_buffer())
- else:
- raise InvalidPdfImageError("BitsPerComponent must be 1, 2, 4, 8, or 16")
-
- if self.mode == 'P' and self.palette is not None:
- base_mode, palette = self.palette
- im = _transcoding.image_from_buffer_and_palette(
- buffer,
- self.size,
- stride,
- base_mode,
- palette,
- )
- else:
- im = _transcoding.image_from_byte_buffer(buffer, self.size, stride)
- return im
-
- def _extract_transcoded_1bit(self) -> Image.Image:
- if self.mode in ('RGB', 'CMYK'):
- raise UnsupportedImageTypeError("1-bit RGB and CMYK are not supported")
- try:
- data = self.read_bytes()
- except (RuntimeError, PdfError) as e:
- if (
- 'read_bytes called on unfilterable stream' in str(e)
- and not jbig2.get_decoder().available()
- ):
- raise DependencyError(
- "jbig2dec - not installed or installed version is too old "
- "(older than version 0.15)"
- ) from None
- raise
-
- im = Image.frombytes('1', self.size, data)
-
- if self.palette is not None:
- base_mode, palette = self.palette
- im = _transcoding.fix_1bit_palette_image(im, base_mode, palette)
-
- return im
-
- def _extract_transcoded(self) -> Image.Image:
- if self.mode in {'DeviceN', 'Separation'}:
- raise HifiPrintImageNotTranscodableError()
-
- if self.mode == 'RGB' and self.bits_per_component == 8:
- # Cannot use the zero-copy .get_stream_buffer here, we have 3-byte
- # RGB and Pillow needs RGBX.
- im = Image.frombuffer(
- 'RGB', self.size, self.read_bytes(), 'raw', 'RGB', 0, 1
- )
- elif self.mode == 'CMYK' and self.bits_per_component == 8:
- im = Image.frombuffer(
- 'CMYK', self.size, self.get_stream_buffer(), 'raw', 'CMYK', 0, 1
- )
- # elif self.mode == '1':
- elif self.bits_per_component == 1:
- im = self._extract_transcoded_1bit()
- elif self.mode in ('L', 'P') and self.bits_per_component <= 8:
- im = self._extract_transcoded_1248bits()
- else:
- raise UnsupportedImageTypeError(repr(self) + ", " + repr(self.obj))
-
- if self.colorspace == '/ICCBased' and self.icc is not None:
- im.info['icc_profile'] = self.icc.tobytes()
-
- return im
-
- def _extract_to_stream(self, *, stream: BinaryIO) -> str:
- """Extract the image to a stream.
-
- If possible, the compressed data is extracted and inserted into
- a compressed image file format without transcoding the compressed
- content. If this is not possible, the data will be decompressed
- and extracted to an appropriate format.
-
- Args:
- stream: Writable stream to write data to
-
- Returns:
- The file format extension.
- """
- try:
- return self._extract_direct(stream=stream)
- except NotExtractableError:
- pass
-
- im = None
- try:
- im = self._extract_transcoded()
- if im.mode == 'CMYK':
- im.save(stream, format='tiff', compression='tiff_adobe_deflate')
- return '.tiff'
- if im:
- im.save(stream, format='png')
- return '.png'
- except PdfError as e:
- if 'called on unfilterable stream' in str(e):
- raise UnsupportedImageTypeError(repr(self)) from e
- raise
- finally:
- if im:
- im.close()
-
- raise UnsupportedImageTypeError(repr(self))
-
- def extract_to(
- self, *, stream: BinaryIO | None = None, fileprefix: str = ''
- ) -> str:
- """Extract the image directly to a usable image file.
-
- If possible, the compressed data is extracted and inserted into
- a compressed image file format without transcoding the compressed
- content. If this is not possible, the data will be decompressed
- and extracted to an appropriate format.
-
- Because it is not known until attempted what image format will be
- extracted, users should not assume what format they are getting back.
- When saving the image to a file, use a temporary filename, and then
- rename the file to its final name based on the returned file extension.
-
- Images might be saved as any of .png, .jpg, or .tiff.
-
- Examples:
- >>> im.extract_to(stream=bytes_io)
- '.png'
-
- >>> im.extract_to(fileprefix='/tmp/image00')
- '/tmp/image00.jpg'
-
- Args:
- stream: Writable stream to write data to.
- fileprefix (str or Path): The path to write the extracted image to,
- without the file extension.
-
- Returns:
- If *fileprefix* was provided, then the fileprefix with the
- appropriate extension. If no *fileprefix*, then an extension
- indicating the file type.
- """
- if bool(stream) == bool(fileprefix):
- raise ValueError("Cannot set both stream and fileprefix")
- if stream:
- return self._extract_to_stream(stream=stream)
-
- bio = BytesIO()
- extension = self._extract_to_stream(stream=bio)
- bio.seek(0)
- filepath = Path(str(Path(fileprefix)) + extension)
- with filepath.open('wb') as target:
- copyfileobj(bio, target)
- return str(filepath)
-
- def read_bytes(
- self, decode_level: StreamDecodeLevel = StreamDecodeLevel.specialized
- ) -> bytes:
- """Decompress this image and return it as unencoded bytes."""
- return self.obj.read_bytes(decode_level=decode_level)
-
- def get_stream_buffer(
- self, decode_level: StreamDecodeLevel = StreamDecodeLevel.specialized
- ) -> Buffer:
- """Access this image with the buffer protocol."""
- return self.obj.get_stream_buffer(decode_level=decode_level)
-
- def as_pil_image(self) -> Image.Image:
- """Extract the image as a Pillow Image, using decompression as necessary.
-
- Caller must close the image.
- """
- try:
- bio = BytesIO()
- self._extract_direct(stream=bio)
- bio.seek(0)
- return Image.open(bio)
- except NotExtractableError:
- pass
-
- im = self._extract_transcoded()
- if not im:
- raise UnsupportedImageTypeError(repr(self))
-
- return im
-
- def _generate_ccitt_header(self, data: bytes, icc: bytes | None = None) -> bytes:
- """Construct a CCITT G3 or G4 header from the PDF metadata."""
- # https://stackoverflow.com/questions/2641770/
- # https://www.itu.int/itudoc/itu-t/com16/tiff-fx/docs/tiff6.pdf
-
- if not self.decode_parms:
- raise ValueError("/CCITTFaxDecode without /DecodeParms")
- if self.decode_parms[0].get("/EncodedByteAlign", False):
- raise UnsupportedImageTypeError(
- "/CCITTFaxDecode with /EncodedByteAlign true"
- )
-
- k = self.decode_parms[0].get("/K", 0)
- if k < 0:
- ccitt_group = 4 # Pure two-dimensional encoding (Group 4)
- elif k > 0:
- ccitt_group = 3 # Group 3 2-D
- else:
- ccitt_group = 2 # Group 3 1-D
- _black_is_one = self.decode_parms[0].get("/BlackIs1", False)
- # PDF spec says:
- # BlackIs1: A flag indicating whether 1 bits shall be interpreted as black
- # pixels and 0 bits as white pixels, the reverse of the normal
- # PDF convention for image data. Default value: false.
- # TIFF spec says:
- # use 0 for white_is_zero (=> black is 1) MINISWHITE
- # use 1 for black_is_zero (=> white is 1) MINISBLACK
- # However, despite the documentation, it seems PDF viewers treat
- # photometry as 0 when ccitt is involved.
- # For example see
- # https://gitlab.gnome.org/GNOME/evince/-/blob/main/backend/tiff/tiff2ps.c#L852-865
- photometry = 0
-
- img_size = len(data)
- if icc is None:
- icc = b''
- return _transcoding.generate_ccitt_header(
- self.size, img_size, ccitt_group, photometry, icc
- )
-
- def show(self): # pragma: no cover
- """Show the image however PIL wants to."""
- self.as_pil_image().show()
-
- def __repr__(self):
- return (
- f'<pikepdf.PdfImage image mode={self.mode} '
- f'size={self.width}x{self.height} at {hex(id(self))}>'
- )
-
- def _repr_png_(self) -> bytes:
- """Display hook for IPython/Jupyter."""
- b = BytesIO()
- with self.as_pil_image() as im:
- im.save(b, 'PNG')
- return b.getvalue()
-
-
-class PdfJpxImage(PdfImage):
- """Support class for JPEG 2000 images. Implements the same API as :class:`PdfImage`.
-
- If you call PdfImage(object_that_is_actually_jpeg2000_image), pikepdf will return
- this class instead, due to the check in PdfImage.__new__.
- """
-
- def __init__(self, obj):
- """Initialize a JPEG 2000 image."""
- super().__init__(obj)
- self._jpxpil = self.as_pil_image()
-
- def __eq__(self, other):
- if not isinstance(other, PdfImageBase):
- return NotImplemented
- return (
- self.obj == other.obj
- and isinstance(other, PdfJpxImage)
- and self._jpxpil == other._jpxpil
- )
-
- def _extract_direct(self, *, stream: BinaryIO):
- data, filters = self._remove_simple_filters(self.obj, self.filters)
- if filters != ['/JPXDecode']:
- raise UnsupportedImageTypeError(self.filters)
- stream.write(data)
- return '.jp2'
-
- @property
- def _colorspaces(self):
- """Return the effective colorspace of a JPEG 2000 image.
-
- If the ColorSpace dictionary is present, the colorspace embedded in the
- JPEG 2000 data will be ignored, as required by the specification.
- """
- # (PDF 1.7 Table 89) If ColorSpace is present, any colour space
- # specifications in the JPEG2000 data shall be ignored.
- super_colorspaces = super()._colorspaces
- if super_colorspaces:
- return super_colorspaces
- if self._jpxpil.mode == 'L':
- return ['/DeviceGray']
- if self._jpxpil.mode == 'RGB':
- return ['/DeviceRGB']
- raise NotImplementedError('Complex JP2 colorspace')
-
- @property
- def _bpc(self) -> int:
- """Return 8, since bpc is not meaningful for JPEG 2000 encoding."""
- # (PDF 1.7 Table 89) If the image stream uses the JPXDecode filter, this
- # entry is optional and shall be ignored if present. The bit depth is
- # determined by the conforming reader in the process of decoding the
- # JPEG2000 image.
- return 8
-
- @property
- def indexed(self) -> bool:
- """Return False, since JPEG 2000 should not be indexed."""
- # Nothing in the spec precludes an Indexed JPXDecode image, except for
- # the fact that doing so is madness. Let's assume it no one is that
- # insane.
- return False
-
- def __repr__(self):
- return (
- f'<pikepdf.PdfJpxImage JPEG2000 image mode={self.mode} '
- f'size={self.width}x{self.height} at {hex(id(self))}>'
- )
-
-
-class PdfInlineImage(PdfImageBase):
- """Support class for PDF inline images. Implements the same API as :class:`PdfImage`."""
-
- # Inline images can contain abbreviations that we write automatically
- ABBREVS = {
- b'/W': b'/Width',
- b'/H': b'/Height',
- b'/BPC': b'/BitsPerComponent',
- b'/IM': b'/ImageMask',
- b'/CS': b'/ColorSpace',
- b'/F': b'/Filter',
- b'/DP': b'/DecodeParms',
- b'/G': b'/DeviceGray',
- b'/RGB': b'/DeviceRGB',
- b'/CMYK': b'/DeviceCMYK',
- b'/I': b'/Indexed',
- b'/AHx': b'/ASCIIHexDecode',
- b'/A85': b'/ASCII85Decode',
- b'/LZW': b'/LZWDecode',
- b'/RL': b'/RunLengthDecode',
- b'/CCF': b'/CCITTFaxDecode',
- b'/DCT': b'/DCTDecode',
- }
- REVERSE_ABBREVS = {v: k for k, v in ABBREVS.items()}
-
- _data: Object
- _image_object: tuple[Object, ...]
-
- def __init__(self, *, image_data: Object, image_object: tuple):
- """Construct wrapper for inline image.
-
- Args:
- image_data: data stream for image, extracted from content stream
- image_object: the metadata for image, also from content stream
- """
- # Convert the sequence of pikepdf.Object from the content stream into
- # a dictionary object by unparsing it (to bytes), eliminating inline
- # image abbreviations, and constructing a bytes string equivalent to
- # what an image XObject would look like. Then retrieve data from there
-
- self._data = image_data
- self._image_object = image_object
-
- reparse = b' '.join(
- self._unparse_obj(obj, remap_names=self.ABBREVS) for obj in image_object
- )
- try:
- reparsed_obj = Object.parse(b'<< ' + reparse + b' >>')
- except PdfError as e:
- raise PdfError("parsing inline " + reparse.decode('unicode_escape')) from e
- self.obj = reparsed_obj
-
- def __eq__(self, other):
- if not isinstance(other, PdfImageBase):
- return NotImplemented
- return (
- self.obj == other.obj
- and isinstance(other, PdfInlineImage)
- and (
- self._data._inline_image_raw_bytes()
- == other._data._inline_image_raw_bytes()
- )
- )
-
- @classmethod
- def _unparse_obj(cls, obj, remap_names):
- if isinstance(obj, Object):
- if isinstance(obj, Name):
- name = obj.unparse(resolved=True)
- assert isinstance(name, bytes)
- return remap_names.get(name, name)
- return obj.unparse(resolved=True)
- if isinstance(obj, bool):
- return b'true' if obj else b'false' # Lower case for PDF spec
- if isinstance(obj, (int, Decimal, float)):
- return str(obj).encode('ascii')
- raise NotImplementedError(repr(obj))
-
- def _metadata(self, name, type_, default):
- return _metadata_from_obj(self.obj, name, type_, default)
-
- def unparse(self) -> bytes:
- """Create the content stream bytes that reproduce this inline image."""
-
- def metadata_tokens():
- for metadata_obj in self._image_object:
- unparsed = self._unparse_obj(
- metadata_obj, remap_names=self.REVERSE_ABBREVS
- )
- assert isinstance(unparsed, bytes)
- yield unparsed
-
- def inline_image_tokens():
- yield b'BI\n'
- yield b' '.join(m for m in metadata_tokens())
- yield b'\nID\n'
- yield self._data._inline_image_raw_bytes()
- yield b'EI'
-
- return b''.join(inline_image_tokens())
-
- @property
- def icc(self): # pragma: no cover
- """Raise an exception since ICC profiles are not supported on inline images."""
- raise InvalidPdfImageError(
- "Inline images with ICC profiles are not supported in the PDF specification"
- )
-
- def __repr__(self):
- try:
- mode = self.mode
- except NotImplementedError:
- mode = '?'
- return (
- f'<pikepdf.PdfInlineImage image mode={mode} '
- f'size={self.width}x{self.height} at {hex(id(self))}>'
- )
-
- def _convert_to_pdfimage(self):
- # Construct a temporary PDF that holds this inline image, and...
- tmppdf = Pdf.new()
- tmppdf.add_blank_page(page_size=(self.width, self.height))
- tmppdf.pages[0].contents_add(
- f'{self.width} 0 0 {self.height} 0 0 cm'.encode('ascii'), prepend=True
- )
- tmppdf.pages[0].contents_add(self.unparse())
-
- # ...externalize it,
- tmppdf.pages[0].externalize_inline_images()
- raw_img = next(im for im in tmppdf.pages[0].images.values())
-
- # ...then use the regular PdfImage API to extract it.
- img = PdfImage(raw_img)
- return img
-
- def as_pil_image(self) -> Image.Image:
- """Return inline image as a Pillow Image."""
- return self._convert_to_pdfimage().as_pil_image()
-
- def extract_to(self, *, stream: BinaryIO | None = None, fileprefix: str = ''):
- """Extract the inline image directly to a usable image file.
-
- See:
- :meth:`PdfImage.extract_to`
- """
- return self._convert_to_pdfimage().extract_to(
- stream=stream, fileprefix=fileprefix
- )
-
- def read_bytes(self):
- """Return decompressed image bytes."""
- # QPDF does not have an API to return this directly, so convert it.
- return self._convert_to_pdfimage().read_bytes()
-
- def get_stream_buffer(self):
- """Return decompressed stream buffer."""
- # QPDF does not have an API to return this directly, so convert it.
- return self._convert_to_pdfimage().get_stream_buffer()
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/matrix.py b/env/lib/python3.10/site-packages/pikepdf/models/matrix.py
deleted file mode 100644
index c660320..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/matrix.py
+++ /dev/null
@@ -1,145 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""PDF content matrix support."""
-
-from __future__ import annotations
-
-from math import cos, pi, sin
-
-
-class PdfMatrix:
- """
- Support class for PDF content stream matrices.
-
- PDF content stream matrices are 3x3 matrices summarized by a shorthand
- ``(a, b, c, d, e, f)`` which correspond to the first two column vectors.
- The final column vector is always ``(0, 0, 1)`` since this is using
- `homogenous coordinates <https://en.wikipedia.org/wiki/Homogeneous_coordinates>`_.
-
- PDF uses row vectors. That is, ``vr @ A'`` gives the effect of transforming
- a row vector ``vr=(x, y, 1)`` by the matrix ``A'``. Most textbook
- treatments use ``A @ vc`` where the column vector ``vc=(x, y, 1)'``.
-
- (``@`` is the Python matrix multiplication operator.)
-
- Addition and other operations are not implemented because they're not that
- meaningful in a PDF context (they can be defined and are mathematically
- meaningful in general).
-
- PdfMatrix objects are immutable. All transformations on them produce a new
- matrix.
-
- """
-
- def __init__(self, *args):
- # fmt: off
- if not args:
- self.values = ((1, 0, 0), (0, 1, 0), (0, 0, 1))
- elif len(args) == 6:
- a, b, c, d, e, f = map(float, args)
- self.values = ((a, b, 0),
- (c, d, 0),
- (e, f, 1))
- elif isinstance(args[0], PdfMatrix):
- self.values = args[0].values
- elif len(args[0]) == 6:
- a, b, c, d, e, f = map(float, args[0])
- self.values = ((a, b, 0),
- (c, d, 0),
- (e, f, 1))
- elif len(args[0]) == 3 and len(args[0][0]) == 3:
- self.values = (tuple(args[0][0]),
- tuple(args[0][1]),
- tuple(args[0][2]))
- else:
- raise ValueError('invalid arguments: ' + repr(args))
- # fmt: on
-
- @staticmethod
- def identity():
- """Constructs and returns an identity matrix."""
- return PdfMatrix()
-
- def __matmul__(self, other):
- """Multiply this matrix by another matrix.
-
- Can be used to concatenate transformations.
- """
- a = self.values
- b = other.values
- return PdfMatrix(
- [
- [sum(float(i) * float(j) for i, j in zip(row, col)) for col in zip(*b)]
- for row in a
- ]
- )
-
- def scaled(self, x, y):
- """Concatenates a scaling matrix on this matrix."""
- return self @ PdfMatrix((x, 0, 0, y, 0, 0))
-
- def rotated(self, angle_degrees_ccw):
- """Concatenates a rotation matrix on this matrix."""
- angle = angle_degrees_ccw / 180.0 * pi
- c, s = cos(angle), sin(angle)
- return self @ PdfMatrix((c, s, -s, c, 0, 0))
-
- def translated(self, x, y):
- """Translates this matrix."""
- return self @ PdfMatrix((1, 0, 0, 1, x, y))
-
- @property
- def shorthand(self):
- """Return the 6-tuple (a,b,c,d,e,f) that describes this matrix."""
- return (self.a, self.b, self.c, self.d, self.e, self.f)
-
- @property
- def a(self):
- """Return matrix this value."""
- return self.values[0][0]
-
- @property
- def b(self):
- """Return matrix this value."""
- return self.values[0][1]
-
- @property
- def c(self):
- """Return matrix this value."""
- return self.values[1][0]
-
- @property
- def d(self):
- """Return matrix this value."""
- return self.values[1][1]
-
- @property
- def e(self):
- """Return matrix this value.
-
- Typically corresponds to translation on the x-axis.
- """
- return self.values[2][0]
-
- @property
- def f(self):
- """Return matrix this value.
-
- Typically corresponds to translation on the y-axis.
- """
- return self.values[2][1]
-
- def __eq__(self, other):
- if isinstance(other, PdfMatrix):
- return self.shorthand == other.shorthand
- return False
-
- def encode(self):
- """Encode this matrix in binary suitable for including in a PDF."""
- return '{:.6f} {:.6f} {:.6f} {:.6f} {:.6f} {:.6f}'.format(
- self.a, self.b, self.c, self.d, self.e, self.f
- ).encode()
-
- def __repr__(self):
- return f"pikepdf.PdfMatrix({repr(self.values)})"
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/metadata.py b/env/lib/python3.10/site-packages/pikepdf/models/metadata.py
deleted file mode 100644
index 62158b1..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/metadata.py
+++ /dev/null
@@ -1,866 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""PDF metadata handling."""
-
-from __future__ import annotations
-
-import logging
-import re
-import sys
-from abc import ABC, abstractmethod
-from datetime import datetime
-from functools import wraps
-from io import BytesIO
-from typing import TYPE_CHECKING, Any, Callable, NamedTuple, Set
-from warnings import warn
-
-from lxml import etree
-from lxml.etree import QName, XMLSyntaxError
-
-from .. import Name, Stream, String
-from .. import __version__ as pikepdf_version
-from .._xml import parse_xml
-
-if sys.version_info < (3, 9): # pragma: no cover
- from typing import Iterable, MutableMapping
-else:
- from collections.abc import Iterable, MutableMapping
-
-if TYPE_CHECKING: # pragma: no cover
- from pikepdf import Pdf
-
-
-XMP_NS_DC = "http://purl.org/dc/elements/1.1/"
-XMP_NS_PDF = "http://ns.adobe.com/pdf/1.3/"
-XMP_NS_PDFA_ID = "http://www.aiim.org/pdfa/ns/id/"
-XMP_NS_PDFX_ID = "http://www.npes.org/pdfx/ns/id/"
-XMP_NS_PHOTOSHOP = "http://ns.adobe.com/photoshop/1.0/"
-XMP_NS_PRISM = "http://prismstandard.org/namespaces/basic/1.0/"
-XMP_NS_PRISM2 = "http://prismstandard.org/namespaces/basic/2.0/"
-XMP_NS_PRISM3 = "http://prismstandard.org/namespaces/basic/3.0/"
-XMP_NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-XMP_NS_XMP = "http://ns.adobe.com/xap/1.0/"
-XMP_NS_XMP_MM = "http://ns.adobe.com/xap/1.0/mm/"
-XMP_NS_XMP_RIGHTS = "http://ns.adobe.com/xap/1.0/rights/"
-
-DEFAULT_NAMESPACES: list[tuple[str, str]] = [
- ('adobe:ns:meta/', 'x'),
- (XMP_NS_DC, 'dc'),
- (XMP_NS_PDF, 'pdf'),
- (XMP_NS_PDFA_ID, 'pdfaid'),
- (XMP_NS_PDFX_ID, 'pdfxid'),
- (XMP_NS_PHOTOSHOP, 'photoshop'),
- (XMP_NS_PRISM, 'prism'),
- (XMP_NS_PRISM2, 'prism2'),
- (XMP_NS_PRISM3, 'prism3'),
- (XMP_NS_RDF, 'rdf'),
- (XMP_NS_XMP, 'xmp'),
- (XMP_NS_XMP_MM, 'xmpMM'),
- (XMP_NS_XMP_RIGHTS, 'xmpRights'),
-]
-
-for _uri, _prefix in DEFAULT_NAMESPACES:
- etree.register_namespace(_prefix, _uri)
-
-# This one should not be registered
-XMP_NS_XML = "http://www.w3.org/XML/1998/namespace"
-
-XPACKET_BEGIN = b"""<?xpacket begin="\xef\xbb\xbf" id="W5M0MpCehiHzreSzNTczkc9d"?>\n"""
-
-XMP_EMPTY = b"""<x:xmpmeta xmlns:x="adobe:ns:meta/" x:xmptk="pikepdf">
- <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
- </rdf:RDF>
-</x:xmpmeta>
-"""
-
-XPACKET_END = b"""\n<?xpacket end="w"?>\n"""
-
-
-class XmpContainer(NamedTuple):
- """Map XMP container object to suitable Python container."""
-
- rdf_type: str
- py_type: type
- insert_fn: Callable[..., None]
-
-
-log = logging.getLogger(__name__)
-
-
-class NeverRaise(Exception):
- """An exception that is never raised."""
-
-
-class AltList(list):
- """XMP AltList container."""
-
-
-XMP_CONTAINERS = [
- XmpContainer('Alt', AltList, AltList.append),
- XmpContainer('Bag', set, set.add),
- XmpContainer('Seq', list, list.append),
-]
-
-LANG_ALTS = frozenset(
- [
- str(QName(XMP_NS_DC, 'title')),
- str(QName(XMP_NS_DC, 'description')),
- str(QName(XMP_NS_DC, 'rights')),
- str(QName(XMP_NS_XMP_RIGHTS, 'UsageTerms')),
- ]
-)
-
-# These are the illegal characters in XML 1.0. (XML 1.1 is a bit more permissive,
-# but we'll be strict to ensure wider compatibility.)
-re_xml_illegal_chars = re.compile(
- r"(?u)[^\x09\x0A\x0D\x20-\U0000D7FF\U0000E000-\U0000FFFD\U00010000-\U0010FFFF]"
-)
-re_xml_illegal_bytes = re.compile(
- br"[^\x09\x0A\x0D\x20-\xFF]|&#0;"
- # br"&#(?:[0-9]|0[0-9]|1[0-9]|2[0-9]|3[0-1]|x[0-9A-Fa-f]|x0[0-9A-Fa-f]|x1[0-9A-Fa-f]);"
-)
-
-
-def _parser_basic(xml: bytes):
- return parse_xml(BytesIO(xml))
-
-
-def _parser_strip_illegal_bytes(xml: bytes):
- return parse_xml(BytesIO(re_xml_illegal_bytes.sub(b'', xml)))
-
-
-def _parser_recovery(xml: bytes):
- return parse_xml(BytesIO(xml), recover=True)
-
-
-def _parser_replace_with_empty_xmp(_xml: bytes = b''):
- log.warning("Error occurred parsing XMP, replacing with empty XMP.")
- return _parser_basic(XMP_EMPTY)
-
-
-def _clean(s: str | Iterable[str], joiner: str = '; ') -> str:
- """Ensure an object can safely be inserted in a XML tag body.
-
- If we still have a non-str object at this point, the best option is to
- join it, because it's apparently calling for a new node in a place that
- isn't allowed in the spec or not supported.
- """
- if not isinstance(s, str):
- if isinstance(s, Iterable):
- warn(f"Merging elements of {s}")
- if isinstance(s, Set):
- s = joiner.join(sorted(s))
- else:
- s = joiner.join(s)
- else:
- raise TypeError("object must be a string or iterable of strings")
- return re_xml_illegal_chars.sub('', s)
-
-
-def encode_pdf_date(d: datetime) -> str:
- """Encode Python datetime object as PDF date string.
-
- From Adobe pdfmark manual:
- (D:YYYYMMDDHHmmSSOHH'mm')
- D: is an optional prefix. YYYY is the year. All fields after the year are
- optional. MM is the month (01-12), DD is the day (01-31), HH is the
- hour (00-23), mm are the minutes (00-59), and SS are the seconds
- (00-59). The remainder of the string defines the relation of local
- time to GMT. O is either + for a positive difference (local time is
- later than GMT) or - (minus) for a negative difference. HH' is the
- absolute value of the offset from GMT in hours, and mm' is the
- absolute value of the offset in minutes. If no GMT information is
- specified, the relation between the specified time and GMT is
- considered unknown. Regardless of whether or not GMT
- information is specified, the remainder of the string should specify
- the local time.
-
- 'D:' is required in PDF/A, so we always add it.
- """
- # The formatting of %Y is not consistent as described in
- # https://bugs.python.org/issue13305 and underspecification in libc.
- # So explicitly format the year with leading zeros
- s = f"D:{d.year:04d}"
- s += d.strftime(r'%m%d%H%M%S')
- tz = d.strftime('%z')
- if tz:
- sign, tz_hours, tz_mins = tz[0], tz[1:3], tz[3:5]
- s += f"{sign}{tz_hours}'{tz_mins}'"
- return s
-
-
-def decode_pdf_date(s: str) -> datetime:
- """Decode a pdfmark date to a Python datetime object.
-
- A pdfmark date is a string in a paritcular format. See the pdfmark
- Reference for the specification.
- """
- if isinstance(s, String):
- s = str(s)
- if s.startswith('D:'):
- s = s[2:]
-
- # Literal Z00'00', is incorrect but found in the wild,
- # probably made by OS X Quartz -- standardize
- if s.endswith("Z00'00'"):
- s = s.replace("Z00'00'", '+0000')
- elif s.endswith('Z'):
- s = s.replace('Z', '+0000')
- s = s.replace("'", "") # Remove apos from PDF time strings
- try:
- return datetime.strptime(s, r'%Y%m%d%H%M%S%z')
- except ValueError:
- return datetime.strptime(s, r'%Y%m%d%H%M%S')
-
-
-class Converter(ABC):
- """XMP <-> DocumentInfo converter."""
-
- @staticmethod
- @abstractmethod
- def xmp_from_docinfo(docinfo_val: str | None) -> Any: # type: ignore
- """Derive XMP metadata from a DocumentInfo string."""
-
- @staticmethod
- @abstractmethod
- def docinfo_from_xmp(xmp_val: Any) -> str | None:
- """Derive a DocumentInfo value from equivalent XMP metadata."""
-
-
-class AuthorConverter(Converter):
- """Convert XMP document authors to DocumentInfo."""
-
- @staticmethod
- def xmp_from_docinfo(docinfo_val: str | None) -> Any: # type: ignore
- """Derive XMP authors info from DocumentInfo."""
- return [docinfo_val]
-
- @staticmethod
- def docinfo_from_xmp(xmp_val):
- """Derive DocumentInfo authors from XMP.
-
- XMP supports multiple author values, while DocumentInfo has a string,
- so we return the values separated by semi-colons.
- """
- if isinstance(xmp_val, str):
- return xmp_val
- if xmp_val is None or xmp_val == [None]:
- return None
- return '; '.join(xmp_val)
-
-
-class DateConverter(Converter):
- """Convert XMP dates to DocumentInfo."""
-
- @staticmethod
- def xmp_from_docinfo(docinfo_val):
- """Derive XMP date from DocumentInfo."""
- if docinfo_val == '':
- return ''
- return decode_pdf_date(docinfo_val).isoformat()
-
- @staticmethod
- def docinfo_from_xmp(xmp_val):
- """Derive DocumentInfo from XMP."""
- if xmp_val.endswith('Z'):
- xmp_val = xmp_val[:-1] + '+00:00'
- try:
- dateobj = datetime.fromisoformat(xmp_val)
- except IndexError:
- # PyPy 3.7 may raise IndexError - convert to ValueError
- raise ValueError(f"Invalid isoformat string: '{xmp_val}'") from None
- return encode_pdf_date(dateobj)
-
-
-class DocinfoMapping(NamedTuple):
- """Map DocumentInfo keys to their XMP equivalents, along with converter."""
-
- ns: str
- key: str
- name: Name
- converter: type[Converter] | None
-
-
-def ensure_loaded(fn):
- """Ensure the XMP has been loaded and parsed.
-
- TODO: Can this be removed? Why allow the uninit'ed state to even exist?
- """
-
- @wraps(fn)
- def wrapper(self, *args, **kwargs):
- if not self._xmp:
- self._load()
- return fn(self, *args, **kwargs)
-
- return wrapper
-
-
-class PdfMetadata(MutableMapping):
- """Read and edit the metadata associated with a PDF.
-
- The PDF specification contain two types of metadata, the newer XMP
- (Extensible Metadata Platform, XML-based) and older DocumentInformation
- dictionary. The PDF 2.0 specification removes the DocumentInformation
- dictionary.
-
- This primarily works with XMP metadata, but includes methods to generate
- XMP from DocumentInformation and will also coordinate updates to
- DocumentInformation so that the two are kept consistent.
-
- XMP metadata fields may be accessed using the full XML namespace URI or
- the short name. For example ``metadata['dc:description']``
- and ``metadata['{http://purl.org/dc/elements/1.1/}description']``
- both refer to the same field. Several common XML namespaces are registered
- automatically.
-
- See the XMP specification for details of allowable fields.
-
- To update metadata, use a with block.
-
- Example:
-
- >>> with pdf.open_metadata() as records:
- records['dc:title'] = 'New Title'
-
- See Also:
- :meth:`pikepdf.Pdf.open_metadata`
- """
-
- DOCINFO_MAPPING: list[DocinfoMapping] = [
- DocinfoMapping(XMP_NS_DC, 'creator', Name.Author, AuthorConverter),
- DocinfoMapping(XMP_NS_DC, 'description', Name.Subject, None),
- DocinfoMapping(XMP_NS_DC, 'title', Name.Title, None),
- DocinfoMapping(XMP_NS_PDF, 'Keywords', Name.Keywords, None),
- DocinfoMapping(XMP_NS_PDF, 'Producer', Name.Producer, None),
- DocinfoMapping(XMP_NS_XMP, 'CreateDate', Name.CreationDate, DateConverter),
- DocinfoMapping(XMP_NS_XMP, 'CreatorTool', Name.Creator, None),
- DocinfoMapping(XMP_NS_XMP, 'ModifyDate', Name.ModDate, DateConverter),
- ]
-
- NS: dict[str, str] = {prefix: uri for uri, prefix in DEFAULT_NAMESPACES}
- REVERSE_NS: dict[str, str] = dict(DEFAULT_NAMESPACES)
-
- _PARSERS_OVERWRITE_INVALID_XML: Iterable[Callable[[bytes], Any]] = [
- _parser_basic,
- _parser_strip_illegal_bytes,
- _parser_recovery,
- _parser_replace_with_empty_xmp,
- ]
- _PARSERS_STANDARD: Iterable[Callable[[bytes], Any]] = [_parser_basic]
-
- def __init__(
- self,
- pdf: Pdf,
- pikepdf_mark: bool = True,
- sync_docinfo: bool = True,
- overwrite_invalid_xml: bool = True,
- ):
- self._pdf = pdf
- self._xmp = None
- self.mark = pikepdf_mark
- self.sync_docinfo = sync_docinfo
- self._updating = False
- self.overwrite_invalid_xml = overwrite_invalid_xml
-
- def load_from_docinfo(
- self, docinfo, delete_missing: bool = False, raise_failure: bool = False
- ) -> None:
- """Populate the XMP metadata object with DocumentInfo.
-
- Arguments:
- docinfo: a DocumentInfo, e.g pdf.docinfo
- delete_missing: if the entry is not DocumentInfo, delete the equivalent
- from XMP
- raise_failure: if True, raise any failure to convert docinfo;
- otherwise warn and continue
-
- A few entries in the deprecated DocumentInfo dictionary are considered
- approximately equivalent to certain XMP records. This method copies
- those entries into the XMP metadata.
- """
-
- def warn_or_raise(msg, e=None):
- if raise_failure:
- raise ValueError(msg) from e
- warn(msg)
-
- for uri, shortkey, docinfo_name, converter in self.DOCINFO_MAPPING:
- qname = QName(uri, shortkey)
- # docinfo might be a dict or pikepdf.Dictionary, so lookup keys
- # by str(Name)
- val = docinfo.get(str(docinfo_name))
- if val is None:
- if delete_missing and qname in self:
- del self[qname]
- continue
- try:
- val = str(val)
- if converter:
- val = converter.xmp_from_docinfo(val)
- if not val:
- continue
- self._setitem(qname, val, True)
- except (ValueError, AttributeError, NotImplementedError) as e:
- warn_or_raise(
- f"The metadata field {docinfo_name} could not be copied to XMP", e
- )
- valid_docinfo_names = {
- str(docinfo_name) for _, _, docinfo_name, _ in self.DOCINFO_MAPPING
- }
- extra_docinfo_names = {str(k) for k in docinfo.keys()} - valid_docinfo_names
- for extra in extra_docinfo_names:
- warn_or_raise(
- f"The metadata field {extra} with value '{repr(docinfo.get(extra))}' "
- "has no XMP equivalent, so it was discarded",
- )
-
- def _load(self) -> None:
- try:
- data = self._pdf.Root.Metadata.read_bytes()
- except AttributeError:
- data = b''
- self._load_from(data)
-
- def _load_from(self, data: bytes) -> None:
- if data.strip() == b'':
- data = XMP_EMPTY # on some platforms lxml chokes on empty documents
-
- parsers = (
- self._PARSERS_OVERWRITE_INVALID_XML
- if self.overwrite_invalid_xml
- else self._PARSERS_STANDARD
- )
-
- for parser in parsers:
- try:
- self._xmp = parser(data)
- except (
- XMLSyntaxError
- if self.overwrite_invalid_xml
- else NeverRaise # type: ignore
- ) as e:
- if str(e).startswith("Start tag expected, '<' not found") or str(
- e
- ).startswith("Document is empty"):
- self._xmp = _parser_replace_with_empty_xmp()
- break
- else:
- break
-
- if self._xmp is not None:
- try:
- pis = self._xmp.xpath('/processing-instruction()')
- for pi in pis:
- etree.strip_tags(self._xmp, pi.tag)
- self._get_rdf_root()
- except (
- Exception # pylint: disable=broad-except
- if self.overwrite_invalid_xml
- else NeverRaise
- ) as e:
- log.warning("Error occurred parsing XMP", exc_info=e)
- self._xmp = _parser_replace_with_empty_xmp()
- else:
- log.warning("Error occurred parsing XMP")
- self._xmp = _parser_replace_with_empty_xmp()
-
- @ensure_loaded
- def __enter__(self):
- self._updating = True
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- try:
- if exc_type is not None:
- return
- self._apply_changes()
- finally:
- self._updating = False
-
- def _update_docinfo(self):
- """Update the PDF's DocumentInfo dictionary to match XMP metadata.
-
- The standard mapping is described here:
- https://www.pdfa.org/pdfa-metadata-xmp-rdf-dublin-core/
- """
- # Touch object to ensure it exists
- self._pdf.docinfo # pylint: disable=pointless-statement
- for uri, element, docinfo_name, converter in self.DOCINFO_MAPPING:
- qname = QName(uri, element)
- try:
- value = self[qname]
- except KeyError:
- if docinfo_name in self._pdf.docinfo:
- del self._pdf.docinfo[docinfo_name]
- continue
- if converter:
- try:
- value = converter.docinfo_from_xmp(value)
- except ValueError:
- warn(
- f"The DocumentInfo field {docinfo_name} could not be "
- "updated from XMP"
- )
- value = None
- except Exception as e:
- raise ValueError(
- "An error occurred while updating DocumentInfo field "
- f"{docinfo_name} from XMP {qname} with value {value}"
- ) from e
- if value is None:
- if docinfo_name in self._pdf.docinfo:
- del self._pdf.docinfo[docinfo_name]
- continue
- value = _clean(value)
- try:
- # Try to save pure ASCII
- self._pdf.docinfo[docinfo_name] = value.encode('ascii')
- except UnicodeEncodeError:
- # qpdf will serialize this as a UTF-16 with BOM string
- self._pdf.docinfo[docinfo_name] = value
-
- def _get_xml_bytes(self, xpacket=True):
- data = BytesIO()
- if xpacket:
- data.write(XPACKET_BEGIN)
- self._xmp.write(data, encoding='utf-8', pretty_print=True)
- if xpacket:
- data.write(XPACKET_END)
- data.seek(0)
- xml_bytes = data.read()
- return xml_bytes
-
- def _apply_changes(self):
- """Serialize our changes back to the PDF in memory.
-
- Depending how we are initialized, leave our metadata mark and producer.
- """
- if self.mark:
- # We were asked to mark the file as being edited by pikepdf
- self._setitem(
- QName(XMP_NS_XMP, 'MetadataDate'),
- datetime.now(datetime.utcnow().astimezone().tzinfo).isoformat(),
- applying_mark=True,
- )
- self._setitem(
- QName(XMP_NS_PDF, 'Producer'),
- 'pikepdf ' + pikepdf_version,
- applying_mark=True,
- )
- xml = self._get_xml_bytes()
- self._pdf.Root.Metadata = Stream(self._pdf, xml)
- self._pdf.Root.Metadata[Name.Type] = Name.Metadata
- self._pdf.Root.Metadata[Name.Subtype] = Name.XML
- if self.sync_docinfo:
- self._update_docinfo()
-
- @classmethod
- def _qname(cls, name: QName | str) -> str:
- """Convert name to an XML QName.
-
- e.g. pdf:Producer -> {http://ns.adobe.com/pdf/1.3/}Producer
- """
- if isinstance(name, QName):
- return str(name)
- if not isinstance(name, str):
- raise TypeError(f"{name} must be str")
- if name == '':
- return name
- if name.startswith('{'):
- return name
- try:
- prefix, tag = name.split(':', maxsplit=1)
- except ValueError:
- # If missing the namespace, put it in the top level namespace
- # To do this completely correct we actually need to figure out
- # the namespace based on context defined by parent tags. That
- # https://www.w3.org/2001/tag/doc/qnameids.html
- prefix, tag = 'x', name
- uri = cls.NS[prefix]
- return str(QName(uri, tag))
-
- def _prefix_from_uri(self, uriname):
- """Given a fully qualified XML name, find a prefix.
-
- e.g. {http://ns.adobe.com/pdf/1.3/}Producer -> pdf:Producer
- """
- uripart, tag = uriname.split('}', maxsplit=1)
- uri = uripart.replace('{', '')
- return self.REVERSE_NS[uri] + ':' + tag
-
- def _get_subelements(self, node):
- """Gather the sub-elements attached to a node.
-
- Gather rdf:Bag and and rdf:Seq into set and list respectively. For
- alternate languages values, take the first language only for
- simplicity.
- """
- items = node.find('rdf:Alt', self.NS)
- if items is not None:
- try:
- return items[0].text
- except IndexError:
- return ''
-
- for xmlcontainer, container, insertfn in XMP_CONTAINERS:
- items = node.find(f'rdf:{xmlcontainer}', self.NS)
- if items is None:
- continue
- result = container()
- for item in items:
- insertfn(result, item.text)
- return result
- return ''
-
- def _get_rdf_root(self):
- rdf = self._xmp.find('.//rdf:RDF', self.NS)
- if rdf is None:
- rdf = self._xmp.getroot()
- if not rdf.tag == '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF':
- raise ValueError("Metadata seems to be XML but not XMP")
- return rdf
-
- def _get_elements(self, name: str | QName = ''):
- """Get elements from XMP.
-
- Core routine to find elements matching name within the XMP and yield
- them.
-
- For XMP spec 7.9.2.2, rdf:Description with property attributes,
- we yield the node which will have the desired as one of its attributes.
- qname is returned so that the node.attrib can be used to locate the
- source.
-
- For XMP spec 7.5, simple valued XMP properties, we yield the node,
- None, and the value. For structure or array valued properties we gather
- the elements. We ignore qualifiers.
-
- Args:
- name: a prefixed name or QName to look for within the
- data section of the XMP; looks for all data keys if omitted
-
- Yields:
- tuple: (node, qname_attrib, value, parent_node)
-
- """
- qname = self._qname(name)
- rdf = self._get_rdf_root()
- for rdfdesc in rdf.findall('rdf:Description[@rdf:about=""]', self.NS):
- if qname and qname in rdfdesc.keys():
- yield (rdfdesc, qname, rdfdesc.get(qname), rdf)
- elif not qname:
- for k, v in rdfdesc.items():
- if v:
- yield (rdfdesc, k, v, rdf)
- xpath = qname if name else '*'
- for node in rdfdesc.findall(xpath, self.NS):
- if node.text and node.text.strip():
- yield (node, None, node.text, rdfdesc)
- continue
- values = self._get_subelements(node)
- yield (node, None, values, rdfdesc)
-
- def _get_element_values(self, name=''):
- yield from (v[2] for v in self._get_elements(name))
-
- @ensure_loaded
- def __contains__(self, key: str | QName):
- return any(self._get_element_values(key))
-
- @ensure_loaded
- def __getitem__(self, key: str | QName):
- try:
- return next(self._get_element_values(key))
- except StopIteration:
- raise KeyError(key) from None
-
- @ensure_loaded
- def __iter__(self):
- for node, attrib, _val, _parents in self._get_elements():
- if attrib:
- yield attrib
- else:
- yield node.tag
-
- @ensure_loaded
- def __len__(self):
- return len(list(iter(self)))
-
- def _setitem(
- self,
- key: str | QName,
- val: set[str] | list[str] | str,
- applying_mark: bool = False,
- ):
- if not self._updating:
- raise RuntimeError("Metadata not opened for editing, use with block")
-
- qkey = self._qname(key)
- self._setitem_check_args(key, val, applying_mark, qkey)
-
- try:
- # Update existing node
- self._setitem_update(key, val, qkey)
- except StopIteration:
- # Insert a new node
- self._setitem_insert(key, val)
-
- def _setitem_check_args(self, key, val, applying_mark: bool, qkey: str) -> None:
- if (
- self.mark
- and not applying_mark
- and qkey
- in (
- self._qname('xmp:MetadataDate'),
- self._qname('pdf:Producer'),
- )
- ):
- # Complain if user writes self[pdf:Producer] = ... and because it will
- # be overwritten on save, unless self._updating_mark, in which case
- # the action was initiated internally
- log.warning(
- f"Update to {key} will be overwritten because metadata was opened "
- "with set_pikepdf_as_editor=True"
- )
- if isinstance(val, str) and qkey in (self._qname('dc:creator')):
- log.error(f"{key} should be set to a list of strings")
-
- def _setitem_add_array(self, node, items: Iterable) -> None:
- rdf_type = next(
- c.rdf_type for c in XMP_CONTAINERS if isinstance(items, c.py_type)
- )
- seq = etree.SubElement(node, str(QName(XMP_NS_RDF, rdf_type)))
- tag_attrib: dict[str, str] | None = None
- if rdf_type == 'Alt':
- tag_attrib = {str(QName(XMP_NS_XML, 'lang')): 'x-default'}
- for item in items:
- el = etree.SubElement(seq, str(QName(XMP_NS_RDF, 'li')), attrib=tag_attrib)
- el.text = _clean(item)
-
- def _setitem_update(self, key, val, qkey):
- # Locate existing node to replace
- node, attrib, _oldval, _parent = next(self._get_elements(key))
- if attrib:
- if not isinstance(val, str):
- if qkey == self._qname('dc:creator'):
- # dc:creator incorrectly created as an attribute - we're
- # replacing it anyway, so remove the old one
- del node.attrib[qkey]
- self._setitem_add_array(node, _clean(val))
- else:
- raise TypeError(f"Setting {key} to {val} with type {type(val)}")
- else:
- node.set(attrib, _clean(val))
- elif isinstance(val, (list, set)):
- for child in node.findall('*'):
- node.remove(child)
- self._setitem_add_array(node, val)
- elif isinstance(val, str):
- for child in node.findall('*'):
- node.remove(child)
- if str(self._qname(key)) in LANG_ALTS:
- self._setitem_add_array(node, AltList([_clean(val)]))
- else:
- node.text = _clean(val)
- else:
- raise TypeError(f"Setting {key} to {val} with type {type(val)}")
-
- def _setitem_insert(self, key, val):
- rdf = self._get_rdf_root()
- if str(self._qname(key)) in LANG_ALTS:
- val = AltList([_clean(val)])
- if isinstance(val, (list, set)):
- rdfdesc = etree.SubElement(
- rdf,
- str(QName(XMP_NS_RDF, 'Description')),
- attrib={str(QName(XMP_NS_RDF, 'about')): ''},
- )
- node = etree.SubElement(rdfdesc, self._qname(key))
- self._setitem_add_array(node, val)
- elif isinstance(val, str):
- _rdfdesc = etree.SubElement(
- rdf,
- str(QName(XMP_NS_RDF, 'Description')),
- attrib={
- QName(XMP_NS_RDF, 'about'): '',
- self._qname(key): _clean(val),
- },
- )
- else:
- raise TypeError(f"Setting {key} to {val} with type {type(val)}") from None
-
- @ensure_loaded
- def __setitem__(self, key: str | QName, val: set[str] | list[str] | str):
- return self._setitem(key, val, False)
-
- @ensure_loaded
- def __delitem__(self, key: str | QName):
- if not self._updating:
- raise RuntimeError("Metadata not opened for editing, use with block")
- try:
- node, attrib, _oldval, parent = next(self._get_elements(key))
- if attrib: # Inline
- del node.attrib[attrib]
- if (
- len(node.attrib) == 1
- and len(node) == 0
- and QName(XMP_NS_RDF, 'about') in node.attrib
- ):
- # The only thing left on this node is rdf:about="", so remove it
- parent.remove(node)
- else:
- parent.remove(node)
- except StopIteration:
- raise KeyError(key) from None
-
- @property
- def pdfa_status(self) -> str:
- """Return the PDF/A conformance level claimed by this PDF, or False.
-
- A PDF may claim to PDF/A compliant without this being true. Use an
- independent verifier such as veraPDF to test if a PDF is truly
- conformant.
-
- Returns:
- The conformance level of the PDF/A, or an empty string if the
- PDF does not claim PDF/A conformance. Possible valid values
- are: 1A, 1B, 2A, 2B, 2U, 3A, 3B, 3U.
- """
- # do same as @ensure_loaded - mypy can't handle decorated property
- if not self._xmp:
- self._load()
-
- key_part = QName(XMP_NS_PDFA_ID, 'part')
- key_conformance = QName(XMP_NS_PDFA_ID, 'conformance')
- try:
- return self[key_part] + self[key_conformance]
- except KeyError:
- return ''
-
- @property
- def pdfx_status(self) -> str:
- """Return the PDF/X conformance level claimed by this PDF, or False.
-
- A PDF may claim to PDF/X compliant without this being true. Use an
- independent verifier such as veraPDF to test if a PDF is truly
- conformant.
-
- Returns:
- The conformance level of the PDF/X, or an empty string if the
- PDF does not claim PDF/X conformance.
- """
- # do same as @ensure_loaded - mypy can't handle decorated property
- if not self._xmp:
- self._load()
-
- pdfx_version = QName(XMP_NS_PDFX_ID, 'GTS_PDFXVersion')
- try:
- return self[pdfx_version]
- except KeyError:
- return ''
-
- @ensure_loaded
- def __str__(self):
- return self._get_xml_bytes(xpacket=False).decode('utf-8')
diff --git a/env/lib/python3.10/site-packages/pikepdf/models/outlines.py b/env/lib/python3.10/site-packages/pikepdf/models/outlines.py
deleted file mode 100644
index 1143de6..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/models/outlines.py
+++ /dev/null
@@ -1,421 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow, 2020 Matthias Erll
-
-# SPDX-License-Identifier: MPL-2.0
-
-"""Support for document outlines (e.g. table of contents)."""
-
-from __future__ import annotations
-
-from enum import Enum
-from itertools import chain
-from typing import Iterable, List, cast
-
-from pikepdf import Array, Dictionary, Name, Object, Page, Pdf, String
-
-
-class PageLocation(Enum):
- """Page view location definitions, from PDF spec."""
-
- XYZ = 1
- Fit = 2
- FitH = 3
- FitV = 4
- FitR = 5
- FitB = 6
- FitBH = 7
- FitBV = 8
-
-
-PAGE_LOCATION_ARGS = {
- PageLocation.XYZ: ('left', 'top', 'zoom'),
- PageLocation.FitH: ('top',),
- PageLocation.FitV: ('left',),
- PageLocation.FitR: ('left', 'bottom', 'right', 'top'),
- PageLocation.FitBH: ('top',),
- PageLocation.FitBV: ('left',),
-}
-ALL_PAGE_LOCATION_KWARGS = set(chain.from_iterable(PAGE_LOCATION_ARGS.values()))
-
-
-def make_page_destination(
- pdf: Pdf,
- page_num: int,
- page_location: PageLocation | str | None = None,
- *,
- left: float | None = None,
- top: float | None = None,
- right: float | None = None,
- bottom: float | None = None,
- zoom: float | None = None,
-) -> Array:
- """
- Create a destination ``Array`` with reference to a Pdf document's page number.
-
- Arguments:
- pdf: PDF document object.
- page_num: Page number (zero-based).
- page_location: Optional page location, as a string or :enum:`PageLocation`.
- left: Specify page viewport rectangle.
- top: Specify page viewport rectangle.
- right: Specify page viewport rectangle.
- bottom: Specify page viewport rectangle.
- zoom: Specify page viewport rectangle's zoom level.
-
- left, top, right, bottom, zoom are used in conjunction with the page fit style
- specified by *page_location*.
- """
- return _make_page_destination(
- pdf,
- page_num,
- page_location=page_location,
- left=left,
- top=top,
- right=right,
- bottom=bottom,
- zoom=zoom,
- )
-
-
-def _make_page_destination(
- pdf: Pdf,
- page_num: int,
- page_location: PageLocation | str | None = None,
- **kwargs,
-) -> Array:
- kwargs = {k: v for k, v in kwargs.items() if v is not None}
-
- res: list[Dictionary | Name] = [pdf.pages[page_num].obj]
- if page_location:
- if isinstance(page_location, PageLocation):
- loc_key = page_location
- loc_str = loc_key.name
- else:
- loc_str = page_location
- try:
- loc_key = PageLocation[loc_str]
- except KeyError:
- raise ValueError(
- f"Invalid or unsupported page location type {loc_str}"
- ) from None
- res.append(Name(f'/{loc_str}'))
- dest_arg_names = PAGE_LOCATION_ARGS.get(loc_key)
- if dest_arg_names:
- res.extend(kwargs.get(k, 0) for k in dest_arg_names)
- else:
- res.append(Name.Fit)
- return Array(res)
-
-
-class OutlineStructureError(Exception):
- """Indicates an error in the outline data structure."""
-
-
-class OutlineItem:
- """Manage a single item in a PDF document outlines structure.
-
- Includes nested items.
-
- Arguments:
- title: Title of the outlines item.
- destination: Page number, destination name, or any other PDF object
- to be used as a reference when clicking on the outlines entry. Note
- this should be ``None`` if an action is used instead. If set to a
- page number, it will be resolved to a reference at the time of
- writing the outlines back to the document.
- page_location: Supplemental page location for a page number
- in ``destination``, e.g. ``PageLocation.Fit``. May also be
- a simple string such as ``'FitH'``.
- action: Action to perform when clicking on this item. Will be ignored
- during writing if ``destination`` is also set.
- obj: ``Dictionary`` object representing this outlines item in a ``Pdf``.
- May be ``None`` for creating a new object. If present, an existing
- object is modified in-place during writing and original attributes
- are retained.
- left, top, bottom, right, zoom: Describes the viewport position associated
- with a destination.
-
- This object does not contain any information about higher-level or
- neighboring elements.
-
- Valid destination arrays:
- [page /XYZ left top zoom]
- generally
- [page, PageLocationEntry, 0 to 4 ints]
- """
-
- def __init__(
- self,
- title: str,
- destination: Array | String | Name | int | None = None,
- page_location: PageLocation | str | None = None,
- action: Dictionary | None = None,
- obj: Dictionary | None = None,
- *,
- left: float | None = None,
- top: float | None = None,
- right: float | None = None,
- bottom: float | None = None,
- zoom: float | None = None,
- ):
- self.title = title
- self.destination = destination
- self.page_location = page_location
- self.page_location_kwargs = {}
- self.action = action
- if self.destination is not None and self.action is not None:
- raise ValueError("Only one of destination and action may be set")
- self.obj = obj
- kwargs = dict(left=left, top=top, right=right, bottom=bottom, zoom=zoom)
- self.page_location_kwargs = {k: v for k, v in kwargs.items() if v is not None}
- self.is_closed = False
- self.children: list[OutlineItem] = []
-
- def __str__(self):
- if self.children:
- if self.is_closed:
- oc_indicator = '[+]'
- else:
- oc_indicator = '[-]'
- else:
- oc_indicator = '[ ]'
- if self.destination is not None:
- if isinstance(self.destination, Array):
- # 12.3.2.2 Explicit destination
- # [raw_page, /PageLocation.SomeThing, integer parameters for viewport]
- raw_page = self.destination[0]
- page = Page(raw_page)
- dest = page.label
- elif isinstance(self.destination, String):
- # 12.3.2.2 Named destination, byte string reference to Names
- dest = f'<Named Destination in document .Root.Names dictionary: {self.destination}>'
- elif isinstance(self.destination, Name):
- # 12.3.2.2 Named destination, name object (PDF 1.1)
- dest = f'<Named Destination in document .Root.Dests dictionary: {self.destination}>'
- elif isinstance(self.destination, int):
- # Page number
- dest = f'<Page {self.destination}>'
- else:
- dest = '<Action>'
- return f'{oc_indicator} {self.title} -> {dest}'
-
- def __repr__(self):
- return f'<pikepdf.{self.__class__.__name__}: "{self.title}">'
-
- @classmethod
- def from_dictionary_object(cls, obj: Dictionary):
- """Creates a ``OutlineItem`` from a ``Dictionary``.
-
- Does not process nested items.
-
- Arguments:
- obj: ``Dictionary`` object representing a single outline node.
- """
- title = str(obj.Title)
- destination = obj.get(Name.Dest)
- if destination is not None and not isinstance(
- destination, (Array, String, Name)
- ):
- # 12.3.3: /Dest may be a name, byte string or array
- raise OutlineStructureError(
- f"Unexpected object type in Outline's /Dest: {destination!r}"
- )
- action = obj.get(Name.A)
- if action is not None and not isinstance(action, Dictionary):
- raise OutlineStructureError(
- f"Unexpected object type in Outline's /A: {action!r}"
- )
- return cls(title, destination=destination, action=action, obj=obj)
-
- def to_dictionary_object(self, pdf: Pdf, create_new: bool = False) -> Dictionary:
- """Creates/updates a ``Dictionary`` object from this outline node.
-
- Page numbers are resolved to a page reference on the input
- ``Pdf`` object.
-
- Arguments:
- pdf: PDF document object.
- create_new: If set to ``True``, creates a new object instead of
- modifying an existing one in-place.
- """
- if create_new or self.obj is None:
- self.obj = obj = pdf.make_indirect(Dictionary())
- else:
- obj = self.obj
- obj.Title = self.title
- if self.destination is not None:
- if isinstance(self.destination, int):
- self.destination = make_page_destination(
- pdf,
- self.destination,
- self.page_location,
- **self.page_location_kwargs,
- )
- obj.Dest = self.destination
- if Name.A in obj:
- del obj.A
- elif self.action is not None:
- obj.A = self.action
- if Name.Dest in obj:
- del obj.Dest
- return obj
-
-
-class Outline:
- """Maintains a intuitive interface for creating and editing PDF document outlines.
-
- See |pdfrm| section 12.3.
-
- Arguments:
- pdf: PDF document object.
- max_depth: Maximum recursion depth to consider when reading the outline.
- strict: If set to ``False`` (default) silently ignores structural errors.
- Setting it to ``True`` raises a
- :class:`pikepdf.OutlineStructureError`
- if any object references re-occur while the outline is being read or
- written.
-
- See Also:
- :meth:`pikepdf.Pdf.open_outline`
- """
-
- def __init__(self, pdf: Pdf, max_depth: int = 15, strict: bool = False):
- self._root: list[OutlineItem] | None = None
- self._pdf = pdf
- self._max_depth = max_depth
- self._strict = strict
- self._updating = False
-
- def __str__(self):
- return str(self.root)
-
- def __repr__(self):
- return f'<pikepdf.{self.__class__.__name__}: {len(self.root)} items>'
-
- def __enter__(self):
- self._updating = True
- return self
-
- def __exit__(self, exc_type, exc_val, exc_tb):
- try:
- if exc_type is not None:
- return
- self._save()
- finally:
- self._updating = False
-
- def _save_level_outline(
- self,
- parent: Dictionary,
- outline_items: Iterable[OutlineItem],
- level: int,
- visited_objs: set[tuple[int, int]],
- ):
- count = 0
- prev: Dictionary | None = None
- first: Dictionary | None = None
- for item in outline_items:
- out_obj = item.to_dictionary_object(self._pdf)
- objgen = out_obj.objgen
- if objgen in visited_objs:
- if self._strict:
- raise OutlineStructureError(
- f"Outline object {objgen} reoccurred in structure"
- )
- out_obj = item.to_dictionary_object(self._pdf, create_new=True)
- else:
- visited_objs.add(objgen)
-
- out_obj.Parent = parent
- count += 1
- if prev is not None:
- prev.Next = out_obj
- out_obj.Prev = prev
- else:
- first = out_obj
- if Name.Prev in out_obj:
- del out_obj.Prev
- prev = out_obj
- if level < self._max_depth:
- sub_items: Iterable[OutlineItem] = item.children
- else:
- sub_items = ()
- self._save_level_outline(out_obj, sub_items, level + 1, visited_objs)
- if item.is_closed:
- out_obj.Count = -cast(int, out_obj.Count)
- else:
- count += cast(int, out_obj.Count)
- if count:
- assert prev is not None and first is not None
- if Name.Next in prev:
- del prev.Next
- parent.First = first
- parent.Last = prev
- else:
- if Name.First in parent:
- del parent.First
- if Name.Last in parent:
- del parent.Last
- parent.Count = count
-
- def _load_level_outline(
- self,
- first_obj: Dictionary,
- outline_items: list[Object],
- level: int,
- visited_objs: set[tuple[int, int]],
- ):
- current_obj: Dictionary | None = first_obj
- while current_obj:
- objgen = current_obj.objgen
- if objgen in visited_objs:
- if self._strict:
- raise OutlineStructureError(
- f"Outline object {objgen} reoccurred in structure"
- )
- return
- visited_objs.add(objgen)
-
- item = OutlineItem.from_dictionary_object(current_obj)
- first_child = current_obj.get(Name.First)
- if isinstance(first_child, Dictionary) and level < self._max_depth:
- self._load_level_outline(
- first_child, item.children, level + 1, visited_objs
- )
- count = current_obj.get(Name.Count)
- if isinstance(count, int) and count < 0:
- item.is_closed = True
- outline_items.append(item)
- next_obj = current_obj.get(Name.Next)
- if next_obj is None or isinstance(next_obj, Dictionary):
- current_obj = next_obj
- else:
- raise OutlineStructureError(
- f"Outline object {objgen} points to non-dictionary"
- )
-
- def _save(self):
- if self._root is None:
- return
- if Name.Outlines in self._pdf.Root:
- outlines = self._pdf.Root.Outlines
- else:
- self._pdf.Root.Outlines = outlines = self._pdf.make_indirect(
- Dictionary(Type=Name.Outlines)
- )
- self._save_level_outline(outlines, self._root, 0, set())
-
- def _load(self):
- self._root = root = []
- if Name.Outlines not in self._pdf.Root:
- return
- outlines = self._pdf.Root.Outlines or {}
- first_obj = outlines.get(Name.First)
- if first_obj:
- self._load_level_outline(first_obj, root, 0, set())
-
- @property
- def root(self) -> list[OutlineItem]:
- """Return the root node of the outline."""
- if self._root is None:
- self._load()
- return cast(List[OutlineItem], self._root)
diff --git a/env/lib/python3.10/site-packages/pikepdf/objects.py b/env/lib/python3.10/site-packages/pikepdf/objects.py
deleted file mode 100644
index 338d9f0..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/objects.py
+++ /dev/null
@@ -1,300 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""Provide classes to stand in for PDF objects.
-
-The purpose of these is to provide nice-looking classes to allow explicit
-construction of PDF objects and more pythonic idioms and facilitate discovery
-by documentation generators and linters.
-
-It's also a place to narrow the scope of input types to those more easily
-converted to C++.
-
-There is some deliberate "smoke and mirrors" here: all of the objects are truly
-instances of ``pikepdf.Object``, which is a variant container object. The
-``__new__`` constructs a ``pikepdf.Object`` in each case, and the rest of the
-class definition is present as an aide for code introspection.
-"""
-
-from __future__ import annotations
-
-# pylint: disable=unused-import, abstract-method
-from secrets import token_urlsafe
-from typing import TYPE_CHECKING, Any, Iterable, Mapping, cast
-from warnings import warn
-
-from . import _qpdf
-from ._qpdf import Object, ObjectType, Rectangle
-
-if TYPE_CHECKING: # pragma: no cover
- from pikepdf import Pdf
-
-# By default pikepdf.Object will identify itself as pikepdf._qpdf.Object
-# Here we change the module to discourage people from using that internal name
-# Instead it will become pikepdf.objects.Object
-Object.__module__ = __name__
-ObjectType.__module__ = __name__
-
-
-# type(Object) is the metaclass that pybind11 defines; we wish to extend that
-# pylint cannot see the C++ metaclass definition and is thoroughly confused.
-# pylint: disable=invalid-metaclass
-
-
-class _ObjectMeta(type(Object)): # type: ignore
- """Support instance checking."""
-
- def __instancecheck__(self, instance: Any) -> bool:
- # Note: since this class is a metaclass, self is a class object
- if type(instance) != Object:
- return False
- return self.object_type == instance._type_code
-
-
-class _NameObjectMeta(_ObjectMeta):
- """Support usage pikepdf.Name.Whatever -> Name('/Whatever')."""
-
- def __getattr__(self, attr: str) -> Any:
- if attr.startswith('_') or attr == 'object_type':
- return getattr(_ObjectMeta, attr)
- return Name('/' + attr)
-
- def __setattr__(self, attr: str, value: Any) -> None:
- # No need for a symmetric .startswith('_'). To prevent user error, we
- # simply don't allow mucking with the pikepdf.Name class's attributes.
- # There is no reason to ever assign to them.
- raise AttributeError(
- "Attributes may not be set on pikepdf.Name. Perhaps you meant to "
- "modify a Dictionary rather than a Name?"
- )
-
- def __getitem__(self, item: str) -> Name:
- if item.startswith('/'):
- item = item[1:]
- raise TypeError(
- "pikepdf.Name is not subscriptable. You probably meant:\n"
- f" pikepdf.Name.{item}\n"
- "or\n"
- f" pikepdf.Name('/{item}')\n"
- )
-
-
-class Name(Object, metaclass=_NameObjectMeta):
- """Construct a PDF Name object.
-
- Names can be constructed with two notations:
-
- 1. ``Name.Resources``
-
- 2. ``Name('/Resources')``
-
- The two are semantically equivalent. The former is preferred for names
- that are normally expected to be in a PDF. The latter is preferred for
- dynamic names and attributes.
- """
-
- object_type = ObjectType.name_
-
- def __new__(cls, name: str | Name) -> Name:
- """Construct a PDF Name."""
- # QPDF_Name::unparse ensures that names are always saved in a UTF-8
- # compatible way, so we only need to guard the input.
- if isinstance(name, bytes):
- raise TypeError("Name should be str")
- if isinstance(name, Name):
- return name # Names are immutable so we can return a reference
- return _qpdf._new_name(name)
-
- @classmethod
- def random(cls, len_: int = 16, prefix: str = '') -> Name:
- """Generate a cryptographically strong random, valid PDF Name.
-
- This function uses Python's secrets.token_urlsafe, which returns a
- URL-safe encoded random number of the desired length. An optional
- *prefix* may be prepended. (The encoding is ultimately done with
- :func:`base64.urlsafe_b64encode`.) Serendipitously, URL-safe is also
- PDF-safe.
-
- When the length parameter is 16 (16 random bytes or 128 bits), the result
- is probably globally unique and can be treated as never colliding with
- other names.
-
- The length of the string may vary because it is encoded.
- """
- random_string = token_urlsafe(len_)
- return _qpdf._new_name(f"/{prefix}{random_string}")
-
-
-class Operator(Object, metaclass=_ObjectMeta):
- """Construct an operator for use in a content stream.
-
- An Operator is one of a limited set of commands that can appear in PDF content
- streams (roughly the mini-language that draws objects, lines and text on a
- virtual PDF canvas). The commands :func:`parse_content_stream` and
- :func:`unparse_content_stream` create and expect Operators respectively, along
- with their operands.
-
- pikepdf uses the special Operator "INLINE IMAGE" to denote an inline image
- in a content stream.
- """
-
- object_type = ObjectType.operator
-
- def __new__(cls, name: str) -> Operator:
- """Construct an operator."""
- return cast('Operator', _qpdf._new_operator(name))
-
-
-class String(Object, metaclass=_ObjectMeta):
- """Construct a PDF String object."""
-
- object_type = ObjectType.string
-
- def __new__(cls, s: str | bytes) -> String:
- """
- Construct a PDF String.
-
- Args:
- s: The string to use. String will be encoded for
- PDF, bytes will be constructed without encoding.
-
- Return type:
- pikepdf.Object
- """
- if isinstance(s, bytes):
- return _qpdf._new_string(s)
- return _qpdf._new_string_utf8(s)
-
-
-class Array(Object, metaclass=_ObjectMeta):
- """Construct a PDF Array object."""
-
- object_type = ObjectType.array
-
- def __new__(cls, a: Iterable | Rectangle | None = None) -> Array:
- """
- Construct a PDF Array.
-
- Args:
- a: An iterable of objects. All objects must be either
- `pikepdf.Object` or convertible to `pikepdf.Object`.
-
- Return type:
- pikepdf.Array
- """
- if isinstance(a, (str, bytes)):
- raise TypeError('Strings cannot be converted to arrays of chars')
-
- if a is None:
- a = []
- elif isinstance(a, Rectangle):
- return a.as_array()
- elif isinstance(a, Array):
- return cast(Array, a.__copy__())
- return _qpdf._new_array(a)
-
-
-class Dictionary(Object, metaclass=_ObjectMeta):
- """Construct a PDF Dictionary object."""
-
- object_type = ObjectType.dictionary
-
- def __new__(cls, d: Mapping | None = None, **kwargs) -> Dictionary:
- """
- Construct a PDF Dictionary.
-
- Works from either a Python ``dict`` or keyword arguments.
-
- These two examples are equivalent:
-
- .. code-block:: python
-
- pikepdf.Dictionary({'/NameOne': 1, '/NameTwo': 'Two'})
-
- pikepdf.Dictionary(NameOne=1, NameTwo='Two')
-
- In either case, the keys must be strings, and the strings
- correspond to the desired Names in the PDF Dictionary. The values
- must all be convertible to `pikepdf.Object`.
-
- Return type:
- pikepdf.Dictionary
- """
- if kwargs and d is not None:
- raise ValueError('Cannot use both a mapping object and keyword args')
- if kwargs:
- # Add leading slash
- # Allows Dictionary(MediaBox=(0,0,1,1), Type=Name('/Page')...
- return _qpdf._new_dictionary({('/' + k): v for k, v in kwargs.items()})
- if isinstance(d, Dictionary):
- # Already a dictionary
- return d.__copy__()
- if not d:
- d = {}
- if d and any(key == '/' or not key.startswith('/') for key in d.keys()):
- raise KeyError("Dictionary created from strings must begin with '/'")
- return _qpdf._new_dictionary(d)
-
-
-class Stream(Object, metaclass=_ObjectMeta):
- """Construct a PDF Stream object."""
-
- object_type = ObjectType.stream
-
- def __new__(cls, owner: Pdf, data: bytes | None = None, d=None, **kwargs) -> Stream:
- """
- Create a new stream object.
-
- Streams stores arbitrary binary data and may or may not be compressed.
- It also may or may not be a page or Form XObject's content stream.
-
- A stream dictionary is like a pikepdf.Dictionary or Python dict, except
- it has a binary payload of data attached. The dictionary describes
- how the data is compressed or encoded.
-
- The dictionary may be initialized just like pikepdf.Dictionary is initialized,
- using a mapping object or keyword arguments.
-
- Args:
- owner: The Pdf to which this stream shall be attached.
- data: The data bytes for the stream.
- d: An optional mapping object that will be used to construct the stream's
- dictionary.
- kwargs: Keyword arguments that will define the stream dictionary. Do not set
- /Length here as pikepdf will manage this value. Set /Filter
- if the data is already encoded in some format.
-
- Examples:
- Using kwargs:
- >>> s1 = pikepdf.Stream(
- pdf,
- b"uncompressed image data",
- BitsPerComponent=8,
- ColorSpace=Name.DeviceRGB,
- ...
- )
- Using dict:
- >>> d = pikepdf.Dictionary(...)
- >>> s2 = pikepdf.Stream(
- pdf,
- b"data",
- d
- )
-
- .. versionchanged:: 2.2
- Support creation of ``pikepdf.Stream`` from existing dictionary.
-
- .. versionchanged:: 3.0
- Deprecated ``obj`` argument was removed; use ``data``.
- """
- if data is None:
- raise TypeError("Must make Stream from binary data")
-
- stream_dict = None
- if d or kwargs:
- stream_dict = Dictionary(d, **kwargs)
-
- stream = _qpdf._new_stream(owner, data)
- if stream_dict:
- stream.stream_dict = stream_dict
- return stream
diff --git a/env/lib/python3.10/site-packages/pikepdf/py.typed b/env/lib/python3.10/site-packages/pikepdf/py.typed
deleted file mode 100644
index 2f90bdd..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/py.typed
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-# pikepdf is typed
diff --git a/env/lib/python3.10/site-packages/pikepdf/settings.py b/env/lib/python3.10/site-packages/pikepdf/settings.py
deleted file mode 100644
index 2e0d058..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/settings.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""pikepdf global settings."""
-
-from __future__ import annotations
-
-from ._qpdf import (
- get_decimal_precision,
- set_decimal_precision,
- set_flate_compression_level,
-)
-
-__all__ = [
- 'get_decimal_precision',
- 'set_decimal_precision',
- 'set_flate_compression_level',
-]