aboutsummaryrefslogtreecommitdiffstats
path: root/env/lib/python3.10/site-packages/pikepdf/_methods.py
diff options
context:
space:
mode:
authorLibravatarLibravatar Biswakalyan Bhuyan <biswa@surgot.in> 2022-11-13 23:46:45 +0530
committerLibravatarLibravatar Biswakalyan Bhuyan <biswa@surgot.in> 2022-11-13 23:46:45 +0530
commit9468226a9e2e2ab8cdd599f1d8538e860ca86120 (patch)
tree0a77ada226d6db80639f96b438bf83e4e756edb5 /env/lib/python3.10/site-packages/pikepdf/_methods.py
downloadidcard-9468226a9e2e2ab8cdd599f1d8538e860ca86120.tar.gz
idcard-9468226a9e2e2ab8cdd599f1d8538e860ca86120.tar.bz2
idcard-9468226a9e2e2ab8cdd599f1d8538e860ca86120.zip
id card generator
Diffstat (limited to 'env/lib/python3.10/site-packages/pikepdf/_methods.py')
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/_methods.py1340
1 files changed, 1340 insertions, 0 deletions
diff --git a/env/lib/python3.10/site-packages/pikepdf/_methods.py b/env/lib/python3.10/site-packages/pikepdf/_methods.py
new file mode 100644
index 0000000..25e1d95
--- /dev/null
+++ b/env/lib/python3.10/site-packages/pikepdf/_methods.py
@@ -0,0 +1,1340 @@
+# SPDX-FileCopyrightText: 2022 James R. Barlow
+# SPDX-License-Identifier: MPL-2.0
+
+"""Implement some features in Python and monkey-patch them onto C++ classes.
+
+In several cases the implementation of some higher levels features might as
+well be in Python. Fortunately we can attach Python methods to C++ class
+bindings after the fact.
+
+We can also move the implementation to C++ if desired.
+"""
+
+from __future__ import annotations
+
+import datetime
+import mimetypes
+import shutil
+from collections.abc import KeysView, MutableMapping
+from decimal import Decimal
+from io import BytesIO
+from pathlib import Path
+from subprocess import run
+from tempfile import NamedTemporaryFile
+from typing import BinaryIO, Callable, ItemsView, Iterator, TypeVar, ValuesView
+from warnings import warn
+
+from . import Array, Dictionary, Name, Object, Page, Pdf, Stream
+from ._augments import augment_override_cpp, augments
+from ._qpdf import (
+ AccessMode,
+ AttachedFile,
+ AttachedFileSpec,
+ Attachments,
+ NameTree,
+ NumberTree,
+ ObjectStreamMode,
+ Rectangle,
+ StreamDecodeLevel,
+ StreamParser,
+ Token,
+ _ObjectMapping,
+)
+from .models import Encryption, EncryptionInfo, Outline, PdfMetadata, Permissions
+from .models.metadata import decode_pdf_date, encode_pdf_date
+
+# pylint: disable=no-member,unsupported-membership-test,unsubscriptable-object
+# mypy: ignore-errors
+
+__all__ = []
+
+Numeric = TypeVar('Numeric', int, float, Decimal)
+
+
+def _single_page_pdf(page) -> bytes:
+ """Construct a single page PDF from the provided page in memory."""
+ pdf = Pdf.new()
+ pdf.pages.append(page)
+ bio = BytesIO()
+ pdf.save(bio)
+ bio.seek(0)
+ return bio.read()
+
+
+def _mudraw(buffer, fmt) -> bytes:
+ """Use mupdf draw to rasterize the PDF in the memory buffer."""
+ # mudraw cannot read from stdin so NamedTemporaryFile is required
+ with NamedTemporaryFile(suffix='.pdf') as tmp_in:
+ tmp_in.write(buffer)
+ tmp_in.seek(0)
+ tmp_in.flush()
+
+ proc = run(
+ ['mudraw', '-F', fmt, '-o', '-', tmp_in.name],
+ capture_output=True,
+ check=True,
+ )
+ return proc.stdout
+
+
+@augments(Object)
+class Extend_Object:
+ def _ipython_key_completions_(self):
+ if isinstance(self, (Dictionary, Stream)):
+ return self.keys()
+ return None
+
+ def emplace(self, other: Object, retain=(Name.Parent,)):
+ """Copy all items from other without making a new object.
+
+ Particularly when working with pages, it may be desirable to remove all
+ of the existing page's contents and emplace (insert) a new page on top
+ of it, in a way that preserves all links and references to the original
+ page. (Or similarly, for other Dictionary objects in a PDF.)
+
+ Any Dictionary keys in the iterable *retain* are preserved. By default,
+ /Parent is retained.
+
+ When a page is assigned (``pdf.pages[0] = new_page``), only the
+ application knows if references to the original the original page are
+ still valid. For example, a PDF optimizer might restructure a page
+ object into another visually similar one, and references would be valid;
+ but for a program that reorganizes page contents such as a N-up
+ compositor, references may not be valid anymore.
+
+ This method takes precautions to ensure that child objects in common
+ with ``self`` and ``other`` are not inadvertently deleted.
+
+ Example:
+ >>> pdf.pages[0].objgen
+ (16, 0)
+ >>> pdf.pages[0].emplace(pdf.pages[1])
+ >>> pdf.pages[0].objgen
+ (16, 0) # Same object
+
+ .. versionchanged:: 2.11.1
+ Added the *retain* argument.
+ """
+ if not self.same_owner_as(other):
+ raise TypeError("Objects must have the same owner for emplace()")
+
+ # .keys() returns strings, so make all strings
+ retain = {str(k) for k in retain}
+ self_keys = set(self.keys())
+ other_keys = set(other.keys())
+
+ assert all(isinstance(k, str) for k in (retain | self_keys | other_keys))
+
+ del_keys = self_keys - other_keys - retain
+ for k in (k for k in other_keys if k not in retain):
+ self[k] = other[k] # pylint: disable=unsupported-assignment-operation
+ for k in del_keys:
+ del self[k] # pylint: disable=unsupported-delete-operation
+
+ def _type_check_write(self, filter_, decode_parms):
+ if isinstance(filter_, list):
+ filter_ = Array(filter_)
+ filter_ = filter_.wrap_in_array()
+
+ if isinstance(decode_parms, list):
+ decode_parms = Array(decode_parms)
+ elif decode_parms is None:
+ decode_parms = Array([])
+ else:
+ decode_parms = decode_parms.wrap_in_array()
+
+ if not all(isinstance(item, Name) for item in filter_):
+ raise TypeError(
+ "filter must be: pikepdf.Name or pikepdf.Array([pikepdf.Name])"
+ )
+ if not all(
+ (isinstance(item, Dictionary) or item is None) for item in decode_parms
+ ):
+ raise TypeError(
+ "decode_parms must be: pikepdf.Dictionary or "
+ "pikepdf.Array([pikepdf.Dictionary])"
+ )
+ if len(decode_parms) != 0 and len(filter_) != len(decode_parms):
+ raise ValueError(
+ f"filter ({repr(filter_)}) and decode_parms "
+ f"({repr(decode_parms)}) must be arrays of same length"
+ )
+ if len(filter_) == 1:
+ filter_ = filter_[0]
+ if len(decode_parms) == 0:
+ decode_parms = None
+ elif len(decode_parms) == 1:
+ decode_parms = decode_parms[0]
+ return filter_, decode_parms
+
+ def write(
+ self,
+ data: bytes,
+ *,
+ filter: Name | Array | None = None,
+ decode_parms: Dictionary | Array | None = None,
+ type_check: bool = True,
+ ): # pylint: disable=redefined-builtin
+ """
+ Replace stream object's data with new (possibly compressed) `data`.
+
+ `filter` and `decode_parms` describe any compression that is already
+ present on the input `data`. For example, if your data is already
+ compressed with the Deflate algorithm, you would set
+ ``filter=Name.FlateDecode``.
+
+ When writing the PDF in :meth:`pikepdf.Pdf.save`,
+ pikepdf may change the compression or apply compression to data that was
+ not compressed, depending on the parameters given to that function. It
+ will never change lossless to lossy encoding.
+
+ PNG and TIFF images, even if compressed, cannot be directly inserted
+ into a PDF and displayed as images.
+
+ Args:
+ data: the new data to use for replacement
+ filter: The filter(s) with which the
+ data is (already) encoded
+ decode_parms: Parameters for the
+ filters with which the object is encode
+ type_check: Check arguments; use False only if you want to
+ intentionally create malformed PDFs.
+
+ If only one `filter` is specified, it may be a name such as
+ `Name('/FlateDecode')`. If there are multiple filters, then array
+ of names should be given.
+
+ If there is only one filter, `decode_parms` is a Dictionary of
+ parameters for that filter. If there are multiple filters, then
+ `decode_parms` is an Array of Dictionary, where each array index
+ is corresponds to the filter.
+ """
+ if type_check and filter is not None:
+ filter, decode_parms = self._type_check_write(filter, decode_parms)
+
+ self._write(data, filter=filter, decode_parms=decode_parms)
+
+
+@augments(Pdf)
+class Extend_Pdf:
+ def _repr_mimebundle_(
+ self, include=None, exclude=None
+ ): # pylint: disable=unused-argument
+ """
+ Present options to IPython or Jupyter for rich display of this object.
+
+ See https://ipython.readthedocs.io/en/stable/config/integrating.html#rich-display
+ """
+ bio = BytesIO()
+ self.save(bio)
+ bio.seek(0)
+
+ data = {'application/pdf': bio.read()}
+ return data
+
+ @property
+ def docinfo(self) -> Dictionary:
+ """
+ Access the (deprecated) document information dictionary.
+
+ The document information dictionary is a brief metadata record that can
+ store some information about the origin of a PDF. It is deprecated and
+ removed in the PDF 2.0 specification (not deprecated from the
+ perspective of pikepdf). Use the ``.open_metadata()`` API instead, which
+ will edit the modern (and unfortunately, more complicated) XMP metadata
+ object and synchronize changes to the document information dictionary.
+
+ This property simplifies access to the actual document information
+ dictionary and ensures that it is created correctly if it needs to be
+ created.
+
+ A new, empty dictionary will be created if this property is accessed
+ and dictionary does not exist. (This is to ensure that convenient code
+ like ``pdf.docinfo[Name.Title] = "Title"`` will work when the dictionary
+ does not exist at all.)
+
+ You can delete the document information dictionary by deleting this property,
+ ``del pdf.docinfo``. Note that accessing the property after deleting it
+ will re-create with a new, empty dictionary.
+
+ .. versionchanged: 2.4
+ Added support for ``del pdf.docinfo``.
+ """
+ if Name.Info not in self.trailer:
+ self.trailer.Info = self.make_indirect(Dictionary())
+ return self.trailer.Info
+
+ @docinfo.setter
+ def docinfo(self, new_docinfo: Dictionary):
+ if not new_docinfo.is_indirect:
+ raise ValueError(
+ "docinfo must be an indirect object - use Pdf.make_indirect"
+ )
+ self.trailer.Info = new_docinfo
+
+ @docinfo.deleter
+ def docinfo(self):
+ if Name.Info in self.trailer:
+ del self.trailer.Info
+
+ def open_metadata(
+ self,
+ set_pikepdf_as_editor: bool = True,
+ update_docinfo: bool = True,
+ strict: bool = False,
+ ) -> PdfMetadata:
+ """
+ Open the PDF's XMP metadata for editing.
+
+ There is no ``.close()`` function on the metadata object, since this is
+ intended to be used inside a ``with`` block only.
+
+ For historical reasons, certain parts of PDF metadata are stored in
+ two different locations and formats. This feature coordinates edits so
+ that both types of metadata are updated consistently and "atomically"
+ (assuming single threaded access). It operates on the ``Pdf`` in memory,
+ not any file on disk. To persist metadata changes, you must still use
+ ``Pdf.save()``.
+
+ Example:
+ >>> with pdf.open_metadata() as meta:
+ meta['dc:title'] = 'Set the Dublic Core Title'
+ meta['dc:description'] = 'Put the Abstract here'
+
+ Args:
+ set_pikepdf_as_editor: Automatically update the metadata ``pdf:Producer``
+ to show that this version of pikepdf is the most recent software to
+ modify the metadata, and ``xmp:MetadataDate`` to timestamp the update.
+ Recommended, except for testing.
+
+ update_docinfo: Update the standard fields of DocumentInfo
+ (the old PDF metadata dictionary) to match the corresponding
+ XMP fields. The mapping is described in
+ :attr:`PdfMetadata.DOCINFO_MAPPING`. Nonstandard DocumentInfo
+ fields and XMP metadata fields with no DocumentInfo equivalent
+ are ignored.
+
+ strict: If ``False`` (the default), we aggressively attempt
+ to recover from any parse errors in XMP, and if that fails we
+ overwrite the XMP with an empty XMP record. If ``True``, raise
+ errors when either metadata bytes are not valid and well-formed
+ XMP (and thus, XML). Some trivial cases that are equivalent to
+ empty or incomplete "XMP skeletons" are never treated as errors,
+ and always replaced with a proper empty XMP block. Certain
+ errors may be logged.
+ """
+ return PdfMetadata(
+ self,
+ pikepdf_mark=set_pikepdf_as_editor,
+ sync_docinfo=update_docinfo,
+ overwrite_invalid_xml=not strict,
+ )
+
+ def open_outline(self, max_depth: int = 15, strict: bool = False) -> Outline:
+ """
+ Open the PDF outline ("bookmarks") for editing.
+
+ Recommend for use in a ``with`` block. Changes are committed to the
+ PDF when the block exits. (The ``Pdf`` must still be opened.)
+
+ Example:
+ >>> with pdf.open_outline() as outline:
+ outline.root.insert(0, OutlineItem('Intro', 0))
+
+ Args:
+ max_depth: Maximum recursion depth of the outline to be
+ imported and re-written to the document. ``0`` means only
+ considering the root level, ``1`` the first-level
+ sub-outline of each root element, and so on. Items beyond
+ this depth will be silently ignored. Default is ``15``.
+ strict: With the default behavior (set to ``False``),
+ structural errors (e.g. reference loops) in the PDF document
+ will only cancel processing further nodes on that particular
+ level, recovering the valid parts of the document outline
+ without raising an exception. When set to ``True``, any such
+ error will raise an ``OutlineStructureError``, leaving the
+ invalid parts in place.
+ Similarly, outline objects that have been accidentally
+ duplicated in the ``Outline`` container will be silently
+ fixed (i.e. reproduced as new objects) or raise an
+ ``OutlineStructureError``.
+ """
+ return Outline(self, max_depth=max_depth, strict=strict)
+
+ def make_stream(self, data: bytes, d=None, **kwargs) -> Stream:
+ """
+ Create a new pikepdf.Stream object that is attached to this PDF.
+
+ See:
+ :meth:`pikepdf.Stream.__new__`
+
+ """
+ return Stream(self, data, d, **kwargs)
+
+ def add_blank_page(
+ self, *, page_size: tuple[Numeric, Numeric] = (612.0, 792.0)
+ ) -> Page:
+ """
+ Add a blank page to this PDF.
+
+ If pages already exist, the page will be added to the end. Pages may be
+ reordered using ``Pdf.pages``.
+
+ The caller may add content to the page by modifying its objects after creating
+ it.
+
+ Args:
+ page_size (tuple): The size of the page in PDF units (1/72 inch or 0.35mm).
+ Default size is set to a US Letter 8.5" x 11" page.
+ """
+ for dim in page_size:
+ if not (3 <= dim <= 14400):
+ raise ValueError('Page size must be between 3 and 14400 PDF units')
+
+ page_dict = Dictionary(
+ Type=Name.Page,
+ MediaBox=Array([0, 0, page_size[0], page_size[1]]),
+ Contents=self.make_stream(b''),
+ Resources=Dictionary(),
+ )
+ page_obj = self.make_indirect(page_dict)
+ self._add_page(page_obj, first=False)
+ return Page(page_obj)
+
+ def close(self) -> None:
+ """
+ Close a ``Pdf`` object and release resources acquired by pikepdf.
+
+ If pikepdf opened the file handle it will close it (e.g. when opened with a file
+ path). If the caller opened the file for pikepdf, the caller close the file.
+ ``with`` blocks will call close when exit.
+
+ pikepdf lazily loads data from PDFs, so some :class:`pikepdf.Object` may
+ implicitly depend on the :class:`pikepdf.Pdf` being open. This is always the
+ case for :class:`pikepdf.Stream` but can be true for any object. Do not close
+ the `Pdf` object if you might still be accessing content from it.
+
+ When an ``Object`` is copied from one ``Pdf`` to another, the ``Object`` is copied into
+ the destination ``Pdf`` immediately, so after accessing all desired information
+ from the source ``Pdf`` it may be closed.
+
+ .. versionchanged:: 3.0
+ In pikepdf 2.x, this function actually worked by resetting to a very short
+ empty PDF. Code that relied on this quirk may not function correctly.
+ """
+ self._close()
+ if getattr(self, '_tmp_stream', None):
+ self._tmp_stream.close()
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.close()
+
+ @property
+ def allow(self) -> Permissions:
+ """
+ Report permissions associated with this PDF.
+
+ By default these permissions will be replicated when the PDF is
+ saved. Permissions may also only be changed when a PDF is being saved,
+ and are only available for encrypted PDFs. If a PDF is not encrypted,
+ all operations are reported as allowed.
+
+ pikepdf has no way of enforcing permissions.
+ """
+ results = {}
+ for field in Permissions._fields:
+ results[field] = getattr(self, '_allow_' + field)
+ return Permissions(**results)
+
+ @property
+ def encryption(self) -> EncryptionInfo:
+ """
+ Report encryption information for this PDF.
+
+ Encryption settings may only be changed when a PDF is saved.
+ """
+ return EncryptionInfo(self._encryption_data)
+
+ def check(self) -> list[str]:
+ """
+ Check if PDF is well-formed.
+
+ Similar to ``qpdf --check``.
+ """
+
+ class DiscardingParser(StreamParser):
+ def __init__(self): # pylint: disable=useless-super-delegation
+ super().__init__() # required for C++
+
+ def handle_object(self, *_args):
+ pass
+
+ def handle_eof(self):
+ pass
+
+ problems: list[str] = []
+
+ self._decode_all_streams_and_discard()
+
+ discarding_parser = DiscardingParser()
+ for page in self.pages:
+ page.parse_contents(discarding_parser)
+
+ for warning in self.get_warnings():
+ problems.append("WARNING: " + warning)
+
+ return problems
+
+ def save(
+ self,
+ filename_or_stream: Path | str | BinaryIO | None = None,
+ *,
+ static_id: bool = False,
+ preserve_pdfa: bool = True,
+ min_version: str | tuple[str, int] = "",
+ force_version: str | tuple[str, int] = "",
+ fix_metadata_version: bool = True,
+ compress_streams: bool = True,
+ stream_decode_level: StreamDecodeLevel | None = None,
+ object_stream_mode: ObjectStreamMode = ObjectStreamMode.preserve,
+ normalize_content: bool = False,
+ linearize: bool = False,
+ qdf: bool = False,
+ progress: Callable[[int], None] = None,
+ encryption: Encryption | bool | None = None,
+ recompress_flate: bool = False,
+ deterministic_id: bool = False,
+ ) -> None:
+ """
+ Save all modifications to this :class:`pikepdf.Pdf`.
+
+ Args:
+ filename_or_stream: Where to write the output. If a file
+ exists in this location it will be overwritten.
+ If the file was opened with ``allow_overwriting_input=True``,
+ then it is permitted to overwrite the original file, and
+ this parameter may be omitted to implicitly use the original
+ filename. Otherwise, the filename may not be the same as the
+ input file, as overwriting the input file would corrupt data
+ since pikepdf using lazy loading.
+
+ static_id: Indicates that the ``/ID`` metadata, normally
+ calculated as a hash of certain PDF contents and metadata
+ including the current time, should instead be set to a static
+ value. Only use this for debugging and testing. Use
+ ``deterministic_id`` if you want to get the same ``/ID`` for
+ the same document contents.
+ preserve_pdfa: Ensures that the file is generated in a
+ manner compliant with PDF/A and other stricter variants.
+ This should be True, the default, in most cases.
+
+ min_version: Sets the minimum version of PDF
+ specification that should be required. If left alone QPDF
+ will decide. If a tuple, the second element is an integer, the
+ extension level. If the version number is not a valid format,
+ QPDF will decide what to do.
+ force_version: Override the version recommend by QPDF,
+ potentially creating an invalid file that does not display
+ in old versions. See QPDF manual for details. If a tuple, the
+ second element is an integer, the extension level.
+ fix_metadata_version: If ``True`` (default) and the XMP metadata
+ contains the optional PDF version field, ensure the version in
+ metadata is correct. If the XMP metadata does not contain a PDF
+ version field, none will be added. To ensure that the field is
+ added, edit the metadata and insert a placeholder value in
+ ``pdf:PDFVersion``. If XMP metadata does not exist, it will
+ not be created regardless of the value of this argument.
+
+ object_stream_mode:
+ ``disable`` prevents the use of object streams.
+ ``preserve`` keeps object streams from the input file.
+ ``generate`` uses object streams wherever possible,
+ creating the smallest files but requiring PDF 1.5+.
+
+ compress_streams: Enables or disables the compression of
+ stream objects in the PDF that are created without specifying
+ any compression setting. Metadata is never compressed.
+ By default this is set to ``True``, and should be except
+ for debugging. Existing streams in the PDF or streams will not
+ be modified. To decompress existing streams, you must set
+ both ``compress_streams=False`` and ``stream_decode_level``
+ to the desired decode level (e.g. ``.generalized`` will
+ decompress most non-image content).
+
+ stream_decode_level: Specifies how
+ to encode stream objects. See documentation for
+ :class:`pikepdf.StreamDecodeLevel`.
+
+ recompress_flate: When disabled (the default), qpdf does not
+ uncompress and recompress streams compressed with the Flate
+ compression algorithm. If True, pikepdf will instruct qpdf to
+ do this, which may be useful if recompressing streams to a
+ higher compression level.
+
+ normalize_content: Enables parsing and reformatting the
+ content stream within PDFs. This may debugging PDFs easier.
+
+ linearize: Enables creating linear or "fast web view",
+ where the file's contents are organized sequentially so that
+ a viewer can begin rendering before it has the whole file.
+ As a drawback, it tends to make files larger.
+
+ qdf: Save output QDF mode. QDF mode is a special output
+ mode in QPDF to allow editing of PDFs in a text editor. Use
+ the program ``fix-qdf`` to fix convert back to a standard
+ PDF.
+
+ progress: Specify a callback function that is called
+ as the PDF is written. The function will be called with an
+ integer between 0-100 as the sole parameter, the progress
+ percentage. This function may not access or modify the PDF
+ while it is being written, or data corruption will almost
+ certainly occur.
+
+ encryption: If ``False``
+ or omitted, existing encryption will be removed. If ``True``
+ encryption settings are copied from the originating PDF.
+ Alternately, an ``Encryption`` object may be provided that
+ sets the parameters for new encryption.
+
+ deterministic_id: Indicates that the ``/ID`` metadata, normally
+ calculated as a hash of certain PDF contents and metadata
+ including the current time, should instead be computed using
+ only deterministic data like the file contents. At a small
+ runtime cost, this enables generation of the same ``/ID`` if
+ the same inputs are converted in the same way multiple times.
+ Does not work for encrypted files.
+
+ Raises:
+ PdfError
+ ForeignObjectError
+ ValueError
+
+ You may call ``.save()`` multiple times with different parameters
+ to generate different versions of a file, and you *may* continue
+ to modify the file after saving it. ``.save()`` does not modify
+ the ``Pdf`` object in memory, except possibly by updating the XMP
+ metadata version with ``fix_metadata_version``.
+
+ .. note::
+
+ :meth:`pikepdf.Pdf.remove_unreferenced_resources` before saving
+ may eliminate unnecessary resources from the output file if there
+ are any objects (such as images) that are referenced in a page's
+ Resources dictionary but never called in the page's content stream.
+
+ .. note::
+
+ pikepdf can read PDFs with incremental updates, but always
+ coalesces any incremental updates into a single non-incremental
+ PDF file when saving.
+
+ .. versionchanged:: 2.7
+ Added *recompress_flate*.
+
+ .. versionchanged:: 3.0
+ Keyword arguments now mandatory for everything except the first
+ argument.
+ """
+ if not filename_or_stream and getattr(self, '_original_filename', None):
+ filename_or_stream = self._original_filename
+ if not filename_or_stream:
+ raise ValueError(
+ "Cannot save to original filename because the original file was "
+ "not opening using Pdf.open(..., allow_overwriting_input=True). "
+ "Either specify a new destination filename/file stream or open "
+ "with allow_overwriting_input=True. If this Pdf was created using "
+ "Pdf.new(), you must specify a destination object since there is "
+ "no original filename to save to."
+ )
+ self._save(
+ filename_or_stream,
+ static_id=static_id,
+ preserve_pdfa=preserve_pdfa,
+ min_version=min_version,
+ force_version=force_version,
+ fix_metadata_version=fix_metadata_version,
+ compress_streams=compress_streams,
+ stream_decode_level=stream_decode_level,
+ object_stream_mode=object_stream_mode,
+ normalize_content=normalize_content,
+ linearize=linearize,
+ qdf=qdf,
+ progress=progress,
+ encryption=encryption,
+ samefile_check=getattr(self, '_tmp_stream', None) is None,
+ recompress_flate=recompress_flate,
+ deterministic_id=deterministic_id,
+ )
+
+ @staticmethod
+ def open(
+ filename_or_stream: Path | str | BinaryIO,
+ *,
+ password: str | bytes = "",
+ hex_password: bool = False,
+ ignore_xref_streams: bool = False,
+ suppress_warnings: bool = True,
+ attempt_recovery: bool = True,
+ inherit_page_attributes: bool = True,
+ access_mode: AccessMode = AccessMode.default,
+ allow_overwriting_input: bool = False,
+ ) -> Pdf:
+ """
+ Open an existing file at *filename_or_stream*.
+
+ If *filename_or_stream* is path-like, the file will be opened for reading.
+ The file should not be modified by another process while it is open in
+ pikepdf, or undefined behavior may occur. This is because the file may be
+ lazily loaded. Despite this restriction, pikepdf does not try to use any OS
+ services to obtain an exclusive lock on the file. Some applications may
+ want to attempt this or copy the file to a temporary location before
+ editing. This behaviour changes if *allow_overwriting_input* is set: the whole
+ file is then read and copied to memory, so that pikepdf can overwrite it
+ when calling ``.save()``.
+
+ When this function is called with a stream-like object, you must ensure
+ that the data it returns cannot be modified, or undefined behavior will
+ occur.
+
+ Any changes to the file must be persisted by using ``.save()``.
+
+ If *filename_or_stream* has ``.read()`` and ``.seek()`` methods, the file
+ will be accessed as a readable binary stream. pikepdf will read the
+ entire stream into a private buffer.
+
+ ``.open()`` may be used in a ``with``-block; ``.close()`` will be called when
+ the block exits, if applicable.
+
+ Whenever pikepdf opens a file, it will close it. If you open the file
+ for pikepdf or give it a stream-like object to read from, you must
+ release that object when appropriate.
+
+ Examples:
+ >>> with Pdf.open("test.pdf") as pdf:
+ ...
+
+ >>> pdf = Pdf.open("test.pdf", password="rosebud")
+
+ Args:
+ filename_or_stream: Filename or Python readable and seekable file
+ stream of PDF to open.
+ password: User or owner password to open an
+ encrypted PDF. If the type of this parameter is ``str``
+ it will be encoded as UTF-8. If the type is ``bytes`` it will
+ be saved verbatim. Passwords are always padded or
+ truncated to 32 bytes internally. Use ASCII passwords for
+ maximum compatibility.
+ hex_password: If True, interpret the password as a
+ hex-encoded version of the exact encryption key to use, without
+ performing the normal key computation. Useful in forensics.
+ ignore_xref_streams: If True, ignore cross-reference
+ streams. See qpdf documentation.
+ suppress_warnings: If True (default), warnings are not
+ printed to stderr. Use :meth:`pikepdf.Pdf.get_warnings()` to
+ retrieve warnings.
+ attempt_recovery: If True (default), attempt to recover
+ from PDF parsing errors.
+ inherit_page_attributes: If True (default), push attributes
+ set on a group of pages to individual pages
+ access_mode: If ``.default``, pikepdf will
+ decide how to access the file. Currently, it will always
+ selected stream access. To attempt memory mapping and fallback
+ to stream if memory mapping failed, use ``.mmap``. Use
+ ``.mmap_only`` to require memory mapping or fail
+ (this is expected to only be useful for testing). Applications
+ should be prepared to handle the SIGBUS signal on POSIX in
+ the event that the file is successfully mapped but later goes
+ away.
+ allow_overwriting_input: If True, allows calling ``.save()``
+ to overwrite the input file. This is performed by loading the
+ entire input file into memory at open time; this will use more
+ memory and may recent performance especially when the opened
+ file will not be modified.
+
+ Raises:
+ pikepdf.PasswordError: If the password failed to open the
+ file.
+ pikepdf.PdfError: If for other reasons we could not open
+ the file.
+ TypeError: If the type of ``filename_or_stream`` is not
+ usable.
+ FileNotFoundError: If the file was not found.
+
+ Note:
+ When *filename_or_stream* is a stream and the stream is located on a
+ network, pikepdf assumes that the stream using buffering and read caches
+ to achieve reasonable performance. Streams that fetch data over a network
+ in response to every read or seek request, no matter how small, will
+ perform poorly. It may be easier to download a PDF from network to
+ temporary local storage (such as ``io.BytesIO``), manipulate it, and
+ then re-upload it.
+
+ .. versionchanged:: 3.0
+ Keyword arguments now mandatory for everything except the first
+ argument.
+ """
+ if isinstance(filename_or_stream, bytes) and filename_or_stream.startswith(
+ b'%PDF-'
+ ):
+ warn(
+ "It looks like you called with Pdf.open(data) with a bytes-like object "
+ "containing a PDF. This will probably fail because this function "
+ "expects a filename or opened file-like object. Instead, please use "
+ "Pdf.open(BytesIO(data))."
+ )
+
+ tmp_stream, original_filename = None, False
+ if allow_overwriting_input:
+ try:
+ Path(filename_or_stream)
+ except TypeError as error:
+ raise ValueError(
+ '"allow_overwriting_input=True" requires "open" first argument '
+ 'to be a file path'
+ ) from error
+ original_filename = Path(filename_or_stream)
+ with open(original_filename, 'rb') as pdf_file:
+ tmp_stream = BytesIO()
+ shutil.copyfileobj(pdf_file, tmp_stream)
+ pdf = Pdf._open(
+ tmp_stream or filename_or_stream,
+ password=password,
+ hex_password=hex_password,
+ ignore_xref_streams=ignore_xref_streams,
+ suppress_warnings=suppress_warnings,
+ attempt_recovery=attempt_recovery,
+ inherit_page_attributes=inherit_page_attributes,
+ access_mode=access_mode,
+ )
+ pdf._tmp_stream = tmp_stream
+ pdf._original_filename = original_filename
+ return pdf
+
+
+@augments(_ObjectMapping)
+class Extend_ObjectMapping:
+ def get(self, key, default=None) -> Object:
+ try:
+ return self[key]
+ except KeyError:
+ return default
+
+
+def check_is_box(obj) -> None:
+ try:
+ if obj.is_rectangle:
+ return
+ except AttributeError:
+ pass
+
+ try:
+ pdfobj = Array(obj)
+ if pdfobj.is_rectangle:
+ return
+ except Exception as e:
+ raise ValueError("object is not a rectangle") from e
+
+ raise ValueError("object is not a rectangle")
+
+
+@augments(Page)
+class Extend_Page:
+ @property
+ def mediabox(self):
+ """Return page's /MediaBox, in PDF units."""
+ return self._get_mediabox(True)
+
+ @mediabox.setter
+ def mediabox(self, value):
+ check_is_box(value)
+ self.obj['/MediaBox'] = value
+
+ @property
+ def cropbox(self):
+ """Return page's effective /CropBox, in PDF units.
+
+ If the /CropBox is not defined, the /MediaBox is returned.
+ """
+ return self._get_cropbox(True, False)
+
+ @cropbox.setter
+ def cropbox(self, value):
+ check_is_box(value)
+ self.obj['/CropBox'] = value
+
+ @property
+ def trimbox(self):
+ """Return page's effective /TrimBox, in PDF units.
+
+ If the /TrimBox is not defined, the /CropBox is returned (and if
+ /CropBox is not defined, /MediaBox is returned).
+ """
+ return self._get_trimbox(True, False)
+
+ @trimbox.setter
+ def trimbox(self, value):
+ check_is_box(value)
+ self.obj['/TrimBox'] = value
+
+ @property
+ def images(self) -> _ObjectMapping:
+ """Return all regular images associated with this page.
+
+ This method does not recurse into Form XObjects and does not
+ attempt to find inline images.
+ """
+ return self._images
+
+ @property
+ def resources(self) -> Dictionary:
+ """Return this page's resources dictionary."""
+ return self.obj['/Resources']
+
+ def add_resource(
+ self,
+ res: Object,
+ res_type: Name,
+ name: Name | None = None,
+ *,
+ prefix: str = '',
+ replace_existing: bool = True,
+ ) -> Name:
+ """Add a new resource to the page's Resources dictionary.
+
+ If the Resources dictionaries do not exist, they will be created.
+
+ Args:
+ self: The object to add to the resources dictionary.
+ res: The dictionary object to insert into the resources
+ dictionary.
+ res_type: Should be one of the following Resource dictionary types:
+ ExtGState, ColorSpace, Pattern, Shading, XObject, Font, Properties.
+ name: The name of the object. If omitted, a random name will be
+ generated with enough randomness to be globally unique.
+ prefix: A prefix for the name of the object. Allows conveniently
+ namespacing when using random names, e.g. prefix="Im" for images.
+ Mutually exclusive with name parameter.
+ replace_existing: If the name already exists in one of the resource
+ dictionaries, remove it.
+
+ Example:
+ >>> resource_name = pdf.pages[0].add_resource(formxobj, Name.XObject)
+
+ .. versionadded:: 2.3
+
+ .. versionchanged:: 2.14
+ If *res* does not belong to the same `Pdf` that owns this page,
+ a copy of *res* is automatically created and added instead. In previous
+ versions, it was necessary to change for this case manually.
+
+ .. versionchanged:: 4.3.0
+ Returns the name of the overlay in the resources dictionary instead
+ of returning None.
+ """
+ if Name.Resources not in self.obj:
+ self.obj.Resources = Dictionary()
+ elif not isinstance(self.obj.Resources, Dictionary):
+ raise TypeError("Page /Resources exists but is not a dictionary")
+ resources = self.obj.Resources
+
+ if res_type not in resources:
+ resources[res_type] = Dictionary()
+
+ if name is not None and prefix:
+ raise ValueError("Must specify one of name= or prefix=")
+ if name is None:
+ name = Name.random(prefix=prefix)
+
+ for res_dict in resources.as_dict().values():
+ if not isinstance(res_dict, Dictionary):
+ continue
+ if name in res_dict:
+ if replace_existing:
+ del res_dict[name]
+ else:
+ raise ValueError(f"Name {name} already exists in page /Resources")
+
+ resources[res_type][name] = res.with_same_owner_as(self.obj)
+ return name
+
+ def _over_underlay(
+ self,
+ other,
+ rect: Rectangle | None,
+ under: bool,
+ push_stack: bool,
+ shrink: bool,
+ expand: bool,
+ ) -> Name:
+ formx = None
+ if isinstance(other, Page):
+ formx = other.as_form_xobject()
+ elif isinstance(other, Dictionary) and other.get(Name.Type) == Name.Page:
+ formx = Page(other).as_form_xobject()
+ elif (
+ isinstance(other, Stream)
+ and other.get(Name.Type) == Name.XObject
+ and other.get(Name.Subtype) == Name.Form
+ ):
+ formx = other
+
+ if formx is None:
+ raise TypeError(
+ "other object is not something we can convert to Form XObject"
+ )
+
+ if rect is None:
+ rect = Rectangle(self.trimbox)
+
+ formx_placed_name = self.add_resource(formx, Name.XObject)
+ cs = self.calc_form_xobject_placement(
+ formx, formx_placed_name, rect, allow_shrink=shrink, allow_expand=expand
+ )
+
+ if push_stack:
+ self.contents_add(b'q\n', prepend=True) # prepend q
+ self.contents_add(b'Q\n', prepend=False) # i.e. append Q
+
+ self.contents_add(cs, prepend=under)
+ self.contents_coalesce()
+ return formx_placed_name
+
+ def add_overlay(
+ self,
+ other: Object | Page,
+ rect: Rectangle | None = None,
+ *,
+ push_stack: bool = True,
+ shrink: bool = True,
+ expand: bool = True,
+ ) -> Name:
+ """Overlay another object on this page.
+
+ Overlays will be drawn after all previous content, potentially drawing on top
+ of existing content.
+
+ Args:
+ other: A Page or Form XObject to render as an overlay on top of this
+ page.
+ rect: The PDF rectangle (in PDF units) in which to draw the overlay.
+ If omitted, this page's trimbox, cropbox or mediabox (in that order)
+ will be used.
+ push_stack: If True (default), push the graphics stack of the existing
+ content stream to ensure that the overlay is rendered correctly.
+ Officially PDF limits the graphics stack depth to 32. Most
+ viewers will tolerate more, but excessive pushes may cause problems.
+ Multiple content streams may also be coalesced into a single content
+ stream where this parameter is True, since the PDF specification
+ permits PDF writers to coalesce streams as they see fit.
+ shrink: If True (default), allow the object to shrink to fit inside the
+ rectangle. The aspect ratio will be preserved.
+ expand: If True (default), allow the object to expand to fit inside the
+ rectangle. The aspect ratio will be preserved.
+
+ Returns:
+ The name of the Form XObject that contains the overlay.
+
+ .. versionadded:: 2.14
+
+ .. versionchanged:: 4.0.0
+ Added the *push_stack* parameter. Previously, this method behaved
+ as if *push_stack* were False.
+
+ .. versionchanged:: 4.2.0
+ Added the *shrink* and *expand* parameters. Previously, this method
+ behaved as if ``shrink=True, expand=False``.
+
+ .. versionchanged:: 4.3.0
+ Returns the name of the overlay in the resources dictionary instead
+ of returning None.
+ """
+ return self._over_underlay(
+ other,
+ rect,
+ under=False,
+ push_stack=push_stack,
+ expand=expand,
+ shrink=shrink,
+ )
+
+ def add_underlay(
+ self,
+ other: Object | Page,
+ rect: Rectangle | None = None,
+ *,
+ shrink: bool = True,
+ expand: bool = True,
+ ) -> Name:
+ """Underlay another object beneath this page.
+
+ Underlays will be drawn before all other content, so they may be overdrawn
+ partially or completely.
+
+ There is no *push_stack* parameter for this function, since adding an
+ underlay can be done without manipulating the graphics stack.
+
+ Args:
+ other: A Page or Form XObject to render as an underlay underneath this
+ page.
+ rect: The PDF rectangle (in PDF units) in which to draw the underlay.
+ If omitted, this page's trimbox, cropbox or mediabox (in that order)
+ will be used.
+ shrink: If True (default), allow the object to shrink to fit inside the
+ rectangle. The aspect ratio will be preserved.
+ expand: If True (default), allow the object to expand to fit inside the
+ rectangle. The aspect ratio will be preserved.
+
+ Returns:
+ The name of the Form XObject that contains the underlay.
+
+ .. versionadded:: 2.14
+
+ .. versionchanged:: 4.2.0
+ Added the *shrink* and *expand* parameters. Previously, this method
+ behaved as if ``shrink=True, expand=False``. Fixed issue with wrong
+ page rect being selected.
+ """
+ return self._over_underlay(
+ other, rect, under=True, push_stack=False, expand=expand, shrink=shrink
+ )
+
+ def contents_add(self, contents: Stream | bytes, *, prepend: bool = False):
+ """Append or prepend to an existing page's content stream.
+
+ Args:
+ contents: An existing content stream to append or prepend.
+ prepend: Prepend if true, append if false (default).
+
+ .. versionadded:: 2.14
+ """
+ return self._contents_add(contents, prepend=prepend)
+
+ def __getattr__(self, name):
+ return getattr(self.obj, name)
+
+ @augment_override_cpp
+ def __setattr__(self, name, value):
+ if hasattr(self.__class__, name):
+ object.__setattr__(self, name, value)
+ else:
+ setattr(self.obj, name, value)
+
+ @augment_override_cpp
+ def __delattr__(self, name):
+ if hasattr(self.__class__, name):
+ object.__delattr__(self, name)
+ else:
+ delattr(self.obj, name)
+
+ def __getitem__(self, key):
+ return self.obj[key]
+
+ def __setitem__(self, key, value):
+ self.obj[key] = value
+
+ def __delitem__(self, key):
+ del self.obj[key]
+
+ def __contains__(self, key):
+ return key in self.obj
+
+ def get(self, key, default=None):
+ try:
+ return self[key]
+ except KeyError:
+ return default
+
+ def emplace(self, other: Page, retain=(Name.Parent,)):
+ return self.obj.emplace(other.obj, retain=retain)
+
+ def __repr__(self):
+ return (
+ repr(self.obj)
+ .replace('Dictionary', 'Page', 1)
+ .replace('(Type="/Page")', '', 1)
+ )
+
+ def _repr_mimebundle_(self, include=None, exclude=None):
+ data = {}
+ bundle = {'application/pdf', 'image/png'}
+ if include:
+ bundle = {k for k in bundle if k in include}
+ if exclude:
+ bundle = {k for k in bundle if k not in exclude}
+ pagedata = _single_page_pdf(self.obj)
+ if 'application/pdf' in bundle:
+ data['application/pdf'] = pagedata
+ if 'image/png' in bundle:
+ try:
+ data['image/png'] = _mudraw(pagedata, 'png')
+ except (FileNotFoundError, RuntimeError):
+ pass
+ return data
+
+
+@augments(Token)
+class Extend_Token:
+ def __repr__(self):
+ return f'pikepdf.Token({self.type_}, {self.raw_value})'
+
+
+@augments(Rectangle)
+class Extend_Rectangle:
+ def __repr__(self):
+ return f'pikepdf.Rectangle({self.llx}, {self.lly}, {self.urx}, {self.ury})'
+
+ def __hash__(self):
+ return hash((self.llx, self.lly, self.urx, self.ury))
+
+
+@augments(Attachments)
+class Extend_Attachments(MutableMapping):
+ def __getitem__(self, k: str) -> AttachedFileSpec:
+ filespec = self._get_filespec(k)
+ if filespec is None:
+ raise KeyError(k)
+ return filespec
+
+ def __setitem__(self, k: str, v: AttachedFileSpec) -> None:
+ if not v.filename:
+ v.filename = k
+ return self._add_replace_filespec(k, v)
+
+ def __delitem__(self, k: str) -> None:
+ return self._remove_filespec(k)
+
+ def __len__(self):
+ return len(self._get_all_filespecs())
+
+ def __iter__(self) -> Iterator[str]:
+ yield from self._get_all_filespecs()
+
+ def __repr__(self):
+ return f"<pikepdf._qpdf.Attachments with {len(self)} attached files>"
+
+
+@augments(AttachedFileSpec)
+class Extend_AttachedFileSpec:
+ @staticmethod
+ def from_filepath(pdf: Pdf, path: Path | str, *, description: str = ''):
+ """Construct a file specification from a file path.
+
+ This function will automatically add a creation and modified date
+ using the file system, and a MIME type inferred from the file's extension.
+
+ If the data required for the attach is in memory, use
+ :meth:`pikepdf.AttachedFileSpec` instead.
+
+ Args:
+ pdf: The Pdf to attach this file specification to.
+ path: A file path for the file to attach to this Pdf.
+ description: An optional description. May be shown to the user in
+ PDF viewers.
+ """
+ mime, _ = mimetypes.guess_type(str(path))
+ if mime is None:
+ mime = ''
+ if not isinstance(path, Path):
+ path = Path(path)
+
+ stat = path.stat()
+ return AttachedFileSpec(
+ pdf,
+ path.read_bytes(),
+ description=description,
+ filename=str(path.name),
+ mime_type=mime,
+ creation_date=encode_pdf_date(
+ datetime.datetime.fromtimestamp(stat.st_ctime)
+ ),
+ mod_date=encode_pdf_date(datetime.datetime.fromtimestamp(stat.st_mtime)),
+ )
+
+ def __repr__(self):
+ if self.filename:
+ return (
+ f"<pikepdf._qpdf.AttachedFileSpec for {self.filename!r}, "
+ f"description {self.description!r}>"
+ )
+ return f"<pikepdf._qpdf.AttachedFileSpec description {self.description!r}>"
+
+
+@augments(AttachedFile)
+class Extend_AttachedFile:
+ @property
+ def creation_date(self) -> datetime.datetime | None:
+ if not self._creation_date:
+ return None
+ return decode_pdf_date(self._creation_date)
+
+ @creation_date.setter
+ def creation_date(self, value: datetime.datetime):
+ self._creation_date = encode_pdf_date(value)
+
+ @property
+ def mod_date(self) -> datetime.datetime | None:
+ if not self._mod_date:
+ return None
+ return decode_pdf_date(self._mod_date)
+
+ @mod_date.setter
+ def mod_date(self, value: datetime.datetime):
+ self._mod_date = encode_pdf_date(value)
+
+ def read_bytes(self) -> bytes:
+ return self.obj.read_bytes()
+
+ def __repr__(self):
+ return (
+ f'<pikepdf._qpdf.AttachedFile objid={self.obj.objgen} size={self.size} '
+ f'mime_type={self.mime_type} creation_date={self.creation_date} '
+ f'mod_date={self.mod_date}>'
+ )
+
+
+@augments(NameTree)
+class Extend_NameTree:
+ def keys(self):
+ return KeysView(self._as_map())
+
+ def values(self):
+ return ValuesView(self._as_map())
+
+ def items(self):
+ return ItemsView(self._as_map())
+
+ get = MutableMapping.get
+ pop = MutableMapping.pop
+ popitem = MutableMapping.popitem
+ clear = MutableMapping.clear
+ update = MutableMapping.update
+ setdefault = MutableMapping.setdefault
+
+
+MutableMapping.register(NameTree)
+
+
+@augments(NumberTree)
+class Extend_NumberTree:
+ def keys(self):
+ return KeysView(self._as_map())
+
+ def values(self):
+ return ValuesView(self._as_map())
+
+ def items(self):
+ return ItemsView(self._as_map())
+
+ get = MutableMapping.get
+ pop = MutableMapping.pop
+ popitem = MutableMapping.popitem
+ clear = MutableMapping.clear
+ update = MutableMapping.update
+ setdefault = MutableMapping.setdefault
+
+
+MutableMapping.register(NumberTree)