aboutsummaryrefslogtreecommitdiffstats
path: root/env/lib/python3.10/site-packages/pikepdf/_methods.py
diff options
context:
space:
mode:
Diffstat (limited to 'env/lib/python3.10/site-packages/pikepdf/_methods.py')
-rw-r--r--env/lib/python3.10/site-packages/pikepdf/_methods.py1340
1 files changed, 0 insertions, 1340 deletions
diff --git a/env/lib/python3.10/site-packages/pikepdf/_methods.py b/env/lib/python3.10/site-packages/pikepdf/_methods.py
deleted file mode 100644
index 25e1d95..0000000
--- a/env/lib/python3.10/site-packages/pikepdf/_methods.py
+++ /dev/null
@@ -1,1340 +0,0 @@
-# SPDX-FileCopyrightText: 2022 James R. Barlow
-# SPDX-License-Identifier: MPL-2.0
-
-"""Implement some features in Python and monkey-patch them onto C++ classes.
-
-In several cases the implementation of some higher levels features might as
-well be in Python. Fortunately we can attach Python methods to C++ class
-bindings after the fact.
-
-We can also move the implementation to C++ if desired.
-"""
-
-from __future__ import annotations
-
-import datetime
-import mimetypes
-import shutil
-from collections.abc import KeysView, MutableMapping
-from decimal import Decimal
-from io import BytesIO
-from pathlib import Path
-from subprocess import run
-from tempfile import NamedTemporaryFile
-from typing import BinaryIO, Callable, ItemsView, Iterator, TypeVar, ValuesView
-from warnings import warn
-
-from . import Array, Dictionary, Name, Object, Page, Pdf, Stream
-from ._augments import augment_override_cpp, augments
-from ._qpdf import (
- AccessMode,
- AttachedFile,
- AttachedFileSpec,
- Attachments,
- NameTree,
- NumberTree,
- ObjectStreamMode,
- Rectangle,
- StreamDecodeLevel,
- StreamParser,
- Token,
- _ObjectMapping,
-)
-from .models import Encryption, EncryptionInfo, Outline, PdfMetadata, Permissions
-from .models.metadata import decode_pdf_date, encode_pdf_date
-
-# pylint: disable=no-member,unsupported-membership-test,unsubscriptable-object
-# mypy: ignore-errors
-
-__all__ = []
-
-Numeric = TypeVar('Numeric', int, float, Decimal)
-
-
-def _single_page_pdf(page) -> bytes:
- """Construct a single page PDF from the provided page in memory."""
- pdf = Pdf.new()
- pdf.pages.append(page)
- bio = BytesIO()
- pdf.save(bio)
- bio.seek(0)
- return bio.read()
-
-
-def _mudraw(buffer, fmt) -> bytes:
- """Use mupdf draw to rasterize the PDF in the memory buffer."""
- # mudraw cannot read from stdin so NamedTemporaryFile is required
- with NamedTemporaryFile(suffix='.pdf') as tmp_in:
- tmp_in.write(buffer)
- tmp_in.seek(0)
- tmp_in.flush()
-
- proc = run(
- ['mudraw', '-F', fmt, '-o', '-', tmp_in.name],
- capture_output=True,
- check=True,
- )
- return proc.stdout
-
-
-@augments(Object)
-class Extend_Object:
- def _ipython_key_completions_(self):
- if isinstance(self, (Dictionary, Stream)):
- return self.keys()
- return None
-
- def emplace(self, other: Object, retain=(Name.Parent,)):
- """Copy all items from other without making a new object.
-
- Particularly when working with pages, it may be desirable to remove all
- of the existing page's contents and emplace (insert) a new page on top
- of it, in a way that preserves all links and references to the original
- page. (Or similarly, for other Dictionary objects in a PDF.)
-
- Any Dictionary keys in the iterable *retain* are preserved. By default,
- /Parent is retained.
-
- When a page is assigned (``pdf.pages[0] = new_page``), only the
- application knows if references to the original the original page are
- still valid. For example, a PDF optimizer might restructure a page
- object into another visually similar one, and references would be valid;
- but for a program that reorganizes page contents such as a N-up
- compositor, references may not be valid anymore.
-
- This method takes precautions to ensure that child objects in common
- with ``self`` and ``other`` are not inadvertently deleted.
-
- Example:
- >>> pdf.pages[0].objgen
- (16, 0)
- >>> pdf.pages[0].emplace(pdf.pages[1])
- >>> pdf.pages[0].objgen
- (16, 0) # Same object
-
- .. versionchanged:: 2.11.1
- Added the *retain* argument.
- """
- if not self.same_owner_as(other):
- raise TypeError("Objects must have the same owner for emplace()")
-
- # .keys() returns strings, so make all strings
- retain = {str(k) for k in retain}
- self_keys = set(self.keys())
- other_keys = set(other.keys())
-
- assert all(isinstance(k, str) for k in (retain | self_keys | other_keys))
-
- del_keys = self_keys - other_keys - retain
- for k in (k for k in other_keys if k not in retain):
- self[k] = other[k] # pylint: disable=unsupported-assignment-operation
- for k in del_keys:
- del self[k] # pylint: disable=unsupported-delete-operation
-
- def _type_check_write(self, filter_, decode_parms):
- if isinstance(filter_, list):
- filter_ = Array(filter_)
- filter_ = filter_.wrap_in_array()
-
- if isinstance(decode_parms, list):
- decode_parms = Array(decode_parms)
- elif decode_parms is None:
- decode_parms = Array([])
- else:
- decode_parms = decode_parms.wrap_in_array()
-
- if not all(isinstance(item, Name) for item in filter_):
- raise TypeError(
- "filter must be: pikepdf.Name or pikepdf.Array([pikepdf.Name])"
- )
- if not all(
- (isinstance(item, Dictionary) or item is None) for item in decode_parms
- ):
- raise TypeError(
- "decode_parms must be: pikepdf.Dictionary or "
- "pikepdf.Array([pikepdf.Dictionary])"
- )
- if len(decode_parms) != 0 and len(filter_) != len(decode_parms):
- raise ValueError(
- f"filter ({repr(filter_)}) and decode_parms "
- f"({repr(decode_parms)}) must be arrays of same length"
- )
- if len(filter_) == 1:
- filter_ = filter_[0]
- if len(decode_parms) == 0:
- decode_parms = None
- elif len(decode_parms) == 1:
- decode_parms = decode_parms[0]
- return filter_, decode_parms
-
- def write(
- self,
- data: bytes,
- *,
- filter: Name | Array | None = None,
- decode_parms: Dictionary | Array | None = None,
- type_check: bool = True,
- ): # pylint: disable=redefined-builtin
- """
- Replace stream object's data with new (possibly compressed) `data`.
-
- `filter` and `decode_parms` describe any compression that is already
- present on the input `data`. For example, if your data is already
- compressed with the Deflate algorithm, you would set
- ``filter=Name.FlateDecode``.
-
- When writing the PDF in :meth:`pikepdf.Pdf.save`,
- pikepdf may change the compression or apply compression to data that was
- not compressed, depending on the parameters given to that function. It
- will never change lossless to lossy encoding.
-
- PNG and TIFF images, even if compressed, cannot be directly inserted
- into a PDF and displayed as images.
-
- Args:
- data: the new data to use for replacement
- filter: The filter(s) with which the
- data is (already) encoded
- decode_parms: Parameters for the
- filters with which the object is encode
- type_check: Check arguments; use False only if you want to
- intentionally create malformed PDFs.
-
- If only one `filter` is specified, it may be a name such as
- `Name('/FlateDecode')`. If there are multiple filters, then array
- of names should be given.
-
- If there is only one filter, `decode_parms` is a Dictionary of
- parameters for that filter. If there are multiple filters, then
- `decode_parms` is an Array of Dictionary, where each array index
- is corresponds to the filter.
- """
- if type_check and filter is not None:
- filter, decode_parms = self._type_check_write(filter, decode_parms)
-
- self._write(data, filter=filter, decode_parms=decode_parms)
-
-
-@augments(Pdf)
-class Extend_Pdf:
- def _repr_mimebundle_(
- self, include=None, exclude=None
- ): # pylint: disable=unused-argument
- """
- Present options to IPython or Jupyter for rich display of this object.
-
- See https://ipython.readthedocs.io/en/stable/config/integrating.html#rich-display
- """
- bio = BytesIO()
- self.save(bio)
- bio.seek(0)
-
- data = {'application/pdf': bio.read()}
- return data
-
- @property
- def docinfo(self) -> Dictionary:
- """
- Access the (deprecated) document information dictionary.
-
- The document information dictionary is a brief metadata record that can
- store some information about the origin of a PDF. It is deprecated and
- removed in the PDF 2.0 specification (not deprecated from the
- perspective of pikepdf). Use the ``.open_metadata()`` API instead, which
- will edit the modern (and unfortunately, more complicated) XMP metadata
- object and synchronize changes to the document information dictionary.
-
- This property simplifies access to the actual document information
- dictionary and ensures that it is created correctly if it needs to be
- created.
-
- A new, empty dictionary will be created if this property is accessed
- and dictionary does not exist. (This is to ensure that convenient code
- like ``pdf.docinfo[Name.Title] = "Title"`` will work when the dictionary
- does not exist at all.)
-
- You can delete the document information dictionary by deleting this property,
- ``del pdf.docinfo``. Note that accessing the property after deleting it
- will re-create with a new, empty dictionary.
-
- .. versionchanged: 2.4
- Added support for ``del pdf.docinfo``.
- """
- if Name.Info not in self.trailer:
- self.trailer.Info = self.make_indirect(Dictionary())
- return self.trailer.Info
-
- @docinfo.setter
- def docinfo(self, new_docinfo: Dictionary):
- if not new_docinfo.is_indirect:
- raise ValueError(
- "docinfo must be an indirect object - use Pdf.make_indirect"
- )
- self.trailer.Info = new_docinfo
-
- @docinfo.deleter
- def docinfo(self):
- if Name.Info in self.trailer:
- del self.trailer.Info
-
- def open_metadata(
- self,
- set_pikepdf_as_editor: bool = True,
- update_docinfo: bool = True,
- strict: bool = False,
- ) -> PdfMetadata:
- """
- Open the PDF's XMP metadata for editing.
-
- There is no ``.close()`` function on the metadata object, since this is
- intended to be used inside a ``with`` block only.
-
- For historical reasons, certain parts of PDF metadata are stored in
- two different locations and formats. This feature coordinates edits so
- that both types of metadata are updated consistently and "atomically"
- (assuming single threaded access). It operates on the ``Pdf`` in memory,
- not any file on disk. To persist metadata changes, you must still use
- ``Pdf.save()``.
-
- Example:
- >>> with pdf.open_metadata() as meta:
- meta['dc:title'] = 'Set the Dublic Core Title'
- meta['dc:description'] = 'Put the Abstract here'
-
- Args:
- set_pikepdf_as_editor: Automatically update the metadata ``pdf:Producer``
- to show that this version of pikepdf is the most recent software to
- modify the metadata, and ``xmp:MetadataDate`` to timestamp the update.
- Recommended, except for testing.
-
- update_docinfo: Update the standard fields of DocumentInfo
- (the old PDF metadata dictionary) to match the corresponding
- XMP fields. The mapping is described in
- :attr:`PdfMetadata.DOCINFO_MAPPING`. Nonstandard DocumentInfo
- fields and XMP metadata fields with no DocumentInfo equivalent
- are ignored.
-
- strict: If ``False`` (the default), we aggressively attempt
- to recover from any parse errors in XMP, and if that fails we
- overwrite the XMP with an empty XMP record. If ``True``, raise
- errors when either metadata bytes are not valid and well-formed
- XMP (and thus, XML). Some trivial cases that are equivalent to
- empty or incomplete "XMP skeletons" are never treated as errors,
- and always replaced with a proper empty XMP block. Certain
- errors may be logged.
- """
- return PdfMetadata(
- self,
- pikepdf_mark=set_pikepdf_as_editor,
- sync_docinfo=update_docinfo,
- overwrite_invalid_xml=not strict,
- )
-
- def open_outline(self, max_depth: int = 15, strict: bool = False) -> Outline:
- """
- Open the PDF outline ("bookmarks") for editing.
-
- Recommend for use in a ``with`` block. Changes are committed to the
- PDF when the block exits. (The ``Pdf`` must still be opened.)
-
- Example:
- >>> with pdf.open_outline() as outline:
- outline.root.insert(0, OutlineItem('Intro', 0))
-
- Args:
- max_depth: Maximum recursion depth of the outline to be
- imported and re-written to the document. ``0`` means only
- considering the root level, ``1`` the first-level
- sub-outline of each root element, and so on. Items beyond
- this depth will be silently ignored. Default is ``15``.
- strict: With the default behavior (set to ``False``),
- structural errors (e.g. reference loops) in the PDF document
- will only cancel processing further nodes on that particular
- level, recovering the valid parts of the document outline
- without raising an exception. When set to ``True``, any such
- error will raise an ``OutlineStructureError``, leaving the
- invalid parts in place.
- Similarly, outline objects that have been accidentally
- duplicated in the ``Outline`` container will be silently
- fixed (i.e. reproduced as new objects) or raise an
- ``OutlineStructureError``.
- """
- return Outline(self, max_depth=max_depth, strict=strict)
-
- def make_stream(self, data: bytes, d=None, **kwargs) -> Stream:
- """
- Create a new pikepdf.Stream object that is attached to this PDF.
-
- See:
- :meth:`pikepdf.Stream.__new__`
-
- """
- return Stream(self, data, d, **kwargs)
-
- def add_blank_page(
- self, *, page_size: tuple[Numeric, Numeric] = (612.0, 792.0)
- ) -> Page:
- """
- Add a blank page to this PDF.
-
- If pages already exist, the page will be added to the end. Pages may be
- reordered using ``Pdf.pages``.
-
- The caller may add content to the page by modifying its objects after creating
- it.
-
- Args:
- page_size (tuple): The size of the page in PDF units (1/72 inch or 0.35mm).
- Default size is set to a US Letter 8.5" x 11" page.
- """
- for dim in page_size:
- if not (3 <= dim <= 14400):
- raise ValueError('Page size must be between 3 and 14400 PDF units')
-
- page_dict = Dictionary(
- Type=Name.Page,
- MediaBox=Array([0, 0, page_size[0], page_size[1]]),
- Contents=self.make_stream(b''),
- Resources=Dictionary(),
- )
- page_obj = self.make_indirect(page_dict)
- self._add_page(page_obj, first=False)
- return Page(page_obj)
-
- def close(self) -> None:
- """
- Close a ``Pdf`` object and release resources acquired by pikepdf.
-
- If pikepdf opened the file handle it will close it (e.g. when opened with a file
- path). If the caller opened the file for pikepdf, the caller close the file.
- ``with`` blocks will call close when exit.
-
- pikepdf lazily loads data from PDFs, so some :class:`pikepdf.Object` may
- implicitly depend on the :class:`pikepdf.Pdf` being open. This is always the
- case for :class:`pikepdf.Stream` but can be true for any object. Do not close
- the `Pdf` object if you might still be accessing content from it.
-
- When an ``Object`` is copied from one ``Pdf`` to another, the ``Object`` is copied into
- the destination ``Pdf`` immediately, so after accessing all desired information
- from the source ``Pdf`` it may be closed.
-
- .. versionchanged:: 3.0
- In pikepdf 2.x, this function actually worked by resetting to a very short
- empty PDF. Code that relied on this quirk may not function correctly.
- """
- self._close()
- if getattr(self, '_tmp_stream', None):
- self._tmp_stream.close()
-
- def __enter__(self):
- return self
-
- def __exit__(self, exc_type, exc_value, traceback):
- self.close()
-
- @property
- def allow(self) -> Permissions:
- """
- Report permissions associated with this PDF.
-
- By default these permissions will be replicated when the PDF is
- saved. Permissions may also only be changed when a PDF is being saved,
- and are only available for encrypted PDFs. If a PDF is not encrypted,
- all operations are reported as allowed.
-
- pikepdf has no way of enforcing permissions.
- """
- results = {}
- for field in Permissions._fields:
- results[field] = getattr(self, '_allow_' + field)
- return Permissions(**results)
-
- @property
- def encryption(self) -> EncryptionInfo:
- """
- Report encryption information for this PDF.
-
- Encryption settings may only be changed when a PDF is saved.
- """
- return EncryptionInfo(self._encryption_data)
-
- def check(self) -> list[str]:
- """
- Check if PDF is well-formed.
-
- Similar to ``qpdf --check``.
- """
-
- class DiscardingParser(StreamParser):
- def __init__(self): # pylint: disable=useless-super-delegation
- super().__init__() # required for C++
-
- def handle_object(self, *_args):
- pass
-
- def handle_eof(self):
- pass
-
- problems: list[str] = []
-
- self._decode_all_streams_and_discard()
-
- discarding_parser = DiscardingParser()
- for page in self.pages:
- page.parse_contents(discarding_parser)
-
- for warning in self.get_warnings():
- problems.append("WARNING: " + warning)
-
- return problems
-
- def save(
- self,
- filename_or_stream: Path | str | BinaryIO | None = None,
- *,
- static_id: bool = False,
- preserve_pdfa: bool = True,
- min_version: str | tuple[str, int] = "",
- force_version: str | tuple[str, int] = "",
- fix_metadata_version: bool = True,
- compress_streams: bool = True,
- stream_decode_level: StreamDecodeLevel | None = None,
- object_stream_mode: ObjectStreamMode = ObjectStreamMode.preserve,
- normalize_content: bool = False,
- linearize: bool = False,
- qdf: bool = False,
- progress: Callable[[int], None] = None,
- encryption: Encryption | bool | None = None,
- recompress_flate: bool = False,
- deterministic_id: bool = False,
- ) -> None:
- """
- Save all modifications to this :class:`pikepdf.Pdf`.
-
- Args:
- filename_or_stream: Where to write the output. If a file
- exists in this location it will be overwritten.
- If the file was opened with ``allow_overwriting_input=True``,
- then it is permitted to overwrite the original file, and
- this parameter may be omitted to implicitly use the original
- filename. Otherwise, the filename may not be the same as the
- input file, as overwriting the input file would corrupt data
- since pikepdf using lazy loading.
-
- static_id: Indicates that the ``/ID`` metadata, normally
- calculated as a hash of certain PDF contents and metadata
- including the current time, should instead be set to a static
- value. Only use this for debugging and testing. Use
- ``deterministic_id`` if you want to get the same ``/ID`` for
- the same document contents.
- preserve_pdfa: Ensures that the file is generated in a
- manner compliant with PDF/A and other stricter variants.
- This should be True, the default, in most cases.
-
- min_version: Sets the minimum version of PDF
- specification that should be required. If left alone QPDF
- will decide. If a tuple, the second element is an integer, the
- extension level. If the version number is not a valid format,
- QPDF will decide what to do.
- force_version: Override the version recommend by QPDF,
- potentially creating an invalid file that does not display
- in old versions. See QPDF manual for details. If a tuple, the
- second element is an integer, the extension level.
- fix_metadata_version: If ``True`` (default) and the XMP metadata
- contains the optional PDF version field, ensure the version in
- metadata is correct. If the XMP metadata does not contain a PDF
- version field, none will be added. To ensure that the field is
- added, edit the metadata and insert a placeholder value in
- ``pdf:PDFVersion``. If XMP metadata does not exist, it will
- not be created regardless of the value of this argument.
-
- object_stream_mode:
- ``disable`` prevents the use of object streams.
- ``preserve`` keeps object streams from the input file.
- ``generate`` uses object streams wherever possible,
- creating the smallest files but requiring PDF 1.5+.
-
- compress_streams: Enables or disables the compression of
- stream objects in the PDF that are created without specifying
- any compression setting. Metadata is never compressed.
- By default this is set to ``True``, and should be except
- for debugging. Existing streams in the PDF or streams will not
- be modified. To decompress existing streams, you must set
- both ``compress_streams=False`` and ``stream_decode_level``
- to the desired decode level (e.g. ``.generalized`` will
- decompress most non-image content).
-
- stream_decode_level: Specifies how
- to encode stream objects. See documentation for
- :class:`pikepdf.StreamDecodeLevel`.
-
- recompress_flate: When disabled (the default), qpdf does not
- uncompress and recompress streams compressed with the Flate
- compression algorithm. If True, pikepdf will instruct qpdf to
- do this, which may be useful if recompressing streams to a
- higher compression level.
-
- normalize_content: Enables parsing and reformatting the
- content stream within PDFs. This may debugging PDFs easier.
-
- linearize: Enables creating linear or "fast web view",
- where the file's contents are organized sequentially so that
- a viewer can begin rendering before it has the whole file.
- As a drawback, it tends to make files larger.
-
- qdf: Save output QDF mode. QDF mode is a special output
- mode in QPDF to allow editing of PDFs in a text editor. Use
- the program ``fix-qdf`` to fix convert back to a standard
- PDF.
-
- progress: Specify a callback function that is called
- as the PDF is written. The function will be called with an
- integer between 0-100 as the sole parameter, the progress
- percentage. This function may not access or modify the PDF
- while it is being written, or data corruption will almost
- certainly occur.
-
- encryption: If ``False``
- or omitted, existing encryption will be removed. If ``True``
- encryption settings are copied from the originating PDF.
- Alternately, an ``Encryption`` object may be provided that
- sets the parameters for new encryption.
-
- deterministic_id: Indicates that the ``/ID`` metadata, normally
- calculated as a hash of certain PDF contents and metadata
- including the current time, should instead be computed using
- only deterministic data like the file contents. At a small
- runtime cost, this enables generation of the same ``/ID`` if
- the same inputs are converted in the same way multiple times.
- Does not work for encrypted files.
-
- Raises:
- PdfError
- ForeignObjectError
- ValueError
-
- You may call ``.save()`` multiple times with different parameters
- to generate different versions of a file, and you *may* continue
- to modify the file after saving it. ``.save()`` does not modify
- the ``Pdf`` object in memory, except possibly by updating the XMP
- metadata version with ``fix_metadata_version``.
-
- .. note::
-
- :meth:`pikepdf.Pdf.remove_unreferenced_resources` before saving
- may eliminate unnecessary resources from the output file if there
- are any objects (such as images) that are referenced in a page's
- Resources dictionary but never called in the page's content stream.
-
- .. note::
-
- pikepdf can read PDFs with incremental updates, but always
- coalesces any incremental updates into a single non-incremental
- PDF file when saving.
-
- .. versionchanged:: 2.7
- Added *recompress_flate*.
-
- .. versionchanged:: 3.0
- Keyword arguments now mandatory for everything except the first
- argument.
- """
- if not filename_or_stream and getattr(self, '_original_filename', None):
- filename_or_stream = self._original_filename
- if not filename_or_stream:
- raise ValueError(
- "Cannot save to original filename because the original file was "
- "not opening using Pdf.open(..., allow_overwriting_input=True). "
- "Either specify a new destination filename/file stream or open "
- "with allow_overwriting_input=True. If this Pdf was created using "
- "Pdf.new(), you must specify a destination object since there is "
- "no original filename to save to."
- )
- self._save(
- filename_or_stream,
- static_id=static_id,
- preserve_pdfa=preserve_pdfa,
- min_version=min_version,
- force_version=force_version,
- fix_metadata_version=fix_metadata_version,
- compress_streams=compress_streams,
- stream_decode_level=stream_decode_level,
- object_stream_mode=object_stream_mode,
- normalize_content=normalize_content,
- linearize=linearize,
- qdf=qdf,
- progress=progress,
- encryption=encryption,
- samefile_check=getattr(self, '_tmp_stream', None) is None,
- recompress_flate=recompress_flate,
- deterministic_id=deterministic_id,
- )
-
- @staticmethod
- def open(
- filename_or_stream: Path | str | BinaryIO,
- *,
- password: str | bytes = "",
- hex_password: bool = False,
- ignore_xref_streams: bool = False,
- suppress_warnings: bool = True,
- attempt_recovery: bool = True,
- inherit_page_attributes: bool = True,
- access_mode: AccessMode = AccessMode.default,
- allow_overwriting_input: bool = False,
- ) -> Pdf:
- """
- Open an existing file at *filename_or_stream*.
-
- If *filename_or_stream* is path-like, the file will be opened for reading.
- The file should not be modified by another process while it is open in
- pikepdf, or undefined behavior may occur. This is because the file may be
- lazily loaded. Despite this restriction, pikepdf does not try to use any OS
- services to obtain an exclusive lock on the file. Some applications may
- want to attempt this or copy the file to a temporary location before
- editing. This behaviour changes if *allow_overwriting_input* is set: the whole
- file is then read and copied to memory, so that pikepdf can overwrite it
- when calling ``.save()``.
-
- When this function is called with a stream-like object, you must ensure
- that the data it returns cannot be modified, or undefined behavior will
- occur.
-
- Any changes to the file must be persisted by using ``.save()``.
-
- If *filename_or_stream* has ``.read()`` and ``.seek()`` methods, the file
- will be accessed as a readable binary stream. pikepdf will read the
- entire stream into a private buffer.
-
- ``.open()`` may be used in a ``with``-block; ``.close()`` will be called when
- the block exits, if applicable.
-
- Whenever pikepdf opens a file, it will close it. If you open the file
- for pikepdf or give it a stream-like object to read from, you must
- release that object when appropriate.
-
- Examples:
- >>> with Pdf.open("test.pdf") as pdf:
- ...
-
- >>> pdf = Pdf.open("test.pdf", password="rosebud")
-
- Args:
- filename_or_stream: Filename or Python readable and seekable file
- stream of PDF to open.
- password: User or owner password to open an
- encrypted PDF. If the type of this parameter is ``str``
- it will be encoded as UTF-8. If the type is ``bytes`` it will
- be saved verbatim. Passwords are always padded or
- truncated to 32 bytes internally. Use ASCII passwords for
- maximum compatibility.
- hex_password: If True, interpret the password as a
- hex-encoded version of the exact encryption key to use, without
- performing the normal key computation. Useful in forensics.
- ignore_xref_streams: If True, ignore cross-reference
- streams. See qpdf documentation.
- suppress_warnings: If True (default), warnings are not
- printed to stderr. Use :meth:`pikepdf.Pdf.get_warnings()` to
- retrieve warnings.
- attempt_recovery: If True (default), attempt to recover
- from PDF parsing errors.
- inherit_page_attributes: If True (default), push attributes
- set on a group of pages to individual pages
- access_mode: If ``.default``, pikepdf will
- decide how to access the file. Currently, it will always
- selected stream access. To attempt memory mapping and fallback
- to stream if memory mapping failed, use ``.mmap``. Use
- ``.mmap_only`` to require memory mapping or fail
- (this is expected to only be useful for testing). Applications
- should be prepared to handle the SIGBUS signal on POSIX in
- the event that the file is successfully mapped but later goes
- away.
- allow_overwriting_input: If True, allows calling ``.save()``
- to overwrite the input file. This is performed by loading the
- entire input file into memory at open time; this will use more
- memory and may recent performance especially when the opened
- file will not be modified.
-
- Raises:
- pikepdf.PasswordError: If the password failed to open the
- file.
- pikepdf.PdfError: If for other reasons we could not open
- the file.
- TypeError: If the type of ``filename_or_stream`` is not
- usable.
- FileNotFoundError: If the file was not found.
-
- Note:
- When *filename_or_stream* is a stream and the stream is located on a
- network, pikepdf assumes that the stream using buffering and read caches
- to achieve reasonable performance. Streams that fetch data over a network
- in response to every read or seek request, no matter how small, will
- perform poorly. It may be easier to download a PDF from network to
- temporary local storage (such as ``io.BytesIO``), manipulate it, and
- then re-upload it.
-
- .. versionchanged:: 3.0
- Keyword arguments now mandatory for everything except the first
- argument.
- """
- if isinstance(filename_or_stream, bytes) and filename_or_stream.startswith(
- b'%PDF-'
- ):
- warn(
- "It looks like you called with Pdf.open(data) with a bytes-like object "
- "containing a PDF. This will probably fail because this function "
- "expects a filename or opened file-like object. Instead, please use "
- "Pdf.open(BytesIO(data))."
- )
-
- tmp_stream, original_filename = None, False
- if allow_overwriting_input:
- try:
- Path(filename_or_stream)
- except TypeError as error:
- raise ValueError(
- '"allow_overwriting_input=True" requires "open" first argument '
- 'to be a file path'
- ) from error
- original_filename = Path(filename_or_stream)
- with open(original_filename, 'rb') as pdf_file:
- tmp_stream = BytesIO()
- shutil.copyfileobj(pdf_file, tmp_stream)
- pdf = Pdf._open(
- tmp_stream or filename_or_stream,
- password=password,
- hex_password=hex_password,
- ignore_xref_streams=ignore_xref_streams,
- suppress_warnings=suppress_warnings,
- attempt_recovery=attempt_recovery,
- inherit_page_attributes=inherit_page_attributes,
- access_mode=access_mode,
- )
- pdf._tmp_stream = tmp_stream
- pdf._original_filename = original_filename
- return pdf
-
-
-@augments(_ObjectMapping)
-class Extend_ObjectMapping:
- def get(self, key, default=None) -> Object:
- try:
- return self[key]
- except KeyError:
- return default
-
-
-def check_is_box(obj) -> None:
- try:
- if obj.is_rectangle:
- return
- except AttributeError:
- pass
-
- try:
- pdfobj = Array(obj)
- if pdfobj.is_rectangle:
- return
- except Exception as e:
- raise ValueError("object is not a rectangle") from e
-
- raise ValueError("object is not a rectangle")
-
-
-@augments(Page)
-class Extend_Page:
- @property
- def mediabox(self):
- """Return page's /MediaBox, in PDF units."""
- return self._get_mediabox(True)
-
- @mediabox.setter
- def mediabox(self, value):
- check_is_box(value)
- self.obj['/MediaBox'] = value
-
- @property
- def cropbox(self):
- """Return page's effective /CropBox, in PDF units.
-
- If the /CropBox is not defined, the /MediaBox is returned.
- """
- return self._get_cropbox(True, False)
-
- @cropbox.setter
- def cropbox(self, value):
- check_is_box(value)
- self.obj['/CropBox'] = value
-
- @property
- def trimbox(self):
- """Return page's effective /TrimBox, in PDF units.
-
- If the /TrimBox is not defined, the /CropBox is returned (and if
- /CropBox is not defined, /MediaBox is returned).
- """
- return self._get_trimbox(True, False)
-
- @trimbox.setter
- def trimbox(self, value):
- check_is_box(value)
- self.obj['/TrimBox'] = value
-
- @property
- def images(self) -> _ObjectMapping:
- """Return all regular images associated with this page.
-
- This method does not recurse into Form XObjects and does not
- attempt to find inline images.
- """
- return self._images
-
- @property
- def resources(self) -> Dictionary:
- """Return this page's resources dictionary."""
- return self.obj['/Resources']
-
- def add_resource(
- self,
- res: Object,
- res_type: Name,
- name: Name | None = None,
- *,
- prefix: str = '',
- replace_existing: bool = True,
- ) -> Name:
- """Add a new resource to the page's Resources dictionary.
-
- If the Resources dictionaries do not exist, they will be created.
-
- Args:
- self: The object to add to the resources dictionary.
- res: The dictionary object to insert into the resources
- dictionary.
- res_type: Should be one of the following Resource dictionary types:
- ExtGState, ColorSpace, Pattern, Shading, XObject, Font, Properties.
- name: The name of the object. If omitted, a random name will be
- generated with enough randomness to be globally unique.
- prefix: A prefix for the name of the object. Allows conveniently
- namespacing when using random names, e.g. prefix="Im" for images.
- Mutually exclusive with name parameter.
- replace_existing: If the name already exists in one of the resource
- dictionaries, remove it.
-
- Example:
- >>> resource_name = pdf.pages[0].add_resource(formxobj, Name.XObject)
-
- .. versionadded:: 2.3
-
- .. versionchanged:: 2.14
- If *res* does not belong to the same `Pdf` that owns this page,
- a copy of *res* is automatically created and added instead. In previous
- versions, it was necessary to change for this case manually.
-
- .. versionchanged:: 4.3.0
- Returns the name of the overlay in the resources dictionary instead
- of returning None.
- """
- if Name.Resources not in self.obj:
- self.obj.Resources = Dictionary()
- elif not isinstance(self.obj.Resources, Dictionary):
- raise TypeError("Page /Resources exists but is not a dictionary")
- resources = self.obj.Resources
-
- if res_type not in resources:
- resources[res_type] = Dictionary()
-
- if name is not None and prefix:
- raise ValueError("Must specify one of name= or prefix=")
- if name is None:
- name = Name.random(prefix=prefix)
-
- for res_dict in resources.as_dict().values():
- if not isinstance(res_dict, Dictionary):
- continue
- if name in res_dict:
- if replace_existing:
- del res_dict[name]
- else:
- raise ValueError(f"Name {name} already exists in page /Resources")
-
- resources[res_type][name] = res.with_same_owner_as(self.obj)
- return name
-
- def _over_underlay(
- self,
- other,
- rect: Rectangle | None,
- under: bool,
- push_stack: bool,
- shrink: bool,
- expand: bool,
- ) -> Name:
- formx = None
- if isinstance(other, Page):
- formx = other.as_form_xobject()
- elif isinstance(other, Dictionary) and other.get(Name.Type) == Name.Page:
- formx = Page(other).as_form_xobject()
- elif (
- isinstance(other, Stream)
- and other.get(Name.Type) == Name.XObject
- and other.get(Name.Subtype) == Name.Form
- ):
- formx = other
-
- if formx is None:
- raise TypeError(
- "other object is not something we can convert to Form XObject"
- )
-
- if rect is None:
- rect = Rectangle(self.trimbox)
-
- formx_placed_name = self.add_resource(formx, Name.XObject)
- cs = self.calc_form_xobject_placement(
- formx, formx_placed_name, rect, allow_shrink=shrink, allow_expand=expand
- )
-
- if push_stack:
- self.contents_add(b'q\n', prepend=True) # prepend q
- self.contents_add(b'Q\n', prepend=False) # i.e. append Q
-
- self.contents_add(cs, prepend=under)
- self.contents_coalesce()
- return formx_placed_name
-
- def add_overlay(
- self,
- other: Object | Page,
- rect: Rectangle | None = None,
- *,
- push_stack: bool = True,
- shrink: bool = True,
- expand: bool = True,
- ) -> Name:
- """Overlay another object on this page.
-
- Overlays will be drawn after all previous content, potentially drawing on top
- of existing content.
-
- Args:
- other: A Page or Form XObject to render as an overlay on top of this
- page.
- rect: The PDF rectangle (in PDF units) in which to draw the overlay.
- If omitted, this page's trimbox, cropbox or mediabox (in that order)
- will be used.
- push_stack: If True (default), push the graphics stack of the existing
- content stream to ensure that the overlay is rendered correctly.
- Officially PDF limits the graphics stack depth to 32. Most
- viewers will tolerate more, but excessive pushes may cause problems.
- Multiple content streams may also be coalesced into a single content
- stream where this parameter is True, since the PDF specification
- permits PDF writers to coalesce streams as they see fit.
- shrink: If True (default), allow the object to shrink to fit inside the
- rectangle. The aspect ratio will be preserved.
- expand: If True (default), allow the object to expand to fit inside the
- rectangle. The aspect ratio will be preserved.
-
- Returns:
- The name of the Form XObject that contains the overlay.
-
- .. versionadded:: 2.14
-
- .. versionchanged:: 4.0.0
- Added the *push_stack* parameter. Previously, this method behaved
- as if *push_stack* were False.
-
- .. versionchanged:: 4.2.0
- Added the *shrink* and *expand* parameters. Previously, this method
- behaved as if ``shrink=True, expand=False``.
-
- .. versionchanged:: 4.3.0
- Returns the name of the overlay in the resources dictionary instead
- of returning None.
- """
- return self._over_underlay(
- other,
- rect,
- under=False,
- push_stack=push_stack,
- expand=expand,
- shrink=shrink,
- )
-
- def add_underlay(
- self,
- other: Object | Page,
- rect: Rectangle | None = None,
- *,
- shrink: bool = True,
- expand: bool = True,
- ) -> Name:
- """Underlay another object beneath this page.
-
- Underlays will be drawn before all other content, so they may be overdrawn
- partially or completely.
-
- There is no *push_stack* parameter for this function, since adding an
- underlay can be done without manipulating the graphics stack.
-
- Args:
- other: A Page or Form XObject to render as an underlay underneath this
- page.
- rect: The PDF rectangle (in PDF units) in which to draw the underlay.
- If omitted, this page's trimbox, cropbox or mediabox (in that order)
- will be used.
- shrink: If True (default), allow the object to shrink to fit inside the
- rectangle. The aspect ratio will be preserved.
- expand: If True (default), allow the object to expand to fit inside the
- rectangle. The aspect ratio will be preserved.
-
- Returns:
- The name of the Form XObject that contains the underlay.
-
- .. versionadded:: 2.14
-
- .. versionchanged:: 4.2.0
- Added the *shrink* and *expand* parameters. Previously, this method
- behaved as if ``shrink=True, expand=False``. Fixed issue with wrong
- page rect being selected.
- """
- return self._over_underlay(
- other, rect, under=True, push_stack=False, expand=expand, shrink=shrink
- )
-
- def contents_add(self, contents: Stream | bytes, *, prepend: bool = False):
- """Append or prepend to an existing page's content stream.
-
- Args:
- contents: An existing content stream to append or prepend.
- prepend: Prepend if true, append if false (default).
-
- .. versionadded:: 2.14
- """
- return self._contents_add(contents, prepend=prepend)
-
- def __getattr__(self, name):
- return getattr(self.obj, name)
-
- @augment_override_cpp
- def __setattr__(self, name, value):
- if hasattr(self.__class__, name):
- object.__setattr__(self, name, value)
- else:
- setattr(self.obj, name, value)
-
- @augment_override_cpp
- def __delattr__(self, name):
- if hasattr(self.__class__, name):
- object.__delattr__(self, name)
- else:
- delattr(self.obj, name)
-
- def __getitem__(self, key):
- return self.obj[key]
-
- def __setitem__(self, key, value):
- self.obj[key] = value
-
- def __delitem__(self, key):
- del self.obj[key]
-
- def __contains__(self, key):
- return key in self.obj
-
- def get(self, key, default=None):
- try:
- return self[key]
- except KeyError:
- return default
-
- def emplace(self, other: Page, retain=(Name.Parent,)):
- return self.obj.emplace(other.obj, retain=retain)
-
- def __repr__(self):
- return (
- repr(self.obj)
- .replace('Dictionary', 'Page', 1)
- .replace('(Type="/Page")', '', 1)
- )
-
- def _repr_mimebundle_(self, include=None, exclude=None):
- data = {}
- bundle = {'application/pdf', 'image/png'}
- if include:
- bundle = {k for k in bundle if k in include}
- if exclude:
- bundle = {k for k in bundle if k not in exclude}
- pagedata = _single_page_pdf(self.obj)
- if 'application/pdf' in bundle:
- data['application/pdf'] = pagedata
- if 'image/png' in bundle:
- try:
- data['image/png'] = _mudraw(pagedata, 'png')
- except (FileNotFoundError, RuntimeError):
- pass
- return data
-
-
-@augments(Token)
-class Extend_Token:
- def __repr__(self):
- return f'pikepdf.Token({self.type_}, {self.raw_value})'
-
-
-@augments(Rectangle)
-class Extend_Rectangle:
- def __repr__(self):
- return f'pikepdf.Rectangle({self.llx}, {self.lly}, {self.urx}, {self.ury})'
-
- def __hash__(self):
- return hash((self.llx, self.lly, self.urx, self.ury))
-
-
-@augments(Attachments)
-class Extend_Attachments(MutableMapping):
- def __getitem__(self, k: str) -> AttachedFileSpec:
- filespec = self._get_filespec(k)
- if filespec is None:
- raise KeyError(k)
- return filespec
-
- def __setitem__(self, k: str, v: AttachedFileSpec) -> None:
- if not v.filename:
- v.filename = k
- return self._add_replace_filespec(k, v)
-
- def __delitem__(self, k: str) -> None:
- return self._remove_filespec(k)
-
- def __len__(self):
- return len(self._get_all_filespecs())
-
- def __iter__(self) -> Iterator[str]:
- yield from self._get_all_filespecs()
-
- def __repr__(self):
- return f"<pikepdf._qpdf.Attachments with {len(self)} attached files>"
-
-
-@augments(AttachedFileSpec)
-class Extend_AttachedFileSpec:
- @staticmethod
- def from_filepath(pdf: Pdf, path: Path | str, *, description: str = ''):
- """Construct a file specification from a file path.
-
- This function will automatically add a creation and modified date
- using the file system, and a MIME type inferred from the file's extension.
-
- If the data required for the attach is in memory, use
- :meth:`pikepdf.AttachedFileSpec` instead.
-
- Args:
- pdf: The Pdf to attach this file specification to.
- path: A file path for the file to attach to this Pdf.
- description: An optional description. May be shown to the user in
- PDF viewers.
- """
- mime, _ = mimetypes.guess_type(str(path))
- if mime is None:
- mime = ''
- if not isinstance(path, Path):
- path = Path(path)
-
- stat = path.stat()
- return AttachedFileSpec(
- pdf,
- path.read_bytes(),
- description=description,
- filename=str(path.name),
- mime_type=mime,
- creation_date=encode_pdf_date(
- datetime.datetime.fromtimestamp(stat.st_ctime)
- ),
- mod_date=encode_pdf_date(datetime.datetime.fromtimestamp(stat.st_mtime)),
- )
-
- def __repr__(self):
- if self.filename:
- return (
- f"<pikepdf._qpdf.AttachedFileSpec for {self.filename!r}, "
- f"description {self.description!r}>"
- )
- return f"<pikepdf._qpdf.AttachedFileSpec description {self.description!r}>"
-
-
-@augments(AttachedFile)
-class Extend_AttachedFile:
- @property
- def creation_date(self) -> datetime.datetime | None:
- if not self._creation_date:
- return None
- return decode_pdf_date(self._creation_date)
-
- @creation_date.setter
- def creation_date(self, value: datetime.datetime):
- self._creation_date = encode_pdf_date(value)
-
- @property
- def mod_date(self) -> datetime.datetime | None:
- if not self._mod_date:
- return None
- return decode_pdf_date(self._mod_date)
-
- @mod_date.setter
- def mod_date(self, value: datetime.datetime):
- self._mod_date = encode_pdf_date(value)
-
- def read_bytes(self) -> bytes:
- return self.obj.read_bytes()
-
- def __repr__(self):
- return (
- f'<pikepdf._qpdf.AttachedFile objid={self.obj.objgen} size={self.size} '
- f'mime_type={self.mime_type} creation_date={self.creation_date} '
- f'mod_date={self.mod_date}>'
- )
-
-
-@augments(NameTree)
-class Extend_NameTree:
- def keys(self):
- return KeysView(self._as_map())
-
- def values(self):
- return ValuesView(self._as_map())
-
- def items(self):
- return ItemsView(self._as_map())
-
- get = MutableMapping.get
- pop = MutableMapping.pop
- popitem = MutableMapping.popitem
- clear = MutableMapping.clear
- update = MutableMapping.update
- setdefault = MutableMapping.setdefault
-
-
-MutableMapping.register(NameTree)
-
-
-@augments(NumberTree)
-class Extend_NumberTree:
- def keys(self):
- return KeysView(self._as_map())
-
- def values(self):
- return ValuesView(self._as_map())
-
- def items(self):
- return ItemsView(self._as_map())
-
- get = MutableMapping.get
- pop = MutableMapping.pop
- popitem = MutableMapping.popitem
- clear = MutableMapping.clear
- update = MutableMapping.update
- setdefault = MutableMapping.setdefault
-
-
-MutableMapping.register(NumberTree)