env/lib/python3.10/site-packages/pikepdf/objects.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300

# SPDX-FileCopyrightText: 2022 James R. Barlow
# SPDX-License-Identifier: MPL-2.0

"""Provide classes to stand in for PDF objects.

The purpose of these is to provide nice-looking classes to allow explicit
construction of PDF objects and more pythonic idioms and facilitate discovery
by documentation generators and linters.

It's also a place to narrow the scope of input types to those more easily
converted to C++.

There is some deliberate "smoke and mirrors" here: all of the objects are truly
instances of ``pikepdf.Object``, which is a variant container object. The
``__new__`` constructs a ``pikepdf.Object`` in each case, and the rest of the
class definition is present as an aide for code introspection.
"""

from __future__ import annotations

# pylint: disable=unused-import, abstract-method
from secrets import token_urlsafe
from typing import TYPE_CHECKING, Any, Iterable, Mapping, cast
from warnings import warn

from . import _qpdf
from ._qpdf import Object, ObjectType, Rectangle

if TYPE_CHECKING:  # pragma: no cover
    from pikepdf import Pdf

# By default pikepdf.Object will identify itself as pikepdf._qpdf.Object
# Here we change the module to discourage people from using that internal name
# Instead it will become pikepdf.objects.Object
Object.__module__ = __name__
ObjectType.__module__ = __name__


# type(Object) is the metaclass that pybind11 defines; we wish to extend that
# pylint cannot see the C++ metaclass definition and is thoroughly confused.
# pylint: disable=invalid-metaclass


class _ObjectMeta(type(Object)):  # type: ignore
    """Support instance checking."""

    def __instancecheck__(self, instance: Any) -> bool:
        # Note: since this class is a metaclass, self is a class object
        if type(instance) != Object:
            return False
        return self.object_type == instance._type_code


class _NameObjectMeta(_ObjectMeta):
    """Support usage pikepdf.Name.Whatever -> Name('/Whatever')."""

    def __getattr__(self, attr: str) -> Any:
        if attr.startswith('_') or attr == 'object_type':
            return getattr(_ObjectMeta, attr)
        return Name('/' + attr)

    def __setattr__(self, attr: str, value: Any) -> None:
        # No need for a symmetric .startswith('_'). To prevent user error, we
        # simply don't allow mucking with the pikepdf.Name class's attributes.
        # There is no reason to ever assign to them.
        raise AttributeError(
            "Attributes may not be set on pikepdf.Name. Perhaps you meant to "
            "modify a Dictionary rather than a Name?"
        )

    def __getitem__(self, item: str) -> Name:
        if item.startswith('/'):
            item = item[1:]
        raise TypeError(
            "pikepdf.Name is not subscriptable. You probably meant:\n"
            f"    pikepdf.Name.{item}\n"
            "or\n"
            f"    pikepdf.Name('/{item}')\n"
        )


class Name(Object, metaclass=_NameObjectMeta):
    """Construct a PDF Name object.

    Names can be constructed with two notations:

        1. ``Name.Resources``

        2. ``Name('/Resources')``

    The two are semantically equivalent. The former is preferred for names
    that are normally expected to be in a PDF. The latter is preferred for
    dynamic names and attributes.
    """

    object_type = ObjectType.name_

    def __new__(cls, name: str | Name) -> Name:
        """Construct a PDF Name."""
        # QPDF_Name::unparse ensures that names are always saved in a UTF-8
        # compatible way, so we only need to guard the input.
        if isinstance(name, bytes):
            raise TypeError("Name should be str")
        if isinstance(name, Name):
            return name  # Names are immutable so we can return a reference
        return _qpdf._new_name(name)

    @classmethod
    def random(cls, len_: int = 16, prefix: str = '') -> Name:
        """Generate a cryptographically strong random, valid PDF Name.

        This function uses Python's secrets.token_urlsafe, which returns a
        URL-safe encoded random number of the desired length. An optional
        *prefix* may be prepended. (The encoding is ultimately done with
        :func:`base64.urlsafe_b64encode`.) Serendipitously, URL-safe is also
        PDF-safe.

        When the length parameter is 16 (16 random bytes or 128 bits), the result
        is probably globally unique and can be treated as never colliding with
        other names.

        The length of the string may vary because it is encoded.
        """
        random_string = token_urlsafe(len_)
        return _qpdf._new_name(f"/{prefix}{random_string}")


class Operator(Object, metaclass=_ObjectMeta):
    """Construct an operator for use in a content stream.

    An Operator is one of a limited set of commands that can appear in PDF content
    streams (roughly the mini-language that draws objects, lines and text on a
    virtual PDF canvas). The commands :func:`parse_content_stream` and
    :func:`unparse_content_stream` create and expect Operators respectively, along
    with their operands.

    pikepdf uses the special Operator "INLINE IMAGE" to denote an inline image
    in a content stream.
    """

    object_type = ObjectType.operator

    def __new__(cls, name: str) -> Operator:
        """Construct an operator."""
        return cast('Operator', _qpdf._new_operator(name))


class String(Object, metaclass=_ObjectMeta):
    """Construct a PDF String object."""

    object_type = ObjectType.string

    def __new__(cls, s: str | bytes) -> String:
        """
        Construct a PDF String.

        Args:
            s: The string to use. String will be encoded for
                PDF, bytes will be constructed without encoding.

        Return type:
            pikepdf.Object
        """
        if isinstance(s, bytes):
            return _qpdf._new_string(s)
        return _qpdf._new_string_utf8(s)


class Array(Object, metaclass=_ObjectMeta):
    """Construct a PDF Array object."""

    object_type = ObjectType.array

    def __new__(cls, a: Iterable | Rectangle | None = None) -> Array:
        """
        Construct a PDF Array.

        Args:
            a: An iterable of objects. All objects must be either
                `pikepdf.Object` or convertible to `pikepdf.Object`.

        Return type:
            pikepdf.Array
        """
        if isinstance(a, (str, bytes)):
            raise TypeError('Strings cannot be converted to arrays of chars')

        if a is None:
            a = []
        elif isinstance(a, Rectangle):
            return a.as_array()
        elif isinstance(a, Array):
            return cast(Array, a.__copy__())
        return _qpdf._new_array(a)


class Dictionary(Object, metaclass=_ObjectMeta):
    """Construct a PDF Dictionary object."""

    object_type = ObjectType.dictionary

    def __new__(cls, d: Mapping | None = None, **kwargs) -> Dictionary:
        """
        Construct a PDF Dictionary.

        Works from either a Python ``dict`` or keyword arguments.

        These two examples are equivalent:

        .. code-block:: python

            pikepdf.Dictionary({'/NameOne': 1, '/NameTwo': 'Two'})

            pikepdf.Dictionary(NameOne=1, NameTwo='Two')

        In either case, the keys must be strings, and the strings
        correspond to the desired Names in the PDF Dictionary. The values
        must all be convertible to `pikepdf.Object`.

        Return type:
            pikepdf.Dictionary
        """
        if kwargs and d is not None:
            raise ValueError('Cannot use both a mapping object and keyword args')
        if kwargs:
            # Add leading slash
            # Allows Dictionary(MediaBox=(0,0,1,1), Type=Name('/Page')...
            return _qpdf._new_dictionary({('/' + k): v for k, v in kwargs.items()})
        if isinstance(d, Dictionary):
            # Already a dictionary
            return d.__copy__()
        if not d:
            d = {}
        if d and any(key == '/' or not key.startswith('/') for key in d.keys()):
            raise KeyError("Dictionary created from strings must begin with '/'")
        return _qpdf._new_dictionary(d)


class Stream(Object, metaclass=_ObjectMeta):
    """Construct a PDF Stream object."""

    object_type = ObjectType.stream

    def __new__(cls, owner: Pdf, data: bytes | None = None, d=None, **kwargs) -> Stream:
        """
        Create a new stream object.

        Streams stores arbitrary binary data and may or may not be compressed.
        It also may or may not be a page or Form XObject's content stream.

        A stream dictionary is like a pikepdf.Dictionary or Python dict, except
        it has a binary payload of data attached. The dictionary describes
        how the data is compressed or encoded.

        The dictionary may be initialized just like pikepdf.Dictionary is initialized,
        using a mapping object or keyword arguments.

        Args:
            owner: The Pdf to which this stream shall be attached.
            data: The data bytes for the stream.
            d: An optional mapping object that will be used to construct the stream's
                dictionary.
            kwargs: Keyword arguments that will define the stream dictionary. Do not set
                /Length here as pikepdf will manage this value. Set /Filter
                if the data is already encoded in some format.

        Examples:
            Using kwargs:
                >>> s1 = pikepdf.Stream(
                        pdf,
                        b"uncompressed image data",
                        BitsPerComponent=8,
                        ColorSpace=Name.DeviceRGB,
                        ...
                    )
            Using dict:
                >>> d = pikepdf.Dictionary(...)
                >>> s2 = pikepdf.Stream(
                        pdf,
                        b"data",
                        d
                    )

        .. versionchanged:: 2.2
            Support creation of ``pikepdf.Stream`` from existing dictionary.

        .. versionchanged:: 3.0
            Deprecated ``obj`` argument was removed; use ``data``.
        """
        if data is None:
            raise TypeError("Must make Stream from binary data")

        stream_dict = None
        if d or kwargs:
            stream_dict = Dictionary(d, **kwargs)

        stream = _qpdf._new_stream(owner, data)
        if stream_dict:
            stream.stream_dict = stream_dict
        return stream