diff options
author | Biswakalyan Bhuyan <biswa@surgot.in> | 2022-11-14 16:43:12 +0530 |
---|---|---|
committer | Biswakalyan Bhuyan <biswa@surgot.in> | 2022-11-14 16:43:12 +0530 |
commit | d47f8b48935d258f4c5c3e2267911753bebd5214 (patch) | |
tree | 3ed04e75bc3fc7c8e4ce618f527565da1df630a1 /env/lib/python3.10/site-packages/img2pdf.py | |
parent | 9468226a9e2e2ab8cdd599f1d8538e860ca86120 (diff) | |
download | idcard-d47f8b48935d258f4c5c3e2267911753bebd5214.tar.gz idcard-d47f8b48935d258f4c5c3e2267911753bebd5214.tar.bz2 idcard-d47f8b48935d258f4c5c3e2267911753bebd5214.zip |
id card
Diffstat (limited to 'env/lib/python3.10/site-packages/img2pdf.py')
-rw-r--r-- | env/lib/python3.10/site-packages/img2pdf.py | 4102 |
1 files changed, 0 insertions, 4102 deletions
diff --git a/env/lib/python3.10/site-packages/img2pdf.py b/env/lib/python3.10/site-packages/img2pdf.py deleted file mode 100644 index 39a311b..0000000 --- a/env/lib/python3.10/site-packages/img2pdf.py +++ /dev/null @@ -1,4102 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -# Copyright (C) 2012-2021 Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> -# -# This program is free software: you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation, either -# version 3 of the License, or (at your option) any later -# version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public -# License along with this program. If not, see -# <http://www.gnu.org/licenses/>. - -import sys -import os -import zlib -import argparse -from PIL import Image, TiffImagePlugin, GifImagePlugin - -if hasattr(GifImagePlugin, "LoadingStrategy"): - # Pillow 9.0.0 started emitting all frames but the first as RGB instead of - # P to make sure that more than 256 colors can be represented. But palette - # images compress far better than RGB images in PDF so we instruct Pillow - # to only emit RGB frames if the palette differs and return P otherwise. - # This works since Pillow 9.1.0. - GifImagePlugin.LOADING_STRATEGY = ( - GifImagePlugin.LoadingStrategy.RGB_AFTER_DIFFERENT_PALETTE_ONLY - ) - -# TiffImagePlugin.DEBUG = True -from PIL.ExifTags import TAGS -from datetime import datetime -from jp2 import parsejp2 -from enum import Enum -from io import BytesIO -import logging -import struct -import platform -import hashlib -from itertools import chain - -logger = logging.getLogger(__name__) - -have_pdfrw = True -try: - import pdfrw -except ImportError: - have_pdfrw = False - -have_pikepdf = True -try: - import pikepdf -except ImportError: - have_pikepdf = False - -__version__ = "0.4.4" -default_dpi = 96.0 -papersizes = { - "letter": "8.5inx11in", - "a0": "841mmx1189mm", - "a1": "594mmx841mm", - "a2": "420mmx594mm", - "a3": "297mmx420mm", - "a4": "210mmx297mm", - "a5": "148mmx210mm", - "a6": "105mmx148mm", - "b0": "1000mmx1414mm", - "b1": "707mmx1000mm", - "b2": "500mmx707mm", - "b3": "353mmx500mm", - "b4": "250mmx353mm", - "b5": "176mmx250mm", - "b6": "125mmx176mm", - "jb0": "1030mmx1456mm", - "jb1": "728mmx1030mm", - "jb2": "515mmx728mm", - "jb3": "364mmx515mm", - "jb4": "257mmx364mm", - "jb5": "182mmx257mm", - "jb6": "128mmx182mm", - "legal": "8.5inx14in", - "tabloid": "11inx17in", -} -papernames = { - "letter": "Letter", - "a0": "A0", - "a1": "A1", - "a2": "A2", - "a3": "A3", - "a4": "A4", - "a5": "A5", - "a6": "A6", - "b0": "B0", - "b1": "B1", - "b2": "B2", - "b3": "B3", - "b4": "B4", - "b5": "B5", - "b6": "B6", - "jb0": "JB0", - "jb1": "JB1", - "jb2": "JB2", - "jb3": "JB3", - "jb4": "JB4", - "jb5": "JB5", - "jb6": "JB6", - "legal": "Legal", - "tabloid": "Tabloid", -} - -Engine = Enum("Engine", "internal pdfrw pikepdf") - -Rotation = Enum("Rotation", "auto none ifvalid 0 90 180 270") - -FitMode = Enum("FitMode", "into fill exact shrink enlarge") - -PageOrientation = Enum("PageOrientation", "portrait landscape") - -Colorspace = Enum("Colorspace", "RGB RGBA L LA 1 CMYK CMYK;I P PA other") - -ImageFormat = Enum("ImageFormat", "JPEG JPEG2000 CCITTGroup4 PNG GIF TIFF MPO other") - -PageMode = Enum("PageMode", "none outlines thumbs") - -PageLayout = Enum( - "PageLayout", - "single onecolumn twocolumnright twocolumnleft twopageright twopageleft", -) - -Magnification = Enum("Magnification", "fit fith fitbh") - -ImgSize = Enum("ImgSize", "abs perc dpi") - -Unit = Enum("Unit", "pt cm mm inch") - -ImgUnit = Enum("ImgUnit", "pt cm mm inch perc dpi") - -TIFFBitRevTable = [ - 0x00, - 0x80, - 0x40, - 0xC0, - 0x20, - 0xA0, - 0x60, - 0xE0, - 0x10, - 0x90, - 0x50, - 0xD0, - 0x30, - 0xB0, - 0x70, - 0xF0, - 0x08, - 0x88, - 0x48, - 0xC8, - 0x28, - 0xA8, - 0x68, - 0xE8, - 0x18, - 0x98, - 0x58, - 0xD8, - 0x38, - 0xB8, - 0x78, - 0xF8, - 0x04, - 0x84, - 0x44, - 0xC4, - 0x24, - 0xA4, - 0x64, - 0xE4, - 0x14, - 0x94, - 0x54, - 0xD4, - 0x34, - 0xB4, - 0x74, - 0xF4, - 0x0C, - 0x8C, - 0x4C, - 0xCC, - 0x2C, - 0xAC, - 0x6C, - 0xEC, - 0x1C, - 0x9C, - 0x5C, - 0xDC, - 0x3C, - 0xBC, - 0x7C, - 0xFC, - 0x02, - 0x82, - 0x42, - 0xC2, - 0x22, - 0xA2, - 0x62, - 0xE2, - 0x12, - 0x92, - 0x52, - 0xD2, - 0x32, - 0xB2, - 0x72, - 0xF2, - 0x0A, - 0x8A, - 0x4A, - 0xCA, - 0x2A, - 0xAA, - 0x6A, - 0xEA, - 0x1A, - 0x9A, - 0x5A, - 0xDA, - 0x3A, - 0xBA, - 0x7A, - 0xFA, - 0x06, - 0x86, - 0x46, - 0xC6, - 0x26, - 0xA6, - 0x66, - 0xE6, - 0x16, - 0x96, - 0x56, - 0xD6, - 0x36, - 0xB6, - 0x76, - 0xF6, - 0x0E, - 0x8E, - 0x4E, - 0xCE, - 0x2E, - 0xAE, - 0x6E, - 0xEE, - 0x1E, - 0x9E, - 0x5E, - 0xDE, - 0x3E, - 0xBE, - 0x7E, - 0xFE, - 0x01, - 0x81, - 0x41, - 0xC1, - 0x21, - 0xA1, - 0x61, - 0xE1, - 0x11, - 0x91, - 0x51, - 0xD1, - 0x31, - 0xB1, - 0x71, - 0xF1, - 0x09, - 0x89, - 0x49, - 0xC9, - 0x29, - 0xA9, - 0x69, - 0xE9, - 0x19, - 0x99, - 0x59, - 0xD9, - 0x39, - 0xB9, - 0x79, - 0xF9, - 0x05, - 0x85, - 0x45, - 0xC5, - 0x25, - 0xA5, - 0x65, - 0xE5, - 0x15, - 0x95, - 0x55, - 0xD5, - 0x35, - 0xB5, - 0x75, - 0xF5, - 0x0D, - 0x8D, - 0x4D, - 0xCD, - 0x2D, - 0xAD, - 0x6D, - 0xED, - 0x1D, - 0x9D, - 0x5D, - 0xDD, - 0x3D, - 0xBD, - 0x7D, - 0xFD, - 0x03, - 0x83, - 0x43, - 0xC3, - 0x23, - 0xA3, - 0x63, - 0xE3, - 0x13, - 0x93, - 0x53, - 0xD3, - 0x33, - 0xB3, - 0x73, - 0xF3, - 0x0B, - 0x8B, - 0x4B, - 0xCB, - 0x2B, - 0xAB, - 0x6B, - 0xEB, - 0x1B, - 0x9B, - 0x5B, - 0xDB, - 0x3B, - 0xBB, - 0x7B, - 0xFB, - 0x07, - 0x87, - 0x47, - 0xC7, - 0x27, - 0xA7, - 0x67, - 0xE7, - 0x17, - 0x97, - 0x57, - 0xD7, - 0x37, - 0xB7, - 0x77, - 0xF7, - 0x0F, - 0x8F, - 0x4F, - 0xCF, - 0x2F, - 0xAF, - 0x6F, - 0xEF, - 0x1F, - 0x9F, - 0x5F, - 0xDF, - 0x3F, - 0xBF, - 0x7F, - 0xFF, -] - - -class NegativeDimensionError(Exception): - pass - - -class UnsupportedColorspaceError(Exception): - pass - - -class ImageOpenError(Exception): - pass - - -class JpegColorspaceError(Exception): - pass - - -class PdfTooLargeError(Exception): - pass - - -class AlphaChannelError(Exception): - pass - - -class ExifOrientationError(Exception): - pass - - -# temporary change the attribute of an object using a context manager -class temp_attr: - def __init__(self, obj, field, value): - self.obj = obj - self.field = field - self.value = value - - def __enter__(self): - self.exists = False - if hasattr(self.obj, self.field): - self.exists = True - self.old_value = getattr(self.obj, self.field) - print(f"setting {self.obj}.{self.field} = {self.value}") - setattr(self.obj, self.field, self.value) - - def __exit__(self, exctype, excinst, exctb): - if self.exists: - setattr(self.obj, self.field, self.old_value) - else: - delattr(self.obj, self.field) - - -# without pdfrw this function is a no-op -def my_convert_load(string): - return string - - -def parse(cont, indent=1): - if type(cont) is dict: - return ( - b"<<\n" - + b"\n".join( - [ - 4 * indent * b" " + k + b" " + parse(v, indent + 1) - for k, v in sorted(cont.items()) - ] - ) - + b"\n" - + 4 * (indent - 1) * b" " - + b">>" - ) - elif type(cont) is int: - return str(cont).encode() - elif type(cont) is float: - if int(cont) == cont: - return parse(int(cont)) - else: - return ("%0.4f" % cont).rstrip("0").encode() - elif isinstance(cont, MyPdfDict): - # if cont got an identifier, then addobj() has been called with it - # and a link to it will be added, otherwise add it inline - if hasattr(cont, "identifier"): - return ("%d 0 R" % cont.identifier).encode() - else: - return parse(cont.content, indent) - elif type(cont) is str or isinstance(cont, bytes): - if type(cont) is str and type(cont) is not bytes: - raise TypeError( - "parse must be passed a bytes object in py3. Got: %s" % cont - ) - return cont - elif isinstance(cont, list): - return b"[ " + b" ".join([parse(c, indent) for c in cont]) + b" ]" - else: - raise TypeError("cannot handle type %s with content %s" % (type(cont), cont)) - - -class MyPdfDict(object): - def __init__(self, *args, **kw): - self.content = dict() - if args: - if len(args) == 1: - args = args[0] - self.content.update(args) - self.stream = None - for key, value in kw.items(): - if key == "stream": - self.stream = value - self.content[MyPdfName.Length] = len(value) - elif key == "indirect": - pass - else: - self.content[getattr(MyPdfName, key)] = value - - def tostring(self): - if self.stream is not None: - return ( - ("%d 0 obj\n" % self.identifier).encode() - + parse(self.content) - + b"\nstream\n" - + self.stream - + b"\nendstream\nendobj\n" - ) - else: - return ( - ("%d 0 obj\n" % self.identifier).encode() - + parse(self.content) - + b"\nendobj\n" - ) - - def __setitem__(self, key, value): - self.content[key] = value - - def __getitem__(self, key): - return self.content[key] - - def __contains__(self, key): - return key in self.content - - -class MyPdfName: - def __getattr__(self, name): - return b"/" + name.encode("ascii") - - -MyPdfName = MyPdfName() - - -class MyPdfObject(bytes): - def __new__(cls, string): - return bytes.__new__(cls, string.encode("ascii")) - - -class MyPdfArray(list): - pass - - -class MyPdfWriter: - def __init__(self): - self.objects = [] - # create an incomplete pages object so that a /Parent entry can be - # added to each page - self.pages = MyPdfDict(Type=MyPdfName.Pages, Kids=[], Count=0) - self.catalog = MyPdfDict(Pages=self.pages, Type=MyPdfName.Catalog) - self.pagearray = [] - - def addobj(self, obj): - newid = len(self.objects) + 1 - obj.identifier = newid - self.objects.append(obj) - - def tostream(self, info, stream, version="1.3", ident=None): - xreftable = list() - - # justification of the random binary garbage in the header from - # adobe: - # - # > Note: If a PDF file contains binary data, as most do (see Section - # > 3.1, “Lexical Conventions”), it is recommended that the header - # > line be immediately followed by a comment line containing at - # > least four binary characters—that is, characters whose codes are - # > 128 or greater. This ensures proper behavior of file transfer - # > applications that inspect data near the beginning of a file to - # > determine whether to treat the file’s contents as text or as - # > binary. - # - # the choice of binary characters is arbitrary but those four seem to - # be used elsewhere. - pdfheader = ("%%PDF-%s\n" % version).encode("ascii") - pdfheader += b"%\xe2\xe3\xcf\xd3\n" - stream.write(pdfheader) - - # From section 3.4.3 of the PDF Reference (version 1.7): - # - # > Each entry is exactly 20 bytes long, including the end-of-line - # > marker. - # > - # > [...] - # > - # > The format of an in-use entry is - # > nnnnnnnnnn ggggg n eol - # > where - # > nnnnnnnnnn is a 10-digit byte offset - # > ggggg is a 5-digit generation number - # > n is a literal keyword identifying this as an in-use entry - # > eol is a 2-character end-of-line sequence - # > - # > [...] - # > - # > If the file’s end-of-line marker is a single character (either a - # > carriage return or a line feed), it is preceded by a single space; - # - # Since we chose to use a single character eol marker, we precede it by - # a space - pos = len(pdfheader) - xreftable.append(b"0000000000 65535 f \n") - for o in self.objects: - xreftable.append(("%010d 00000 n \n" % pos).encode()) - content = o.tostring() - stream.write(content) - pos += len(content) - - xrefoffset = pos - stream.write(b"xref\n") - stream.write(("0 %d\n" % len(xreftable)).encode()) - for x in xreftable: - stream.write(x) - stream.write(b"trailer\n") - trailer = {b"/Size": len(xreftable), b"/Info": info, b"/Root": self.catalog} - if ident is not None: - md5 = hashlib.md5(ident).hexdigest().encode("ascii") - trailer[b"/ID"] = b"[<%s><%s>]" % (md5, md5) - stream.write(parse(trailer) + b"\n") - stream.write(b"startxref\n") - stream.write(("%d\n" % xrefoffset).encode()) - stream.write(b"%%EOF\n") - return - - def addpage(self, page): - page[b"/Parent"] = self.pages - self.pagearray.append(page) - self.pages.content[b"/Kids"].append(page) - self.pages.content[b"/Count"] += 1 - self.addobj(page) - - -class MyPdfString: - @classmethod - def encode(cls, string, hextype=False): - if hextype: - return ( - b"< " + b" ".join(("%06x" % c).encode("ascii") for c in string) + b" >" - ) - else: - try: - string = string.encode("ascii") - except UnicodeEncodeError: - string = b"\xfe\xff" + string.encode("utf-16-be") - # We should probably encode more here because at least - # ghostscript interpretes a carriage return byte (0x0D) as a - # new line byte (0x0A) - # PDF supports: \n, \r, \t, \b and \f - string = string.replace(b"\\", b"\\\\") - string = string.replace(b"(", b"\\(") - string = string.replace(b")", b"\\)") - return b"(" + string + b")" - - -class pdfdoc(object): - def __init__( - self, - engine=Engine.internal, - version="1.3", - title=None, - author=None, - creator=None, - producer=None, - creationdate=None, - moddate=None, - subject=None, - keywords=None, - nodate=False, - panes=None, - initial_page=None, - magnification=None, - page_layout=None, - fit_window=False, - center_window=False, - fullscreen=False, - pdfa=None, - ): - if engine is None: - if have_pikepdf: - engine = Engine.pikepdf - elif have_pdfrw: - engine = Engine.pdfrw - else: - engine = Engine.internal - - if engine == Engine.pikepdf: - PdfWriter = pikepdf.new - PdfDict = pikepdf.Dictionary - PdfName = pikepdf.Name - elif engine == Engine.pdfrw: - from pdfrw import PdfWriter, PdfDict, PdfName, PdfString - elif engine == Engine.internal: - PdfWriter = MyPdfWriter - PdfDict = MyPdfDict - PdfName = MyPdfName - PdfString = MyPdfString - else: - raise ValueError("unknown engine: %s" % engine) - - self.writer = PdfWriter() - if engine != Engine.pikepdf: - self.writer.docinfo = PdfDict(indirect=True) - - def datetime_to_pdfdate(dt): - return dt.strftime("%Y%m%d%H%M%SZ") - - for k in ["Title", "Author", "Creator", "Producer", "Subject"]: - v = locals()[k.lower()] - if v is None or v == "": - continue - if engine != Engine.pikepdf: - v = PdfString.encode(v) - self.writer.docinfo[getattr(PdfName, k)] = v - - now = datetime.now() - for k in ["CreationDate", "ModDate"]: - v = locals()[k.lower()] - if v is None and nodate: - continue - if v is None: - v = now - v = ("D:" + datetime_to_pdfdate(v)).encode("ascii") - if engine == Engine.internal: - v = b"(" + v + b")" - self.writer.docinfo[getattr(PdfName, k)] = v - if keywords is not None: - if engine == Engine.pikepdf: - self.writer.docinfo[PdfName.Keywords] = ",".join(keywords) - else: - self.writer.docinfo[PdfName.Keywords] = PdfString.encode( - ",".join(keywords) - ) - - def datetime_to_xmpdate(dt): - return dt.strftime("%Y-%m-%dT%H:%M:%SZ") - - self.xmp = b"""<?xpacket begin='\xef\xbb\xbf' id='W5M0MpCehiHzreSzNTczkc9d'?> -<x:xmpmeta xmlns:x='adobe:ns:meta/' x:xmptk='XMP toolkit 2.9.1-13, framework 1.6'> -<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' xmlns:iX='http://ns.adobe.com/iX/1.0/'> - <rdf:Description rdf:about='' xmlns:pdf='http://ns.adobe.com/pdf/1.3/'%s/> - <rdf:Description rdf:about='' xmlns:xmp='http://ns.adobe.com/xap/1.0/'> - %s - %s - </rdf:Description> - <rdf:Description rdf:about='' xmlns:pdfaid='http://www.aiim.org/pdfa/ns/id/' pdfaid:part='1' pdfaid:conformance='B'/> -</rdf:RDF> -</x:xmpmeta> - -<?xpacket end='w'?> -""" % ( - b" pdf:Producer='%s'" % producer.encode("ascii") - if producer is not None - else b"", - b"" - if creationdate is None and nodate - else b"<xmp:ModifyDate>%s</xmp:ModifyDate>" - % datetime_to_xmpdate(now if creationdate is None else creationdate).encode( - "ascii" - ), - b"" - if moddate is None and nodate - else b"<xmp:CreateDate>%s</xmp:CreateDate>" - % datetime_to_xmpdate(now if moddate is None else moddate).encode("ascii"), - ) - - if engine != Engine.pikepdf: - # this is done because pdfrw adds info, catalog and pages as the first - # three objects in this order - if engine == Engine.internal: - self.writer.addobj(self.writer.docinfo) - self.writer.addobj(self.writer.catalog) - self.writer.addobj(self.writer.pages) - - self.panes = panes - self.initial_page = initial_page - self.magnification = magnification - self.page_layout = page_layout - self.fit_window = fit_window - self.center_window = center_window - self.fullscreen = fullscreen - self.engine = engine - self.output_version = version - self.pdfa = pdfa - - def add_imagepage( - self, - color, - imgwidthpx, - imgheightpx, - imgformat, - imgdata, - smaskdata, - imgwidthpdf, - imgheightpdf, - imgxpdf, - imgypdf, - pagewidth, - pageheight, - userunit=None, - palette=None, - inverted=False, - depth=0, - rotate=0, - cropborder=None, - bleedborder=None, - trimborder=None, - artborder=None, - iccp=None, - ): - assert (color != Colorspace.RGBA and color != Colorspace.LA) or ( - imgformat == ImageFormat.PNG and smaskdata is not None - ) - - if self.engine == Engine.pikepdf: - PdfArray = pikepdf.Array - PdfDict = pikepdf.Dictionary - PdfName = pikepdf.Name - elif self.engine == Engine.pdfrw: - from pdfrw import PdfDict, PdfName, PdfObject, PdfString - from pdfrw.py23_diffs import convert_load - elif self.engine == Engine.internal: - PdfDict = MyPdfDict - PdfName = MyPdfName - PdfObject = MyPdfObject - PdfString = MyPdfString - convert_load = my_convert_load - else: - raise ValueError("unknown engine: %s" % self.engine) - TrueObject = True if self.engine == Engine.pikepdf else PdfObject("true") - FalseObject = False if self.engine == Engine.pikepdf else PdfObject("false") - - if color == Colorspace["1"] or color == Colorspace.L or color == Colorspace.LA: - colorspace = PdfName.DeviceGray - elif color == Colorspace.RGB or color == Colorspace.RGBA: - colorspace = PdfName.DeviceRGB - elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]: - colorspace = PdfName.DeviceCMYK - elif color == Colorspace.P: - if self.engine == Engine.pdfrw: - # https://github.com/pmaupin/pdfrw/issues/128 - # https://github.com/pmaupin/pdfrw/issues/147 - raise Exception( - "pdfrw does not support hex strings for " - "palette image input, re-run with " - "--engine=internal or --engine=pikepdf" - ) - assert len(palette) % 3 == 0 - colorspace = [ - PdfName.Indexed, - PdfName.DeviceRGB, - (len(palette) // 3) - 1, - bytes(palette) - if self.engine == Engine.pikepdf - else PdfString.encode( - [ - int.from_bytes(palette[i : i + 3], "big") - for i in range(0, len(palette), 3) - ], - hextype=True, - ), - ] - else: - raise UnsupportedColorspaceError("unsupported color space: %s" % color.name) - - if iccp is not None: - if self.engine == Engine.pikepdf: - iccpdict = self.writer.make_stream(iccp) - else: - iccpdict = PdfDict(stream=convert_load(iccp)) - iccpdict[PdfName.Alternate] = colorspace - if ( - color == Colorspace["1"] - or color == Colorspace.L - or color == Colorspace.LA - ): - iccpdict[PdfName.N] = 1 - elif color == Colorspace.RGB or color == Colorspace.RGBA: - iccpdict[PdfName.N] = 3 - elif color == Colorspace.CMYK or color == Colorspace["CMYK;I"]: - iccpdict[PdfName.N] = 4 - elif color == Colorspace.P: - raise Exception("Cannot have Palette images with ICC profile") - colorspace = [PdfName.ICCBased, iccpdict] - - # either embed the whole jpeg or deflate the bitmap representation - if imgformat is ImageFormat.JPEG: - ofilter = PdfName.DCTDecode - elif imgformat is ImageFormat.JPEG2000: - ofilter = PdfName.JPXDecode - self.output_version = "1.5" # jpeg2000 needs pdf 1.5 - elif imgformat is ImageFormat.CCITTGroup4: - ofilter = [PdfName.CCITTFaxDecode] - else: - ofilter = PdfName.FlateDecode - - if self.engine == Engine.pikepdf: - image = self.writer.make_stream(imgdata) - else: - image = PdfDict(stream=convert_load(imgdata)) - - image[PdfName.Type] = PdfName.XObject - image[PdfName.Subtype] = PdfName.Image - image[PdfName.Filter] = ofilter - image[PdfName.Width] = imgwidthpx - image[PdfName.Height] = imgheightpx - image[PdfName.ColorSpace] = colorspace - image[PdfName.BitsPerComponent] = depth - - smask = None - - if color == Colorspace["CMYK;I"]: - # Inverts all four channels - image[PdfName.Decode] = [1, 0, 1, 0, 1, 0, 1, 0] - - if imgformat is ImageFormat.CCITTGroup4: - decodeparms = PdfDict() - # The default for the K parameter is 0 which indicates Group 3 1-D - # encoding. We set it to -1 because we want Group 4 encoding. - decodeparms[PdfName.K] = -1 - if inverted: - decodeparms[PdfName.BlackIs1] = FalseObject - else: - decodeparms[PdfName.BlackIs1] = TrueObject - decodeparms[PdfName.Columns] = imgwidthpx - decodeparms[PdfName.Rows] = imgheightpx - image[PdfName.DecodeParms] = [decodeparms] - elif imgformat is ImageFormat.PNG: - if smaskdata is not None: - if self.engine == Engine.pikepdf: - smask = self.writer.make_stream(smaskdata) - else: - smask = PdfDict(stream=convert_load(smaskdata)) - smask[PdfName.Type] = PdfName.XObject - smask[PdfName.Subtype] = PdfName.Image - smask[PdfName.Filter] = PdfName.FlateDecode - smask[PdfName.Width] = imgwidthpx - smask[PdfName.Height] = imgheightpx - smask[PdfName.ColorSpace] = PdfName.DeviceGray - smask[PdfName.BitsPerComponent] = depth - - decodeparms = PdfDict() - decodeparms[PdfName.Predictor] = 15 - decodeparms[PdfName.Colors] = 1 - decodeparms[PdfName.Columns] = imgwidthpx - decodeparms[PdfName.BitsPerComponent] = depth - smask[PdfName.DecodeParms] = decodeparms - - image[PdfName.SMask] = smask - - # /SMask requires PDF 1.4 - if self.output_version < "1.4": - self.output_version = "1.4" - - decodeparms = PdfDict() - decodeparms[PdfName.Predictor] = 15 - if color in [Colorspace.P, Colorspace["1"], Colorspace.L, Colorspace.LA]: - decodeparms[PdfName.Colors] = 1 - else: - decodeparms[PdfName.Colors] = 3 - decodeparms[PdfName.Columns] = imgwidthpx - decodeparms[PdfName.BitsPerComponent] = depth - image[PdfName.DecodeParms] = decodeparms - - text = ( - "q\n%0.4f 0 0 %0.4f %0.4f %0.4f cm\n/Im0 Do\nQ" - % (imgwidthpdf, imgheightpdf, imgxpdf, imgypdf) - ).encode("ascii") - - if self.engine == Engine.pikepdf: - content = self.writer.make_stream(text) - else: - content = PdfDict(stream=convert_load(text)) - resources = PdfDict(XObject=PdfDict(Im0=image)) - - if self.engine == Engine.pikepdf: - page = self.writer.add_blank_page(page_size=(pagewidth, pageheight)) - else: - page = PdfDict(indirect=True) - page[PdfName.Type] = PdfName.Page - page[PdfName.MediaBox] = [0, 0, pagewidth, pageheight] - # 14.11.2 Page Boundaries - # ... - # The crop, bleed, trim, and art boxes shall not ordinarily extend - # beyond the boundaries of the media box. If they do, they are - # effectively reduced to their intersection with the media box. - if cropborder is not None: - page[PdfName.CropBox] = [ - cropborder[1], - cropborder[0], - pagewidth - cropborder[1], - pageheight - cropborder[0], - ] - if bleedborder is None: - if PdfName.CropBox in page: - page[PdfName.BleedBox] = page[PdfName.CropBox] - else: - page[PdfName.BleedBox] = [ - bleedborder[1], - bleedborder[0], - pagewidth - bleedborder[1], - pageheight - bleedborder[0], - ] - if trimborder is None: - if PdfName.CropBox in page: - page[PdfName.TrimBox] = page[PdfName.CropBox] - else: - page[PdfName.TrimBox] = [ - trimborder[1], - trimborder[0], - pagewidth - trimborder[1], - pageheight - trimborder[0], - ] - if artborder is None: - if PdfName.CropBox in page: - page[PdfName.ArtBox] = page[PdfName.CropBox] - else: - page[PdfName.ArtBox] = [ - artborder[1], - artborder[0], - pagewidth - artborder[1], - pageheight - artborder[0], - ] - page[PdfName.Resources] = resources - page[PdfName.Contents] = content - if rotate != 0: - page[PdfName.Rotate] = rotate - if userunit is not None: - # /UserUnit requires PDF 1.6 - if self.output_version < "1.6": - self.output_version = "1.6" - page[PdfName.UserUnit] = userunit - - if self.engine != Engine.pikepdf: - self.writer.addpage(page) - - if self.engine == Engine.internal: - self.writer.addobj(content) - self.writer.addobj(image) - if smask is not None: - self.writer.addobj(smask) - if iccp is not None: - self.writer.addobj(iccpdict) - - def tostring(self): - stream = BytesIO() - self.tostream(stream) - return stream.getvalue() - - def tostream(self, outputstream): - if self.engine == Engine.pikepdf: - PdfArray = pikepdf.Array - PdfDict = pikepdf.Dictionary - PdfName = pikepdf.Name - elif self.engine == Engine.pdfrw: - from pdfrw import PdfDict, PdfName, PdfArray, PdfObject - from pdfrw.py23_diffs import convert_load - elif self.engine == Engine.internal: - PdfDict = MyPdfDict - PdfName = MyPdfName - PdfObject = MyPdfObject - PdfArray = MyPdfArray - convert_load = my_convert_load - else: - raise ValueError("unknown engine: %s" % self.engine) - NullObject = None if self.engine == Engine.pikepdf else PdfObject("null") - TrueObject = True if self.engine == Engine.pikepdf else PdfObject("true") - - # We fill the catalog with more information like /ViewerPreferences, - # /PageMode, /PageLayout or /OpenAction because the latter refers to a - # page object which has to be present so that we can get its id. - # - # Furthermore, if using pdfrw, the trailer is cleared every time a page - # is added, so we can only start using it after all pages have been - # written. - - if self.engine == Engine.pikepdf: - catalog = self.writer.Root - elif self.engine == Engine.pdfrw: - catalog = self.writer.trailer.Root - elif self.engine == Engine.internal: - catalog = self.writer.catalog - else: - raise ValueError("unknown engine: %s" % self.engine) - - if ( - self.fullscreen - or self.fit_window - or self.center_window - or self.panes is not None - ): - catalog[PdfName.ViewerPreferences] = PdfDict() - - if self.fullscreen: - # this setting might be overwritten later by the page mode - catalog[PdfName.ViewerPreferences][ - PdfName.NonFullScreenPageMode - ] = PdfName.UseNone - - if self.panes == PageMode.thumbs: - catalog[PdfName.ViewerPreferences][ - PdfName.NonFullScreenPageMode - ] = PdfName.UseThumbs - # this setting might be overwritten later if fullscreen - catalog[PdfName.PageMode] = PdfName.UseThumbs - elif self.panes == PageMode.outlines: - catalog[PdfName.ViewerPreferences][ - PdfName.NonFullScreenPageMode - ] = PdfName.UseOutlines - # this setting might be overwritten later if fullscreen - catalog[PdfName.PageMode] = PdfName.UseOutlines - elif self.panes in [PageMode.none, None]: - pass - else: - raise ValueError("unknown page mode: %s" % self.panes) - - if self.fit_window: - catalog[PdfName.ViewerPreferences][PdfName.FitWindow] = TrueObject - - if self.center_window: - catalog[PdfName.ViewerPreferences][PdfName.CenterWindow] = TrueObject - - if self.fullscreen: - catalog[PdfName.PageMode] = PdfName.FullScreen - - # see table 8.2 in section 8.2.1 in - # http://partners.adobe.com/public/developer/en/pdf/PDFReference16.pdf - # Fit - Fits the page to the window. - # FitH - Fits the width of the page to the window. - # FitV - Fits the height of the page to the window. - # FitR - Fits the rectangle specified by the four coordinates to the - # window. - # FitB - Fits the page bounding box to the window. This basically - # reduces the amount of whitespace (margins) that is displayed - # and thus focussing more on the text content. - # FitBH - Fits the width of the page bounding box to the window. - # FitBV - Fits the height of the page bounding box to the window. - - # by default the initial page is the first one - if self.engine == Engine.pikepdf: - initial_page = self.writer.pages[0] - else: - initial_page = self.writer.pagearray[0] - # we set the open action here to make sure we open on the requested - # initial page but this value might be overwritten by a custom open - # action later while still taking the requested initial page into - # account - if self.initial_page is not None: - if self.engine == Engine.pikepdf: - initial_page = self.writer.pages[self.initial_page - 1] - else: - initial_page = self.writer.pagearray[self.initial_page - 1] - catalog[PdfName.OpenAction] = PdfArray( - [initial_page, PdfName.XYZ, NullObject, NullObject, 0] - ) - - # The /OpenAction array must contain the page as an indirect object. - # This changed some time after 4.2.0 and on or before 5.0.0 and current - # versions require to use .obj or otherwise we get: - # TypeError: Can't convert ObjectHelper (or subclass) to Object - # implicitly. Use .obj to get access the underlying object. - # See https://github.com/pikepdf/pikepdf/issues/313 for details. - if self.engine == Engine.pikepdf: - if isinstance(initial_page, pikepdf.Page): - initial_page = self.writer.make_indirect(initial_page.obj) - else: - initial_page = self.writer.make_indirect(initial_page) - - if self.magnification == Magnification.fit: - catalog[PdfName.OpenAction] = PdfArray([initial_page, PdfName.Fit]) - elif self.magnification == Magnification.fith: - pagewidth = initial_page[PdfName.MediaBox][2] - catalog[PdfName.OpenAction] = PdfArray( - [initial_page, PdfName.FitH, pagewidth] - ) - elif self.magnification == Magnification.fitbh: - # quick hack to determine the image width on the page - imgwidth = float(initial_page[PdfName.Contents].stream.split()[4]) - catalog[PdfName.OpenAction] = PdfArray( - [initial_page, PdfName.FitBH, imgwidth] - ) - elif isinstance(self.magnification, float): - catalog[PdfName.OpenAction] = PdfArray( - [initial_page, PdfName.XYZ, NullObject, NullObject, self.magnification] - ) - elif self.magnification is None: - pass - else: - raise ValueError("unknown magnification: %s" % self.magnification) - - if self.page_layout == PageLayout.single: - catalog[PdfName.PageLayout] = PdfName.SinglePage - elif self.page_layout == PageLayout.onecolumn: - catalog[PdfName.PageLayout] = PdfName.OneColumn - elif self.page_layout == PageLayout.twocolumnright: - catalog[PdfName.PageLayout] = PdfName.TwoColumnRight - elif self.page_layout == PageLayout.twocolumnleft: - catalog[PdfName.PageLayout] = PdfName.TwoColumnLeft - elif self.page_layout == PageLayout.twopageright: - catalog[PdfName.PageLayout] = PdfName.TwoPageRight - if self.output_version < "1.5": - self.output_version = "1.5" - elif self.page_layout == PageLayout.twopageleft: - catalog[PdfName.PageLayout] = PdfName.TwoPageLeft - if self.output_version < "1.5": - self.output_version = "1.5" - elif self.page_layout is None: - pass - else: - raise ValueError("unknown page layout: %s" % self.page_layout) - - if self.pdfa is not None: - if self.engine == Engine.pikepdf: - metadata = self.writer.make_stream(self.xmp) - else: - metadata = PdfDict(stream=convert_load(self.xmp)) - metadata[PdfName.Subtype] = PdfName.XML - metadata[PdfName.Type] = PdfName.Metadata - with open(self.pdfa, "rb") as f: - icc = f.read() - intents = PdfDict() - if self.engine == Engine.pikepdf: - iccstream = self.writer.make_stream(icc) - iccstream.stream_dict.N = 3 - else: - iccstream = PdfDict(stream=convert_load(zlib.compress(icc))) - iccstream[PdfName.N] = 3 - iccstream[PdfName.Filter] = PdfName.FlateDecode - intents[PdfName.S] = PdfName.GTS_PDFA1 - intents[PdfName.Type] = PdfName.OutputIntent - intents[PdfName.OutputConditionIdentifier] = ( - b"sRGB" if self.engine == Engine.pikepdf else b"(sRGB)" - ) - intents[PdfName.DestOutputProfile] = iccstream - catalog[PdfName.OutputIntents] = PdfArray([intents]) - catalog[PdfName.Metadata] = metadata - - if self.engine == Engine.internal: - self.writer.addobj(metadata) - self.writer.addobj(iccstream) - - # now write out the PDF - if self.engine == Engine.pikepdf: - self.writer.save( - outputstream, min_version=self.output_version, linearize=True - ) - elif self.engine == Engine.pdfrw: - self.writer.trailer.Info = self.writer.docinfo - # setting the version attribute of the pdfrw PdfWriter object will - # influence the behaviour of the write() function - self.writer.version = self.output_version - if self.pdfa: - md5 = hashlib.md5(b"").hexdigest().encode("ascii") - self.writer.trailer[PdfName.ID] = PdfArray([md5, md5]) - self.writer.write(outputstream) - elif self.engine == Engine.internal: - self.writer.tostream( - self.writer.docinfo, - outputstream, - self.output_version, - None if self.pdfa is None else b"", - ) - else: - raise ValueError("unknown engine: %s" % self.engine) - - -def get_imgmetadata( - imgdata, imgformat, default_dpi, colorspace, rawdata=None, rotreq=None -): - if imgformat == ImageFormat.JPEG2000 and rawdata is not None and imgdata is None: - # this codepath gets called if the PIL installation is not able to - # handle JPEG2000 files - imgwidthpx, imgheightpx, ics, hdpi, vdpi = parsejp2(rawdata) - - if hdpi is None: - hdpi = default_dpi - if vdpi is None: - vdpi = default_dpi - ndpi = (hdpi, vdpi) - else: - imgwidthpx, imgheightpx = imgdata.size - - ndpi = imgdata.info.get("dpi", (default_dpi, default_dpi)) - # In python3, the returned dpi value for some tiff images will - # not be an integer but a float. To make the behaviour of - # img2pdf the same between python2 and python3, we convert that - # float into an integer by rounding. - # Search online for the 72.009 dpi problem for more info. - ndpi = (int(round(ndpi[0])), int(round(ndpi[1]))) - ics = imgdata.mode - - # GIF and PNG files with transparency are supported - if (imgformat == ImageFormat.PNG or imgformat == ImageFormat.GIF) and ( - ics in ["RGBA", "LA"] or "transparency" in imgdata.info - ): - # Must check the IHDR chunk for the bit depth, because PIL would lossily - # convert 16-bit RGBA/LA images to 8-bit. - if imgformat == ImageFormat.PNG and rawdata is not None: - depth = rawdata[24] - if depth > 8: - logger.warning("Image with transparency and a bit depth of %d." % depth) - logger.warning("This is unsupported due to PIL limitations.") - raise AlphaChannelError( - "Refusing to work with multiple >8bit channels." - ) - elif ics in ["LA", "PA", "RGBA"] or "transparency" in imgdata.info: - raise AlphaChannelError("This function must not be called on images with alpha") - - # Since commit 07a96209597c5e8dfe785c757d7051ce67a980fb or release 4.1.0 - # Pillow retrieves the DPI from EXIF if it cannot find the DPI in the JPEG - # header. In that case it can happen that the horizontal and vertical DPI - # are set to zero. - if ndpi == (0, 0): - ndpi = (default_dpi, default_dpi) - - # PIL defaults to a dpi of 1 if a TIFF image does not specify the dpi. - # In that case, we want to use a different default. - if ndpi == (1, 1) and imgformat == ImageFormat.TIFF: - ndpi = ( - imgdata.tag_v2.get(TiffImagePlugin.X_RESOLUTION, default_dpi), - imgdata.tag_v2.get(TiffImagePlugin.Y_RESOLUTION, default_dpi), - ) - - logger.debug("input dpi = %d x %d", *ndpi) - - rotation = 0 - if rotreq in (None, Rotation.auto, Rotation.ifvalid): - if hasattr(imgdata, "_getexif") and imgdata._getexif() is not None: - for tag, value in imgdata._getexif().items(): - if TAGS.get(tag, tag) == "Orientation": - # Detailed information on EXIF rotation tags: - # http://impulseadventure.com/photo/exif-orientation.html - if value == 1: - rotation = 0 - elif value == 6: - rotation = 90 - elif value == 3: - rotation = 180 - elif value == 8: - rotation = 270 - elif value in (2, 4, 5, 7): - if rotreq == Rotation.ifvalid: - logger.warning( - "Unsupported flipped rotation mode (%d): use " - "--rotation=ifvalid or " - "rotation=img2pdf.Rotation.ifvalid to ignore", - value, - ) - else: - raise ExifOrientationError( - "Unsupported flipped rotation mode (%d): use " - "--rotation=ifvalid or " - "rotation=img2pdf.Rotation.ifvalid to ignore" % value - ) - else: - if rotreq == Rotation.ifvalid: - logger.warning("Invalid rotation (%d)", value) - else: - raise ExifOrientationError( - "Invalid rotation (%d): use --rotation=ifvalid " - "or rotation=img2pdf.Rotation.ifvalid to ignore" % value - ) - elif rotreq in (Rotation.none, Rotation["0"]): - rotation = 0 - elif rotreq == Rotation["90"]: - rotation = 90 - elif rotreq == Rotation["180"]: - rotation = 180 - elif rotreq == Rotation["270"]: - rotation = 270 - else: - raise Exception("invalid rotreq") - - logger.debug("rotation = %d°", rotation) - - if colorspace: - color = colorspace - logger.debug("input colorspace (forced) = %s", color) - else: - color = None - for c in Colorspace: - if c.name == ics: - color = c - if color is None: - # PIL does not provide the information about the original - # colorspace for 16bit grayscale PNG images. Thus, we retrieve - # that info manually by looking at byte 10 in the IHDR chunk. We - # know where to find that in the file because the IHDR chunk must - # be the first chunk - if ( - rawdata is not None - and imgformat == ImageFormat.PNG - and rawdata[25] == 0 - ): - color = Colorspace.L - else: - raise ValueError("unknown colorspace") - if color == Colorspace.CMYK and imgformat == ImageFormat.JPEG: - # Adobe inverts CMYK JPEGs for some reason, and others - # have followed suit as well. Some software assumes the - # JPEG is inverted if the Adobe tag (APP14), while other - # software assumes all CMYK JPEGs are inverted. I don't - # have enough experience with these to know which is - # better for images currently in the wild, so I'm going - # with the first approach for now. - if "adobe" in imgdata.info: - color = Colorspace["CMYK;I"] - logger.debug("input colorspace = %s", color.name) - - iccp = None - if "icc_profile" in imgdata.info: - iccp = imgdata.info.get("icc_profile") - - logger.debug("width x height = %dpx x %dpx", imgwidthpx, imgheightpx) - - return (color, ndpi, imgwidthpx, imgheightpx, rotation, iccp) - - -def ccitt_payload_location_from_pil(img): - # If Pillow is passed an invalid compression argument it will ignore it; - # make sure the image actually got compressed. - if img.info["compression"] != "group4": - raise ValueError( - "Image not compressed with CCITT Group 4 but with: %s" - % img.info["compression"] - ) - - # Read the TIFF tags to find the offset(s) of the compressed data strips. - strip_offsets = img.tag_v2[TiffImagePlugin.STRIPOFFSETS] - strip_bytes = img.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS] - - # PIL always seems to create a single strip even for very large TIFFs when - # it saves images, so assume we only have to read a single strip. - # A test ~10 GPixel image was still encoded as a single strip. Just to be - # safe check throw an error if there is more than one offset. - if len(strip_offsets) != 1 or len(strip_bytes) != 1: - raise NotImplementedError( - "Transcoding multiple strips not supported by the PDF format" - ) - - (offset,), (length,) = strip_offsets, strip_bytes - - logger.debug("TIFF strip_offsets: %d" % offset) - logger.debug("TIFF strip_bytes: %d" % length) - - return offset, length - - -def transcode_monochrome(imgdata): - """Convert the open PIL.Image imgdata to compressed CCITT Group4 data""" - - logger.debug("Converting monochrome to CCITT Group4") - - # Convert the image to Group 4 in memory. If libtiff is not installed and - # Pillow is not compiled against it, .save() will raise an exception. - newimgio = BytesIO() - - # we create a whole new PIL image or otherwise it might happen with some - # input images, that libtiff fails an assert and the whole process is - # killed by a SIGABRT: - # https://gitlab.mister-muffin.de/josch/img2pdf/issues/46 - im = Image.frombytes(imgdata.mode, imgdata.size, imgdata.tobytes()) - - # Since version 8.3.0 Pillow limits strips to 64 KB. Since PDF only - # supports single strip CCITT Group4 payloads, we have to coerce it back - # into putting everything into a single strip. Thanks to Andrew Murray for - # the hack. - # - # Since version 8.4.0 Pillow allows us to modify the strip size explicitly - tmp_strip_size = (imgdata.size[0] + 7) // 8 * imgdata.size[1] - if hasattr(TiffImagePlugin, "STRIP_SIZE"): - # we are using Pillow 8.4.0 or later - with temp_attr(TiffImagePlugin, "STRIP_SIZE", tmp_strip_size): - im.save(newimgio, format="TIFF", compression="group4") - else: - # only needed for Pillow 8.3.x but works for versions before that as - # well - pillow__getitem__ = TiffImagePlugin.ImageFileDirectory_v2.__getitem__ - - def __getitem__(self, tag): - overrides = { - TiffImagePlugin.ROWSPERSTRIP: imgdata.size[1], - TiffImagePlugin.STRIPBYTECOUNTS: [tmp_strip_size], - TiffImagePlugin.STRIPOFFSETS: [0], - } - return overrides.get(tag, pillow__getitem__(self, tag)) - - with temp_attr( - TiffImagePlugin.ImageFileDirectory_v2, "__getitem__", __getitem__ - ): - im.save(newimgio, format="TIFF", compression="group4") - - # Open new image in memory - newimgio.seek(0) - newimg = Image.open(newimgio) - - offset, length = ccitt_payload_location_from_pil(newimg) - - newimgio.seek(offset) - return newimgio.read(length) - - -def parse_png(rawdata): - pngidat = b"" - palette = b"" - i = 16 - while i < len(rawdata): - # once we can require Python >= 3.2 we can use int.from_bytes() instead - (n,) = struct.unpack(">I", rawdata[i - 8 : i - 4]) - if i + n > len(rawdata): - raise Exception("invalid png: %d %d %d" % (i, n, len(rawdata))) - if rawdata[i - 4 : i] == b"IDAT": - pngidat += rawdata[i : i + n] - elif rawdata[i - 4 : i] == b"PLTE": - palette += rawdata[i : i + n] - i += n - i += 12 - return pngidat, palette - - -def read_images(rawdata, colorspace, first_frame_only=False, rot=None): - im = BytesIO(rawdata) - im.seek(0) - imgdata = None - try: - imgdata = Image.open(im) - except IOError as e: - # test if it is a jpeg2000 image - if rawdata[:12] != b"\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A": - raise ImageOpenError( - "cannot read input image (not jpeg2000). " - "PIL: error reading image: %s" % e - ) - # image is jpeg2000 - imgformat = ImageFormat.JPEG2000 - else: - logger.debug("PIL format = %s", imgdata.format) - imgformat = None - for f in ImageFormat: - if f.name == imgdata.format: - imgformat = f - if imgformat is None: - imgformat = ImageFormat.other - - def cleanup(): - if imgdata is not None: - # the python-pil version 2.3.0-1ubuntu3 in Ubuntu does not have the - # close() method - try: - imgdata.close() - except AttributeError: - pass - im.close() - - logger.debug("imgformat = %s", imgformat.name) - - # depending on the input format, determine whether to pass the raw - # image or the zlib compressed color information - - # JPEG and JPEG2000 can be embedded into the PDF as-is - if imgformat == ImageFormat.JPEG or imgformat == ImageFormat.JPEG2000: - color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata( - imgdata, imgformat, default_dpi, colorspace, rawdata, rot - ) - if color == Colorspace["1"]: - raise JpegColorspaceError("jpeg can't be monochrome") - if color == Colorspace["P"]: - raise JpegColorspaceError("jpeg can't have a color palette") - if color == Colorspace["RGBA"]: - raise JpegColorspaceError("jpeg can't have an alpha channel") - logger.debug("read_images() embeds a JPEG") - cleanup() - return [ - ( - color, - ndpi, - imgformat, - rawdata, - None, - imgwidthpx, - imgheightpx, - [], - False, - 8, - rotation, - iccp, - ) - ] - - # The MPO format is multiple JPEG images concatenated together - # we use the offset and size information to dissect the MPO into its - # individual JPEG images and then embed those into the PDF individually. - # - # The downside is, that this truncates the first JPEG as the MPO metadata - # will still be in it but the referenced images are chopped off. We still - # do it that way instead of adding the full MPO as the first image to not - # store duplicate image data. - if imgformat == ImageFormat.MPO: - result = [] - img_page_count = 0 - for offset, mpent in zip( - imgdata._MpoImageFile__mpoffsets, imgdata.mpinfo[0xB002] - ): - if first_frame_only and img_page_count > 0: - break - with BytesIO(rawdata[offset : offset + mpent["Size"]]) as rawframe: - with Image.open(rawframe) as imframe: - # The first frame contains the data that makes the JPEG a MPO - # Could we thus embed an MPO into another MPO? Lets not support - # such madness ;) - if img_page_count > 0 and imframe.format != "JPEG": - raise Exception("MPO payload must be a JPEG %s", imframe.format) - ( - color, - ndpi, - imgwidthpx, - imgheightpx, - rotation, - iccp, - ) = get_imgmetadata( - imframe, ImageFormat.JPEG, default_dpi, colorspace, rotreq=rot - ) - if color == Colorspace["1"]: - raise JpegColorspaceError("jpeg can't be monochrome") - if color == Colorspace["P"]: - raise JpegColorspaceError("jpeg can't have a color palette") - if color == Colorspace["RGBA"]: - raise JpegColorspaceError("jpeg can't have an alpha channel") - logger.debug("read_images() embeds a JPEG from MPO") - result.append( - ( - color, - ndpi, - ImageFormat.JPEG, - rawdata[offset : offset + mpent["Size"]], - None, - imgwidthpx, - imgheightpx, - [], - False, - 8, - rotation, - iccp, - ) - ) - img_page_count += 1 - cleanup() - return result - - # We can directly embed the IDAT chunk of PNG images if the PNG is not - # interlaced - # - # PIL does not provide the information whether a PNG was stored interlaced - # or not. Thus, we retrieve that info manually by looking at byte 13 in the - # IHDR chunk. We know where to find that in the file because the IHDR chunk - # must be the first chunk. - if imgformat == ImageFormat.PNG and rawdata[28] == 0: - color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata( - imgdata, imgformat, default_dpi, colorspace, rawdata, rot - ) - if ( - color != Colorspace.RGBA - and color != Colorspace.LA - and color != Colorspace.PA - and "transparency" not in imgdata.info - ): - pngidat, palette = parse_png(rawdata) - # PIL does not provide the information about the original bits per - # sample. Thus, we retrieve that info manually by looking at byte 9 in - # the IHDR chunk. We know where to find that in the file because the - # IHDR chunk must be the first chunk - depth = rawdata[24] - if depth not in [1, 2, 4, 8, 16]: - raise ValueError("invalid bit depth: %d" % depth) - # we embed the PNG only if it is not at the same time palette based - # and has an icc profile because PDF doesn't support icc profiles - # on palette images - if palette == b"" or iccp is None: - logger.debug("read_images() embeds a PNG") - cleanup() - return [ - ( - color, - ndpi, - imgformat, - pngidat, - None, - imgwidthpx, - imgheightpx, - palette, - False, - depth, - rotation, - iccp, - ) - ] - - # If our input is not JPEG or PNG, then we might have a format that - # supports multiple frames (like TIFF or GIF), so we need a loop to - # iterate through all frames of the image. - # - # Each frame gets compressed using PNG compression *except* if: - # - # * The image is monochrome => encode using CCITT group 4 - # - # * The image is CMYK => zip plain RGB data - # - # * We are handling a CCITT encoded TIFF frame => embed data - - result = [] - img_page_count = 0 - # loop through all frames of the image (example: multipage TIFF) - while True: - try: - imgdata.seek(img_page_count) - except EOFError: - break - - if first_frame_only and img_page_count > 0: - break - - # PIL is unable to preserve the data of 16-bit RGB TIFF files and will - # convert it to 8-bit without the possibility to retrieve the original - # data - # https://github.com/python-pillow/Pillow/issues/1888 - # - # Some tiff images do not have BITSPERSAMPLE set. Use this to create - # such a tiff: tiffset -u 258 test.tif - if ( - imgformat == ImageFormat.TIFF - and max(imgdata.tag_v2.get(TiffImagePlugin.BITSPERSAMPLE, [1])) > 8 - ): - raise ValueError("PIL is unable to preserve more than 8 bits per sample") - - # We can directly copy the data out of a CCITT Group 4 encoded TIFF, if it - # only contains a single strip - if ( - imgformat == ImageFormat.TIFF - and imgdata.info["compression"] == "group4" - and len(imgdata.tag_v2[TiffImagePlugin.STRIPOFFSETS]) == 1 - and len(imgdata.tag_v2[TiffImagePlugin.STRIPBYTECOUNTS]) == 1 - ): - photo = imgdata.tag_v2[TiffImagePlugin.PHOTOMETRIC_INTERPRETATION] - inverted = False - if photo == 0: - inverted = True - elif photo != 1: - raise ValueError( - "unsupported photometric interpretation for " - "group4 tiff: %d" % photo - ) - color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata( - imgdata, imgformat, default_dpi, colorspace, rawdata, rot - ) - offset, length = ccitt_payload_location_from_pil(imgdata) - im.seek(offset) - rawdata = im.read(length) - fillorder = imgdata.tag_v2.get(TiffImagePlugin.FILLORDER) - if fillorder is None: - # no FillOrder: nothing to do - pass - elif fillorder == 1: - # msb-to-lsb: nothing to do - pass - elif fillorder == 2: - logger.debug("fillorder is lsb-to-msb => reverse bits") - # lsb-to-msb: reverse bits of each byte - rawdata = bytearray(rawdata) - for i in range(len(rawdata)): - rawdata[i] = TIFFBitRevTable[rawdata[i]] - rawdata = bytes(rawdata) - else: - raise ValueError("unsupported FillOrder: %d" % fillorder) - logger.debug("read_images() embeds Group4 from TIFF") - result.append( - ( - color, - ndpi, - ImageFormat.CCITTGroup4, - rawdata, - None, - imgwidthpx, - imgheightpx, - [], - inverted, - 1, - rotation, - iccp, - ) - ) - img_page_count += 1 - continue - - logger.debug("Converting frame: %d" % img_page_count) - - color, ndpi, imgwidthpx, imgheightpx, rotation, iccp = get_imgmetadata( - imgdata, imgformat, default_dpi, colorspace, rotreq=rot - ) - - newimg = None - if color == Colorspace["1"]: - try: - ccittdata = transcode_monochrome(imgdata) - logger.debug("read_images() encoded a B/W image as CCITT group 4") - result.append( - ( - color, - ndpi, - ImageFormat.CCITTGroup4, - ccittdata, - None, - imgwidthpx, - imgheightpx, - [], - False, - 1, - rotation, - iccp, - ) - ) - img_page_count += 1 - continue - except Exception as e: - logger.debug(e) - logger.debug("Converting colorspace 1 to L") - newimg = imgdata.convert("L") - color = Colorspace.L - elif color in [ - Colorspace.RGB, - Colorspace.RGBA, - Colorspace.L, - Colorspace.LA, - Colorspace.CMYK, - Colorspace["CMYK;I"], - Colorspace.P, - ]: - logger.debug("Colorspace is OK: %s", color) - newimg = imgdata - else: - raise ValueError("unknown or unsupported colorspace: %s" % color.name) - # the PNG format does not support CMYK, so we fall back to normal - # compression - if color in [Colorspace.CMYK, Colorspace["CMYK;I"]]: - imggz = zlib.compress(newimg.tobytes()) - logger.debug("read_images() encoded CMYK with flate compression") - result.append( - ( - color, - ndpi, - imgformat, - imggz, - None, - imgwidthpx, - imgheightpx, - [], - False, - 8, - rotation, - iccp, - ) - ) - else: - if ( - color == Colorspace.RGBA - or color == Colorspace.LA - or color == Colorspace.PA - or "transparency" in newimg.info - ): - if color == Colorspace.RGBA: - newcolor = color - r, g, b, a = newimg.split() - newimg = Image.merge("RGB", (r, g, b)) - elif color == Colorspace.LA: - newcolor = color - l, a = newimg.split() - newimg = l - else: - newcolor = Colorspace.RGBA - r, g, b, a = newimg.convert(mode="RGBA").split() - newimg = Image.merge("RGB", (r, g, b)) - - smaskidat, _, _ = to_png_data(a) - logger.warning( - "Image contains an alpha channel. Computing a separate " - "soft mask (/SMask) image to store transparency in PDF." - ) - elif color in [Colorspace.P, Colorspace.PA] and iccp is not None: - # PDF does not support palette images with icc profile - if color == Colorspace.P: - newcolor = Colorspace.RGB - newimg = newimg.convert(mode="RGB") - elif color == Colorspace.PA: - newcolor = Colorspace.RGBA - newimg = newimg.convert(mode="RGBA") - smaskidat = None - else: - newcolor = color - smaskidat = None - - pngidat, palette, depth = to_png_data(newimg) - logger.debug("read_images() encoded an image as PNG") - result.append( - ( - newcolor, - ndpi, - ImageFormat.PNG, - pngidat, - smaskidat, - imgwidthpx, - imgheightpx, - palette, - False, - depth, - rotation, - iccp, - ) - ) - img_page_count += 1 - cleanup() - return result - - -def to_png_data(img): - # cheapo version to retrieve a PNG encoding of the payload is to - # just save it with PIL. In the future this could be replaced by - # dedicated function applying the Paeth PNG filter to the raw pixel - pngbuffer = BytesIO() - img.save(pngbuffer, format="png") - - pngidat, palette = parse_png(pngbuffer.getvalue()) - # PIL does not provide the information about the original bits per - # sample. Thus, we retrieve that info manually by looking at byte 9 in - # the IHDR chunk. We know where to find that in the file because the - # IHDR chunk must be the first chunk - pngbuffer.seek(24) - depth = ord(pngbuffer.read(1)) - if depth not in [1, 2, 4, 8, 16]: - raise ValueError("invalid bit depth: %d" % depth) - return pngidat, palette, depth - - -# converts a length in pixels to a length in PDF units (1/72 of an inch) -def px_to_pt(length, dpi): - return 72.0 * length / dpi - - -def cm_to_pt(length): - return (72.0 * length) / 2.54 - - -def mm_to_pt(length): - return (72.0 * length) / 25.4 - - -def in_to_pt(length): - return 72.0 * length - - -def get_layout_fun( - pagesize=None, imgsize=None, border=None, fit=None, auto_orient=False -): - def fitfun(fit, imgwidth, imgheight, fitwidth, fitheight): - if fitwidth is None and fitheight is None: - raise ValueError("fitwidth and fitheight cannot both be None") - # if fit is fill or enlarge then it is okay if one of the dimensions - # are negative but one of them must still be positive - # if fit is not fill or enlarge then both dimensions must be positive - if ( - fit in [FitMode.fill, FitMode.enlarge] - and fitwidth is not None - and fitwidth < 0 - and fitheight is not None - and fitheight < 0 - ): - raise ValueError( - "cannot fit into a rectangle where both dimensions are negative" - ) - elif fit not in [FitMode.fill, FitMode.enlarge] and ( - (fitwidth is not None and fitwidth < 0) - or (fitheight is not None and fitheight < 0) - ): - raise Exception( - "cannot fit into a rectangle where either dimensions are negative" - ) - - def default(): - if fitwidth is not None and fitheight is not None: - newimgwidth = fitwidth - newimgheight = (newimgwidth * imgheight) / imgwidth - if newimgheight > fitheight: - newimgheight = fitheight - newimgwidth = (newimgheight * imgwidth) / imgheight - elif fitwidth is None and fitheight is not None: - newimgheight = fitheight - newimgwidth = (newimgheight * imgwidth) / imgheight - elif fitheight is None and fitwidth is not None: - newimgwidth = fitwidth - newimgheight = (newimgwidth * imgheight) / imgwidth - else: - raise ValueError("fitwidth and fitheight cannot both be None") - return newimgwidth, newimgheight - - if fit is None or fit == FitMode.into: - return default() - elif fit == FitMode.fill: - if fitwidth is not None and fitheight is not None: - newimgwidth = fitwidth - newimgheight = (newimgwidth * imgheight) / imgwidth - if newimgheight < fitheight: - newimgheight = fitheight - newimgwidth = (newimgheight * imgwidth) / imgheight - elif fitwidth is None and fitheight is not None: - newimgheight = fitheight - newimgwidth = (newimgheight * imgwidth) / imgheight - elif fitheight is None and fitwidth is not None: - newimgwidth = fitwidth - newimgheight = (newimgwidth * imgheight) / imgwidth - else: - raise ValueError("fitwidth and fitheight cannot both be None") - return newimgwidth, newimgheight - elif fit == FitMode.exact: - if fitwidth is not None and fitheight is not None: - return fitwidth, fitheight - elif fitwidth is None and fitheight is not None: - newimgheight = fitheight - newimgwidth = (newimgheight * imgwidth) / imgheight - elif fitheight is None and fitwidth is not None: - newimgwidth = fitwidth - newimgheight = (newimgwidth * imgheight) / imgwidth - else: - raise ValueError("fitwidth and fitheight cannot both be None") - return newimgwidth, newimgheight - elif fit == FitMode.shrink: - if fitwidth is not None and fitheight is not None: - if imgwidth <= fitwidth and imgheight <= fitheight: - return imgwidth, imgheight - elif fitwidth is None and fitheight is not None: - if imgheight <= fitheight: - return imgwidth, imgheight - elif fitheight is None and fitwidth is not None: - if imgwidth <= fitwidth: - return imgwidth, imgheight - else: - raise ValueError("fitwidth and fitheight cannot both be None") - return default() - elif fit == FitMode.enlarge: - if fitwidth is not None and fitheight is not None: - if imgwidth > fitwidth or imgheight > fitheight: - return imgwidth, imgheight - elif fitwidth is None and fitheight is not None: - if imgheight > fitheight: - return imgwidth, imgheight - elif fitheight is None and fitwidth is not None: - if imgwidth > fitwidth: - return imgwidth, imgheight - else: - raise ValueError("fitwidth and fitheight cannot both be None") - return default() - else: - raise NotImplementedError - - # if no layout arguments are given, then the image size is equal to the - # page size and will be drawn with the default dpi - if pagesize is None and imgsize is None and border is None: - return default_layout_fun - if pagesize is None and imgsize is None and border is not None: - - def layout_fun(imgwidthpx, imgheightpx, ndpi): - imgwidthpdf = px_to_pt(imgwidthpx, ndpi[0]) - imgheightpdf = px_to_pt(imgheightpx, ndpi[1]) - pagewidth = imgwidthpdf + 2 * border[1] - pageheight = imgheightpdf + 2 * border[0] - return pagewidth, pageheight, imgwidthpdf, imgheightpdf - - return layout_fun - if border is None: - border = (0, 0) - # if the pagesize is given but the imagesize is not, then the imagesize - # will be calculated from the pagesize, taking into account the border - # and the fitting - if pagesize is not None and imgsize is None: - - def layout_fun(imgwidthpx, imgheightpx, ndpi): - if ( - pagesize[0] is not None - and pagesize[1] is not None - and auto_orient - and ( - (imgwidthpx > imgheightpx and pagesize[0] < pagesize[1]) - or (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1]) - ) - ): - pagewidth, pageheight = pagesize[1], pagesize[0] - newborder = border[1], border[0] - else: - pagewidth, pageheight = pagesize[0], pagesize[1] - newborder = border - if pagewidth is not None: - fitwidth = pagewidth - 2 * newborder[1] - else: - fitwidth = None - if pageheight is not None: - fitheight = pageheight - 2 * newborder[0] - else: - fitheight = None - if ( - fit in [FitMode.fill, FitMode.enlarge] - and fitwidth is not None - and fitwidth < 0 - and fitheight is not None - and fitheight < 0 - ): - raise NegativeDimensionError( - "at least one border dimension musts be smaller than half " - "the respective page dimension" - ) - elif fit not in [FitMode.fill, FitMode.enlarge] and ( - (fitwidth is not None and fitwidth < 0) - or (fitheight is not None and fitheight < 0) - ): - raise NegativeDimensionError( - "one border dimension is larger than half of the " - "respective page dimension" - ) - imgwidthpdf, imgheightpdf = fitfun( - fit, - px_to_pt(imgwidthpx, ndpi[0]), - px_to_pt(imgheightpx, ndpi[1]), - fitwidth, - fitheight, - ) - if pagewidth is None: - pagewidth = imgwidthpdf + border[1] * 2 - if pageheight is None: - pageheight = imgheightpdf + border[0] * 2 - return pagewidth, pageheight, imgwidthpdf, imgheightpdf - - return layout_fun - - def scale_imgsize(s, px, dpi): - if s is None: - return None - mode, value = s - if mode == ImgSize.abs: - return value - if mode == ImgSize.perc: - return (px_to_pt(px, dpi) * value) / 100 - if mode == ImgSize.dpi: - return px_to_pt(px, value) - raise NotImplementedError - - if pagesize is None and imgsize is not None: - - def layout_fun(imgwidthpx, imgheightpx, ndpi): - imgwidthpdf, imgheightpdf = fitfun( - fit, - px_to_pt(imgwidthpx, ndpi[0]), - px_to_pt(imgheightpx, ndpi[1]), - scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]), - scale_imgsize(imgsize[1], imgheightpx, ndpi[1]), - ) - pagewidth = imgwidthpdf + 2 * border[1] - pageheight = imgheightpdf + 2 * border[0] - return pagewidth, pageheight, imgwidthpdf, imgheightpdf - - return layout_fun - if pagesize is not None and imgsize is not None: - - def layout_fun(imgwidthpx, imgheightpx, ndpi): - if ( - pagesize[0] is not None - and pagesize[1] is not None - and auto_orient - and ( - (imgwidthpx > imgheightpx and pagesize[0] < pagesize[1]) - or (imgwidthpx < imgheightpx and pagesize[0] > pagesize[1]) - ) - ): - pagewidth, pageheight = pagesize[1], pagesize[0] - else: - pagewidth, pageheight = pagesize[0], pagesize[1] - imgwidthpdf, imgheightpdf = fitfun( - fit, - px_to_pt(imgwidthpx, ndpi[0]), - px_to_pt(imgheightpx, ndpi[1]), - scale_imgsize(imgsize[0], imgwidthpx, ndpi[0]), - scale_imgsize(imgsize[1], imgheightpx, ndpi[1]), - ) - return pagewidth, pageheight, imgwidthpdf, imgheightpdf - - return layout_fun - raise NotImplementedError - - -def default_layout_fun(imgwidthpx, imgheightpx, ndpi): - imgwidthpdf = pagewidth = px_to_pt(imgwidthpx, ndpi[0]) - imgheightpdf = pageheight = px_to_pt(imgheightpx, ndpi[1]) - return pagewidth, pageheight, imgwidthpdf, imgheightpdf - - -def get_fixed_dpi_layout_fun(fixed_dpi): - """Layout function that overrides whatever DPI is claimed in input images. - - >>> layout_fun = get_fixed_dpi_layout_fun((300, 300)) - >>> convert(image1, layout_fun=layout_fun, ... outputstream=...) - """ - - def fixed_dpi_layout_fun(imgwidthpx, imgheightpx, ndpi): - return default_layout_fun(imgwidthpx, imgheightpx, fixed_dpi) - - return fixed_dpi_layout_fun - - -def find_scale(pagewidth, pageheight): - """Find the power of 10 (10, 100, 1000...) that will reduce the scale - below the PDF specification limit of 14400 PDF units (=200 inches). - In principle we could also choose a scale that is not a power of 10. - We use powers of 10 because numbers in the PDF format are represented - in base-10 and using powers of 10 will thus just shift the comma and - keep the numbers easily readable by humans as well.""" - from math import log10, ceil - - major = max(pagewidth, pageheight) - oversized = major / 14400.0 - - return 10 ** ceil(log10(oversized)) - - -# given one or more input image, depending on outputstream, either return a -# string containing the whole PDF if outputstream is None or write the PDF -# data to the given file-like object and return None -# -# Input images can be given as file like objects (they must implement read()), -# as a binary string representing the image content or as filenames to the -# images. -def convert(*images, **kwargs): - - _default_kwargs = dict( - engine=None, - title=None, - author=None, - creator=None, - producer=None, - creationdate=None, - moddate=None, - subject=None, - keywords=None, - colorspace=None, - nodate=False, - layout_fun=default_layout_fun, - viewer_panes=None, - viewer_initial_page=None, - viewer_magnification=None, - viewer_page_layout=None, - viewer_fit_window=False, - viewer_center_window=False, - viewer_fullscreen=False, - outputstream=None, - first_frame_only=False, - allow_oversized=True, - cropborder=None, - bleedborder=None, - trimborder=None, - artborder=None, - pdfa=None, - rotation=None, - ) - for kwname, default in _default_kwargs.items(): - if kwname not in kwargs: - kwargs[kwname] = default - - pdf = pdfdoc( - kwargs["engine"], - "1.3", - kwargs["title"], - kwargs["author"], - kwargs["creator"], - kwargs["producer"], - kwargs["creationdate"], - kwargs["moddate"], - kwargs["subject"], - kwargs["keywords"], - kwargs["nodate"], - kwargs["viewer_panes"], - kwargs["viewer_initial_page"], - kwargs["viewer_magnification"], - kwargs["viewer_page_layout"], - kwargs["viewer_fit_window"], - kwargs["viewer_center_window"], - kwargs["viewer_fullscreen"], - kwargs["pdfa"], - ) - - # backwards compatibility with older img2pdf versions where the first - # argument to the function had to be given as a list - if len(images) == 1: - # if only one argument was given and it is a list, expand it - if isinstance(images[0], (list, tuple)): - images = images[0] - - if not isinstance(images, (list, tuple)): - images = [images] - else: - if len(images) == 0: - raise ValueError("Unable to process empty list") - - for img in images: - # img is allowed to be a path, a binary string representing image data - # or a file-like object (really anything that implements read()) - try: - rawdata = img.read() - except AttributeError: - if not isinstance(img, (str, bytes)): - raise TypeError("Neither implements read() nor is str or bytes") - # the thing doesn't have a read() function, so try if we can treat - # it as a file name - try: - f = open(img, "rb") - except Exception: - # whatever the exception is (string could contain NUL - # characters or the path could just not exist) it's not a file - # name so we now try treating it as raw image content - rawdata = img - else: - # we are not using a "with" block here because we only want to - # catch exceptions thrown by open(). The read() may throw its - # own exceptions like MemoryError which should be handled - # differently. - rawdata = f.read() - f.close() - - for ( - color, - ndpi, - imgformat, - imgdata, - smaskdata, - imgwidthpx, - imgheightpx, - palette, - inverted, - depth, - rotation, - iccp, - ) in read_images( - rawdata, - kwargs["colorspace"], - kwargs["first_frame_only"], - kwargs["rotation"], - ): - pagewidth, pageheight, imgwidthpdf, imgheightpdf = kwargs["layout_fun"]( - imgwidthpx, imgheightpx, ndpi - ) - - userunit = None - if pagewidth < 3.00 or pageheight < 3.00: - logger.warning( - "pdf width or height is below 3.00 - too small for some viewers!" - ) - elif pagewidth > 14400.0 or pageheight > 14400.0: - if kwargs["allow_oversized"]: - userunit = find_scale(pagewidth, pageheight) - pagewidth /= userunit - pageheight /= userunit - imgwidthpdf /= userunit - imgheightpdf /= userunit - else: - raise PdfTooLargeError( - "pdf width or height must not exceed 200 inches." - ) - for border in ["crop", "bleed", "trim", "art"]: - if kwargs[border + "border"] is None: - continue - if pagewidth < 2 * kwargs[border + "border"][1]: - raise ValueError( - "horizontal %s border larger than page width" % border - ) - if pageheight < 2 * kwargs[border + "border"][0]: - raise ValueError( - "vertical %s border larger than page height" % border - ) - # the image is always centered on the page - imgxpdf = (pagewidth - imgwidthpdf) / 2.0 - imgypdf = (pageheight - imgheightpdf) / 2.0 - pdf.add_imagepage( - color, - imgwidthpx, - imgheightpx, - imgformat, - imgdata, - smaskdata, - imgwidthpdf, - imgheightpdf, - imgxpdf, - imgypdf, - pagewidth, - pageheight, - userunit, - palette, - inverted, - depth, - rotation, - kwargs["cropborder"], - kwargs["bleedborder"], - kwargs["trimborder"], - kwargs["artborder"], - iccp, - ) - - if kwargs["outputstream"]: - pdf.tostream(kwargs["outputstream"]) - return - - return pdf.tostring() - - -def parse_num(num, name): - if num == "": - return None - unit = None - if num.endswith("pt"): - unit = Unit.pt - elif num.endswith("cm"): - unit = Unit.cm - elif num.endswith("mm"): - unit = Unit.mm - elif num.endswith("in"): - unit = Unit.inch - else: - try: - num = float(num) - except ValueError: - msg = ( - "%s is not a floating point number and doesn't have a " - "valid unit: %s" % (name, num) - ) - raise argparse.ArgumentTypeError(msg) - if unit is None: - unit = Unit.pt - else: - num = num[:-2] - try: - num = float(num) - except ValueError: - msg = "%s is not a floating point number: %s" % (name, num) - raise argparse.ArgumentTypeError(msg) - if num < 0: - msg = "%s must not be negative: %s" % (name, num) - raise argparse.ArgumentTypeError(msg) - if unit == Unit.cm: - num = cm_to_pt(num) - elif unit == Unit.mm: - num = mm_to_pt(num) - elif unit == Unit.inch: - num = in_to_pt(num) - return num - - -def parse_imgsize_num(num, name): - if num == "": - return None - unit = None - if num.endswith("pt"): - unit = ImgUnit.pt - elif num.endswith("cm"): - unit = ImgUnit.cm - elif num.endswith("mm"): - unit = ImgUnit.mm - elif num.endswith("in"): - unit = ImgUnit.inch - elif num.endswith("dpi"): - unit = ImgUnit.dpi - elif num.endswith("%"): - unit = ImgUnit.perc - else: - try: - num = float(num) - except ValueError: - msg = ( - "%s is not a floating point number and doesn't have a " - "valid unit: %s" % (name, num) - ) - raise argparse.ArgumentTypeError(msg) - if unit is None: - unit = ImgUnit.pt - else: - # strip off unit from string - if unit == ImgUnit.dpi: - num = num[:-3] - elif unit == ImgUnit.perc: - num = num[:-1] - else: - num = num[:-2] - try: - num = float(num) - except ValueError: - msg = "%s is not a floating point number: %s" % (name, num) - raise argparse.ArgumentTypeError(msg) - if unit == ImgUnit.cm: - num = (ImgSize.abs, cm_to_pt(num)) - elif unit == ImgUnit.mm: - num = (ImgSize.abs, mm_to_pt(num)) - elif unit == ImgUnit.inch: - num = (ImgSize.abs, in_to_pt(num)) - elif unit == ImgUnit.pt: - num = (ImgSize.abs, num) - elif unit == ImgUnit.dpi: - num = (ImgSize.dpi, num) - elif unit == ImgUnit.perc: - num = (ImgSize.perc, num) - return num - - -def parse_pagesize_rectarg(string): - transposed = string.endswith("^T") - if transposed: - string = string[:-2] - if papersizes.get(string.lower()): - string = papersizes[string.lower()] - if "x" not in string: - # if there is no separating "x" in the string, then the string is - # interpreted as the width - w = parse_num(string, "width") - h = None - else: - w, h = string.split("x", 1) - w = parse_num(w, "width") - h = parse_num(h, "height") - if transposed: - w, h = h, w - if w is None and h is None: - raise argparse.ArgumentTypeError("at least one dimension must be specified") - return w, h - - -def parse_imgsize_rectarg(string): - transposed = string.endswith("^T") - if transposed: - string = string[:-2] - if papersizes.get(string.lower()): - string = papersizes[string.lower()] - if "x" not in string: - # if there is no separating "x" in the string, then the string is - # interpreted as the width - w = parse_imgsize_num(string, "width") - h = None - else: - w, h = string.split("x", 1) - w = parse_imgsize_num(w, "width") - h = parse_imgsize_num(h, "height") - if transposed: - w, h = h, w - if w is None and h is None: - raise argparse.ArgumentTypeError("at least one dimension must be specified") - return w, h - - -def parse_colorspacearg(string): - for c in Colorspace: - if c.name == string: - return c - allowed = ", ".join([c.name for c in Colorspace]) - raise argparse.ArgumentTypeError( - "Unsupported colorspace: %s. Must be one of: %s." % (string, allowed) - ) - - -def parse_enginearg(string): - for c in Engine: - if c.name == string: - return c - allowed = ", ".join([c.name for c in Engine]) - raise argparse.ArgumentTypeError( - "Unsupported engine: %s. Must be one of: %s." % (string, allowed) - ) - - -def parse_borderarg(string): - if ":" in string: - h, v = string.split(":", 1) - if h == "": - raise argparse.ArgumentTypeError("missing value before colon") - if v == "": - raise argparse.ArgumentTypeError("missing value after colon") - else: - if string == "": - raise argparse.ArgumentTypeError("border option cannot be empty") - h, v = string, string - h, v = parse_num(h, "left/right border"), parse_num(v, "top/bottom border") - if h is None and v is None: - raise argparse.ArgumentTypeError("missing value") - return h, v - - -def from_file(path): - result = [] - if path == "-": - content = sys.stdin.buffer.read() - else: - with open(path, "rb") as f: - content = f.read() - for path in content.split(b"\0"): - if path == b"": - continue - try: - # test-read a byte from it so that we can abort early in case - # we cannot read data from the file - with open(path, "rb") as im: - im.read(1) - except IsADirectoryError: - raise argparse.ArgumentTypeError('"%s" is a directory' % path) - except PermissionError: - raise argparse.ArgumentTypeError('"%s" permission denied' % path) - except FileNotFoundError: - raise argparse.ArgumentTypeError('"%s" does not exist' % path) - result.append(path) - return result - - -def input_images(path_expr): - if path_expr == "-": - # we slurp in all data from stdin because we need to seek in it later - result = [sys.stdin.buffer.read()] - if len(result) == 0: - raise argparse.ArgumentTypeError('"%s" is empty' % path_expr) - else: - result = [] - paths = [path_expr] - if sys.platform == "win32" and ("*" in path_expr or "?" in path_expr): - # on windows, program is responsible for expanding wildcards such as *.jpg - # glob won't return files that don't exist so we only use it for wildcards - # paths without wildcards that do not exist will trigger "does not exist" - from glob import glob - - paths = sorted(glob(path_expr)) - for path in paths: - try: - if os.path.getsize(path) == 0: - raise argparse.ArgumentTypeError('"%s" is empty' % path) - # test-read a byte from it so that we can abort early in case - # we cannot read data from the file - with open(path, "rb") as im: - im.read(1) - except IsADirectoryError: - raise argparse.ArgumentTypeError('"%s" is a directory' % path) - except PermissionError: - raise argparse.ArgumentTypeError('"%s" permission denied' % path) - except FileNotFoundError: - raise argparse.ArgumentTypeError('"%s" does not exist' % path) - result.append(path) - return result - - -def parse_rotationarg(string): - for m in Rotation: - if m.name == string.lower(): - return m - raise argparse.ArgumentTypeError("unknown rotation value: %s" % string) - - -def parse_fitarg(string): - for m in FitMode: - if m.name == string.lower(): - return m - raise argparse.ArgumentTypeError("unknown fit mode: %s" % string) - - -def parse_panes(string): - for m in PageMode: - if m.name == string.lower(): - return m - allowed = ", ".join([m.name for m in PageMode]) - raise argparse.ArgumentTypeError( - "Unsupported page mode: %s. Must be one of: %s." % (string, allowed) - ) - - -def parse_magnification(string): - for m in Magnification: - if m.name == string.lower(): - return m - try: - return float(string) - except ValueError: - pass - allowed = ", ".join([m.name for m in Magnification]) - raise argparse.ArgumentTypeError( - "Unsupported magnification: %s. Must be " - "a floating point number or one of: %s." % (string, allowed) - ) - - -def parse_layout(string): - for l in PageLayout: - if l.name == string.lower(): - return l - allowed = ", ".join([l.name for l in PageLayout]) - raise argparse.ArgumentTypeError( - "Unsupported page layout: %s. Must be one of: %s." % (string, allowed) - ) - - -def valid_date(string): - # first try parsing in ISO8601 format - try: - return datetime.strptime(string, "%Y-%m-%d") - except ValueError: - pass - try: - return datetime.strptime(string, "%Y-%m-%dT%H:%M") - except ValueError: - pass - try: - return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S") - except ValueError: - pass - # then try dateutil - try: - from dateutil import parser - except ImportError: - pass - else: - try: - return parser.parse(string) - except TypeError: - pass - # as a last resort, try the local date utility - try: - import subprocess - except ImportError: - pass - else: - try: - utime = subprocess.check_output(["date", "--date", string, "+%s"]) - except subprocess.CalledProcessError: - pass - else: - return datetime.utcfromtimestamp(int(utime)) - raise argparse.ArgumentTypeError("cannot parse date: %s" % string) - - -def gui(): - import tkinter - import tkinter.filedialog - - have_fitz = True - try: - import fitz - except ImportError: - have_fitz = False - - # from Python 3.7 Lib/idlelib/configdialog.py - # Copyright 2015-2017 Terry Jan Reedy - # Python License - class VerticalScrolledFrame(tkinter.Frame): - """A pure Tkinter vertically scrollable frame. - - * Use the 'interior' attribute to place widgets inside the scrollable frame - * Construct and pack/place/grid normally - * This frame only allows vertical scrolling - """ - - def __init__(self, parent, *args, **kw): - tkinter.Frame.__init__(self, parent, *args, **kw) - - # Create a canvas object and a vertical scrollbar for scrolling it. - vscrollbar = tkinter.Scrollbar(self, orient=tkinter.VERTICAL) - vscrollbar.pack(fill=tkinter.Y, side=tkinter.RIGHT, expand=tkinter.FALSE) - canvas = tkinter.Canvas( - self, - borderwidth=0, - highlightthickness=0, - yscrollcommand=vscrollbar.set, - width=240, - ) - canvas.pack(side=tkinter.LEFT, fill=tkinter.BOTH, expand=tkinter.TRUE) - vscrollbar.config(command=canvas.yview) - - # Reset the view. - canvas.xview_moveto(0) - canvas.yview_moveto(0) - - # Create a frame inside the canvas which will be scrolled with it. - self.interior = interior = tkinter.Frame(canvas) - interior_id = canvas.create_window(0, 0, window=interior, anchor=tkinter.NW) - - # Track changes to the canvas and frame width and sync them, - # also updating the scrollbar. - def _configure_interior(event): - # Update the scrollbars to match the size of the inner frame. - size = (interior.winfo_reqwidth(), interior.winfo_reqheight()) - canvas.config(scrollregion="0 0 %s %s" % size) - - interior.bind("<Configure>", _configure_interior) - - def _configure_canvas(event): - if interior.winfo_reqwidth() != canvas.winfo_width(): - # Update the inner frame's width to fill the canvas. - canvas.itemconfigure(interior_id, width=canvas.winfo_width()) - - canvas.bind("<Configure>", _configure_canvas) - - return - - # From Python 3.7 Lib/tkinter/__init__.py - # Copyright 2000 Fredrik Lundh - # Python License - # - # add support for 'state' and 'name' kwargs - # add support for updating list of options - class OptionMenu(tkinter.Menubutton): - """OptionMenu which allows the user to select a value from a menu.""" - - def __init__(self, master, variable, value, *values, **kwargs): - """Construct an optionmenu widget with the parent MASTER, with - the resource textvariable set to VARIABLE, the initially selected - value VALUE, the other menu values VALUES and an additional - keyword argument command.""" - kw = { - "borderwidth": 2, - "textvariable": variable, - "indicatoron": 1, - "relief": tkinter.RAISED, - "anchor": "c", - "highlightthickness": 2, - } - if "state" in kwargs: - kw["state"] = kwargs["state"] - del kwargs["state"] - if "name" in kwargs: - kw["name"] = kwargs["name"] - del kwargs["name"] - tkinter.Widget.__init__(self, master, "menubutton", kw) - self.widgetName = "tk_optionMenu" - self.callback = kwargs.get("command") - self.variable = variable - if "command" in kwargs: - del kwargs["command"] - if kwargs: - raise tkinter.TclError("unknown option -" + list(kwargs.keys())[0]) - self.set_values([value] + list(values)) - - def __getitem__(self, name): - if name == "menu": - return self.__menu - return tkinter.Widget.__getitem__(self, name) - - def set_values(self, values): - menu = self.__menu = tkinter.Menu(self, name="menu", tearoff=0) - self.menuname = menu._w - for v in values: - menu.add_command( - label=v, command=tkinter._setit(self.variable, v, self.callback) - ) - self["menu"] = menu - - def destroy(self): - """Destroy this widget and the associated menu.""" - tkinter.Menubutton.destroy(self) - self.__menu = None - - root = tkinter.Tk() - app = tkinter.Frame(master=root) - - infiles = [] - maxpagewidth = 0 - maxpageheight = 0 - doc = None - - args = { - "engine": tkinter.StringVar(), - "auto_orient": tkinter.BooleanVar(), - "fit": tkinter.StringVar(), - "title": tkinter.StringVar(), - "author": tkinter.StringVar(), - "creator": tkinter.StringVar(), - "producer": tkinter.StringVar(), - "subject": tkinter.StringVar(), - "keywords": tkinter.StringVar(), - "nodate": tkinter.BooleanVar(), - "creationdate": tkinter.StringVar(), - "moddate": tkinter.StringVar(), - "viewer_panes": tkinter.StringVar(), - "viewer_initial_page": tkinter.IntVar(), - "viewer_magnification": tkinter.StringVar(), - "viewer_page_layout": tkinter.StringVar(), - "viewer_fit_window": tkinter.BooleanVar(), - "viewer_center_window": tkinter.BooleanVar(), - "viewer_fullscreen": tkinter.BooleanVar(), - "pagesize_dropdown": tkinter.StringVar(), - "pagesize_width": tkinter.DoubleVar(), - "pagesize_height": tkinter.DoubleVar(), - "imgsize_dropdown": tkinter.StringVar(), - "imgsize_width": tkinter.DoubleVar(), - "imgsize_height": tkinter.DoubleVar(), - "colorspace": tkinter.StringVar(), - "first_frame_only": tkinter.BooleanVar(), - } - args["engine"].set("auto") - args["title"].set("") - args["auto_orient"].set(False) - args["fit"].set("into") - args["colorspace"].set("auto") - args["viewer_panes"].set("auto") - args["viewer_initial_page"].set(1) - args["viewer_magnification"].set("auto") - args["viewer_page_layout"].set("auto") - args["first_frame_only"].set(False) - args["pagesize_dropdown"].set("auto") - args["imgsize_dropdown"].set("auto") - - def on_open_button(): - nonlocal infiles - nonlocal doc - nonlocal maxpagewidth - nonlocal maxpageheight - infiles = tkinter.filedialog.askopenfilenames( - parent=root, - title="open image", - filetypes=[ - ( - "images", - "*.bmp *.eps *.gif *.ico *.jpeg *.jpg *.jp2 *.pcx *.png *.ppm *.tiff", - ), - ("all files", "*"), - ], - # initialdir="/home/josch/git/plakativ", - # initialfile="test.pdf", - ) - if have_fitz: - with BytesIO() as f: - save_pdf(f) - f.seek(0) - doc = fitz.open(stream=f, filetype="pdf") - for page in doc: - if page.getDisplayList().rect.width > maxpagewidth: - maxpagewidth = page.getDisplayList().rect.width - if page.getDisplayList().rect.height > maxpageheight: - maxpageheight = page.getDisplayList().rect.height - draw() - - def save_pdf(stream): - pagesizearg = None - if args["pagesize_dropdown"].get() == "auto": - # nothing to do - pass - elif args["pagesize_dropdown"].get() == "custom": - pagesizearg = args["pagesize_width"].get(), args["pagesize_height"].get() - elif args["pagesize_dropdown"].get() in papernames.values(): - raise NotImplemented() - else: - raise Exception("no such pagesize: %s" % args["pagesize_dropdown"].get()) - imgsizearg = None - if args["imgsize_dropdown"].get() == "auto": - # nothing to do - pass - elif args["imgsize_dropdown"].get() == "custom": - imgsizearg = args["imgsize_width"].get(), args["imgsize_height"].get() - elif args["imgsize_dropdown"].get() in papernames.values(): - raise NotImplemented() - else: - raise Exception("no such imgsize: %s" % args["imgsize_dropdown"].get()) - borderarg = None - layout_fun = get_layout_fun( - pagesizearg, - imgsizearg, - borderarg, - args["fit"].get(), - args["auto_orient"].get(), - ) - viewer_panesarg = None - if args["viewer_panes"].get() == "auto": - # nothing to do - pass - elif args["viewer_panes"].get() in PageMode: - viewer_panesarg = args["viewer_panes"].get() - else: - raise Exception("no such viewer_panes: %s" % args["viewer_panes"].get()) - viewer_magnificationarg = None - if args["viewer_magnification"].get() == "auto": - # nothing to do - pass - elif args["viewer_magnification"].get() in Magnification: - viewer_magnificationarg = args["viewer_magnification"].get() - else: - raise Exception( - "no such viewer_magnification: %s" % args["viewer_magnification"].get() - ) - viewer_page_layoutarg = None - if args["viewer_page_layout"].get() == "auto": - # nothing to do - pass - elif args["viewer_page_layout"].get() in PageLayout: - viewer_page_layoutarg = args["viewer_page_layout"].get() - else: - raise Exception( - "no such viewer_page_layout: %s" % args["viewer_page_layout"].get() - ) - colorspacearg = None - if args["colorspace"].get() != "auto": - colorspacearg = next( - v for v in Colorspace if v.name == args["colorspace"].get() - ) - enginearg = None - if args["engine"].get() != "auto": - enginearg = next(v for v in Engine if v.name == args["engine"].get()) - - convert( - *infiles, - engine=enginearg, - title=args["title"].get() if args["title"].get() else None, - author=args["author"].get() if args["author"].get() else None, - creator=args["creator"].get() if args["creator"].get() else None, - producer=args["producer"].get() if args["producer"].get() else None, - creationdate=args["creationdate"].get() - if args["creationdate"].get() - else None, - moddate=args["moddate"].get() if args["moddate"].get() else None, - subject=args["subject"].get() if args["subject"].get() else None, - keywords=args["keywords"].get() if args["keywords"].get() else None, - colorspace=colorspacearg, - nodate=args["nodate"].get(), - layout_fun=layout_fun, - viewer_panes=viewer_panesarg, - viewer_initial_page=args["viewer_initial_page"].get() - if args["viewer_initial_page"].get() > 1 - else None, - viewer_magnification=viewer_magnificationarg, - viewer_page_layout=viewer_page_layoutarg, - viewer_fit_window=(args["viewer_fit_window"].get() or None), - viewer_center_window=(args["viewer_center_window"].get() or None), - viewer_fullscreen=(args["viewer_fullscreen"].get() or None), - outputstream=stream, - first_frame_only=args["first_frame_only"].get(), - cropborder=None, - bleedborder=None, - trimborder=None, - artborder=None, - ) - - def on_save_button(): - filename = tkinter.filedialog.asksaveasfilename( - parent=root, - title="save PDF", - defaultextension=".pdf", - filetypes=[("pdf documents", "*.pdf"), ("all files", "*")], - # initialdir="/home/josch/git/plakativ", - # initialfile=base + "_poster" + ext, - ) - with open(filename, "wb") as f: - save_pdf(f) - - root.title("img2pdf") - app.pack(fill=tkinter.BOTH, expand=tkinter.TRUE) - - canvas = tkinter.Canvas(app, bg="black") - - def draw(): - canvas.delete(tkinter.ALL) - if not infiles: - canvas.create_text( - canvas.size[0] / 2, - canvas.size[1] / 2, - text='Click on the "Open Image(s)" button in the upper right.', - fill="white", - ) - return - - if not doc: - canvas.create_text( - canvas.size[0] / 2, - canvas.size[1] / 2, - text="PyMuPDF not available. Install the Python fitz module\n" - + "for preview functionality.", - fill="white", - ) - return - - canvas_padding = 10 - # factor to convert from pdf dimensions (given in pt) into canvas - # dimensions (given in pixels) - zoom = min( - (canvas.size[0] - canvas_padding) / maxpagewidth, - (canvas.size[1] - canvas_padding) / maxpageheight, - ) - - pagenum = 0 - mat_0 = fitz.Matrix(zoom, zoom) - canvas.image = tkinter.PhotoImage( - data=doc[pagenum] - .getDisplayList() - .getPixmap(matrix=mat_0, alpha=False) - .getImageData("ppm") - ) - canvas.create_image( - (canvas.size[0] - maxpagewidth * zoom) / 2, - (canvas.size[1] - maxpageheight * zoom) / 2, - anchor=tkinter.NW, - image=canvas.image, - ) - - canvas.create_rectangle( - (canvas.size[0] - maxpagewidth * zoom) / 2, - (canvas.size[1] - maxpageheight * zoom) / 2, - (canvas.size[0] - maxpagewidth * zoom) / 2 + canvas.image.width(), - (canvas.size[1] - maxpageheight * zoom) / 2 + canvas.image.height(), - outline="red", - ) - - def on_resize(event): - canvas.size = (event.width, event.height) - draw() - - canvas.pack(fill=tkinter.BOTH, side=tkinter.LEFT, expand=tkinter.TRUE) - canvas.bind("<Configure>", on_resize) - - frame_right = tkinter.Frame(app) - frame_right.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.Y) - - top_frame = tkinter.Frame(frame_right) - top_frame.pack(fill=tkinter.X) - - tkinter.Button(top_frame, text="Open Image(s)", command=on_open_button).pack( - side=tkinter.LEFT, expand=tkinter.TRUE, fill=tkinter.X - ) - tkinter.Button(top_frame, text="Help", state=tkinter.DISABLED).pack( - side=tkinter.RIGHT, expand=tkinter.TRUE, fill=tkinter.X - ) - - frame1 = VerticalScrolledFrame(frame_right) - frame1.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.Y) - - output_options = tkinter.LabelFrame(frame1.interior, text="Output Options") - output_options.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X) - tkinter.Label(output_options, text="colorspace").grid( - row=0, column=0, sticky=tkinter.W - ) - OptionMenu(output_options, args["colorspace"], "auto", state=tkinter.DISABLED).grid( - row=0, column=1, sticky=tkinter.W - ) - tkinter.Label(output_options, text="engine").grid(row=1, column=0, sticky=tkinter.W) - OptionMenu(output_options, args["engine"], "auto", state=tkinter.DISABLED).grid( - row=1, column=1, sticky=tkinter.W - ) - tkinter.Checkbutton( - output_options, - text="Suppress timestamp", - variable=args["nodate"], - state=tkinter.DISABLED, - ).grid(row=2, column=0, columnspan=2, sticky=tkinter.W) - tkinter.Checkbutton( - output_options, - text="only first frame", - variable=args["first_frame_only"], - state=tkinter.DISABLED, - ).grid(row=3, column=0, columnspan=2, sticky=tkinter.W) - tkinter.Checkbutton( - output_options, text="force large input", state=tkinter.DISABLED - ).grid(row=4, column=0, columnspan=2, sticky=tkinter.W) - image_size_frame = tkinter.LabelFrame(frame1.interior, text="Image size") - image_size_frame.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X) - OptionMenu( - image_size_frame, - args["imgsize_dropdown"], - *(["auto", "custom"] + sorted(papernames.values())), - state=tkinter.DISABLED, - ).grid(row=1, column=0, columnspan=3, sticky=tkinter.W) - - tkinter.Label( - image_size_frame, text="Width:", state=tkinter.DISABLED, name="size_label_width" - ).grid(row=2, column=0, sticky=tkinter.W) - tkinter.Spinbox( - image_size_frame, - format="%.2f", - increment=0.01, - from_=0, - to=100, - width=5, - state=tkinter.DISABLED, - name="spinbox_width", - ).grid(row=2, column=1, sticky=tkinter.W) - tkinter.Label( - image_size_frame, text="mm", state=tkinter.DISABLED, name="size_label_width_mm" - ).grid(row=2, column=2, sticky=tkinter.W) - - tkinter.Label( - image_size_frame, - text="Height:", - state=tkinter.DISABLED, - name="size_label_height", - ).grid(row=3, column=0, sticky=tkinter.W) - tkinter.Spinbox( - image_size_frame, - format="%.2f", - increment=0.01, - from_=0, - to=100, - width=5, - state=tkinter.DISABLED, - name="spinbox_height", - ).grid(row=3, column=1, sticky=tkinter.W) - tkinter.Label( - image_size_frame, text="mm", state=tkinter.DISABLED, name="size_label_height_mm" - ).grid(row=3, column=2, sticky=tkinter.W) - - page_size_frame = tkinter.LabelFrame(frame1.interior, text="Page size") - page_size_frame.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X) - OptionMenu( - page_size_frame, - args["pagesize_dropdown"], - *(["auto", "custom"] + sorted(papernames.values())), - state=tkinter.DISABLED, - ).grid(row=1, column=0, columnspan=3, sticky=tkinter.W) - - tkinter.Label( - page_size_frame, text="Width:", state=tkinter.DISABLED, name="size_label_width" - ).grid(row=2, column=0, sticky=tkinter.W) - tkinter.Spinbox( - page_size_frame, - format="%.2f", - increment=0.01, - from_=0, - to=100, - width=5, - state=tkinter.DISABLED, - name="spinbox_width", - ).grid(row=2, column=1, sticky=tkinter.W) - tkinter.Label( - page_size_frame, text="mm", state=tkinter.DISABLED, name="size_label_width_mm" - ).grid(row=2, column=2, sticky=tkinter.W) - - tkinter.Label( - page_size_frame, - text="Height:", - state=tkinter.DISABLED, - name="size_label_height", - ).grid(row=3, column=0, sticky=tkinter.W) - tkinter.Spinbox( - page_size_frame, - format="%.2f", - increment=0.01, - from_=0, - to=100, - width=5, - state=tkinter.DISABLED, - name="spinbox_height", - ).grid(row=3, column=1, sticky=tkinter.W) - tkinter.Label( - page_size_frame, text="mm", state=tkinter.DISABLED, name="size_label_height_mm" - ).grid(row=3, column=2, sticky=tkinter.W) - layout_frame = tkinter.LabelFrame(frame1.interior, text="Layout") - layout_frame.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X) - tkinter.Label(layout_frame, text="border", state=tkinter.DISABLED).grid( - row=0, column=0, sticky=tkinter.W - ) - tkinter.Spinbox(layout_frame, state=tkinter.DISABLED).grid( - row=0, column=1, sticky=tkinter.W - ) - tkinter.Label(layout_frame, text="fit", state=tkinter.DISABLED).grid( - row=1, column=0, sticky=tkinter.W - ) - OptionMenu( - layout_frame, args["fit"], *[v.name for v in FitMode], state=tkinter.DISABLED - ).grid(row=1, column=1, sticky=tkinter.W) - tkinter.Checkbutton( - layout_frame, - text="auto orient", - state=tkinter.DISABLED, - variable=args["auto_orient"], - ).grid(row=2, column=0, columnspan=2, sticky=tkinter.W) - tkinter.Label(layout_frame, text="crop border", state=tkinter.DISABLED).grid( - row=3, column=0, sticky=tkinter.W - ) - tkinter.Spinbox(layout_frame, state=tkinter.DISABLED).grid( - row=3, column=1, sticky=tkinter.W - ) - tkinter.Label(layout_frame, text="bleed border", state=tkinter.DISABLED).grid( - row=4, column=0, sticky=tkinter.W - ) - tkinter.Spinbox(layout_frame, state=tkinter.DISABLED).grid( - row=4, column=1, sticky=tkinter.W - ) - tkinter.Label(layout_frame, text="trim border", state=tkinter.DISABLED).grid( - row=5, column=0, sticky=tkinter.W - ) - tkinter.Spinbox(layout_frame, state=tkinter.DISABLED).grid( - row=5, column=1, sticky=tkinter.W - ) - tkinter.Label(layout_frame, text="art border", state=tkinter.DISABLED).grid( - row=6, column=0, sticky=tkinter.W - ) - tkinter.Spinbox(layout_frame, state=tkinter.DISABLED).grid( - row=6, column=1, sticky=tkinter.W - ) - metadata_frame = tkinter.LabelFrame(frame1.interior, text="PDF metadata") - metadata_frame.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X) - tkinter.Label(metadata_frame, text="title", state=tkinter.DISABLED).grid( - row=0, column=0, sticky=tkinter.W - ) - tkinter.Entry( - metadata_frame, textvariable=args["title"], state=tkinter.DISABLED - ).grid(row=0, column=1, sticky=tkinter.W) - tkinter.Label(metadata_frame, text="author", state=tkinter.DISABLED).grid( - row=1, column=0, sticky=tkinter.W - ) - tkinter.Entry( - metadata_frame, textvariable=args["author"], state=tkinter.DISABLED - ).grid(row=1, column=1, sticky=tkinter.W) - tkinter.Label(metadata_frame, text="creator", state=tkinter.DISABLED).grid( - row=2, column=0, sticky=tkinter.W - ) - tkinter.Entry( - metadata_frame, textvariable=args["creator"], state=tkinter.DISABLED - ).grid(row=2, column=1, sticky=tkinter.W) - tkinter.Label(metadata_frame, text="producer", state=tkinter.DISABLED).grid( - row=3, column=0, sticky=tkinter.W - ) - tkinter.Entry( - metadata_frame, textvariable=args["producer"], state=tkinter.DISABLED - ).grid(row=3, column=1, sticky=tkinter.W) - tkinter.Label(metadata_frame, text="creation date", state=tkinter.DISABLED).grid( - row=4, column=0, sticky=tkinter.W - ) - tkinter.Entry( - metadata_frame, textvariable=args["creationdate"], state=tkinter.DISABLED - ).grid(row=4, column=1, sticky=tkinter.W) - tkinter.Label( - metadata_frame, text="modification date", state=tkinter.DISABLED - ).grid(row=5, column=0, sticky=tkinter.W) - tkinter.Entry( - metadata_frame, textvariable=args["moddate"], state=tkinter.DISABLED - ).grid(row=5, column=1, sticky=tkinter.W) - tkinter.Label(metadata_frame, text="subject", state=tkinter.DISABLED).grid( - row=6, column=0, sticky=tkinter.W - ) - tkinter.Entry(metadata_frame, state=tkinter.DISABLED).grid( - row=6, column=1, sticky=tkinter.W - ) - tkinter.Label(metadata_frame, text="keywords", state=tkinter.DISABLED).grid( - row=7, column=0, sticky=tkinter.W - ) - tkinter.Entry(metadata_frame, state=tkinter.DISABLED).grid( - row=7, column=1, sticky=tkinter.W - ) - viewer_frame = tkinter.LabelFrame(frame1.interior, text="PDF viewer options") - viewer_frame.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X) - tkinter.Label(viewer_frame, text="panes", state=tkinter.DISABLED).grid( - row=0, column=0, sticky=tkinter.W - ) - OptionMenu( - viewer_frame, - args["viewer_panes"], - *(["auto"] + [v.name for v in PageMode]), - state=tkinter.DISABLED, - ).grid(row=0, column=1, sticky=tkinter.W) - tkinter.Label(viewer_frame, text="initial page", state=tkinter.DISABLED).grid( - row=1, column=0, sticky=tkinter.W - ) - tkinter.Spinbox( - viewer_frame, - increment=1, - from_=1, - to=10000, - width=6, - textvariable=args["viewer_initial_page"], - state=tkinter.DISABLED, - name="viewer_initial_page_spinbox", - ).grid(row=1, column=1, sticky=tkinter.W) - tkinter.Label(viewer_frame, text="magnification", state=tkinter.DISABLED).grid( - row=2, column=0, sticky=tkinter.W - ) - OptionMenu( - viewer_frame, - args["viewer_magnification"], - *(["auto", "custom"] + [v.name for v in Magnification]), - state=tkinter.DISABLED, - ).grid(row=2, column=1, sticky=tkinter.W) - tkinter.Label(viewer_frame, text="page layout", state=tkinter.DISABLED).grid( - row=3, column=0, sticky=tkinter.W - ) - OptionMenu( - viewer_frame, - args["viewer_page_layout"], - *(["auto"] + [v.name for v in PageLayout]), - state=tkinter.DISABLED, - ).grid(row=3, column=1, sticky=tkinter.W) - tkinter.Checkbutton( - viewer_frame, - text="fit window to page size", - variable=args["viewer_fit_window"], - state=tkinter.DISABLED, - ).grid(row=4, column=0, columnspan=2, sticky=tkinter.W) - tkinter.Checkbutton( - viewer_frame, - text="center window", - variable=args["viewer_center_window"], - state=tkinter.DISABLED, - ).grid(row=5, column=0, columnspan=2, sticky=tkinter.W) - tkinter.Checkbutton( - viewer_frame, - text="open in fullscreen", - variable=args["viewer_fullscreen"], - state=tkinter.DISABLED, - ).grid(row=6, column=0, columnspan=2, sticky=tkinter.W) - - option_frame = tkinter.LabelFrame(frame1.interior, text="Program options") - option_frame.pack(side=tkinter.TOP, expand=tkinter.TRUE, fill=tkinter.X) - - tkinter.Label(option_frame, text="Unit:", state=tkinter.DISABLED).grid( - row=0, column=0, sticky=tkinter.W - ) - unit = tkinter.StringVar() - unit.set("mm") - OptionMenu(option_frame, unit, ["mm"], state=tkinter.DISABLED).grid( - row=0, column=1, sticky=tkinter.W - ) - - tkinter.Label(option_frame, text="Language:", state=tkinter.DISABLED).grid( - row=1, column=0, sticky=tkinter.W - ) - language = tkinter.StringVar() - language.set("English") - OptionMenu(option_frame, language, ["English"], state=tkinter.DISABLED).grid( - row=1, column=1, sticky=tkinter.W - ) - - bottom_frame = tkinter.Frame(frame_right) - bottom_frame.pack(fill=tkinter.X) - - tkinter.Button(bottom_frame, text="Save PDF", command=on_save_button).pack( - side=tkinter.LEFT, expand=tkinter.TRUE, fill=tkinter.X - ) - tkinter.Button(bottom_frame, text="Exit", command=root.destroy).pack( - side=tkinter.RIGHT, expand=tkinter.TRUE, fill=tkinter.X - ) - - app.mainloop() - - -def main(argv=sys.argv): - rendered_papersizes = "" - for k, v in sorted(papersizes.items()): - rendered_papersizes += " %-8s %s\n" % (papernames[k], v) - - parser = argparse.ArgumentParser( - formatter_class=argparse.RawDescriptionHelpFormatter, - description="""\ -Losslessly convert raster images to PDF without re-encoding PNG, JPEG, and -JPEG2000 images. This leads to a lossless conversion of PNG, JPEG and JPEG2000 -images with the only added file size coming from the PDF container itself. -Other raster graphics formats are losslessly stored using the same encoding -that PNG uses. -For images with transparency, the alpha channel will be stored as a separate -soft mask. This is lossless, too. - -The output is sent to standard output so that it can be redirected into a file -or to another program as part of a shell pipe. To directly write the output -into a file, use the -o or --output option. - -Options: -""", - epilog="""\ -Colorspace: - Currently, the colorspace must be forced for JPEG 2000 images that are not in - the RGB colorspace. Available colorspace options are based on Python Imaging - Library (PIL) short handles. - - RGB RGB color - L Grayscale - 1 Black and white (internally converted to grayscale) - CMYK CMYK color - CMYK;I CMYK color with inversion (for CMYK JPEG files from Adobe) - -Paper sizes: - You can specify the short hand paper size names shown in the first column in - the table below as arguments to the --pagesize and --imgsize options. The - width and height they are mapping to is shown in the second column. Giving - the value in the second column has the same effect as giving the short hand - in the first column. Appending ^T (a caret/circumflex followed by the letter - T) turns the paper size from portrait into landscape. The postfix thus - symbolizes the transpose. The values are case insensitive. - -%s - -Fit options: - The img2pdf options for the --fit argument are shown in the first column in - the table below. The function of these options can be mapped to the geometry - operators of imagemagick. For users who are familiar with imagemagick, the - corresponding operator is shown in the second column. The third column shows - whether or not the aspect ratio is preserved for that option (same as in - imagemagick). Just like imagemagick, img2pdf tries hard to preserve the - aspect ratio, so if the --fit argument is not given, then the default is - "into" which corresponds to the absence of any operator in imagemagick. - The value of the --fit option is case insensitive. - - into | | Y | The default. Width and height values specify maximum - | | | values. - ---------+---+---+---------------------------------------------------------- - fill | ^ | Y | Width and height values specify the minimum values. - ---------+---+---+---------------------------------------------------------- - exact | ! | N | Width and height emphatically given. - ---------+---+---+---------------------------------------------------------- - shrink | > | Y | Shrinks an image with dimensions larger than the given - | | | ones (and otherwise behaves like "into"). - ---------+---+---+---------------------------------------------------------- - enlarge | < | Y | Enlarges an image with dimensions smaller than the given - | | | ones (and otherwise behaves like "into"). - -Argument parsing: - Argument long options can be abbreviated to a prefix if the abbreviation is - unambiguous. That is, the prefix must match a unique option. - - Beware of your shell interpreting argument values as special characters (like - the semicolon in the CMYK;I colorspace option). If in doubt, put the argument - values in single quotes. - - If you want an argument value to start with one or more minus characters, you - must use the long option name and join them with an equal sign like so: - - $ img2pdf --author=--test-- - - If your input file name starts with one or more minus characters, either - separate the input files from the other arguments by two minus signs: - - $ img2pdf -- --my-file-starts-with-two-minuses.jpg - - Or be more explicit about its relative path by prepending a ./: - - $ img2pdf ./--my-file-starts-with-two-minuses.jpg - - The order of non-positional arguments (all arguments other than the input - images) does not matter. - -Examples: - Lines starting with a dollar sign denote commands you can enter into your - terminal. The dollar sign signifies your command prompt. It is not part of - the command you type. - - Convert two scans in JPEG format to a PDF document. - - $ img2pdf --output out.pdf page1.jpg page2.jpg - - Convert a directory of JPEG images into a PDF with printable A4 pages in - landscape mode. On each page, the photo takes the maximum amount of space - while preserving its aspect ratio and a print border of 2 cm on the top and - bottom and 2.5 cm on the left and right hand side. - - $ img2pdf --output out.pdf --pagesize A4^T --border 2cm:2.5cm *.jpg - - On each A4 page, fit images into a 10 cm times 15 cm rectangle but keep the - original image size if the image is smaller than that. - - $ img2pdf --output out.pdf -S A4 --imgsize 10cmx15cm --fit shrink *.jpg - - Prepare a directory of photos to be printed borderless on photo paper with a - 3:2 aspect ratio and rotate each page so that its orientation is the same as - the input image. - - $ img2pdf --output out.pdf --pagesize 15cmx10cm --auto-orient *.jpg - - Encode a grayscale JPEG2000 image. The colorspace has to be forced as img2pdf - cannot read it from the JPEG2000 file automatically. - - $ img2pdf --output out.pdf --colorspace L input.jp2 - -Written by Johannes Schauer Marin Rodrigues <josch@mister-muffin.de> - -Report bugs at https://gitlab.mister-muffin.de/josch/img2pdf/issues -""" - % rendered_papersizes, - ) - - parser.add_argument( - "images", - metavar="infile", - type=input_images, - nargs="*", - help="Specifies the input file(s) in any format that can be read by " - "the Python Imaging Library (PIL). If no input images are given, then " - 'a single image is read from standard input. The special filename "-" ' - "can be used once to read an image from standard input. To read a " - 'file in the current directory with the filename "-" (or with a ' - 'filename starting with "-"), pass it to img2pdf by explicitly ' - 'stating its relative path like "./-". Cannot be used together with ' - "--from-file.", - ) - parser.add_argument( - "-v", - "--verbose", - action="store_true", - help="Makes the program operate in verbose mode, printing messages on " - "standard error.", - ) - parser.add_argument( - "-V", - "--version", - action="version", - version="%(prog)s " + __version__, - help="Prints version information and exits.", - ) - parser.add_argument( - "--gui", dest="gui", action="store_true", help="run experimental tkinter gui" - ) - parser.add_argument( - "--from-file", - metavar="FILE", - type=from_file, - default=[], - help="Read the list of images from FILE instead of passing them as " - "positional arguments. If this option is used, then the list of " - "positional arguments must be empty. The paths to the input images " - 'in FILE are separated by NUL bytes. If FILE is "-" then the paths ' - "are expected on standard input. This option is useful if you want " - "to pass more images than the maximum command length of your shell " - "permits. This option can be used with commands like `find -print0`.", - ) - - outargs = parser.add_argument_group( - title="General output arguments", - description="Arguments controlling the output format.", - ) - - # In Python3 we have to output to sys.stdout.buffer because we write are - # bytes and not strings. In certain situations, like when the main - # function is wrapped by contextlib.redirect_stdout(), sys.stdout does not - # have the buffer attribute. Thus we write to sys.stdout by default and - # to sys.stdout.buffer if it exists. - outargs.add_argument( - "-o", - "--output", - metavar="out", - type=argparse.FileType("wb"), - default=sys.stdout.buffer if hasattr(sys.stdout, "buffer") else sys.stdout, - help="Makes the program output to a file instead of standard output.", - ) - outargs.add_argument( - "-C", - "--colorspace", - metavar="colorspace", - type=parse_colorspacearg, - help=""" -Forces the PIL colorspace. See the epilogue for a list of possible values. -Usually the PDF colorspace would be derived from the color space of the input -image. This option overwrites the automatically detected colorspace from the -input image and thus forces a certain colorspace in the output PDF /ColorSpace -property. This is useful for JPEG 2000 images with a different colorspace than -RGB.""", - ) - - outargs.add_argument( - "-D", - "--nodate", - action="store_true", - help="Suppresses timestamps in the output and thus makes the output " - "deterministic between individual runs. You can also manually " - "set a date using the --moddate and --creationdate options.", - ) - - outargs.add_argument( - "--engine", - metavar="engine", - type=parse_enginearg, - help="Choose PDF engine. Can be either internal, pikepdf or pdfrw. " - "The internal engine does not have additional requirements and writes " - "out a human readable PDF. The pikepdf engine requires the pikepdf " - "Python module and qpdf library, is most featureful, can " - 'linearize PDFs ("fast web view") and can compress more parts of it.' - "The pdfrw engine requires the pdfrw Python " - "module but does not support unicode metadata (See " - "https://github.com/pmaupin/pdfrw/issues/39) or palette data (See " - "https://github.com/pmaupin/pdfrw/issues/128).", - ) - - outargs.add_argument( - "--first-frame-only", - action="store_true", - help="By default, img2pdf will convert multi-frame images like " - "multi-page TIFF or animated GIF images to one page per frame. " - "This option will only let the first frame of every multi-frame " - "input image be converted into a page in the resulting PDF.", - ) - - outargs.add_argument( - "--pillow-limit-break", - action="store_true", - help="img2pdf uses the Python Imaging Library Pillow to read input " - "images. Pillow limits the maximum input image size to %d pixels " - "to prevent decompression bomb denial of service attacks. If " - "your input image contains more pixels than that, use this " - "option to disable this safety measure during this run of img2pdf" - % Image.MAX_IMAGE_PIXELS, - ) - - outargs.add_argument( - "--pdfa", - nargs="?", - const="/usr/share/color/icc/sRGB.icc", - default=None, - help="Output a PDF/A-1b compliant document. By default, this will " - "embed /usr/share/color/icc/sRGB.icc as the color profile.", - ) - - sizeargs = parser.add_argument_group( - title="Image and page size and layout arguments", - description="""\ -Every input image will be placed on its own page. The image size is controlled -by the dpi value of the input image or, if unset or missing, the default dpi of -%.2f. By default, each page will have the same size as the image it shows. -Thus, there will be no visible border between the image and the page border by -default. If image size and page size are made different from each other by the -options in this section, the image will always be centered in both dimensions. - -The image size and page size can be explicitly set using the --imgsize and ---pagesize options, respectively. If either dimension of the image size is -specified but the same dimension of the page size is not, then the latter will -be derived from the former using an optional minimal distance between the image -and the page border (given by the --border option) and/or a certain fitting -strategy (given by the --fit option). The converse happens if a dimension of -the page size is set but the same dimension of the image size is not. - -Any length value in below options is represented by the meta variable L which -is a floating point value with an optional unit appended (without a space -between them). The default unit is pt (1/72 inch, the PDF unit) and other -allowed units are cm (centimeter), mm (millimeter), and in (inch). - -Any size argument of the format LxL in the options below specifies the width -and height of a rectangle where the first L represents the width and the second -L represents the height with an optional unit following each value as described -above. Either width or height may be omitted. If the height is omitted, the -separating x can be omitted as well. Omitting the width requires to prefix the -height with the separating x. The missing dimension will be chosen so to not -change the image aspect ratio. Instead of giving the width and height -explicitly, you may also specify some (case-insensitive) common page sizes such -as letter and A4. See the epilogue at the bottom for a complete list of the -valid sizes. - -The --fit option scales to fit the image into a rectangle that is either -derived from the --imgsize option or otherwise from the --pagesize option. -If the --border option is given in addition to the --imgsize option while the ---pagesize option is not given, then the page size will be calculated from the -image size, respecting the border setting. If the --border option is given in -addition to the --pagesize option while the --imgsize option is not given, then -the image size will be calculated from the page size, respecting the border -setting. If the --border option is given while both the --pagesize and ---imgsize options are passed, then the --border option will be ignored. - -The --pagesize option or the --imgsize option with the --border option will -determine the MediaBox size of the resulting PDF document. -""" - % default_dpi, - ) - - sizeargs.add_argument( - "-S", - "--pagesize", - metavar="LxL", - type=parse_pagesize_rectarg, - help=""" -Sets the size of the PDF pages. The short-option is the upper case S because -it is an mnemonic for being bigger than the image size.""", - ) - - sizeargs.add_argument( - "-s", - "--imgsize", - metavar="LxL", - type=parse_imgsize_rectarg, - help=""" -Sets the size of the images on the PDF pages. In addition, the unit dpi is -allowed which will set the image size as a value of dots per inch. Instead of -a unit, width and height values may also have a percentage sign appended, -indicating a resize of the image by that percentage. The short-option is the -lower case s because it is an mnemonic for being smaller than the page size. -""", - ) - sizeargs.add_argument( - "-b", - "--border", - metavar="L[:L]", - type=parse_borderarg, - help=""" -Specifies the minimal distance between the image border and the PDF page -border. This value Is overwritten by explicit values set by --pagesize or ---imgsize. The value will be used when calculating page dimensions from the -image dimensions or the other way round. One, or two length values can be given -as an argument, separated by a colon. One value specifies the minimal border on -all four sides. Two values specify the minimal border on the top/bottom and -left/right, respectively. It is not possible to specify asymmetric borders -because images will always be centered on the page. -""", - ) - sizeargs.add_argument( - "-f", - "--fit", - metavar="FIT", - type=parse_fitarg, - default=FitMode.into, - help=""" - -If --imgsize is given, fits the image using these dimensions. Otherwise, fit -the image into the dimensions given by --pagesize. FIT is one of into, fill, -exact, shrink and enlarge. The default value is "into". See the epilogue at the -bottom for a description of the FIT options. - -""", - ) - sizeargs.add_argument( - "-a", - "--auto-orient", - action="store_true", - help=""" -If both dimensions of the page are given via --pagesize, conditionally swaps -these dimensions such that the page orientation is the same as the orientation -of the input image. If the orientation of a page gets flipped, then so do the -values set via the --border option. -""", - ) - sizeargs.add_argument( - "-r", - "--rotation", - "--orientation", - metavar="ROT", - type=parse_rotationarg, - default=Rotation.auto, - help=""" -Specifies how input images should be rotated. ROT can be one of auto, none, -ifvalid, 0, 90, 180 and 270. The default value is auto and indicates that input -images are rotated according to their EXIF Orientation tag. The values none and -0 ignore the EXIF Orientation values of the input images. The value ifvalid -acts like auto but ignores invalid EXIF rotation values and only issues a -warning instead of throwing an error. This is useful because many devices like -Android phones, Canon cameras or scanners emit an invalid Orientation tag value -of zero. The values 90, 180 and 270 perform a clockwise rotation of the image. - """, - ) - sizeargs.add_argument( - "--crop-border", - metavar="L[:L]", - type=parse_borderarg, - help=""" -Specifies the border between the CropBox and the MediaBox. One, or two length -values can be given as an argument, separated by a colon. One value specifies -the border on all four sides. Two values specify the border on the top/bottom -and left/right, respectively. It is not possible to specify asymmetric borders. -""", - ) - sizeargs.add_argument( - "--bleed-border", - metavar="L[:L]", - type=parse_borderarg, - help=""" -Specifies the border between the BleedBox and the MediaBox. One, or two length -values can be given as an argument, separated by a colon. One value specifies -the border on all four sides. Two values specify the border on the top/bottom -and left/right, respectively. It is not possible to specify asymmetric borders. -""", - ) - sizeargs.add_argument( - "--trim-border", - metavar="L[:L]", - type=parse_borderarg, - help=""" -Specifies the border between the TrimBox and the MediaBox. One, or two length -values can be given as an argument, separated by a colon. One value specifies -the border on all four sides. Two values specify the border on the top/bottom -and left/right, respectively. It is not possible to specify asymmetric borders. -""", - ) - sizeargs.add_argument( - "--art-border", - metavar="L[:L]", - type=parse_borderarg, - help=""" -Specifies the border between the ArtBox and the MediaBox. One, or two length -values can be given as an argument, separated by a colon. One value specifies -the border on all four sides. Two values specify the border on the top/bottom -and left/right, respectively. It is not possible to specify asymmetric borders. -""", - ) - - metaargs = parser.add_argument_group( - title="Arguments setting metadata", - description="Options handling embedded timestamps, title and author " - "information.", - ) - metaargs.add_argument( - "--title", metavar="title", type=str, help="Sets the title metadata value" - ) - metaargs.add_argument( - "--author", metavar="author", type=str, help="Sets the author metadata value" - ) - metaargs.add_argument( - "--creator", metavar="creator", type=str, help="Sets the creator metadata value" - ) - metaargs.add_argument( - "--producer", - metavar="producer", - type=str, - default="img2pdf " + __version__, - help="Sets the producer metadata value " - "(default is: img2pdf " + __version__ + ")", - ) - metaargs.add_argument( - "--creationdate", - metavar="creationdate", - type=valid_date, - help="Sets the UTC creation date metadata value in YYYY-MM-DD or " - "YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format " - "understood by python dateutil module or any format understood " - "by `date --date`", - ) - metaargs.add_argument( - "--moddate", - metavar="moddate", - type=valid_date, - help="Sets the UTC modification date metadata value in YYYY-MM-DD " - "or YYYY-MM-DDTHH:MM or YYYY-MM-DDTHH:MM:SS format or any format " - "understood by python dateutil module or any format understood " - "by `date --date`", - ) - metaargs.add_argument( - "--subject", metavar="subject", type=str, help="Sets the subject metadata value" - ) - metaargs.add_argument( - "--keywords", - metavar="kw", - type=str, - nargs="+", - help="Sets the keywords metadata value (can be given multiple times)", - ) - - viewerargs = parser.add_argument_group( - title="PDF viewer arguments", - description="PDF files can specify how they are meant to be " - "presented to the user by a PDF viewer", - ) - - viewerargs.add_argument( - "--viewer-panes", - metavar="PANES", - type=parse_panes, - help="Instruct the PDF viewer which side panes to show. Valid values " - 'are "outlines" and "thumbs". It is not possible to specify both ' - "at the same time.", - ) - viewerargs.add_argument( - "--viewer-initial-page", - metavar="NUM", - type=int, - help="Instead of showing the first page, instruct the PDF viewer to " - "show the given page instead. Page numbers start with 1.", - ) - viewerargs.add_argument( - "--viewer-magnification", - metavar="MAG", - type=parse_magnification, - help="Instruct the PDF viewer to open the PDF with a certain zoom " - "level. Valid values are either a floating point number giving " - 'the exact zoom level, "fit" (zoom to fit whole page), "fith" ' - '(zoom to fit page width) and "fitbh" (zoom to fit visible page ' - "width).", - ) - viewerargs.add_argument( - "--viewer-page-layout", - metavar="LAYOUT", - type=parse_layout, - help="Instruct the PDF viewer how to arrange the pages on the screen. " - 'Valid values are "single" (display single pages), "onecolumn" ' - '(one continuous column), "twocolumnright" (two continuous ' - 'columns with odd number pages on the right) and "twocolumnleft" ' - "(two continuous columns with odd numbered pages on the left), " - '"twopageright" (two pages with odd numbered page on the right) ' - 'and "twopageleft" (two pages with odd numbered page on the left)', - ) - viewerargs.add_argument( - "--viewer-fit-window", - action="store_true", - help="Instruct the PDF viewer to resize the window to fit the page size", - ) - viewerargs.add_argument( - "--viewer-center-window", - action="store_true", - help="Instruct the PDF viewer to center the PDF viewer window", - ) - viewerargs.add_argument( - "--viewer-fullscreen", - action="store_true", - help="Instruct the PDF viewer to open the PDF in fullscreen mode", - ) - - args = parser.parse_args(argv[1:]) - - if args.verbose: - logging.basicConfig(level=logging.DEBUG) - - if args.pillow_limit_break: - Image.MAX_IMAGE_PIXELS = None - - if args.gui: - gui() - sys.exit(0) - - layout_fun = get_layout_fun( - args.pagesize, args.imgsize, args.border, args.fit, args.auto_orient - ) - - if len(args.images) > 0 and len(args.from_file) > 0: - logger.error( - "%s: error: cannot use --from-file with positional arguments" % parser.prog - ) - sys.exit(2) - elif len(args.images) == 0 and len(args.from_file) == 0: - # if no positional arguments were supplied, read a single image from - # standard input - logger.info("reading image from standard input") - try: - images = [sys.stdin.buffer.read()] - except KeyboardInterrupt: - sys.exit(0) - elif len(args.images) > 0 and len(args.from_file) == 0: - # On windows, each positional argument can expand into multiple paths - # because we do globbing ourselves. Here we flatten the list of lists - # again. - images = list(chain.from_iterable(args.images)) - elif len(args.images) == 0 and len(args.from_file) > 0: - images = args.from_file - - # with the number of pages being equal to the number of images, the - # value passed to --viewer-initial-page must be between 1 and that number - if args.viewer_initial_page is not None: - if args.viewer_initial_page < 1: - parser.print_usage(file=sys.stderr) - logger.error( - "%s: error: argument --viewer-initial-page: must be " - "greater than zero" % parser.prog - ) - sys.exit(2) - if args.viewer_initial_page > len(images): - parser.print_usage(file=sys.stderr) - logger.error( - "%s: error: argument --viewer-initial-page: must be " - "less than or equal to the total number of pages" % parser.prog - ) - sys.exit(2) - - try: - convert( - *images, - engine=args.engine, - title=args.title, - author=args.author, - creator=args.creator, - producer=args.producer, - creationdate=args.creationdate, - moddate=args.moddate, - subject=args.subject, - keywords=args.keywords, - colorspace=args.colorspace, - nodate=args.nodate, - layout_fun=layout_fun, - viewer_panes=args.viewer_panes, - viewer_initial_page=args.viewer_initial_page, - viewer_magnification=args.viewer_magnification, - viewer_page_layout=args.viewer_page_layout, - viewer_fit_window=args.viewer_fit_window, - viewer_center_window=args.viewer_center_window, - viewer_fullscreen=args.viewer_fullscreen, - outputstream=args.output, - first_frame_only=args.first_frame_only, - cropborder=args.crop_border, - bleedborder=args.bleed_border, - trimborder=args.trim_border, - artborder=args.art_border, - pdfa=args.pdfa, - rotation=args.rotation, - ) - except Exception as e: - logger.error("error: " + str(e)) - if logger.isEnabledFor(logging.DEBUG): - import traceback - - traceback.print_exc(file=sys.stderr) - sys.exit(1) - - -if __name__ == "__main__": - main() |