Spaces:
Running
Running
//======================================================================== | |
// | |
// pdfinfo.cc | |
// | |
// Copyright 1998-2003 Glyph & Cog, LLC | |
// Copyright 2013 Igalia S.L. | |
// | |
//======================================================================== | |
//======================================================================== | |
// | |
// Modified under the Poppler project - http://poppler.freedesktop.org | |
// | |
// All changes made under the Poppler project to this file are licensed | |
// under GPL version 2 or later | |
// | |
// Copyright (C) 2006 Dom Lachowicz <[email protected]> | |
// Copyright (C) 2007-2010, 2012, 2016-2022 Albert Astals Cid <[email protected]> | |
// Copyright (C) 2010 Hib Eris <[email protected]> | |
// Copyright (C) 2011 Vittal Aithal <[email protected]> | |
// Copyright (C) 2012, 2013, 2016-2018, 2021 Adrian Johnson <[email protected]> | |
// Copyright (C) 2012 Fabio D'Urso <[email protected]> | |
// Copyright (C) 2013 Adrian Perez de Castro <[email protected]> | |
// Copyright (C) 2013 Suzuki Toshiya <[email protected]> | |
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <[email protected]>. Work sponsored by the LiMux project of the city of Munich | |
// Copyright (C) 2018 Adam Reichold <[email protected]> | |
// Copyright (C) 2018 Evangelos Rigas <[email protected]> | |
// Copyright (C) 2019 Christian Persch <[email protected]> | |
// Copyright (C) 2019-2021 Oliver Sander <[email protected]> | |
// Copyright (C) 2019 Thomas Fischer <[email protected]> | |
// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <[email protected]> | |
// | |
// To see a description of the changes please see the Changelog file that | |
// came with your tarball or type make ChangeLog if you are building from git | |
// | |
//======================================================================== | |
// Command-line option storage.  Each variable is written by parseArgs()
// through the argDesc table below.

// Page range (1-based).  lastPage == 0 means "-l was not given".
static int firstPage = 1;
static int lastPage = 0;

// Output-mode selectors.
static bool printBoxes = false;
static bool printMetadata = false;
static bool printCustom = false;
static bool printJS = false;
static bool printStructure = false;
static bool printStructureText = false;
static bool printDests = false;
static bool printUrls = false;
static bool printEnc = false;

// Date formatting selectors.
static bool isoDates = false;
static bool rawDates = false;

// Output text encoding name; empty means "use the default".
static char textEncName[128] = "";

// Passwords.  The leading "\001" byte is a sentinel that main() tests to
// detect that no password was supplied on the command line.
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";

// Informational switches.
static bool printVersion = false;
static bool printHelp = false;
// Option table consumed by parseArgs() and printUsage().  The order of the
// entries is the order in which they are listed in the usage text; the
// empty entry terminates the table.
static const ArgDesc argDesc[] = {
    { "-f", argInt, &firstPage, 0, "first page to convert" },
    { "-l", argInt, &lastPage, 0, "last page to convert" },
    { "-box", argFlag, &printBoxes, 0, "print the page bounding boxes" },
    { "-meta", argFlag, &printMetadata, 0, "print the document metadata (XML)" },
    { "-custom", argFlag, &printCustom, 0, "print both custom and standard metadata" },
    { "-js", argFlag, &printJS, 0, "print all JavaScript in the PDF" },
    { "-struct", argFlag, &printStructure, 0, "print the logical document structure (for tagged files)" },
    { "-struct-text", argFlag, &printStructureText, 0, "print text contents along with document structure (for tagged files)" },
    { "-isodates", argFlag, &isoDates, 0, "print the dates in ISO-8601 format" },
    { "-rawdates", argFlag, &rawDates, 0, "print the undecoded date strings directly from the PDF file" },
    { "-dests", argFlag, &printDests, 0, "print all named destinations in the PDF" },
    { "-url", argFlag, &printUrls, 0, "print all URLs inside PDF objects (does not scan text content)" },
    { "-enc", argString, textEncName, sizeof(textEncName), "output text encoding name" },
    { "-listenc", argFlag, &printEnc, 0, "list available encodings" },
    { "-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)" },
    { "-upw", argString, userPassword, sizeof(userPassword), "user password (for encrypted files)" },
    { "-v", argFlag, &printVersion, 0, "print copyright and version info" },
    // All four spellings below request the usage text.
    { "-h", argFlag, &printHelp, 0, "print usage information" },
    { "-help", argFlag, &printHelp, 0, "print usage information" },
    { "--help", argFlag, &printHelp, 0, "print usage information" },
    { "-?", argFlag, &printHelp, 0, "print usage information" },
    {}
};
static void printTextString(const GooString *s, const UnicodeMap *uMap) | |
{ | |
char buf[8]; | |
std::vector<Unicode> u = TextStringToUCS4(s->toStr()); | |
for (const auto &c : u) { | |
int n = uMap->mapUnicode(c, buf, sizeof(buf)); | |
fwrite(buf, 1, n, stdout); | |
} | |
} | |
static void printUCS4String(const Unicode *u, int len, const UnicodeMap *uMap) | |
{ | |
char buf[8]; | |
for (int i = 0; i < len; i++) { | |
int n = uMap->mapUnicode(u[i], buf, sizeof(buf)); | |
fwrite(buf, 1, n, stdout); | |
} | |
} | |
// Looks up key in infoDict and, when the value is a string, prints the
// label text followed by the value and a newline.  Prints nothing when the
// entry is missing or is not a string.
static void printInfoString(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
{
    Object value = infoDict->lookup(key);
    if (!value.isString()) {
        return;
    }

    fputs(text, stdout);
    printTextString(value.getString(), uMap);
    fputc('\n', stdout);
}
// Looks up key in infoDict and, when the value is a string, prints the
// label text followed by the date rendered in the local time zone using
// the "%c %Z" strftime format, then a newline.
//
// The raw string is printed instead whenever it cannot be parsed as a date
// or any step of the conversion (timegm, localtime_r, strftime) fails.
// Prints nothing when the entry is missing or is not a string.
static void printInfoDate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
{
    Object obj = infoDict->lookup(key);
    if (!obj.isString()) {
        return;
    }

    fputs(text, stdout);
    const GooString *s = obj.getString();

    bool formatted = false;
    int year, mon, day, hour, min, sec, tz_hour, tz_minute;
    char tz;
    if (parseDateString(s, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute)) {
        // Zero-initialise so that fields not set below hold defined values.
        struct tm tmStruct = {};
        tmStruct.tm_year = year - 1900;
        tmStruct.tm_mon = mon - 1;
        tmStruct.tm_mday = day;
        tmStruct.tm_hour = hour;
        tmStruct.tm_min = min;
        tmStruct.tm_sec = sec;
        tmStruct.tm_wday = -1;
        tmStruct.tm_yday = -1;
        tmStruct.tm_isdst = -1;

        // Interpret the broken-down fields as if they were UTC ...
        const time_t asUtc = timegm(&tmStruct);
        if (asUtc != (time_t)-1) {
            // ... then remove the offset stored in the date string to get
            // the real instant.
            int offset = (tz_hour * 60 + tz_minute) * 60;
            if (tz == '-') {
                offset *= -1;
            }
            const time_t instant = asUtc - offset;

            char buf[256];
            // strftime() returns 0 when the result does not fit, in which
            // case buf must not be printed.
            if (localtime_r(&instant, &tmStruct) != nullptr && strftime(buf, sizeof(buf), "%c %Z", &tmStruct) > 0) {
                fputs(buf, stdout);
                formatted = true;
            }
        }
    }

    if (!formatted) {
        printTextString(s, uMap);
    }
    fputc('\n', stdout);
}
// Looks up key in infoDict and, when the value is a string, prints the
// label text followed by the date as YYYY-MM-DDTHH:MM:SS plus a zone
// designator, then a newline.  A zero offset is written as "Z"; otherwise
// the sign and hours are written, and the minutes only when non-zero.
// Strings that do not parse as a date are printed unchanged.
static void printISODate(Dict *infoDict, const char *key, const char *text, const UnicodeMap *uMap)
{
    Object obj = infoDict->lookup(key);
    if (!obj.isString()) {
        return;
    }

    fputs(text, stdout);
    const GooString *raw = obj.getString();

    int year, mon, day, hour, min, sec, tz_hour, tz_minute;
    char tz;
    const bool parsed = parseDateString(raw, &year, &mon, &day, &hour, &min, &sec, &tz, &tz_hour, &tz_minute);

    if (!parsed) {
        printTextString(raw, uMap);
    } else {
        printf("%04d-%02d-%02dT%02d:%02d:%02d", year, mon, day, hour, min, sec);
        const bool isUtc = (tz_hour == 0) && (tz_minute == 0);
        if (isUtc) {
            fputc('Z', stdout);
        } else {
            printf("%c%02d", tz, tz_hour);
            if (tz_minute != 0) {
                printf(":%02d", tz_minute);
            }
        }
    }
    fputc('\n', stdout);
}
static void printBox(const char *text, const PDFRectangle *box) | |
{ | |
printf("%s%8.2f %8.2f %8.2f %8.2f\n", text, box->x1, box->y1, box->x2, box->y2); | |
} | |
// Writes two spaces to stdout for every indentation level.
static void printIndent(unsigned indent)
{
    for (unsigned level = 0; level < indent; ++level) {
        fputs("  ", stdout);
    }
}
static void printAttribute(const Attribute *attribute, unsigned indent) | |
{ | |
printIndent(indent); | |
printf(" /%s ", attribute->getTypeName()); | |
if (attribute->getType() == Attribute::UserProperty) { | |
std::unique_ptr<GooString> name = attribute->getName(); | |
printf("(%s) ", name->c_str()); | |
} | |
attribute->getValue()->print(stdout); | |
if (attribute->getFormattedValue()) { | |
printf(" \"%s\"", attribute->getFormattedValue()); | |
} | |
if (attribute->isHidden()) { | |
printf(" [hidden]"); | |
} | |
} | |
static void printStruct(const StructElement *element, unsigned indent) | |
{ | |
if (element->isObjectRef()) { | |
printIndent(indent); | |
printf("Object %i %i\n", element->getObjectRef().num, element->getObjectRef().gen); | |
return; | |
} | |
if (printStructureText && element->isContent()) { | |
GooString *text = element->getText(false); | |
printIndent(indent); | |
if (text) { | |
printf("\"%s\"\n", text->c_str()); | |
} else { | |
printf("(No content?)\n"); | |
} | |
delete text; | |
} | |
if (!element->isContent()) { | |
printIndent(indent); | |
printf("%s", element->getTypeName()); | |
if (element->getID()) { | |
printf(" <%s>", element->getID()->c_str()); | |
} | |
if (element->getTitle()) { | |
printf(" \"%s\"", element->getTitle()->c_str()); | |
} | |
if (element->getRevision() > 0) { | |
printf(" r%u", element->getRevision()); | |
} | |
if (element->isInline() || element->isBlock()) { | |
printf(" (%s)", element->isInline() ? "inline" : "block"); | |
} | |
if (element->getNumAttributes()) { | |
putchar(':'); | |
for (unsigned i = 0; i < element->getNumAttributes(); i++) { | |
putchar('\n'); | |
printAttribute(element->getAttribute(i), indent + 1); | |
} | |
} | |
putchar('\n'); | |
for (unsigned i = 0; i < element->getNumChildren(); i++) { | |
printStruct(element->getChild(i), indent + 1); | |
} | |
} | |
} | |
struct GooStringCompare | |
{ | |
bool operator()(GooString *lhs, GooString *rhs) const { return lhs->cmp(const_cast<GooString *>(rhs)) < 0; } | |
}; | |
static void printLinkDest(const std::unique_ptr<LinkDest> &dest) | |
{ | |
GooString s; | |
switch (dest->getKind()) { | |
case destXYZ: | |
s.append("[ XYZ "); | |
if (dest->getChangeLeft()) { | |
s.appendf("{0:4.0g} ", dest->getLeft()); | |
} else { | |
s.append("null "); | |
} | |
if (dest->getChangeTop()) { | |
s.appendf("{0:4.0g} ", dest->getTop()); | |
} else { | |
s.append("null "); | |
} | |
if (dest->getChangeZoom()) { | |
s.appendf("{0:4.2f} ", dest->getZoom()); | |
} else { | |
s.append("null "); | |
} | |
break; | |
case destFit: | |
s.append("[ Fit "); | |
break; | |
case destFitH: | |
if (dest->getChangeTop()) { | |
s.appendf("[ FitH {0:4.0g} ", dest->getTop()); | |
} else { | |
s.append("[ FitH null "); | |
} | |
break; | |
case destFitV: | |
if (dest->getChangeLeft()) { | |
s.appendf("[ FitV {0:4.0g} ", dest->getLeft()); | |
} else { | |
s.append("[ FitV null "); | |
} | |
break; | |
case destFitR: | |
s.appendf("[ FitR {0:4.0g} {1:4.0g} {2:4.0g} {3:4.0g} ", dest->getLeft(), dest->getBottom(), dest->getRight(), dest->getTop()); | |
break; | |
case destFitB: | |
s.append("[ FitB "); | |
break; | |
case destFitBH: | |
if (dest->getChangeTop()) { | |
s.appendf("[ FitBH {0:4.0g} ", dest->getTop()); | |
} else { | |
s.append("[ FitBH null "); | |
} | |
break; | |
case destFitBV: | |
if (dest->getChangeLeft()) { | |
s.appendf("[ FitBV {0:4.0g} ", dest->getLeft()); | |
} else { | |
s.append("[ FitBV null "); | |
} | |
break; | |
} | |
s.append(" "); | |
s.setChar(26, ']'); | |
s.setChar(27, '\0'); | |
printf("%s", s.c_str()); | |
} | |
// Prints a table of all named destinations that point at a page in
// [firstPage, lastPage], grouped by page and sorted by name within a page.
// Destinations are gathered from both the catalog's destination name tree
// and its Dests dictionary; entries without a page reference are ignored.
//
// Names are owned by the map through unique_ptr keys, so they are released
// on every path: duplicate names, destinations on pages outside the
// requested range, and printed entries alike.
static void printDestinations(PDFDoc *doc, const UnicodeMap *uMap)
{
    // Orders owned names by string content rather than by pointer value.
    struct NameLess
    {
        bool operator()(const std::unique_ptr<GooString> &lhs, const std::unique_ptr<GooString> &rhs) const { return lhs->cmp(rhs.get()) < 0; }
    };
    using DestsByName = std::map<std::unique_ptr<GooString>, std::unique_ptr<LinkDest>, NameLess>;

    std::map<Ref, DestsByName> destsByPage;

    // Files one destination under its page; drops it when it has no page
    // reference.  When the name is already present for that page the first
    // entry is kept, as before.
    auto record = [&destsByPage](std::unique_ptr<GooString> name, std::unique_ptr<LinkDest> dest) {
        if (!dest || !dest->isPageRef()) {
            return;
        }
        const Ref pageRef = dest->getPageRef();
        destsByPage[pageRef].emplace(std::move(name), std::move(dest));
    };

    int numDests = doc->getCatalog()->numDestNameTree();
    for (int i = 0; i < numDests; i++) {
        auto name = std::make_unique<GooString>(doc->getCatalog()->getDestNameTreeName(i));
        std::unique_ptr<LinkDest> dest = doc->getCatalog()->getDestNameTreeDest(i);
        record(std::move(name), std::move(dest));
    }

    numDests = doc->getCatalog()->numDests();
    for (int i = 0; i < numDests; i++) {
        auto name = std::make_unique<GooString>(doc->getCatalog()->getDestsName(i));
        std::unique_ptr<LinkDest> dest = doc->getCatalog()->getDestsDest(i);
        record(std::move(name), std::move(dest));
    }

    printf("Page Destination Name\n");
    for (int i = firstPage; i <= lastPage; i++) {
        Ref *ref = doc->getCatalog()->getPageRef(i);
        if (!ref) {
            continue;
        }
        auto pageDests = destsByPage.find(*ref);
        if (pageDests == destsByPage.end()) {
            continue;
        }
        for (const auto &entry : pageDests->second) {
            printf("%4d ", i);
            printLinkDest(entry.second);
            printf(" \"");
            printTextString(entry.first.get(), uMap);
            printf("\"\n");
        }
    }
}
// Prints one line for every URI action attached to a link annotation on
// the pages in [firstPage, lastPage].  Pages that cannot be loaded are
// skipped, as are links without an action and links whose action is not a
// URI action.
static void printUrlList(PDFDoc *doc)
{
    printf("Page Type URL\n");
    for (int pg = firstPage; pg <= lastPage; pg++) {
        Page *page = doc->getPage(pg);
        if (!page) {
            continue;
        }

        std::unique_ptr<Links> links = page->getLinks();
        if (!links) {
            continue;
        }

        for (AnnotLink *annot : links->getLinks()) {
            // Defensive: do not assume every link annotation carries an
            // action.
            LinkAction *action = annot ? annot->getAction() : nullptr;
            if (!action || action->getKind() != actionURI) {
                continue;
            }

            // dynamic_cast yields nullptr when the object is not really a
            // LinkURI, so check before dereferencing.
            LinkURI *linkUri = dynamic_cast<LinkURI *>(action);
            if (!linkUri) {
                continue;
            }

            std::string uri = linkUri->getURI();
            printf("%4d Annotation %s\n", pg, uri.c_str());
        }
    }
}
// Prints the PDF subtype (PDF/A, PDF/E, PDF/UA, PDF/VT or PDF/X) details
// of the document: the version string stored in the info dictionary, the
// title of the ISO standard, the abbreviation including the part, the
// subtitle of the part, the standard number, and the conformance level
// when one is set.
//
// Prints nothing when the document has no info dictionary or no
// recognised subtype.
static void printPdfSubtype(PDFDoc *doc, const UnicodeMap *uMap)
{
    const Object info = doc->getDocInfo();
    if (!info.isDict()) {
        return;
    }

    const PDFSubtype pdftype = doc->getPDFSubtype();

    // Per-subtype constants: the info-dictionary key that holds the version
    // string, the full title, the ISO standard number and the abbreviation.
    const char *versionKey = nullptr;
    const char *typeExp = nullptr;
    const char *standard = nullptr;
    const char *abbrBase = nullptr;
    switch (pdftype) {
    case subtypePDFA:
        versionKey = "GTS_PDFA1Version";
        typeExp = "ISO 19005 - Electronic document file format for long-term preservation (PDF/A)";
        standard = "ISO 19005";
        abbrBase = "PDF/A";
        break;
    case subtypePDFE:
        versionKey = "GTS_PDFEVersion";
        typeExp = "ISO 24517 - Engineering document format using PDF (PDF/E)";
        standard = "ISO 24517";
        abbrBase = "PDF/E";
        break;
    case subtypePDFUA:
        versionKey = "GTS_PDFUAVersion";
        typeExp = "ISO 14289 - Electronic document file format enhancement for accessibility (PDF/UA)";
        standard = "ISO 14289";
        abbrBase = "PDF/UA";
        break;
    case subtypePDFVT:
        versionKey = "GTS_PDFVTVersion";
        typeExp = "ISO 16612 - Electronic document file format for variable data exchange (PDF/VT)";
        standard = "ISO 16612";
        abbrBase = "PDF/VT";
        break;
    case subtypePDFX:
        versionKey = "GTS_PDFXVersion";
        typeExp = "ISO 15930 - Electronic document file format for prepress digital data exchange (PDF/X)";
        standard = "ISO 15930";
        abbrBase = "PDF/X";
        break;
    case subtypeNone:
    case subtypeNull:
    default:
        return;
    }
    printInfoString(info.getDict(), versionKey, "PDF subtype: ", uMap);

    // Form the abbreviation from PDFSubtypePart and PDFSubtype.  PDF/X uses
    // a fixed suffix per part; every other subtype appends the part number.
    const PDFSubtypePart subpart = doc->getPDFSubtypePart();
    GooString abbr(abbrBase);
    if (pdftype == subtypePDFX) {
        switch (subpart) {
        case subtypePart1:
            abbr.append("-1:2001");
            break;
        case subtypePart2:
            abbr.append("-2");
            break;
        case subtypePart3:
            abbr.append("-3:2002");
            break;
        case subtypePart4:
            abbr.append("-1:2003");
            break;
        case subtypePart5:
            abbr.append("-2");
            break;
        case subtypePart6:
            abbr.append("-3:2003");
            break;
        case subtypePart7:
            abbr.append("-4");
            break;
        case subtypePart8:
            abbr.append("-5");
            break;
        default:
            break;
        }
    } else {
        abbr.appendf("-{0:d}", subpart);
    }

    // Form the subtitle from PDFSubtypePart and PDFSubtype; stays null when
    // the part has no known subtitle.
    const char *part = nullptr;
    switch (pdftype) {
    case subtypePDFA:
        switch (subpart) {
        case subtypePart1:
            part = "Use of PDF 1.4";
            break;
        case subtypePart2:
            part = "Use of ISO 32000-1";
            break;
        case subtypePart3:
            part = "Use of ISO 32000-1 with support for embedded files";
            break;
        default:
            break;
        }
        break;
    case subtypePDFE:
        switch (subpart) {
        case subtypePart1:
            part = "Use of PDF 1.6";
            break;
        default:
            break;
        }
        break;
    case subtypePDFUA:
        switch (subpart) {
        case subtypePart1:
            part = "Use of ISO 32000-1";
            break;
        case subtypePart2:
            part = "Use of ISO 32000-2";
            break;
        case subtypePart3:
            part = "Use of ISO 32000-1 with support for embedded files";
            break;
        default:
            break;
        }
        break;
    case subtypePDFVT:
        switch (subpart) {
        case subtypePart1:
            part = "Using PPML 2.1 and PDF 1.4";
            break;
        case subtypePart2:
            part = "Using PDF/X-4 and PDF/X-5 (PDF/VT-1 and PDF/VT-2)";
            break;
        case subtypePart3:
            part = "Using PDF/X-6 (PDF/VT-3)";
            break;
        default:
            break;
        }
        break;
    case subtypePDFX:
        switch (subpart) {
        case subtypePart1:
            part = "Complete exchange using CMYK data (PDF/X-1 and PDF/X-1a)";
            break;
        case subtypePart3:
            part = "Complete exchange suitable for colour-managed workflows (PDF/X-3)";
            break;
        case subtypePart4:
            part = "Complete exchange of CMYK and spot colour printing data using PDF 1.4 (PDF/X-1a)";
            break;
        case subtypePart5:
            part = "Partial exchange of printing data using PDF 1.4 (PDF/X-2) [Withdrawn]";
            break;
        case subtypePart6:
            part = "Complete exchange of printing data suitable for colour-managed workflows using PDF 1.4 (PDF/X-3)";
            break;
        case subtypePart7:
            part = "Complete exchange of printing data (PDF/X-4) and partial exchange of printing data with external profile reference (PDF/X-4p) using PDF 1.6";
            break;
        case subtypePart8:
            part = "Partial exchange of printing data using PDF 1.6 (PDF/X-5)";
            break;
        default:
            break;
        }
        break;
    default:
        break;
    }

    // Form Conformance explanation from PDFSubtypeConformance; stays null
    // when no conformance level is set.
    const char *confExp = nullptr;
    switch (doc->getPDFSubtypeConformance()) {
    case subtypeConfA:
        confExp = "Level A, Accessible";
        break;
    case subtypeConfB:
        confExp = "Level B, Basic";
        break;
    case subtypeConfG:
        confExp = "Level G, External graphical content";
        break;
    case subtypeConfN:
        confExp = "Level N, External ICC profile";
        break;
    case subtypeConfP:
        confExp = "Level P, Embedded ICC profile";
        break;
    case subtypeConfPG:
        confExp = "Level PG, Embedded ICC profile and external graphical content";
        break;
    case subtypeConfU:
        confExp = "Level U, Unicode support";
        break;
    case subtypeConfNone:
    case subtypeConfNull:
    default:
        break;
    }

    printf(" Title: %s\n", typeExp);
    printf(" Abbreviation: %s\n", abbr.c_str());
    if (part) {
        printf(" Subtitle: Part %d: %s\n", subpart, part);
    } else {
        printf(" Subtitle: Part %d\n", subpart);
    }
    // The part number is always appended here, even when no part is set.
    printf(" Standard: %s-%d\n", standard, subpart);
    if (confExp) {
        printf(" Conformance: %s\n", confExp);
    }
}
// Prints every string entry of the document info dictionary, standard and
// custom alike, sorted by key.  The "Trapped" key is skipped.  The two date
// keys honour the isoDates / rawDates options; all other keys are printed
// as "<key>:" padded with spaces up to 16 code points, followed by the
// value.
static void printCustomInfo(PDFDoc *doc, const UnicodeMap *uMap)
{
    Object info = doc->getDocInfo();
    if (!info.isDict()) {
        return;
    }
    Dict *dict = info.getDict();

    // std::set keeps the keys sorted.
    std::set<std::string> sortedKeys;
    const int nEntries = dict->getLength();
    for (int idx = 0; idx < nEntries; ++idx) {
        const std::string name(dict->getKey(idx));
        if (name == "Trapped") {
            continue;
        }
        sortedKeys.insert(name);
    }

    // Prints one date entry in the format selected on the command line.
    auto printDate = [dict, uMap](const char *dateKey, const char *label) {
        if (isoDates) {
            printISODate(dict, dateKey, label, uMap);
        } else if (rawDates) {
            printInfoString(dict, dateKey, label, uMap);
        } else {
            printInfoDate(dict, dateKey, label, uMap);
        }
    };

    for (const std::string &name : sortedKeys) {
        if (name == "CreationDate") {
            printDate("CreationDate", "CreationDate: ");
            continue;
        }
        if (name == "ModDate") {
            printDate("ModDate", "ModDate: ");
            continue;
        }

        Object value = dict->lookup(name.c_str());
        if (!value.isString()) {
            continue;
        }

        // print key
        Unicode *keyUcs4;
        const int keyLen = utf8ToUCS4(name.c_str(), &keyUcs4);
        printUCS4String(keyUcs4, keyLen, uMap);
        fputc(':', stdout);
        for (int col = keyLen; col < 16; ++col) {
            fputc(' ', stdout);
        }
        gfree(keyUcs4);

        // print value
        printTextString(value.getString(), uMap);
        fputc('\n', stdout);
    }
}
static void printInfo(PDFDoc *doc, const UnicodeMap *uMap, long long filesize, bool multiPage) | |
{ | |
Page *page; | |
char buf[256]; | |
double w, h, wISO, hISO, isoThreshold; | |
int pg, i; | |
int r; | |
// print doc info | |
Object info = doc->getDocInfo(); | |
if (info.isDict()) { | |
printInfoString(info.getDict(), "Title", "Title: ", uMap); | |
printInfoString(info.getDict(), "Subject", "Subject: ", uMap); | |
printInfoString(info.getDict(), "Keywords", "Keywords: ", uMap); | |
printInfoString(info.getDict(), "Author", "Author: ", uMap); | |
printInfoString(info.getDict(), "Creator", "Creator: ", uMap); | |
printInfoString(info.getDict(), "Producer", "Producer: ", uMap); | |
if (isoDates) { | |
printISODate(info.getDict(), "CreationDate", "CreationDate: ", uMap); | |
printISODate(info.getDict(), "ModDate", "ModDate: ", uMap); | |
} else if (rawDates) { | |
printInfoString(info.getDict(), "CreationDate", "CreationDate: ", uMap); | |
printInfoString(info.getDict(), "ModDate", "ModDate: ", uMap); | |
} else { | |
printInfoDate(info.getDict(), "CreationDate", "CreationDate: ", uMap); | |
printInfoDate(info.getDict(), "ModDate", "ModDate: ", uMap); | |
} | |
} | |
bool hasMetadata = false; | |
std::unique_ptr<GooString> metadata = doc->readMetadata(); | |
if (metadata) { | |
hasMetadata = true; | |
} | |
const std::set<std::string> docInfoStandardKeys { "Title", "Author", "Subject", "Keywords", "Creator", "Producer", "CreationDate", "ModDate", "Trapped" }; | |
bool hasCustom = false; | |
if (info.isDict()) { | |
Dict *dict = info.getDict(); | |
for (i = 0; i < dict->getLength(); i++) { | |
std::string key(dict->getKey(i)); | |
if (docInfoStandardKeys.find(key) == docInfoStandardKeys.end()) { | |
hasCustom = true; | |
break; | |
} | |
} | |
} | |
// print metadata info | |
printf("Custom Metadata: %s\n", hasCustom ? "yes" : "no"); | |
printf("Metadata Stream: %s\n", hasMetadata ? "yes" : "no"); | |
// print tagging info | |
printf("Tagged: %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoMarked) ? "yes" : "no"); | |
printf("UserProperties: %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoUserProperties) ? "yes" : "no"); | |
printf("Suspects: %s\n", (doc->getCatalog()->getMarkInfo() & Catalog::markInfoSuspects) ? "yes" : "no"); | |
// print form info | |
switch (doc->getCatalog()->getFormType()) { | |
case Catalog::NoForm: | |
printf("Form: none\n"); | |
break; | |
case Catalog::AcroForm: | |
printf("Form: AcroForm\n"); | |
break; | |
case Catalog::XfaForm: | |
printf("Form: XFA\n"); | |
break; | |
} | |
// print javascript info | |
{ | |
JSInfo jsInfo(doc, firstPage - 1); | |
jsInfo.scanJS(lastPage - firstPage + 1); | |
printf("JavaScript: %s\n", jsInfo.containsJS() ? "yes" : "no"); | |
} | |
// print page count | |
printf("Pages: %d\n", doc->getNumPages()); | |
// print encryption info | |
printf("Encrypted: "); | |
if (doc->isEncrypted()) { | |
unsigned char *fileKey; | |
CryptAlgorithm encAlgorithm; | |
int keyLength; | |
doc->getXRef()->getEncryptionParameters(&fileKey, &encAlgorithm, &keyLength); | |
const char *encAlgorithmName = "unknown"; | |
switch (encAlgorithm) { | |
case cryptRC4: | |
encAlgorithmName = "RC4"; | |
break; | |
case cryptAES: | |
encAlgorithmName = "AES"; | |
break; | |
case cryptAES256: | |
encAlgorithmName = "AES-256"; | |
break; | |
case cryptNone: | |
break; | |
} | |
printf("yes (print:%s copy:%s change:%s addNotes:%s algorithm:%s)\n", doc->okToPrint(true) ? "yes" : "no", doc->okToCopy(true) ? "yes" : "no", doc->okToChange(true) ? "yes" : "no", doc->okToAddNotes(true) ? "yes" : "no", | |
encAlgorithmName); | |
} else { | |
printf("no\n"); | |
} | |
// print page size | |
for (pg = firstPage; pg <= lastPage; ++pg) { | |
w = doc->getPageCropWidth(pg); | |
h = doc->getPageCropHeight(pg); | |
if (multiPage) { | |
printf("Page %4d size: %g x %g pts", pg, w, h); | |
} else { | |
printf("Page size: %g x %g pts", w, h); | |
} | |
if ((fabs(w - 612) < 1 && fabs(h - 792) < 1) || (fabs(w - 792) < 1 && fabs(h - 612) < 1)) { | |
printf(" (letter)"); | |
} else { | |
hISO = sqrt(sqrt(2.0)) * 7200 / 2.54; | |
wISO = hISO / sqrt(2.0); | |
isoThreshold = hISO * 0.003; ///< allow for 0.3% error when guessing conformance to ISO 216, A series | |
for (i = 0; i <= 6; ++i) { | |
if ((fabs(w - wISO) < isoThreshold && fabs(h - hISO) < isoThreshold) || (fabs(w - hISO) < isoThreshold && fabs(h - wISO) < isoThreshold)) { | |
printf(" (A%d)", i); | |
break; | |
} | |
hISO = wISO; | |
wISO /= sqrt(2.0); | |
isoThreshold /= sqrt(2.0); | |
} | |
} | |
printf("\n"); | |
r = doc->getPageRotate(pg); | |
if (multiPage) { | |
printf("Page %4d rot: %d\n", pg, r); | |
} else { | |
printf("Page rot: %d\n", r); | |
} | |
} | |
// print the boxes | |
if (printBoxes) { | |
if (multiPage) { | |
for (pg = firstPage; pg <= lastPage; ++pg) { | |
page = doc->getPage(pg); | |
if (!page) { | |
error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", pg); | |
continue; | |
} | |
sprintf(buf, "Page %4d MediaBox: ", pg); | |
printBox(buf, page->getMediaBox()); | |
sprintf(buf, "Page %4d CropBox: ", pg); | |
printBox(buf, page->getCropBox()); | |
sprintf(buf, "Page %4d BleedBox: ", pg); | |
printBox(buf, page->getBleedBox()); | |
sprintf(buf, "Page %4d TrimBox: ", pg); | |
printBox(buf, page->getTrimBox()); | |
sprintf(buf, "Page %4d ArtBox: ", pg); | |
printBox(buf, page->getArtBox()); | |
} | |
} else { | |
page = doc->getPage(firstPage); | |
if (!page) { | |
error(errSyntaxError, -1, "Failed to print boxes for page {0:d}", firstPage); | |
} else { | |
printBox("MediaBox: ", page->getMediaBox()); | |
printBox("CropBox: ", page->getCropBox()); | |
printBox("BleedBox: ", page->getBleedBox()); | |
printBox("TrimBox: ", page->getTrimBox()); | |
printBox("ArtBox: ", page->getArtBox()); | |
} | |
} | |
} | |
// print file size | |
printf("File size: %lld bytes\n", filesize); | |
// print linearization info | |
printf("Optimized: %s\n", doc->isLinearized() ? "yes" : "no"); | |
// print PDF version | |
printf("PDF version: %d.%d\n", doc->getPDFMajorVersion(), doc->getPDFMinorVersion()); | |
printPdfSubtype(doc, uMap); | |
} | |
int main(int argc, char *argv[]) | |
{ | |
std::unique_ptr<PDFDoc> doc; | |
GooString *fileName; | |
std::optional<GooString> ownerPW, userPW; | |
const UnicodeMap *uMap; | |
FILE *f; | |
bool ok; | |
int exitCode; | |
bool multiPage; | |
exitCode = 99; | |
// parse args | |
Win32Console win32console(&argc, &argv); | |
ok = parseArgs(argDesc, &argc, argv); | |
if (!ok || (argc != 2 && !printEnc) || printVersion || printHelp) { | |
fprintf(stderr, "pdfinfo version %s\n", PACKAGE_VERSION); | |
fprintf(stderr, "%s\n", popplerCopyright); | |
fprintf(stderr, "%s\n", xpdfCopyright); | |
if (!printVersion) { | |
printUsage("pdfinfo", "<PDF-file>", argDesc); | |
} | |
if (printVersion || printHelp) { | |
exitCode = 0; | |
} | |
goto err0; | |
} | |
if (printStructureText) { | |
printStructure = true; | |
} | |
// read config file | |
globalParams = std::make_unique<GlobalParams>(); | |
if (printEnc) { | |
printEncodings(); | |
exitCode = 0; | |
goto err0; | |
} | |
fileName = new GooString(argv[1]); | |
if (textEncName[0]) { | |
globalParams->setTextEncoding(textEncName); | |
} | |
// get mapping to output encoding | |
if (!(uMap = globalParams->getTextEncoding())) { | |
error(errCommandLine, -1, "Couldn't get text encoding"); | |
delete fileName; | |
goto err1; | |
} | |
// open PDF file | |
if (ownerPassword[0] != '\001') { | |
ownerPW = GooString(ownerPassword); | |
} | |
if (userPassword[0] != '\001') { | |
userPW = GooString(userPassword); | |
} | |
if (fileName->cmp("-") == 0) { | |
delete fileName; | |
fileName = new GooString("fd://0"); | |
} | |
doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW); | |
if (!doc->isOk()) { | |
exitCode = 1; | |
goto err2; | |
} | |
// get page range | |
if (firstPage < 1) { | |
firstPage = 1; | |
} | |
if (lastPage == 0) { | |
multiPage = false; | |
} else { | |
multiPage = true; | |
} | |
if (lastPage < 1 || lastPage > doc->getNumPages()) { | |
lastPage = doc->getNumPages(); | |
} | |
if (lastPage < firstPage) { | |
error(errCommandLine, -1, "Wrong page range given: the first page ({0:d}) can not be after the last page ({1:d}).", firstPage, lastPage); | |
goto err2; | |
} | |
if (printMetadata) { | |
// print the metadata | |
const std::unique_ptr<GooString> metadata = doc->readMetadata(); | |
if (metadata) { | |
fputs(metadata->c_str(), stdout); | |
fputc('\n', stdout); | |
} | |
} else if (printCustom) { | |
printCustomInfo(doc.get(), uMap); | |
} else if (printJS) { | |
// print javascript | |
JSInfo jsInfo(doc.get(), firstPage - 1); | |
jsInfo.scanJS(lastPage - firstPage + 1, stdout, uMap); | |
} else if (printStructure || printStructureText) { | |
// print structure | |
const StructTreeRoot *structTree = doc->getCatalog()->getStructTreeRoot(); | |
if (structTree) { | |
for (unsigned i = 0; i < structTree->getNumChildren(); i++) { | |
printStruct(structTree->getChild(i), 0); | |
} | |
} | |
} else if (printDests) { | |
printDestinations(doc.get(), uMap); | |
} else if (printUrls) { | |
printUrlList(doc.get()); | |
} else { | |
// print info | |
long long filesize = 0; | |
f = fopen(fileName->c_str(), "rb"); | |
if (f) { | |
Gfseek(f, 0, SEEK_END); | |
filesize = Gftell(f); | |
fclose(f); | |
} | |
if (multiPage == false) { | |
lastPage = 1; | |
} | |
printInfo(doc.get(), uMap, filesize, multiPage); | |
} | |
exitCode = 0; | |
// clean up | |
err2: | |
delete fileName; | |
err1: | |
err0: | |
return exitCode; | |
} | |