//========================================================================
//
// pdfdetach.cc
//
// Copyright 2010 Glyph & Cog, LLC
//
//========================================================================
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2011 Carlos Garcia Campos <[email protected]>
// Copyright (C) 2013 Yury G. Kudryashov <[email protected]>
// Copyright (C) 2014, 2017 Adrian Johnson <[email protected]>
// Copyright (C) 2018, 2020, 2022, 2024 Albert Astals Cid <[email protected]>
// Copyright (C) 2018 Adam Reichold <[email protected]>
// Copyright (C) 2019, 2021, 2024 Oliver Sander <[email protected]>
// Copyright (C) 2020 <[email protected]>
// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <[email protected]>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
// Command-line option storage. These are the destinations referenced by the
// argDesc table and are filled in when main() calls parseArgs().

// Operating mode selectors; main() requires exactly one of them to be set.
static bool doList = false; // -list
static int saveNum = 0; // -save <n>; 0 means "not given"
static char saveFile[128] = ""; // -savefile <name>; empty means "not given"
static bool saveAll = false; // -saveall

// Output location (-o) and output text encoding name (-enc).
static char savePath[1024] = "";
static char textEncName[128] = "";

// Passwords default to the single byte '\001'; main() treats a buffer whose
// first byte is still '\001' as "no password supplied".
static char ownerPassword[33] = "\001";
static char userPassword[33] = "\001";

// Informational flags (-v, -h and its aliases).
static bool printVersion = false;
static bool printHelp = false;
// Option table handed to parseArgs() and printUsage() in main().
// Each entry lists, in order: the option spelling, its argument kind, the
// variable that receives the value, a size (sizeof the buffer for argString
// entries, 0 otherwise) and the usage text. The empty entry ends the table.
static const ArgDesc argDesc[] = {
    { "-list", argFlag, &doList, 0, "list all embedded files" },
    { "-save", argInt, &saveNum, 0, "save the specified embedded file (file number)" },
    // The character buffers are passed as the array itself, like every other
    // string option below (the address is the same as &saveFile).
    { "-savefile", argString, saveFile, sizeof(saveFile), "save the specified embedded file (file name)" },
    { "-saveall", argFlag, &saveAll, 0, "save all embedded files" },
    { "-o", argString, savePath, sizeof(savePath), "file name for the saved embedded file" },
    { "-enc", argString, textEncName, sizeof(textEncName), "output text encoding name" },
    { "-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)" },
    { "-upw", argString, userPassword, sizeof(userPassword), "user password (for encrypted files)" },
    { "-v", argFlag, &printVersion, 0, "print copyright and version info" },
    { "-h", argFlag, &printHelp, 0, "print usage information" },
    { "-help", argFlag, &printHelp, 0, "print usage information" },
    { "--help", argFlag, &printHelp, 0, "print usage information" },
    { "-?", argFlag, &printHelp, 0, "print usage information" },
    {}
};
int main(int argc, char *argv[]) | |
{ | |
std::unique_ptr<PDFDoc> doc; | |
GooString *fileName; | |
const UnicodeMap *uMap; | |
std::optional<GooString> ownerPW, userPW; | |
char uBuf[8]; | |
bool ok; | |
bool hasSaveFile; | |
std::vector<std::unique_ptr<FileSpec>> embeddedFiles; | |
int nFiles, nPages, n, i, j; | |
Page *page; | |
Annots *annots; | |
const GooString *s1; | |
Unicode u; | |
bool isUnicode; | |
Win32Console win32Console(&argc, &argv); | |
// parse args | |
ok = parseArgs(argDesc, &argc, argv); | |
hasSaveFile = strlen(saveFile) > 0; | |
if ((doList ? 1 : 0) + ((saveNum != 0) ? 1 : 0) + ((hasSaveFile != 0) ? 1 : 0) + (saveAll ? 1 : 0) != 1) { | |
ok = false; | |
} | |
if (!ok || argc != 2 || printVersion || printHelp) { | |
fprintf(stderr, "pdfdetach version %s\n", PACKAGE_VERSION); | |
fprintf(stderr, "%s\n", popplerCopyright); | |
fprintf(stderr, "%s\n", xpdfCopyright); | |
if (!printVersion) { | |
printUsage("pdfdetach", "<PDF-file>", argDesc); | |
} | |
return 99; | |
} | |
fileName = new GooString(argv[1]); | |
// read config file | |
globalParams = std::make_unique<GlobalParams>(); | |
if (textEncName[0]) { | |
globalParams->setTextEncoding(textEncName); | |
} | |
// get mapping to output encoding | |
if (!(uMap = globalParams->getTextEncoding())) { | |
error(errConfig, -1, "Couldn't get text encoding"); | |
delete fileName; | |
return 99; | |
} | |
// open PDF file | |
if (ownerPassword[0] != '\001') { | |
ownerPW = GooString(ownerPassword); | |
} | |
if (userPassword[0] != '\001') { | |
userPW = GooString(userPassword); | |
} | |
doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW); | |
if (!doc->isOk()) { | |
return 1; | |
} | |
for (i = 0; i < doc->getCatalog()->numEmbeddedFiles(); ++i) { | |
embeddedFiles.push_back(doc->getCatalog()->embeddedFile(i)); | |
} | |
nPages = doc->getCatalog()->getNumPages(); | |
for (i = 0; i < nPages; ++i) { | |
page = doc->getCatalog()->getPage(i + 1); | |
if (!page) { | |
continue; | |
} | |
annots = page->getAnnots(); | |
if (!annots) { | |
break; | |
} | |
for (Annot *annot : annots->getAnnots()) { | |
if (annot->getType() != Annot::typeFileAttachment) { | |
continue; | |
} | |
embeddedFiles.push_back(std::make_unique<FileSpec>(static_cast<AnnotFileAttachment *>(annot)->getFile())); | |
} | |
} | |
nFiles = embeddedFiles.size(); | |
// list embedded files | |
if (doList) { | |
printf("%d embedded files\n", nFiles); | |
for (i = 0; i < nFiles; ++i) { | |
const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[i]; | |
printf("%d: ", i + 1); | |
s1 = fileSpec->getFileName(); | |
if (!s1) { | |
return 3; | |
} | |
if (hasUnicodeByteOrderMark(s1->toStr())) { | |
isUnicode = true; | |
j = 2; | |
} else { | |
isUnicode = false; | |
j = 0; | |
} | |
while (j < s1->getLength()) { | |
if (isUnicode) { | |
u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff); | |
j += 2; | |
} else { | |
u = pdfDocEncoding[s1->getChar(j) & 0xff]; | |
++j; | |
} | |
n = uMap->mapUnicode(u, uBuf, sizeof(uBuf)); | |
fwrite(uBuf, 1, n, stdout); | |
} | |
fputc('\n', stdout); | |
} | |
// save all embedded files | |
} else if (saveAll) { | |
std::filesystem::path basePath = savePath; | |
if (basePath.empty()) { | |
basePath = std::filesystem::current_path(); | |
} | |
basePath = basePath.lexically_normal(); | |
for (i = 0; i < nFiles; ++i) { | |
const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[i]; | |
std::string filename; | |
s1 = fileSpec->getFileName(); | |
if (!s1) { | |
return 3; | |
} | |
if (hasUnicodeByteOrderMark(s1->toStr())) { | |
isUnicode = true; | |
j = 2; | |
} else { | |
isUnicode = false; | |
j = 0; | |
} | |
while (j < s1->getLength()) { | |
if (isUnicode) { | |
u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff); | |
j += 2; | |
} else { | |
u = pdfDocEncoding[s1->getChar(j) & 0xff]; | |
++j; | |
} | |
n = uMap->mapUnicode(u, uBuf, sizeof(uBuf)); | |
filename.append(uBuf, n); | |
} | |
if (filename.empty()) { | |
return 3; | |
} | |
std::filesystem::path filePath = basePath; | |
filePath = filePath.append(filename).lexically_normal(); | |
if (!filePath.generic_string().starts_with(basePath.generic_string())) { | |
error(errIO, -1, "Preventing directory traversal"); | |
return 3; | |
} | |
auto *embFile = fileSpec->getEmbeddedFile(); | |
if (!embFile || !embFile->isOk()) { | |
return 3; | |
} | |
if (!embFile->save(filePath.generic_string())) { | |
error(errIO, -1, "Error saving embedded file as '{0:s}'", filePath.c_str()); | |
return 2; | |
} | |
} | |
// save an embedded file | |
} else { | |
if (hasSaveFile) { | |
for (i = 0; i < nFiles; ++i) { | |
const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[i]; | |
s1 = fileSpec->getFileName(); | |
if (strcmp(s1->c_str(), saveFile) == 0) { | |
saveNum = i + 1; | |
break; | |
} | |
} | |
} | |
if (saveNum < 1 || saveNum > nFiles) { | |
error(errCommandLine, -1, hasSaveFile ? "Invalid file name" : "Invalid file number"); | |
return 99; | |
} | |
const std::unique_ptr<FileSpec> &fileSpec = embeddedFiles[saveNum - 1]; | |
std::string targetPath = savePath; | |
if (targetPath.empty()) { | |
// The user hasn't given a path to save, just use the filename specified in the pdf as name | |
s1 = fileSpec->getFileName(); | |
if (!s1) { | |
return 3; | |
} | |
if (hasUnicodeByteOrderMark(s1->toStr())) { | |
isUnicode = true; | |
j = 2; | |
} else { | |
isUnicode = false; | |
j = 0; | |
} | |
while (j < s1->getLength()) { | |
if (isUnicode) { | |
u = ((s1->getChar(j) & 0xff) << 8) | (s1->getChar(j + 1) & 0xff); | |
j += 2; | |
} else { | |
u = pdfDocEncoding[s1->getChar(j) & 0xff]; | |
++j; | |
} | |
n = uMap->mapUnicode(u, uBuf, sizeof(uBuf)); | |
targetPath.append(uBuf, n); | |
} | |
const std::filesystem::path basePath = std::filesystem::current_path().lexically_normal(); | |
std::filesystem::path filePath = basePath; | |
filePath = filePath.append(targetPath).lexically_normal(); | |
if (!filePath.generic_string().starts_with(basePath.generic_string())) { | |
error(errIO, -1, "Preventing directory traversal"); | |
return 3; | |
} | |
targetPath = filePath.generic_string(); | |
} | |
auto *embFile = fileSpec->getEmbeddedFile(); | |
if (!embFile || !embFile->isOk()) { | |
return 3; | |
} | |
if (!embFile->save(targetPath)) { | |
error(errIO, -1, "Error saving embedded file as '{0:s}'", targetPath.c_str()); | |
return 2; | |
} | |
} | |
return 0; | |
} | |