Spaces:
Running
Running
//======================================================================== | |
// | |
// HtmlOutputDev.cc | |
// | |
// Copyright 1997-2002 Glyph & Cog, LLC | |
// | |
// Changed 1999-2000 by G.Ovtcharov | |
// | |
// Changed 2002 by Mikhail Kruk | |
// | |
//======================================================================== | |
//======================================================================== | |
// | |
// Modified under the Poppler project - http://poppler.freedesktop.org | |
// | |
// All changes made under the Poppler project to this file are licensed | |
// under GPL version 2 or later | |
// | |
// Copyright (C) 2005-2013, 2016-2022 Albert Astals Cid <[email protected]> | |
// Copyright (C) 2008 Kjartan Maraas <[email protected]> | |
// Copyright (C) 2008 Boris Toloknov <[email protected]> | |
// Copyright (C) 2008 Haruyuki Kawabe <[email protected]> | |
// Copyright (C) 2008 Tomas Are Haavet <[email protected]> | |
// Copyright (C) 2009 Warren Toomey <[email protected]> | |
// Copyright (C) 2009, 2011 Carlos Garcia Campos <[email protected]> | |
// Copyright (C) 2009 Reece Dunn <[email protected]> | |
// Copyright (C) 2010, 2012, 2013, 2022 Adrian Johnson <[email protected]> | |
// Copyright (C) 2010 Hib Eris <[email protected]> | |
// Copyright (C) 2010 OSSD CDAC Mumbai by Leena Chourey ([email protected]) and Onkar Potdar ([email protected]) | |
// Copyright (C) 2011 Joshua Richardson <[email protected]> | |
// Copyright (C) 2011 Stephen Reichling <[email protected]> | |
// Copyright (C) 2011, 2012 Igor Slepchin <[email protected]> | |
// Copyright (C) 2012 Ihar Filipau <[email protected]> | |
// Copyright (C) 2012 Gerald Schmidt <[email protected]> | |
// Copyright (C) 2012 Pino Toscano <[email protected]> | |
// Copyright (C) 2013 Thomas Freitag <[email protected]> | |
// Copyright (C) 2013 Julien Nabet <[email protected]> | |
// Copyright (C) 2013 Johannes Brandstätter <[email protected]> | |
// Copyright (C) 2014 Fabio D'Urso <[email protected]> | |
// Copyright (C) 2016 Vincent Le Garrec <[email protected]> | |
// Copyright (C) 2017 Caolán McNamara <[email protected]> | |
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <[email protected]>. Work sponsored by the LiMux project of the city of Munich | |
// Copyright (C) 2018 Thibaut Brard <[email protected]> | |
// Copyright (C) 2018-2020 Adam Reichold <[email protected]> | |
// Copyright (C) 2019, 2020, 2022, 2024 Oliver Sander <[email protected]> | |
// Copyright (C) 2020 Eddie Kohler <[email protected]> | |
// Copyright (C) 2021 Christopher Hasse <[email protected]> | |
// Copyright (C) 2022 Brian Rosenfield <[email protected]> | |
// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <[email protected]> | |
// | |
// To see a description of the changes please see the Changelog file that | |
// came with your tarball or type make ChangeLog if you are building from git | |
// | |
//======================================================================== | |
class HtmlImage | |
{ | |
public: | |
HtmlImage(std::unique_ptr<GooString> &&_fName, GfxState *state) : fName(std::move(_fName)) | |
{ | |
state->transform(0, 0, &xMin, &yMax); | |
state->transform(1, 1, &xMax, &yMin); | |
} | |
~HtmlImage() = default; | |
HtmlImage(const HtmlImage &) = delete; | |
HtmlImage &operator=(const HtmlImage &) = delete; | |
double xMin, xMax; // image x coordinates | |
double yMin, yMax; // image y coordinates | |
std::unique_ptr<GooString> fName; // image file name | |
}; | |
// Tells whether the distance from x to y is strictly smaller than the
// distance from x to z (a tie counts as "not closer").
static inline bool IS_CLOSER(double x, double y, double z)
{
    const double distanceToY = std::fabs(x - y);
    const double distanceToZ = std::fabs(x - z);
    return distanceToY < distanceToZ;
}
// Conversion options. These are only declared here; their definitions live
// elsewhere. The notes describe how the code in this file uses each flag.
extern bool complexMode; // dump() uses the XML/complex writers; coalesce() keeps duplicates and only merges strings with matching fonts
extern bool singleHtml; // pages are appended to one "<DocName>-html.html" file; also selects the XML/complex writers in dump()
extern bool dataUrls; // NOTE(review): not referenced in this part of the file
extern bool ignore; // when set, dumpComplex() does not emit the background image tag
extern bool printCommands; // NOTE(review): not referenced in this part of the file
extern bool printHtml; // NOTE(review): not referenced in this part of the file
extern bool noframes; // when set, pages go to the shared output file and fonts accumulate across pages
extern bool stout; // NOTE(review): not referenced in this part of the file
extern bool xml; // dump() calls dumpAsXML() instead of dumpComplex()
extern bool noRoundedCoordinates; // dumpAsXML() prints coordinates with %f instead of rounding through xoutRound()
extern bool showHidden; // NOTE(review): not referenced in this part of the file
extern bool noMerge; // disables the line-break (paragraph) merging rule in coalesce()
extern double wordBreakThreshold; // multiplied by the string height to get the gap that counts as a word break
// Gates the diagnostic output written to std::cerr in this file.
static bool debug = false;
// Returns a newly allocated GooString holding the directory part of str:
// everything up to and including the last SLASH character. When str contains
// no SLASH an empty string is returned. The caller owns the result.
static GooString *Dirname(GooString *str)
{
    // c_str() hands out read-only storage, so keep the pointer const.
    const char *p = str->c_str();
    const int len = str->getLength();
    // Scan backwards for the last path separator.
    for (int i = len - 1; i >= 0; i--) {
        if (p[i] == SLASH) {
            return new GooString(p, i + 1);
        }
    }
    return new GooString();
}
static std::unique_ptr<GooString> print_matrix(const double *mat) | |
{ | |
return GooString::format("[{0:g} {1:g} {2:g} {3:g} {4:g} {5:g}]", *mat, mat[1], mat[2], mat[3], mat[4], mat[5]); | |
} | |
static std::unique_ptr<GooString> print_uni_str(const Unicode *u, const unsigned uLen) | |
{ | |
if (!uLen) { | |
return std::make_unique<GooString>(""); | |
} | |
std::unique_ptr<GooString> gstr_buff0 = GooString::format("{0:c}", (*u < 0x7F ? *u & 0xFF : '?')); | |
for (unsigned i = 1; i < uLen; i++) { | |
if (u[i] < 0x7F) { | |
gstr_buff0->append(static_cast<char>(u[i]) & 0xFF); | |
} | |
} | |
return gstr_buff0; | |
} | |
//------------------------------------------------------------------------ | |
// HtmlString | |
//------------------------------------------------------------------------ | |
// Starts an empty string at the current text position of state.
//
// The vertical extent (yMin/yMax) is derived from the current font's ascent
// and descent scaled by fontSize; the font (with colour, opacity and, for
// rotated or skewed text, a normalized rotation matrix) is registered with
// the font accumulator and its index stored in fontpos. The character
// buffers stay empty until addChar() is called.
HtmlString::HtmlString(GfxState *state, double fontSize, HtmlFontAccu *_fonts) : fonts(_fonts)
{
    double x, y;

    state->transform(state->getCurX(), state->getCurY(), &x, &y);
    if (std::shared_ptr<const GfxFont> font = state->getFont()) {
        double ascent = font->getAscent();
        double descent = font->getDescent();
        // Clamp implausible font metrics.
        if (ascent > 1.05) {
            ascent = 1.05;
        }
        if (descent < -0.4) {
            descent = -0.4;
        }
        yMin = y - ascent * fontSize;
        yMax = y - descent * fontSize;
        GfxRGB rgb;
        state->getFillRGB(&rgb);
        HtmlFont hfont = HtmlFont(*font, std::lround(fontSize), rgb, state->getFillOpacity());
        if (isMatRotOrSkew(state->getTextMat())) {
            // Keep all six entries of the text matrix: print_matrix() below
            // reads six values, so a four-element copy would be read out of
            // bounds whenever debug output is enabled. Only the first four
            // entries (the rotation part) are modified.
            double normalizedMatrix[6];
            memcpy(normalizedMatrix, state->getTextMat(), sizeof(normalizedMatrix));
            // browser rotates the opposite way
            // so flip the sign of the angle -> sin() components change sign
            if (debug) {
                std::cerr << DEBUG << "before transform: " << print_matrix(normalizedMatrix)->c_str() << std::endl;
            }
            normalizedMatrix[1] *= -1;
            normalizedMatrix[2] *= -1;
            if (debug) {
                std::cerr << DEBUG << "after reflecting angle: " << print_matrix(normalizedMatrix)->c_str() << std::endl;
            }
            normalizeRotMat(normalizedMatrix);
            if (debug) {
                std::cerr << DEBUG << "after norm: " << print_matrix(normalizedMatrix)->c_str() << std::endl;
            }
            hfont.setRotMat(normalizedMatrix);
        }
        fontpos = fonts->AddFont(hfont);
    } else {
        // this means that the PDF file draws text without a current font,
        // which should never happen
        yMin = y - 0.95 * fontSize;
        yMax = y + 0.35 * fontSize;
        fontpos = 0;
    }
    if (yMin == yMax) {
        // this is a sanity check for a case that shouldn't happen -- but
        // if it does happen, we want to avoid dividing by zero later
        yMin = y;
        yMax = y + 1;
    }
    col = 0;
    text = nullptr;
    xRight = nullptr;
    link = nullptr;
    len = size = 0;
    yxNext = nullptr;
    xyNext = nullptr;
    htext = std::make_unique<GooString>();
    dir = textDirUnknown;
}
// Frees the two parallel per-character arrays that addChar() grows with
// grealloc(). Both may still be null if no character was ever added.
HtmlString::~HtmlString()
{
    gfree(xRight);
    gfree(text);
}
// Appends one character whose left edge is at x and whose advance is dx.
// Updates xMin (first character only), xMax and the per-character right-edge
// array. state, y and dy are accepted but not used here.
void HtmlString::addChar(GfxState *state, double x, double y, double dx, double dy, Unicode u)
{
    // Direction detection is not implemented; default to left-to-right.
    if (dir == textDirUnknown) {
        dir = textDirLeftRight;
    }

    // Grow both parallel arrays in steps of 16 entries when full.
    if (len == size) {
        size += 16;
        text = static_cast<Unicode *>(grealloc(text, size * sizeof(Unicode)));
        xRight = static_cast<double *>(grealloc(xRight, size * sizeof(double)));
    }

    const double rightEdge = x + dx;
    if (len == 0) {
        xMin = x;
    }
    text[len] = u;
    xRight[len] = rightEdge;
    xMax = rightEdge;
    ++len;
}
void HtmlString::endString() | |
{ | |
if (dir == textDirRightLeft && len > 1) { | |
// printf("will reverse!\n"); | |
for (int i = 0; i < len / 2; i++) { | |
Unicode ch = text[i]; | |
text[i] = text[len - i - 1]; | |
text[len - i - 1] = ch; | |
} | |
} | |
} | |
//------------------------------------------------------------------------ | |
// HtmlPage | |
//------------------------------------------------------------------------ | |
// Creates an empty page. rawOrderA is stored as rawOrder, which endString()
// uses to append strings in arrival order instead of sorting them.
HtmlPage::HtmlPage(bool rawOrderA)
{
    rawOrder = rawOrderA;

    // No strings collected yet.
    curStr = nullptr;
    yxStrings = nullptr;
    xyStrings = nullptr;
    yxCur1 = nullptr;
    yxCur2 = nullptr;

    // Fresh font and link accumulators, owned by the page.
    fonts = new HtmlFontAccu();
    links = new HtmlLinks();

    // Page geometry and bookkeeping start out unset.
    pageWidth = 0;
    pageHeight = 0;
    fontsPageMarker = 0;
    DocName = nullptr;
    firstPage = -1; // -1 means "no page dumped yet" (see dumpComplex)
}
// Destroys the page. clear() runs first because it deletes and re-creates
// the font and link accumulators; the fresh ones are deleted afterwards.
HtmlPage::~HtmlPage()
{
    clear();

    // Images that were never dumped are still owned by the list.
    for (auto *pending : imgList) {
        delete pending;
    }

    delete DocName;
    delete fonts;
    delete links;
}
// Recomputes fontSize from the graphics state. For Type 3 fonts the
// transformed font size is additionally scaled, either by the height of the
// font bounding box or, failing that, by a guess based on the glyph 'm'.
void HtmlPage::updateFont(GfxState *state)
{
    // adjust the font size
    fontSize = state->getTransformedFontSize();

    const GfxFont *const font = state->getFont().get();
    if (!font || font->getType() != fontType3) {
        return;
    }

    // Grab the font size from the font bounding box if possible - remember to
    // scale from the glyph coordinate system.
    const double *fontBBox = font->getFontBBox();
    const double *fontMat = font->getFontMatrix();
    const double bboxHeight = (fontBBox[3] - fontBBox[1]) * fontMat[3];
    if (bboxHeight > 0) {
        fontSize *= bboxHeight;
        return;
    }

    // This is a hack which makes it possible to deal with some Type 3
    // fonts. The problem is that it's impossible to know what the
    // base coordinate system used in the font is without actually
    // rendering the font. This code tries to guess by looking at the
    // width of the character 'm' (which breaks if the font is a
    // subset that doesn't contain 'm').
    int code = 0;
    for (; code < 256; ++code) {
        const char *name = ((Gfx8BitFont *)font)->getCharName(code);
        if (name && name[0] == 'm' && name[1] == '\0') {
            break;
        }
    }
    if (code < 256) {
        const double mWidth = ((Gfx8BitFont *)font)->getWidth(code);
        if (mWidth != 0) {
            // 0.6 is used as a generic average 'm' width -- yes, this is a hack
            fontSize *= mWidth / 0.6;
        }
    }
    if (fontMat[0] != 0) {
        fontSize *= fabs(fontMat[3] / fontMat[0]);
    }
}
// Starts a new current string at the state's text position, using the font
// size last computed by updateFont(). The s argument is not used.
void HtmlPage::beginString(GfxState *state, const GooString *s)
{
    HtmlString *const fresh = new HtmlString(state, fontSize, fonts);
    curStr = fresh;
}
void HtmlPage::conv() | |
{ | |
for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) { | |
tmp->htext = HtmlFont::HtmlFilter(tmp->text, tmp->len); | |
size_t linkIndex = 0; | |
if (links->inLink(tmp->xMin, tmp->yMin, tmp->xMax, tmp->yMax, linkIndex)) { | |
tmp->link = links->getLink(linkIndex); | |
} | |
} | |
} | |
// Adds the uLen code points of one drawn glyph to the current string,
// spreading the glyph's advance evenly over them. Before adding, the current
// string may be closed and a new one started (see the break test below).
// ox and oy are accepted but not used here.
void HtmlPage::addChar(GfxState *state, double x, double y, double dx, double dy, double ox, double oy, const Unicode *u, int uLen)
{
    double devX, devY;
    state->transform(x, y, &devX, &devY);
    const int n = curStr->len;

    if (debug) {
        const double *text_mat = state->getTextMat();
        // rotation is (cos q, sin q, -sin q, cos q, 0, 0)
        // sin q is zero iff there is no rotation, or 180 deg. rotation;
        // for 180 rotation, cos q will be negative
        if (text_mat[0] < 0 || !is_within(text_mat[1], .1, 0)) {
            std::cerr << DEBUG << "rotation matrix for \"" << print_uni_str(u, uLen)->c_str() << '"' << std::endl;
            std::cerr << "text " << print_matrix(text_mat)->c_str();
        }
    }

    // Break test. A new string is started only when ALL of these hold:
    //  - the current string already has characters,
    //  - the horizontal gap to the previous character exceeds
    //    wordBreakThreshold times the string height (this assumes
    //    left-to-right flow, which need not hold for rotated text),
    //  - the string's font rotation matrix differs from the state's text
    //    matrix.
    bool startNewString = false;
    if (n > 0) {
        const double gap = fabs(devX - curStr->xRight[n - 1]);
        const double limit = wordBreakThreshold * (curStr->yMax - curStr->yMin);
        if (gap > limit) {
            startNewString = !rot_matrices_equal(curStr->getFont().getRotMat(), state->getTextMat());
        }
    }
    if (startNewString) {
        endString();
        beginString(state, nullptr);
    }

    // Remove the character-spacing contribution from the advance.
    double spacingDx, spacingDy;
    state->textTransformDelta(state->getCharSpace() * state->getHorizScaling(), 0, &spacingDx, &spacingDy);
    dx -= spacingDx;
    dy -= spacingDy;

    // Advance per code point after transformation.
    double stepW, stepH;
    state->transformDelta(dx, dy, &stepW, &stepH);
    if (uLen != 0) {
        stepW /= uLen;
        stepH /= uLen;
    }

    for (int k = 0; k < uLen; ++k) {
        curStr->addChar(state, devX + k * stepW, devY + k * stepH, stepW, stepH, u[k]);
    }
}
void HtmlPage::endString() | |
{ | |
HtmlString *p1, *p2; | |
double h, y1, y2; | |
// throw away zero-length strings -- they don't have valid xMin/xMax | |
// values, and they're useless anyway | |
if (curStr->len == 0) { | |
delete curStr; | |
curStr = nullptr; | |
return; | |
} | |
curStr->endString(); | |
if (curStr->yMax - curStr->yMin > 20) { | |
delete curStr; | |
curStr = NULL; | |
return; | |
} | |
// insert string in y-major list | |
h = curStr->yMax - curStr->yMin; | |
y1 = curStr->yMin + 0.5 * h; | |
y2 = curStr->yMin + 0.8 * h; | |
if (rawOrder) { | |
p1 = yxCur1; | |
p2 = nullptr; | |
} else if ((!yxCur1 || (y1 >= yxCur1->yMin && (y2 >= yxCur1->yMax || curStr->xMax >= yxCur1->xMin))) && (!yxCur2 || (y1 < yxCur2->yMin || (y2 < yxCur2->yMax && curStr->xMax < yxCur2->xMin)))) { | |
p1 = yxCur1; | |
p2 = yxCur2; | |
} else { | |
for (p1 = nullptr, p2 = yxStrings; p2; p1 = p2, p2 = p2->yxNext) { | |
if (y1 < p2->yMin || (y2 < p2->yMax && curStr->xMax < p2->xMin)) { | |
break; | |
} | |
} | |
yxCur2 = p2; | |
} | |
yxCur1 = curStr; | |
if (p1) { | |
p1->yxNext = curStr; | |
} else { | |
yxStrings = curStr; | |
} | |
curStr->yxNext = p2; | |
curStr = nullptr; | |
} | |
// Returns a pointer to the last occurrence of ss inside s, or nullptr when
// ss does not occur. Occurrences may overlap. For an empty ss the result is
// the position of s's terminating NUL (the last place an empty string
// matches); without this guard the search would step past the terminator.
static const char *strrstr(const char *s, const char *ss)
{
    if (*ss == '\0') {
        return s + strlen(s);
    }

    const char *last = nullptr;
    // Each hit is at least one character long, so hit + 1 never passes the
    // terminator of s.
    for (const char *hit = strstr(s, ss); hit != nullptr; hit = strstr(hit + 1, ss)) {
        last = hit;
    }
    return last;
}
// Appends the closing tags for the requested open elements (<a>, <i>, <b>)
// to htext. When more than one element has to be closed, the position of
// the last opening tag of each is looked up so that the most recently
// opened one is closed first. finish_a and finish_italic are cleared when
// their tag is emitted by the ordering logic; finish_bold is never modified.
static void CloseTags(GooString *htext, bool &finish_a, bool &finish_italic, bool &finish_bold)
{
    // A position is only needed for an element that must be ordered against
    // at least one other element.
    const bool orderItalic = finish_italic && (finish_bold || finish_a);
    const bool orderBold = finish_bold && (finish_italic || finish_a);
    const bool orderAnchor = finish_a && (finish_italic || finish_bold);

    const char *italicPos = orderItalic ? strrstr(htext->c_str(), "<i>") : nullptr;
    const char *boldPos = orderBold ? strrstr(htext->c_str(), "<b>") : nullptr;
    const char *anchorPos = orderAnchor ? strrstr(htext->c_str(), "<a ") : nullptr;

    // Decide the ordering up front, before any append touches the buffer.
    const char *latestStyle = italicPos > boldPos ? italicPos : boldPos;
    const bool anchorOpenedLast = orderAnchor && anchorPos > latestStyle;
    const bool italicOpenedAfterBold = finish_italic && finish_bold && italicPos > boldPos;

    if (anchorOpenedLast) {
        htext->append("</a>", 4);
        finish_a = false;
    }
    if (italicOpenedAfterBold) {
        htext->append("</i>", 4);
        finish_italic = false;
    }
    if (finish_bold) {
        htext->append("</b>", 4);
    }
    if (finish_italic) {
        htext->append("</i>", 4);
    }
    if (finish_a) {
        htext->append("</a>");
    }
}
// Strings are lines of text; | |
// This function aims to combine strings into lines and paragraphs if !noMerge | |
// It may also strip out duplicate strings (if they are on top of each other); sometimes they are to create a font effect | |
void HtmlPage::coalesce() | |
{ | |
HtmlString *str1, *str2; | |
double space, horSpace, vertSpace, vertOverlap; | |
bool addSpace, addLineBreak; | |
int n, i; | |
double curX, curY; | |
for (str1 = yxStrings; str1; str1 = str1->yxNext) { | |
printf("x=%f..%f y=%f..%f size=%2d '", | |
str1->xMin, str1->xMax, str1->yMin, str1->yMax, | |
(int)(str1->yMax - str1->yMin)); | |
for (i = 0; i < str1->len; ++i) { | |
fputc(str1->text[i] & 0xff, stdout); | |
} | |
printf("'\n"); | |
} | |
printf("\n------------------------------------------------------------\n\n"); | |
str1 = yxStrings; | |
if (!str1) { | |
return; | |
} | |
//----- discard duplicated text (fake boldface, drop shadows) | |
if (!complexMode) { /* if not in complex mode get rid of duplicate strings */ | |
HtmlString *str3; | |
bool found; | |
while (str1) { | |
double size = str1->yMax - str1->yMin; | |
double xLimit = str1->xMin + size; | |
found = false; | |
for (str2 = str1, str3 = str1->yxNext; str3 && str3->xMin < xLimit; str2 = str3, str3 = str2->yxNext) { | |
if (str3->len == str1->len && !memcmp(str3->text, str1->text, str1->len * sizeof(Unicode)) && fabs(str3->yMin - str1->yMin) < size * 0.2 && fabs(str3->yMax - str1->yMax) < size * 0.2 | |
&& fabs(str3->xMax - str1->xMax) < size * 0.1) { | |
found = true; | |
// printf("found duplicate!\n"); | |
break; | |
} | |
} | |
if (found) { | |
str2->xyNext = str3->xyNext; | |
str2->yxNext = str3->yxNext; | |
delete str3; | |
} else { | |
str1 = str1->yxNext; | |
} | |
} | |
} /*- !complexMode */ | |
str1 = yxStrings; | |
const HtmlFont *hfont1 = getFont(str1); | |
if (hfont1->isBold()) { | |
str1->htext->insert(0, "<b>", 3); | |
} | |
if (hfont1->isItalic()) { | |
str1->htext->insert(0, "<i>", 3); | |
} | |
if (str1->getLink() != nullptr) { | |
GooString *ls = str1->getLink()->getLinkStart(); | |
str1->htext->insert(0, ls); | |
delete ls; | |
} | |
curX = str1->xMin; | |
curY = str1->yMin; | |
while (str1 && (str2 = str1->yxNext)) { | |
const HtmlFont *hfont2 = getFont(str2); | |
space = str1->yMax - str1->yMin; // the height of the font's bounding box | |
horSpace = str2->xMin - str1->xMax; | |
// if strings line up on left-hand side AND they are on subsequent lines, we need a line break | |
addLineBreak = !noMerge && (fabs(str1->xMin - str2->xMin) < 0.4) && IS_CLOSER(str2->yMax, str1->yMax + space, str1->yMax); | |
vertSpace = str2->yMin - str1->yMax; | |
// printf("coalesce %d %d %f? ", str1->dir, str2->dir, d); | |
if (str2->yMin >= str1->yMin && str2->yMin <= str1->yMax) { | |
vertOverlap = str1->yMax - str2->yMin; | |
} else if (str2->yMax >= str1->yMin && str2->yMax <= str1->yMax) { | |
vertOverlap = str2->yMax - str1->yMin; | |
} else { | |
vertOverlap = 0; | |
} | |
// Combine strings if: | |
// They appear to be the same font (complex mode only) && going in the same direction AND at least one of the following: | |
// 1. They appear to be part of the same line of text | |
// 2. They appear to be subsequent lines of a paragraph | |
// We assume (1) or (2) above, respectively, based on: | |
// (1) strings overlap vertically AND | |
// horizontal space between end of str1 and start of str2 is consistent with a single space or less; | |
// when rawOrder, the strings have to overlap vertically by at least 50% | |
// (2) Strings flow down the page, but the space between them is not too great, and they are lined up on the left | |
if (((((rawOrder && vertOverlap > 0.5 * space) || (!rawOrder && str2->yMin < str1->yMax)) && (horSpace > -0.5 * space && horSpace < space)) || (vertSpace >= 0 && vertSpace < 0.5 * space && addLineBreak)) | |
&& (!complexMode || (hfont1->isEqualIgnoreBold(*hfont2))) && // in complex mode fonts must be the same, in other modes fonts do not metter | |
str1->dir == str2->dir // text direction the same | |
) { | |
// printf("yes\n"); | |
n = str1->len + str2->len; | |
if ((addSpace = horSpace > wordBreakThreshold * space)) { | |
++n; | |
} | |
if (addLineBreak) { | |
++n; | |
} | |
str1->size = (n + 15) & ~15; | |
str1->text = (Unicode *)grealloc(str1->text, str1->size * sizeof(Unicode)); | |
str1->xRight = (double *)grealloc(str1->xRight, str1->size * sizeof(double)); | |
if (addSpace) { | |
str1->text[str1->len] = 0x20; | |
str1->htext->append(xml ? " " : " "); | |
str1->xRight[str1->len] = str2->xMin; | |
++str1->len; | |
} | |
if (addLineBreak) { | |
str1->text[str1->len] = '\n'; | |
str1->htext->append("<br/>"); | |
str1->xRight[str1->len] = str2->xMin; | |
++str1->len; | |
str1->yMin = str2->yMin; | |
str1->yMax = str2->yMax; | |
str1->xMax = str2->xMax; | |
int fontLineSize = hfont1->getLineSize(); | |
int curLineSize = (int)(vertSpace + space); | |
if (curLineSize != fontLineSize) { | |
HtmlFont *newfnt = new HtmlFont(*hfont1); | |
newfnt->setLineSize(curLineSize); | |
str1->fontpos = fonts->AddFont(*newfnt); | |
delete newfnt; | |
hfont1 = getFont(str1); | |
// we have to reget hfont2 because it's location could have | |
// changed on resize | |
hfont2 = getFont(str2); | |
} | |
} | |
for (i = 0; i < str2->len; ++i) { | |
str1->text[str1->len] = str2->text[i]; | |
str1->xRight[str1->len] = str2->xRight[i]; | |
++str1->len; | |
} | |
/* fix <i>, <b> if str1 and str2 differ and handle switch of links */ | |
const HtmlLink *hlink1 = str1->getLink(); | |
const HtmlLink *hlink2 = str2->getLink(); | |
bool switch_links = !hlink1 || !hlink2 || !hlink1->isEqualDest(*hlink2); | |
bool finish_a = switch_links && hlink1 != nullptr; | |
bool finish_italic = hfont1->isItalic() && (!hfont2->isItalic() || finish_a); | |
bool finish_bold = hfont1->isBold() && (!hfont2->isBold() || finish_a || finish_italic); | |
CloseTags(str1->htext.get(), finish_a, finish_italic, finish_bold); | |
if (switch_links && hlink2 != nullptr) { | |
GooString *ls = hlink2->getLinkStart(); | |
str1->htext->append(ls); | |
delete ls; | |
} | |
if ((!hfont1->isItalic() || finish_italic) && hfont2->isItalic()) { | |
str1->htext->append("<i>", 3); | |
} | |
if ((!hfont1->isBold() || finish_bold) && hfont2->isBold()) { | |
str1->htext->append("<b>", 3); | |
} | |
str1->htext->append(str2->htext.get()); | |
// str1 now contains href for link of str2 (if it is defined) | |
str1->link = str2->link; | |
hfont1 = hfont2; | |
if (str2->xMax > str1->xMax) { | |
str1->xMax = str2->xMax; | |
} | |
if (str2->yMax > str1->yMax) { | |
str1->yMax = str2->yMax; | |
} | |
str1->yxNext = str2->yxNext; | |
delete str2; | |
} else { // keep strings separate | |
// printf("no\n"); | |
bool finish_a = str1->getLink() != nullptr; | |
bool finish_bold = hfont1->isBold(); | |
bool finish_italic = hfont1->isItalic(); | |
CloseTags(str1->htext.get(), finish_a, finish_italic, finish_bold); | |
str1->xMin = curX; | |
str1->yMin = curY; | |
str1 = str2; | |
curX = str1->xMin; | |
curY = str1->yMin; | |
hfont1 = hfont2; | |
if (hfont1->isBold()) { | |
str1->htext->insert(0, "<b>", 3); | |
} | |
if (hfont1->isItalic()) { | |
str1->htext->insert(0, "<i>", 3); | |
} | |
if (str1->getLink() != nullptr) { | |
GooString *ls = str1->getLink()->getLinkStart(); | |
str1->htext->insert(0, ls); | |
delete ls; | |
} | |
} | |
} | |
str1->xMin = curX; | |
str1->yMin = curY; | |
bool finish_bold = hfont1->isBold(); | |
bool finish_italic = hfont1->isItalic(); | |
bool finish_a = str1->getLink() != nullptr; | |
CloseTags(str1->htext.get(), finish_a, finish_italic, finish_bold); | |
for (str1 = yxStrings; str1; str1 = str1->yxNext) { | |
printf("x=%3d..%3d y=%3d..%3d size=%2d ", | |
(int)str1->xMin, (int)str1->xMax, (int)str1->yMin, (int)str1->yMax, | |
(int)(str1->yMax - str1->yMin)); | |
printf("'%s'\n", str1->htext->c_str()); | |
} | |
printf("\n------------------------------------------------------------\n\n"); | |
} | |
void HtmlPage::dumpAsXML(FILE *f, int page) | |
{ | |
fprintf(f, "<page number=\"%d\" position=\"absolute\"", page); | |
fprintf(f, " top=\"0\" left=\"0\" height=\"%d\" width=\"%d\">\n", pageHeight, pageWidth); | |
for (int i = fontsPageMarker; i < fonts->size(); i++) { | |
GooString *fontCSStyle = fonts->CSStyle(i); | |
fprintf(f, "\t%s\n", fontCSStyle->c_str()); | |
delete fontCSStyle; | |
} | |
for (auto ptr : imgList) { | |
auto img = static_cast<HtmlImage *>(ptr); | |
if (!noRoundedCoordinates) { | |
fprintf(f, "<image top=\"%d\" left=\"%d\" ", xoutRound(img->yMin), xoutRound(img->xMin)); | |
fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(img->xMax - img->xMin), xoutRound(img->yMax - img->yMin)); | |
} else { | |
fprintf(f, "<image top=\"%f\" left=\"%f\" ", img->yMin, img->xMin); | |
fprintf(f, "width=\"%f\" height=\"%f\" ", img->xMax - img->xMin, img->yMax - img->yMin); | |
} | |
fprintf(f, "src=\"%s\"/>\n", img->fName->c_str()); | |
delete img; | |
} | |
imgList.clear(); | |
for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) { | |
if (tmp->htext) { | |
if (!noRoundedCoordinates) { | |
fprintf(f, "<text top=\"%d\" left=\"%d\" ", xoutRound(tmp->yMin), xoutRound(tmp->xMin)); | |
fprintf(f, "width=\"%d\" height=\"%d\" ", xoutRound(tmp->xMax - tmp->xMin), xoutRound(tmp->yMax - tmp->yMin)); | |
} else { | |
fprintf(f, "<text top=\"%f\" left=\"%f\" ", tmp->yMin, tmp->xMin); | |
fprintf(f, "width=\"%f\" height=\"%f\" ", tmp->xMax - tmp->xMin, tmp->yMax - tmp->yMin); | |
} | |
fprintf(f, "font=\"%d\">", tmp->fontpos); | |
fputs(tmp->htext->c_str(), f); | |
fputs("</text>\n", f); | |
} | |
} | |
fputs("</page>\n", f); | |
} | |
// Writes a <style> element defining the image flip classes "xflip", "yflip"
// and "xyflip" (the class names used by HtmlPage::dump) to f.
static void printCSS(FILE *f)
{
    // Image flip/flop CSS
    // Source:
    // http://stackoverflow.com/questions/1309055/cross-browser-way-to-flip-html-image-via-javascript-css
    // tested in Chrome, Fx (Linux) and IE9 (W7)
    static const char css[] = "<style type=\"text/css\">\n"
                              "<!--\n"
                              ".xflip {\n"
                              " -moz-transform: scaleX(-1);\n"
                              " -webkit-transform: scaleX(-1);\n"
                              " -o-transform: scaleX(-1);\n"
                              " transform: scaleX(-1);\n"
                              " filter: fliph;\n"
                              "}\n"
                              ".yflip {\n"
                              " -moz-transform: scaleY(-1);\n"
                              " -webkit-transform: scaleY(-1);\n"
                              " -o-transform: scaleY(-1);\n"
                              " transform: scaleY(-1);\n"
                              " filter: flipv;\n"
                              "}\n"
                              ".xyflip {\n"
                              " -moz-transform: scaleX(-1) scaleY(-1);\n"
                              " -webkit-transform: scaleX(-1) scaleY(-1);\n"
                              " -o-transform: scaleX(-1) scaleY(-1);\n"
                              " transform: scaleX(-1) scaleY(-1);\n"
                              " filter: fliph + flipv;\n"
                              "}\n"
                              "-->\n"
                              "</style>\n";

    // The text has no embedded NUL, so fputs emits exactly the same bytes.
    fputs(css, f);
}
int HtmlPage::dumpComplexHeaders(FILE *const file, FILE *&pageFile, int page) | |
{ | |
if (!noframes) { | |
const std::string pgNum = std::to_string(page); | |
std::string pageFileName(DocName->toStr()); | |
if (!singleHtml) { | |
pageFileName += '-' + pgNum + ".html"; | |
pageFile = fopen(pageFileName.c_str(), "w"); | |
} else { | |
pageFileName += "-html.html"; | |
pageFile = fopen(pageFileName.c_str(), "a"); | |
} | |
if (!pageFile) { | |
error(errIO, -1, "Couldn't open html file '{0:s}'", pageFileName.c_str()); | |
return 1; | |
} | |
if (!singleHtml) { | |
fprintf(pageFile, "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>Page %d</title>\n\n", DOCTYPE, page); | |
} else { | |
fprintf(pageFile, "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n\n", DOCTYPE, pageFileName.c_str()); | |
} | |
const std::string htmlEncoding = HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName()); | |
if (!singleHtml) { | |
fprintf(pageFile, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding.c_str()); | |
} else { | |
fprintf(pageFile, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n <br/>\n", htmlEncoding.c_str()); | |
} | |
} else { | |
pageFile = file; | |
fprintf(pageFile, "<!-- Page %d -->\n", page); | |
fprintf(pageFile, "<a name=\"%d\"></a>\n", page); | |
} | |
return 0; | |
} | |
void HtmlPage::dumpComplex(FILE *file, int page, const std::vector<std::string> &backgroundImages) | |
{ | |
FILE *pageFile; | |
if (firstPage == -1) { | |
firstPage = page; | |
} | |
if (dumpComplexHeaders(file, pageFile, page)) { | |
error(errIO, -1, "Couldn't write headers."); | |
return; | |
} | |
fputs("<style type=\"text/css\">\n<!--\n", pageFile); | |
fputs("\tp {margin: 0; padding: 0;}", pageFile); | |
for (int i = fontsPageMarker; i != fonts->size(); i++) { | |
GooString *fontCSStyle; | |
if (!singleHtml) { | |
fontCSStyle = fonts->CSStyle(i); | |
} else { | |
fontCSStyle = fonts->CSStyle(i, page); | |
} | |
fprintf(pageFile, "\t%s\n", fontCSStyle->c_str()); | |
delete fontCSStyle; | |
} | |
fputs("-->\n</style>\n", pageFile); | |
if (!noframes) { | |
fputs("</head>\n<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n", pageFile); | |
} | |
fprintf(pageFile, "<div id=\"page%d-div\" style=\"position:relative;width:%dpx;height:%dpx;\">\n", page, pageWidth, pageHeight); | |
if (!ignore && (size_t)(page - firstPage) < backgroundImages.size()) { | |
fprintf(pageFile, "<img width=\"%d\" height=\"%d\" src=\"%s\" alt=\"background image\"/>\n", pageWidth, pageHeight, backgroundImages[page - firstPage].c_str()); | |
} | |
for (HtmlString *tmp1 = yxStrings; tmp1; tmp1 = tmp1->yxNext) { | |
if (tmp1->htext) { | |
fprintf(pageFile, "<p style=\"position:absolute;top:%dpx;left:%dpx;white-space:nowrap\" class=\"ft", xoutRound(tmp1->yMin), xoutRound(tmp1->xMin)); | |
if (!singleHtml) { | |
fputc('0', pageFile); | |
} else { | |
fprintf(pageFile, "%d", page); | |
} | |
fprintf(pageFile, "%d\">", tmp1->fontpos); | |
fputs(tmp1->htext->c_str(), pageFile); | |
fputs("</p>\n", pageFile); | |
} | |
} | |
fputs("</div>\n", pageFile); | |
if (!noframes) { | |
fputs("</body>\n</html>\n", pageFile); | |
fclose(pageFile); | |
} | |
} | |
void HtmlPage::dump(FILE *f, int pageNum, const std::vector<std::string> &backgroundImages) | |
{ | |
if (complexMode || singleHtml) { | |
if (xml) { | |
dumpAsXML(f, pageNum); | |
} | |
if (!xml) { | |
dumpComplex(f, pageNum, backgroundImages); | |
} | |
} else { | |
fprintf(f, "<a name=%d></a>", pageNum); | |
// Loop over the list of image names on this page | |
for (auto ptr : imgList) { | |
auto img = static_cast<HtmlImage *>(ptr); | |
// see printCSS() for class names | |
const char *styles[4] = { "", " class=\"xflip\"", " class=\"yflip\"", " class=\"xyflip\"" }; | |
int style_index = 0; | |
if (img->xMin > img->xMax) { | |
style_index += 1; // xFlip | |
} | |
if (img->yMin > img->yMax) { | |
style_index += 2; // yFlip | |
} | |
fprintf(f, "<img%s src=\"%s\"/><br/>\n", styles[style_index], img->fName->c_str()); | |
delete img; | |
} | |
imgList.clear(); | |
for (HtmlString *tmp = yxStrings; tmp; tmp = tmp->yxNext) { | |
if (tmp->htext) { | |
fputs(tmp->htext->c_str(), f); | |
fputs("<br/>\n", f); | |
} | |
} | |
fputs("<hr/>\n", f); | |
} | |
} | |
void HtmlPage::clear() | |
{ | |
HtmlString *p1, *p2; | |
if (curStr) { | |
delete curStr; | |
curStr = nullptr; | |
} | |
for (p1 = yxStrings; p1; p1 = p2) { | |
p2 = p1->yxNext; | |
delete p1; | |
} | |
yxStrings = nullptr; | |
xyStrings = nullptr; | |
yxCur1 = yxCur2 = nullptr; | |
if (!noframes) { | |
delete fonts; | |
fonts = new HtmlFontAccu(); | |
fontsPageMarker = 0; | |
} else { | |
fontsPageMarker = fonts->size(); | |
} | |
delete links; | |
links = new HtmlLinks(); | |
} | |
void HtmlPage::setDocName(const char *fname) | |
{ | |
DocName = new GooString(fname); | |
} | |
void HtmlPage::addImage(std::unique_ptr<GooString> &&fname, GfxState *state) | |
{ | |
HtmlImage *img = new HtmlImage(std::move(fname), state); | |
imgList.push_back(img); | |
} | |
//------------------------------------------------------------------------ | |
// HtmlMetaVar | |
//------------------------------------------------------------------------ | |
// Stores private copies of the meta variable's name and content.
HtmlMetaVar::HtmlMetaVar(const char *_name, const char *_content)
{
    GooString *const nameCopy = new GooString(_name);
    name = nameCopy;
    GooString *const contentCopy = new GooString(_content);
    content = contentCopy;
}
// Releases the name and content strings allocated by the constructor.
HtmlMetaVar::~HtmlMetaVar()
{
    delete content;
    delete name;
}
// Renders the variable as an HTML <meta name="..." content="..."/> tag.
// The name and content are inserted as stored, without escaping. Returns a
// newly allocated string that the caller must delete.
GooString *HtmlMetaVar::toString() const
{
    GooString *tag = new GooString("<meta name=\"");
    tag->append(name);
    tag->append("\" content=\"");
    tag->append(content);
    tag->append("\"/>");
    return tag;
}
//------------------------------------------------------------------------ | |
// HtmlOutputDev | |
//------------------------------------------------------------------------ | |
static const char *HtmlEncodings[][2] = { { "Latin1", "ISO-8859-1" }, { nullptr, nullptr } }; | |
std::string HtmlOutputDev::mapEncodingToHtml(const std::string &encoding) | |
{ | |
for (int i = 0; HtmlEncodings[i][0] != nullptr; i++) { | |
if (encoding == HtmlEncodings[i][0]) { | |
return HtmlEncodings[i][1]; | |
} | |
} | |
return encoding; | |
} | |
void HtmlOutputDev::doFrame(int firstPage) | |
{ | |
GooString *fName = new GooString(Docname); | |
fName->append(".html"); | |
if (!(fContentsFrame = fopen(fName->c_str(), "w"))) { | |
error(errIO, -1, "Couldn't open html file '{0:t}'", fName); | |
delete fName; | |
return; | |
} | |
delete fName; | |
const std::string baseName = gbasename(Docname->c_str()); | |
fputs(DOCTYPE, fContentsFrame); | |
fputs("\n<html>", fContentsFrame); | |
fputs("\n<head>", fContentsFrame); | |
fprintf(fContentsFrame, "\n<title>%s</title>", docTitle->c_str()); | |
const std::string htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName()); | |
fprintf(fContentsFrame, "\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding.c_str()); | |
dumpMetaVars(fContentsFrame); | |
fprintf(fContentsFrame, "</head>\n"); | |
fputs("<frameset cols=\"100,*\">\n", fContentsFrame); | |
fprintf(fContentsFrame, "<frame name=\"links\" src=\"%s_ind.html\"/>\n", baseName.c_str()); | |
fputs("<frame name=\"contents\" src=", fContentsFrame); | |
if (complexMode) { | |
fprintf(fContentsFrame, "\"%s-%d.html\"", baseName.c_str(), firstPage); | |
} else { | |
fprintf(fContentsFrame, "\"%ss.html\"", baseName.c_str()); | |
} | |
fputs("/>\n</frameset>\n</html>\n", fContentsFrame); | |
fclose(fContentsFrame); | |
} | |
// Sets up the output device and opens the files selected by the global
// output options (xml, noframes, singleHtml, complexMode, stout).
//
//  - Always: stores the title, creates the HtmlPage, and registers the
//    "generator" meta variable plus one meta variable for each of
//    author / keywords / date / subject that is non-null.
//  - Frames, non-xml: unless singleHtml is set, writes the frameset via
//    doFrame() and opens "<fileName>_ind.html" as fContentsFrame; when not in
//    complex mode also opens "<fileName>s.html" as `page`.
//  - noframes: `page` is stdout when `stout` is set, otherwise
//    "<fileName>.html" or "<fileName>.xml"; the document prologue is written.
//
// `ok` stays false if any file could not be opened (the constructor returns
// early after reporting the error); it is set to true at the very end.
HtmlOutputDev::HtmlOutputDev(Catalog *catalogA, const char *fileName, const char *title, const char *author, const char *keywords, const char *subject, const char *date, bool rawOrderA, int firstPage, bool outline)
{
    // Opens `path` for writing, reports a failure through error(), and
    // releases `path` in either case. Returns the stream or nullptr.
    const auto openHtmlFile = [](GooString *path) -> FILE * {
        FILE *stream = fopen(path->c_str(), "w");
        if (!stream) {
            error(errIO, -1, "Couldn't open html file '{0:t}'", path);
        }
        delete path;
        return stream;
    };

    catalog = catalogA;
    fContentsFrame = nullptr;
    page = nullptr;
    docTitle = new GooString(title);
    pages = nullptr;
    dumpJPEG = true;
    rawOrder = rawOrderA;
    doOutline = outline;
    ok = false;
    needClose = false;
    pages = new HtmlPage(rawOrder);

    glMetaVars.push_back(new HtmlMetaVar("generator", "pdftohtml 0.36"));
    // Optional document information, emitted in this fixed order.
    const struct
    {
        const char *name;
        const char *value;
    } optionalMeta[] = { { "author", author }, { "keywords", keywords }, { "date", date }, { "subject", subject } };
    for (const auto &meta : optionalMeta) {
        if (meta.value) {
            glMetaVars.push_back(new HtmlMetaVar(meta.name, meta.value));
        }
    }

    maxPageWidth = 0;
    maxPageHeight = 0;

    pages->setDocName(fileName);
    Docname = new GooString(fileName);

    // for non-xml output (complex or simple) with frames generate the left frame
    if (!noframes && !xml) {
        if (!singleHtml) {
            doFrame(firstPage);

            GooString *indexPath = new GooString(fileName);
            indexPath->append("_ind.html");
            fContentsFrame = openHtmlFile(indexPath);
            if (!fContentsFrame) {
                return;
            }

            fputs(DOCTYPE, fContentsFrame);
            fputs("<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title></title>\n</head>\n<body>\n", fContentsFrame);

            if (doOutline) {
                const std::string docBase = gbasename(Docname->c_str());
                const char *outlineTarget = complexMode ? "-outline.html" : "s.html#outline";
                fprintf(fContentsFrame, "<a href=\"%s%s\" target=\"contents\">Outline</a><br/>", docBase.c_str(), outlineTarget);
            }
        }

        if (!complexMode) { /* not in complex mode */
            GooString *contentsPath = new GooString(fileName);
            contentsPath->append("s.html");
            page = openHtmlFile(contentsPath);
            if (!page) {
                return;
            }

            fputs(DOCTYPE, page);
            fputs("<html>\n<head>\n<title></title>\n", page);
            printCSS(page);
            fputs("</head>\n<body>\n", page);
        }
    }

    if (noframes) {
        if (stout) {
            page = stdout;
        } else {
            GooString *outputPath = new GooString(fileName);
            outputPath->append(xml ? ".xml" : ".html");
            page = openHtmlFile(outputPath);
            if (!page) {
                return;
            }
        }

        const std::string htmlEncoding = mapEncodingToHtml(globalParams->getTextEncodingName());
        if (xml) {
            fprintf(page, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", htmlEncoding.c_str());
            fputs("<!DOCTYPE pdf2xml SYSTEM \"pdf2xml.dtd\">\n\n", page);
            fprintf(page, "<pdf2xml producer=\"%s\" version=\"%s\">\n", PACKAGE_NAME, PACKAGE_VERSION);
        } else {
            fprintf(page, "%s\n<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"\" xml:lang=\"\">\n<head>\n<title>%s</title>\n", DOCTYPE, docTitle->c_str());
            fprintf(page, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"/>\n", htmlEncoding.c_str());
            dumpMetaVars(page);
            printCSS(page);
            fprintf(page, "</head>\n");
            fprintf(page, "<body bgcolor=\"#A0A0A0\" vlink=\"blue\" link=\"blue\">\n");
        }
    }

    ok = true;
}
// Frees owned objects and finishes/closes the output streams.
//
// fContentsFrame, when open, receives the closing body/html tags.
// `page` receives "</pdf2xml>" in xml mode; otherwise it receives the closing
// body/html tags unless the output is complex mode with frames, in which case
// `page` is neither written to nor closed here.
HtmlOutputDev::~HtmlOutputDev()
{
    delete Docname;
    delete docTitle;

    for (HtmlMetaVar *metaVar : glMetaVars) {
        delete metaVar;
    }

    if (fContentsFrame) {
        fputs("</body>\n</html>\n", fContentsFrame);
        fclose(fContentsFrame);
    }

    if (page != nullptr) {
        // Pick the trailer for this output flavour; nullptr means the stream
        // is left untouched.
        const char *trailer = nullptr;
        if (xml) {
            trailer = "</pdf2xml>\n";
        } else if (!complexMode || noframes) {
            trailer = "</body>\n</html>\n";
        }
        if (trailer != nullptr) {
            fputs(trailer, page);
            fclose(page);
        }
    }

    delete pages; // deleting nullptr is a no-op
}
void HtmlOutputDev::startPage(int pageNumA, GfxState *state, XRef *xref) | |
{ | |
if (mode&&!xml){ | |
if (write){ | |
write=false; | |
GooString* fname=Dirname(Docname); | |
fname->append("image.log"); | |
if((tin=fopen(getFileNameFromPath(fname->c_str(),fname->getLength()),"w"))==NULL){ | |
printf("Error : can not open %s",fname); | |
exit(1); | |
} | |
delete fname; | |
// if(state->getRotation()!=0) | |
// fprintf(tin,"ROTATE=%d rotate %d neg %d neg translate\n",state->getRotation(),state->getX1(),-state->getY1()); | |
// else | |
fprintf(tin,"ROTATE=%d neg %d neg translate\n",state->getX1(),state->getY1()); | |
} | |
} | |
pageNum = pageNumA; | |
const std::string str = gbasename(Docname->c_str()); | |
pages->clear(); | |
if (!noframes) { | |
if (fContentsFrame) { | |
if (complexMode) { | |
fprintf(fContentsFrame, "<a href=\"%s-%d.html\"", str.c_str(), pageNum); | |
} else { | |
fprintf(fContentsFrame, "<a href=\"%ss.html#%d\"", str.c_str(), pageNum); | |
} | |
fprintf(fContentsFrame, " target=\"contents\" >Page %d</a><br/>\n", pageNum); | |
} | |
} | |
pages->pageWidth = static_cast<int>(state->getPageWidth()); | |
pages->pageHeight = static_cast<int>(state->getPageHeight()); | |
} | |
void HtmlOutputDev::endPage() | |
{ | |
std::unique_ptr<Links> linksList = docPage->getLinks(); | |
for (AnnotLink *link : linksList->getLinks()) { | |
doProcessLink(link); | |
} | |
pages->conv(); | |
pages->coalesce(); | |
pages->dump(page, pageNum, backgroundImages); | |
// I don't yet know what to do in the case when there are pages of different | |
// sizes and we want complex output: running ghostscript many times | |
// seems very inefficient. So for now I'll just use last page's size | |
maxPageWidth = pages->pageWidth; | |
maxPageHeight = pages->pageHeight; | |
// if(!noframes&&!xml) fputs("<br/>\n", fContentsFrame); | |
if (!stout && !globalParams->getErrQuiet()) { | |
printf("Page-%d\n", (pageNum)); | |
} | |
} | |
void HtmlOutputDev::addBackgroundImage(const std::string &img) | |
{ | |
backgroundImages.push_back(img); | |
} | |
// Forwards the font change to the current page object.
void HtmlOutputDev::updateFont(GfxState *state)
{
    this->pages->updateFont(state);
}
// Forwards the start of a text string to the current page object.
void HtmlOutputDev::beginString(GfxState *state, const GooString *s)
{
    this->pages->beginString(state, s);
}
void HtmlOutputDev::endString(GfxState *state) | |
{ | |
pages->endString(); | |
} | |
// Adds one character to the current page. Characters whose render mode has
// both low bits set ((render & 3) == 3) are skipped unless showHidden is on.
void HtmlOutputDev::drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int /*nBytes*/, const Unicode *u, int uLen)
{
    if (showHidden || (state->getRender() & 3) != 3) {
        pages->addChar(state, x, y, dx, dy, originX, originY, u, uLen);
    }
}
// Copies the bytes of the stream underneath `str` unchanged into a ".jpg"
// image and registers it with the current page.
//
// With dataUrls set the bytes go to an in-memory file and the image is
// referenced through a base64 "data:image/jpeg" URL; otherwise they are
// written to the file named by createImageFileName("jpg").
// If the destination cannot be opened an error is reported and no image is
// added.
void HtmlOutputDev::drawJpegImage(GfxState *state, Stream *str)
{
    InMemoryFile memoryFile;

    // open the image file
    std::unique_ptr<GooString> imageName = createImageFileName("jpg");
    FILE *out = dataUrls ? memoryFile.open("wb") : fopen(imageName->c_str(), "wb");
    if (!out) {
        error(errIO, -1, "Couldn't open image file '{0:t}'", imageName.get());
        return;
    }

    // initialize stream
    // NOTE(review): presumably getNextStream() yields the still-encoded data
    // below the DCT decoder — confirm against the Stream API.
    str = str->getNextStream();
    str->reset();

    // copy the stream
    for (int byte = str->getChar(); byte != EOF; byte = str->getChar()) {
        fputc(byte, out);
    }
    fclose(out);

    if (dataUrls) {
        imageName = std::make_unique<GooString>(std::string("data:image/jpeg;base64,") + gbase64Encode(memoryFile.getBuffer()));
    }
    pages->addImage(std::move(imageName), state);
}
// Encodes an image (or 1-bit image mask) as PNG and registers it with the
// current page.
//
// Parameters:
//   state     graphics state forwarded to HtmlPage::addImage
//   str       image data stream
//   width,
//   height    image size in pixels
//   colorMap  colour map used to convert samples to RGB; required unless
//             isMask is true
//   isMask    true to write a monochrome mask instead of an RGB image
//
// With dataUrls set the PNG is produced in memory and referenced through a
// base64 "data:image/png" URL; otherwise it is written to the file named by
// createImageFileName("png").
//
// On any failure (missing colour map, file cannot be opened, PNG writer
// cannot be initialised, stream read or row write fails) an error is
// reported, every local resource is released, and no image is added.
void HtmlOutputDev::drawPngImage(GfxState *state, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool isMask)
{
    FILE *f1;
    InMemoryFile ims;

    if (!colorMap && !isMask) {
        error(errInternal, -1, "Can't have color image without a color map");
        return;
    }

    // open the image file
    std::unique_ptr<GooString> fName = createImageFileName("png");
    f1 = dataUrls ? ims.open("wb") : fopen(fName->c_str(), "wb");
    if (!f1) {
        error(errIO, -1, "Couldn't open image file '{0:t}'", fName.get());
        return;
    }

    PNGWriter *writer = new PNGWriter(isMask ? PNGWriter::MONOCHROME : PNGWriter::RGB);
    // TODO can we calculate the resolution of the image?
    if (!writer->init(f1, width, height, 72, 72)) {
        error(errInternal, -1, "Can't init PNG for image '{0:t}'", fName.get());
        delete writer;
        fclose(f1);
        return;
    }

    if (!isMask) {
        unsigned char *p;
        GfxRGB rgb;
        unsigned char *row = (unsigned char *)gmalloc(3 * width); // 3 bytes/pixel: RGB
        unsigned char **row_pointer = &row;

        // Initialize the image stream
        ImageStream *imgStr = new ImageStream(str, width, colorMap->getNumPixelComps(), colorMap->getBits());
        imgStr->reset();

        // For each line...
        for (int y = 0; y < height; y++) {
            // Convert into a PNG row
            p = imgStr->getLine();
            if (!p) {
                error(errIO, -1, "Failed to read PNG. '{0:t}' will be incorrect", fName.get());
                gfree(row);
                delete writer;
                delete imgStr;
                fclose(f1);
                return;
            }
            for (int x = 0; x < width; x++) {
                colorMap->getRGB(p, &rgb);
                // Write the RGB pixels into the row
                row[3 * x] = colToByte(rgb.r);
                row[3 * x + 1] = colToByte(rgb.g);
                row[3 * x + 2] = colToByte(rgb.b);
                p += colorMap->getNumPixelComps();
            }

            if (!writer->writeRow(row_pointer)) {
                error(errIO, -1, "Failed to write into PNG '{0:t}'", fName.get());
                gfree(row); // the row buffer must be released on this path too
                delete writer;
                delete imgStr;
                fclose(f1);
                return;
            }
        }

        gfree(row);
        imgStr->close();
        delete imgStr;
    } else { // isMask == true
        // Each mask row is packed 8 pixels per byte.
        int size = (width + 7) / 8;

        // PDF masks use 0 = draw current color, 1 = leave unchanged.
        // We invert this to provide the standard interpretation of alpha
        // (0 = transparent, 1 = opaque). If the colorMap already inverts
        // the mask we leave the data unchanged.
        int invert_bits = 0xff;
        if (colorMap) {
            GfxGray gray;
            unsigned char zero[gfxColorMaxComps];
            memset(zero, 0, sizeof(zero));
            colorMap->getGray(zero, &gray);
            if (colToByte(gray) == 0) {
                invert_bits = 0x00;
            }
        }

        str->reset();
        unsigned char *png_row = (unsigned char *)gmalloc(size);

        for (int ri = 0; ri < height; ++ri) {
            for (int i = 0; i < size; i++) {
                png_row[i] = str->getChar() ^ invert_bits;
            }

            if (!writer->writeRow(&png_row)) {
                error(errIO, -1, "Failed to write into PNG '{0:t}'", fName.get());
                delete writer;
                fclose(f1);
                gfree(png_row);
                return;
            }
        }
        str->close();
        gfree(png_row);
    }

    str->close();

    writer->close();
    delete writer;
    fclose(f1);

    if (dataUrls) {
        fName = std::make_unique<GooString>(std::string("data:image/png;base64,") + gbase64Encode(ims.getBuffer()));
    }
    pages->addImage(std::move(fName), state);
}
std::unique_ptr<GooString> HtmlOutputDev::createImageFileName(const char *ext) | |
{ | |
return GooString::format("{0:s}-{1:d}_{2:d}.{3:s}", Docname->c_str(), pageNum, pages->getNumImages() + 1, ext); | |
} | |
// Handles an image mask.
//
// When images are ignored, or in complex non-xml mode, the mask is only
// passed to the base class. Otherwise a DCT-encoded mask is dumped as JPEG
// (when dumpJPEG is on) and any other mask is written as a monochrome PNG
// and then also passed to the base class.
//
// NOTE(review): the PNG path is followed unconditionally by the base-class
// call; confirm that both are meant to run rather than being build-time
// alternatives.
void HtmlOutputDev::drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, bool invert, bool interpolate, bool inlineImg)
{
    const bool passThroughOnly = ignore || (complexMode && !xml);

    if (!passThroughOnly) {
        // dump JPEG file
        if (dumpJPEG && str->getKind() == strDCT) {
            drawJpegImage(state, str);
            return;
        }
        drawPngImage(state, str, width, height, nullptr, true);
    }

    OutputDev::drawImageMask(state, ref, str, width, height, invert, interpolate, inlineImg);
}
// Handles a colour image.
//
// When images are ignored, or in complex non-xml mode, the image is only
// passed to the base class. Otherwise a non-inline DCT-encoded image with 1
// or 3 pixel components is dumped as JPEG (when dumpJPEG is on); every other
// image is written as PNG and then also passed to the base class.
//
// NOTE(review): the PNG path is followed unconditionally by the base-class
// call; confirm that both are meant to run rather than being build-time
// alternatives.
void HtmlOutputDev::drawImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool interpolate, const int *maskColors, bool inlineImg)
{
    const bool passThroughOnly = ignore || (complexMode && !xml);

    if (!passThroughOnly) {
        // dump JPEG file
        const bool dumpAsJpeg = dumpJPEG && str->getKind() == strDCT && (colorMap->getNumPixelComps() == 1 || colorMap->getNumPixelComps() == 3) && !inlineImg;
        if (dumpAsJpeg) {
            drawJpegImage(state, str);
            return;
        }
        drawPngImage(state, str, width, height, colorMap);
    }

    OutputDev::drawImage(state, ref, str, width, height, colorMap, interpolate, maskColors, inlineImg);
}
// Converts a link annotation into an HtmlLink and adds it to the current page.
// The annotation rectangle is mapped from user to device coordinates; the
// two y values are passed to HtmlLink in swapped order relative to the
// annotation rectangle (y2 first, then y1).
void HtmlOutputDev::doProcessLink(AnnotLink *link)
{
    double userX1, userY1, userX2, userY2;
    link->getRect(&userX1, &userY1, &userX2, &userY2);

    int devX1, devY1, devX2, devY2;
    cvtUserToDev(userX1, userY1, &devX1, &devY1);
    cvtUserToDev(userX2, userY2, &devX2, &devY2);

    GooString *target = getLinkDest(link);
    HtmlLink htmlLink(static_cast<double>(devX1), static_cast<double>(devY2), static_cast<double>(devX2), static_cast<double>(devY1), target);
    pages->AddLink(htmlLink);
    delete target; // getLinkDest() hands over ownership of its result
}
GooString *HtmlOutputDev::getLinkDest(AnnotLink *link) | |
{ | |
if (!link->getAction()) { | |
return new GooString(); | |
} | |
switch (link->getAction()->getKind()) { | |
case actionGoTo: { | |
int destPage = 1; | |
LinkGoTo *ha = (LinkGoTo *)link->getAction(); | |
std::unique_ptr<LinkDest> dest; | |
if (ha->getDest() != nullptr) { | |
dest = std::make_unique<LinkDest>(*ha->getDest()); | |
} else if (ha->getNamedDest() != nullptr) { | |
dest = catalog->findDest(ha->getNamedDest()); | |
} | |
if (dest) { | |
GooString *file = new GooString(gbasename(Docname->c_str())); | |
if (dest->isPageRef()) { | |
const Ref pageref = dest->getPageRef(); | |
destPage = catalog->findPage(pageref); | |
} else { | |
destPage = dest->getPageNum(); | |
} | |
/* complex simple | |
frames file-4.html files.html#4 | |
noframes file.html#4 file.html#4 | |
*/ | |
if (noframes) { | |
file->append(".html#"); | |
file->append(std::to_string(destPage)); | |
} else { | |
if (complexMode) { | |
file->append("-"); | |
file->append(std::to_string(destPage)); | |
file->append(".html"); | |
} else { | |
file->append("s.html#"); | |
file->append(std::to_string(destPage)); | |
} | |
} | |
if (printCommands) { | |
printf(" link to page %d ", destPage); | |
} | |
return file; | |
} else { | |
return new GooString(); | |
} | |
} | |
case actionGoToR: { | |
LinkGoToR *ha = (LinkGoToR *)link->getAction(); | |
LinkDest *dest = nullptr; | |
int destPage = 1; | |
GooString *file = new GooString(); | |
if (ha->getFileName()) { | |
delete file; | |
file = new GooString(ha->getFileName()->c_str()); | |
} | |
if (ha->getDest() != nullptr) { | |
dest = new LinkDest(*ha->getDest()); | |
} | |
if (dest && file) { | |
if (!(dest->isPageRef())) { | |
destPage = dest->getPageNum(); | |
} | |
delete dest; | |
if (printCommands) { | |
printf(" link to page %d ", destPage); | |
} | |
if (printHtml) { | |
const char *p = file->c_str() + file->getLength() - 4; | |
if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) { | |
file->del(file->getLength() - 4, 4); | |
file->append(".html"); | |
} | |
file->append('#'); | |
file->append(std::to_string(destPage)); | |
} | |
} | |
if (printCommands && file) { | |
printf("filename %s\n", file->c_str()); | |
} | |
return file; | |
} | |
case actionURI: { | |
LinkURI *ha = (LinkURI *)link->getAction(); | |
GooString *file = new GooString(ha->getURI()); | |
// printf("uri : %s\n",file->c_str()); | |
return file; | |
} | |
case actionLaunch: | |
if (printHtml) { | |
LinkLaunch *ha = (LinkLaunch *)link->getAction(); | |
GooString *file = new GooString(ha->getFileName()->c_str()); | |
const char *p = file->c_str() + file->getLength() - 4; | |
if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) { | |
file->del(file->getLength() - 4, 4); | |
file->append(".html"); | |
} | |
if (printCommands) { | |
printf("filename %s", file->c_str()); | |
} | |
return file; | |
} | |
// fallthrough | |
default: | |
return new GooString(); | |
} | |
} | |
void HtmlOutputDev::dumpMetaVars(FILE *file) | |
{ | |
GooString *var; | |
for (const HtmlMetaVar *t : glMetaVars) { | |
var = t->toString(); | |
fprintf(file, "%s\n", var->c_str()); | |
delete var; | |
} | |
} | |
// Writes the document outline of `doc`.
//
// Destination:
//   - complex, non-xml output with frames: a separate "<Docname>-outline.html"
//     file, which is created, given its own HTML prologue/epilogue and closed
//     here;
//   - every other mode: the already open `page` stream (preceded by "<hr/>"
//     for complex non-xml output without frames).
//
// Returns false when the device is not ok, the document has no outline or no
// outline items, or the destination stream is unavailable; true otherwise.
bool HtmlOutputDev::dumpDocOutline(PDFDoc *doc)
{
    if (!ok) {
        return false;
    }

    Outline *outline = doc->getOutline();
    if (!outline) {
        return false;
    }

    const std::vector<OutlineItem *> *outlines = outline->getItems();
    if (!outlines) {
        return false;
    }

    FILE *output = nullptr;
    bool bClose = false;

    const bool separateFile = complexMode && !xml && !noframes;
    if (separateFile) {
        GooString *str = Docname->copy();
        str->append("-outline.html");
        output = fopen(str->c_str(), "w");
        delete str;
        if (output == nullptr) {
            return false;
        }
        bClose = true;

        const std::string htmlEncoding = HtmlOutputDev::mapEncodingToHtml(globalParams->getTextEncodingName());

        fprintf(output,
                "<html xmlns=\"http://www.w3.org/1999/xhtml\" "
                "lang=\"\" xml:lang=\"\">\n"
                "<head>\n"
                "<title>Document Outline</title>\n"
                "<meta http-equiv=\"Content-Type\" content=\"text/html; "
                "charset=%s\"/>\n"
                "</head>\n<body>\n",
                htmlEncoding.c_str());
    } else {
        // `page` is not opened in every mode (e.g. xml output with frames),
        // so refuse to write through a null stream.
        if (page == nullptr) {
            return false;
        }
        output = page;
        if (complexMode && !xml) {
            // complex output without frames: separate the outline from the pages
            fputs("<hr/>\n", output);
        }
    }

    if (!xml) {
        bool done = newHtmlOutlineLevel(output, outlines);
        if (done && !complexMode) {
            fputs("<hr/>\n", output);
        }

        if (bClose) {
            fputs("</body>\n</html>\n", output);
            fclose(output);
        }
    } else {
        newXmlOutlineLevel(output, outlines);
    }

    return true;
}
// Writes one level of the outline as an HTML <ul> list, recursing into the
// children of every item. At level 1 an "outline" anchor and a heading are
// written first. Items whose page number can be resolved (> 0) are wrapped
// in a link to that page.
//
// Returns true when at least one item was written at this level.
bool HtmlOutputDev::newHtmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines, int level)
{
    if (level == 1) {
        fputs("<a name=\"outline\"></a>", output);
        fputs("<h1>Document Outline</h1>\n", output);
    }
    fputs("<ul>\n", output);

    bool wroteItem = false;
    for (OutlineItem *entry : *outlines) {
        const auto &rawTitle = entry->getTitle();
        const std::unique_ptr<GooString> escapedTitle = HtmlFont::HtmlFilter(rawTitle.data(), rawTitle.size());

        std::unique_ptr<GooString> href;
        const int targetPage = getOutlinePageNum(entry);
        if (targetPage > 0) {
            /*              complex         simple
                    frames    file-4.html     files.html#4
                    noframes  file.html#4     file.html#4
             */
            std::string suffix;
            if (noframes) {
                suffix = ".html#" + std::to_string(targetPage);
            } else if (complexMode) {
                suffix = "-" + std::to_string(targetPage) + ".html";
            } else {
                suffix = "s.html#" + std::to_string(targetPage);
            }
            href = std::make_unique<GooString>(gbasename(Docname->c_str()));
            href->append(suffix);
        }

        fputs("<li>", output);
        if (href) {
            fprintf(output, "<a href=\"%s\">", href->c_str());
        }
        if (escapedTitle) {
            fputs(escapedTitle->c_str(), output);
        }
        if (href) {
            fputs("</a>", output);
        }
        wroteItem = true;

        // The item is opened before its children are queried.
        entry->open();
        if (entry->hasKids() && entry->getKids()) {
            fputs("\n", output);
            newHtmlOutlineLevel(output, entry->getKids(), level + 1);
        }
        fputs("</li>\n", output);
    }

    fputs("</ul>\n", output);
    return wroteItem;
}
void HtmlOutputDev::newXmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines) | |
{ | |
fputs("<outline>\n", output); | |
for (OutlineItem *item : *outlines) { | |
const std::vector<Unicode> &title = item->getTitle(); | |
auto titleStr = HtmlFont::HtmlFilter(title.data(), title.size()); | |
const int itemPage = getOutlinePageNum(item); | |
if (itemPage > 0) { | |
fprintf(output, "<item page=\"%d\">%s</item>\n", itemPage, titleStr->c_str()); | |
} else { | |
fprintf(output, "<item>%s</item>\n", titleStr->c_str()); | |
} | |
item->open(); | |
if (item->hasKids() && item->getKids()) { | |
newXmlOutlineLevel(output, item->getKids()); | |
} | |
} | |
fputs("</outline>\n", output); | |
} | |
// Resolves the page number an outline item points to.
//
// Only valid GoTo actions are considered. The destination is taken from the
// action directly or, failing that, looked up by name in the catalog; a page
// reference is translated through catalog->findPage().
// Returns -1 when the item has no usable GoTo destination.
int HtmlOutputDev::getOutlinePageNum(OutlineItem *item)
{
    constexpr int noPage = -1;

    const LinkAction *action = item->getAction();
    if (action == nullptr || action->getKind() != actionGoTo) {
        return noPage;
    }

    // `action` is non-null here, so the cast result is non-null as well.
    const LinkGoTo *goTo = static_cast<const LinkGoTo *>(action);
    if (!goTo->isOk()) {
        return noPage;
    }

    std::unique_ptr<LinkDest> target;
    if (goTo->getDest()) {
        target = std::make_unique<LinkDest>(*goTo->getDest());
    } else if (goTo->getNamedDest()) {
        target = catalog->findDest(goTo->getNamedDest());
    }
    if (!target) {
        return noPage;
    }

    if (target->isPageRef()) {
        const Ref pageRef = target->getPageRef();
        return catalog->findPage(pageRef);
    }
    return target->getPageNum();
}