Spaces:
Running
Running
File size: 11,492 Bytes
5cee033 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 |
//========================================================================
//
// HtmlOutputDev.h
//
// Copyright 1997 Derek B. Noonburg
//
// Changed 1999 by G.Ovtcharov
//========================================================================
//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2006, 2007, 2009, 2012, 2018-2022 Albert Astals Cid <[email protected]>
// Copyright (C) 2008, 2009 Warren Toomey <[email protected]>
// Copyright (C) 2009, 2011 Carlos Garcia Campos <[email protected]>
// Copyright (C) 2009 Kovid Goyal <[email protected]>
// Copyright (C) 2010 Hib Eris <[email protected]>
// Copyright (C) 2011 Joshua Richardson <[email protected]>
// Copyright (C) 2011 Stephen Reichling <[email protected]>
// Copyright (C) 2012 Igor Slepchin <[email protected]>
// Copyright (C) 2012 Fabio D'Urso <[email protected]>
// Copyright (C) 2013 Thomas Freitag <[email protected]>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <[email protected]>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2019 Oliver Sander <[email protected]>
// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <[email protected]>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================
#ifndef HTMLOUTPUTDEV_H
#define HTMLOUTPUTDEV_H
#include <cstdio>
#include "goo/gbasename.h"
#include "GfxFont.h"
#include "OutputDev.h"
#include "HtmlLinks.h"
#include "HtmlFonts.h"
#include "Link.h"
#include "Catalog.h"
#include "UnicodeMap.h"
// Round a floating-point expression to an int by adding 0.5 and truncating
// (the cast truncates toward zero, so this is round-half-up only for
// non-negative values). The argument is wrapped in parentheses so that an
// expression containing low-precedence operators (e.g. `c ? a : b`) is
// evaluated as a whole before 0.5 is added.
#define xoutRound(x) ((int)((x) + 0.5))
// HTML5 document type declaration string.
#define DOCTYPE "<!DOCTYPE html>"
class GfxState;
class GooString;
class HtmlImage;
class PDFDoc;
class OutlineItem;
//------------------------------------------------------------------------
// HtmlString
//------------------------------------------------------------------------
// Layout direction of a run of Unicode text.
enum UnicodeTextDirection
{
    textDirUnknown = 0, // direction not known
    textDirLeftRight = 1, // left to right
    textDirRightLeft = 2, // right to left
    textDirTopBottom = 3 // top to bottom
};
// One run of text on a page. HtmlPage is a friend and accesses the private
// members directly (for example fontpos).
class HtmlString
{
public:
    // Not copyable.
    HtmlString(const HtmlString &) = delete;
    HtmlString &operator=(const HtmlString &) = delete;

    // Constructor.
    HtmlString(GfxState *state, double fontSize, HtmlFontAccu *fonts);

    // Destructor.
    ~HtmlString();

    // Append one character to the string.
    void addChar(GfxState *state, double x, double y, double dx, double dy, Unicode u);

    // Postprocessing step for a finished string.
    void endString();

    // Link associated with this string.
    const HtmlLink *getLink() const
    {
        return link;
    }

    // Font of this string: entry number fontpos of the font accumulator.
    const HtmlFont &getFont() const
    {
        return *(fonts->Get(fontpos));
    }

private:
    // (translated from German) "changes the text variable"
    // NOTE(review): this comment looks orphaned; no declaration here matches it.
    const HtmlLink *link;

    // Bounding box x coordinates.
    double xMin;
    double xMax;

    // Bounding box y coordinates.
    double yMin;
    double yMax;

    // Starting column.
    int col;

    // The text.
    Unicode *text;

    // Right-hand x coordinate of each character.
    double *xRight;

    // Next string in y-major order.
    HtmlString *yxNext;

    // Next string in x-major order.
    HtmlString *xyNext;

    // Index passed to fonts->Get() to obtain this string's font.
    int fontpos;

    std::unique_ptr<GooString> htext;

    // Length of text and xRight.
    int len;

    // Size of the text and xRight arrays.
    int size;

    // Direction (left to right / right to left).
    UnicodeTextDirection dir;

    // Font accumulator queried by getFont().
    HtmlFontAccu *fonts;

    friend class HtmlPage;
};
//------------------------------------------------------------------------
// HtmlPage
//------------------------------------------------------------------------
class HtmlPage
{
public:
// Constructor.
explicit HtmlPage(bool rawOrder);
// Destructor.
~HtmlPage();
HtmlPage(const HtmlPage &) = delete;
HtmlPage &operator=(const HtmlPage &) = delete;
// Begin a new string.
void beginString(GfxState *state, const GooString *s);
// Add a character to the current string.
void addChar(GfxState *state, double x, double y, double dx, double dy, double ox, double oy, const Unicode *u, int uLen); // unsigned char c);
void updateFont(GfxState *state);
// End the current string, sorting it into the list of strings.
void endString();
// Coalesce strings that look like parts of the same line.
void coalesce();
// Find a string. If <top> is true, starts looking at top of page;
// otherwise starts looking at <xMin>,<yMin>. If <bottom> is true,
// stops looking at bottom of page; otherwise stops looking at
// <xMax>,<yMax>. If found, sets the text bounding rectangle and
// returns true; otherwise returns false.
// new functions
void AddLink(const HtmlLink &x) { links->AddLink(x); }
// add an image to the current page
void addImage(std::unique_ptr<GooString> &&fname, GfxState *state);
// number of images on the current page
int getNumImages() { return imgList.size(); }
void dump(FILE *f, int pageNum, const std::vector<std::string> &backgroundImages);
// Clear the page.
void clear();
void conv();
private:
const HtmlFont *getFont(HtmlString *hStr) const { return fonts->Get(hStr->fontpos); }
double fontSize; // current font size
bool rawOrder; // keep strings in content stream order
HtmlString *curStr; // currently active string
HtmlString *yxStrings; // strings in y-major order
HtmlString *xyStrings; // strings in x-major order
HtmlString *yxCur1, *yxCur2; // cursors for yxStrings list
void setDocName(const char *fname);
void dumpAsXML(FILE *f, int page);
void dumpComplex(FILE *f, int page, const std::vector<std::string> &backgroundImages);
int dumpComplexHeaders(FILE *const file, FILE *&pageFile, int page);
// marks the position of the fonts that belong to current page (for noframes)
int fontsPageMarker;
HtmlFontAccu *fonts;
HtmlLinks *links;
std::vector<HtmlImage *> imgList;
GooString *DocName;
int pageWidth;
int pageHeight;
int firstPage; // used to begin the numeration of pages
friend class HtmlOutputDev;
};
//------------------------------------------------------------------------
// HtmlMetaVar
//------------------------------------------------------------------------
// A name/content pair, each held as a GooString.
class HtmlMetaVar
{
public:
    // Not copyable.
    HtmlMetaVar(const HtmlMetaVar &) = delete;
    HtmlMetaVar &operator=(const HtmlMetaVar &) = delete;

    // Build a meta variable from a name and its content.
    HtmlMetaVar(const char *_name, const char *_content);

    ~HtmlMetaVar();

    // String form of this meta variable.
    // NOTE(review): returns a raw pointer; ownership is not visible in this
    // header -- confirm against the implementation and callers.
    GooString *toString() const;

private:
    GooString *name;
    GooString *content;
};
//------------------------------------------------------------------------
// HtmlOutputDev
//------------------------------------------------------------------------
// OutputDev implementation for HTML output. Overrides the text and image
// drawing hooks of OutputDev and keeps the current page's text in an
// HtmlPage.
class HtmlOutputDev : public OutputDev
{
public:
    // Open an output file. If <rawOrder> is true, the text is kept in
    // content stream order. <firstPage> and <outline> have defaults of 1
    // and false.
    // NOTE(review): an earlier version of this comment said that a nullptr
    // <fileName> means no file is written, and described a <useASCII7>
    // parameter that is not part of this signature; confirm the nullptr
    // behaviour against the implementation.
    HtmlOutputDev(Catalog *catalogA, const char *fileName, const char *title, const char *author, const char *keywords, const char *subject, const char *date, bool rawOrder, int firstPage = 1, bool outline = false);

    // Destructor.
    ~HtmlOutputDev() override;

    // Check if file was successfully created.
    virtual bool isOk() { return ok; }

    //---- get info about output device

    // Does this device use upside-down coordinates?
    // (Upside-down means (0,0) is the top left corner of the page.)
    bool upsideDown() override { return true; }

    // Does this device use drawChar() or drawString()?
    bool useDrawChar() override { return true; }

    // Does this device use beginType3Char/endType3Char?  Otherwise,
    // text in Type 3 fonts will be drawn with drawChar/drawString.
    bool interpretType3Chars() override { return false; }

    // Does this device need non-text content?
    bool needNonText() override { return true; }

    //----- initialization and control

    // Remembers <p> in docPage and always returns true; every other
    // argument is ignored.
    bool checkPageSlice(Page *p, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, int sliceX, int sliceY, int sliceW, int sliceH, bool printing, bool (*abortCheckCbk)(void *data) = nullptr,
                        void *abortCheckCbkData = nullptr, bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr, void *annotDisplayDecideCbkData = nullptr) override
    {
        docPage = p;
        return true;
    }

    // Start a page.
    void startPage(int pageNum, GfxState *state, XRef *xref) override;

    // End a page.
    void endPage() override;

    // Add a background image to the list of background images,
    // as this seems to be done outside other processing. <img> is taken by
    // const reference, so the caller keeps its own string.
    void addBackgroundImage(const std::string &img);

    //----- update text state
    void updateFont(GfxState *state) override;

    //----- text drawing
    void beginString(GfxState *state, const GooString *s) override;
    void endString(GfxState *state) override;
    void drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, const Unicode *u, int uLen) override;

    void drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, bool invert, bool interpolate, bool inlineImg) override;
    void drawImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool interpolate, const int *maskColors, bool inlineImg) override;

    // Device type identifier; this device reports 1234.
    virtual int DevType() { return 1234; }

    // Largest page width / height values held by the device. Const because
    // they only read members.
    int getPageWidth() const { return maxPageWidth; }
    int getPageHeight() const { return maxPageHeight; }

    bool dumpDocOutline(PDFDoc *doc);

private:
    // Convert an encoding name into its HTML standard name; if the encoding
    // is not recognized, the input name itself is the result.
    static std::string mapEncodingToHtml(const std::string &encoding);
    void doProcessLink(AnnotLink *link);
    GooString *getLinkDest(AnnotLink *link);
    void dumpMetaVars(FILE *);
    void doFrame(int firstPage);
    bool newHtmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines, int level = 1);
    void newXmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines);
    int getOutlinePageNum(OutlineItem *item);
    void drawJpegImage(GfxState *state, Stream *str);
    void drawPngImage(GfxState *state, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool isMask = false);
    std::unique_ptr<GooString> createImageFileName(const char *ext);

    FILE *fContentsFrame;
    FILE *page; // html file
    bool needClose; // need to close the file?
    HtmlPage *pages; // text for the current page
    bool rawOrder; // keep text in content stream order
    bool doOutline; // output document outline
    bool ok; // set up ok?
    bool dumpJPEG;
    int pageNum;
    int maxPageWidth;
    int maxPageHeight;
    GooString *Docname;
    GooString *docTitle;
    std::vector<HtmlMetaVar *> glMetaVars;
    Catalog *catalog;
    Page *docPage; // page most recently passed to checkPageSlice()
    std::vector<std::string> backgroundImages;

    friend class HtmlPage;
};
#endif
|