File size: 11,492 Bytes
5cee033
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
//========================================================================
//
// HtmlOutputDev.h
//
// Copyright 1997 Derek B. Noonburg
//
// Changed 1999 by G.Ovtcharov
//========================================================================

//========================================================================
//
// Modified under the Poppler project - http://poppler.freedesktop.org
//
// All changes made under the Poppler project to this file are licensed
// under GPL version 2 or later
//
// Copyright (C) 2006, 2007, 2009, 2012, 2018-2022 Albert Astals Cid <[email protected]>
// Copyright (C) 2008, 2009 Warren Toomey <[email protected]>
// Copyright (C) 2009, 2011 Carlos Garcia Campos <[email protected]>
// Copyright (C) 2009 Kovid Goyal <[email protected]>
// Copyright (C) 2010 Hib Eris <[email protected]>
// Copyright (C) 2011 Joshua Richardson <[email protected]>
// Copyright (C) 2011 Stephen Reichling <[email protected]>
// Copyright (C) 2012 Igor Slepchin <[email protected]>
// Copyright (C) 2012 Fabio D'Urso <[email protected]>
// Copyright (C) 2013 Thomas Freitag <[email protected]>
// Copyright (C) 2018 Klarälvdalens Datakonsult AB, a KDAB Group company, <[email protected]>. Work sponsored by the LiMux project of the city of Munich
// Copyright (C) 2019 Oliver Sander <[email protected]>
// Copyright (C) 2024 g10 Code GmbH, Author: Sune Stolborg Vuorela <[email protected]>
//
// To see a description of the changes please see the Changelog file that
// came with your tarball or type make ChangeLog if you are building from git
//
//========================================================================

#ifndef HTMLOUTPUTDEV_H
#define HTMLOUTPUTDEV_H

#include <cstdio>
#include "goo/gbasename.h"
#include "GfxFont.h"
#include "OutputDev.h"
#include "HtmlLinks.h"
#include "HtmlFonts.h"
#include "Link.h"
#include "Catalog.h"
#include "UnicodeMap.h"

// Round <x> to an int by adding 0.5 and truncating.  The argument is
// parenthesized so that expressions containing low-precedence operators
// (e.g. a conditional expression) are rounded as a whole.
// Note: the cast truncates toward zero, so this yields round-to-nearest
// only for non-negative values.
#define xoutRound(x) ((int)((x) + 0.5))

// Doctype declaration string (the short HTML5 form).
// NOTE(review): presumably emitted at the top of generated HTML files;
// the uses are not visible in this header — confirm in the .cc.
#define DOCTYPE "<!DOCTYPE html>"

class GfxState;
class GooString;
class HtmlImage;
class PDFDoc;
class OutlineItem;
//------------------------------------------------------------------------
// HtmlString
//------------------------------------------------------------------------

// Text direction of a string (see HtmlString::dir).
enum UnicodeTextDirection
{
    textDirUnknown = 0, // direction not determined
    textDirLeftRight = 1, // left to right
    textDirRightLeft = 2, // right to left
    textDirTopBottom = 3 // top to bottom
};

// One string of text collected from the page, together with its bounding
// box, font reference and optional link.  The class is non-copyable.
// HtmlPage is a friend and accesses the private members directly,
// including the yxNext/xyNext pointers that chain strings into lists.
class HtmlString
{
public:
    // Constructor.
    // NOTE(review): <fonts> is presumably stored in the `fonts` member that
    // getFont() reads — confirm in the .cc.
    HtmlString(GfxState *state, double fontSize, HtmlFontAccu *fonts);

    // Destructor.
    ~HtmlString();

    // Not copyable.
    HtmlString(const HtmlString &) = delete;
    HtmlString &operator=(const HtmlString &) = delete;

    // Add a character to the string.
    void addChar(GfxState *state, double x, double y, double dx, double dy, Unicode u);
    // Returns the link pointer stored in this string (may be whatever the
    // owner assigned; no null check is done here).
    const HtmlLink *getLink() const { return link; }
    // Returns the font at index <fontpos> in the font accumulator.  The
    // pointer returned by fonts->Get() is dereferenced without a null check.
    const HtmlFont &getFont() const { return *fonts->Get(fontpos); }
    void endString(); // postprocessing

private:
    // NOTE(review): the original German note here read "change the text
    // variable"; what it refers to is unclear from this header.
    const HtmlLink *link; // link returned by getLink()
    double xMin, xMax; // bounding box x coordinates
    double yMin, yMax; // bounding box y coordinates
    int col; // starting column
    Unicode *text; // the text
    double *xRight; // right-hand x coord of each char
    HtmlString *yxNext; // next string in y-major order
    HtmlString *xyNext; // next string in x-major order
    int fontpos; // index passed to fonts->Get() in getFont()
    std::unique_ptr<GooString> htext; // NOTE(review): presumably the HTML form of the text — confirm
    int len; // length of text and xRight
    int size; // size of text and xRight arrays
    UnicodeTextDirection dir; // direction (left to right/right to left)
    HtmlFontAccu *fonts; // font accumulator queried by getFont()

    friend class HtmlPage;
};

//------------------------------------------------------------------------
// HtmlPage
//------------------------------------------------------------------------

class HtmlPage
{
public:
    // Constructor.
    explicit HtmlPage(bool rawOrder);

    // Destructor.
    ~HtmlPage();

    HtmlPage(const HtmlPage &) = delete;
    HtmlPage &operator=(const HtmlPage &) = delete;

    // Begin a new string.
    void beginString(GfxState *state, const GooString *s);

    // Add a character to the current string.
    void addChar(GfxState *state, double x, double y, double dx, double dy, double ox, double oy, const Unicode *u, int uLen); // unsigned char c);

    void updateFont(GfxState *state);

    // End the current string, sorting it into the list of strings.
    void endString();

    // Coalesce strings that look like parts of the same line.
    void coalesce();

    // Find a string.  If <top> is true, starts looking at top of page;
    // otherwise starts looking at <xMin>,<yMin>.  If <bottom> is true,
    // stops looking at bottom of page; otherwise stops looking at
    // <xMax>,<yMax>.  If found, sets the text bounding rectangle and
    // returns true; otherwise returns false.

    // new functions
    void AddLink(const HtmlLink &x) { links->AddLink(x); }

    // add an image to the current page
    void addImage(std::unique_ptr<GooString> &&fname, GfxState *state);

    // number of images on the current page
    int getNumImages() { return imgList.size(); }

    void dump(FILE *f, int pageNum, const std::vector<std::string> &backgroundImages);

    // Clear the page.
    void clear();

    void conv();

private:
    const HtmlFont *getFont(HtmlString *hStr) const { return fonts->Get(hStr->fontpos); }

    double fontSize; // current font size
    bool rawOrder; // keep strings in content stream order

    HtmlString *curStr; // currently active string

    HtmlString *yxStrings; // strings in y-major order
    HtmlString *xyStrings; // strings in x-major order
    HtmlString *yxCur1, *yxCur2; // cursors for yxStrings list

    void setDocName(const char *fname);
    void dumpAsXML(FILE *f, int page);
    void dumpComplex(FILE *f, int page, const std::vector<std::string> &backgroundImages);
    int dumpComplexHeaders(FILE *const file, FILE *&pageFile, int page);

    // marks the position of the fonts that belong to current page (for noframes)
    int fontsPageMarker;
    HtmlFontAccu *fonts;
    HtmlLinks *links;
    std::vector<HtmlImage *> imgList;

    GooString *DocName;
    int pageWidth;
    int pageHeight;
    int firstPage; // used to begin the numeration of pages

    friend class HtmlOutputDev;
};

//------------------------------------------------------------------------
// HtmlMetaVar
//------------------------------------------------------------------------
// A name/content pair held as two GooString pointers.  The class is
// non-copyable.
// NOTE(review): presumably represents one HTML <meta> entry — confirm in
// the .cc, where the constructor, destructor and toString() are defined.
class HtmlMetaVar
{
public:
    // Builds the pair from two C strings.
    // NOTE(review): presumably copied into `name` and `content` — confirm.
    HtmlMetaVar(const char *_name, const char *_content);
    ~HtmlMetaVar();

    // Not copyable.
    HtmlMetaVar(const HtmlMetaVar &) = delete;
    HtmlMetaVar &operator=(const HtmlMetaVar &) = delete;

    // Returns a GooString built from this variable.
    // NOTE(review): ownership of the returned raw pointer is not visible
    // here; presumably the caller must delete it — verify against callers.
    GooString *toString() const;

private:
    GooString *name;
    GooString *content;
};

//------------------------------------------------------------------------
// HtmlOutputDev
//------------------------------------------------------------------------

// OutputDev implementation used for HTML output.  It overrides the text
// and image drawing hooks of OutputDev and keeps per-document state such
// as the current page text (`pages`), meta variables and background
// images.  HtmlPage is a friend of this class.
class HtmlOutputDev : public OutputDev
{
public:
    // Open a text output file.  If <rawOrder> is true, the text is kept in
    // content stream order.  <firstPage> defaults to 1 and <outline> to
    // false.
    // NOTE(review): older wording of this comment described a <useASCII7>
    // parameter, which this signature does not have, and stated that a
    // nullptr <fileName> means no file is written; confirm the latter in
    // the .cc before relying on it.
    HtmlOutputDev(Catalog *catalogA, const char *fileName, const char *title, const char *author, const char *keywords, const char *subject, const char *date, bool rawOrder, int firstPage = 1, bool outline = false);

    // Destructor.
    ~HtmlOutputDev() override;

    // Check if file was successfully created.
    virtual bool isOk() { return ok; }

    //---- get info about output device

    // Does this device use upside-down coordinates?
    // (Upside-down means (0,0) is the top left corner of the page.)
    bool upsideDown() override { return true; }

    // Does this device use drawChar() or drawString()?
    bool useDrawChar() override { return true; }

    // Does this device use beginType3Char/endType3Char?  Otherwise,
    // text in Type 3 fonts will be drawn with drawChar/drawString.
    bool interpretType3Chars() override { return false; }

    // Does this device need non-text content?
    bool needNonText() override { return true; }

    //----- initialization and control

    // Remembers the page in docPage and always returns true; all other
    // arguments are ignored by this override.
    bool checkPageSlice(Page *p, double hDPI, double vDPI, int rotate, bool useMediaBox, bool crop, int sliceX, int sliceY, int sliceW, int sliceH, bool printing, bool (*abortCheckCbk)(void *data) = nullptr,
                        void *abortCheckCbkData = nullptr, bool (*annotDisplayDecideCbk)(Annot *annot, void *user_data) = nullptr, void *annotDisplayDecideCbkData = nullptr) override
    {
        docPage = p;
        return true;
    }

    // Start a page.
    void startPage(int pageNum, GfxState *state, XRef *xref) override;

    // End a page.
    void endPage() override;

    // Add a background image to the list of background images,
    // as this seems to be done outside other processing.  <img> is passed
    // by const reference, so the caller's string is left untouched.
    void addBackgroundImage(const std::string &img);

    //----- update text state
    void updateFont(GfxState *state) override;

    //----- text drawing
    void beginString(GfxState *state, const GooString *s) override;
    void endString(GfxState *state) override;
    void drawChar(GfxState *state, double x, double y, double dx, double dy, double originX, double originY, CharCode code, int nBytes, const Unicode *u, int uLen) override;

    void drawImageMask(GfxState *state, Object *ref, Stream *str, int width, int height, bool invert, bool interpolate, bool inlineImg) override;
    void drawImage(GfxState *state, Object *ref, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool interpolate, const int *maskColors, bool inlineImg) override;

    // Device type identifier; this device returns the constant 1234.
    virtual int DevType() { return 1234; }

    // Accessors for maxPageWidth / maxPageHeight.  They do not modify the
    // device, so they are callable on const objects.
    int getPageWidth() const { return maxPageWidth; }
    int getPageHeight() const { return maxPageHeight; }

    bool dumpDocOutline(PDFDoc *doc);

private:
    // Convert an encoding name into its HTML-standard name.
    // NOTE(review): per the original comment an unrecognized encoding is
    // passed through as-is — confirm in the .cc.
    static std::string mapEncodingToHtml(const std::string &encoding);
    void doProcessLink(AnnotLink *link);
    GooString *getLinkDest(AnnotLink *link);
    void dumpMetaVars(FILE *);
    void doFrame(int firstPage);
    bool newHtmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines, int level = 1);
    void newXmlOutlineLevel(FILE *output, const std::vector<OutlineItem *> *outlines);
    int getOutlinePageNum(OutlineItem *item);
    void drawJpegImage(GfxState *state, Stream *str);
    void drawPngImage(GfxState *state, Stream *str, int width, int height, GfxImageColorMap *colorMap, bool isMask = false);
    std::unique_ptr<GooString> createImageFileName(const char *ext);

    FILE *fContentsFrame;
    FILE *page; // html file
    // FILE *tin;                    // image log file
    // bool write;
    bool needClose; // need to close the file?
    HtmlPage *pages; // text for the current page
    bool rawOrder; // keep text in content stream order
    bool doOutline; // output document outline
    bool ok; // set up ok?
    bool dumpJPEG;
    int pageNum;
    int maxPageWidth; // returned by getPageWidth()
    int maxPageHeight; // returned by getPageHeight()
    GooString *Docname;
    GooString *docTitle;
    std::vector<HtmlMetaVar *> glMetaVars;
    Catalog *catalog;
    Page *docPage; // set by checkPageSlice()
    std::vector<std::string> backgroundImages;
    friend class HtmlPage;
};

#endif