Spaces:
Running
Running
File size: 6,364 Bytes
5cee033 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
#include <QtCore/QScopedPointer>
#include <QtTest/QTest>
#include <poppler-private.h>
#include <cstring>
#include "GlobalParams.h"
#include "UnicodeTypeTable.h"
#include "UTF.h"
// Qt Test fixture exercising the UTF-8 / UTF-16 / UCS-4 conversion helpers
// declared in UTF.h and UnicodeTypeTable.h.
class TestUTFConversion : public QObject
{
Q_OBJECT
public:
explicit TestUTFConversion(QObject *parent = nullptr) : QObject(parent) { }
// Each private slot below is picked up by the Qt Test runner as a test function.
private slots:
// Data provider for testUTF(): fills the "s" column with sample strings.
void testUTF_data();
// Round-trips every sample string between UTF-8 and UTF-16.
void testUTF();
// Checks the ASCII-7 transliteration of normalized text.
void testUnicodeToAscii7();
// Checks that BOM-prefixed UTF-16 input decodes identically in both byte orders.
void testUnicodeLittleEndian();
};
// Returns true when the two NUL-terminated byte strings have identical contents.
static bool compare(const char *a, const char *b)
{
    if (strcmp(a, b) != 0) {
        return false;
    }
    return true;
}
// Returns true when the two zero-terminated sequences of 16-bit code units
// are equal in both length and content.
static bool compare(const uint16_t *a, const uint16_t *b)
{
    // Walk both sequences in lock-step until either one reaches its terminator.
    for (; *a != 0 && *b != 0; ++a, ++b) {
        if (*a != *b) {
            return false;
        }
    }
    // Equal only if both ended at the same position (both units are the terminator).
    return *a == *b;
}
// Returns true when the first `len` code points of `a` equal the first `len`
// bytes of `b`, each byte being taken as an unsigned value (0..255).
// A non-positive `len` compares nothing and yields true.
static bool compare(const Unicode *a, const char *b, int len)
{
    for (int i = 0; i < len; i++) {
        // Plain char may be signed; widen through unsigned char so that bytes
        // >= 0x80 are not converted from a negative value on such platforms.
        const unsigned char byte = static_cast<unsigned char>(b[i]);
        if (a[i] != static_cast<Unicode>(byte)) {
            return false;
        }
    }
    return true;
}
// Returns true when the first `len` entries of `a` and `b` are pairwise equal.
// A non-positive `len` compares nothing and yields true.
static bool compare(const Unicode *a, const uint16_t *b, int len)
{
    int pos = 0;
    // Advance while the elements match; stop at the first difference.
    while (pos < len && a[pos] == b[pos]) {
        ++pos;
    }
    // Reaching the end without a mismatch means the ranges are equal.
    return pos >= len;
}
void TestUTFConversion::testUTF_data()
{
QTest::addColumn<QString>("s");
QTest::newRow("<empty>") << QString(QLatin1String(""));
QTest::newRow("a") << QStringLiteral("a");
QTest::newRow("abc") << QStringLiteral("abc");
QTest::newRow("Latin") << QStringLiteral("Vitrum edere possum; mihi non nocet");
QTest::newRow("Greek") << QStringLiteral("Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα");
QTest::newRow("Icelandic") << QStringLiteral("Ég get etið gler án þess að meiða mig");
QTest::newRow("Russian") << QStringLiteral("Я могу есть стекло, оно мне не вредит.");
QTest::newRow("Sanskrit") << QStringLiteral("काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम् ॥");
QTest::newRow("Arabic") << QStringLiteral("أنا قادر على أكل الزجاج و هذا لا يؤلمني");
QTest::newRow("Chinese") << QStringLiteral("我能吞下玻璃而不伤身体。");
QTest::newRow("Thai") << QStringLiteral("ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ");
QTest::newRow("non BMP") << QStringLiteral("𝓹𝓸𝓹𝓹𝓵𝓮𝓻");
}
void TestUTFConversion::testUTF()
{
char utf8Buf[1000];
char *utf8String;
uint16_t utf16Buf[1000];
uint16_t *utf16String;
int len;
QFETCH(QString, s);
char *str = strdup(s.toUtf8().constData());
// UTF-8 to UTF-16
len = utf8CountUtf16CodeUnits(str);
QCOMPARE(len, s.size()); // QString size() returns number of code units, not code points
Q_ASSERT(len < (int)sizeof(utf16Buf)); // if this fails, make utf16Buf larger
len = utf8ToUtf16(str, utf16Buf, sizeof(utf16Buf), INT_MAX);
QVERIFY(compare(utf16Buf, s.utf16()));
QCOMPARE(len, s.size());
utf16String = utf8ToUtf16(str);
QVERIFY(compare(utf16String, s.utf16()));
free(utf16String);
std::string sUtf8(str);
std::string gsUtf16_a(utf8ToUtf16WithBom(sUtf8));
std::unique_ptr<GooString> gsUtf16_b(Poppler::QStringToUnicodeGooString(s));
QCOMPARE(gsUtf16_b->cmp(gsUtf16_a), 0);
// UTF-16 to UTF-8
len = utf16CountUtf8Bytes(s.utf16());
QCOMPARE(len, (int)strlen(str));
Q_ASSERT(len < (int)sizeof(utf8Buf)); // if this fails, make utf8Buf larger
len = utf16ToUtf8(s.utf16(), utf8Buf);
QVERIFY(compare(utf8Buf, str));
QCOMPARE(len, (int)strlen(str));
utf8String = utf16ToUtf8(s.utf16());
QVERIFY(compare(utf8String, str));
free(utf8String);
free(str);
}
void TestUTFConversion::testUnicodeToAscii7()
{
globalParams = std::make_unique<GlobalParams>();
// Test string is one 'Registered' and twenty 'Copyright' chars
// so it's long enough to reproduce the bug given that glibc
// malloc() always returns 8-byte aligned memory addresses.
GooString *goo = Poppler::QStringToUnicodeGooString(QString::fromUtf8("®©©©©©©©©©©©©©©©©©©©©")); // clazy:exclude=qstring-allocations
const std::vector<Unicode> in = TextStringToUCS4(goo->toStr());
delete goo;
int in_norm_len;
int *in_norm_idx;
Unicode *in_norm = unicodeNormalizeNFKC(in.data(), in.size(), &in_norm_len, &in_norm_idx, true);
Unicode *out;
int out_len;
int *out_ascii_idx;
unicodeToAscii7(in_norm, in_norm_len, &out, &out_len, in_norm_idx, &out_ascii_idx);
free(in_norm);
free(in_norm_idx);
// ascii7 conversion: ® -> (R) © -> (c)
const char *expected_ascii = (char *)"(R)(c)(c)(c)(c)(c)(c)(c)(c)(c)(c)(c)(c)(c)(c)(c)(c)(c)(c)(c)(c)";
QCOMPARE(out_len, (int)strlen(expected_ascii));
QVERIFY(compare(out, expected_ascii, out_len));
free(out);
free(out_ascii_idx);
}
// Feeds the text "HI!☑" to TextStringToUCS4() once as BOM-prefixed UTF-16LE
// and once as BOM-prefixed UTF-16BE, and checks that both decode to the same
// four code points.
void TestUTFConversion::testUnicodeLittleEndian()
{
    // The inputs are spelled out byte by byte so that their byte order does not
    // depend on the byte order of the machine running the test.
    const unsigned char utf16LeBytes[] = { 0xFF, 0xFE, 0x48, 0x00, 0x49, 0x00, 0x21, 0x00, 0x11, 0x26 }; // UTF16-LE "HI!☑"
    const std::string GooUTF16LE(reinterpret_cast<const char *>(utf16LeBytes), sizeof(utf16LeBytes));

    const unsigned char utf16BeBytes[] = { 0xFE, 0xFF, 0x00, 0x48, 0x00, 0x49, 0x00, 0x21, 0x26, 0x11 }; // UTF16-BE "HI!☑"
    const std::string GooUTF16BE(reinterpret_cast<const char *>(utf16BeBytes), sizeof(utf16BeBytes));

    // Let's assert both byte strings are different
    QVERIFY(GooUTF16LE != GooUTF16BE);

    const std::vector<Unicode> UCS4fromLE = TextStringToUCS4(GooUTF16LE);
    const std::vector<Unicode> UCS4fromBE = TextStringToUCS4(GooUTF16BE);

    // Each input is five 16-bit units (one BOM + four characters); the result is
    // expected to hold only the four characters, i.e. without the leading BOM.
    QCOMPARE(UCS4fromLE.size(), UCS4fromBE.size());
    QCOMPARE(UCS4fromLE.size(), 4);

    // Check that after conversion, UCS4fromLE and UCS4fromBE are now the same
    for (size_t i = 0; i < UCS4fromLE.size(); i++) {
        QCOMPARE(UCS4fromLE[i], UCS4fromBE[i]);
    }

    const QString expected = QStringLiteral("HI!☑");

    // Do some final verifications, checking the strings to be "HI!☑"
    QVERIFY(UCS4fromLE == UCS4fromBE);
    QVERIFY(compare(UCS4fromLE.data(), expected.utf16(), static_cast<int>(UCS4fromLE.size())));
    QVERIFY(compare(UCS4fromBE.data(), expected.utf16(), static_cast<int>(UCS4fromBE.size())));
}
QTEST_GUILESS_MAIN(TestUTFConversion)
#include "check_utf_conversion.moc"
|