// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2010-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* file name: idna.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 2010mar05
* created by: Markus W. Scherer
*/
/**
 * \file
 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
 */
U_NAMESPACE_BEGIN

// Forward declaration: IDNA's processing methods take an IDNAInfo by reference.
class IDNAInfo;
/** | |
* Abstract base class for IDNA processing. | |
* See http://www.unicode.org/reports/tr46/ | |
* and http://www.ietf.org/rfc/rfc3490.txt | |
* | |
* The IDNA class is not intended for public subclassing. | |
* | |
* This C++ API currently only implements UTS #46. | |
* The uidna.h C API implements both UTS #46 (functions using UIDNA service object) | |
* and IDNA2003 (functions that do not use a service object). | |
* @stable ICU 4.6 | |
*/ | |
class U_COMMON_API IDNA : public UObject { | |
public: | |
/** | |
* Destructor. | |
* @stable ICU 4.6 | |
*/ | |
~IDNA(); | |
/** | |
* Returns an IDNA instance which implements UTS #46. | |
* Returns an unmodifiable instance, owned by the caller. | |
* Cache it for multiple operations, and delete it when done. | |
* The instance is thread-safe, that is, it can be used concurrently. | |
* | |
* UTS #46 defines Unicode IDNA Compatibility Processing, | |
* updated to the latest version of Unicode and compatible with both | |
* IDNA2003 and IDNA2008. | |
* | |
* The worker functions use transitional processing, including deviation mappings, | |
* unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE | |
* is used in which case the deviation characters are passed through without change. | |
* | |
* Disallowed characters are mapped to U+FFFD. | |
* | |
* For available options see the uidna.h header. | |
* Operations with the UTS #46 instance do not support the | |
* UIDNA_ALLOW_UNASSIGNED option. | |
* | |
* By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped). | |
* When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than | |
* letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD. | |
* | |
* @param options Bit set to modify the processing and error checking. | |
* See option bit set values in uidna.h. | |
* @param errorCode Standard ICU error code. Its input value must | |
* pass the U_SUCCESS() test, or else the function returns | |
* immediately. Check for U_FAILURE() on output or use with | |
* function chaining. (See User Guide for details.) | |
* @return the UTS #46 IDNA instance, if successful | |
* @stable ICU 4.6 | |
*/ | |
static IDNA * | |
createUTS46Instance(uint32_t options, UErrorCode &errorCode); | |
/** | |
* Converts a single domain name label into its ASCII form for DNS lookup. | |
* If any processing step fails, then info.hasErrors() will be true and | |
* the result might not be an ASCII string. | |
* The label might be modified according to the types of errors. | |
* Labels with severe errors will be left in (or turned into) their Unicode form. | |
* | |
* The UErrorCode indicates an error only in exceptional cases, | |
* such as a U_MEMORY_ALLOCATION_ERROR. | |
* | |
* @param label Input domain name label | |
* @param dest Destination string object | |
* @param info Output container of IDNA processing details. | |
* @param errorCode Standard ICU error code. Its input value must | |
* pass the U_SUCCESS() test, or else the function returns | |
* immediately. Check for U_FAILURE() on output or use with | |
* function chaining. (See User Guide for details.) | |
* @return dest | |
* @stable ICU 4.6 | |
*/ | |
virtual UnicodeString & | |
labelToASCII(const UnicodeString &label, UnicodeString &dest, | |
IDNAInfo &info, UErrorCode &errorCode) const = 0; | |
/** | |
* Converts a single domain name label into its Unicode form for human-readable display. | |
* If any processing step fails, then info.hasErrors() will be true. | |
* The label might be modified according to the types of errors. | |
* | |
* The UErrorCode indicates an error only in exceptional cases, | |
* such as a U_MEMORY_ALLOCATION_ERROR. | |
* | |
* @param label Input domain name label | |
* @param dest Destination string object | |
* @param info Output container of IDNA processing details. | |
* @param errorCode Standard ICU error code. Its input value must | |
* pass the U_SUCCESS() test, or else the function returns | |
* immediately. Check for U_FAILURE() on output or use with | |
* function chaining. (See User Guide for details.) | |
* @return dest | |
* @stable ICU 4.6 | |
*/ | |
virtual UnicodeString & | |
labelToUnicode(const UnicodeString &label, UnicodeString &dest, | |
IDNAInfo &info, UErrorCode &errorCode) const = 0; | |
/** | |
* Converts a whole domain name into its ASCII form for DNS lookup. | |
* If any processing step fails, then info.hasErrors() will be true and | |
* the result might not be an ASCII string. | |
* The domain name might be modified according to the types of errors. | |
* Labels with severe errors will be left in (or turned into) their Unicode form. | |
* | |
* The UErrorCode indicates an error only in exceptional cases, | |
* such as a U_MEMORY_ALLOCATION_ERROR. | |
* | |
* @param name Input domain name | |
* @param dest Destination string object | |
* @param info Output container of IDNA processing details. | |
* @param errorCode Standard ICU error code. Its input value must | |
* pass the U_SUCCESS() test, or else the function returns | |
* immediately. Check for U_FAILURE() on output or use with | |
* function chaining. (See User Guide for details.) | |
* @return dest | |
* @stable ICU 4.6 | |
*/ | |
virtual UnicodeString & | |
nameToASCII(const UnicodeString &name, UnicodeString &dest, | |
IDNAInfo &info, UErrorCode &errorCode) const = 0; | |
/** | |
* Converts a whole domain name into its Unicode form for human-readable display. | |
* If any processing step fails, then info.hasErrors() will be true. | |
* The domain name might be modified according to the types of errors. | |
* | |
* The UErrorCode indicates an error only in exceptional cases, | |
* such as a U_MEMORY_ALLOCATION_ERROR. | |
* | |
* @param name Input domain name | |
* @param dest Destination string object | |
* @param info Output container of IDNA processing details. | |
* @param errorCode Standard ICU error code. Its input value must | |
* pass the U_SUCCESS() test, or else the function returns | |
* immediately. Check for U_FAILURE() on output or use with | |
* function chaining. (See User Guide for details.) | |
* @return dest | |
* @stable ICU 4.6 | |
*/ | |
virtual UnicodeString & | |
nameToUnicode(const UnicodeString &name, UnicodeString &dest, | |
IDNAInfo &info, UErrorCode &errorCode) const = 0; | |
// UTF-8 versions of the processing methods ---------------------------- *** | |
/** | |
* Converts a single domain name label into its ASCII form for DNS lookup. | |
* UTF-8 version of labelToASCII(), same behavior. | |
* | |
* @param label Input domain name label | |
* @param dest Destination byte sink; Flush()ed if successful | |
* @param info Output container of IDNA processing details. | |
* @param errorCode Standard ICU error code. Its input value must | |
* pass the U_SUCCESS() test, or else the function returns | |
* immediately. Check for U_FAILURE() on output or use with | |
* function chaining. (See User Guide for details.) | |
* @return dest | |
* @stable ICU 4.6 | |
*/ | |
virtual void | |
labelToASCII_UTF8(StringPiece label, ByteSink &dest, | |
IDNAInfo &info, UErrorCode &errorCode) const; | |
/** | |
* Converts a single domain name label into its Unicode form for human-readable display. | |
* UTF-8 version of labelToUnicode(), same behavior. | |
* | |
* @param label Input domain name label | |
* @param dest Destination byte sink; Flush()ed if successful | |
* @param info Output container of IDNA processing details. | |
* @param errorCode Standard ICU error code. Its input value must | |
* pass the U_SUCCESS() test, or else the function returns | |
* immediately. Check for U_FAILURE() on output or use with | |
* function chaining. (See User Guide for details.) | |
* @return dest | |
* @stable ICU 4.6 | |
*/ | |
virtual void | |
labelToUnicodeUTF8(StringPiece label, ByteSink &dest, | |
IDNAInfo &info, UErrorCode &errorCode) const; | |
/** | |
* Converts a whole domain name into its ASCII form for DNS lookup. | |
* UTF-8 version of nameToASCII(), same behavior. | |
* | |
* @param name Input domain name | |
* @param dest Destination byte sink; Flush()ed if successful | |
* @param info Output container of IDNA processing details. | |
* @param errorCode Standard ICU error code. Its input value must | |
* pass the U_SUCCESS() test, or else the function returns | |
* immediately. Check for U_FAILURE() on output or use with | |
* function chaining. (See User Guide for details.) | |
* @return dest | |
* @stable ICU 4.6 | |
*/ | |
virtual void | |
nameToASCII_UTF8(StringPiece name, ByteSink &dest, | |
IDNAInfo &info, UErrorCode &errorCode) const; | |
/** | |
* Converts a whole domain name into its Unicode form for human-readable display. | |
* UTF-8 version of nameToUnicode(), same behavior. | |
* | |
* @param name Input domain name | |
* @param dest Destination byte sink; Flush()ed if successful | |
* @param info Output container of IDNA processing details. | |
* @param errorCode Standard ICU error code. Its input value must | |
* pass the U_SUCCESS() test, or else the function returns | |
* immediately. Check for U_FAILURE() on output or use with | |
* function chaining. (See User Guide for details.) | |
* @return dest | |
* @stable ICU 4.6 | |
*/ | |
virtual void | |
nameToUnicodeUTF8(StringPiece name, ByteSink &dest, | |
IDNAInfo &info, UErrorCode &errorCode) const; | |
}; | |
class UTS46; | |
/** | |
* Output container for IDNA processing errors. | |
* The IDNAInfo class is not suitable for subclassing. | |
* @stable ICU 4.6 | |
*/ | |
class U_COMMON_API IDNAInfo : public UMemory { | |
public: | |
/** | |
* Constructor for stack allocation. | |
* @stable ICU 4.6 | |
*/ | |
IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {} | |
/** | |
* Were there IDNA processing errors? | |
* @return true if there were processing errors | |
* @stable ICU 4.6 | |
*/ | |
UBool hasErrors() const { return errors!=0; } | |
/** | |
* Returns a bit set indicating IDNA processing errors. | |
* See UIDNA_ERROR_... constants in uidna.h. | |
* @return bit set of processing errors | |
* @stable ICU 4.6 | |
*/ | |
uint32_t getErrors() const { return errors; } | |
/** | |
* Returns true if transitional and nontransitional processing produce different results. | |
* This is the case when the input label or domain name contains | |
* one or more deviation characters outside a Punycode label (see UTS #46). | |
* <ul> | |
* <li>With nontransitional processing, such characters are | |
* copied to the destination string. | |
* <li>With transitional processing, such characters are | |
* mapped (sharp s/sigma) or removed (joiner/nonjoiner). | |
* </ul> | |
* @return true if transitional and nontransitional processing produce different results | |
* @stable ICU 4.6 | |
*/ | |
UBool isTransitionalDifferent() const { return isTransDiff; } | |
private: | |
friend class UTS46; | |
IDNAInfo(const IDNAInfo &other) = delete; // no copying | |
IDNAInfo &operator=(const IDNAInfo &other) = delete; // no copying | |
void reset() { | |
errors=labelErrors=0; | |
isTransDiff=false; | |
isBiDi=false; | |
isOkBiDi=true; | |
} | |
uint32_t errors, labelErrors; | |
UBool isTransDiff; | |
UBool isBiDi; | |
UBool isOkBiDi; | |
}; | |
U_NAMESPACE_END