File size: 4,906 Bytes
5fae594 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
/**
* Factory that makes it possible to plug in different implementations of
* suffix lookup (e.g. using a trie or the packed-hashes data structure). It is
* used and exposed by the `tldts.ts` and `tldts-experimental.ts` bundle
* entrypoints.
*/
import getDomain from './domain';
import getDomainWithoutSuffix from './domain-without-suffix';
import extractHostname from './extract-hostname';
import isIp from './is-ip';
import isValidHostname from './is-valid';
import { IPublicSuffix, ISuffixLookupOptions } from './lookup/interface';
import { IOptions, setDefaults } from './options';
import getSubdomain from './subdomain';
// Shape of the object filled in by `parseImpl`. Every field starts out as
// `null` (see `getEmptyResult`) and stays `null` when the corresponding
// parsing step was not reached or produced no value.
export interface IResult {
// `hostname` is either a registered name (including but not limited to a
// hostname), or an IP address. IPv4 addresses must be in dot-decimal
// notation, and IPv6 addresses must be enclosed in brackets ([]). This is
// directly extracted from the input URL.
hostname: string | null;
// Is `hostname` an IP address (IPv4 or IPv6)? Only set by `parseImpl` when
// the `detectIp` option is enabled.
isIp: boolean | null;
// `hostname` split into its subdomain, domain and public suffix (if any).
subdomain: string | null;
domain: string | null;
publicSuffix: string | null;
// Value returned by `getDomainWithoutSuffix(domain, publicSuffix)`.
domainWithoutSuffix: string | null;
// Specify whether `publicSuffix` comes from the ICANN or the PRIVATE section
// of the list.
isIcann: boolean | null;
isPrivate: boolean | null;
}
// Create a brand new `IResult` in which every field is `null`. A distinct
// object is allocated on each call, so callers are free to mutate what they
// get back.
export function getEmptyResult(): IResult {
  const blank: IResult = {
    domain: null,
    domainWithoutSuffix: null,
    hostname: null,
    isIcann: null,
    isIp: null,
    isPrivate: null,
    publicSuffix: null,
    subdomain: null,
  };

  return blank;
}
// Put an existing `IResult` back into its empty state by setting every field
// to `null`. The object is mutated in place and nothing is returned.
export function resetResult(result: IResult): void {
  // In a chained assignment the right-most store runs first, so the fields
  // are written in the order `domain`, `domainWithoutSuffix`, `hostname`,
  // `isIcann`, `isIp`, `isPrivate`, `publicSuffix`, `subdomain`.
  result.subdomain =
    result.publicSuffix =
    result.isPrivate =
    result.isIp =
    result.isIcann =
    result.hostname =
    result.domainWithoutSuffix =
    result.domain =
      null;
}
// Identifiers for the successive steps of `parseImpl`. The `step` argument of
// `parseImpl` is compared against these values to return early (a form of
// laziness), so that no more work is done than the caller needs: e.g. there
// is no point extracting the domain and subdomain when only the public suffix
// is wanted.
export const enum FLAG {
  // Stop as soon as the hostname has been extracted.
  HOSTNAME = 0,
  // NOTE(review): `parseImpl` has no early return keyed on this value; confirm
  // whether it is still used anywhere.
  IS_VALID = 1,
  // Stop once the public suffix lookup has run.
  PUBLIC_SUFFIX = 2,
  // Stop once the domain has been computed.
  DOMAIN = 3,
  // Stop once the subdomain has been computed.
  SUB_DOMAIN = 4,
  // Run every step, including `domainWithoutSuffix`.
  ALL = 5,
}
// Parse `url` step by step, writing what is found into `result` and returning
// that same object. Processing stops right after the stage designated by
// `step`, or earlier when a stage yields nothing to build upon.
//
// - `url`: the input to parse; non-string values leave `result` untouched.
// - `step`: last stage the caller is interested in (see `FLAG`).
// - `suffixLookup`: pluggable public suffix lookup. It is called with the
//   hostname, the resolved options and `result`, and is expected to write
//   `publicSuffix` into `result` (presumably `isIcann`/`isPrivate` as well;
//   confirm against the lookup implementations).
// - `partialOptions`: user options, completed by `setDefaults`.
// - `result`: object mutated in place; it is not reset here.
//
// NOTE(review): no stage stops on `FLAG.IS_VALID`; as written, passing that
// value runs every stage, just like `FLAG.ALL`.
export function parseImpl(
  url: string,
  step: FLAG,
  suffixLookup: (
    _1: string,
    _2: ISuffixLookupOptions,
    _3: IPublicSuffix,
  ) => void,
  partialOptions: Partial<IOptions>,
  result: IResult,
): IResult {
  const options: IOptions = /*@__INLINE__*/ setDefaults(partialOptions);

  // Cheap guard: the library may be called from untyped code, in which case
  // `url` can be a value of any type.
  if (typeof url !== 'string') {
    return result;
  }

  // Stage 1: hostname.
  //
  // With `options.extractHostname` disabled the input is trusted to already
  // be a hostname and is used verbatim, which speeds up parsing. Otherwise
  // the hostname is extracted from `url`. `options.mixedInputs` tells whether
  // a mix of URLs and hostnames is expected; only in that case is `url` first
  // tested with `isValidHostname`, and the outcome handed to
  // `extractHostname` as a hint.
  if (options.extractHostname) {
    const urlIsHostname = options.mixedInputs ? isValidHostname(url) : false;
    result.hostname = extractHostname(url, urlIsHostname);
  } else {
    result.hostname = url;
  }

  if (result.hostname === null || step === FLAG.HOSTNAME) {
    return result;
  }

  // Stage 2: IP detection (optional). An IP address has no public suffix,
  // domain or subdomain, so there is nothing more to compute.
  if (options.detectIp) {
    const hostnameIsIp = isIp(result.hostname);
    result.isIp = hostnameIsIp;
    if (hostnameIsIp) {
      return result;
    }
  }

  // Stage 3: hostname validation (optional). It only runs when the hostname
  // was extracted by this function. An invalid hostname is discarded since no
  // valid domain or subdomain can come out of it.
  const mustValidate = options.validateHostname && options.extractHostname;
  if (mustValidate && !isValidHostname(result.hostname)) {
    result.hostname = null;
    return result;
  }

  // Stage 4: public suffix, written into `result` by the lookup itself.
  suffixLookup(result.hostname, options, result);
  if (result.publicSuffix === null || step === FLAG.PUBLIC_SUFFIX) {
    return result;
  }

  // Stage 5: domain.
  result.domain = getDomain(result.publicSuffix, result.hostname, options);
  if (result.domain === null || step === FLAG.DOMAIN) {
    return result;
  }

  // Stage 6: subdomain.
  result.subdomain = getSubdomain(result.hostname, result.domain);

  // Stage 7: domain without its suffix, skipped when the caller asked to stop
  // at the subdomain.
  if (step !== FLAG.SUB_DOMAIN) {
    result.domainWithoutSuffix = getDomainWithoutSuffix(
      result.domain,
      result.publicSuffix,
    );
  }

  return result;
}
|