File size: 4,906 Bytes
5fae594
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
/**
 * Implement a factory allowing to plug different implementations of suffix
 * lookup (e.g.: using a trie or the packed hashes datastructures). This is used
 * and exposed in `tldts.ts` and `tldts-experimental.ts` bundle entrypoints.
 */

import getDomain from './domain';
import getDomainWithoutSuffix from './domain-without-suffix';
import extractHostname from './extract-hostname';
import isIp from './is-ip';
import isValidHostname from './is-valid';
import { IPublicSuffix, ISuffixLookupOptions } from './lookup/interface';
import { IOptions, setDefaults } from './options';
import getSubdomain from './subdomain';

export interface IResult {
  // `hostname` is either a registered name (including but not limited to a
  // hostname), or an IP address. IPv4 addresses must be in dot-decimal
  // notation, and IPv6 addresses must be enclosed in brackets ([]). This is
  // directly extracted from the input URL.
  hostname: string | null;

  // Is `hostname` an IP? (IPv4 or IPv6)
  isIp: boolean | null;

  // `hostname` split between subdomain, domain and its public suffix (if any)
  subdomain: string | null;
  domain: string | null;
  publicSuffix: string | null;
  domainWithoutSuffix: string | null;

  // Specifies if `publicSuffix` comes from the ICANN or PRIVATE section of the list
  isIcann: boolean | null;
  isPrivate: boolean | null;
}

export function getEmptyResult(): IResult {
  return {
    domain: null,
    domainWithoutSuffix: null,
    hostname: null,
    isIcann: null,
    isIp: null,
    isPrivate: null,
    publicSuffix: null,
    subdomain: null,
  };
}

export function resetResult(result: IResult): void {
  result.domain = null;
  result.domainWithoutSuffix = null;
  result.hostname = null;
  result.isIcann = null;
  result.isIp = null;
  result.isPrivate = null;
  result.publicSuffix = null;
  result.subdomain = null;
}

// Flags representing steps in the `parse` function. They are used to implement
// an early stop mechanism (simulating some form of laziness) to avoid doing
// more work than necessary to perform a given action (e.g.: we don't need to
// extract the domain and subdomain if we are only interested in public suffix).
export const enum FLAG {
  HOSTNAME,
  IS_VALID,
  PUBLIC_SUFFIX,
  DOMAIN,
  SUB_DOMAIN,
  ALL,
}

export function parseImpl(
  url: string,
  step: FLAG,
  suffixLookup: (
    _1: string,
    _2: ISuffixLookupOptions,
    _3: IPublicSuffix,
  ) => void,
  partialOptions: Partial<IOptions>,
  result: IResult,
): IResult {
  const options: IOptions = /*@__INLINE__*/ setDefaults(partialOptions);

  // Very fast approximate check to make sure `url` is a string. This is needed
  // because the library will not necessarily be used in a typed setup and
  // values of arbitrary types might be given as argument.
  if (typeof url !== 'string') {
    return result;
  }

  // Extract hostname from `url` only if needed. This can be made optional
  // using `options.extractHostname`. This option will typically be used
  // whenever we are sure the inputs to `parse` are already hostnames and not
  // arbitrary URLs.
  //
  // `mixedInput` allows to specify if we expect a mix of URLs and hostnames
  // as input. If only hostnames are expected then `extractHostname` can be
  // set to `false` to speed-up parsing. If only URLs are expected then
  // `mixedInputs` can be set to `false`. The `mixedInputs` is only a hint
  // and will not change the behavior of the library.
  if (!options.extractHostname) {
    result.hostname = url;
  } else if (options.mixedInputs) {
    result.hostname = extractHostname(url, isValidHostname(url));
  } else {
    result.hostname = extractHostname(url, false);
  }

  if (step === FLAG.HOSTNAME || result.hostname === null) {
    return result;
  }

  // Check if `hostname` is a valid ip address
  if (options.detectIp) {
    result.isIp = isIp(result.hostname);
    if (result.isIp) {
      return result;
    }
  }

  // Perform optional hostname validation. If hostname is not valid, no need to
  // go further as there will be no valid domain or sub-domain.
  if (
    options.validateHostname &&
    options.extractHostname &&
    !isValidHostname(result.hostname)
  ) {
    result.hostname = null;
    return result;
  }

  // Extract public suffix
  suffixLookup(result.hostname, options, result);
  if (step === FLAG.PUBLIC_SUFFIX || result.publicSuffix === null) {
    return result;
  }

  // Extract domain
  result.domain = getDomain(result.publicSuffix, result.hostname, options);
  if (step === FLAG.DOMAIN || result.domain === null) {
    return result;
  }

  // Extract subdomain
  result.subdomain = getSubdomain(result.hostname, result.domain);
  if (step === FLAG.SUB_DOMAIN) {
    return result;
  }

  // Extract domain without suffix
  result.domainWithoutSuffix = getDomainWithoutSuffix(
    result.domain,
    result.publicSuffix,
  );

  return result;
}