// reisarod's picture
// Upload folder using huggingface_hub
// 5fae594 verified
import { IOptions } from './options';
/**
 * Tell whether `vhost` matches `hostname` on a whole-label boundary.
 *
 * A plain string-suffix match is not sufficient: the match must either cover
 * the entire `hostname`, or be immediately preceded by a '.' in `hostname`, so
 * that `vhost` never starts in the middle of a label.
 *
 * | hostname       | vhost          | result |
 * | -------------- | -------------- | ------ |
 * | `not.evil.com` | `vil.com`      | false  |
 * | `not.evil.com` | `evil.com`     | true   |
 * | `not.evil.com` | `not.evil.com` | true   |
 *
 * @param hostname - Hostname being inspected.
 * @param vhost - Candidate host expected to terminate `hostname`.
 * @returns `true` when `vhost` ends `hostname` on a label boundary.
 */
function shareSameDomainSuffix(hostname: string, vhost: string): boolean {
  // Without a raw suffix match there is nothing else to verify.
  if (!hostname.endsWith(vhost)) {
    return false;
  }

  // Number of characters of `hostname` located in front of the matched suffix.
  const prefixLength = hostname.length - vhost.length;

  // Either both strings are identical, or the character right before the
  // matched suffix has to be a label separator.
  return prefixLength === 0 || hostname.charAt(prefixLength - 1) === '.';
}
/**
 * Build the general domain of `hostname`: its public suffix plus the single
 * label found directly in front of it.
 *
 * The function looks for the last '.' located strictly before the label that
 * precedes the public suffix and keeps everything after that dot:
 *
 *   - `not.evil.co.uk` with suffix `co.uk`: the last dot before `evil` is at
 *     index 3, so the result is `evil.co.uk`.
 *   - `example.co.uk` with suffix `co.uk`: no dot exists before `example`, so
 *     the hostname is returned unchanged.
 *
 * @param hostname - Hostname to extract the domain from.
 * @param publicSuffix - Public suffix of `hostname`; the index arithmetic
 *   below assumes `hostname` ends with it.
 * @returns The general domain, or `hostname` itself when it has no sub-domain.
 */
function extractDomainWithSuffix(
  hostname: string,
  publicSuffix: string,
): string {
  // Skip over the public suffix and the '.' in front of it (hence the `- 2`),
  // so the backward search starts on the last character of the preceding label.
  const searchStart = hostname.length - publicSuffix.length - 2;
  const separatorIndex = hostname.lastIndexOf('.', searchStart);

  // No separator means there is no sub-domain to strip; otherwise keep
  // everything located after the separator.
  return separatorIndex === -1
    ? hostname
    : hostname.slice(separatorIndex + 1);
}
/**
 * Determine the general domain of `hostname` given its public suffix.
 *
 * Resolution order:
 *   1. When `options.validHosts` is not `null`, the first entry that ends
 *      `hostname` on a label boundary is returned as-is.
 *   2. When `hostname`, ignoring its leading dots, is as long as `suffix`, the
 *      hostname is considered to be the public suffix itself and `null` is
 *      returned.
 *   3. Otherwise the result is the public suffix extended by one label
 *      (e.g. `not.evil.co.uk` with suffix `co.uk` gives `evil.co.uk`).
 *
 * @param suffix - Public suffix of `hostname`. It is expected to already be a
 *   suffix of `hostname`, which is why step 2 only compares lengths.
 * @param hostname - Hostname to extract the domain from.
 * @param options - Options object; only `validHosts` is read here.
 * @returns The matching valid host, the general domain, or `null` when
 *   `hostname` is nothing more than its public suffix.
 */
export default function getDomain(
  suffix: string,
  hostname: string,
  options: IOptions,
): string | null {
  // Explicitly allowed hosts take precedence over the public suffix rules.
  const { validHosts } = options;
  if (validHosts !== null) {
    for (const vhost of validHosts) {
      if (/*@__INLINE__*/ shareSameDomainSuffix(hostname, vhost)) {
        return vhost;
      }
    }
  }

  // Count the '.' characters found at the very beginning of `hostname`.
  // `charAt` yields '' past the end of the string, which terminates the loop.
  let leadingDotCount = 0;
  while (hostname.charAt(leadingDotCount) === '.') {
    leadingDotCount += 1;
  }

  // `suffix` is expected to terminate `hostname`, so identical lengths (once
  // leading dots are discarded) mean that `hostname` is only a public suffix
  // and therefore has no domain.
  const hostnameIsOnlySuffix =
    suffix.length === hostname.length - leadingDotCount;
  if (hostnameIsOnlySuffix) {
    return null;
  }

  // The general domain is the public suffix plus one extra level of depth.
  return /*@__INLINE__*/ extractDomainWithSuffix(hostname, suffix);
}