|
import re |
|
|
|
# Package metadata.
__version__ = "0.1.1"
__author__ = "Nicolas Aimetti <[email protected]>"

# Names exported by a star-import of this module.
__all__ = ["validate_rfc3986"]
|
|
|
|
|
|
|
|
|
|
|
|
|
# Building blocks of the RFC 3986 (section 3.2.2) ``IPv6address`` rule.
# Every piece is a plain regex fragment without whitespace or ``#``, so the
# final pattern can be embedded in ``re.VERBOSE`` patterns unchanged.

# h16: one to four hexadecimal digits.
_H16 = r"[0-9A-Fa-f]{1,4}"

# h16 ":" as a repeatable group.
_H16_COLON = "(?:" + _H16 + ":)"

# dec-octet: 0-255.  The RFC grammar does not allow leading zeros
# ("01", "001"), so single and double digit values are spelled out
# explicitly instead of using the looser ``[01]?[0-9][0-9]?``.
_DEC_OCTET = r"(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9])"

# IPv4address: four dec-octets separated by dots.
_IPV4 = "(?:" + _DEC_OCTET + r"\.){3}" + _DEC_OCTET

# ls32: the least-significant 32 bits, either ``h16:h16`` or an IPv4address.
_LS32 = "(?:" + _H16 + ":" + _H16 + "|" + _IPV4 + ")"

# One alternative per line of the RFC 3986 ABNF.  ``{,n}`` is Python's
# spelling of "zero to n repetitions".
IPv6_RE = "(?:" + "|".join((
    #                            6( h16 ":" ) ls32
    _H16_COLON + "{6}" + _LS32,
    #                       "::" 5( h16 ":" ) ls32
    "::" + _H16_COLON + "{5}" + _LS32,
    # [               h16 ] "::" 4( h16 ":" ) ls32
    "(?:" + _H16 + ")?::" + _H16_COLON + "{4}" + _LS32,
    # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    "(?:" + _H16_COLON + "?" + _H16 + ")?::" + _H16_COLON + "{3}" + _LS32,
    # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    "(?:" + _H16_COLON + "{,2}" + _H16 + ")?::" + _H16_COLON + "{2}" + _LS32,
    # [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
    "(?:" + _H16_COLON + "{,3}" + _H16 + ")?::" + _H16_COLON + _LS32,
    # [ *4( h16 ":" ) h16 ] "::"              ls32
    "(?:" + _H16_COLON + "{,4}" + _H16 + ")?::" + _LS32,
    # [ *5( h16 ":" ) h16 ] "::"              h16
    "(?:" + _H16_COLON + "{,5}" + _H16 + ")?::" + _H16,
    # [ *6( h16 ":" ) h16 ] "::"
    "(?:" + _H16_COLON + "{,6}" + _H16 + ")?::",
)) + ")"
|
|
|
|
|
|
|
|
|
# RFC 3986 section 3.2 ``authority`` rule: [ userinfo "@" ] host [ ":" port ].
#
# The text is a ``re.VERBOSE`` fragment (whitespace and ``#`` comments are
# ignored by the regex engine) and also a ``str.format`` template, which is
# why literal repetition counts are written with doubled braces (``{{2}}``).
#
# The IPvFuture marker is matched as ``[vV]``: ABNF string literals are
# case-insensitive, so both "v1.x" and "V1.x" are valid IP-literals.
#
# Any string that fails the IPv4 alternative can still match ``reg-name``,
# whose character class includes digits and ".".
AUTHORITY_RE = r"""
    (?:(?:[a-zA-Z0-9_.~\-!$&'()*+,;=:]|%[0-9A-Fa-f]{{2}})*@)?    # user info
    (?:
        \[(?:{ip_v6}|[vV][0-9A-Fa-f]+\.[a-zA-Z0-9_.~\-!$&'()*+,;=:]+)\]    # IP-literal
      | (?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){{3}}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)    # IPv4
      | (?:[a-zA-Z0-9_.~\-!$&'()*+,;=]|%[0-9A-Fa-f]{{2}})*    # reg-name
    )    # host
    (?::[0-9]*)?    # port
""".format(ip_v6=IPv6_RE)
|
|
|
# RFC 3986 ``pchar``: unreserved / pct-encoded / sub-delims / ":" / "@".
# The ``{2}`` is written with single braces because this string is only ever
# passed as a ``str.format`` argument, never used as a template itself.
PCHAR_RE = (
    r"(?:"
    r"[a-zA-Z0-9_.~\-!$&'()*+,;=:@]"  # unreserved, sub-delims, ":" and "@"
    r"|%[0-9A-Fa-f]{2}"  # pct-encoded octet
    r")"
)
|
|
|
# RFC 3986 ``query`` (the same grammar is used for ``fragment``):
# zero or more of pchar / "/" / "?".
# The ``{2}`` is written with single braces because this string is only ever
# passed as a ``str.format`` argument, never used as a template itself.
QUERY_RE = (
    r"(?:"
    r"[a-zA-Z0-9_.~\-!$&'()*+,;=:@/?]"  # pchar characters plus "/" and "?"
    r"|%[0-9A-Fa-f]{2}"  # pct-encoded octet
    r")*"
)
|
|
|
# RFC 3986 ``URI`` rule: scheme ":" hier-part [ "?" query ] [ "#" fragment ].
#
# ``re.VERBOSE`` fragment: whitespace and ``#`` comments inside the string
# are ignored by the regex engine, and a literal "#" is written ``\#``.
# The closing quotes sit on their own line so the trailing comment ends with
# a newline and cannot swallow whatever is appended after this fragment.
URI_RE = r"""
    [a-zA-Z][a-zA-Z0-9+.-]*                 #scheme
    :
    (?:
        //
        {authority}
        (?:/{pchar}*)*                      # path-abempty
      | /(?:{pchar}+ (?:/{pchar}*)*)?       # path-absolute
      | {pchar}+ (?:/{pchar}*)*             # path-rootless
      |                                     # or nothing
    )                                       # hier-part
    (?:\?{query})?                          # Query
    (?:\#{fragment})?                       # Fragment
""".format(pchar=PCHAR_RE, authority=AUTHORITY_RE, query=QUERY_RE, fragment=QUERY_RE)
|
|
|
|
|
# RFC 3986 ``relative-ref`` rule: relative-part [ "?" query ] [ "#" fragment ].
#
# ``re.VERBOSE`` fragment and ``str.format`` template: literal repetition
# counts use doubled braces, and a literal "#" is written ``\#``.
# The first segment of ``path-noscheme`` uses a character class without ":",
# so a relative reference cannot be mistaken for a scheme.
RELATIVE_REF_RE = r"""
    (?:
        //
        {authority}
        (?:/{pchar}*)*                                                          # path-abempty
      | /(?:{pchar}+ (?:/{pchar}*)*)?                                           # path-absolute
      | (?:[a-zA-Z0-9_.~\-!$&'()*+,;=@]|%[0-9A-Fa-f]{{2}})+ (?:/{pchar}*)*      # path-noscheme
      |                                                                         # or nothing
    )                                                                           # relative-part
    (?:\?{query})?                                                              # Query
    (?:\#{fragment})?                                                           # Fragment
""".format(pchar=PCHAR_RE, authority=AUTHORITY_RE, query=QUERY_RE, fragment=QUERY_RE)
|
|
|
# Compiled, fully anchored matcher for the ``URI`` rule.
# The end anchor is ``\Z`` rather than ``$``: ``$`` also matches just before
# a trailing newline, which would let "http://example.com\n" validate.
URI_RE_COMP = re.compile(r"^{uri_re}\Z".format(uri_re=URI_RE), re.VERBOSE)
|
|
|
# Compiled, fully anchored matcher for the ``URI-reference`` rule
# (either a full URI or a relative reference).
# The end anchor is ``\Z`` rather than ``$``: ``$`` also matches just before
# a trailing newline, which would let "/some/path\n" validate.
URI_REF_RE_COMP = re.compile(
    r"^(?:{uri_re}|{relative_ref})\Z".format(
        uri_re=URI_RE,
        relative_ref=RELATIVE_REF_RE,
    ),
    re.VERBOSE,
)
|
|
|
|
|
def validate_rfc3986(url, rule='URI'):
    """
    Check a string against one of the RFC 3986 grammar rules.

    :param url: String containing the URI (or URI reference) to validate.
    :param rule: Grammar rule to apply: 'URI' (default) or 'URI_reference'.
    :return: The ``re`` match object when ``url`` conforms to the rule,
        otherwise ``None``; use the result for its truthiness.
    :raises ValueError: If ``rule`` is not one of the supported names.
    """
    # Select the compiled pattern first so there is a single matching call.
    if rule == 'URI':
        pattern = URI_RE_COMP
    elif rule == 'URI_reference':
        pattern = URI_REF_RE_COMP
    else:
        raise ValueError('Invalid rule')
    return pattern.match(url)
|
|