Spaces:

kdamevski
/

conversational_uav_explorer

Runtime error

App Files Files Community

conversational_uav_explorer / myenv /Lib /site-packages /rfc3986 /abnf_regexp.py

kdamevski

Upload folder using huggingface_hub

1c60c6e almost 2 years ago

raw

history blame contribute delete

9.12 kB

	# -*- coding: utf-8 -*-
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
	# implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	"""Module for the regular expressions crafted from ABNF."""

	import sys

	# Delimiter sets, see: https://tools.ietf.org/html/rfc3986#page-13
	# Each delimiter group is exposed as a plain string (under a short and a
	# long name) and as a set of its individual characters.
	GEN_DELIMS = GENERIC_DELIMITERS = ":/?#[]@"
	GENERIC_DELIMITERS_SET = set(GENERIC_DELIMITERS)
	SUB_DELIMS = SUB_DELIMITERS = "!$&'()*+,;="
	SUB_DELIMITERS_SET = set(SUB_DELIMITERS)
	# Same characters as SUB_DELIMITERS with the '*' escaped, so the text can
	# be pasted into a regular expression.
	SUB_DELIMITERS_RE = r"!$&'()\*+,;="
	RESERVED_CHARS_SET = GENERIC_DELIMITERS_SET.union(SUB_DELIMITERS_SET)

	ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
	DIGIT = "0123456789"
	# https://tools.ietf.org/html/rfc3986#section-2.3
	# NOTE(review): this string contains "!", which is already part of
	# SUB_DELIMITERS and is absent from UNRESERVED_RE below. The value is kept
	# unchanged here because other modules may depend on the resulting set;
	# confirm against RFC 3986 section 2.3 before altering it.
	UNRESERVED = UNRESERVED_CHARS = ALPHA + DIGIT + r"._!-~"
	UNRESERVED_CHARS_SET = set(UNRESERVED_CHARS)
	# Every character that is reserved or unreserved, as a single set.
	NON_PCT_ENCODED_SET = RESERVED_CHARS_SET.union(UNRESERVED_CHARS_SET)
	# Character-class body for the unreserved characters. The '-' is escaped
	# so it is not read as a range when other characters are appended.
	UNRESERVED_RE = r"A-Za-z0-9._~\-"

	# Percent encoded character values
	PERCENT_ENCODED = PCT_ENCODED = "%[A-Fa-f0-9]{2}"
	# One path character: either a single character from the class
	# (unreserved, sub-delimiter, ":" or "@") or one percent-encoded triplet.
	# The "|" has to be a bare alternation operator; spelled as "\|" it would
	# demand a literal pipe between the two parts instead of choosing one.
	PCHAR = "([" + UNRESERVED_RE + SUB_DELIMITERS_RE + ":@]|%s)" % PCT_ENCODED

	# NOTE(sigmavirus24): The scheme expression is deliberately stricter than
	# the one given in Appendix B, so that text which is not a scheme does not
	# get consumed as one.
	SCHEME_RE = "[a-zA-Z][a-zA-Z0-9+.-]*"
	_AUTHORITY_RE = "[^\\\\/?#]*"
	_PATH_RE = "[^?#]*"
	_QUERY_RE = "[^#]*"
	_FRAGMENT_RE = ".*"

	# Component name -> pattern, extracted from
	# http://tools.ietf.org/html/rfc3986#appendix-B
	COMPONENT_PATTERN_DICT = dict(
	    scheme=SCHEME_RE,
	    authority=_AUTHORITY_RE,
	    path=_PATH_RE,
	    query=_QUERY_RE,
	    fragment=_FRAGMENT_RE,
	)

	# See http://tools.ietf.org/html/rfc3986#appendix-B
	# Each component is captured in a named group so the pieces can be read
	# through the match object's groupdict(). The surrounding delimiters sit in
	# non-capturing groups, which means groups() yields only the five
	# components as well.
	URL_PARSING_RE = "".join(
	    (
	        r"(?:(?P<scheme>{scheme}):)?",
	        r"(?://(?P<authority>{authority}))?",
	        r"(?P<path>{path})",
	        r"(?:\?(?P<query>{query}))?",
	        r"(?:#(?P<fragment>{fragment}))?",
	    )
	).format(**COMPONENT_PATTERN_DICT)


	# #########################
	# Authority Matcher Section
	# #########################

	# Host patterns, see: http://tools.ietf.org/html/rfc3986#section-3.2.2
	#
	# Every "|" in the patterns below is a bare regex alternation operator.
	# Spelling it "\|" would make the expression require a literal pipe
	# character between the parts instead of choosing one of them.

	# The pattern for a regular name, e.g., www.google.com, api.github.com:
	# any number of percent-encoded triplets or single characters from the
	# sub-delimiter / unreserved class.
	REGULAR_NAME_RE = REG_NAME = "((?:{0}|[{1}])*)".format(
	    "%[0-9A-Fa-f]{2}", SUB_DELIMITERS_RE + UNRESERVED_RE
	)
	# The pattern for an IPv4 address, e.g., 192.168.255.255, 127.0.0.1.
	# Only the shape (four dot-separated groups of 1-3 digits) is expressed
	# here; the pattern itself does not bound each group to 0-255.
	IPv4_RE = r"([0-9]{1,3}\.){3}[0-9]{1,3}"
	# Hexadecimal characters used in each piece of an IPv6 address
	HEXDIG_RE = "[0-9A-Fa-f]{1,4}"
	# Least-significant 32 bits of an IPv6 address: two hex pieces or an
	# embedded IPv4 address.
	LS32_RE = "({hex}:{hex}|{ipv4})".format(hex=HEXDIG_RE, ipv4=IPv4_RE)
	# Substitutions into the following patterns for IPv6 patterns defined
	# http://tools.ietf.org/html/rfc3986#page-20
	_subs = {"hex": HEXDIG_RE, "ls32": LS32_RE}

	# Below: h16 = hexdig, see: https://tools.ietf.org/html/rfc5234 for details
	# about ABNF (Augmented Backus-Naur Form) use in the comments
	variations = [
	    # 6( h16 ":" ) ls32
	    "(%(hex)s:){6}%(ls32)s" % _subs,
	    # "::" 5( h16 ":" ) ls32
	    "::(%(hex)s:){5}%(ls32)s" % _subs,
	    # [ h16 ] "::" 4( h16 ":" ) ls32
	    "(%(hex)s)?::(%(hex)s:){4}%(ls32)s" % _subs,
	    # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
	    "((%(hex)s:)?%(hex)s)?::(%(hex)s:){3}%(ls32)s" % _subs,
	    # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
	    "((%(hex)s:){0,2}%(hex)s)?::(%(hex)s:){2}%(ls32)s" % _subs,
	    # [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
	    "((%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s" % _subs,
	    # [ *4( h16 ":" ) h16 ] "::" ls32
	    "((%(hex)s:){0,4}%(hex)s)?::%(ls32)s" % _subs,
	    # [ *5( h16 ":" ) h16 ] "::" h16
	    "((%(hex)s:){0,5}%(hex)s)?::%(hex)s" % _subs,
	    # [ *6( h16 ":" ) h16 ] "::"
	    "((%(hex)s:){0,6}%(hex)s)?::" % _subs,
	]

	# One outer group holding each variation in its own group, joined by
	# alternation: ((v0)|(v1)|...|(v8)).
	IPv6_RE = "(" + "|".join("({0})".format(v) for v in variations) + ")"

	IPv_FUTURE_RE = r"v[0-9A-Fa-f]+\.[%s]+" % (
	    UNRESERVED_RE + SUB_DELIMITERS_RE + ":"
	)

	# RFC 6874 Zone ID ABNF
	ZONE_ID = "(?:[" + UNRESERVED_RE + "]|" + PCT_ENCODED + ")+"

	# IPv6 address with an optional zone id. The RFC4007 variant accepts the
	# zone id after either "%25" or a bare "%"; the other variant accepts only
	# "%25".
	IPv6_ADDRZ_RFC4007_RE = IPv6_RE + "(?:(?:%25|%)" + ZONE_ID + ")?"
	IPv6_ADDRZ_RE = IPv6_RE + "(?:%25" + ZONE_ID + ")?"

	# Bracketed literal: an IPv6 address (with optional zone id) or an
	# IPvFuture address.
	IP_LITERAL_RE = r"\[({0}|{1})\]".format(
	    IPv6_ADDRZ_RFC4007_RE,
	    IPv_FUTURE_RE,
	)

	# Pattern for matching the host piece of the authority
	HOST_RE = HOST_PATTERN = "({0}|{1}|{2})".format(
	    REG_NAME,
	    IPv4_RE,
	    IP_LITERAL_RE,
	)
	# One or more userinfo characters or percent-encoded triplets, anchored at
	# the start of the input.
	USERINFO_RE = (
	    "^([" + UNRESERVED_RE + SUB_DELIMITERS_RE + ":]|%s)+" % (PCT_ENCODED)
	)
	PORT_RE = "[0-9]{1,5}"

	# ####################
	# Path Matcher Section
	# ####################

	# See http://tools.ietf.org/html/rfc3986#section-3.3 for more information
	# about the path patterns defined below.
	segments = {
	    "segment": PCHAR + "*",
	    # Non-zero length segment
	    "segment-nz": PCHAR + "+",
	    # Non-zero length segment without ":". This is a textual replace on
	    # the pattern, so it assumes the only ":" in PCHAR is the one listed
	    # as an allowed character.
	    "segment-nz-nc": PCHAR.replace(":", "") + "+",
	}

	# Path types taken from Section 3.3 (linked above)
	PATH_EMPTY = "^$"
	PATH_ROOTLESS = "%(segment-nz)s(/%(segment)s)*" % segments
	PATH_NOSCHEME = "%(segment-nz-nc)s(/%(segment)s)*" % segments
	PATH_ABSOLUTE = "/(%s)?" % PATH_ROOTLESS
	PATH_ABEMPTY = "(/%(segment)s)*" % segments
	# Whole-string match against any one of the five path types. The "|"
	# separators must be bare alternation operators; "\|" would instead
	# require literal pipe characters between the five sub-patterns.
	PATH_RE = "^(%s|%s|%s|%s|%s)$" % (
	    PATH_ABEMPTY,
	    PATH_ABSOLUTE,
	    PATH_NOSCHEME,
	    PATH_ROOTLESS,
	    PATH_EMPTY,
	)

	# Query and fragment share one pattern: zero or more characters from the
	# class ("/", "?", ":", "@", unreserved, sub-delimiters) or
	# percent-encoded triplets, anchored at both ends.
	FRAGMENT_RE = QUERY_RE = (
	    "^([/?:@" + UNRESERVED_RE + SUB_DELIMITERS_RE + "]|%s)*$" % PCT_ENCODED
	)

	# ##########################
	# Relative reference matcher
	# ##########################

	# Both patterns below are a single group of four alternatives: "//"
	# followed by the authority pattern and an abempty path, or one of three
	# authority-less path forms. The "|" separators must be bare alternation
	# operators; "\|" would match literal pipe characters instead.

	# See http://tools.ietf.org/html/rfc3986#section-4.2 for details
	RELATIVE_PART_RE = "(//%s%s|%s|%s|%s)" % (
	    COMPONENT_PATTERN_DICT["authority"],
	    PATH_ABEMPTY,
	    PATH_ABSOLUTE,
	    PATH_NOSCHEME,
	    PATH_EMPTY,
	)

	# See http://tools.ietf.org/html/rfc3986#section-3 for definition
	# Differs from RELATIVE_PART_RE only in using PATH_ROOTLESS where that one
	# uses PATH_NOSCHEME.
	HIER_PART_RE = "(//%s%s|%s|%s|%s)" % (
	    COMPONENT_PATTERN_DICT["authority"],
	    PATH_ABEMPTY,
	    PATH_ABSOLUTE,
	    PATH_ROOTLESS,
	    PATH_EMPTY,
	)

	# ###############
	# IRIs / RFC 3987
	# ###############

	# The patterns in this section mirror the URI patterns but use a wider
	# "unreserved" class that also admits the UCSCHAR ranges. Every "|" is a
	# bare regex alternation operator; "\|" would match a literal pipe.

	# Only wide-unicode gets the high-ranges of UCSCHAR
	if sys.maxunicode > 0xFFFF:  # pragma: no cover
	    IPRIVATE = u"\uE000-\uF8FF\U000F0000-\U000FFFFD\U00100000-\U0010FFFD"
	    UCSCHAR_RE = (
	        u"\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF"
	        u"\U00010000-\U0001FFFD\U00020000-\U0002FFFD"
	        u"\U00030000-\U0003FFFD\U00040000-\U0004FFFD"
	        u"\U00050000-\U0005FFFD\U00060000-\U0006FFFD"
	        u"\U00070000-\U0007FFFD\U00080000-\U0008FFFD"
	        u"\U00090000-\U0009FFFD\U000A0000-\U000AFFFD"
	        u"\U000B0000-\U000BFFFD\U000C0000-\U000CFFFD"
	        u"\U000D0000-\U000DFFFD\U000E1000-\U000EFFFD"
	    )
	else:  # pragma: no cover
	    IPRIVATE = u"\uE000-\uF8FF"
	    UCSCHAR_RE = u"\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF"

	# Character-class body: ASCII unreserved characters plus the UCSCHAR
	# ranges selected above.
	IUNRESERVED_RE = u"A-Za-z0-9\\._~\\-" + UCSCHAR_RE
	IPCHAR = u"([" + IUNRESERVED_RE + SUB_DELIMITERS_RE + u":@]|%s)" % PCT_ENCODED

	isegments = {
	    "isegment": IPCHAR + u"*",
	    # Non-zero length segment
	    "isegment-nz": IPCHAR + u"+",
	    # Non-zero length segment without ":". This is a textual replace on
	    # the pattern, so it assumes the only ":" in IPCHAR is the one listed
	    # as an allowed character.
	    "isegment-nz-nc": IPCHAR.replace(":", "") + u"+",
	}

	IPATH_ROOTLESS = u"%(isegment-nz)s(/%(isegment)s)*" % isegments
	IPATH_NOSCHEME = u"%(isegment-nz-nc)s(/%(isegment)s)*" % isegments
	IPATH_ABSOLUTE = u"/(?:%s)?" % IPATH_ROOTLESS
	IPATH_ABEMPTY = u"(?:/%(isegment)s)*" % isegments
	# Whole-string match against any one of the five path types.
	IPATH_RE = u"^(?:%s|%s|%s|%s|%s)$" % (
	    IPATH_ABEMPTY,
	    IPATH_ABSOLUTE,
	    IPATH_NOSCHEME,
	    IPATH_ROOTLESS,
	    PATH_EMPTY,
	)

	IREGULAR_NAME_RE = IREG_NAME = u"(?:{0}|[{1}])*".format(
	    u"%[0-9A-Fa-f]{2}", SUB_DELIMITERS_RE + IUNRESERVED_RE
	)

	IHOST_RE = IHOST_PATTERN = u"({0}|{1}|{2})".format(
	    IREG_NAME,
	    IPv4_RE,
	    IP_LITERAL_RE,
	)

	IUSERINFO_RE = (
	    u"^(?:[" + IUNRESERVED_RE + SUB_DELIMITERS_RE + u":]|%s)+" % (PCT_ENCODED)
	)

	IFRAGMENT_RE = (
	    u"^(?:[/?:@"
	    + IUNRESERVED_RE
	    + SUB_DELIMITERS_RE
	    + u"]|%s)*$" % PCT_ENCODED
	)
	# Same as IFRAGMENT_RE with the IPRIVATE ranges added to the class.
	IQUERY_RE = (
	    u"^(?:[/?:@"
	    + IUNRESERVED_RE
	    + SUB_DELIMITERS_RE
	    + IPRIVATE
	    + u"]|%s)*$" % PCT_ENCODED
	)

	IRELATIVE_PART_RE = u"(//%s%s|%s|%s|%s)" % (
	    COMPONENT_PATTERN_DICT["authority"],
	    IPATH_ABEMPTY,
	    IPATH_ABSOLUTE,
	    IPATH_NOSCHEME,
	    PATH_EMPTY,
	)

	IHIER_PART_RE = u"(//%s%s|%s|%s|%s)" % (
	    COMPONENT_PATTERN_DICT["authority"],
	    IPATH_ABEMPTY,
	    IPATH_ABSOLUTE,
	    IPATH_ROOTLESS,
	    PATH_EMPTY,
	)