Upload folder using huggingface_hub

d1ceb73 verified 11 months ago

10.2 kB

	import os
	import re
	from pathlib import Path
	from importlib.machinery import all_suffixes

	from jedi.inference.cache import inference_state_method_cache
	from jedi.inference.base_value import ContextualizedNode
	from jedi.inference.helpers import is_string, get_str_or_none
	from jedi.parser_utils import get_cached_code_lines
	from jedi.file_io import FileIO
	from jedi import settings
	from jedi import debug

	# Threshold for the number of paths gathered from buildout scripts;
	# discover_buildout_paths compares the size of its result set against it.
	_BUILDOUT_PATH_INSERTION_LIMIT = 10


	def _abs_path(module_context, str_path: str):
	    """
	    Turn ``str_path`` into an absolute ``Path``.

	    An already absolute input is returned as is. A relative input is joined
	    onto the directory of the module's file. ``None`` is returned when the
	    module context does not know its file.
	    """
	    candidate = Path(str_path)
	    if not candidate.is_absolute():
	        module_file = module_context.py__file__()
	        if module_file is None:
	            # Without a file location there is nothing to anchor a relative
	            # path to.
	            return None
	        candidate = module_file.parent.joinpath(candidate).absolute()
	    return candidate


	def _is_sys_path_target(assignee):
	    """Return a truthy value if ``assignee`` is a node starting with ``sys.path``."""
	    if assignee.type not in ('power', 'atom_expr'):
	        return False
	    children = assignee.children
	    if len(children) <= 1:
	        return False
	    first = children[0]
	    if first.type != 'name' or first.value != 'sys':
	        return False
	    trailer = children[1]
	    return trailer.children[0] == '.' and trailer.children[1].value == 'path'


	def _paths_from_assignment(module_context, expr_stmt):
	    """
	    Extracts the assigned strings from an assignment that looks as follows::

	        sys.path[0:0] = ['module/path', 'another/module/path']

	    This function is in general pretty tolerant (and therefore 'buggy').
	    However, it's not a big issue usually to add more paths to Jedi's sys_path,
	    because it will only affect Jedi in very random situations and by adding
	    more paths than necessary, it usually benefits the general user.

	    Yields the absolute path for every string value inferred from
	    ``expr_stmt``; strings that cannot be made absolute are skipped.
	    """
	    children = expr_stmt.children
	    # Children alternate between assignment targets and operators.
	    for assignee, operator in zip(children[::2], children[1::2]):
	        # Explicit checks instead of ``assert``: assertions are stripped when
	        # Python runs with -O, which would disable this filtering entirely.
	        if operator not in ['=', '+=']:
	            continue
	        if not _is_sys_path_target(assignee):
	            continue
	        # TODO Essentially we're not checking details on sys.path
	        # manipulation. Both assignment of the sys.path and changing/adding
	        # parts of the sys.path are the same: They get added to the end of
	        # the current sys.path. In particular it is not verified that the
	        # target is a slice subscript such as ``sys.path[0:0]``.

	        cn = ContextualizedNode(module_context.create_context(expr_stmt), expr_stmt)
	        for lazy_value in cn.infer().iterate(cn):
	            for value in lazy_value.infer():
	                if is_string(value):
	                    abs_path = _abs_path(module_context, value.get_safe_value())
	                    if abs_path is not None:
	                        yield abs_path


	def _paths_from_list_modifications(module_context, trailer1, trailer2):
	    """
	    Extract the path from either "sys.path.append" or "sys.path.insert".

	    ``trailer1`` is expected to be the attribute access (``.append`` /
	    ``.insert``) and ``trailer2`` the call. Yields the absolute path for
	    every string inferred from the path argument; nothing is yielded when
	    the nodes do not have that form.
	    """
	    # Guarantee that both are trailers, the first one a name and the second one
	    # a function execution with at least one param.
	    if not (trailer1.type == 'trailer' and trailer1.children[0] == '.'
	            and trailer2.type == 'trailer' and trailer2.children[0] == '('
	            and len(trailer2.children) == 3):
	        return

	    name = trailer1.children[1].value
	    if name not in ('insert', 'append'):
	        return
	    arg = trailer2.children[1]
	    if name == 'insert':
	        # ``insert(index, path)``: pick the second argument. A lone argument
	        # can be a leaf node without a ``children`` attribute, so do not
	        # access it unconditionally.
	        arg_children = getattr(arg, 'children', ())
	        if len(arg_children) in (3, 4):  # Possible trailing comma.
	            arg = arg_children[2]

	    for value in module_context.create_context(arg).infer_node(arg):
	        p = get_str_or_none(value)
	        if p is None:
	            continue
	        abs_path = _abs_path(module_context, p)
	        if abs_path is not None:
	            yield abs_path


	@inference_state_method_cache(default=[])
	def check_sys_path_modifications(module_context):
	    """
	    Detect sys.path modifications within module.

	    Looks at every use of the name ``path`` in the module tree, keeps those
	    that are part of a ``sys.path`` expression and returns a list of the
	    paths found by the list-modification and assignment helpers.
	    """
	    def iter_sys_path_nodes(candidates):
	        # Yield (name, node) for each name whose grandparent is a
	        # ``sys.path...`` expression.
	        for candidate in candidates:
	            node = candidate.parent.parent
	            if node is None or node.type not in ('power', 'atom_expr'):
	                continue
	            parts = node.children
	            if not (parts[0].type == 'name' and parts[0].value == 'sys'
	                    and parts[1].type == 'trailer'):
	                continue
	            attribute = parts[1].children[1]
	            if attribute.type == 'name' and attribute.value == 'path':
	                yield candidate, node

	    if module_context.tree_node is None:
	        return []

	    try:
	        path_names = module_context.tree_node.get_used_names()['path']
	    except KeyError:
	        # The name ``path`` is not used anywhere in this module.
	        return []

	    collected = []
	    for _, node in iter_sys_path_nodes(path_names):
	        statement = node.parent
	        if len(node.children) >= 4:
	            # Something like ``sys.path.append(...)``: hand over the two
	            # trailers that follow ``sys.path``.
	            collected.extend(
	                _paths_from_list_modifications(module_context, *node.children[2:4])
	            )
	        elif statement is not None and statement.type == 'expr_stmt':
	            collected.extend(_paths_from_assignment(module_context, statement))
	    return collected


	def discover_buildout_paths(inference_state, script_path):
	    """
	    Collect the paths that the buildout scripts found for ``script_path``
	    add to ``sys.path``.

	    At most ``_BUILDOUT_PATH_INSERTION_LIMIT`` paths are gathered; once the
	    limit is reached no further buildout script is inspected.

	    :return: a set of the discovered paths.
	    """
	    buildout_script_paths = set()

	    for buildout_script_path in _get_buildout_script_paths(script_path):
	        for path in _get_paths_from_buildout_script(inference_state, buildout_script_path):
	            buildout_script_paths.add(path)
	            if len(buildout_script_paths) >= _BUILDOUT_PATH_INSERTION_LIMIT:
	                # Leave both loops: a plain ``break`` would only end the
	                # current script and let every remaining script add more
	                # paths past the limit.
	                return buildout_script_paths

	    return buildout_script_paths


	def _get_paths_from_buildout_script(inference_state, buildout_script_path):
	    """
	    Parse one buildout script and yield the paths reported by
	    ``check_sys_path_modifications`` for it.

	    Nothing is yielded (and a warning is logged) when parsing the script
	    raises ``IOError``.
	    """
	    script_io = FileIO(str(buildout_script_path))
	    try:
	        tree = inference_state.parse(
	            file_io=script_io, cache=True, cache_path=settings.cache_directory,
	        )
	    except IOError:
	        debug.warning('Error trying to read buildout_script: %s', buildout_script_path)
	        return

	    # Imported at call time rather than at module import time.
	    from jedi.inference.value import ModuleValue
	    lines = get_cached_code_lines(inference_state.grammar, buildout_script_path)
	    module_value = ModuleValue(
	        inference_state, tree,
	        file_io=script_io, string_names=None, code_lines=lines,
	    )
	    for found in check_sys_path_modifications(module_value.as_context()):
	        yield found


	def _get_parent_dir_with_file(path: Path, filename):
	    """
	    Return the closest parent directory of ``path`` that contains a regular
	    file called ``filename``, or ``None`` if no parent does.

	    Parents whose check raises ``OSError`` are skipped.
	    """
	    for directory in path.parents:
	        candidate = directory.joinpath(filename)
	        try:
	            found = candidate.is_file()
	        except OSError:
	            found = False
	        if found:
	            return directory
	    return None


	def _get_buildout_script_paths(search_path: Path):
	    """
	    if there is a 'buildout.cfg' file in one of the parent directories of the
	    given module it will yield all files in the buildout bin directory that
	    look like python files, i.e. whose first line is a shebang mentioning
	    ``python``.

	    :param search_path: absolute path to the module.
	    """
	    project_root = _get_parent_dir_with_file(search_path, 'buildout.cfg')
	    if not project_root:
	        return
	    bin_path = project_root.joinpath('bin')
	    # ``is_dir`` rather than ``exists``: a regular file named ``bin`` would
	    # make ``os.listdir`` raise.
	    if not bin_path.is_dir():
	        return

	    for filename in os.listdir(bin_path):
	        filepath = bin_path.joinpath(filename)
	        try:
	            with open(filepath, 'r') as f:
	                firstline = f.readline()
	        except (UnicodeDecodeError, IOError) as e:
	            # Probably a binary file; permission error or race cond. because
	            # file got deleted. Ignore it.
	            debug.warning(str(e))
	            continue
	        # Yield only after the file has been closed, so no handle stays open
	        # while the consumer works on the script or abandons the generator.
	        if firstline.startswith('#!') and 'python' in firstline:
	            yield filepath


	def remove_python_path_suffix(path):
	    """
	    Return ``path`` without its Python module suffix.

	    The recognised suffixes are those of ``importlib.machinery.all_suffixes()``
	    plus ``.pyi``. Suffixes are compared against the end of the file name,
	    longest first, so that a suffix containing several dots is removed as a
	    whole instead of only its last dotted part. A path whose name has none of
	    these suffixes (or consists of nothing but the suffix) is returned
	    unchanged.
	    """
	    name = path.name
	    for suffix in sorted(all_suffixes() + ['.pyi'], key=len, reverse=True):
	        # Require something in front of the suffix: a file called ``.py`` has
	        # no module name that could be kept.
	        if suffix and len(name) > len(suffix) and name.endswith(suffix):
	            return path.with_name(name[:-len(suffix)])
	    return path


	def transform_path_to_dotted(sys_path, module_path):
	    """
	    Returns the dotted path inside a sys.path as a tuple of names. e.g.

	    >>> transform_path_to_dotted([str(Path("/foo").absolute())], Path('/foo/bar/baz.py').absolute())
	    (('bar', 'baz'), False)

	    Returns (None, False) if the path doesn't really resolve to anything.
	    The second return part is if it is a package.

	    :param sys_path: iterable of path strings to resolve against.
	    :param module_path: ``Path`` of the module file.
	    """
	    # First remove the suffix.
	    module_path = remove_python_path_suffix(module_path)
	    if module_path.name.startswith('.'):
	        return None, False

	    # Once the suffix was removed we are using the files as we know them. This
	    # means that if someone uses an ending like .vim for a Python file, .vim
	    # will be part of the returned dotted part.

	    is_package = module_path.name == '__init__'
	    if is_package:
	        module_path = module_path.parent

	    module_str = str(module_path)
	    # On Windows a path can also use a slash.
	    separators = (os.path.sep, '/')

	    def iter_potential_solutions():
	        for p in sys_path:
	            if not module_str.startswith(p):
	                continue
	            rest = module_str[len(p):]
	            if rest.startswith(separators):
	                # Remove the separator between the sys.path entry and the
	                # remainder.
	                rest = rest[1:]
	            elif p and not p.endswith(separators):
	                # Only a textual prefix, not a parent directory: '/foo' must
	                # not match '/foobar/baz'.
	                continue

	            if rest:
	                split = rest.split(os.path.sep)
	                if not all(split):
	                    # This means that part of the file path was empty, this
	                    # is very strange and is probably a file that is called
	                    # `.py`.
	                    return
	                # Stub folders for foo can end with foo-stubs. Just remove
	                # it.
	                yield tuple(re.sub(r'-stubs$', '', s) for s in split)

	    potential_solutions = tuple(iter_potential_solutions())
	    if not potential_solutions:
	        return None, False
	    # Try to find the shortest path, this makes more sense usually, because the
	    # user usually has venvs somewhere. This means that a path like
	    # .tox/py37/lib/python3.7/os.py can be normal for a file. However in that
	    # case we definitely want to return ['os'] as a path and not a crazy
	    # ['.tox', 'py37', 'lib', 'python3.7', 'os']. Keep in mind that this is a
	    # heuristic and there's no way to "always" do it right.
	    return min(potential_solutions, key=len), is_package