import os
import re
from parso import python_bytes_to_unicode
from jedi.debug import dbg
from jedi.file_io import KnownContentFileIO, FolderIO
from jedi.inference.names import SubModuleName
from jedi.inference.imports import load_module_from_path
from jedi.inference.filters import ParserTreeFilter
from jedi.inference.gradual.conversion import convert_names

_IGNORE_FOLDERS = ('.tox', '.venv', '.mypy_cache', 'venv', '__pycache__')

_OPENED_FILE_LIMIT = 2000
"""
Stats from a 2016 Lenovo Notebook running Linux:
With os.walk, it takes about 10s to scan 11'000 files (without filesystem
caching). Once cached, it takes only 5s. So reading all of those files is
expected to take a few seconds, but not much more.
"""
_PARSED_FILE_LIMIT = 30
"""
For now we keep the number of parsed files very low, since parsing can easily
take 100ms for bigger files.
"""


def _resolve_names(definition_names, avoid_names=()):
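    """
    Yield the given definition names, recursively following module names to
    their goto targets while avoiding the names we started from.
    """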
for name in definition_names:
if name in avoid_names:
            # Avoid recursion here, because a goto on a module name lands
            # on the same module.
continue
if not isinstance(name, SubModuleName):
            # SubModuleNames do not actually exist; they are synthesized
            # names for imports like `import foo.bar.baz`.
yield name
if name.api_type == 'module':
yield from _resolve_names(name.goto(), definition_names)


def _dictionarize(names):
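    """Map each name by its tree name, falling back to the name itself."""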
    return {
        (n if n.tree_name is None else n.tree_name): n
        for n in names
    }


def _find_defining_names(module_context, tree_name):
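    """
    Collect all names defining ``tree_name``: the goto results, their stub
    or non-stub counterparts, matching global variables and other
    definitions of the same name within the same contexts.
    """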
found_names = _find_names(module_context, tree_name)
for name in list(found_names):
# Convert from/to stubs, because those might also be usages.
found_names |= set(convert_names(
[name],
only_stubs=not name.get_root_context().is_stub(),
prefer_stub_to_compiled=False
))
found_names |= set(_find_global_variables(found_names, tree_name.value))
for name in list(found_names):
if name.api_type == 'param' or name.tree_name is None \
or name.tree_name.parent.type == 'trailer':
continue
        found_names |= set(_add_names_in_same_context(name.parent_context,
                                                      name.string_name))
return set(_resolve_names(found_names))


def _find_names(module_context, tree_name):
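    """Return the resolved goto results of ``tree_name``, including itself."""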
name = module_context.create_name(tree_name)
found_names = set(name.goto())
found_names.add(name)
return set(_resolve_names(found_names))


def _add_names_in_same_context(context, string_name):
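    """
    Yield every definition of ``string_name`` in ``context`` by repeatedly
    filtering before the earliest position found so far.
    """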
if context.tree_node is None:
return
until_position = None
while True:
filter_ = ParserTreeFilter(
parent_context=context,
until_position=until_position,
)
names = set(filter_.get(string_name))
if not names:
break
yield from names
ordered = sorted(names, key=lambda x: x.start_pos)
until_position = ordered[0].start_pos


def _find_global_variables(names, search_name):
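    """
    Yield matches for ``search_name`` from the global filter of each name's
    module, together with their definitions in the surrounding context.
    """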
for name in names:
if name.tree_name is None:
continue
module_context = name.get_root_context()
try:
method = module_context.get_global_filter
except AttributeError:
continue
else:
for global_name in method().get(search_name):
yield global_name
c = module_context.create_context(global_name.tree_name)
yield from _add_names_in_same_context(c, global_name.string_name)


def find_references(module_context, tree_name, only_in_module=False):
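    """
    Find all references to ``tree_name``. First collects the names that
    define it, then searches the relevant modules for usages of the same
    name. With ``only_in_module``, results are restricted to
    ``module_context``.
    """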
inf = module_context.inference_state
search_name = tree_name.value
    # Disable flow analysis: when ifs are only true in certain cases, we
    # want to find references in both branches.
try:
inf.flow_analysis_enabled = False
found_names = _find_defining_names(module_context, tree_name)
finally:
inf.flow_analysis_enabled = True
found_names_dct = _dictionarize(found_names)
module_contexts = [module_context]
if not only_in_module:
for m in set(d.get_root_context() for d in found_names):
if m != module_context and m.tree_node is not None \
and inf.project.path in m.py__file__().parents:
module_contexts.append(m)
    # For params, no search in other modules is necessary.
if only_in_module or any(n.api_type == 'param' for n in found_names):
potential_modules = module_contexts
else:
potential_modules = get_module_contexts_containing_name(
inf,
module_contexts,
search_name,
)
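    # References that do not (yet) match any of the found definitions, keyed
    # by tree name, so they can be merged in if a later match connects them.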
non_matching_reference_maps = {}
    for module_ctx in potential_modules:
        for name_leaf in module_ctx.tree_node.get_used_names().get(search_name, []):
            new = _dictionarize(_find_names(module_ctx, name_leaf))
if any(tree_name in found_names_dct for tree_name in new):
found_names_dct.update(new)
for tree_name in new:
for dct in non_matching_reference_maps.get(tree_name, []):
                    # A reference that was searched earlier now matches a
                    # found name. Merge the two maps.
found_names_dct.update(dct)
try:
del non_matching_reference_maps[tree_name]
except KeyError:
pass
else:
for name in new:
non_matching_reference_maps.setdefault(name, []).append(new)
result = found_names_dct.values()
if only_in_module:
return [n for n in result if n.get_root_context() == module_context]
return result


def _check_fs(inference_state, file_io, regex):
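    """
    Return a module context for ``file_io`` if its content matches ``regex``,
    otherwise ``None`` (for missing files, non-matches and compiled modules).
    """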
try:
code = file_io.read()
except FileNotFoundError:
return None
code = python_bytes_to_unicode(code, errors='replace')
if not regex.search(code):
return None
new_file_io = KnownContentFileIO(file_io.path, code)
m = load_module_from_path(inference_state, new_file_io)
if m.is_compiled():
return None
return m.as_context()


def gitignored_paths(folder_io, file_io):
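    """
    Parse a ``.gitignore`` and return a tuple of absolute ignored paths and
    ``(folder path, name)`` pairs that apply relative to subfolders.
    """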
ignored_paths_abs = set()
ignored_paths_rel = set()
for l in file_io.read().splitlines():
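        # Only simple entries are supported; skip comments, negations and
        # wildcard patterns.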
if not l or l.startswith(b'#') or l.startswith(b'!') or b'*' in l:
continue
p = l.decode('utf-8', 'ignore').rstrip('/')
if '/' in p:
name = p.lstrip('/')
ignored_paths_abs.add(os.path.join(folder_io.path, name))
else:
name = p
ignored_paths_rel.add((folder_io.path, name))
return ignored_paths_abs, ignored_paths_rel


def expand_relative_ignore_paths(folder_io, relative_paths):
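    """
    Return the absolute paths of the relative ignore entries that apply to
    the given folder.
    """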
curr_path = folder_io.path
    return {
        os.path.join(curr_path, p[1])
        for p in relative_paths
        if curr_path.startswith(p[0])
    }


def recurse_find_python_folders_and_files(folder_io, except_paths=()):
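    """
    Walk ``folder_io`` recursively, yielding ``(None, file_io)`` for Python
    files and ``(folder_io, None)`` for subfolders, while honoring
    ``.gitignore`` entries, ``except_paths`` and ``_IGNORE_FOLDERS``.
    """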
except_paths = set(except_paths)
except_paths_relative = set()
for root_folder_io, folder_ios, file_ios in folder_io.walk():
for file_io in file_ios:
path = file_io.path
if path.suffix in ('.py', '.pyi'):
if path not in except_paths:
yield None, file_io
if path.name == '.gitignore':
ignored_paths_abs, ignored_paths_rel = gitignored_paths(
root_folder_io, file_io
)
except_paths |= ignored_paths_abs
except_paths_relative |= ignored_paths_rel
except_paths_relative_expanded = expand_relative_ignore_paths(
root_folder_io, except_paths_relative
)
        # Prune folders we don't want to iterate over (in place, so that
        # walk() does not descend into them).
        folder_ios[:] = [
folder_io
for folder_io in folder_ios
if folder_io.path not in except_paths
and folder_io.path not in except_paths_relative_expanded
and folder_io.get_base_name() not in _IGNORE_FOLDERS
]
for folder_io in folder_ios:
yield folder_io, None


def recurse_find_python_files(folder_io, except_paths=()):
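    """
    Yield only the file IOs found by recurse_find_python_folders_and_files().
    """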
for folder_io, file_io in recurse_find_python_folders_and_files(folder_io, except_paths):
if file_io is not None:
yield file_io


def _find_python_files_in_sys_path(inference_state, module_contexts):
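    """
    Yield the Python files around the given modules, walking up the parent
    folders for as long as they stay within a ``sys.path`` entry.
    """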
sys_path = inference_state.get_sys_path()
except_paths = set()
yielded_paths = [m.py__file__() for m in module_contexts]
for module_context in module_contexts:
file_io = module_context.get_value().file_io
if file_io is None:
continue
folder_io = file_io.get_parent_folder()
while True:
path = folder_io.path
if not any(path.startswith(p) for p in sys_path) or path in except_paths:
break
for file_io in recurse_find_python_files(folder_io, except_paths):
if file_io.path not in yielded_paths:
yield file_io
except_paths.add(path)
folder_io = folder_io.get_parent_folder()


def _find_project_modules(inference_state, module_contexts):
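    """Yield all Python files of the project, except the modules' own files."""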
except_ = [m.py__file__() for m in module_contexts]
yield from recurse_find_python_files(FolderIO(inference_state.project.path), except_)


def get_module_contexts_containing_name(inference_state, module_contexts, name,
limit_reduction=1):
"""
Search a name in the directories of modules.
:param limit_reduction: Divides the limits on opening/parsing files by this
factor.
"""
    # Skip non-Python modules.
for module_context in module_contexts:
if module_context.is_compiled():
continue
yield module_context
# Very short names are not searched in other modules for now to avoid lots
# of file lookups.
if len(name) <= 2:
return
    # Currently not used, because there's only `scope=project` and
    # `scope=file`; at the moment there is no such thing as `scope=sys.path`.
# file_io_iterator = _find_python_files_in_sys_path(inference_state, module_contexts)
file_io_iterator = _find_project_modules(inference_state, module_contexts)
yield from search_in_file_ios(inference_state, file_io_iterator, name,
limit_reduction=limit_reduction)


def search_in_file_ios(inference_state, file_io_iterator, name,
limit_reduction=1, complete=False):
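    """
    Yield module contexts for files whose content contains ``name`` as a word
    (or word prefix, if ``complete``), respecting the limits on how many
    files may be opened and parsed.
    """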
parse_limit = _PARSED_FILE_LIMIT / limit_reduction
open_limit = _OPENED_FILE_LIMIT / limit_reduction
file_io_count = 0
parsed_file_count = 0
regex = re.compile(r'\b' + re.escape(name) + (r'' if complete else r'\b'))
for file_io in file_io_iterator:
file_io_count += 1
m = _check_fs(inference_state, file_io, regex)
if m is not None:
parsed_file_count += 1
yield m
if parsed_file_count >= parse_limit:
dbg('Hit limit of parsed files: %s', parse_limit)
break
if file_io_count >= open_limit:
dbg('Hit limit of opened files: %s', open_limit)
break