File size: 10,218 Bytes
d1ceb73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
import os
import re
from pathlib import Path
from importlib.machinery import all_suffixes

from jedi.inference.cache import inference_state_method_cache
from jedi.inference.base_value import ContextualizedNode
from jedi.inference.helpers import is_string, get_str_or_none
from jedi.parser_utils import get_cached_code_lines
from jedi.file_io import FileIO
from jedi import settings
from jedi import debug

_BUILDOUT_PATH_INSERTION_LIMIT = 10


def _abs_path(module_context, str_path: str):
    path = Path(str_path)
    if path.is_absolute():
        return path

    module_path = module_context.py__file__()
    if module_path is None:
        # In this case we have no idea where we actually are in the file
        # system.
        return None

    base_dir = module_path.parent
    return base_dir.joinpath(path).absolute()


def _paths_from_assignment(module_context, expr_stmt):
    """
    Extracts the assigned strings from an assignment that looks as follows::

        sys.path[0:0] = ['module/path', 'another/module/path']

    This function is in general pretty tolerant (and therefore 'buggy').
    However, it's not a big issue usually to add more paths to Jedi's sys_path,
    because it will only affect Jedi in very random situations and by adding
    more paths than necessary, it usually benefits the general user.
    """
    for assignee, operator in zip(expr_stmt.children[::2], expr_stmt.children[1::2]):
        try:
            assert operator in ['=', '+=']
            assert assignee.type in ('power', 'atom_expr') and \
                len(assignee.children) > 1
            c = assignee.children
            assert c[0].type == 'name' and c[0].value == 'sys'
            trailer = c[1]
            assert trailer.children[0] == '.' and trailer.children[1].value == 'path'
            # TODO Essentially we're not checking details on sys.path
            # manipulation. Both assigment of the sys.path and changing/adding
            # parts of the sys.path are the same: They get added to the end of
            # the current sys.path.
            """
            execution = c[2]
            assert execution.children[0] == '['
            subscript = execution.children[1]
            assert subscript.type == 'subscript'
            assert ':' in subscript.children
            """
        except AssertionError:
            continue

        cn = ContextualizedNode(module_context.create_context(expr_stmt), expr_stmt)
        for lazy_value in cn.infer().iterate(cn):
            for value in lazy_value.infer():
                if is_string(value):
                    abs_path = _abs_path(module_context, value.get_safe_value())
                    if abs_path is not None:
                        yield abs_path


def _paths_from_list_modifications(module_context, trailer1, trailer2):
    """ extract the path from either "sys.path.append" or "sys.path.insert" """
    # Guarantee that both are trailers, the first one a name and the second one
    # a function execution with at least one param.
    if not (trailer1.type == 'trailer' and trailer1.children[0] == '.'
            and trailer2.type == 'trailer' and trailer2.children[0] == '('
            and len(trailer2.children) == 3):
        return

    name = trailer1.children[1].value
    if name not in ['insert', 'append']:
        return
    arg = trailer2.children[1]
    if name == 'insert' and len(arg.children) in (3, 4):  # Possible trailing comma.
        arg = arg.children[2]

    for value in module_context.create_context(arg).infer_node(arg):
        p = get_str_or_none(value)
        if p is None:
            continue
        abs_path = _abs_path(module_context, p)
        if abs_path is not None:
            yield abs_path


@inference_state_method_cache(default=[])
def check_sys_path_modifications(module_context):
    """
    Detect sys.path modifications within module.
    """
    def get_sys_path_powers(names):
        for name in names:
            power = name.parent.parent
            if power is not None and power.type in ('power', 'atom_expr'):
                c = power.children
                if c[0].type == 'name' and c[0].value == 'sys' \
                        and c[1].type == 'trailer':
                    n = c[1].children[1]
                    if n.type == 'name' and n.value == 'path':
                        yield name, power

    if module_context.tree_node is None:
        return []

    added = []
    try:
        possible_names = module_context.tree_node.get_used_names()['path']
    except KeyError:
        pass
    else:
        for name, power in get_sys_path_powers(possible_names):
            expr_stmt = power.parent
            if len(power.children) >= 4:
                added.extend(
                    _paths_from_list_modifications(
                        module_context, *power.children[2:4]
                    )
                )
            elif expr_stmt is not None and expr_stmt.type == 'expr_stmt':
                added.extend(_paths_from_assignment(module_context, expr_stmt))
    return added


def discover_buildout_paths(inference_state, script_path):
    buildout_script_paths = set()

    for buildout_script_path in _get_buildout_script_paths(script_path):
        for path in _get_paths_from_buildout_script(inference_state, buildout_script_path):
            buildout_script_paths.add(path)
            if len(buildout_script_paths) >= _BUILDOUT_PATH_INSERTION_LIMIT:
                break

    return buildout_script_paths


def _get_paths_from_buildout_script(inference_state, buildout_script_path):
    file_io = FileIO(str(buildout_script_path))
    try:
        module_node = inference_state.parse(
            file_io=file_io,
            cache=True,
            cache_path=settings.cache_directory
        )
    except IOError:
        debug.warning('Error trying to read buildout_script: %s', buildout_script_path)
        return

    from jedi.inference.value import ModuleValue
    module_context = ModuleValue(
        inference_state, module_node,
        file_io=file_io,
        string_names=None,
        code_lines=get_cached_code_lines(inference_state.grammar, buildout_script_path),
    ).as_context()
    yield from check_sys_path_modifications(module_context)


def _get_parent_dir_with_file(path: Path, filename):
    for parent in path.parents:
        try:
            if parent.joinpath(filename).is_file():
                return parent
        except OSError:
            continue
    return None


def _get_buildout_script_paths(search_path: Path):
    """
    if there is a 'buildout.cfg' file in one of the parent directories of the
    given module it will return a list of all files in the buildout bin
    directory that look like python files.

    :param search_path: absolute path to the module.
    """
    project_root = _get_parent_dir_with_file(search_path, 'buildout.cfg')
    if not project_root:
        return
    bin_path = project_root.joinpath('bin')
    if not bin_path.exists():
        return

    for filename in os.listdir(bin_path):
        try:
            filepath = bin_path.joinpath(filename)
            with open(filepath, 'r') as f:
                firstline = f.readline()
                if firstline.startswith('#!') and 'python' in firstline:
                    yield filepath
        except (UnicodeDecodeError, IOError) as e:
            # Probably a binary file; permission error or race cond. because
            # file got deleted. Ignore it.
            debug.warning(str(e))
            continue


def remove_python_path_suffix(path):
    for suffix in all_suffixes() + ['.pyi']:
        if path.suffix == suffix:
            path = path.with_name(path.stem)
            break
    return path


def transform_path_to_dotted(sys_path, module_path):
    """
    Returns the dotted path inside a sys.path as a list of names. e.g.

    >>> transform_path_to_dotted([str(Path("/foo").absolute())], Path('/foo/bar/baz.py').absolute())
    (('bar', 'baz'), False)

    Returns (None, False) if the path doesn't really resolve to anything.
    The second return part is if it is a package.
    """
    # First remove the suffix.
    module_path = remove_python_path_suffix(module_path)
    if module_path.name.startswith('.'):
        return None, False

    # Once the suffix was removed we are using the files as we know them. This
    # means that if someone uses an ending like .vim for a Python file, .vim
    # will be part of the returned dotted part.

    is_package = module_path.name == '__init__'
    if is_package:
        module_path = module_path.parent

    def iter_potential_solutions():
        for p in sys_path:
            if str(module_path).startswith(p):
                # Strip the trailing slash/backslash
                rest = str(module_path)[len(p):]
                # On Windows a path can also use a slash.
                if rest.startswith(os.path.sep) or rest.startswith('/'):
                    # Remove a slash in cases it's still there.
                    rest = rest[1:]

                if rest:
                    split = rest.split(os.path.sep)
                    if not all(split):
                        # This means that part of the file path was empty, this
                        # is very strange and is probably a file that is called
                        # `.py`.
                        return
                    # Stub folders for foo can end with foo-stubs. Just remove
                    # it.
                    yield tuple(re.sub(r'-stubs$', '', s) for s in split)

    potential_solutions = tuple(iter_potential_solutions())
    if not potential_solutions:
        return None, False
    # Try to find the shortest path, this makes more sense usually, because the
    # user usually has venvs somewhere. This means that a path like
    # .tox/py37/lib/python3.7/os.py can be normal for a file. However in that
    # case we definitely want to return ['os'] as a path and not a crazy
    # ['.tox', 'py37', 'lib', 'python3.7', 'os']. Keep in mind that this is a
    # heuristic and there's now ay to "always" do it right.
    return sorted(potential_solutions, key=lambda p: len(p))[0], is_package