fix
Browse files
- .gitattributes +1 -0
- __pycache__/bleu.cpython-37.pyc +0 -0
- __pycache__/calc_code_bleu.cpython-37.pyc +0 -0
- __pycache__/codebleu.cpython-37.pyc +0 -0
- __pycache__/dataflow_match.cpython-37.pyc +0 -0
- __pycache__/syntax_match.cpython-37.pyc +0 -0
- __pycache__/utils.cpython-37.pyc +0 -0
- __pycache__/weighted_ngram_match.cpython-37.pyc +0 -0
- codebleu.py +0 -8
- parser/tree-sitter-c-sharp +0 -1
- parser/tree-sitter-cpp +0 -1
- parser/tree-sitter-go +0 -1
- parser/tree-sitter-java +0 -1
- parser/tree-sitter-javascript +0 -1
- parser/tree-sitter-php +0 -1
- parser/tree-sitter-python +0 -1
- parser/tree-sitter-ruby +0 -1
- {parser → parsercode}/DFG.py +0 -0
- {parser → parsercode}/__init__.py +0 -0
- parsercode/__pycache__/DFG.cpython-37.pyc +0 -0
- parsercode/__pycache__/__init__.cpython-37.pyc +0 -0
- parsercode/__pycache__/utils.cpython-37.pyc +0 -0
- {parser → parsercode}/build.py +0 -0
- {parser → parsercode}/build.sh +0 -0
- {parser → parsercode}/my-languages.so +0 -0
- {parser → parsercode}/utils.py +0 -0
- utils.py +105 -105
.gitattributes
CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
33 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
34 |
parser/my-languages.so filter=lfs diff=lfs merge=lfs -text
|
|
|
|
32 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
33 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
34 |
parser/my-languages.so filter=lfs diff=lfs merge=lfs -text
|
35 |
+
parsercode/my-languages.so filter=lfs diff=lfs merge=lfs -text
|
__pycache__/bleu.cpython-37.pyc
ADDED
Binary file (25.1 kB). View file
|
|
__pycache__/calc_code_bleu.cpython-37.pyc
ADDED
Binary file (2.91 kB). View file
|
|
__pycache__/codebleu.cpython-37.pyc
ADDED
Binary file (2.37 kB). View file
|
|
__pycache__/dataflow_match.cpython-37.pyc
ADDED
Binary file (4.18 kB). View file
|
|
__pycache__/syntax_match.cpython-37.pyc
ADDED
Binary file (2.13 kB). View file
|
|
__pycache__/utils.cpython-37.pyc
ADDED
Binary file (3.34 kB). View file
|
|
__pycache__/weighted_ngram_match.cpython-37.pyc
ADDED
Binary file (21.7 kB). View file
|
|
codebleu.py
CHANGED
@@ -83,14 +83,6 @@ class CodeBLEU(evaluate.Metric):
|
|
83 |
reference_urls=["http://path.to.reference.url/new_module"]
|
84 |
)
|
85 |
|
86 |
-
def _download_and_prepare(self, dl_manager):
|
87 |
-
"""Optional: download external resources useful to compute the scores"""
|
88 |
-
# TODO: Download external resources if needed
|
89 |
-
if self.config_name == "python":
|
90 |
-
Language.build_library('./parser/my-languages.so',['tree-sitter-python'])
|
91 |
-
elif self.config_name == "cpp":
|
92 |
-
Language.build_library('./parser/my-languages.so',['tree-sitter-cpp'])
|
93 |
-
|
94 |
|
95 |
def _compute(self, predictions, references, language="python", alpha=0.25, beta=0.25, gamma=0.25, theta=0.25):
|
96 |
|
|
|
83 |
reference_urls=["http://path.to.reference.url/new_module"]
|
84 |
)
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
def _compute(self, predictions, references, language="python", alpha=0.25, beta=0.25, gamma=0.25, theta=0.25):
|
88 |
|
parser/tree-sitter-c-sharp
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
Subproject commit 5b60f99545fea00a33bbfae5be956f684c4c69e2
|
|
|
|
parser/tree-sitter-cpp
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
Subproject commit 5ead1e26c6ab71919db0f1880c46a278a93bc5ea
|
|
|
|
parser/tree-sitter-go
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
Subproject commit 05900faa3cdb5d2d8c8bd5e77ee698487e0a8611
|
|
|
|
parser/tree-sitter-java
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
Subproject commit 09d650def6cdf7f479f4b78f595e9ef5b58ce31e
|
|
|
|
parser/tree-sitter-javascript
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
Subproject commit 936d976a782e75395d9b1c8c7c7bf4ba6fe0d86b
|
|
|
|
parser/tree-sitter-php
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
Subproject commit ab2e72179ceb8bb0b249c8ac9162a148e911b3dc
|
|
|
|
parser/tree-sitter-python
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
Subproject commit b14614e2144b8f9ee54deed5a24f3c6f51f9ffa8
|
|
|
|
parser/tree-sitter-ruby
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
Subproject commit 252ca18be76b0918fb6b34c302292b6931876c25
|
|
|
|
{parser → parsercode}/DFG.py
RENAMED
File without changes
|
{parser → parsercode}/__init__.py
RENAMED
File without changes
|
parsercode/__pycache__/DFG.cpython-37.pyc
ADDED
Binary file (33.9 kB). View file
|
|
parsercode/__pycache__/__init__.cpython-37.pyc
ADDED
Binary file (437 Bytes). View file
|
|
parsercode/__pycache__/utils.cpython-37.pyc
ADDED
Binary file (2.43 kB). View file
|
|
{parser → parsercode}/build.py
RENAMED
File without changes
|
{parser → parsercode}/build.sh
RENAMED
File without changes
|
{parser → parsercode}/my-languages.so
RENAMED
File without changes
|
{parser → parsercode}/utils.py
RENAMED
File without changes
|
utils.py
CHANGED
@@ -1,106 +1,106 @@
|
|
1 |
-
# Natural Language Toolkit: Utility functions
|
2 |
-
#
|
3 |
-
# Copyright (C) 2001-2020 NLTK Project
|
4 |
-
# Author: Steven Bird <[email protected]>
|
5 |
-
# URL: <http://nltk.org/>
|
6 |
-
# For license information, see LICENSE.TXT
|
7 |
-
|
8 |
-
from itertools import chain
|
9 |
-
|
10 |
-
def pad_sequence(
|
11 |
-
sequence,
|
12 |
-
n,
|
13 |
-
pad_left=False,
|
14 |
-
pad_right=False,
|
15 |
-
left_pad_symbol=None,
|
16 |
-
right_pad_symbol=None,
|
17 |
-
):
|
18 |
-
"""
|
19 |
-
Returns a padded sequence of items before ngram extraction.
|
20 |
-
>>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
|
21 |
-
['<s>', 1, 2, 3, 4, 5, '</s>']
|
22 |
-
>>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
|
23 |
-
['<s>', 1, 2, 3, 4, 5]
|
24 |
-
>>> list(pad_sequence([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
|
25 |
-
[1, 2, 3, 4, 5, '</s>']
|
26 |
-
:param sequence: the source data to be padded
|
27 |
-
:type sequence: sequence or iter
|
28 |
-
:param n: the degree of the ngrams
|
29 |
-
:type n: int
|
30 |
-
:param pad_left: whether the ngrams should be left-padded
|
31 |
-
:type pad_left: bool
|
32 |
-
:param pad_right: whether the ngrams should be right-padded
|
33 |
-
:type pad_right: bool
|
34 |
-
:param left_pad_symbol: the symbol to use for left padding (default is None)
|
35 |
-
:type left_pad_symbol: any
|
36 |
-
:param right_pad_symbol: the symbol to use for right padding (default is None)
|
37 |
-
:type right_pad_symbol: any
|
38 |
-
:rtype: sequence or iter
|
39 |
-
"""
|
40 |
-
sequence = iter(sequence)
|
41 |
-
if pad_left:
|
42 |
-
sequence = chain((left_pad_symbol,) * (n - 1), sequence)
|
43 |
-
if pad_right:
|
44 |
-
sequence = chain(sequence, (right_pad_symbol,) * (n - 1))
|
45 |
-
return sequence
|
46 |
-
|
47 |
-
|
48 |
-
# add a flag to pad the sequence so we get peripheral ngrams?
|
49 |
-
|
50 |
-
|
51 |
-
def ngrams(
|
52 |
-
sequence,
|
53 |
-
n,
|
54 |
-
pad_left=False,
|
55 |
-
pad_right=False,
|
56 |
-
left_pad_symbol=None,
|
57 |
-
right_pad_symbol=None,
|
58 |
-
):
|
59 |
-
"""
|
60 |
-
Return the ngrams generated from a sequence of items, as an iterator.
|
61 |
-
For example:
|
62 |
-
>>> from nltk.util import ngrams
|
63 |
-
>>> list(ngrams([1,2,3,4,5], 3))
|
64 |
-
[(1, 2, 3), (2, 3, 4), (3, 4, 5)]
|
65 |
-
Wrap with list for a list version of this function. Set pad_left
|
66 |
-
or pad_right to true in order to get additional ngrams:
|
67 |
-
>>> list(ngrams([1,2,3,4,5], 2, pad_right=True))
|
68 |
-
[(1, 2), (2, 3), (3, 4), (4, 5), (5, None)]
|
69 |
-
>>> list(ngrams([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
|
70 |
-
[(1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
|
71 |
-
>>> list(ngrams([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
|
72 |
-
[('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5)]
|
73 |
-
>>> list(ngrams([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
|
74 |
-
[('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
|
75 |
-
:param sequence: the source data to be converted into ngrams
|
76 |
-
:type sequence: sequence or iter
|
77 |
-
:param n: the degree of the ngrams
|
78 |
-
:type n: int
|
79 |
-
:param pad_left: whether the ngrams should be left-padded
|
80 |
-
:type pad_left: bool
|
81 |
-
:param pad_right: whether the ngrams should be right-padded
|
82 |
-
:type pad_right: bool
|
83 |
-
:param left_pad_symbol: the symbol to use for left padding (default is None)
|
84 |
-
:type left_pad_symbol: any
|
85 |
-
:param right_pad_symbol: the symbol to use for right padding (default is None)
|
86 |
-
:type right_pad_symbol: any
|
87 |
-
:rtype: sequence or iter
|
88 |
-
"""
|
89 |
-
sequence = pad_sequence(
|
90 |
-
sequence, n, pad_left, pad_right, left_pad_symbol, right_pad_symbol
|
91 |
-
)
|
92 |
-
|
93 |
-
history = []
|
94 |
-
while n > 1:
|
95 |
-
# PEP 479, prevent RuntimeError from being raised when StopIteration bubbles out of generator
|
96 |
-
try:
|
97 |
-
next_item = next(sequence)
|
98 |
-
except StopIteration:
|
99 |
-
# no more data, terminate the generator
|
100 |
-
return
|
101 |
-
history.append(next_item)
|
102 |
-
n -= 1
|
103 |
-
for item in sequence:
|
104 |
-
history.append(item)
|
105 |
-
yield tuple(history)
|
106 |
del history[0]
|
|
|
1 |
+
# Natural Language Toolkit: Utility functions
|
2 |
+
#
|
3 |
+
# Copyright (C) 2001-2020 NLTK Project
|
4 |
+
# Author: Steven Bird <[email protected]>
|
5 |
+
# URL: <http://nltk.org/>
|
6 |
+
# For license information, see LICENSE.TXT
|
7 |
+
|
8 |
+
from itertools import chain
|
9 |
+
|
10 |
+
def pad_sequence(
|
11 |
+
sequence,
|
12 |
+
n,
|
13 |
+
pad_left=False,
|
14 |
+
pad_right=False,
|
15 |
+
left_pad_symbol=None,
|
16 |
+
right_pad_symbol=None,
|
17 |
+
):
|
18 |
+
"""
|
19 |
+
Returns a padded sequence of items before ngram extraction.
|
20 |
+
>>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
|
21 |
+
['<s>', 1, 2, 3, 4, 5, '</s>']
|
22 |
+
>>> list(pad_sequence([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
|
23 |
+
['<s>', 1, 2, 3, 4, 5]
|
24 |
+
>>> list(pad_sequence([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
|
25 |
+
[1, 2, 3, 4, 5, '</s>']
|
26 |
+
:param sequence: the source data to be padded
|
27 |
+
:type sequence: sequence or iter
|
28 |
+
:param n: the degree of the ngrams
|
29 |
+
:type n: int
|
30 |
+
:param pad_left: whether the ngrams should be left-padded
|
31 |
+
:type pad_left: bool
|
32 |
+
:param pad_right: whether the ngrams should be right-padded
|
33 |
+
:type pad_right: bool
|
34 |
+
:param left_pad_symbol: the symbol to use for left padding (default is None)
|
35 |
+
:type left_pad_symbol: any
|
36 |
+
:param right_pad_symbol: the symbol to use for right padding (default is None)
|
37 |
+
:type right_pad_symbol: any
|
38 |
+
:rtype: sequence or iter
|
39 |
+
"""
|
40 |
+
sequence = iter(sequence)
|
41 |
+
if pad_left:
|
42 |
+
sequence = chain((left_pad_symbol,) * (n - 1), sequence)
|
43 |
+
if pad_right:
|
44 |
+
sequence = chain(sequence, (right_pad_symbol,) * (n - 1))
|
45 |
+
return sequence
|
46 |
+
|
47 |
+
|
48 |
+
# add a flag to pad the sequence so we get peripheral ngrams?
|
49 |
+
|
50 |
+
|
51 |
+
def ngrams(
|
52 |
+
sequence,
|
53 |
+
n,
|
54 |
+
pad_left=False,
|
55 |
+
pad_right=False,
|
56 |
+
left_pad_symbol=None,
|
57 |
+
right_pad_symbol=None,
|
58 |
+
):
|
59 |
+
"""
|
60 |
+
Return the ngrams generated from a sequence of items, as an iterator.
|
61 |
+
For example:
|
62 |
+
>>> from nltk.util import ngrams
|
63 |
+
>>> list(ngrams([1,2,3,4,5], 3))
|
64 |
+
[(1, 2, 3), (2, 3, 4), (3, 4, 5)]
|
65 |
+
Wrap with list for a list version of this function. Set pad_left
|
66 |
+
or pad_right to true in order to get additional ngrams:
|
67 |
+
>>> list(ngrams([1,2,3,4,5], 2, pad_right=True))
|
68 |
+
[(1, 2), (2, 3), (3, 4), (4, 5), (5, None)]
|
69 |
+
>>> list(ngrams([1,2,3,4,5], 2, pad_right=True, right_pad_symbol='</s>'))
|
70 |
+
[(1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
|
71 |
+
>>> list(ngrams([1,2,3,4,5], 2, pad_left=True, left_pad_symbol='<s>'))
|
72 |
+
[('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5)]
|
73 |
+
>>> list(ngrams([1,2,3,4,5], 2, pad_left=True, pad_right=True, left_pad_symbol='<s>', right_pad_symbol='</s>'))
|
74 |
+
[('<s>', 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, '</s>')]
|
75 |
+
:param sequence: the source data to be converted into ngrams
|
76 |
+
:type sequence: sequence or iter
|
77 |
+
:param n: the degree of the ngrams
|
78 |
+
:type n: int
|
79 |
+
:param pad_left: whether the ngrams should be left-padded
|
80 |
+
:type pad_left: bool
|
81 |
+
:param pad_right: whether the ngrams should be right-padded
|
82 |
+
:type pad_right: bool
|
83 |
+
:param left_pad_symbol: the symbol to use for left padding (default is None)
|
84 |
+
:type left_pad_symbol: any
|
85 |
+
:param right_pad_symbol: the symbol to use for right padding (default is None)
|
86 |
+
:type right_pad_symbol: any
|
87 |
+
:rtype: sequence or iter
|
88 |
+
"""
|
89 |
+
sequence = pad_sequence(
|
90 |
+
sequence, n, pad_left, pad_right, left_pad_symbol, right_pad_symbol
|
91 |
+
)
|
92 |
+
|
93 |
+
history = []
|
94 |
+
while n > 1:
|
95 |
+
# PEP 479, prevent RuntimeError from being raised when StopIteration bubbles out of generator
|
96 |
+
try:
|
97 |
+
next_item = next(sequence)
|
98 |
+
except StopIteration:
|
99 |
+
# no more data, terminate the generator
|
100 |
+
return
|
101 |
+
history.append(next_item)
|
102 |
+
n -= 1
|
103 |
+
for item in sequence:
|
104 |
+
history.append(item)
|
105 |
+
yield tuple(history)
|
106 |
del history[0]
|