Spaces:
Running
Running
Refactor CodeParser and update test cases
Browse files
- Removed the `return_simple_line_numbers_with_code` function from CodeParser.py.
- Improved repository path handling in CodeParser.py by removing unnecessary quotes in command strings.
- Added error handling for a missing PHP directory during the parser build.
- Updated the test cases in test_code_chunker.py to reflect the change in how the `count_tokens` function is patched (the patch target is now `utils.count_tokens`).
- CodeParser.py +16 -12
- test_code_chunker.py +5 -5
CodeParser.py
CHANGED
@@ -2,16 +2,8 @@ import os
|
|
2 |
import subprocess
|
3 |
from typing import List, Dict, Union, Tuple
|
4 |
from tree_sitter import Language, Parser, Node
|
5 |
-
from typing import Union, List
|
6 |
import logging
|
7 |
|
8 |
-
def return_simple_line_numbers_with_code(code: str) -> str:
|
9 |
-
code_lines = code.split('\n')
|
10 |
-
code_with_line_numbers = [f"Line {i + 1}: {line}" for i, line in enumerate(code_lines)]
|
11 |
-
joined_lines = "\n".join(code_with_line_numbers)
|
12 |
-
return joined_lines
|
13 |
-
|
14 |
-
|
15 |
class CodeParser:
|
16 |
# Added a CACHE_DIR class attribute for caching
|
17 |
CACHE_DIR = os.path.expanduser("~/.code_parser_cache")
|
@@ -53,11 +45,11 @@ class CodeParser:
|
|
53 |
try:
|
54 |
if os.path.exists(repo_path):
|
55 |
logging.info(f"Updating existing repository for {language}")
|
56 |
-
update_command = f
|
57 |
subprocess.run(update_command, shell=True, check=True)
|
58 |
else:
|
59 |
logging.info(f"Cloning repository for {language}")
|
60 |
-
clone_command = f
|
61 |
subprocess.run(clone_command, shell=True, check=True)
|
62 |
except subprocess.CalledProcessError as e:
|
63 |
logging.error(f"Failed to clone/update repository for {language}. Error: {e}")
|
@@ -74,9 +66,12 @@ class CodeParser:
|
|
74 |
Language.build_library(build_path, [ts_dir, tsx_dir])
|
75 |
else:
|
76 |
raise FileNotFoundError(f"TypeScript or TSX directory not found in {repo_path}")
|
77 |
-
|
78 |
php_dir = os.path.join(repo_path, 'php')
|
79 |
-
|
|
|
|
|
|
|
80 |
else:
|
81 |
Language.build_library(build_path, [repo_path])
|
82 |
|
@@ -84,6 +79,13 @@ class CodeParser:
|
|
84 |
logging.info(f"Successfully built and loaded {language} parser")
|
85 |
except Exception as e:
|
86 |
logging.error(f"Failed to build or load language {language}. Error: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
except Exception as e:
|
89 |
logging.error(f"An unexpected error occurred during parser installation: {str(e)}")
|
@@ -218,6 +220,8 @@ class CodeParser:
|
|
218 |
return node_types[file_extension]
|
219 |
elif file_extension == "jsx":
|
220 |
return node_types["js"]
|
|
|
|
|
221 |
else:
|
222 |
raise ValueError("Unsupported file type")
|
223 |
|
|
|
2 |
import subprocess
|
3 |
from typing import List, Dict, Union, Tuple
|
4 |
from tree_sitter import Language, Parser, Node
|
|
|
5 |
import logging
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
class CodeParser:
|
8 |
# Added a CACHE_DIR class attribute for caching
|
9 |
CACHE_DIR = os.path.expanduser("~/.code_parser_cache")
|
|
|
45 |
try:
|
46 |
if os.path.exists(repo_path):
|
47 |
logging.info(f"Updating existing repository for {language}")
|
48 |
+
update_command = f"cd {repo_path} && git pull"
|
49 |
subprocess.run(update_command, shell=True, check=True)
|
50 |
else:
|
51 |
logging.info(f"Cloning repository for {language}")
|
52 |
+
clone_command = f"git clone https://github.com/tree-sitter/tree-sitter-{language} {repo_path}"
|
53 |
subprocess.run(clone_command, shell=True, check=True)
|
54 |
except subprocess.CalledProcessError as e:
|
55 |
logging.error(f"Failed to clone/update repository for {language}. Error: {e}")
|
|
|
66 |
Language.build_library(build_path, [ts_dir, tsx_dir])
|
67 |
else:
|
68 |
raise FileNotFoundError(f"TypeScript or TSX directory not found in {repo_path}")
|
69 |
+
elif language == 'php':
|
70 |
php_dir = os.path.join(repo_path, 'php')
|
71 |
+
if os.path.exists(php_dir):
|
72 |
+
Language.build_library(build_path, [php_dir])
|
73 |
+
else:
|
74 |
+
raise FileNotFoundError(f"PHP directory not found in {repo_path}")
|
75 |
else:
|
76 |
Language.build_library(build_path, [repo_path])
|
77 |
|
|
|
79 |
logging.info(f"Successfully built and loaded {language} parser")
|
80 |
except Exception as e:
|
81 |
logging.error(f"Failed to build or load language {language}. Error: {str(e)}")
|
82 |
+
logging.error(f"Repository path: {repo_path}")
|
83 |
+
logging.error(f"Build path: {build_path}")
|
84 |
+
if language == 'typescript':
|
85 |
+
logging.error(f"TypeScript dir exists: {os.path.exists(ts_dir)}")
|
86 |
+
logging.error(f"TSX dir exists: {os.path.exists(tsx_dir)}")
|
87 |
+
elif language == 'php':
|
88 |
+
logging.error(f"PHP dir exists: {os.path.exists(php_dir)}")
|
89 |
|
90 |
except Exception as e:
|
91 |
logging.error(f"An unexpected error occurred during parser installation: {str(e)}")
|
|
|
220 |
return node_types[file_extension]
|
221 |
elif file_extension == "jsx":
|
222 |
return node_types["js"]
|
223 |
+
elif file_extension == "tsx":
|
224 |
+
return node_types["ts"]
|
225 |
else:
|
226 |
raise ValueError("Unsupported file type")
|
227 |
|
test_code_chunker.py
CHANGED
@@ -104,7 +104,7 @@ class TestCodeChunkerPython(unittest.TestCase):
|
|
104 |
class TestCodeChunkerJavaScript(unittest.TestCase):
|
105 |
|
106 |
def setUp(self):
|
107 |
-
self.patcher = patch('
|
108 |
self.mock_count_tokens = self.patcher.start()
|
109 |
self.code_chunker = CodeChunker(file_extension='js')
|
110 |
self.mock_codebase = load_json('mock_codefiles.json')
|
@@ -181,7 +181,7 @@ class TestCodeChunkerJavaScript(unittest.TestCase):
|
|
181 |
class TestCodeChunkerCSS(unittest.TestCase):
|
182 |
|
183 |
def setUp(self):
|
184 |
-
self.patcher = patch('
|
185 |
self.mock_count_tokens = self.patcher.start()
|
186 |
self.code_chunker = CodeChunker(file_extension='css')
|
187 |
#Load the JSON data
|
@@ -214,7 +214,7 @@ class TestCodeChunkerCSS(unittest.TestCase):
|
|
214 |
class TestCodeChunkerTypeScript(unittest.TestCase):
|
215 |
|
216 |
def setUp(self):
|
217 |
-
self.patcher = patch('
|
218 |
self.mock_count_tokens = self.patcher.start()
|
219 |
self.code_chunker = CodeChunker(file_extension='ts')
|
220 |
self.mock_codebase = load_json('mock_codefiles.json')
|
@@ -237,7 +237,7 @@ class TestCodeChunkerTypeScript(unittest.TestCase):
|
|
237 |
class TestCodeChunkerRuby(unittest.TestCase):
|
238 |
|
239 |
def setUp(self):
|
240 |
-
self.patcher = patch('
|
241 |
self.mock_count_tokens = self.patcher.start()
|
242 |
self.code_chunker = CodeChunker(file_extension='rb')
|
243 |
self.mock_codebase = load_json('mock_codefiles.json')
|
@@ -260,7 +260,7 @@ class TestCodeChunkerRuby(unittest.TestCase):
|
|
260 |
class TestCodeChunkerPHP(unittest.TestCase):
|
261 |
|
262 |
def setUp(self):
|
263 |
-
self.patcher = patch('
|
264 |
self.mock_count_tokens = self.patcher.start()
|
265 |
self.code_chunker = CodeChunker(file_extension='php')
|
266 |
self.mock_codebase = load_json('mock_codefiles.json')
|
|
|
104 |
class TestCodeChunkerJavaScript(unittest.TestCase):
|
105 |
|
106 |
def setUp(self):
|
107 |
+
self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
|
108 |
self.mock_count_tokens = self.patcher.start()
|
109 |
self.code_chunker = CodeChunker(file_extension='js')
|
110 |
self.mock_codebase = load_json('mock_codefiles.json')
|
|
|
181 |
class TestCodeChunkerCSS(unittest.TestCase):
|
182 |
|
183 |
def setUp(self):
|
184 |
+
self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
|
185 |
self.mock_count_tokens = self.patcher.start()
|
186 |
self.code_chunker = CodeChunker(file_extension='css')
|
187 |
#Load the JSON data
|
|
|
214 |
class TestCodeChunkerTypeScript(unittest.TestCase):
|
215 |
|
216 |
def setUp(self):
|
217 |
+
self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
|
218 |
self.mock_count_tokens = self.patcher.start()
|
219 |
self.code_chunker = CodeChunker(file_extension='ts')
|
220 |
self.mock_codebase = load_json('mock_codefiles.json')
|
|
|
237 |
class TestCodeChunkerRuby(unittest.TestCase):
|
238 |
|
239 |
def setUp(self):
|
240 |
+
self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
|
241 |
self.mock_count_tokens = self.patcher.start()
|
242 |
self.code_chunker = CodeChunker(file_extension='rb')
|
243 |
self.mock_codebase = load_json('mock_codefiles.json')
|
|
|
260 |
class TestCodeChunkerPHP(unittest.TestCase):
|
261 |
|
262 |
def setUp(self):
|
263 |
+
self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
|
264 |
self.mock_count_tokens = self.patcher.start()
|
265 |
self.code_chunker = CodeChunker(file_extension='php')
|
266 |
self.mock_codebase = load_json('mock_codefiles.json')
|