CintraAI committed on
Commit
f78cc9c
·
1 Parent(s): dc781f8

Refactor CodeParser and update test cases

Browse files

- Removed the `return_simple_line_numbers_with_code` function from CodeParser.py.
- Changed repository path handling in CodeParser.py by removing the double quotes around the repository path in the `git pull` and `git clone` shell command strings.
- Added error handling for missing PHP directory during parser build.
- Updated the `setUp` methods in test_code_chunker.py to patch `utils.count_tokens` instead of `app.util.TextChunker.Chunker.count_tokens`.

Files changed (2) hide show
  1. CodeParser.py +16 -12
  2. test_code_chunker.py +5 -5
CodeParser.py CHANGED
@@ -2,16 +2,8 @@ import os
2
  import subprocess
3
  from typing import List, Dict, Union, Tuple
4
  from tree_sitter import Language, Parser, Node
5
- from typing import Union, List
6
  import logging
7
 
8
- def return_simple_line_numbers_with_code(code: str) -> str:
9
- code_lines = code.split('\n')
10
- code_with_line_numbers = [f"Line {i + 1}: {line}" for i, line in enumerate(code_lines)]
11
- joined_lines = "\n".join(code_with_line_numbers)
12
- return joined_lines
13
-
14
-
15
  class CodeParser:
16
  # Added a CACHE_DIR class attribute for caching
17
  CACHE_DIR = os.path.expanduser("~/.code_parser_cache")
@@ -53,11 +45,11 @@ class CodeParser:
53
  try:
54
  if os.path.exists(repo_path):
55
  logging.info(f"Updating existing repository for {language}")
56
- update_command = f'cd "{repo_path}" && git pull'
57
  subprocess.run(update_command, shell=True, check=True)
58
  else:
59
  logging.info(f"Cloning repository for {language}")
60
- clone_command = f'git clone https://github.com/tree-sitter/tree-sitter-{language} "{repo_path}"'
61
  subprocess.run(clone_command, shell=True, check=True)
62
  except subprocess.CalledProcessError as e:
63
  logging.error(f"Failed to clone/update repository for {language}. Error: {e}")
@@ -74,9 +66,12 @@ class CodeParser:
74
  Language.build_library(build_path, [ts_dir, tsx_dir])
75
  else:
76
  raise FileNotFoundError(f"TypeScript or TSX directory not found in {repo_path}")
77
- if language == 'php':
78
  php_dir = os.path.join(repo_path, 'php')
79
- Language.build_library(build_path, [php_dir])
 
 
 
80
  else:
81
  Language.build_library(build_path, [repo_path])
82
 
@@ -84,6 +79,13 @@ class CodeParser:
84
  logging.info(f"Successfully built and loaded {language} parser")
85
  except Exception as e:
86
  logging.error(f"Failed to build or load language {language}. Error: {str(e)}")
 
 
 
 
 
 
 
87
 
88
  except Exception as e:
89
  logging.error(f"An unexpected error occurred during parser installation: {str(e)}")
@@ -218,6 +220,8 @@ class CodeParser:
218
  return node_types[file_extension]
219
  elif file_extension == "jsx":
220
  return node_types["js"]
 
 
221
  else:
222
  raise ValueError("Unsupported file type")
223
 
 
2
  import subprocess
3
  from typing import List, Dict, Union, Tuple
4
  from tree_sitter import Language, Parser, Node
 
5
  import logging
6
 
 
 
 
 
 
 
 
7
  class CodeParser:
8
  # Added a CACHE_DIR class attribute for caching
9
  CACHE_DIR = os.path.expanduser("~/.code_parser_cache")
 
45
  try:
46
  if os.path.exists(repo_path):
47
  logging.info(f"Updating existing repository for {language}")
48
+ update_command = f"cd {repo_path} && git pull"
49
  subprocess.run(update_command, shell=True, check=True)
50
  else:
51
  logging.info(f"Cloning repository for {language}")
52
+ clone_command = f"git clone https://github.com/tree-sitter/tree-sitter-{language} {repo_path}"
53
  subprocess.run(clone_command, shell=True, check=True)
54
  except subprocess.CalledProcessError as e:
55
  logging.error(f"Failed to clone/update repository for {language}. Error: {e}")
 
66
  Language.build_library(build_path, [ts_dir, tsx_dir])
67
  else:
68
  raise FileNotFoundError(f"TypeScript or TSX directory not found in {repo_path}")
69
+ elif language == 'php':
70
  php_dir = os.path.join(repo_path, 'php')
71
+ if os.path.exists(php_dir):
72
+ Language.build_library(build_path, [php_dir])
73
+ else:
74
+ raise FileNotFoundError(f"PHP directory not found in {repo_path}")
75
  else:
76
  Language.build_library(build_path, [repo_path])
77
 
 
79
  logging.info(f"Successfully built and loaded {language} parser")
80
  except Exception as e:
81
  logging.error(f"Failed to build or load language {language}. Error: {str(e)}")
82
+ logging.error(f"Repository path: {repo_path}")
83
+ logging.error(f"Build path: {build_path}")
84
+ if language == 'typescript':
85
+ logging.error(f"TypeScript dir exists: {os.path.exists(ts_dir)}")
86
+ logging.error(f"TSX dir exists: {os.path.exists(tsx_dir)}")
87
+ elif language == 'php':
88
+ logging.error(f"PHP dir exists: {os.path.exists(php_dir)}")
89
 
90
  except Exception as e:
91
  logging.error(f"An unexpected error occurred during parser installation: {str(e)}")
 
220
  return node_types[file_extension]
221
  elif file_extension == "jsx":
222
  return node_types["js"]
223
+ elif file_extension == "tsx":
224
+ return node_types["ts"]
225
  else:
226
  raise ValueError("Unsupported file type")
227
 
test_code_chunker.py CHANGED
@@ -104,7 +104,7 @@ class TestCodeChunkerPython(unittest.TestCase):
104
  class TestCodeChunkerJavaScript(unittest.TestCase):
105
 
106
  def setUp(self):
107
- self.patcher = patch('app.util.TextChunker.Chunker.count_tokens', side_effect=mock_count_tokens)
108
  self.mock_count_tokens = self.patcher.start()
109
  self.code_chunker = CodeChunker(file_extension='js')
110
  self.mock_codebase = load_json('mock_codefiles.json')
@@ -181,7 +181,7 @@ class TestCodeChunkerJavaScript(unittest.TestCase):
181
  class TestCodeChunkerCSS(unittest.TestCase):
182
 
183
  def setUp(self):
184
- self.patcher = patch('app.util.TextChunker.Chunker.count_tokens', side_effect=mock_count_tokens)
185
  self.mock_count_tokens = self.patcher.start()
186
  self.code_chunker = CodeChunker(file_extension='css')
187
  #Load the JSON data
@@ -214,7 +214,7 @@ class TestCodeChunkerCSS(unittest.TestCase):
214
  class TestCodeChunkerTypeScript(unittest.TestCase):
215
 
216
  def setUp(self):
217
- self.patcher = patch('app.util.TextChunker.Chunker.count_tokens', side_effect=mock_count_tokens)
218
  self.mock_count_tokens = self.patcher.start()
219
  self.code_chunker = CodeChunker(file_extension='ts')
220
  self.mock_codebase = load_json('mock_codefiles.json')
@@ -237,7 +237,7 @@ class TestCodeChunkerTypeScript(unittest.TestCase):
237
  class TestCodeChunkerRuby(unittest.TestCase):
238
 
239
  def setUp(self):
240
- self.patcher = patch('app.util.TextChunker.Chunker.count_tokens', side_effect=mock_count_tokens)
241
  self.mock_count_tokens = self.patcher.start()
242
  self.code_chunker = CodeChunker(file_extension='rb')
243
  self.mock_codebase = load_json('mock_codefiles.json')
@@ -260,7 +260,7 @@ class TestCodeChunkerRuby(unittest.TestCase):
260
  class TestCodeChunkerPHP(unittest.TestCase):
261
 
262
  def setUp(self):
263
- self.patcher = patch('app.util.TextChunker.Chunker.count_tokens', side_effect=mock_count_tokens)
264
  self.mock_count_tokens = self.patcher.start()
265
  self.code_chunker = CodeChunker(file_extension='php')
266
  self.mock_codebase = load_json('mock_codefiles.json')
 
104
  class TestCodeChunkerJavaScript(unittest.TestCase):
105
 
106
  def setUp(self):
107
+ self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
108
  self.mock_count_tokens = self.patcher.start()
109
  self.code_chunker = CodeChunker(file_extension='js')
110
  self.mock_codebase = load_json('mock_codefiles.json')
 
181
  class TestCodeChunkerCSS(unittest.TestCase):
182
 
183
  def setUp(self):
184
+ self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
185
  self.mock_count_tokens = self.patcher.start()
186
  self.code_chunker = CodeChunker(file_extension='css')
187
  #Load the JSON data
 
214
  class TestCodeChunkerTypeScript(unittest.TestCase):
215
 
216
  def setUp(self):
217
+ self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
218
  self.mock_count_tokens = self.patcher.start()
219
  self.code_chunker = CodeChunker(file_extension='ts')
220
  self.mock_codebase = load_json('mock_codefiles.json')
 
237
  class TestCodeChunkerRuby(unittest.TestCase):
238
 
239
  def setUp(self):
240
+ self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
241
  self.mock_count_tokens = self.patcher.start()
242
  self.code_chunker = CodeChunker(file_extension='rb')
243
  self.mock_codebase = load_json('mock_codefiles.json')
 
260
  class TestCodeChunkerPHP(unittest.TestCase):
261
 
262
  def setUp(self):
263
+ self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
264
  self.mock_count_tokens = self.patcher.start()
265
  self.code_chunker = CodeChunker(file_extension='php')
266
  self.mock_codebase = load_json('mock_codefiles.json')