File size: 1,556 Bytes
a50881d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import unittest
from unittest.mock import patch
import tiktoken
import sys
import os

# Add parent directory to path to allow imports
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from Chunker import Chunker, CodeChunker
from utils import load_json

# Mocking the count_tokens function as it's external and not the focus of these tests
def mock_count_tokens(string: str, encoding_name='gpt-4') -> int:
    """Count tokens in *string* using the tokenizer for the given model.

    Stand-in for the project's ``utils.count_tokens`` so tests exercise a
    real tokenizer without depending on the project helper's internals.
    """
    return len(tiktoken.encoding_for_model(encoding_name).encode(string))

class BaseChunkerTest(unittest.TestCase):
    """Base class for all code chunker tests with common setup and utilities.

    Subclasses MUST assign ``self.code_chunker`` (typically a ``CodeChunker``
    instance) in their own ``setUp`` before calling ``run_chunker_test``.
    """

    def setUp(self):
        # NOTE(review): patching 'utils.count_tokens' only intercepts lookups
        # through the utils module; if Chunker does `from utils import
        # count_tokens`, the target should be 'Chunker.count_tokens' — verify.
        self.patcher = patch('utils.count_tokens', side_effect=mock_count_tokens)
        self.mock_count_tokens = self.patcher.start()
        # addCleanup (instead of tearDown) guarantees the patch is stopped
        # even if a later setUp statement raises, preventing mock leakage
        # into other test classes.
        self.addCleanup(self.patcher.stop)
        # Fixture of sample source files, keyed by scenario name.
        self.mock_codebase = load_json('mock_codefiles.json')

    def run_chunker_test(self, code, token_limit=20):
        """Chunk *code*, reassemble it, and assert nothing was lost.

        Args:
            code: Source text to chunk (requires ``self.code_chunker``).
            token_limit: Maximum tokens per chunk passed to the chunker.

        Returns:
            Tuple of ``(chunks, final_code)`` for additional assertions.
        """
        chunks = self.code_chunker.chunk(code, token_limit=token_limit)
        Chunker.print_chunks(chunks)
        final_code = Chunker.consolidate_chunks_into_file(chunks)
        num_lines = Chunker.count_lines(final_code)

        # Round-trip invariants: same line count, and the original source
        # must appear verbatim inside the reassembled file.
        self.assertEqual(num_lines, len(code.split("\n")))
        self.assertIn(code, final_code)

        return chunks, final_code