File size: 2,560 Bytes
9afc52f
182adbd
4644b40
9358586
1d6fa11
dbc4628
9358586
 
 
182adbd
34165ae
 
 
 
9358586
 
4644b40
9358586
 
 
dbc4628
 
 
4644b40
9358586
 
4644b40
34165ae
 
4644b40
34165ae
 
 
1d6fa11
34165ae
 
 
dbc4628
34165ae
 
 
 
 
 
 
dbc4628
 
 
 
 
 
 
 
 
 
 
 
 
9358586
34165ae
 
 
9358586
 
 
 
dbc4628
9358586
1d6fa11
 
9358586
1d6fa11
 
9358586
1d6fa11
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import streamlit as st
import json
import os
from Chunker import CodeChunker

# Function to load JSON data
def load_json_file(file_path):
    """Load and parse a JSON document from disk.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The parsed JSON value (whatever ``json.load`` produces for the file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # JSON is UTF-8 by specification; without an explicit encoding, open()
    # falls back to the platform locale and can mis-decode non-ASCII text.
    with open(file_path, 'r', encoding='utf-8') as handle:
        return json.load(handle)

# Function to read code from an uploaded file
def read_code_from_file(uploaded_file):
    """Return the text of an uploaded file, decoded as UTF-8.

    Args:
        uploaded_file: Object whose ``getvalue()`` returns the file's raw bytes.

    Returns:
        The decoded file content as a string.

    Raises:
        UnicodeDecodeError: If the bytes are not valid UTF-8.
    """
    raw_bytes = uploaded_file.getvalue()
    return raw_bytes.decode("utf-8")

# Configure the Streamlit page (title and wide layout)
st.set_page_config(page_title="Cintra Code Chunker", layout="wide")

# mock_codefiles.json is expected to sit in the same directory as this script
app_dir = os.path.dirname(__file__)
json_file_path = os.path.join(app_dir, 'mock_codefiles.json')
code_files_data = load_json_file(json_file_path)

# Names of the bundled example files (the keys of the loaded JSON data)
code_files = list(code_files_data.keys())

# Page heading
st.title('Cintra Code Chunker')

# Side-by-side inputs: example picker on the left, upload widget on the right
picker_col, upload_col = st.columns(2)

with picker_col:
    # Dropdown listing the example files
    selected_file_name = st.selectbox("Select an example code file", code_files)

with upload_col:
    # Upload widget restricted to the listed extensions
    uploaded_file = st.file_uploader("Or upload your code file", type=['py', 'js', 'css'])

# Determine the content and file extension based on selection or upload.
# An uploaded file takes precedence over the example chosen in the dropdown.
# The extension is lower-cased so that a name such as "App.PY" is treated the
# same as "app.py" by the language lookup and the chunker.
if uploaded_file is not None:
    code_content = read_code_from_file(uploaded_file)
    file_extension = uploaded_file.name.split('.')[-1].lower()
else:
    # No upload: fall back to the selected example ("" if it is missing)
    code_content = code_files_data.get(selected_file_name, "")
    # selected_file_name is falsy when there is nothing to select
    file_extension = selected_file_name.split('.')[-1].lower() if selected_file_name else None

# Determine the language for syntax highlighting
def get_language_by_extension(file_extension):
    """Map a file extension to the language name used for syntax highlighting.

    Matching is case-insensitive and tolerates surrounding whitespace and a
    leading dot, so ``"py"``, ``"PY"`` and ``".py"`` all resolve to
    ``"python"``.

    Args:
        file_extension: Extension (or language alias) to look up; may be
            ``None`` when no file is available.

    Returns:
        ``"python"``, ``"javascript"`` or ``"css"`` for recognized
        extensions, otherwise ``None`` (no syntax highlighting).
    """
    # Non-string input (e.g. None) cannot name a language
    if not isinstance(file_extension, str):
        return None

    normalized = file_extension.strip().lstrip('.').lower()
    if normalized in ('py', 'python'):
        return 'python'
    if normalized in ('js', 'jsx', 'javascript'):
        return 'javascript'
    if normalized == 'css':
        return 'css'
    return None  # Default to no syntax highlighting if extension is not recognized

# Resolve the syntax-highlighting language for the chosen file
language = get_language_by_extension(file_extension)

# Numeric input for the target chunk size in tokens (5 to 1000, default 25)
token_chunk_size = st.number_input('Token Chunk Size Target', min_value=5, max_value=1000, value=25)

# Two panes: the untouched file on the left, its chunks on the right
original_pane, chunked_pane = st.columns(2)

with original_pane:
    st.subheader('Original File')
    st.code(code_content, language=language)

# Build a chunker for this file extension and split the content
code_chunker = CodeChunker(file_extension=file_extension)
chunked_code_dict = code_chunker.chunk(code_content, token_chunk_size)

# Render every chunk as its own code box; no selection step is required
with chunked_pane:
    st.subheader('Chunked Code')
    for _chunk_id, chunk_text in chunked_code_dict.items():
        st.code(chunk_text, language=language)