Joe Shamon committed on
Commit
5463b34
·
unverified ·
2 Parent(s): 5fb15bd 19c8428

Merge pull request #2 from CintraAI/enhancement/updated-header

Browse files
Files changed (1) hide show
  1. app.py +13 -17
app.py CHANGED
@@ -3,6 +3,9 @@ import json
3
  import os
4
  from Chunker import CodeChunker
5
 
 
 
 
6
  # Function to load JSON data
7
  def load_json_file(file_path):
8
  with open(file_path, 'r') as file:
@@ -12,29 +15,24 @@ def load_json_file(file_path):
12
  def read_code_from_file(uploaded_file):
13
  return uploaded_file.getvalue().decode("utf-8")
14
 
15
- # Setup Streamlit page
16
- st.set_page_config(page_title="Cintra Code Chunker", layout="wide")
17
 
18
- # Assuming app.py and mock_codefiles.json are in the same directory
19
  json_file_path = os.path.join(os.path.dirname(__file__), 'mock_codefiles.json')
20
  code_files_data = load_json_file(json_file_path)
21
 
22
  # Extract filenames and contents
23
  code_files = list(code_files_data.keys())
24
 
25
- # UI Elements
26
  st.title('Cintra Code Chunker')
27
 
28
- # Create two columns for file selection and file upload
29
- col1, col2 = st.columns(2)
30
-
31
- with col1:
32
  # File selection dropdown
33
  selected_file_name = st.selectbox("Select an example code file", code_files)
34
 
35
- with col2:
36
  # File upload
37
- uploaded_file = st.file_uploader("Or upload your code file", type=['py', 'js', 'css'])
38
 
39
  # Determine the content and file extension based on selection or upload
40
  if uploaded_file is not None:
@@ -53,16 +51,15 @@ def get_language_by_extension(file_extension):
53
  elif file_extension == 'css':
54
  return 'css'
55
  else:
56
- return None # Default to no syntax highlighting if extension is not recognized
57
 
58
  language = get_language_by_extension(file_extension)
59
 
60
- # User input for Token Chunk Size
61
- token_chunk_size = st.number_input('Token Chunk Size Target', min_value=5, max_value=1000, value=25)
62
 
63
- col1, col2 = st.columns(2)
64
 
65
- with col1:
66
  st.subheader('Original File')
67
  st.code(code_content, language=language)
68
 
@@ -72,8 +69,7 @@ code_chunker = CodeChunker(file_extension=file_extension)
72
  # Chunk the code content
73
  chunked_code_dict = code_chunker.chunk(code_content, token_chunk_size)
74
 
75
- # Automatically display chunks without needing to select
76
- with col2:
77
  st.subheader('Chunked Code')
78
  for chunk_key, chunk_code in chunked_code_dict.items():
79
  st.code(chunk_code, language=language)
 
3
  import os
4
  from Chunker import CodeChunker
5
 
6
+ # Set Streamlit page config at the very beginning
7
+ st.set_page_config(page_title="Cintra Code Chunker", layout="wide")
8
+
9
  # Function to load JSON data
10
  def load_json_file(file_path):
11
  with open(file_path, 'r') as file:
 
15
  def read_code_from_file(uploaded_file):
16
  return uploaded_file.getvalue().decode("utf-8")
17
 
18
+ st.link_button('Contribute on GitHub', 'https://github.com/CintraAI/code-chunker', help=None, type="secondary", disabled=False, use_container_width=False)
 
19
 
 
20
  json_file_path = os.path.join(os.path.dirname(__file__), 'mock_codefiles.json')
21
  code_files_data = load_json_file(json_file_path)
22
 
23
  # Extract filenames and contents
24
  code_files = list(code_files_data.keys())
25
 
 
26
  st.title('Cintra Code Chunker')
27
 
28
+ selection_col, upload_col = st.columns(2)
29
+ with selection_col:
 
 
30
  # File selection dropdown
31
  selected_file_name = st.selectbox("Select an example code file", code_files)
32
 
33
+ with upload_col:
34
  # File upload
35
+ uploaded_file = st.file_uploader("Or upload your code file", type=['py', 'js', 'css', 'jsx'])
36
 
37
  # Determine the content and file extension based on selection or upload
38
  if uploaded_file is not None:
 
51
  elif file_extension == 'css':
52
  return 'css'
53
  else:
54
+ return None
55
 
56
  language = get_language_by_extension(file_extension)
57
 
58
+ token_chunk_size = st.number_input('Chunk Size Target Measured in Tokens (tiktoken, gpt-4)', min_value=5, max_value=1000, value=25)
 
59
 
60
+ original_col, chunked_col = st.columns(2)
61
 
62
+ with original_col:
63
  st.subheader('Original File')
64
  st.code(code_content, language=language)
65
 
 
69
  # Chunk the code content
70
  chunked_code_dict = code_chunker.chunk(code_content, token_chunk_size)
71
 
72
+ with chunked_col:
 
73
  st.subheader('Chunked Code')
74
  for chunk_key, chunk_code in chunked_code_dict.items():
75
  st.code(chunk_code, language=language)