rushankg committed on
Commit
1b98b85
·
verified ·
1 Parent(s): 27ab21c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+
5
def process_jsonl(file):
    """
    Convert an uploaded JSONL file to a CSV with the 'messages' column expanded.

    Every non-blank line is parsed as one record. Lines are parsed as JSON
    first; lines that are not valid JSON but are plain Python literals (for
    example single-quoted dicts) are still accepted through
    ``ast.literal_eval``. Nothing from the upload is ever executed.

    If the records contain a 'messages' column, each list element becomes its
    own column (``message_<i>``), and elements that are dicts are further
    split into one column per key (``message_<i>_<key>``).

    Args:
        file: Uploaded JSONL file. Any object whose ``readlines()`` returns
            ``bytes`` or ``str`` lines works.

    Returns:
        str: Path to the expanded CSV file.

    Raises:
        ValueError: If a non-blank line is neither valid JSON nor a Python
            literal, or if a bytes line is not valid UTF-8.
    """
    # Imported locally so this function does not depend on edits to the
    # module-level import block.
    import ast
    import json

    records = []
    for line_no, raw in enumerate(file.readlines(), start=1):
        text = raw.decode("utf-8") if isinstance(raw, (bytes, bytearray)) else raw
        # Drop a UTF-8 byte-order mark (only ever present on the first line)
        # and surrounding whitespace / line terminators.
        text = text.lstrip("\ufeff").strip()
        if not text:
            # Blank lines (e.g. a trailing newline) carry no record.
            continue
        try:
            records.append(json.loads(text))
        except json.JSONDecodeError:
            # SECURITY: this used to be eval(line), which runs arbitrary code
            # from the upload. literal_eval only builds literals, so it keeps
            # Python-style lines working without executing anything.
            try:
                records.append(ast.literal_eval(text))
            except (ValueError, SyntaxError, TypeError) as err:
                raise ValueError(
                    f"Line {line_no} is not valid JSON or a Python literal"
                ) from err

    df = pd.DataFrame(records)

    # Expand the 'messages' column if it exists
    if 'messages' in df.columns:
        # One column per position in each row's messages list.
        messages_df = df['messages'].apply(pd.Series)
        expanded_parts = []

        for col in messages_df.columns:
            column = messages_df[col]
            if column.apply(lambda x: isinstance(x, dict)).any():
                # Dict-valued messages: one column per key, prefixed with the
                # message position so keys from different messages don't clash.
                expanded_parts.append(
                    column.apply(pd.Series).add_prefix(f'message_{col}_')
                )
            else:
                expanded_parts.append(column.rename(f'message_{col}'))

        # Merge expanded columns back into the original DataFrame in one pass.
        df = pd.concat([df.drop(columns=['messages']), *expanded_parts], axis=1)

    # Save the expanded DataFrame to a CSV file.
    # NOTE: the path is fixed and relative to the working directory, so every
    # call overwrites the same file.
    output_csv_path = "expanded_messages_data.csv"
    df.to_csv(output_csv_path, index=False)

    return output_csv_path
40
+
41
# Streamlit app: upload a JSONL file, convert it with process_jsonl, and
# offer the resulting CSV for download.
st.title("JSONL to CSV Converter with Message Expansion")

st.write("Upload a JSONL file, and download the processed CSV file with the `messages` column expanded.")

uploaded_file = st.file_uploader("Upload your JSONL file", type=["jsonl"])

if uploaded_file is not None:
    st.success("File uploaded successfully!")

    # Process the uploaded file
    output_csv_path = process_jsonl(uploaded_file)

    # Read the CSV into memory and close the handle before deleting the file.
    # Passing an open file object to the button left the handle unclosed, and
    # removing a file that is still open fails on Windows.
    try:
        with open(output_csv_path, "rb") as csv_file:
            csv_bytes = csv_file.read()
    finally:
        # Ensure cleanup of the temporary file even if reading fails
        if os.path.exists(output_csv_path):
            os.remove(output_csv_path)

    # Provide download link
    st.download_button(
        label="Download Expanded CSV",
        data=csv_bytes,
        file_name="expanded_messages_data.csv",
        mime="text/csv",
    )