Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import os
|
4 |
+
|
5 |
+
def _is_missing(value):
    """Return True for the placeholders pandas uses for an absent cell (None or NaN)."""
    return value is None or (isinstance(value, float) and value != value)


def _expand_cells(cells, index):
    """Turn each cell into a row via pd.Series and stack the rows into one DataFrame."""
    return pd.DataFrame([pd.Series(cell) for cell in cells], index=index)


def process_jsonl(file, output_csv_path="expanded_messages_data.csv"):
    """
    Processes an uploaded JSONL file to expand the 'messages' column and saves the result as a CSV.

    Each non-blank line is parsed as one JSON document. If the resulting table
    has a 'messages' column, every cell is spread across positional columns;
    positions that hold dicts are spread again into one column per key, named
    'message_<position>_<key>', and other positions become 'message_<position>'.

    Args:
        file: Uploaded JSONL file. Any object with ``readlines()`` that yields
            ``str`` or ``bytes`` lines.
        output_csv_path: Where to write the CSV. Defaults to the historical
            fixed name in the current working directory.

    Returns:
        str: Path to the expanded CSV file.

    Raises:
        ValueError: If a non-blank line is not valid JSON; the message carries
            the 1-based line number.
    """
    # Function-scope import: json is not among the module's top-level imports.
    import json

    # SECURITY: lines were previously parsed with eval(), which executes
    # arbitrary code from the upload and rejects the JSON literals
    # true/false/null. json.loads parses data only and accepts str or bytes.
    records = []
    for line_number, raw_line in enumerate(file.readlines(), start=1):
        line = raw_line.strip()
        if not line:
            # Blank lines (for example a trailing newline) carry no record.
            continue
        try:
            records.append(json.loads(line))
        except ValueError as err:
            raise ValueError(f"Invalid JSON on line {line_number}: {err}") from err
    df = pd.DataFrame(records)

    # Expand the 'messages' column if it exists
    if 'messages' in df.columns:
        messages_df = _expand_cells(df['messages'], df.index)
        pieces = []

        for col in messages_df.columns:
            column = messages_df[col]
            if any(isinstance(cell, dict) for cell in column):
                # Rows with fewer messages hold NaN at this position. Mapping
                # those to {} yields an all-empty row rather than a stray
                # 'message_<col>_0' column.
                cells = [{} if _is_missing(cell) else cell for cell in column]
                expanded = _expand_cells(cells, column.index)
                pieces.append(expanded.add_prefix(f'message_{col}_'))
            else:
                pieces.append(column.rename(f'message_{col}'))

        # Merge expanded columns back into the original DataFrame in one concat
        df = pd.concat([df.drop(columns=['messages']), *pieces], axis=1)

    # Save the expanded DataFrame to a CSV file
    df.to_csv(output_csv_path, index=False)

    return output_csv_path
|
40 |
+
|
41 |
+
# Streamlit app
|
42 |
+
# Page header and instructions.
st.title("JSONL to CSV Converter with Message Expansion")

st.write("Upload a JSONL file, and download the processed CSV file with the `messages` column expanded.")

uploaded_file = st.file_uploader("Upload your JSONL file", type=["jsonl"])

if uploaded_file is not None:
    st.success("File uploaded successfully!")

    # Process the uploaded file
    output_csv_path = process_jsonl(uploaded_file)

    # Read the CSV into memory and close the handle before offering it for
    # download. The file object was previously handed to the widget unclosed
    # and the file removed while that handle was still open.
    try:
        with open(output_csv_path, "rb") as csv_file:
            csv_bytes = csv_file.read()
    finally:
        # Ensure cleanup of temporary files, even if reading fails.
        if os.path.exists(output_csv_path):
            os.remove(output_csv_path)

    # Provide download link
    st.download_button(
        label="Download Expanded CSV",
        data=csv_bytes,
        file_name="expanded_messages_data.csv",
        mime="text/csv",
    )
|