# jsonl-to-csv / app.py
# rushankg's picture
# Create app.py
# 1b98b85 verified
import streamlit as st
import pandas as pd
import os
def _load_records(file):
    """Parse every non-blank line of *file* into a Python object.

    Args:
        file: Iterable of lines (``str`` or ``bytes``), e.g. an uploaded file.

    Returns:
        list: One parsed object per non-blank line, in file order.

    Raises:
        ValueError: If a line is neither valid JSON nor a Python literal.
    """
    # Imported locally so this block does not depend on the file's import header.
    import ast
    import json

    records = []
    for line_number, raw in enumerate(file, start=1):
        # Uploaded files yield bytes; "utf-8-sig" also strips a leading BOM.
        text = raw.decode("utf-8-sig") if isinstance(raw, bytes) else raw
        text = text.strip()
        if not text:
            # Blank lines (including a trailing newline) are not records.
            continue
        try:
            records.append(json.loads(text))
        except ValueError as json_err:
            # SECURITY: this used to be eval(), which executes arbitrary code
            # from the upload and also rejects JSON's true/false/null.
            # literal_eval keeps Python-literal lines (e.g. single-quoted
            # dicts) working without executing anything.
            try:
                records.append(ast.literal_eval(text))
            except (ValueError, SyntaxError, TypeError):
                raise ValueError(
                    f"Line {line_number} is not valid JSON: {json_err}"
                ) from json_err
    return records


def process_jsonl(file):
    """
    Processes an uploaded JSONL file to expand the 'messages' column and saves the result as a CSV.

    Each entry of 'messages' is spread into positional slots (0, 1, ...). A slot
    holding dicts becomes one column per key, named ``message_<slot>_<key>``;
    any other slot becomes a single ``message_<slot>`` column. Files without a
    'messages' column are written out unchanged.

    Args:
        file: Uploaded JSONL file (iterable of ``str`` or ``bytes`` lines).

    Returns:
        str: Path to the expanded CSV file.

    Raises:
        ValueError: If a non-blank line cannot be parsed.
    """
    # Read the JSONL file
    df = pd.DataFrame(_load_records(file))

    # Expand the 'messages' column if it exists
    if 'messages' in df.columns:
        slots = df['messages'].apply(pd.Series)
        # Collect the pieces and concatenate once instead of once per slot.
        parts = [df.drop(columns=['messages'])]
        for col in slots.columns:
            slot = slots[col]
            if slot.apply(lambda x: isinstance(x, dict)).any():
                expanded = slot.apply(pd.Series)
                # Rows with fewer messages hold NaN in this slot, which
                # pd.Series() turns into a column labelled 0. JSON keys are
                # always strings, so an all-NaN integer-0 column is padding.
                if 0 in expanded.columns and expanded[0].isna().all():
                    expanded = expanded.drop(columns=[0])
                parts.append(expanded.add_prefix(f'message_{col}_'))
            else:
                parts.append(slot.rename(f'message_{col}'))
        # Merge expanded columns back into the original DataFrame
        df = pd.concat(parts, axis=1)

    # Save the expanded DataFrame to a CSV file
    output_csv_path = "expanded_messages_data.csv"
    df.to_csv(output_csv_path, index=False)
    return output_csv_path
# Streamlit app: upload a JSONL file, convert it, and offer the CSV for download.
st.title("JSONL to CSV Converter with Message Expansion")
st.write("Upload a JSONL file, and download the processed CSV file with the `messages` column expanded.")

uploaded_file = st.file_uploader("Upload your JSONL file", type=["jsonl"])

if uploaded_file is not None:
    st.success("File uploaded successfully!")

    csv_bytes = None
    try:
        # Process the uploaded file
        output_csv_path = process_jsonl(uploaded_file)
        # Read the result into memory so the handle is closed before the
        # file is deleted; an open handle blocks deletion on Windows.
        with open(output_csv_path, "rb") as csv_file:
            csv_bytes = csv_file.read()
    except Exception as err:  # UI boundary: report the failure to the user
        st.error(f"Could not process the uploaded file: {err}")
    finally:
        # Ensure cleanup of temporary files, even when processing failed
        if os.path.exists("expanded_messages_data.csv"):
            os.remove("expanded_messages_data.csv")

    # Provide download link
    if csv_bytes is not None:
        st.download_button(
            label="Download Expanded CSV",
            data=csv_bytes,
            file_name="expanded_messages_data.csv",
            mime="text/csv",
        )