import ast
import json
import os
import tempfile

import pandas as pd
import streamlit as st


def _parse_jsonl_line(raw, line_number):
    """Parse one non-blank JSONL line into a Python object.

    Args:
        raw: The line as ``str`` or ``bytes``.
        line_number: 1-based line number, used only in error messages.

    Returns:
        The decoded object (normally a dict).

    Raises:
        ValueError: If the line is neither valid JSON nor a Python literal
            (``UnicodeDecodeError`` for undecodable bytes is a subclass).
    """
    # "utf-8-sig" tolerates a byte-order mark on the first line of the file.
    text = raw.decode("utf-8-sig") if isinstance(raw, (bytes, bytearray)) else raw
    text = text.strip()
    try:
        return json.loads(text)
    except json.JSONDecodeError as json_err:
        # SECURITY: this parser used to call eval() on every uploaded line,
        # which executes arbitrary code. ast.literal_eval only accepts
        # literals, so it keeps Python-dict-style lines working safely.
        try:
            return ast.literal_eval(text)
        except (ValueError, TypeError, SyntaxError):
            raise ValueError(
                f"Line {line_number} is not valid JSON: {json_err}"
            ) from json_err


def process_jsonl(file, output_csv_path: str = "expanded_messages_data.csv") -> str:
    """
    Processes an uploaded JSONL file to expand the 'messages' column and saves the result as a CSV.

    Each entry of a record's 'messages' value becomes its own column group:
    dict entries are spread into ``message_<position>_<key>`` columns, any
    other entry is kept as a single ``message_<position>`` column.

    Args:
        file: Uploaded JSONL file (any object with ``readlines()`` yielding
            ``str`` or ``bytes`` lines).
        output_csv_path: Where to write the CSV. Defaults to the historical
            fixed file name in the current working directory.

    Returns:
        str: Path to the expanded CSV file.

    Raises:
        ValueError: If a non-blank line cannot be parsed.
    """
    # Read the JSONL file, skipping blank lines (e.g. a trailing newline).
    data = [
        _parse_jsonl_line(line, line_number)
        for line_number, line in enumerate(file.readlines(), start=1)
        if line.strip()
    ]
    df = pd.DataFrame(data)

    # Expand the 'messages' column if it exists
    if 'messages' in df.columns:
        messages_df = df['messages'].apply(pd.Series)

        # Collect the pieces and concatenate once instead of growing a
        # DataFrame inside the loop.
        pieces = [df.drop(columns=['messages'])]
        for col in messages_df.columns:
            if messages_df[col].apply(lambda x: isinstance(x, dict)).any():
                # NOTE(review): positions missing from shorter message lists
                # are NaN here; apply(pd.Series) appears to turn those into an
                # extra `message_<col>_0` column - confirm and filter if unwanted.
                expanded_columns = messages_df[col].apply(pd.Series)
                pieces.append(expanded_columns.add_prefix(f'message_{col}_'))
            else:
                pieces.append(messages_df[col].rename(f'message_{col}'))

        # Merge expanded columns back into the original DataFrame
        df = pd.concat(pieces, axis=1)

    # Save the expanded DataFrame to a CSV file
    df.to_csv(output_csv_path, index=False)
    return output_csv_path


# Streamlit app
st.title("JSONL to CSV Converter with Message Expansion")
st.write("Upload a JSONL file, and download the processed CSV file with the `messages` column expanded.")

uploaded_file = st.file_uploader("Upload your JSONL file", type=["jsonl"])

if uploaded_file is not None:
    st.success("File uploaded successfully!")

    # Use a unique temporary file per run so concurrent sessions cannot
    # overwrite or delete each other's output.
    fd, temp_csv_path = tempfile.mkstemp(suffix=".csv")
    os.close(fd)
    try:
        # Process the uploaded file
        output_csv_path = process_jsonl(uploaded_file, temp_csv_path)
        # Read the result into memory so the handle is closed before cleanup.
        with open(output_csv_path, "rb") as csv_file:
            csv_bytes = csv_file.read()
    except ValueError as exc:
        st.error(f"Could not process the uploaded file: {exc}")
    else:
        # Provide download link
        st.download_button(
            label="Download Expanded CSV",
            data=csv_bytes,
            file_name="expanded_messages_data.csv",
            mime="text/csv"
        )
    finally:
        # Ensure cleanup of temporary files
        if os.path.exists(temp_csv_path):
            os.remove(temp_csv_path)