File size: 1,907 Bytes
428777e
 
 
 
f41d972
428777e
 
 
 
 
 
 
f41d972
428777e
 
 
 
 
f41d972
 
 
 
 
 
 
 
 
 
 
 
 
428777e
f41d972
 
 
 
 
 
a266b98
f41d972
 
 
 
 
 
 
 
 
 
 
 
a23421d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# This is the Streamlit app for the current project, hosted on Hugging Face Spaces.

import streamlit as st
from openai_chat_completion import OpenAIChatCompletions
from dataclean_hf import main

st.title("Kaleidoscope Data - Data Cleaning LLM App")

# Short disclaimer rendered directly below the page title.
st.write(
    "This app is a demo of the LLM model for data cleaning. "
    "It is a work in progress and is not yet ready for production use."
)

# Input widgets: a free-text box and a CSV file uploader.
text_input = st.text_input("Enter text", value="")
csv_file = st.file_uploader("Upload CSV", type=["csv"])

# Button that runs the data cleaning API: the OpenAIChatCompletions class from
# openai_chat_completion.py for typed text, or dataclean_hf.main for a CSV.
#
# st.button() returns True only during the single script run triggered by its
# click. Anything rendered inside the `if` therefore disappears on the next
# rerun (for example right after the download button is pressed). To keep the
# output on screen, results are stored in st.session_state and rendered
# outside the `if`.
if st.button("Run Data Cleaning API"):

    # Drop the previous run's output so a stale result is never shown for a
    # new request.
    for stale_key in ("cleaned_text", "cleaned_csv"):
        if stale_key in st.session_state:
            del st.session_state[stale_key]

    # Text takes precedence: if text_input is not empty, clean the text.
    if text_input:

        model = "gpt-4"  # "gpt-3.5-turbo"
        sys_mes = "prompts/gpt4-system-message.txt"

        # Instantiate the chat wrapper and request a completion for the text.
        chat = OpenAIChatCompletions(model=model, system_message=sys_mes)
        response = chat.openai_chat_completion(text_input, n_shot=5)

        # The response is indexed like a chat-completion payload; keep only
        # the message content of the first choice.
        st.session_state["cleaned_text"] = (
            response['choices'][0]['message']['content']
        )

    # Otherwise, if a CSV file was uploaded, clean the file.
    elif csv_file:

        # NOTE(review): main() is assumed to return a pandas DataFrame (it
        # must at least provide .to_csv()) - confirm against dataclean_hf.
        output_df = main(csv_file)

        # Serialize once per click and keep the bytes in session state, so
        # the conversion is not recomputed on every rerun.
        st.session_state["cleaned_csv"] = output_df.to_csv().encode('utf-8')

    # Neither input was provided: ask the user for one.
    else:
        st.write("Please enter text or upload a CSV file.")

# Render the most recent result on every run so it survives reruns caused by
# other widget interactions.
if "cleaned_text" in st.session_state:
    st.write(st.session_state["cleaned_text"])
elif "cleaned_csv" in st.session_state:
    st.download_button(
        label="Download data as CSV",
        data=st.session_state["cleaned_csv"],
        file_name='cleaned_df.csv',
        mime='text/csv',
    )