File size: 2,605 Bytes
73588d1
428777e
 
 
f41d972
73588d1
428777e
 
 
 
 
 
 
f41d972
428777e
 
 
 
 
f41d972
 
73588d1
57da852
 
 
 
 
 
 
 
 
 
f41d972
 
73588d1
db2ee0f
f41d972
 
 
73588d1
 
 
428777e
f41d972
477d34f
f41d972
477d34f
 
f41d972
477d34f
 
 
73588d1
477d34f
 
73588d1
477d34f
 
 
 
 
f41d972
477d34f
f41d972
477d34f
 
 
 
 
 
a23421d
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
""" this app is streamlit app for the current project hosted on HuggingFace spaces """

import streamlit as st
from openai_chat_completion import OpenAIChatCompletions
from dataclean_hf import main
from util import json_to_dict #, join_dicts

st.title("Kaleidoscope Data - Data Cleaning LLM App")

st.write("This app is a demo of the LLM model for data cleaning. It is a work in progress and is not yet ready for production use.")

# Inputs: a free-text box and a CSV uploader. Only the text path is currently
# wired up; the CSV path is disabled further down.
text_input = st.text_input("Enter text", "")
csv_file = st.file_uploader("Upload CSV", type=['csv'])

# Button that sends the entered text through the OpenAIChatCompletions class
# from openai_chat_completion.py and displays the result.
if st.button("Run Data Cleaning API"):

    # Text path: runs whenever text_input is non-empty.
    if text_input:

        MODEL = "gpt-4" # "gpt-3.5-turbo"

        # Candidate locations of the system prompt, tried in order; the first
        # one that exists is used.
        prompt_paths = (
            'prompts/gpt4-system-message2.txt',
            '../prompts/gpt4-system-message2.txt',
        )
        sys_mes = None
        for prompt_path in prompt_paths:
            try:
                # The with-statement closes the file; no explicit close() needed.
                with open(prompt_path, 'r', encoding='utf8') as f:
                    sys_mes = f.read()
            except FileNotFoundError:
                continue
            break

        if sys_mes is None:
            # Same exception type as before, but raised outside an except
            # handler (no chained traceback) and naming every path tried.
            raise FileNotFoundError(
                "System message file not found; tried: " + ", ".join(prompt_paths)
            )

        # Instantiate the chat wrapper and request a completion for the text.
        chat = OpenAIChatCompletions(model=MODEL, system_message=sys_mes)
        response = chat.openai_chat_completion(text_input, n_shot=None)

        # Pull the message text of the first choice out of the response and
        # display it after converting it with json_to_dict.
        response_content = response['choices'][0]['message']['content']
        st.write(json_to_dict(response_content))

    # CSV path: a file was uploaded but no text was entered.
    elif csv_file is not None:
        # CSV processing is disabled (see the commented-out code below), so say
        # so explicitly instead of claiming that nothing was provided.
        st.write("CSV cleaning is not available yet. Please enter text instead.")

        # TODO(review): disabled CSV implementation kept for reference;
        # re-enable once it is confirmed working.
        #
        #     # run data cleaning API on csv_file
        #     output_df = main(csv_file)
        #
        #     @st.cache_data
        #     def convert_df(df):
        #         """Convert a pd.DataFrame to UTF-8 encoded CSV bytes."""
        #         # IMPORTANT: Cache the conversion to prevent computation on every rerun
        #         return df.to_csv().encode('utf-8')
        #
        #     csv = convert_df(output_df)
        #
        #     st.download_button(
        #         label="Download data as CSV",
        #         data=csv,
        #         file_name='cleaned_df.csv',
        #         mime='text/csv',
        #     )

    # Neither text nor a CSV file was provided.
    else:
        st.write("Please enter text or upload a CSV file.")