abdfajar707 committed on
Commit
23599de
·
verified ·
1 Parent(s): 3a37c48

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +51 -14
  2. data.py +18 -0
  3. requirements.txt +3 -0
  4. streamlit_app.py +75 -0
README.md CHANGED
@@ -1,14 +1,51 @@
1
- ---
2
- title: DialogData
3
- emoji: 🐨
4
- colorFrom: green
5
- colorTo: blue
6
- sdk: streamlit
7
- sdk_version: 1.42.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- short_description: Analisis data menggunakan PandasAI
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Chat with your Dataset
2
+
3
+ This project demonstrates a web-based application to query a dataset through natural language.
4
+
5
+ ![](./assets/image.png)
6
+
7
+ For this purpose, it uses:
8
+
9
+ - [Streamlit](https://streamlit.io/) to build a data science web app
10
+ - [PandasAI](https://pandas-ai.com/) to generate Pandas code from a natural-language query through [OpenAI GPT-3.5](https://platform.openai.com/docs/api-reference)
11
+
12
+ ## Download dataset
13
+
14
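+ Download the [dataset](https://github.com/Fraud-Detection-Handbook/simulated-data-transformed) into a sub-folder of the `data` folder at the root of the project (for example `data/simulated-data-transformed`). The app lists the sub-folders of `data` and loads every file found in the one you select.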
15
+
16
+ ## Run the project
17
+
18
+ If you don't have a Python environment available, you can use the [conda package manager](https://docs.conda.io/projects/conda/en/latest/index.html) which comes with the [Anaconda distribution](https://www.anaconda.com/download) to manage a clean Python environment.
19
+
20
+ Create a new environment and activate it:
21
+
22
+ ```sh
23
+ conda create -n streamlit-pandasai python=3.9
24
+ conda activate streamlit-pandasai
25
+ ```
26
+
27
+ Install the Python dependencies in the activated Python environment:
28
+
29
+ ```sh
30
+ pip install -r requirements.txt
31
+ ```
32
+
33
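+ Create a [new API key](https://platform.openai.com/account/api-keys) and store it in an environment variable before starting the app. Note that `streamlit_app.py` reads the key from `BIT_OPENAI_API_KEY`, so use that name in place of `OPENAI_API_KEY` in the commands below.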
34
+
35
+ On Windows:
36
+
37
+ ```bat
38
+ set OPENAI_API_KEY=sk-...
39
+ ```
40
+
41
+ On Unix:
42
+
43
+ ```sh
44
+ export OPENAI_API_KEY="sk-..."
45
+ ```
46
+
47
+ Run the Streamlit project:
48
+
49
+ ```sh
50
+ streamlit run streamlit_app.py
51
+ ```
data.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+ import streamlit as st
6
+
7
+
8
def load_file(path: "str | Path") -> pd.DataFrame:
    """Load a single pickled DataFrame from ``path``.

    Args:
        path: Location of the pickle file, given as a string or a ``Path``.

    Returns:
        The DataFrame stored in the file.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the file is not a valid pickle.
        TypeError: If the pickle does not contain a ``pandas.DataFrame``.
    """
    # SECURITY: unpickling can execute arbitrary code embedded in the file.
    # Only place files from a trusted source where this function reads them.
    with open(path, "rb") as f:
        dataset = pickle.load(f)

    # Fail here with a clear message instead of later, when the object is
    # handed to pandas as if it were a DataFrame.
    if not isinstance(dataset, pd.DataFrame):
        raise TypeError(
            f"{path} does not contain a pandas DataFrame "
            f"(found {type(dataset).__name__})"
        )
    return dataset
12
+
13
+
14
@st.cache_data
def load_data(folder: str) -> pd.DataFrame:
    """Load every dataset file in ``folder`` and concatenate them.

    Only regular, non-hidden files directly inside ``folder`` are read;
    sub-directories and dot-files (e.g. ``.DS_Store``, ``.gitkeep``) are
    skipped. Files are processed in sorted path order so that the row order
    of the result does not depend on the filesystem's listing order.

    The result is cached by Streamlit through ``st.cache_data``.

    Args:
        folder: Directory containing the pickled dataset files.

    Returns:
        The concatenation of all loaded DataFrames.

    Raises:
        ValueError: If ``folder`` contains no dataset files.
        OSError: If ``folder`` cannot be listed.
    """
    dataset_files = sorted(
        entry
        for entry in Path(folder).iterdir()
        if entry.is_file() and not entry.name.startswith(".")
    )
    if not dataset_files:
        # pd.concat would also raise ValueError here, but without saying
        # which folder was empty.
        raise ValueError(f"No dataset files found in {folder!r}")

    all_datasets = [load_file(file) for file in dataset_files]
    return pd.concat(all_datasets)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ openai
2
+ pandasai==1.4.10
3
+ streamlit
streamlit_app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from pandasai import SmartDataframe
4
+ from pandasai.callbacks import BaseCallback
5
+ from pandasai.llm import OpenAI
6
+ from pandasai.responses.response_parser import ResponseParser
7
+ from data import load_data
8
+
9
+ # Fungsi untuk mendapatkan daftar folder dalam direktori
10
def list_folders(directory):
    """Return the names of the immediate sub-directories of ``directory``.

    Names are returned in sorted order so callers get a stable listing.
    Regular files are ignored.

    Args:
        directory: Path of the directory to inspect.

    Returns:
        A sorted list of sub-directory names; an empty list when
        ``directory`` does not exist.
    """
    try:
        with os.scandir(directory) as entries:
            return sorted(entry.name for entry in entries if entry.is_dir())
    except FileNotFoundError:
        # A missing root directory simply means there is nothing to offer.
        return []
12
+
13
class StreamlitCallback(BaseCallback):
    """Callback that forwards the text passed to ``on_code`` to a container.

    NOTE(review): presumably pandasai invokes ``on_code`` with the code it
    generated for a query; confirm against the ``BaseCallback`` contract.
    """

    def __init__(self, container) -> None:
        """Remember the object used to display code.

        Args:
            container: Object exposing a ``code`` method, which ``on_code``
                calls.
        """
        self.container = container

    def on_code(self, response: str):
        """Render ``response`` by calling ``code`` on the stored container."""
        target = self.container
        target.code(response)
20
+
21
class StreamlitResponse(ResponseParser):
    """Response parser that displays results with Streamlit.

    Each ``format_*`` method renders ``result["value"]`` on the page and
    returns ``None``.
    """

    def __init__(self, context) -> None:
        """Pass ``context`` straight through to ``ResponseParser``."""
        super().__init__(context)

    def format_dataframe(self, result):
        """Display ``result["value"]`` with ``st.dataframe``."""
        value = result["value"]
        st.dataframe(value)

    def format_plot(self, result):
        """Display ``result["value"]`` with ``st.image``."""
        value = result["value"]
        st.image(value)

    def format_other(self, result):
        """Display ``result["value"]`` with ``st.write``."""
        value = result["value"]
        st.write(value)
36
+
37
# Show the application title.
st.write("# Chat with Tabular Dataframe 🦙")

# Root directory whose sub-folders are offered as datasets.
root_dir = "./data"

# Sub-folders available under the root directory.
folders = list_folders(root_dir)

# Let the user pick which folder to load.
selected_folder = st.selectbox("Pilih folder data", folders)

if selected_folder:
    # Load the data from the selected folder.
    df = load_data(os.path.join(root_dir, selected_folder))

    # Show a short preview (last three rows) of the loaded data.
    with st.expander("🔎 Dataframe Preview"):
        st.write(df.tail(3))

    # Input box for the natural-language query.
    query = st.text_area("🗣️ Chat with Dataframe")
    container = st.container()

    # Run the query only once the user has typed something.
    if query:
        # Prefer the project-specific variable; fall back to the standard
        # name documented in the README.
        api_key = os.environ.get("BIT_OPENAI_API_KEY") or os.environ.get(
            "OPENAI_API_KEY"
        )
        if not api_key:
            # Show a readable message instead of a KeyError traceback.
            st.error(
                "OpenAI API key not found. Set the BIT_OPENAI_API_KEY "
                "(or OPENAI_API_KEY) environment variable and restart the app."
            )
            st.stop()

        llm = OpenAI(api_token=api_key)
        query_engine = SmartDataframe(
            df,
            config={
                "llm": llm,
                "response_parser": StreamlitResponse,
                # Code display is currently disabled:
                # "callback": StreamlitCallback(container),
            },
        )

        # StreamlitResponse renders the result on the page, so the return
        # value of chat() is not needed here.
        query_engine.chat(query)
else:
    # No folder was selected, so prompt the user to choose one.
    st.write("Silakan pilih folder untuk memuat data.")