izammohammed committed on
Commit
dd65c5d
·
verified ·
1 Parent(s): 773d205

added all of the files

Browse files
Files changed (6) hide show
  1. README.md +1 -13
  2. app.py +127 -1
  3. credentials.json +1 -0
  4. prompt.txt +34 -0
  5. requirements.txt +17 -0
  6. utils.py +11 -0
README.md CHANGED
@@ -1,13 +1 @@
1
- ---
2
- title: Geminsights
3
- emoji: 👀
4
- colorFrom: pink
5
- colorTo: pink
6
- sdk: streamlit
7
- sdk_version: 1.31.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ [Original repository](https://github.com/izam-mohammed/GemInsights)
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1 +1,127 @@
1
- hola.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ from utils import save_json, load_json
5
+ from markdown import markdown
6
+ from utils import load_json
7
+ from autoviz import AutoViz_Class
8
+ import base64
9
+ from google.cloud import aiplatform
10
+ import base64
11
+ import vertexai
12
+ from vertexai.preview.generative_models import GenerativeModel, Part
13
+ import json
14
+
15
# Point the Vertex AI SDK at the project and region used by this app.
aiplatform.init(project="geminsights", location="us-central1")

# The service-account key is kept as a JSON string in Streamlit secrets.
# strict=False lets the parser accept control characters inside string values.
json_file = json.loads(st.secrets["credentials"], strict=False)

# Write the key to disk and export its path through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable.
with open("credentials.json", "w") as key_file:
    json.dump(json_file, key_file, indent=2)

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials.json"
26
+
27
+
28
# Page header and the upload widget; `dataframe` stays None until a file is read.
dataframe = None
st.title("GemInsights 📊")
st.caption('A gemini powered data analysis tool to get insights from data 🔥')
file = st.file_uploader(
    "Pick a dataframe", type=["csv", "xlsx"], accept_multiple_files=False
)

if file is not None:
    _, extension = os.path.splitext(file.name)
    # Compare case-insensitively so a name such as "DATA.CSV" is still parsed
    # as CSV instead of falling through to the Excel reader.
    if extension.lower() == ".csv":
        dataframe = pd.read_csv(file)
    else:
        dataframe = pd.read_excel(file)
    st.write(dataframe.head())
    st.write(f"updated a dataframe with shape {dataframe.shape}")

if file is not None:
    # Free-text description of the dataset; it is appended to the model prompt.
    text_input = st.text_input(
        "Enter something about the data 👇",
        label_visibility="visible",
        disabled=False,
        placeholder="eg:- This is a sales dataframe",
    )

    # index=None starts the widget empty, so `option` is None until the user
    # picks a column.
    option = st.selectbox(
        "Which is the target column ? 🎯",
        tuple(dataframe.columns),
        index=None,
        placeholder="Select one column in here",
    )
58
+
59
def plot(dataframe, target):
    """Run AutoViz on ``dataframe`` and save the charts it produces.

    The frame is handed to AutoViz in memory (``dfte``), so the filename
    argument is left empty. Charts are requested in JPG format with
    ``plots`` as the save directory.

    Args:
        dataframe: The data to visualise.
        target: Column passed to AutoViz as the dependent variable.

    Returns:
        None. The value returned by AutoViz is discarded.
    """
    autoviz_options = {
        "sep": ",",
        "depVar": target,
        "dfte": dataframe,
        "header": 0,
        "verbose": 2,
        "lowess": False,
        "chart_format": "jpg",
        "max_rows_analyzed": 500,
        "max_cols_analyzed": 20,
        "save_plot_dir": "plots",
    }
    AutoViz_Class().AutoViz("", **autoviz_options)
76
+
77
def prompt_make(dataframe, target, info):
    """Build the text prompt and collect the chart images for ``target``.

    Args:
        dataframe: Not used here; kept so existing callers keep working.
        target: Target column name; charts are read from ``plots/<target>``.
        info: Free-text description of the dataset, appended to the prompt.

    Returns:
        A ``(prompt, images)`` tuple: the prompt string and a list of
        ``Part`` objects, one per chart file, tagged as ``image/jpeg``.

    Raises:
        FileNotFoundError: If the chart directory or ``prompt.txt`` is missing.
    """
    image_dir = f"plots/{target}"
    images = []
    # Sort the listing so the charts reach the model in a stable order.
    for image_file in sorted(os.listdir(image_dir)):
        image_path = os.path.join(image_dir, image_file)
        if not os.path.isfile(image_path):
            continue  # skip sub-directories; only chart files can be read
        # Close every chart file promptly instead of leaking the handle.
        with open(image_path, "rb") as image_handle:
            img = image_handle.read()
        # The raw bytes are passed as-is; a base64 encode/decode round-trip
        # would return the very same bytes.
        images.append(Part.from_data(img, mime_type="image/jpeg"))

    # Read the template as text. Reading it as bytes would place the
    # "b'...'" repr (with escaped newlines) into the prompt.
    with open("prompt.txt", encoding="utf-8") as prompt_file:
        data = prompt_file.read()
    prompt = f"{data}\n Here are some of the informations related to the dataset - '{info}'"

    return prompt, images
95
+
96
def generate_res(prompt, images):
    """Send the prompt and chart images to the Gemini vision model.

    Args:
        prompt: The text prompt, placed first in the request contents.
        images: List of image parts appended after the prompt.

    Returns:
        The ``text`` attribute of the model response.
    """
    print("prompting ...")
    generation_config = {
        "max_output_tokens": 2048,
        "temperature": 0.4,
        "top_p": 1,
        "top_k": 32,
    }
    contents = [prompt] + images
    vision_model = GenerativeModel("gemini-pro-vision")
    reply = vision_model.generate_content(
        contents,
        generation_config=generation_config,
    )
    return reply.text
109
+
110
+
111
+
112
def generate(dataframe, text_input, option):
    """Run the whole pipeline: draw charts, build the prompt, query the model.

    Args:
        dataframe: The uploaded data.
        text_input: User-supplied description of the data.
        option: The selected target column.

    Returns:
        Whatever ``generate_res`` returns for the built prompt and images.
    """
    plot(dataframe, option)
    built_prompt, chart_images = prompt_make(dataframe, option, text_input)
    return generate_res(built_prompt, chart_images)
117
+
118
# Run the pipeline when the button is pressed.
# NOTE(review): the `else` is assumed to belong to this `if`; the scraped
# source has lost its indentation, so confirm against the real file.
if st.button("Get Insights", type="primary"):
    if option is None:
        # The selectbox starts empty (index=None). Without a target the
        # pipeline would be run with None and look for charts in "plots/None".
        st.warning("Please select a target column first.")
    else:
        st.write("generating insights ⏳ ... ")
        # running the pipeline
        response = generate(dataframe, text_input, option)
        # The model reply is converted from Markdown to HTML and rendered as HTML.
        res = markdown(response)
        st.markdown(res, unsafe_allow_html=True)
else:
    st.write("")
credentials.json ADDED
@@ -0,0 +1 @@
 
 
1
+
prompt.txt ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Act as an intelligent data Analyst who communicates in simple English and clear messages to the clients
2
+ Give a maximum of 10 insights from the data.
3
+
4
+ We are building an end-to-end application that visualizes datasets internally, and we aim to extract valuable insights from these visualizations using an LLM. The insights generated should be beneficial to both companies and end-users. It's crucial that the model refrains from explicitly mentioning the images and provides information in a clear, detailed, and actionable manner.
5
+ Give the insights by considering the following points.
6
+
7
+ Here are important notes for output generation:
8
+ - Analyze the visual elements within the dataset using the visualizations.
9
+ - Identify and describe any prominent trends, patterns, or anomalies observed in the visual representations.
10
+ - Derive insights that are specifically relevant to the industry or domain associated with the dataset.
11
+ - Emphasize actionable information that could be of value to companies operating in that industry.
12
+ - Explore the possibility of making predictions based on the visual content.
13
+ - Formulate insights that would be valuable from an end-user perspective.
14
+ - Consider how the extracted information can enhance user experience, decision-making, or engagement.
15
+ - Do not mention the images directly in your responses. Focus on conveying insights without explicitly stating the visual content.
16
+ - Ensure that the insights are presented in a language suitable for technical and non-technical audiences. I encourage you to give clear, detailed explanations.
17
+ - Prioritize insights that are actionable and can contribute to informed decision-making for both businesses and end-users.
18
+ - If there are any recognized design patterns or industry standards applicable to the analysis, please incorporate and explain them.
19
+
20
+ Note to Model:
21
+ - Do not explicitly reference the images in your responses.
22
+ - Focus on providing clear, detailed, and actionable insights.
23
+ - Ensure that the insights are presented in a language suitable for technical and non-technical audiences.
24
+
25
+ Remember to adapt the prompt based on the specific details of your dataset and the objectives of your application.
26
+ Give the most important actionable insights rather than listing everything. Present them as bullet points. Don't mention the visualizations or plots in the output.
27
+ Don't use too much statistical jargon either.
28
+
29
+ Output example:
30
+ if the visualization indicates customer churn data: give a response like this -
31
+ - Male customers tend to stay with the business for a long time
32
+ - You have to focus on the happiness rate of each customer
33
+ - Customers who have been with the business for more than 2 years tend to stay longer
34
+ - Customers in the kid's products category are leaving too early.
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ google-generativeai
2
+ pandas
3
+ numpy
4
+ matplotlib
5
+ seaborn
6
+ python-box
7
+ pexpect
8
+ streamlit
9
+ dataframe_image
10
+ jinja2
11
+ PyYAML
12
+ autoviz
13
+ ipython
14
+ google-cloud-aiplatform
15
+ markdown
16
+ llama-index
17
+ openpyxl
utils.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from box import ConfigBox
3
+
4
def load_json(file):
    """Read a JSON file and wrap its content in a ``ConfigBox``.

    Args:
        file: Path of the JSON file to read.

    Returns:
        ConfigBox: The parsed content wrapped in a ``ConfigBox``.

    Raises:
        FileNotFoundError: If ``file`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Open the path given by the `file` parameter; the name `path` was never
    # defined, so every call used to fail with NameError.
    with open(file) as f:
        content = json.load(f)
    return ConfigBox(content)
8
+
9
def save_json(file, content):
    """Write ``content`` to ``file`` as JSON indented by four spaces.

    Args:
        file: Path of the JSON file to create or overwrite.
        content: JSON-serialisable object to store.

    Raises:
        TypeError: If ``content`` is not JSON-serialisable.
    """
    # Use the `file` and `content` parameters; the names `path` and `data`
    # were never defined, so every call used to fail with NameError.
    with open(file, "w") as f:
        json.dump(content, f, indent=4)