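# Excel Q&A Chatbot (Streamlit app)
# Upload a CSV/XLSX file, ask a natural-language question, and the selected
# LLM converts it into a Pandas expression that is evaluated against the data.
# Run with: streamlit run app.py  (filename assumed; adjust to this file's name)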
import os
import streamlit as st
import pandas as pd
import openai
import torch
import matplotlib.pyplot as plt
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from dotenv import load_dotenv
import anthropic
# Load API keys (OPENAI_API_KEY, ANTHROPIC_API_KEY) from a local .env file;
# load_dotenv() populates os.environ directly, so no re-assignment is needed
# (os.environ[...] = os.getenv(...) would raise TypeError if a key were unset)
load_dotenv()
st.title("Excel Q&A Chatbot")
# Model Selection
model_choice = st.selectbox("Select LLM Model", ["OpenAI GPT-3.5", "Claude 3 Haiku", "Mistral-7B"])
# Load appropriate model based on selection
if model_choice == "Mistral-7B":
    # Hugging Face model ids for Mistral are versioned; v0.2 is assumed here
    model_name = "mistralai/Mistral-7B-Instruct-v0.2"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # device_map="auto" (requires the accelerate package) places the model on GPU when available
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype=torch.float16, device_map="auto"
    )

    def ask_mistral(query):
        # Move inputs to the model's device instead of hard-coding "cuda"
        inputs = tokenizer(query, return_tensors="pt").to(model.device)
        # Without max_new_tokens, generate() stops after ~20 tokens by default
        output = model.generate(**inputs, max_new_tokens=256)
        return tokenizer.decode(output[0], skip_special_tokens=True)
elif model_choice == "Claude 3 Haiku":
    client = anthropic.Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

    def ask_claude(query):
        response = client.messages.create(
            model="claude-3-haiku-20240307",  # full, dated model id
            max_tokens=1024,  # required by the Messages API
            messages=[{"role": "user", "content": query}],
        )
        # response.content is a list of content blocks; return the first block's text
        return response.content[0].text
else:
    client = openai.OpenAI()  # reads OPENAI_API_KEY from the environment

    def ask_gpt(query):
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": query}],
        )
        return response.choices[0].message.content
# File Upload
uploaded_file = st.file_uploader("Upload an Excel or CSV file", type=["csv", "xlsx"])
if uploaded_file is not None:
    file_extension = uploaded_file.name.split(".")[-1].lower()
    # Reading .xlsx files requires the openpyxl package
    df = pd.read_csv(uploaded_file) if file_extension == "csv" else pd.read_excel(uploaded_file)
    st.write("### Preview of Data:")
    st.write(df.head())

    # Extract metadata
    column_names = df.columns.tolist()
    data_types = df.dtypes.apply(lambda x: x.name).to_dict()
    missing_values = df.isnull().sum().to_dict()

    # Display metadata
    st.write("### Column Details:")
    st.write(pd.DataFrame({
        "Column": column_names,
        "Type": list(data_types.values()),
        "Missing Values": list(missing_values.values()),
    }))
    # User Query
    query = st.text_input("Ask a question about this data:")
    if st.button("Submit Query"):
        if query:
            # Interpret the query using the selected LLM
            prompt = f"Convert this question into a Pandas operation: {query}"
            if model_choice == "Mistral-7B":
                parsed_query = ask_mistral(prompt)
            elif model_choice == "Claude 3 Haiku":
                parsed_query = ask_claude(prompt)
            else:
                parsed_query = ask_gpt(prompt)
            # LLMs often wrap code in backticks; strip them before evaluating
            parsed_query = parsed_query.strip().strip("`")

            # Execute the query
            # WARNING: eval() runs arbitrary model-generated code; do not expose
            # this pattern to untrusted users without sandboxing or validation
            try:
                result = eval(f"df.{parsed_query}")
                st.write("### Result:")
                st.write(result if isinstance(result, pd.DataFrame) else str(result))

                # If numerical data, show a visualization
                if isinstance(result, pd.Series) and result.dtype in ["int64", "float64"]:
                    fig, ax = plt.subplots()
                    result.plot(kind="bar", ax=ax)
                    st.pyplot(fig)
            except Exception as e:
                st.error(f"Error executing query: {str(e)}")

            # Memory for context retention
            if "query_history" not in st.session_state:
                st.session_state.query_history = []
            st.session_state.query_history.append(query)
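# Example interaction (hypothetical): asking "What is the average value per category?"
# on a DataFrame with 'category' and 'value' columns should yield a parsed_query
# like "groupby('category')['value'].mean()", which the eval above then runs as
# df.groupby('category')['value'].mean().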