Spaces · Build error
Commit 70c712a · 1 Parent(s): f4bdea1
changing to add basic contextual options before analyzing data
app.py CHANGED
@@ -1,68 +1,31 @@
-# Bring in deps
 import streamlit as st
-from langchain.llms import LlamaCpp
-from langchain.embeddings import LlamaCppEmbeddings
-from langchain.prompts import PromptTemplate
-from langchain.chains import LLMChain
-from langchain.document_loaders import CSVLoader  # Import CSVLoader
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.vectorstores import Chroma
 import pandas as pd
 
-#
-
-st.markdown(f"""
-<style>
-.stApp {{background-image: url("https://images.unsplash.com/photo-1509537257950-20f875b03669?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1469&q=80");
-background-attachment: fixed;
-background-size: cover}}
-</style>
-""", unsafe_allow_html=True)
-
-# function for writing uploaded file in temp
-def write_csv_file(content, file_path):
-    try:
-        with open(file_path, 'w') as file:
-            file.write(content)
-        return True
-    except Exception as e:
-        print(f"Error occurred while writing the file: {e}")
-        return False
-
-# set prompt template
-prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
-
-{context}
-
-Question: {question}
-Answer:"""
-prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
-
-# initialize the LLM & Embeddings
-llm = LlamaCpp(model_path="./models/llama-7b.ggmlv3.q4_0.bin")
-embeddings = LlamaCppEmbeddings(model_path="models/llama-7b.ggmlv3.q4_0.bin")
-llm_chain = LLMChain(llm=llm, prompt=prompt)
 
 st.title("📄 Document Conversation 🤖")
-uploaded_file = st.file_uploader("Upload a CSV file", type="csv")
 
 if uploaded_file is not None:
-
-
-
-
-    loader = CSVLoader(file_path)  # Use CSVLoader
-    docs = loader.load()
-    text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
-    texts = text_splitter.split_documents(docs)
-    db = Chroma.from_documents(texts, embeddings)
-    st.success("File Loaded Successfully!!")
 
-    #
-
-    if
-
-
-
-
-
 import streamlit as st
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import pandas as pd
 
+# Initialize the LLM from HuggingFace
+tokenizer = AutoTokenizer.from_pretrained("upstage/SOLAR-0-70b-16bit")
+model = AutoModelForCausalLM.from_pretrained("upstage/SOLAR-0-70b-16bit")
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 st.title("📄 Document Conversation 🤖")
+uploaded_file = st.file_uploader("Upload a CSV file", type="csv")
 
 if uploaded_file is not None:
+    df = pd.read_csv(uploaded_file)
+    st.write(f"Loaded CSV with {df.shape[0]} rows and {df.shape[1]} columns.")
+    st.write("Columns:", df.columns.tolist())
 
+    # Allow user to select columns to focus on
+    selected_columns = st.multiselect("Select columns to focus on:", df.columns.tolist())
+    if selected_columns:
+        st.write(df[selected_columns].head())  # Display first few rows of selected columns
+
+        # Generate a textual representation of the selected data
+        context = f"The selected data has columns: {', '.join(selected_columns)}. Here are the first few entries: {df[selected_columns].head().to_string(index=False)}"
+
+        # Query through LLM
+        question = st.text_input("Ask something about the selected data", placeholder="What is the average of ...?")
+        if question:
+            full_query = context + " " + question
+            response = pipe(full_query, max_length=250, do_sample=True, top_k=50)
+            st.write(response[0]['generated_text'])
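The contextual-query flow introduced here can be smoke-tested outside the Space. The sketch below mirrors the commit's context construction and pipeline call, but substitutes an inline DataFrame and the small gpt2 checkpoint for the uploaded CSV and upstage/SOLAR-0-70b-16bit; both are stand-ins for local testing, not part of the commit, since the 70B model needs far more memory than a typical dev machine.

import pandas as pd
from transformers import pipeline

# Stand-in data; replaces the CSV uploaded through the Streamlit widget.
df = pd.DataFrame({"city": ["Oslo", "Lima"], "temp_c": [4, 19]})
selected_columns = ["city", "temp_c"]

# Same context construction as app.py: column names plus the first rows.
context = (
    f"The selected data has columns: {', '.join(selected_columns)}. "
    f"Here are the first few entries: "
    f"{df[selected_columns].head().to_string(index=False)}"
)

# "gpt2" is a stand-in for upstage/SOLAR-0-70b-16bit so this runs on CPU.
pipe = pipeline("text-generation", model="gpt2")
response = pipe(context + " Which city is warmer?",
                max_new_tokens=50, do_sample=True, top_k=50)
print(response[0]["generated_text"])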