Spaces · Build error
Commit 70c712a · 1 Parent(s): f4bdea1
changing to add basic contextual options before analyzing data
app.py CHANGED
@@ -1,68 +1,31 @@
-# Bring in deps
 import streamlit as st
-from langchain.llms import LlamaCpp
-from langchain.embeddings import LlamaCppEmbeddings
-from langchain.prompts import PromptTemplate
-from langchain.chains import LLMChain
-from langchain.document_loaders import CSVLoader  # Import CSVLoader
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.vectorstores import Chroma
 import pandas as pd
 
-#
-
-st.markdown(f"""
-<style>
-.stApp {{background-image: url("https://images.unsplash.com/photo-1509537257950-20f875b03669?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1469&q=80");
-background-attachment: fixed;
-background-size: cover}}
-</style>
-""", unsafe_allow_html=True)
-
-# function for writing uploaded file in temp
-def write_csv_file(content, file_path):
-    try:
-        with open(file_path, 'w') as file:
-            file.write(content)
-        return True
-    except Exception as e:
-        print(f"Error occurred while writing the file: {e}")
-        return False
-
-# set prompt template
-prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
-
-{context}
-
-Question: {question}
-Answer:"""
-prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
-
-# initialize the LLM & Embeddings
-llm = LlamaCpp(model_path="./models/llama-7b.ggmlv3.q4_0.bin")
-embeddings = LlamaCppEmbeddings(model_path="models/llama-7b.ggmlv3.q4_0.bin")
-llm_chain = LLMChain(llm=llm, prompt=prompt)
 
 st.title("📄 Document Conversation 🤖")
-uploaded_file = st.file_uploader("Upload a CSV file", type="csv")
 
 if uploaded_file is not None:
-
-
-
-
-    loader = CSVLoader(file_path)  # Use CSVLoader
-    docs = loader.load()
-    text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
-    texts = text_splitter.split_documents(docs)
-    db = Chroma.from_documents(texts, embeddings)
-    st.success("File Loaded Successfully!!")
 
-    #
-
-    if
-
-
-
-
-
 import streamlit as st
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import pandas as pd
 
+# Initialize the LLM from HuggingFace
+tokenizer = AutoTokenizer.from_pretrained("upstage/SOLAR-0-70b-16bit")
+model = AutoModelForCausalLM.from_pretrained("upstage/SOLAR-0-70b-16bit")
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 st.title("📄 Document Conversation 🤖")
+uploaded_file = st.file_uploader("Upload a CSV file", type="csv")
 
 if uploaded_file is not None:
+    df = pd.read_csv(uploaded_file)
+    st.write(f"Loaded CSV with {df.shape[0]} rows and {df.shape[1]} columns.")
+    st.write("Columns:", df.columns.tolist())
 
+    # Allow user to select columns to focus on
+    selected_columns = st.multiselect("Select columns to focus on:", df.columns.tolist())
+    if selected_columns:
+        st.write(df[selected_columns].head())  # Display first few rows of selected columns
+
+        # Generate a textual representation of the selected data
+        context = f"The selected data has columns: {', '.join(selected_columns)}. Here are the first few entries: {df[selected_columns].head().to_string(index=False)}"
+
+        # Query through LLM
+        question = st.text_input("Ask something about the selected data", placeholder="What is the average of ...?")
+        if question:
+            full_query = context + " " + question
+            response = pipe(full_query, max_length=250, do_sample=True, top_k=50)
+            st.write(response[0]['generated_text'])
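The contextual-query flow introduced here can be smoke-tested outside the Space. The sketch below mirrors the commit's context construction and pipeline call, but substitutes an inline DataFrame and the small gpt2 checkpoint for the uploaded CSV and upstage/SOLAR-0-70b-16bit; both are stand-ins for local testing, not part of the commit, since the 70B model needs far more memory than a typical dev machine.

import pandas as pd
from transformers import pipeline

# Stand-in data; replaces the CSV uploaded through the Streamlit widget.
df = pd.DataFrame({"city": ["Oslo", "Lima"], "temp_c": [4, 19]})
selected_columns = ["city", "temp_c"]

# Same context construction as app.py: column names plus the first rows.
context = (
    f"The selected data has columns: {', '.join(selected_columns)}. "
    f"Here are the first few entries: "
    f"{df[selected_columns].head().to_string(index=False)}"
)

# "gpt2" is a stand-in for upstage/SOLAR-0-70b-16bit so this runs on CPU.
pipe = pipeline("text-generation", model="gpt2")
response = pipe(context + " Which city is warmer?",
                max_new_tokens=50, do_sample=True, top_k=50)
print(response[0]["generated_text"])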