Update app.py
app.py CHANGED
@@ -3,51 +3,35 @@ import gc
 import tempfile
 import uuid
 import pandas as pd
-
-from llama_index.llms.cerebras import Cerebras
+import streamlit as st
 from llama_index.core import Settings
+from llama_index.llms.cerebras import Cerebras
+from llama_index.core import PromptTemplate
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
 from llama_index.readers.docling import DoclingReader
 from llama_index.core.node_parser import MarkdownNodeParser
-
-import streamlit as st
+from llama_index.core.llms import ChatMessage
 
 if "id" not in st.session_state:
     st.session_state.id = uuid.uuid4()
     st.session_state.file_cache = {}
 
 session_id = st.session_state.id
+client = None
 
-#
-api_key = os.getenv("CEREBRAS_API_KEY")
-if not api_key:
-    raise ValueError("CEREBRAS_API_KEY is not set in Hugging Face Secrets.")
-else:
-    print("Cerebras API key loaded successfully.")
-
-@st.cache_resource
+# Initialize Cerebras LLM
 def load_llm():
-    #
-    os.
-
-
-
-
-def query_cerebras(client, prompt, max_tokens=1024, temperature=0.2, top_p=1):
-    # Query Cerebras model
-    stream = client.chat.completions.create(
-        messages=[{"role": "user", "content": prompt}],
-        model="llama-3.3-70b",
-        stream=True,
-        max_completion_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-    )
-    return stream
+    # Replace with your API Key and model
+    api_key = os.getenv("CEREBRAS_API_KEY")
+    if not api_key:
+        api_key = st.text_input("Enter your Cerebras API key:")
+    llm = Cerebras(model="llama-3.3-70b", api_key=api_key)
+    return llm
 
 def reset_chat():
     st.session_state.messages = []
+    st.session_state.context = None
     gc.collect()
 
 def display_excel(file):
@@ -87,24 +71,42 @@ with st.sidebar:
 
                     docs = loader.load_data()
 
-                    #
+                    # setup llm & embedding model
+                    llm = load_llm()  # Load the Cerebras model
                     embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5", trust_remote_code=True)
-                    Settings.embed_model = embed_model
-
                     # Creating an index over loaded data
+                    Settings.embed_model = embed_model
                     node_parser = MarkdownNodeParser()
                     index = VectorStoreIndex.from_documents(documents=docs, transformations=[node_parser], show_progress=True)
 
-                    #
+                    # Create the query engine
+                    Settings.llm = llm
                     query_engine = index.as_query_engine(streaming=True)
 
+                    # Customise prompt template
+                    qa_prompt_tmpl_str = (
+                        "Context information is below.\n"
+                        "---------------------\n"
+                        "{context_str}\n"
+                        "---------------------\n"
+                        "Given the context information above I want you to think step by step to answer the query in a highly precise and crisp manner focused on the final answer, in case you don't know the answer say 'I don't know!'.\n"
+                        "Query: {query_str}\n"
+                        "Answer: "
+                    )
+                    qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
+
+                    query_engine.update_prompts(
+                        {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
+                    )
+
                     st.session_state.file_cache[file_key] = query_engine
                 else:
                     query_engine = st.session_state.file_cache[file_key]
 
-                # Inform the user that the file is processed and
+                # Inform the user that the file is processed and Display the PDF uploaded
                 st.success("Ready to Chat!")
                 display_excel(uploaded_file)
+
     except Exception as e:
         st.error(f"An error occurred: {e}")
         st.stop()
@@ -112,7 +114,7 @@ with st.sidebar:
 col1, col2 = st.columns([6, 1])
 
 with col1:
-    st.header(f"RAG over Excel using
+    st.header(f"RAG over Excel using Dockling 🐥 & Llama-3.3 70B")
 
 with col2:
     st.button("Clear ↺", on_click=reset_chat)
@@ -121,9 +123,6 @@ with col2:
 if "messages" not in st.session_state:
     reset_chat()
 
-# Initialize LLM client
-client = load_llm()
-
 # Display chat messages from history on app rerun
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
@@ -142,13 +141,14 @@ if prompt := st.chat_input("What's up?"):
         message_placeholder = st.empty()
         full_response = ""
 
-        #
-
+        # Using Cerebras stream_chat for streaming response
+        messages = [
+            ChatMessage(role="user", content=prompt)
+        ]
 
-
-        for
-
-            full_response += content
+        response = llm.stream_chat(messages)
+        for r in response:
+            full_response += r.delta
             message_placeholder.markdown(full_response + "▌")
 
         message_placeholder.markdown(full_response)
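For reference, a rough standalone sketch of how the pieces introduced in this commit fit together outside Streamlit: the Cerebras LLM behind load_llm(), the BAAI/bge-large-en-v1.5 embeddings, the Docling-parsed index, and the streaming query engine with the customised QA prompt. The file path and the query string are placeholders, not part of the commit, and the reader is pointed straight at the file here instead of going through SimpleDirectoryReader as the app does.

    import os

    from llama_index.core import PromptTemplate, Settings, VectorStoreIndex
    from llama_index.core.node_parser import MarkdownNodeParser
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding
    from llama_index.llms.cerebras import Cerebras
    from llama_index.readers.docling import DoclingReader

    # LLM and embedding model, mirroring load_llm() and the Settings wiring in the commit
    Settings.llm = Cerebras(model="llama-3.3-70b", api_key=os.getenv("CEREBRAS_API_KEY"))
    Settings.embed_model = HuggingFaceEmbedding(
        model_name="BAAI/bge-large-en-v1.5", trust_remote_code=True
    )

    # Parse the spreadsheet with Docling and index the resulting markdown nodes
    docs = DoclingReader().load_data(file_path="sample.xlsx")  # placeholder path
    index = VectorStoreIndex.from_documents(
        documents=docs, transformations=[MarkdownNodeParser()], show_progress=True
    )

    # Streaming query engine with the same custom QA prompt as in the diff
    qa_prompt_tmpl = PromptTemplate(
        "Context information is below.\n"
        "---------------------\n"
        "{context_str}\n"
        "---------------------\n"
        "Given the context information above I want you to think step by step to answer "
        "the query in a highly precise and crisp manner focused on the final answer, "
        "in case you don't know the answer say 'I don't know!'.\n"
        "Query: {query_str}\n"
        "Answer: "
    )
    query_engine = index.as_query_engine(streaming=True)
    query_engine.update_prompts({"response_synthesizer:text_qa_template": qa_prompt_tmpl})

    # Stream the answer token by token
    streaming_response = query_engine.query("What does the sheet contain?")  # placeholder query
    for token in streaming_response.response_gen:
        print(token, end="", flush=True)

Setting Settings.llm and Settings.embed_model once is what lets as_query_engine(streaming=True) pick up both models without passing them explicitly.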
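The last hunk drops the old query_cerebras() helper that streamed chat completions directly and uses LlamaIndex's chat interface instead. A minimal sketch of that stream_chat loop on its own, assuming the same llama-3.3-70b model; the prompt string here is a placeholder:

    import os

    from llama_index.core.llms import ChatMessage
    from llama_index.llms.cerebras import Cerebras

    llm = Cerebras(model="llama-3.3-70b", api_key=os.getenv("CEREBRAS_API_KEY"))

    messages = [ChatMessage(role="user", content="Summarise the uploaded sheet.")]  # placeholder prompt

    # stream_chat yields ChatResponse chunks; .delta holds the newly generated text
    full_response = ""
    for r in llm.stream_chat(messages):
        full_response += r.delta
        print(r.delta, end="", flush=True)

As in the diff, each chunk's delta is appended to full_response, which the app re-renders in message_placeholder on every iteration.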