Update prompt and streaming
app.py CHANGED
@@ -2,7 +2,8 @@ import os
 import streamlit as st
 from together import Together
 from langchain_community.vectorstores import Chroma
-
+# Use the updated HuggingFace Embeddings class
+from langchain_huggingface import HuggingFaceEmbeddings

 # --- Configuration ---
 # TogetherAI API key (env var name pilotikval)
@@ -14,9 +15,9 @@ if not TOGETHER_API_KEY:
 # Initialize TogetherAI client
 client = Together(api_key=TOGETHER_API_KEY)

-# Embeddings setup
+# Embeddings setup (new huggingface integration)
 EMBED_MODEL_NAME = "BAAI/bge-base-en"
-embeddings =
+embeddings = HuggingFaceEmbeddings(
     model_name=EMBED_MODEL_NAME,
     encode_kwargs={"normalize_embeddings": True},
 )
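
A quick aside on the embeddings hunk: with encode_kwargs={"normalize_embeddings": True} the BGE vectors come back unit-length, so cosine similarity against the Chroma index reduces to a dot product. A minimal smoke test, assuming the langchain-huggingface package is installed (illustrative snippet, not part of the commit):

    from langchain_huggingface import HuggingFaceEmbeddings

    emb = HuggingFaceEmbeddings(
        model_name="BAAI/bge-base-en",
        encode_kwargs={"normalize_embeddings": True},
    )
    vec = emb.embed_query("post-operative wound care")
    # dimension (768 for bge-base-en) and norm, which should be ~1.0
    print(len(vec), sum(v * v for v in vec) ** 0.5)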
@@ -54,17 +55,33 @@ vectorstore = Chroma(
 )
 retriever = vectorstore.as_retriever(search_kwargs={"k": 20})  # k=20

-# System prompt template
+# System prompt template for long, detailed answers
 def build_system(context: str) -> dict:
-
-
-
-
-
-
-
-
-
+    """
+    Build the system prompt that instructs the model to:
+    - Act as an expert medical assistant and attentive listener.
+    - Leverage all retrieved context to craft detailed, accurate, and empathetic responses.
+    - Ask clarifying follow-up questions if the user’s query is ambiguous.
+    - Structure answers clearly, using headings, bullet points, and step-by-step explanations.
+    - Cite relevant context sections when appropriate.
+    - Maintain conversational memory so follow-up queries build upon prior discussion.
+    """
+    prompt = (
+        "You are a world-class medical assistant and conversational partner. "
+        "Listen carefully to the user’s questions, reference the context below, and provide a thorough, evidence-based response. "
+        "If any part of the question is unclear, ask a clarifying question before proceeding. "
+        "Organize your answer with clear headings or bullet points, and refer back to specific context snippets as needed. "
+        "Always be empathetic, concise, and precise in your medical explanations. "
+        "Retain memory of previous user messages to support follow-up interactions.\n\n"
+        "=== Retrieved Context Start ===\n"
+        + context +
+        "\n=== Retrieved Context End ==="
+    )
+    return {"role": "system", "content": prompt}

 st.title("🩺 DocChatter RAG (Streaming & Memory)")

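The adjacent string literals inside prompt = ( ... ) concatenate implicitly, and + context + splices the retrieved snippets between the two delimiter lines. A quick check of the returned message shape (illustrative snippet, not part of the commit):

    msg = build_system("Example guideline text.")
    assert msg["role"] == "system"
    assert "=== Retrieved Context Start ===" in msg["content"]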
@@ -83,27 +100,29 @@ with chat_tab:
     for msg in st.session_state.chat_history:
         st.chat_message(msg['role']).write(msg['content'])

-    #
+    # Handle new user input
     if user_prompt:
         # Echo user
         st.chat_message("user").write(user_prompt)
         st.session_state.chat_history.append({"role": "user", "content": user_prompt})

-        # Retrieve top-k
-
+        # Retrieve top-k documents
+        try:
+            docs = retriever.invoke({"query": user_prompt})
+        except Exception:
+            docs = retriever.get_relevant_documents(user_prompt)
         context = "\n---\n".join([d.page_content for d in docs])

-        # Build message sequence
+        # Build TogetherAI message sequence
         messages = [build_system(context)]
         for m in st.session_state.chat_history:
             messages.append(m)

-        #
+        # Stream assistant response
         response_container = st.chat_message("assistant")
         stream_placeholder = response_container.empty()
         answer = ""

-        # Stream tokens
         for token in client.chat.completions.create(
             model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
             messages=messages,
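One caveat on the retrieval fallback above: recent LangChain retrievers are Runnables whose invoke takes the query string itself, not a dict, so the try branch as committed will likely raise and fall through to the deprecated get_relevant_documents. A sketch of the string-based call, under that assumption (illustrative, not part of the commit):

    docs = retriever.invoke(user_prompt)  # returns a list of Documents
    context = "\n---\n".join(d.page_content for d in docs)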
@@ -111,10 +130,15 @@ with chat_tab:
             temperature=0.1,
             stream=True
         ):
-
-
-
-
+            try:
+                choice = token.choices[0]
+                delta = getattr(choice.delta, 'content', '')
+                if delta:
+                    answer += delta
+                    stream_placeholder.write(answer)
+            except (IndexError, AttributeError):
+                # Skip empty or malformed token
+                continue

         # Save assistant response
         st.session_state.chat_history.append({"role": "assistant", "content": answer})
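
For reference, the streaming contract the new token loop relies on: with stream=True, the Together client yields chunks whose choices[0].delta.content holds an incremental text fragment (possibly None on role or stop chunks). A standalone sketch, assuming a valid key in the pilotikval environment variable as the app's configuration comment describes (illustrative, not part of the commit):

    import os
    from together import Together

    client = Together(api_key=os.environ["pilotikval"])

    answer = ""
    for chunk in client.chat.completions.create(
        model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
        stream=True,
    ):
        # Each chunk carries a delta; content may be None on role/stop chunks.
        if chunk.choices and chunk.choices[0].delta.content:
            answer += chunk.choices[0].delta.content
    print(answer)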