Rohit Rajpoot committed on
Commit
1a6f50a
·
1 Parent(s): 36e9004

Add RAG over training.txt for DeepSeek

Browse files
Files changed (2) hide show
  1. app.py +52 -25
  2. requirements.txt +2 -1
app.py CHANGED
@@ -7,6 +7,9 @@ from assist.transformer_demo import transformer_next
7
 
8
  # DeepSeek imports
9
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
 
 
 
10
 
11
  st.set_page_config(page_title="RepoSage All-in-One Demo", layout="wide")
12
  st.title("🤖 RepoSage Unified Demo")
@@ -21,46 +24,70 @@ def load_deepseek():
21
 
22
  deepseek_gen = load_deepseek()
23
 
24
- # User input
25
- question = st.text_input("Enter your question or prompt below:")
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- # Four buttons side by side, with DeepSeek first
28
- col1, col2, col3, col4 = st.columns(4)
 
29
 
30
- math_prefix = (
31
- "You are an expert math tutor. Compute the derivative of f(x) = x^2·sin(x) "
32
- "step by step using the product rule. Show each line of work."
33
- )
 
34
 
 
35
  with col1:
36
  if st.button("DeepSeek-R1 Math Demo"):
37
- if not question.strip():
38
  st.warning("Please enter a prompt first.")
39
  else:
40
- # 1) Build the full math prompt
41
- prompt = f"{math_prefix}\n\nf(x) = {question}\n\nSolution:\n"
42
- # 2) Call the model deterministically
43
  with st.spinner("Working it out…"):
44
- out = deepseek_gen(
45
- prompt,
46
- max_new_tokens=80,
47
- do_sample=False, # no random sampling
48
- temperature=0.0 # fully deterministic
49
- )
50
- # 3) Display the clean, step-by-step answer
51
  st.code(out[0]["generated_text"], language="text")
52
 
 
53
  with col2:
54
- if st.button("Embedding Q&A"):
55
- st.write(embed_chat(question))
 
 
 
 
 
 
 
 
 
 
56
 
 
57
  with col3:
58
- if st.button("Bayesian Q&A"):
59
- st.write(bayes_chat(question))
60
 
 
61
  with col4:
 
 
 
 
 
62
  if st.button("Transformer Demo"):
63
- st.write(transformer_next(question))
64
 
65
  st.markdown("---")
66
- st.caption("DeepSeek-R1, Embedding, Bayesian & Transformer demos all in one place ✅")
 
7
 
8
  # DeepSeek imports
9
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
10
+ # Retrieval imports
11
+ from sentence_transformers import SentenceTransformer
12
+ import torch
13
 
14
  st.set_page_config(page_title="RepoSage All-in-One Demo", layout="wide")
15
  st.title("🤖 RepoSage Unified Demo")
 
24
 
25
  deepseek_gen = load_deepseek()
26
 
27
+ # Cache and load training corpus passages
28
+ @st.cache_data
29
+ def load_passages(path="training.txt"):
30
+ text = open(path, encoding="utf8").read()
31
+ paras = [p.strip() for p in text.split("\n\n") if p.strip()]
32
+ return paras
33
+
34
+ # Cache and embed passages
35
+ @st.cache_resource
36
+ def embed_passages(passages):
37
+ encoder = SentenceTransformer("all-MiniLM-L6-v2")
38
+ embeddings = encoder.encode(passages, convert_to_tensor=True)
39
+ return encoder, passages, embeddings
40
 
41
+ # Prepare RAG resources
42
+ _passages = load_passages()
43
+ _encoder, passages, passage_embs = embed_passages(_passages)
44
 
45
+ # User input
46
+ title = st.text_input("Enter your question or prompt below:")
47
+
48
+ # Define columns for five demos
49
+ col1, col2, col3, col4, col5 = st.columns(5)
50
 
51
+ # Math demo in col1
52
  with col1:
53
  if st.button("DeepSeek-R1 Math Demo"):
54
+ if not title.strip():
55
  st.warning("Please enter a prompt first.")
56
  else:
57
+ prompt = f"You are an expert math tutor. Compute the derivative of f(x) = {title} step by step using the product rule. Solution:\n"
 
 
58
  with st.spinner("Working it out…"):
59
+ out = deepseek_gen(prompt, max_new_tokens=80, do_sample=False, temperature=0.0)
 
 
 
 
 
 
60
  st.code(out[0]["generated_text"], language="text")
61
 
62
+ # RAG-augmented demo in col2
63
  with col2:
64
+ if st.button("DeepSeek-R1 RAG Demo"):
65
+ if not title.strip():
66
+ st.warning("Please enter a question first.")
67
+ else:
68
+ q_emb = _encoder.encode(title, convert_to_tensor=True)
69
+ sims = torch.nn.functional.cosine_similarity(q_emb.unsqueeze(0), passage_embs)
70
+ topk = torch.topk(sims, k=min(3, len(passages))).indices.tolist()
71
+ context = "\n\n".join(passages[i] for i in topk)
72
+ prompt = f"Use these notes to answer the question:\n\n{context}\n\nQ: {title}\nA:"
73
+ with st.spinner("Retrieving & generating…"):
74
+ out = deepseek_gen(prompt, max_new_tokens=100, do_sample=False)
75
+ st.write(out[0]["generated_text"])
76
 
77
+ # Embedding Q&A in col3
78
  with col3:
79
+ if st.button("Embedding Q&A"):
80
+ st.write(embed_chat(title))
81
 
82
+ # Bayesian Q&A in col4
83
  with col4:
84
+ if st.button("Bayesian Q&A"):
85
+ st.write(bayes_chat(title))
86
+
87
+ # Transformer Demo in col5
88
+ with col5:
89
  if st.button("Transformer Demo"):
90
+ st.write(transformer_next(title))
91
 
92
  st.markdown("---")
93
+ st.caption("DeepSeek-R1 Math, RAG, Embedding, Bayesian & Transformer demos all in one place ✅")
requirements.txt CHANGED
@@ -5,4 +5,5 @@ streamlit==1.46.0
5
  typer==0.16.0
6
  rich==14.0.0
7
  torch==2.7.1
8
- transformers
 
 
5
  typer==0.16.0
6
  rich==14.0.0
7
  torch==2.7.1
8
+ transformers
9
+ sentence-transformers