Spaces:
Running
Running
Rohit Rajpoot
committed on
Commit
·
1a6f50a
1
Parent(s):
36e9004
Add RAG over training.txt for DeepSeek
Browse files- app.py +52 -25
- requirements.txt +2 -1
app.py
CHANGED
@@ -7,6 +7,9 @@ from assist.transformer_demo import transformer_next
|
|
7 |
|
8 |
# DeepSeek imports
|
9 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
|
|
|
|
|
|
|
10 |
|
11 |
st.set_page_config(page_title="RepoSage All-in-One Demo", layout="wide")
|
12 |
st.title("🤖 RepoSage Unified Demo")
|
@@ -21,46 +24,70 @@ def load_deepseek():
|
|
21 |
|
22 |
deepseek_gen = load_deepseek()
|
23 |
|
24 |
-
#
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
-
#
|
28 |
-
|
|
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
34 |
|
|
|
35 |
with col1:
|
36 |
if st.button("DeepSeek-R1 Math Demo"):
|
37 |
-
if not
|
38 |
st.warning("Please enter a prompt first.")
|
39 |
else:
|
40 |
-
|
41 |
-
prompt = f"{math_prefix}\n\nf(x) = {question}\n\nSolution:\n"
|
42 |
-
# 2) Call the model deterministically
|
43 |
with st.spinner("Working it out…"):
|
44 |
-
out = deepseek_gen(
|
45 |
-
prompt,
|
46 |
-
max_new_tokens=80,
|
47 |
-
do_sample=False, # no random sampling
|
48 |
-
temperature=0.0 # fully deterministic
|
49 |
-
)
|
50 |
-
# 3) Display the clean, step-by-step answer
|
51 |
st.code(out[0]["generated_text"], language="text")
|
52 |
|
|
|
53 |
with col2:
|
54 |
-
if st.button("
|
55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
|
|
57 |
with col3:
|
58 |
-
if st.button("
|
59 |
-
st.write(
|
60 |
|
|
|
61 |
with col4:
|
|
|
|
|
|
|
|
|
|
|
62 |
if st.button("Transformer Demo"):
|
63 |
-
st.write(transformer_next(
|
64 |
|
65 |
st.markdown("---")
|
66 |
-
st.caption("DeepSeek-R1, Embedding, Bayesian & Transformer demos all in one place ✅")
|
|
|
7 |
|
8 |
# DeepSeek imports
|
9 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
|
10 |
+
# Retrieval imports
|
11 |
+
from sentence_transformers import SentenceTransformer
|
12 |
+
import torch
|
13 |
|
14 |
st.set_page_config(page_title="RepoSage All-in-One Demo", layout="wide")
|
15 |
st.title("🤖 RepoSage Unified Demo")
|
|
|
24 |
|
25 |
deepseek_gen = load_deepseek()
|
26 |
|
27 |
+
# Cache and load training corpus passages
|
28 |
+
@st.cache_data
|
29 |
+
def load_passages(path="training.txt"):
    """Read a text corpus and split it into paragraph-sized passages.

    Passages are delimited by a blank line (two consecutive newlines).
    Each passage is stripped of surrounding whitespace, and passages
    that are empty after stripping are dropped.

    Args:
        path: Path of the UTF-8 encoded text file to read.

    Returns:
        A list of non-empty passages, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Read through a context manager so the file handle is closed as soon
    # as the text is in memory instead of lingering until garbage collection.
    with open(path, encoding="utf8") as corpus:
        text = corpus.read()
    return [p.strip() for p in text.split("\n\n") if p.strip()]
|
33 |
+
|
34 |
+
# Cache and embed passages
|
35 |
+
@st.cache_resource
|
36 |
+
def embed_passages(passages):
    """Build the sentence encoder and embed every passage with it.

    Args:
        passages: The text passages to encode.

    Returns:
        A 3-tuple ``(encoder, passages, embeddings)``: the
        ``SentenceTransformer`` instance, the ``passages`` argument
        unchanged, and the result of encoding the passages with
        ``convert_to_tensor=True``.
    """
    # The same encoder is returned so callers can embed queries with the
    # model that produced the passage embeddings.
    model = SentenceTransformer("all-MiniLM-L6-v2")
    vectors = model.encode(passages, convert_to_tensor=True)
    return model, passages, vectors
|
40 |
|
41 |
+
# Prepare RAG resources
|
42 |
+
_passages = load_passages()
|
43 |
+
_encoder, passages, passage_embs = embed_passages(_passages)
|
44 |
|
45 |
+
# User input
|
46 |
+
title = st.text_input("Enter your question or prompt below:")
|
47 |
+
|
48 |
+
# Define columns for five demos
|
49 |
+
col1, col2, col3, col4, col5 = st.columns(5)
|
50 |
|
51 |
+
# Math demo in col1
|
52 |
with col1:
|
53 |
if st.button("DeepSeek-R1 Math Demo"):
|
54 |
+
if not title.strip():
|
55 |
st.warning("Please enter a prompt first.")
|
56 |
else:
|
57 |
+
prompt = f"You are an expert math tutor. Compute the derivative of f(x) = {title} step by step using the product rule. Solution:\n"
|
|
|
|
|
58 |
with st.spinner("Working it out…"):
|
59 |
+
out = deepseek_gen(prompt, max_new_tokens=80, do_sample=False, temperature=0.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
st.code(out[0]["generated_text"], language="text")
|
61 |
|
62 |
+
# RAG-augmented demo in col2
|
63 |
with col2:
|
64 |
+
if st.button("DeepSeek-R1 RAG Demo"):
|
65 |
+
if not title.strip():
|
66 |
+
st.warning("Please enter a question first.")
|
67 |
+
else:
|
68 |
+
q_emb = _encoder.encode(title, convert_to_tensor=True)
|
69 |
+
sims = torch.nn.functional.cosine_similarity(q_emb.unsqueeze(0), passage_embs)
|
70 |
+
topk = torch.topk(sims, k=min(3, len(passages))).indices.tolist()
|
71 |
+
context = "\n\n".join(passages[i] for i in topk)
|
72 |
+
prompt = f"Use these notes to answer the question:\n\n{context}\n\nQ: {title}\nA:"
|
73 |
+
with st.spinner("Retrieving & generating…"):
|
74 |
+
out = deepseek_gen(prompt, max_new_tokens=100, do_sample=False)
|
75 |
+
st.write(out[0]["generated_text"])
|
76 |
|
77 |
+
# Embedding Q&A in col3
|
78 |
with col3:
|
79 |
+
if st.button("Embedding Q&A"):
|
80 |
+
st.write(embed_chat(title))
|
81 |
|
82 |
+
# Bayesian Q&A in col4
|
83 |
with col4:
|
84 |
+
if st.button("Bayesian Q&A"):
|
85 |
+
st.write(bayes_chat(title))
|
86 |
+
|
87 |
+
# Transformer Demo in col5
|
88 |
+
with col5:
|
89 |
if st.button("Transformer Demo"):
|
90 |
+
st.write(transformer_next(title))
|
91 |
|
92 |
st.markdown("---")
|
93 |
+
st.caption("DeepSeek-R1 Math, RAG, Embedding, Bayesian & Transformer demos all in one place ✅")
|
requirements.txt
CHANGED
@@ -5,4 +5,5 @@ streamlit==1.46.0
|
|
5 |
typer==0.16.0
|
6 |
rich==14.0.0
|
7 |
torch==2.7.1
|
8 |
-
transformers
|
|
|
|
5 |
typer==0.16.0
|
6 |
rich==14.0.0
|
7 |
torch==2.7.1
|
8 |
+
transformers
|
9 |
+
sentence-transformers
|