Rohit Rajpoot committed on
Commit
1a6f50a
·
1 Parent(s): 36e9004

Add RAG over training.txt for DeepSeek

Browse files
Files changed (2) hide show
  1. app.py +52 -25
  2. requirements.txt +2 -1
app.py CHANGED
@@ -7,6 +7,9 @@ from assist.transformer_demo import transformer_next
7
 
8
  # DeepSeek imports
9
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
 
 
 
10
 
11
  st.set_page_config(page_title="RepoSage All-in-One Demo", layout="wide")
12
  st.title("🤖 RepoSage Unified Demo")
@@ -21,46 +24,70 @@ def load_deepseek():
21
 
22
  deepseek_gen = load_deepseek()
23
 
24
- # User input
25
- question = st.text_input("Enter your question or prompt below:")
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- # Four buttons side by side, with DeepSeek first
28
- col1, col2, col3, col4 = st.columns(4)
 
29
 
30
- math_prefix = (
31
- "You are an expert math tutor. Compute the derivative of f(x) = x^2·sin(x) "
32
- "step by step using the product rule. Show each line of work."
33
- )
 
34
 
 
35
  with col1:
36
  if st.button("DeepSeek-R1 Math Demo"):
37
- if not question.strip():
38
  st.warning("Please enter a prompt first.")
39
  else:
40
- # 1) Build the full math prompt
41
- prompt = f"{math_prefix}\n\nf(x) = {question}\n\nSolution:\n"
42
- # 2) Call the model deterministically
43
  with st.spinner("Working it out…"):
44
- out = deepseek_gen(
45
- prompt,
46
- max_new_tokens=80,
47
- do_sample=False, # no random sampling
48
- temperature=0.0 # fully deterministic
49
- )
50
- # 3) Display the clean, step-by-step answer
51
  st.code(out[0]["generated_text"], language="text")
52
 
 
53
  with col2:
54
- if st.button("Embedding Q&A"):
55
- st.write(embed_chat(question))
 
 
 
 
 
 
 
 
 
 
56
 
 
57
  with col3:
58
- if st.button("Bayesian Q&A"):
59
- st.write(bayes_chat(question))
60
 
 
61
  with col4:
 
 
 
 
 
62
  if st.button("Transformer Demo"):
63
- st.write(transformer_next(question))
64
 
65
  st.markdown("---")
66
- st.caption("DeepSeek-R1, Embedding, Bayesian & Transformer demos all in one place ✅")
 
7
 
8
  # DeepSeek imports
9
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextGenerationPipeline
10
+ # Retrieval imports
11
+ from sentence_transformers import SentenceTransformer
12
+ import torch
13
 
14
  st.set_page_config(page_title="RepoSage All-in-One Demo", layout="wide")
15
  st.title("🤖 RepoSage Unified Demo")
 
24
 
25
  deepseek_gen = load_deepseek()
26
 
27
+ # Cache and load training corpus passages
28
+ @st.cache_data
29
+ def load_passages(path="training.txt"):
30
+ text = open(path, encoding="utf8").read()
31
+ paras = [p.strip() for p in text.split("\n\n") if p.strip()]
32
+ return paras
33
+
34
+ # Cache and embed passages
35
+ @st.cache_resource
36
+ def embed_passages(passages):
37
+ encoder = SentenceTransformer("all-MiniLM-L6-v2")
38
+ embeddings = encoder.encode(passages, convert_to_tensor=True)
39
+ return encoder, passages, embeddings
40
 
41
+ # Prepare RAG resources
42
+ _passages = load_passages()
43
+ _encoder, passages, passage_embs = embed_passages(_passages)
44
 
45
+ # User input
46
+ title = st.text_input("Enter your question or prompt below:")
47
+
48
+ # Define columns for five demos
49
+ col1, col2, col3, col4, col5 = st.columns(5)
50
 
51
+ # Math demo in col1
52
  with col1:
53
  if st.button("DeepSeek-R1 Math Demo"):
54
+ if not title.strip():
55
  st.warning("Please enter a prompt first.")
56
  else:
57
+ prompt = f"You are an expert math tutor. Compute the derivative of f(x) = {title} step by step using the product rule. Solution:\n"
 
 
58
  with st.spinner("Working it out…"):
59
+ out = deepseek_gen(prompt, max_new_tokens=80, do_sample=False, temperature=0.0)
 
 
 
 
 
 
60
  st.code(out[0]["generated_text"], language="text")
61
 
62
+ # RAG-augmented demo in col2
63
  with col2:
64
+ if st.button("DeepSeek-R1 RAG Demo"):
65
+ if not title.strip():
66
+ st.warning("Please enter a question first.")
67
+ else:
68
+ q_emb = _encoder.encode(title, convert_to_tensor=True)
69
+ sims = torch.nn.functional.cosine_similarity(q_emb.unsqueeze(0), passage_embs)
70
+ topk = torch.topk(sims, k=min(3, len(passages))).indices.tolist()
71
+ context = "\n\n".join(passages[i] for i in topk)
72
+ prompt = f"Use these notes to answer the question:\n\n{context}\n\nQ: {title}\nA:"
73
+ with st.spinner("Retrieving & generating…"):
74
+ out = deepseek_gen(prompt, max_new_tokens=100, do_sample=False)
75
+ st.write(out[0]["generated_text"])
76
 
77
+ # Embedding Q&A in col3
78
  with col3:
79
+ if st.button("Embedding Q&A"):
80
+ st.write(embed_chat(title))
81
 
82
+ # Bayesian Q&A in col4
83
  with col4:
84
+ if st.button("Bayesian Q&A"):
85
+ st.write(bayes_chat(title))
86
+
87
+ # Transformer Demo in col5
88
+ with col5:
89
  if st.button("Transformer Demo"):
90
+ st.write(transformer_next(title))
91
 
92
  st.markdown("---")
93
+ st.caption("DeepSeek-R1 Math, RAG, Embedding, Bayesian & Transformer demos all in one place ✅")
requirements.txt CHANGED
@@ -5,4 +5,5 @@ streamlit==1.46.0
5
  typer==0.16.0
6
  rich==14.0.0
7
  torch==2.7.1
8
- transformers
 
 
5
  typer==0.16.0
6
  rich==14.0.0
7
  torch==2.7.1
8
+ transformers
9
+ sentence-transformers