amiguel committed
Commit 8b7373b · verified · 1 Parent(s): 16ad5dc

Update app.py

Files changed (1)
  1. app.py +32 -10
app.py CHANGED
@@ -2,6 +2,7 @@ import streamlit as st
 import torch
 import os
 import tempfile
+import time
 from threading import Thread
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 from langchain_community.document_loaders import PyPDFLoader, TextLoader
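The loaders imported above are applied to the sidebar uploads elsewhere in app.py; that code is unchanged and not part of this diff. A minimal sketch of the usual pattern with these imports, assuming a hypothetical helper named load_uploaded_documents that is not in this repository:

def load_uploaded_documents(uploaded_files):
    # Hypothetical helper: persist Streamlit uploads to disk and run the matching loader.
    documents = []
    for uploaded in uploaded_files:
        suffix = os.path.splitext(uploaded.name)[1].lower()
        # PyPDFLoader/TextLoader expect a file path, so write the in-memory upload to a temp file first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp.write(uploaded.read())
            tmp_path = tmp.name
        loader = PyPDFLoader(tmp_path) if suffix == ".pdf" else TextLoader(tmp_path)
        documents.extend(loader.load())
        os.remove(tmp_path)
    return documents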
@@ -21,12 +22,13 @@ HF_TOKEN = st.secrets["HF_TOKEN"]
 
 # --- Page Setup ---
 st.set_page_config(page_title="Hybrid RAG Chat", page_icon="🤖", layout="centered")
-st.title("🤖 DigiTwin - Hybrid Search + Streaming")
+st.title("🤖 DigiTwin Streaming")
 
 # --- Sidebar Upload ---
 with st.sidebar:
     st.header("📤 Upload Documents")
     uploaded_files = st.file_uploader("PDFs or .txt files only", type=["pdf", "txt"], accept_multiple_files=True)
+    max_tokens = st.slider("🧠 Max Response Tokens", 100, 2048, 512, step=50)
     clear_chat = st.button("🧹 Clear Conversation")
 
 # --- Chat Memory ---
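The chat-memory block itself is unchanged; judging from the hunk header below, it resets the history on first load or when 🧹 Clear Conversation is pressed. Its assumed shape, sketched for context rather than quoted from the file:

# --- Chat Memory ---
# Assumed: start with an empty history on first load or after the user clears the chat.
if "messages" not in st.session_state or clear_chat:
    st.session_state.messages = []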
@@ -36,7 +38,7 @@ if "messages" not in st.session_state or clear_chat:
 # --- Load LLM ---
 @st.cache_resource
 def load_model():
-    model_id = "amiguel/GM_Qwen1.8B_Finetune"
+    model_id = "tiiuae/falcon-7b-instruct"
     tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto", token=HF_TOKEN)
    return tokenizer, model
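generate_response further down uses tokenizer and model as globals, so load_model() is presumably called once at module level; @st.cache_resource then keeps the Falcon weights in memory across Streamlit reruns instead of reloading them on every interaction. The assumed call, not visible in this hunk:

# Assumed module-level call: cached by @st.cache_resource, so the model loads only once per process.
tokenizer, model = load_model()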
@@ -73,9 +75,7 @@ def build_prompt(history, context=""):
     for msg in history:
         role = "User" if msg["role"] == "user" else "Assistant"
         dialog += f"{role}: {msg['content']}\n"
-    return f"""You are DigiTwin, a highly professional and experienced assistant in inspection, integrity, and maintenance of topside equipment, piping systems, pressure vessels, structures, and safety systems.
-
-Use the following context to provide expert-level answers.
+    return f"""You are DigiTwin, a highly professional and experienced assistant in inspection, integrity, and maintenance of topside equipment, piping systems, pressure vessels, structures, and safety systems. Use the following context to provide expert-level answers.
 
 Context:
 {context}
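Only the tail of build_prompt is visible across this hunk and the next one. For readability, here is a sketch of the whole function as it would read after this change; the dialog initialisation and the placement of the conversation before the final Assistant: line are inferred from the visible pieces, not quoted:

def build_prompt(history, context=""):
    # Assumed: flatten the chat history into "User:" / "Assistant:" turns.
    dialog = ""
    for msg in history:
        role = "User" if msg["role"] == "user" else "Assistant"
        dialog += f"{role}: {msg['content']}\n"
    return f"""You are DigiTwin, a highly professional and experienced assistant in inspection, integrity, and maintenance of topside equipment, piping systems, pressure vessels, structures, and safety systems. Use the following context to provide expert-level answers.

Context:
{context}

{dialog}
Assistant:"""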
@@ -84,10 +84,10 @@ Context:
 Assistant:"""
 
 # --- Response Generator ---
-def generate_response(prompt):
+def generate_response(prompt, max_tokens):
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    Thread(target=model.generate, kwargs={**inputs, "streamer": streamer, "max_new_tokens": 300}).start()
+    Thread(target=model.generate, kwargs={**inputs, "streamer": streamer, "max_new_tokens": max_tokens}).start()
     output = ""
     for token in streamer:
         output += token
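The hunk cuts off inside the streaming loop. Since the chat handler below iterates with for chunk in generate_response(...) and simply overwrites answer with each chunk, generate_response is evidently a generator that yields the text accumulated so far; the assumed tail of the loop looks like this:

    for token in streamer:
        output += token
        yield output  # assumed: emit the full text so far, so the UI re-renders the growing answer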
@@ -113,17 +113,39 @@ if query := st.chat_input("Ask DigiTwin anything..."):
     st.session_state.messages.append({"role": "user", "content": query})
 
     context = ""
+    matched_chunks = []
     if retriever:
-        docs = retriever.get_relevant_documents(query)
-        context = "\n\n".join([doc.page_content for doc in docs])
+        matched_chunks = retriever.get_relevant_documents(query)
+        context = "\n\n".join([doc.page_content for doc in matched_chunks])
 
     full_prompt = build_prompt(st.session_state.messages, context)
 
     with st.chat_message("assistant", avatar=BOT_AVATAR):
+        start_time = time.time()
         container = st.empty()
         answer = ""
-        for chunk in generate_response(full_prompt):
+
+        for chunk in generate_response(full_prompt, max_tokens):
             answer = chunk
             container.markdown(answer + "▌", unsafe_allow_html=True)
         container.markdown(answer)
+
+        end_time = time.time()
+        input_tokens = len(tokenizer(full_prompt)["input_ids"])
+        output_tokens = len(tokenizer(answer)["input_ids"])
+        speed = output_tokens / (end_time - start_time)
+
         st.session_state.messages.append({"role": "assistant", "content": answer})
+
+        # RAG Debug Info
+        with st.expander("📊 Response Stats & RAG Debug"):
+            st.caption(
+                f"🔑 Input Tokens: {input_tokens} | Output Tokens: {output_tokens} | "
+                f"🕒 Speed: {speed:.1f} tokens/sec"
+            )
+            for i, doc in enumerate(matched_chunks):
+                score = getattr(doc, "score", None)
+                metadata = doc.metadata if hasattr(doc, "metadata") else {}
+                st.markdown(f"**Chunk #{i+1}**")
+                st.code(doc.page_content.strip()[:500])
+                st.text(f"🔍 Similarity Score: {score if score else 'N/A'} | Metadata: {metadata}")
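One caveat on the new debug expander: LangChain retrievers do not normally attach a score attribute to the Documents returned by get_relevant_documents, so the Similarity Score field will usually show N/A. If the underlying index is a FAISS-style vector store, the scores could be fetched explicitly and surfaced through the metadata printed on the same line; a hedged sketch, where vectorstore is an assumed handle that does not appear in this commit:

# Hypothetical variant: query the vector store directly so each chunk carries its distance score.
matched_chunks = []
if vectorstore:
    for doc, score in vectorstore.similarity_search_with_score(query, k=4):
        doc.metadata["score"] = float(score)  # shows up in the Metadata field of the debug expander
        matched_chunks.append(doc)
    context = "\n\n".join(doc.page_content for doc in matched_chunks)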