DHEIVER committed on
Commit
1ac9fe7
·
verified ·
1 Parent(s): 5f00ba7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -308
app.py CHANGED
@@ -9,15 +9,13 @@ from langchain_community.embeddings import HuggingFaceEmbeddings
9
  from langchain_community.llms import HuggingFaceEndpoint
10
  from langchain.memory import ConversationBufferMemory
11
  from langchain_community.retrievers import BM25Retriever
12
- from langchain.retrievers import EnsembleRetriever # Try the original import (it might be in langchain.retrievers)
13
-
14
- #from langchain.chains.query_constructor.base import AttributeInfo # Removed deprecated code
15
- #from langchain.chains import create_query_chain # Removed deprecated code
16
- #from langchain.retrievers.self_query.base import SelfQueryRetriever # Removed deprecated code
17
- #from langchain.chains.query_constructor.schema import FieldInfo # Removed deprecated code
18
  from langchain.retrievers.multi_query import MultiQueryRetriever
19
 
 
20
  api_token = os.getenv("FirstToken")
 
 
21
 
22
  # Available LLM models
23
  list_llm = [
@@ -30,21 +28,22 @@ list_llm_simple = [os.path.basename(llm) for llm in list_llm]
30
  # -----------------------------------------------------------------------------
31
  # Document Loading and Splitting
32
  # -----------------------------------------------------------------------------
33
- def load_doc(list_file_path):
34
  """Load and split PDF documents into chunks."""
 
 
 
35
  loaders = [PyPDFLoader(x) for x in list_file_path]
36
  pages = []
37
- for loader in loaders:
 
38
  pages.extend(loader.load())
39
- text_splitter = RecursiveCharacterTextSplitter(
40
- chunk_size=1024,
41
- chunk_overlap=64
42
- )
43
- doc_splits = text_splitter.split_documents(pages)
44
- return doc_splits
45
 
46
  # -----------------------------------------------------------------------------
47
- # Vector Database Creation (ChromaDB and FAISS)
48
  # -----------------------------------------------------------------------------
49
  def create_chromadb(splits, persist_directory="chroma_db"):
50
  """Create ChromaDB vector database from document splits."""
@@ -54,378 +53,191 @@ def create_chromadb(splits, persist_directory="chroma_db"):
54
  embedding=embeddings,
55
  persist_directory=persist_directory
56
  )
57
- chromadb.persist() # Ensure data is written to disk
58
  return chromadb
59
 
60
  def create_faissdb(splits):
61
  """Create FAISS vector database from document splits."""
62
  embeddings = HuggingFaceEmbeddings()
63
- faissdb = FAISS.from_documents(splits, embeddings)
64
- return faissdb
65
 
66
  # -----------------------------------------------------------------------------
67
- # BM25 Retriever
68
  # -----------------------------------------------------------------------------
69
  def create_bm25_retriever(splits):
70
  """Create BM25 retriever from document splits."""
71
- bm25_retriever = BM25Retriever.from_documents(splits)
72
- bm25_retriever.k = 3 # Number of documents to retrieve
73
- return bm25_retriever
74
-
75
- # -----------------------------------------------------------------------------
76
- # MultiQueryRetriever
77
- # -----------------------------------------------------------------------------
78
- def create_multi_query_retriever(llm, vector_db, num_queries=3):
79
- """
80
- Create a MultiQueryRetriever.
81
-
82
- Args:
83
- llm: The language model to use for query generation.
84
- vector_db: The vector database to retrieve from.
85
- num_queries: The number of diverse queries to generate.
86
-
87
- Returns:
88
- A MultiQueryRetriever instance.
89
- """
90
- retriever = MultiQueryRetriever.from_llm(
91
- llm=llm, retriever=vector_db.as_retriever(),
92
- output_key="answer",
93
- memory_key="chat_history",
94
- return_messages=True,
95
- verbose=False
96
- )
97
  return retriever
98
 
99
- # -----------------------------------------------------------------------------
100
- # Ensemble Retriever (Combine VectorDB and BM25)
101
- # -----------------------------------------------------------------------------
102
  def create_ensemble_retriever(vector_db, bm25_retriever):
103
- """Create an ensemble retriever combining ChromaDB and BM25."""
104
- ensemble_retriever = EnsembleRetriever(
105
  retrievers=[vector_db.as_retriever(), bm25_retriever],
106
- weights=[0.7, 0.3] # Adjust weights as needed
107
  )
108
- return ensemble_retriever
109
 
110
  # -----------------------------------------------------------------------------
111
  # Initialize Database
112
  # -----------------------------------------------------------------------------
113
  def initialize_database(list_file_obj, progress=gr.Progress()):
114
- """Initialize the document database."""
115
- list_file_path = [x.name for x in list_file_obj if x is not None]
116
- doc_splits = load_doc(list_file_path)
117
-
118
- # Create vector databases and retrievers
119
- chromadb = create_chromadb(doc_splits)
120
- bm25_retriever = create_bm25_retriever(doc_splits)
121
-
122
- # Create ensemble retriever
123
- ensemble_retriever = create_ensemble_retriever(chromadb, bm25_retriever)
124
-
125
- return ensemble_retriever, "Database created successfully!"
126
 
127
  # -----------------------------------------------------------------------------
128
  # Initialize LLM Chain
129
  # -----------------------------------------------------------------------------
130
- def initialize_llmchain(llm_model, temperature, max_tokens, top_k, retriever, progress=gr.Progress()):
131
- """Initialize the language model chain."""
132
- llm = HuggingFaceEndpoint(
133
- repo_id=llm_model,
134
- huggingfacehub_api_token=api_token,
135
- temperature=temperature,
136
- max_new_tokens=max_tokens,
137
- top_k=top_k,
138
- task="text-generation"
139
- )
140
-
141
- memory = ConversationBufferMemory(
142
- memory_key="chat_history",
143
- output_key='answer',
144
- return_messages=True
145
- )
146
-
147
- qa_chain = ConversationalRetrievalChain.from_llm(
148
- llm,
149
- retriever=retriever,
150
- chain_type="stuff",
151
- memory=memory,
152
- return_source_documents=True,
153
- verbose=False,
154
- )
155
- return qa_chain
 
156
 
157
  # -----------------------------------------------------------------------------
158
  # Initialize LLM
159
  # -----------------------------------------------------------------------------
160
  def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, retriever, progress=gr.Progress()):
161
  """Initialize the Language Model."""
162
- llm_name = list_llm[llm_option]
163
- print("Selected LLM model:", llm_name)
164
- qa_chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, retriever, progress)
165
- return qa_chain, "Analysis Assistant initialized and ready!"
 
 
 
166
 
167
  # -----------------------------------------------------------------------------
168
  # Chat History Formatting
169
  # -----------------------------------------------------------------------------
170
  def format_chat_history(message, chat_history):
171
  """Format chat history for the model."""
172
- formatted_chat_history = []
173
- for user_message, bot_message in chat_history:
174
- formatted_chat_history.append(f"User: {user_message}")
175
- formatted_chat_history.append(f"Assistant: {bot_message}")
176
- return formatted_chat_history
177
 
178
  # -----------------------------------------------------------------------------
179
  # Conversation Function
180
  # -----------------------------------------------------------------------------
181
  def conversation(qa_chain, message, history, lang):
182
  """Handle conversation and document analysis."""
183
-
184
- # Add language instruction to the message
185
- if lang == "pt":
186
- message += " (Responda em Português)"
187
- else:
188
- message += " (Respond in English)"
189
-
190
- formatted_chat_history = format_chat_history(message, history)
191
- response = qa_chain.invoke({"question": message, "chat_history": formatted_chat_history})
192
- response_answer = response["answer"]
193
-
194
- # Remove the language instruction from the chat history
195
- if "(Respond" in message:
196
- message = message.split(" (Respond")[0]
197
-
198
- if response_answer.find("Helpful Answer:") != -1:
199
- response_answer = response_answer.split("Helpful Answer:")[-1]
200
-
201
- response_sources = response["source_documents"]
202
- response_source1 = response_sources[0].page_content.strip()
203
- response_source2 = response_sources[1].page_content.strip()
204
- response_source3 = response_sources[2].page_content.strip()
205
- response_source1_page = response_sources[0].metadata["page"] + 1
206
- response_source2_page = response_sources[1].metadata["page"] + 1
207
- response_source3_page = response_sources[2].metadata["page"] + 1
208
- new_history = history + [(message, response_answer)]
209
-
210
- return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
211
 
212
  # -----------------------------------------------------------------------------
213
  # Gradio Demo
214
  # -----------------------------------------------------------------------------
215
  def demo():
216
  """Main demo application with enhanced layout."""
217
- theme = gr.themes.Default(
218
- primary_hue="indigo",
219
- secondary_hue="blue",
220
- neutral_hue="slate",
221
- )
222
-
223
- # Custom CSS for advanced layout
224
  custom_css = """
225
  .container {background: #ffffff; padding: 1rem; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);}
226
  .header {text-align: center; margin-bottom: 2rem;}
227
  .header h1 {color: #1a365d; font-size: 2.5rem; margin-bottom: 0.5rem;}
228
- .header p {color: #4a5568; font-size: 1.2rem;}
229
  .section {margin-bottom: 1.5rem; padding: 1rem; background: #f8fafc; border-radius: 8px;}
230
- .control-panel {margin-bottom: 1rem;}
231
- .chat-area {background: white; padding: 1rem; border-radius: 8px;}
232
  """
233
 
234
  with gr.Blocks(theme=theme, css=custom_css) as demo:
235
  retriever = gr.State()
236
  qa_chain = gr.State()
237
- language = gr.State(value="en") # State for language control
238
 
239
- # Header
240
  gr.HTML(
241
- """
242
- <div class="header">
243
- <h1>MetroAssist AI</h1>
244
- <p>Expert System for Metrology Report Analysis</p>
245
- </div>
246
- """
247
  )
248
 
249
  with gr.Row():
250
- # Left Column - Controls
251
  with gr.Column(scale=1):
252
  gr.Markdown("## Document Processing")
253
-
254
- # File Upload Section
255
  with gr.Column(elem_classes="section"):
256
- gr.Markdown("### 📄 Upload Documents")
257
- document = gr.Files(
258
- label="Metrology Reports (PDF)",
259
- file_count="multiple",
260
- file_types=["pdf"]
261
- )
262
  db_btn = gr.Button("Process Documents")
263
- db_progress = gr.Textbox(
264
- value="Ready for documents",
265
- label="Processing Status"
266
- )
267
 
268
- # Model Selection Section
269
  with gr.Column(elem_classes="section"):
270
- gr.Markdown("### 🤖 Model Configuration")
271
- llm_btn = gr.Radio(
272
- choices=list_llm_simple,
273
- label="Select AI Model",
274
- value=list_llm_simple[0],
275
- type="index"
276
- )
277
-
278
- # Language selection button
279
- language_btn = gr.Radio(
280
- choices=["English", "Português"],
281
- label="Response Language",
282
- value="English",
283
- type="value"
284
- )
285
-
286
  with gr.Accordion("Advanced Settings", open=False):
287
- slider_temperature = gr.Slider(
288
- minimum=0.01,
289
- maximum=1.0,
290
- value=0.5,
291
- step=0.1,
292
- label="Analysis Precision"
293
- )
294
- slider_maxtokens = gr.Slider(
295
- minimum=128,
296
- maximum=9192,
297
- value=4096,
298
- step=128,
299
- label="Response Length"
300
- )
301
- slider_topk = gr.Slider(
302
- minimum=1,
303
- maximum=10,
304
- value=3,
305
- step=1,
306
- label="Analysis Diversity"
307
- )
308
-
309
  qachain_btn = gr.Button("Initialize Assistant")
310
- llm_progress = gr.Textbox(
311
- value="Not initialized",
312
- label="Assistant Status"
313
- )
314
 
315
- # Right Column - Chat Interface
316
  with gr.Column(scale=2):
317
  gr.Markdown("## Interactive Analysis")
318
-
319
- # Features Section
320
  with gr.Row():
321
- with gr.Column():
322
- gr.Markdown(
323
- """
324
- ### 📊 Capabilities
325
- - Calibration Analysis
326
- - Standards Compliance
327
- - Uncertainty Evaluation
328
- """
329
- )
330
- with gr.Column():
331
- gr.Markdown(
332
- """
333
- ### 💡 Best Practices
334
- - Ask specific questions
335
- - Include measurement context
336
- - Specify standards
337
- """
338
- )
339
-
340
- # Chat Interface
341
- with gr.Column(elem_classes="chat-area"):
342
- chatbot = gr.Chatbot(
343
- height=400,
344
- label="Analysis Conversation"
345
- )
346
- with gr.Row():
347
- msg = gr.Textbox(
348
- placeholder="Ask about your metrology report...",
349
- label="Query"
350
- )
351
- submit_btn = gr.Button("Send")
352
- clear_btn = gr.ClearButton(
353
- [msg, chatbot],
354
- value="Clear"
355
- )
356
-
357
- # References Section
358
  with gr.Accordion("Document References", open=False):
359
  with gr.Row():
360
- with gr.Column():
361
- doc_source1 = gr.Textbox(label="Reference 1", lines=2)
362
- source1_page = gr.Number(label="Page")
363
- with gr.Column():
364
- doc_source2 = gr.Textbox(label="Reference 2", lines=2)
365
- source2_page = gr.Number(label="Page")
366
- with gr.Column():
367
- doc_source3 = gr.Textbox(label="Reference 3", lines=2)
368
- source3_page = gr.Number(label="Page")
369
-
370
- # Footer
371
- gr.Markdown(
372
- """
373
- ---
374
- ### About MetroAssist AI
375
-
376
- A specialized tool for metrology professionals, providing advanced analysis
377
- of calibration certificates, measurement data, and technical standards compliance.
378
-
379
- **Version 1.0** | © 2024 MetroAssist AI
380
- """
381
- )
382
 
383
  # Event Handlers
384
- language_btn.change(
385
- lambda x: "en" if x == "English" else "pt",
386
- inputs=language_btn,
387
- outputs=language
388
- )
389
-
390
- db_btn.click(
391
- initialize_database,
392
- inputs=[document],
393
- outputs=[retriever, db_progress]
394
- )
395
-
396
- qachain_btn.click(
397
- initialize_LLM,
398
- inputs=[llm_btn, slider_temperature, slider_maxtokens, slider_topk, retriever],
399
- outputs=[qa_chain, llm_progress]
400
- ).then(
401
- lambda: [None, "", 0, "", 0, "", 0],
402
- inputs=None,
403
- outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
404
- queue=False
405
- )
406
-
407
- msg.submit(
408
- conversation,
409
- inputs=[qa_chain, msg, chatbot, language],
410
- outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
411
- queue=False
412
- )
413
-
414
- submit_btn.click(
415
- conversation,
416
- inputs=[qa_chain, msg, chatbot, language],
417
- outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
418
- queue=False
419
- )
420
-
421
- clear_btn.click(
422
- lambda: [None, "", 0, "", 0, "", 0],
423
- inputs=None,
424
- outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page],
425
- queue=False
426
- )
427
 
428
- demo.queue().launch(debug=True)
429
 
430
  if __name__ == "__main__":
431
- demo()
 
9
  from langchain_community.llms import HuggingFaceEndpoint
10
  from langchain.memory import ConversationBufferMemory
11
  from langchain_community.retrievers import BM25Retriever
12
+ from langchain.retrievers import EnsembleRetriever
 
 
 
 
 
13
  from langchain.retrievers.multi_query import MultiQueryRetriever
14
 
# Hugging Face API token, read from the "FirstToken" environment variable.
# Fail fast at import time when it is missing or empty.
api_token = os.getenv("FirstToken")
if api_token is None or api_token == "":
    raise ValueError("Environment variable 'FirstToken' not set. Please set the Hugging Face API token.")
19
 
20
  # Available LLM models
21
  list_llm = [
 
28
  # -----------------------------------------------------------------------------
29
  # Document Loading and Splitting
30
  # -----------------------------------------------------------------------------
def load_doc(list_file_path, progress=gr.Progress()):
    """Read every PDF in ``list_file_path`` and cut the pages into chunks.

    Args:
        list_file_path: Paths of the PDF files to load.
        progress: Gradio progress tracker; called once per file with the
            completed fraction and the description "Loading PDFs...".

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter`` with
        ``chunk_size=1024`` and ``chunk_overlap=64``.

    Raises:
        ValueError: If ``list_file_path`` is empty (or otherwise falsy).
    """
    if not list_file_path:
        raise ValueError("No files provided for processing.")

    # Build every loader up front, then read them one at a time.
    loaders = [PyPDFLoader(path) for path in list_file_path]
    total = len(loaders)
    pages = []
    for position, loader in enumerate(loaders, start=1):
        progress(position / total, "Loading PDFs...")
        pages.extend(loader.load())

    splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
    return splitter.split_documents(pages)
 
 
 
44
 
45
  # -----------------------------------------------------------------------------
46
+ # Vector Database Creation
47
  # -----------------------------------------------------------------------------
48
  def create_chromadb(splits, persist_directory="chroma_db"):
49
  """Create ChromaDB vector database from document splits."""
 
53
  embedding=embeddings,
54
  persist_directory=persist_directory
55
  )
 
56
  return chromadb
57
 
def create_faissdb(splits):
    """Build a FAISS vector store from the given document chunks.

    The chunks are embedded with a default-constructed
    ``HuggingFaceEmbeddings`` instance.
    """
    return FAISS.from_documents(splits, HuggingFaceEmbeddings())
 
62
 
63
  # -----------------------------------------------------------------------------
64
+ # Retrievers
65
  # -----------------------------------------------------------------------------
def create_bm25_retriever(splits, k=3):
    """Create a BM25 retriever from document splits.

    Args:
        splits: Document chunks to index.
        k: Number of documents the retriever returns per query. Defaults
            to 3, the value that used to be hard-coded here.

    Returns:
        The configured ``BM25Retriever``.
    """
    retriever = BM25Retriever.from_documents(splits)
    retriever.k = k
    return retriever
71
 
 
 
 
def create_ensemble_retriever(vector_db, bm25_retriever, weights=(0.7, 0.3)):
    """Create an ensemble retriever combining vector DB and BM25.

    Args:
        vector_db: Vector store; queried through ``vector_db.as_retriever()``.
        bm25_retriever: Retriever used as the second ensemble member.
        weights: Pair of weights for ``(vector_db, bm25_retriever)``, in
            that order. Defaults to ``(0.7, 0.3)``, the values that used
            to be hard-coded here.

    Returns:
        An ``EnsembleRetriever`` over the two retrievers.
    """
    return EnsembleRetriever(
        retrievers=[vector_db.as_retriever(), bm25_retriever],
        # A tuple default avoids a shared mutable default; pass a fresh list on.
        weights=list(weights),
    )
 
78
 
79
  # -----------------------------------------------------------------------------
80
  # Initialize Database
81
  # -----------------------------------------------------------------------------
def initialize_database(list_file_obj, progress=gr.Progress()):
    """Initialize the document database with error handling.

    Args:
        list_file_obj: Uploaded file objects, each exposing a ``.name``
            path. ``None`` entries are skipped, and a ``None`` list is
            treated as "no files".
        progress: Gradio progress tracker, forwarded to ``load_doc``.

    Returns:
        ``(ensemble_retriever, status_message)`` on success, or
        ``(None, "Error initializing database: ...")`` on any failure.
    """
    try:
        # The list itself may be None (presumably when nothing was uploaded).
        # Treat that as empty so load_doc raises its explicit
        # "No files provided" error instead of an opaque TypeError.
        list_file_path = [x.name for x in (list_file_obj or []) if x is not None]
        doc_splits = load_doc(list_file_path, progress)
        chromadb = create_chromadb(doc_splits)
        bm25_retriever = create_bm25_retriever(doc_splits)
        ensemble_retriever = create_ensemble_retriever(chromadb, bm25_retriever)
        return ensemble_retriever, "Database created successfully!"
    except Exception as e:
        # UI boundary: report the failure in the status box rather than raise.
        return None, f"Error initializing database: {str(e)}"
 
 
93
 
94
  # -----------------------------------------------------------------------------
95
  # Initialize LLM Chain
96
  # -----------------------------------------------------------------------------
def initialize_llmchain(llm_model, temperature, max_tokens, top_k, retriever):
    """Initialize the language model chain with error handling.

    Args:
        llm_model: Repo id passed to ``HuggingFaceEndpoint``.
        temperature: Sampling temperature for the endpoint.
        max_tokens: Passed to the endpoint as ``max_new_tokens``.
        top_k: Top-k sampling value for the endpoint.
        retriever: Retriever the chain uses to fetch documents.

    Returns:
        A ``ConversationalRetrievalChain`` with conversation memory that
        also returns its source documents.

    Raises:
        RuntimeError: If ``retriever`` is ``None`` or any step of the
            construction fails; the original exception is chained.
    """
    if retriever is None:
        # Fail with an actionable message instead of an error from deep
        # inside the chain construction.
        raise RuntimeError(
            "Failed to initialize LLM chain: no retriever available. "
            "Process the documents first."
        )

    try:
        llm = HuggingFaceEndpoint(
            repo_id=llm_model,
            huggingfacehub_api_token=api_token,
            temperature=temperature,
            max_new_tokens=max_tokens,
            top_k=top_k,
            task="text-generation",
        )
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            output_key="answer",
            return_messages=True,
        )
        qa_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=retriever,
            chain_type="stuff",
            memory=memory,
            return_source_documents=True,
            verbose=False,
        )
        return qa_chain
    except Exception as e:
        # Chain explicitly so the root cause stays attached to the traceback.
        raise RuntimeError(f"Failed to initialize LLM chain: {str(e)}") from e
124
 
125
  # -----------------------------------------------------------------------------
126
  # Initialize LLM
127
  # -----------------------------------------------------------------------------
def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, retriever, progress=gr.Progress()):
    """Resolve the selected model and build its QA chain.

    Args:
        llm_option: Index into ``list_llm`` of the chosen model.
        llm_temperature: Temperature forwarded to ``initialize_llmchain``.
        max_tokens: Token limit forwarded to ``initialize_llmchain``.
        top_k: Top-k value forwarded to ``initialize_llmchain``.
        retriever: Retriever forwarded to ``initialize_llmchain``.
        progress: Gradio progress tracker (not used in this function body).

    Returns:
        ``(qa_chain, status_message)`` on success, or
        ``(None, "Error initializing LLM: ...")`` on any failure.
    """
    try:
        model_id = list_llm[llm_option]
        print(f"Selected LLM model: {model_id}")
        chain = initialize_llmchain(model_id, llm_temperature, max_tokens, top_k, retriever)
    except Exception as exc:
        return None, f"Error initializing LLM: {str(exc)}"
    return chain, "Analysis Assistant initialized and ready!"
137
 
138
  # -----------------------------------------------------------------------------
139
  # Chat History Formatting
140
  # -----------------------------------------------------------------------------
def format_chat_history(message, chat_history):
    """Render each past exchange as one "User/Assistant" string.

    Args:
        message: The current user message (not used in the output).
        chat_history: Iterable of ``(user_message, bot_message)`` pairs.

    Returns:
        A list with one ``"User: ...\\nAssistant: ..."`` string per pair.
    """
    turns = []
    for user_msg, bot_msg in chat_history:
        turns.append(f"User: {user_msg}\nAssistant: {bot_msg}")
    return turns
 
 
 
 
144
 
145
  # -----------------------------------------------------------------------------
146
  # Conversation Function
147
  # -----------------------------------------------------------------------------
def conversation(qa_chain, message, history, lang):
    """Handle conversation and document analysis.

    Args:
        qa_chain: The initialized QA chain, or a falsy value if the
            assistant has not been initialized yet.
        message: The user's question.
        history: List of ``(user_message, answer)`` pairs so far.
        lang: ``"pt"`` asks for a Portuguese answer; anything else asks
            for an English one.

    Returns:
        A 9-tuple: ``(qa_chain, query-box update, history, source 1 text,
        source 1 page, source 2 text, source 2 page, source 3 text,
        source 3 page)``. Missing sources are reported as
        ``("Unknown", 0)``; on failure all sources are ``("", 0)`` and the
        error text is placed in the query-box update.
    """
    if not qa_chain:
        return None, gr.update(value="Assistant not initialized"), history, "", 0, "", 0, "", 0

    # Add language instruction
    lang_instruction = " (Responda em Português)" if lang == "pt" else " (Respond in English)"
    query = message + lang_instruction

    try:
        formatted_chat_history = format_chat_history(message, history)
        response = qa_chain.invoke({"question": query, "chat_history": formatted_chat_history})

        # Keep only the text after the last "Helpful Answer:" marker, if any.
        raw_answer = response["answer"]
        if "Helpful Answer:" in raw_answer:
            answer = raw_answer.split("Helpful Answer:")[-1].strip()
        else:
            answer = raw_answer

        # Extract sources (handle cases where fewer than 3 documents are returned)
        source_data = [("Unknown", 0)] * 3
        for i, doc in enumerate(response["source_documents"][:3]):
            # A document without usable "page" metadata must not discard the
            # whole answer; report page 0 for it instead of raising.
            page = doc.metadata.get("page")
            page_number = page + 1 if isinstance(page, int) else 0
            source_data[i] = (doc.page_content.strip(), page_number)

        # Update history without the language instruction
        new_history = history + [(message, answer)]
        return (
            qa_chain, gr.update(value=""), new_history,
            source_data[0][0], source_data[0][1],
            source_data[1][0], source_data[1][1],
            source_data[2][0], source_data[2][1],
        )
    except Exception as e:
        # UI boundary: surface the error text instead of raising.
        return qa_chain, gr.update(value=f"Error: {str(e)}"), history, "", 0, "", 0, "", 0
178
 
179
  # -----------------------------------------------------------------------------
180
  # Gradio Demo
181
  # -----------------------------------------------------------------------------
def demo():
    """Build the Gradio interface, wire its events, and launch it."""
    # NOTE(review): the nesting of the layout blocks below was reconstructed
    # from a flattened diff view; confirm it against the real app.py.
    theme = gr.themes.Default(primary_hue="indigo", secondary_hue="blue", neutral_hue="slate")
    custom_css = """
    .container {background: #ffffff; padding: 1rem; border-radius: 8px; box-shadow: 0 1px 3px rgba(0,0,0,0.1);}
    .header {text-align: center; margin-bottom: 2rem;}
    .header h1 {color: #1a365d; font-size: 2.5rem; margin-bottom: 0.5rem;}
    .section {margin-bottom: 1.5rem; padding: 1rem; background: #f8fafc; border-radius: 8px;}
    """

    def _language_code(choice):
        """Map the radio label to the language code stored in state."""
        if choice == "English":
            return "en"
        return "pt"

    with gr.Blocks(theme=theme, css=custom_css) as app:
        # Per-session state shared between the event handlers.
        retriever = gr.State()
        qa_chain = gr.State()
        language = gr.State(value="en")

        gr.HTML(
            '<div class="header"><h1>MetroAssist AI</h1><p>Expert System for Metrology Report Analysis</p></div>'
        )

        with gr.Row():
            # Controls: document upload and model configuration.
            with gr.Column(scale=1):
                gr.Markdown("## Document Processing")
                with gr.Column(elem_classes="section"):
                    document = gr.Files(
                        label="Metrology Reports (PDF)",
                        file_count="multiple",
                        file_types=["pdf"],
                    )
                    db_btn = gr.Button("Process Documents")
                    db_progress = gr.Textbox(value="Ready for documents", label="Processing Status")

                gr.Markdown("## Model Configuration")
                with gr.Column(elem_classes="section"):
                    llm_btn = gr.Radio(
                        choices=list_llm_simple,
                        label="Select AI Model",
                        value=list_llm_simple[0],
                        type="index",
                    )
                    language_btn = gr.Radio(
                        choices=["English", "Português"],
                        label="Response Language",
                        value="English",
                    )
                    with gr.Accordion("Advanced Settings", open=False):
                        slider_temperature = gr.Slider(
                            minimum=0.01, maximum=1.0, value=0.5, step=0.1, label="Analysis Precision"
                        )
                        slider_maxtokens = gr.Slider(
                            minimum=128, maximum=9192, value=4096, step=128, label="Response Length"
                        )
                        slider_topk = gr.Slider(
                            minimum=1, maximum=10, value=3, step=1, label="Analysis Diversity"
                        )
                    qachain_btn = gr.Button("Initialize Assistant")
                    llm_progress = gr.Textbox(value="Not initialized", label="Assistant Status")

            # Chat area and the three document references.
            with gr.Column(scale=2):
                gr.Markdown("## Interactive Analysis")
                chatbot = gr.Chatbot(height=400, label="Analysis Conversation")
                with gr.Row():
                    msg = gr.Textbox(placeholder="Ask about your metrology report...", label="Query")
                    submit_btn = gr.Button("Send")
                    clear_btn = gr.ClearButton([msg, chatbot], value="Clear")
                with gr.Accordion("Document References", open=False):
                    with gr.Row():
                        doc_source1 = gr.Textbox(label="Reference 1", lines=2)
                        source1_page = gr.Number(label="Page")
                        doc_source2 = gr.Textbox(label="Reference 2", lines=2)
                        source2_page = gr.Number(label="Page")
                        doc_source3 = gr.Textbox(label="Reference 3", lines=2)
                        source3_page = gr.Number(label="Page")

        # Event Handlers
        chat_inputs = [qa_chain, msg, chatbot, language]
        chat_outputs = [
            qa_chain, msg, chatbot,
            doc_source1, source1_page,
            doc_source2, source2_page,
            doc_source3, source3_page,
        ]
        language_btn.change(_language_code, inputs=language_btn, outputs=language)
        db_btn.click(initialize_database, inputs=[document], outputs=[retriever, db_progress])
        qachain_btn.click(
            initialize_LLM,
            inputs=[llm_btn, slider_temperature, slider_maxtokens, slider_topk, retriever],
            outputs=[qa_chain, llm_progress],
        )
        # The Send button and pressing Enter in the query box do the same thing.
        submit_btn.click(conversation, inputs=chat_inputs, outputs=chat_outputs)
        msg.submit(conversation, inputs=chat_inputs, outputs=chat_outputs)

    app.launch(debug=True)
241
 
242
  if __name__ == "__main__":
243
+ demo()