Update app.py
Browse files
app.py
CHANGED
@@ -23,9 +23,7 @@ list_llm = [
|
|
23 |
list_llm_simple = [os.path.basename(llm) for llm in list_llm]
|
24 |
|
25 |
def load_doc(list_file_path):
|
26 |
-
"""
|
27 |
-
Load and split PDF documents into chunks
|
28 |
-
"""
|
29 |
loaders = [PyPDFLoader(x) for x in list_file_path]
|
30 |
pages = []
|
31 |
for loader in loaders:
|
@@ -38,17 +36,13 @@ def load_doc(list_file_path):
|
|
38 |
return doc_splits
|
39 |
|
40 |
def create_db(splits):
|
41 |
-
"""
|
42 |
-
Create vector database from document splits
|
43 |
-
"""
|
44 |
embeddings = HuggingFaceEmbeddings()
|
45 |
vectordb = FAISS.from_documents(splits, embeddings)
|
46 |
return vectordb
|
47 |
|
48 |
def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
|
49 |
-
"""
|
50 |
-
Initialize the language model chain
|
51 |
-
"""
|
52 |
llm = HuggingFaceEndpoint(
|
53 |
repo_id=llm_model,
|
54 |
huggingfacehub_api_token=api_token,
|
@@ -76,27 +70,21 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
|
|
76 |
return qa_chain
|
77 |
|
78 |
def initialize_database(list_file_obj, progress=gr.Progress()):
|
79 |
-
"""
|
80 |
-
Initialize the document database
|
81 |
-
"""
|
82 |
list_file_path = [x.name for x in list_file_obj if x is not None]
|
83 |
doc_splits = load_doc(list_file_path)
|
84 |
vector_db = create_db(doc_splits)
|
85 |
return vector_db, "Database created successfully!"
|
86 |
|
87 |
def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
|
88 |
-
"""
|
89 |
-
Initialize the Language Model
|
90 |
-
"""
|
91 |
llm_name = list_llm[llm_option]
|
92 |
print("Selected LLM model:", llm_name)
|
93 |
qa_chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, vector_db, progress)
|
94 |
return qa_chain, "Analysis Assistant initialized and ready!"
|
95 |
|
96 |
def format_chat_history(message, chat_history):
|
97 |
-
"""
|
98 |
-
Format chat history for the model
|
99 |
-
"""
|
100 |
formatted_chat_history = []
|
101 |
for user_message, bot_message in chat_history:
|
102 |
formatted_chat_history.append(f"User: {user_message}")
|
@@ -104,9 +92,7 @@ def format_chat_history(message, chat_history):
|
|
104 |
return formatted_chat_history
|
105 |
|
106 |
def conversation(qa_chain, message, history):
|
107 |
-
"""
|
108 |
-
Handle conversation and document analysis
|
109 |
-
"""
|
110 |
formatted_chat_history = format_chat_history(message, history)
|
111 |
response = qa_chain.invoke({"question": message, "chat_history": formatted_chat_history})
|
112 |
response_answer = response["answer"]
|
@@ -123,30 +109,18 @@ def conversation(qa_chain, message, history):
|
|
123 |
return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
|
124 |
|
125 |
def demo():
|
126 |
-
"""
|
127 |
-
Main demo application
|
128 |
-
"""
|
129 |
-
# Enhanced theme with professional colors
|
130 |
theme = gr.themes.Default(
|
131 |
primary_hue="indigo",
|
132 |
secondary_hue="blue",
|
133 |
neutral_hue="slate",
|
134 |
-
font=[gr.themes.GoogleFont("Roboto"), "system-ui", "sans-serif"]
|
135 |
)
|
136 |
|
137 |
-
|
138 |
-
.container { max-width: 1200px; margin: auto; }
|
139 |
-
.metadata { font-size: 0.9em; color: #666; }
|
140 |
-
.highlight { background-color: #f0f7ff; padding: 1em; border-radius: 8px; }
|
141 |
-
.warning { color: #e53e3e; }
|
142 |
-
.success { color: #38a169; }
|
143 |
-
"""
|
144 |
-
|
145 |
-
with gr.Blocks(theme=theme, css=css) as demo:
|
146 |
vector_db = gr.State()
|
147 |
qa_chain = gr.State()
|
148 |
|
149 |
-
#
|
150 |
gr.HTML(
|
151 |
"""
|
152 |
<div style='text-align: center; padding: 20px;'>
|
@@ -156,7 +130,7 @@ def demo():
|
|
156 |
"""
|
157 |
)
|
158 |
|
159 |
-
# Marketing
|
160 |
gr.Markdown(
|
161 |
"""
|
162 |
### π Specialized Metrology Analysis
|
@@ -175,47 +149,33 @@ def demo():
|
|
175 |
|
176 |
with gr.Row():
|
177 |
with gr.Column(scale=86):
|
178 |
-
gr.Markdown(
|
179 |
-
|
180 |
-
|
181 |
-
Upload your metrology reports for expert analysis.
|
182 |
-
"""
|
183 |
-
)
|
184 |
with gr.Row():
|
185 |
document = gr.Files(
|
186 |
-
|
187 |
file_count="multiple",
|
188 |
file_types=["pdf"],
|
189 |
interactive=True,
|
190 |
-
label="Upload Metrology Reports (PDF)",
|
191 |
-
info="Accepts multiple PDF files"
|
192 |
)
|
193 |
with gr.Row():
|
194 |
-
db_btn = gr.Button(
|
195 |
-
"Process Documents",
|
196 |
-
variant="primary",
|
197 |
-
size="lg"
|
198 |
-
)
|
199 |
with gr.Row():
|
200 |
db_progress = gr.Textbox(
|
201 |
value="Waiting for documents...",
|
202 |
-
|
203 |
-
container=False
|
204 |
)
|
205 |
|
206 |
-
gr.Markdown(
|
207 |
-
|
208 |
-
|
209 |
-
Select and configure the AI model to best meet your needs.
|
210 |
-
"""
|
211 |
-
)
|
212 |
with gr.Row():
|
213 |
llm_btn = gr.Radio(
|
214 |
-
list_llm_simple,
|
215 |
label="Available AI Models",
|
216 |
value=list_llm_simple[0],
|
217 |
-
type="index"
|
218 |
-
info="Choose the most suitable model for your analysis"
|
219 |
)
|
220 |
|
221 |
with gr.Row():
|
@@ -226,9 +186,7 @@ def demo():
|
|
226 |
maximum=1.0,
|
227 |
value=0.5,
|
228 |
step=0.1,
|
229 |
-
label="Analysis Precision"
|
230 |
-
info="Controls the balance between precision and creativity in analysis",
|
231 |
-
interactive=True
|
232 |
)
|
233 |
with gr.Row():
|
234 |
slider_maxtokens = gr.Slider(
|
@@ -236,9 +194,7 @@ def demo():
|
|
236 |
maximum=9192,
|
237 |
value=4096,
|
238 |
step=128,
|
239 |
-
label="Response Length"
|
240 |
-
info="Defines the level of detail in analyses",
|
241 |
-
interactive=True
|
242 |
)
|
243 |
with gr.Row():
|
244 |
slider_topk = gr.Slider(
|
@@ -246,21 +202,15 @@ def demo():
|
|
246 |
maximum=10,
|
247 |
value=3,
|
248 |
step=1,
|
249 |
-
label="Analysis Diversity"
|
250 |
-
info="Controls the variety of perspectives in analysis",
|
251 |
-
interactive=True
|
252 |
)
|
253 |
|
254 |
with gr.Row():
|
255 |
-
qachain_btn = gr.Button(
|
256 |
-
"Initialize Analysis Assistant",
|
257 |
-
variant="primary",
|
258 |
-
size="lg"
|
259 |
-
)
|
260 |
with gr.Row():
|
261 |
llm_progress = gr.Textbox(
|
262 |
value="Waiting for initialization...",
|
263 |
-
|
264 |
)
|
265 |
|
266 |
with gr.Column(scale=200):
|
@@ -280,8 +230,6 @@ def demo():
|
|
280 |
)
|
281 |
chatbot = gr.Chatbot(
|
282 |
height=505,
|
283 |
-
show_label=True,
|
284 |
-
container=True,
|
285 |
label="Metrology Analysis"
|
286 |
)
|
287 |
|
@@ -289,43 +237,32 @@ def demo():
|
|
289 |
with gr.Row():
|
290 |
doc_source1 = gr.Textbox(
|
291 |
label="Technical Reference 1",
|
292 |
-
lines=2
|
293 |
-
container=True,
|
294 |
-
scale=20
|
295 |
)
|
296 |
-
source1_page = gr.Number(label="Page"
|
297 |
with gr.Row():
|
298 |
doc_source2 = gr.Textbox(
|
299 |
label="Technical Reference 2",
|
300 |
-
lines=2
|
301 |
-
container=True,
|
302 |
-
scale=20
|
303 |
)
|
304 |
-
source2_page = gr.Number(label="Page"
|
305 |
with gr.Row():
|
306 |
doc_source3 = gr.Textbox(
|
307 |
label="Technical Reference 3",
|
308 |
-
lines=2
|
309 |
-
container=True,
|
310 |
-
scale=20
|
311 |
)
|
312 |
-
source3_page = gr.Number(label="Page"
|
313 |
|
314 |
with gr.Row():
|
315 |
msg = gr.Textbox(
|
316 |
placeholder="Enter your question about the metrology report...",
|
317 |
-
container=True,
|
318 |
label="Your Query"
|
319 |
)
|
320 |
with gr.Row():
|
321 |
-
submit_btn = gr.Button(
|
322 |
-
"Submit Query",
|
323 |
-
variant="primary"
|
324 |
-
)
|
325 |
clear_btn = gr.ClearButton(
|
326 |
[msg, chatbot],
|
327 |
-
value="Clear Conversation"
|
328 |
-
variant="secondary"
|
329 |
)
|
330 |
|
331 |
# Footer
|
|
|
23 |
list_llm_simple = [os.path.basename(llm) for llm in list_llm]
|
24 |
|
25 |
def load_doc(list_file_path):
|
26 |
+
"""Load and split PDF documents into chunks"""
|
|
|
|
|
27 |
loaders = [PyPDFLoader(x) for x in list_file_path]
|
28 |
pages = []
|
29 |
for loader in loaders:
|
|
|
36 |
return doc_splits
|
37 |
|
38 |
def create_db(splits):
|
39 |
+
"""Create vector database from document splits"""
|
|
|
|
|
40 |
embeddings = HuggingFaceEmbeddings()
|
41 |
vectordb = FAISS.from_documents(splits, embeddings)
|
42 |
return vectordb
|
43 |
|
44 |
def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
|
45 |
+
"""Initialize the language model chain"""
|
|
|
|
|
46 |
llm = HuggingFaceEndpoint(
|
47 |
repo_id=llm_model,
|
48 |
huggingfacehub_api_token=api_token,
|
|
|
70 |
return qa_chain
|
71 |
|
72 |
def initialize_database(list_file_obj, progress=gr.Progress()):
|
73 |
+
"""Initialize the document database"""
|
|
|
|
|
74 |
list_file_path = [x.name for x in list_file_obj if x is not None]
|
75 |
doc_splits = load_doc(list_file_path)
|
76 |
vector_db = create_db(doc_splits)
|
77 |
return vector_db, "Database created successfully!"
|
78 |
|
79 |
def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
|
80 |
+
"""Initialize the Language Model"""
|
|
|
|
|
81 |
llm_name = list_llm[llm_option]
|
82 |
print("Selected LLM model:", llm_name)
|
83 |
qa_chain = initialize_llmchain(llm_name, llm_temperature, max_tokens, top_k, vector_db, progress)
|
84 |
return qa_chain, "Analysis Assistant initialized and ready!"
|
85 |
|
86 |
def format_chat_history(message, chat_history):
|
87 |
+
"""Format chat history for the model"""
|
|
|
|
|
88 |
formatted_chat_history = []
|
89 |
for user_message, bot_message in chat_history:
|
90 |
formatted_chat_history.append(f"User: {user_message}")
|
|
|
92 |
return formatted_chat_history
|
93 |
|
94 |
def conversation(qa_chain, message, history):
|
95 |
+
"""Handle conversation and document analysis"""
|
|
|
|
|
96 |
formatted_chat_history = format_chat_history(message, history)
|
97 |
response = qa_chain.invoke({"question": message, "chat_history": formatted_chat_history})
|
98 |
response_answer = response["answer"]
|
|
|
109 |
return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
|
110 |
|
111 |
def demo():
|
112 |
+
"""Main demo application"""
|
|
|
|
|
|
|
113 |
theme = gr.themes.Default(
|
114 |
primary_hue="indigo",
|
115 |
secondary_hue="blue",
|
116 |
neutral_hue="slate",
|
|
|
117 |
)
|
118 |
|
119 |
+
with gr.Blocks(theme=theme) as demo:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
120 |
vector_db = gr.State()
|
121 |
qa_chain = gr.State()
|
122 |
|
123 |
+
# Header
|
124 |
gr.HTML(
|
125 |
"""
|
126 |
<div style='text-align: center; padding: 20px;'>
|
|
|
130 |
"""
|
131 |
)
|
132 |
|
133 |
+
# Marketing description
|
134 |
gr.Markdown(
|
135 |
"""
|
136 |
### π Specialized Metrology Analysis
|
|
|
149 |
|
150 |
with gr.Row():
|
151 |
with gr.Column(scale=86):
|
152 |
+
gr.Markdown("### 📥 Step 1: Document Loading and Preparation")
|
153 |
+
gr.Markdown("Upload your metrology reports for expert analysis.")
|
154 |
+
|
|
|
|
|
|
|
155 |
with gr.Row():
|
156 |
document = gr.Files(
|
157 |
+
label="Upload Metrology Reports (PDF)",
|
158 |
file_count="multiple",
|
159 |
file_types=["pdf"],
|
160 |
interactive=True,
|
|
|
|
|
161 |
)
|
162 |
with gr.Row():
|
163 |
+
db_btn = gr.Button("Process Documents")
|
|
|
|
|
|
|
|
|
164 |
with gr.Row():
|
165 |
db_progress = gr.Textbox(
|
166 |
value="Waiting for documents...",
|
167 |
+
label="Status"
|
|
|
168 |
)
|
169 |
|
170 |
+
gr.Markdown("### 🤖 Analysis Engine Configuration")
|
171 |
+
gr.Markdown("Select and configure the AI model to best meet your needs.")
|
172 |
+
|
|
|
|
|
|
|
173 |
with gr.Row():
|
174 |
llm_btn = gr.Radio(
|
175 |
+
choices=list_llm_simple,
|
176 |
label="Available AI Models",
|
177 |
value=list_llm_simple[0],
|
178 |
+
type="index"
|
|
|
179 |
)
|
180 |
|
181 |
with gr.Row():
|
|
|
186 |
maximum=1.0,
|
187 |
value=0.5,
|
188 |
step=0.1,
|
189 |
+
label="Analysis Precision"
|
|
|
|
|
190 |
)
|
191 |
with gr.Row():
|
192 |
slider_maxtokens = gr.Slider(
|
|
|
194 |
maximum=9192,
|
195 |
value=4096,
|
196 |
step=128,
|
197 |
+
label="Response Length"
|
|
|
|
|
198 |
)
|
199 |
with gr.Row():
|
200 |
slider_topk = gr.Slider(
|
|
|
202 |
maximum=10,
|
203 |
value=3,
|
204 |
step=1,
|
205 |
+
label="Analysis Diversity"
|
|
|
|
|
206 |
)
|
207 |
|
208 |
with gr.Row():
|
209 |
+
qachain_btn = gr.Button("Initialize Analysis Assistant")
|
|
|
|
|
|
|
|
|
210 |
with gr.Row():
|
211 |
llm_progress = gr.Textbox(
|
212 |
value="Waiting for initialization...",
|
213 |
+
label="Assistant Status"
|
214 |
)
|
215 |
|
216 |
with gr.Column(scale=200):
|
|
|
230 |
)
|
231 |
chatbot = gr.Chatbot(
|
232 |
height=505,
|
|
|
|
|
233 |
label="Metrology Analysis"
|
234 |
)
|
235 |
|
|
|
237 |
with gr.Row():
|
238 |
doc_source1 = gr.Textbox(
|
239 |
label="Technical Reference 1",
|
240 |
+
lines=2
|
|
|
|
|
241 |
)
|
242 |
+
source1_page = gr.Number(label="Page")
|
243 |
with gr.Row():
|
244 |
doc_source2 = gr.Textbox(
|
245 |
label="Technical Reference 2",
|
246 |
+
lines=2
|
|
|
|
|
247 |
)
|
248 |
+
source2_page = gr.Number(label="Page")
|
249 |
with gr.Row():
|
250 |
doc_source3 = gr.Textbox(
|
251 |
label="Technical Reference 3",
|
252 |
+
lines=2
|
|
|
|
|
253 |
)
|
254 |
+
source3_page = gr.Number(label="Page")
|
255 |
|
256 |
with gr.Row():
|
257 |
msg = gr.Textbox(
|
258 |
placeholder="Enter your question about the metrology report...",
|
|
|
259 |
label="Your Query"
|
260 |
)
|
261 |
with gr.Row():
|
262 |
+
submit_btn = gr.Button("Submit Query")
|
|
|
|
|
|
|
263 |
clear_btn = gr.ClearButton(
|
264 |
[msg, chatbot],
|
265 |
+
value="Clear Conversation"
|
|
|
266 |
)
|
267 |
|
268 |
# Footer
|