Sarath0x8f committed on
Commit 1efad3c · verified · 1 Parent(s): 912b2d3

Update app.py

Files changed (1): app.py (+96 -220)
app.py CHANGED
@@ -6,6 +6,7 @@ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 import os
 from dotenv import load_dotenv
 import gradio as gr
 import base64

 # Load environment variables
@@ -16,6 +17,32 @@ llm_models = [
 "meta-llama/Meta-Llama-3-8B-Instruct",
 "mistralai/Mistral-7B-Instruct-v0.2",
 "tiiuae/falcon-7b-instruct",
 ]

 embed_models = [
@@ -32,7 +59,6 @@ vector_index = None

 # Initialize the parser
 parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')
-
 # Define file extractor with various common extensions
 file_extractor = {
 '.pdf': parser, # PDF documents
@@ -40,105 +66,62 @@ file_extractor = {
 '.doc': parser, # Older Microsoft Word documents
 '.txt': parser, # Plain text files
 '.csv': parser, # Comma-separated values files
- '.xlsx': parser, # Microsoft Excel files
- '.pptx': parser, # Microsoft PowerPoint files
- '.html': parser, # HTML files
 '.jpg': parser, # JPEG images
 '.jpeg': parser, # JPEG images
 '.png': parser, # PNG images
 '.webp': parser, # WebP images
- '.svg': parser, # SVG files
 }

- # Markdown content definitions
- description = """
- ## Welcome to DocBot 📄🤖
- DocBot is an intelligent document analysis tool that can help you extract insights from various document formats including:
- - PDF documents
- - Word documents (.docx, .doc)
- - Text files
- - CSV files
- - Excel files
- - PowerPoint presentations
- - HTML files
- - Images with text (JPG, PNG, WebP, SVG)
- Simply upload your document, select your preferred embedding model and LLM, then start asking questions!
- """
-
- guide = """
- ### How to Use DocBot:
- 1. **Upload Document**: Choose any supported file format
- 2. **Select Embedding Model**: Choose from available embedding models (BAAI/bge-small-en-v1.5 is recommended for most cases)
- 3. **Submit**: Click submit to process your document
- 4. **Select LLM**: Choose your preferred language model
- 5. **Ask Questions**: Start chatting with your document!
- ### Tips:
- - Smaller embedding models (like bge-small-en-v1.5) are faster but may be less accurate
- - Larger models provide better understanding but take more time
- - Be specific in your questions for better results
- """
-
- footer = """
- <div style="text-align: center; margin-top: 20px; padding: 20px; border-top: 1px solid #ddd;">
- <p>Built with ❤️ using LlamaIndex and Gradio</p>
- <div style="display: flex; justify-content: center; gap: 20px; margin-top: 10px;">
- <a href="https://github.com" target="_blank">
- <img src="data:image/png;base64,{0}" alt="GitHub" style="width: 24px; height: 24px;">
- </a>
- <a href="https://linkedin.com" target="_blank">
- <img src="data:image/png;base64,{1}" alt="LinkedIn" style="width: 24px; height: 24px;">
- </a>
- <a href="https://your-website.com" target="_blank">
- <img src="data:image/png;base64,{2}" alt="Website" style="width: 24px; height: 24px;">
- </a>
- </div>
- </div>
- """

 # File processing function
 def load_files(file_path: str, embed_model_name: str):
 try:
- if not file_path:
- return "Please select a file first."
-
- if not embed_model_name:
- return "Please select an embedding model."
-
 global vector_index
 document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
 embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
 vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
 print(f"Parsing done for {file_path}")
 filename = os.path.basename(file_path)
- return f"Ready to answer questions about: {filename}"
 except Exception as e:
- return f"An error occurred: {str(e)}"

 # Function to handle the selected model from dropdown
 def set_llm_model(selected_model):
 global selected_llm_model_name
- if selected_model:
- selected_llm_model_name = selected_model
- return f"LLM set to: {selected_model}"

 # Respond function that uses the globally set selected model
 def respond(message, history):
 try:
- if not vector_index:
- return "Please upload and process a document first."
-
- if not message.strip():
- return "Please enter a question."
-
 # Initialize the LLM with the selected model
 llm = HuggingFaceInferenceAPI(
 model_name=selected_llm_model_name,
- contextWindow=8192,
- maxTokens=1024,
- temperature=0.3,
- topP=0.9,
- frequencyPenalty=0.5,
- presencePenalty=0.5,
 token=os.getenv("TOKEN")
 )

@@ -147,163 +130,56 @@ def respond(message, history):
 bot_message = query_engine.query(message)

 print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {str(bot_message)}\n")
- return f"**{selected_llm_model_name}:**\n\n{str(bot_message)}"
 except Exception as e:
- return f" An error occurred: {str(e)}"

- def encode_image_safe(image_path):
- """Safely encode image, return empty string if file doesn't exist"""
- try:
- if os.path.exists(image_path):
- with open(image_path, "rb") as image_file:
- return base64.b64encode(image_file.read()).decode('utf-8')
- except Exception:
- pass
- return ""
-
- # Clear function for file processing components
- def clear_file_components():
- return None, embed_models[0], ""

- # Encode the images (with fallback for missing images)
- github_logo_encoded = encode_image_safe("Images/github-logo.png")
- linkedin_logo_encoded = encode_image_safe("Images/linkedin-logo.png")
- website_logo_encoded = encode_image_safe("Images/ai-logo.png")

 # UI Setup
- with gr.Blocks(
- theme=gr.themes.Soft(),
- css='footer {visibility: hidden}',
- title="DocBot - Document Analysis Assistant"
- ) as demo:
-
- gr.Markdown("# DocBot 📄🤖")
- gr.Markdown("*Intelligent Document Analysis Assistant*")
-
 with gr.Tabs():
- with gr.TabItem("📖 Introduction"):
- gr.Markdown(description)

- with gr.TabItem("🤖 DocBot"):
- with gr.Accordion("📋 Quick Start Guide", open=False):
- gr.Markdown(guide)
-
 with gr.Row():
 with gr.Column(scale=1):
- with gr.Group():
- gr.Markdown("### Document Processing")
- file_input = gr.File(
- file_count="single",
- type='filepath',
- label="Step 1: Upload Document",
- file_types=['.pdf', '.docx', '.doc', '.txt', '.csv', '.xlsx', '.pptx', '.html', '.jpg', '.jpeg', '.png', '.webp', '.svg']
- )
-
- embed_model_dropdown = gr.Dropdown(
- choices=embed_models,
- label="Step 2: Select Embedding Model",
- interactive=True,
- value=embed_models[0]
- )
-
- with gr.Row():
- btn = gr.Button("🚀 Process Document", variant='primary', size="lg")
- clear_btn = gr.Button("🗑️ Clear", size="lg")
-
- output = gr.Textbox(
- label='Processing Status',
- interactive=False,
- placeholder="Upload a document and click 'Process Document' to begin..."
- )
-
- with gr.Group():
- gr.Markdown("### Model Selection")
- llm_model_dropdown = gr.Dropdown(
- choices=llm_models,
- label="Step 3: Select Language Model",
- interactive=True,
- value=llm_models[0]
- )
- llm_status = gr.Textbox(
- label="Selected Model",
- interactive=False,
- value=f"LLM set to: {llm_models[0]}"
- )
-
- with gr.Column(scale=2):
- gr.Markdown("### Chat with Your Document")
- chatbot = gr.Chatbot(
- height=600,
- placeholder="Process a document first, then start asking questions!",
- show_label=False
- )
-
- msg = gr.Textbox(
- placeholder="Step 4: Ask questions about your document...",
- container=False,
- scale=7
- )
-
 with gr.Row():
- submit_btn = gr.Button("Send", variant="primary")
- clear_chat_btn = gr.Button("Clear Chat")
-
- # Add footer if images exist
- if any([github_logo_encoded, linkedin_logo_encoded, website_logo_encoded]):
- gr.HTML(footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
-
- # Set up event handlers
- def chat_respond(message, history):
- if not message.strip():
- return history, ""
-
- response = respond(message, history)
- history.append([message, response])
- return history, ""
-
- def clear_chat():
- return [], ""
-
- # Event bindings
- llm_model_dropdown.change(
- fn=set_llm_model,
- inputs=[llm_model_dropdown],
- outputs=[llm_status]
- )
-
- btn.click(
- fn=load_files,
- inputs=[file_input, embed_model_dropdown],
- outputs=[output]
- )
-
- submit_btn.click(
- fn=chat_respond,
- inputs=[msg, chatbot],
- outputs=[chatbot, msg]
- )
-
- msg.submit(
- fn=chat_respond,
- inputs=[msg, chatbot],
- outputs=[chatbot, msg]
- )
-
- clear_btn.click(
- fn=clear_file_components,
- outputs=[file_input, embed_model_dropdown, output]
- )
-
- clear_chat_btn.click(
- fn=clear_chat,
- outputs=[chatbot, msg]
- )
-
- # Launch the demo
 if __name__ == "__main__":
- demo.launch(
- share=True,
- server_name="0.0.0.0",
- server_port=7860,
- show_error=True
- )

 import os
 from dotenv import load_dotenv
 import gradio as gr
+ import markdowm as md
 import base64
  # Load environment variables
 
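The new import markdowm as md pulls in a local helper module (the spelling evidently matches the module's filename in this Space). Judging from the md.description, md.guide, and md.footer references later in the diff, it now holds the UI strings this commit deletes from app.py. A sketch of what it presumably contains:

    # markdowm.py: presumed local helper, name spelled as imported above.
    # Contents inferred from the md.* references in app.py, not from the repo.
    description = """## Welcome to DocBot ..."""     # Intro-tab markdown
    guide = """### How to Use DocBot ..."""          # accordion guide text
    footer = """<div>...{0}...{1}...{2}...</div>"""  # HTML footer with three base64 logo slots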
 "meta-llama/Meta-Llama-3-8B-Instruct",
 "mistralai/Mistral-7B-Instruct-v0.2",
 "tiiuae/falcon-7b-instruct",
+ # "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+ # "deepseek-ai/deepseek-vl2", ## 54GB > 10GB
+ # "deepseek-ai/deepseek-vl2-small", ## 32GB > 10GB
+ # "deepseek-ai/deepseek-vl2-tiny", ## high response time
+ # "deepseek-ai/deepseek-llm-7b-chat", ## 13GB > 10GB
+ # "deepseek-ai/deepseek-math-7b-instruct", ## 13GB > 10GB
+ # "deepseek-ai/deepseek-coder-33b-instruct", ## 66GB > 10GB
+ # "deepseek-ai/DeepSeek-R1-Zero", ## 688GB > 10GB
+ # "mistralai/Mixtral-8x22B-Instruct-v0.1", ## 281GB > 10GB
+ # "NousResearch/Yarn-Mistral-7b-64k", ## 14GB > 10GB
+ # "impira/layoutlm-document-qa", ## ERR
+ # "Qwen/Qwen1.5-7B", ## 15GB
+ # "Qwen/Qwen2.5-3B", ## high response time
+ # "google/gemma-2-2b-jpn-it", ## high response time
+ # "impira/layoutlm-invoices", ## bad req
+ # "google/pix2struct-docvqa-large", ## bad req
+ # "google/gemma-7b-it", ## 17GB > 10GB
+ # "google/gemma-2b-it", ## high response time
+ # "HuggingFaceH4/zephyr-7b-beta", ## high response time
+ # "HuggingFaceH4/zephyr-7b-gemma-v0.1", ## bad req
+ # "microsoft/phi-2", ## high response time
+ # "TinyLlama/TinyLlama-1.1B-Chat-v1.0", ## high response time
+ # "mosaicml/mpt-7b-instruct", ## 13GB > 10GB
+ # "google/flan-t5-xxl", ## high response time
+ # "NousResearch/Yarn-Mistral-7b-128k", ## 14GB > 10GB
+ # "Qwen/Qwen2.5-7B-Instruct", ## 15GB > 10GB
 ]

 embed_models = [

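The commented-out entries double as a compatibility log: the ## NN GB > 10GB notes mark models over what is presumably a ~10 GB limit on the hosted inference backend, while the rest were too slow ("high response time") or failed outright ("bad req", "ERR"). A quick way to vet a candidate before adding it to llm_models, sketched with huggingface_hub (assumes the same TOKEN env var used later in app.py):

    import os
    from huggingface_hub import InferenceClient

    def probe(model_id: str) -> bool:
        """Return True if the hosted model answers a tiny generation request."""
        try:
            client = InferenceClient(model=model_id, token=os.getenv("TOKEN"))
            client.text_generation("Hello", max_new_tokens=8)
            return True
        except Exception as err:  # oversized or unavailable models raise here
            print(f"{model_id}: {err}")
            return False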

 # Initialize the parser
 parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')
 # Define file extractor with various common extensions
 file_extractor = {
 '.pdf': parser, # PDF documents
 '.doc': parser, # Older Microsoft Word documents
 '.txt': parser, # Plain text files
 '.csv': parser, # Comma-separated values files
+ '.xlsx': parser, # Microsoft Excel files (requires additional processing for tables)
+ '.pptx': parser, # Microsoft PowerPoint files (for slides)
+ '.html': parser, # HTML files (web pages)
+ # '.rtf': parser, # Rich Text Format files
+ # '.odt': parser, # OpenDocument Text files
+ # '.epub': parser, # ePub files (e-books)
+
+ # Image files for OCR processing
 '.jpg': parser, # JPEG images
 '.jpeg': parser, # JPEG images
 '.png': parser, # PNG images
+ # '.bmp': parser, # Bitmap images
+ # '.tiff': parser, # TIFF images
+ # '.tif': parser, # TIFF images (alternative extension)
+ # '.gif': parser, # GIF images (can contain text)
+
+ # Scanned documents in image formats
 '.webp': parser, # WebP images
+ '.svg': parser, # SVG files (vector format, may contain embedded text)
 }

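Every extension in the mapping above routes to the same LlamaParse instance: SimpleDirectoryReader dispatches on each file's suffix, which is why one dict entry per extension is enough. A minimal sketch of how the mapping is consumed (the file name is hypothetical):

    from llama_index.core import SimpleDirectoryReader

    # The reader looks up each file's extension in file_extractor and
    # hands the file to the registered parser (here, LlamaParse).
    docs = SimpleDirectoryReader(
        input_files=["report.pdf"],    # hypothetical input
        file_extractor=file_extractor,
    ).load_data()
    print(len(docs), docs[0].metadata)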
 # File processing function
 def load_files(file_path: str, embed_model_name: str):
 try:
 global vector_index
 document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
 embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
 vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
 print(f"Parsing done for {file_path}")
 filename = os.path.basename(file_path)
+ return f"Ready to answer questions about {filename}"
 except Exception as e:
+ return f"An error occurred: {e}"
+

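load_files stores the index in the module-level vector_index global rather than returning it, so respond can reach it later; the returned string is only status text for the UI. Called directly, outside Gradio, it would look like this (file and embedding names are hypothetical; the old guide text recommended BAAI/bge-small-en-v1.5):

    status = load_files("sample.pdf", "BAAI/bge-small-en-v1.5")
    print(status)  # "Ready to answer questions about sample.pdf" or an error string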
 # Function to handle the selected model from dropdown
 def set_llm_model(selected_model):
 global selected_llm_model_name
+ selected_llm_model_name = selected_model # Update the global variable
+ # print(f"Model selected: {selected_model_name}")
+ # return f"Model set to: {selected_model_name}"
+

 # Respond function that uses the globally set selected model
 def respond(message, history):
 try:
 # Initialize the LLM with the selected model
 llm = HuggingFaceInferenceAPI(
 model_name=selected_llm_model_name,
+ contextWindow=8192, # Context window size (typically max length of the model)
+ maxTokens=1024, # Tokens per response generation (512-1024 works well for detailed answers)
+ temperature=0.3, # Lower temperature for more focused answers (0.2-0.4 for factual info)
+ topP=0.9, # Top-p sampling to control diversity while retaining quality
+ frequencyPenalty=0.5, # Slight penalty to avoid repetition
+ presencePenalty=0.5, # Encourages exploration without digressing too much
 token=os.getenv("TOKEN")
 )

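One caveat: llama-index documents snake_case constructor parameters for HuggingFaceInferenceAPI, so the camelCase kwargs above (contextWindow, maxTokens, topP, and so on) are most likely swallowed as unrecognized extras rather than applied. A sketch with the documented field names, assuming a recent llama-index release:

    llm = HuggingFaceInferenceAPI(
        model_name=selected_llm_model_name,
        context_window=8192,  # documented snake_case field
        num_output=1024,      # max tokens generated per response
        token=os.getenv("TOKEN"),
    )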
 bot_message = query_engine.query(message)

 print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {str(bot_message)}\n")
+ return f"{selected_llm_model_name}:\n{str(bot_message)}"
 except Exception as e:
+ if str(e) == "'NoneType' object has no attribute 'as_query_engine'":
+ return "Please upload a file."
+ return f"An error occurred: {e}"

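Matching the exact exception text to detect a missing index is brittle; a library release that rewords the message would break the check silently. The previous version's explicit guard, restored here as a sketch, avoids that:

    def respond(message, history):
        # Check the global directly instead of string-matching the exception.
        if vector_index is None:
            return "Please upload a file."
        ...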
+ def encode_image(image_path):
+ with open(image_path, "rb") as image_file:
+ return base64.b64encode(image_file.read()).decode('utf-8')

+ # Encode the images
+ github_logo_encoded = encode_image("Images/github-logo.png")
+ linkedin_logo_encoded = encode_image("Images/linkedin-logo.png")
+ website_logo_encoded = encode_image("Images/ai-logo.png")

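This commit also swaps the old encode_image_safe (removed above) for a bare encode_image, so a missing logo file now raises at startup instead of degrading to an empty string. A sketch restoring that guard:

    def encode_image(image_path):
        # Fall back to an empty string when the asset is missing,
        # as the removed encode_image_safe did.
        if not os.path.exists(image_path):
            return ""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')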
 # UI Setup
+ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
+ gr.Markdown("# DocBot📄🤖")
 with gr.Tabs():
+ with gr.TabItem("Intro"):
+ gr.Markdown(md.description)

+ with gr.TabItem("DocBot"):
+ with gr.Accordion("=== IMPORTANT: READ ME FIRST ===", open=False):
+ guid = gr.Markdown(md.guide)

 with gr.Row():
 with gr.Column(scale=1):
+ file_input = gr.File(file_count="single", type='filepath', label="Step-1: Upload document")
+ # gr.Markdown("Don't know what to select? Check out the Intro tab")
+ embed_model_dropdown = gr.Dropdown(embed_models, label="Step-2: Select Embedding", interactive=True)
 with gr.Row():
+ btn = gr.Button("Submit", variant='primary')
+ clear = gr.ClearButton()
+ output = gr.Text(label='Vector Index')
+ llm_model_dropdown = gr.Dropdown(llm_models, label="Step-3: Select LLM", interactive=True)
+ with gr.Column(scale=3):
+ gr.ChatInterface(
+ fn=respond,
+ chatbot=gr.Chatbot(height=500),
+ theme="soft",
+ show_progress='full',
+ # cache_mode='lazy',
+ textbox=gr.Textbox(placeholder="Step-4: Ask me questions on the uploaded document!", container=False)
+ )
+ gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
+ # Set up Gradio interactions
+ llm_model_dropdown.change(fn=set_llm_model, inputs=llm_model_dropdown)
+ btn.click(fn=load_files, inputs=[file_input, embed_model_dropdown], outputs=output)
+ clear.click(lambda: [None] * 3, outputs=[file_input, embed_model_dropdown, output])
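gr.ChatInterface owns the whole chat loop here: it calls fn(message, history) and renders the returned string, which is exactly the signature respond exposes, while the clear.click lambda returns [None] * 3 to blank the three listed components in one shot. A minimal stand-in with the same contract, useful for testing the wiring without parsing a document (the function is hypothetical):

    def echo(message, history):
        # Same (message, history) -> str contract that ChatInterface expects.
        return f"echo: {message}"

    gr.ChatInterface(fn=echo).launch()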

+ # Launch the demo with a public link option
 if __name__ == "__main__":
+ demo.launch(share=True) # Correct
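Worth noting: a Hugging Face Space is already publicly hosted, and recent Gradio versions log that share=True is ignored in that environment, so a plain demo.launch() should behave identically when deployed (an inference from Gradio's behavior, not something this commit states).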