Sarath0x8f committed on
Commit
a6ab84c
·
verified ·
1 Parent(s): 0d8557a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +212 -98
app.py CHANGED
@@ -6,7 +6,6 @@ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
6
  import os
7
  from dotenv import load_dotenv
8
  import gradio as gr
9
- import markdowm as md
10
  import base64
11
 
12
  # Load environment variables
@@ -17,32 +16,6 @@ llm_models = [
17
  "meta-llama/Meta-Llama-3-8B-Instruct",
18
  "mistralai/Mistral-7B-Instruct-v0.2",
19
  "tiiuae/falcon-7b-instruct",
20
- # "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
21
- # "deepseek-ai/deepseek-vl2", ## 54GB > 10GB
22
- # "deepseek-ai/deepseek-vl2-small", ## 32GB > 10GB
23
- # "deepseek-ai/deepseek-vl2-tiny", ## high response time
24
- # "deepseek-ai/deepseek-llm-7b-chat", ## 13GB > 10GB
25
- # "deepseek-ai/deepseek-math-7b-instruct", ## 13GB > 10GB
26
- # "deepseek-ai/deepseek-coder-33b-instruct", ## 66GB > 10GB
27
- # "deepseek-ai/DeepSeek-R1-Zero", ## 688GB > 10GB
28
- # "mistralai/Mixtral-8x22B-Instruct-v0.1", ## 281GB>10GB
29
- # "NousResearch/Yarn-Mistral-7b-64k", ## 14GB>10GB
30
- # "impira/layoutlm-document-qa", ## ERR
31
- # "Qwen/Qwen1.5-7B", ## 15GB
32
- # "Qwen/Qwen2.5-3B", ## high response time
33
- # "google/gemma-2-2b-jpn-it", ## high response time
34
- # "impira/layoutlm-invoices", ## bad req
35
- # "google/pix2struct-docvqa-large", ## bad req
36
- # "google/gemma-7b-it", ## 17GB > 10GB
37
- # "google/gemma-2b-it", ## high response time
38
- # "HuggingFaceH4/zephyr-7b-beta", ## high response time
39
- # "HuggingFaceH4/zephyr-7b-gemma-v0.1", ## bad req
40
- # "microsoft/phi-2", ## high response time
41
- # "TinyLlama/TinyLlama-1.1B-Chat-v1.0", ## high response time
42
- # "mosaicml/mpt-7b-instruct", ## 13GB>10GB
43
- # "google/flan-t5-xxl" ## high respons time
44
- # "NousResearch/Yarn-Mistral-7b-128k", ## 14GB>10GB
45
- # "Qwen/Qwen2.5-7B-Instruct", ## 15GB>10GB
46
  ]
47
 
48
  embed_models = [
@@ -59,6 +32,7 @@ vector_index = None
59
 
60
  # Initialize the parser
61
  parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')
 
62
  # Define file extractor with various common extensions
63
  file_extractor = {
64
  '.pdf': parser, # PDF documents
@@ -66,62 +40,109 @@ file_extractor = {
66
  '.doc': parser, # Older Microsoft Word documents
67
  '.txt': parser, # Plain text files
68
  '.csv': parser, # Comma-separated values files
69
- '.xlsx': parser, # Microsoft Excel files (requires additional processing for tables)
70
- '.pptx': parser, # Microsoft PowerPoint files (for slides)
71
- '.html': parser, # HTML files (web pages)
72
- # '.rtf': parser, # Rich Text Format files
73
- # '.odt': parser, # OpenDocument Text files
74
- # '.epub': parser, # ePub files (e-books)
75
-
76
- # Image files for OCR processing
77
  '.jpg': parser, # JPEG images
78
  '.jpeg': parser, # JPEG images
79
  '.png': parser, # PNG images
80
- # '.bmp': parser, # Bitmap images
81
- # '.tiff': parser, # TIFF images
82
- # '.tif': parser, # TIFF images (alternative extension)
83
- # '.gif': parser, # GIF images (can contain text)
84
-
85
- # Scanned documents in image formats
86
  '.webp': parser, # WebP images
87
- '.svg': parser, # SVG files (vector format, may contain embedded text)
88
  }
89
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  # File processing function
92
  def load_files(file_path: str, embed_model_name: str):
93
  try:
 
 
 
 
 
 
94
  global vector_index
95
  document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
96
  embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
97
  vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
98
  print(f"Parsing done for {file_path}")
99
  filename = os.path.basename(file_path)
100
- return f"Ready to give response on {filename}"
101
  except Exception as e:
102
- return f"An error occurred: {e}"
103
-
104
 
105
  # Function to handle the selected model from dropdown
106
  def set_llm_model(selected_model):
107
  global selected_llm_model_name
108
- selected_llm_model_name = selected_model # Update the global variable
109
- # print(f"Model selected: {selected_model_name}")
110
- # return f"Model set to: {selected_model_name}"
111
-
112
 
113
  # Respond function that uses the globally set selected model
114
  def respond(message, history):
115
  try:
 
 
 
 
 
 
116
  # Initialize the LLM with the selected model
117
  llm = HuggingFaceInferenceAPI(
118
  model_name=selected_llm_model_name,
119
- contextWindow=8192, # Context window size (typically max length of the model)
120
- maxTokens=1024, # Tokens per response generation (512-1024 works well for detailed answers)
121
- temperature=0.3, # Lower temperature for more focused answers (0.2-0.4 for factual info)
122
- topP=0.9, # Top-p sampling to control diversity while retaining quality
123
- frequencyPenalty=0.5, # Slight penalty to avoid repetition
124
- presencePenalty=0.5, # Encourages exploration without digressing too much
125
  token=os.getenv("TOKEN")
126
  )
127
 
@@ -130,58 +151,151 @@ def respond(message, history):
130
  bot_message = query_engine.query(message)
131
 
132
  print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {str(bot_message)}\n")
133
- return f"{selected_llm_model_name}:\n{str(bot_message)}"
134
  except Exception as e:
135
- if str(e) == "'NoneType' object has no attribute 'as_query_engine'":
136
- return "Please upload a file."
137
- return f"An error occurred: {e}"
138
 
139
- def encode_image(image_path):
140
- with open(image_path, "rb") as image_file:
141
- return base64.b64encode(image_file.read()).decode('utf-8')
142
-
143
- # Encode the images
144
- github_logo_encoded = encode_image("Images/github-logo.png")
145
- linkedin_logo_encoded = encode_image("Images/linkedin-logo.png")
146
- website_logo_encoded = encode_image("Images/ai-logo.png")
 
147
 
148
- # theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]),
 
 
 
149
 
150
  # UI Setup
151
- with gr.Blocks(css='footer {visibility: hidden}') as demo:
152
- gr.Markdown("# DocBotπŸ“„πŸ€–")
 
 
 
 
 
 
 
153
  with gr.Tabs():
154
- with gr.TabItem("Intro"):
155
- gr.Markdown(md.description)
156
 
157
- with gr.TabItem("DocBot"):
158
- with gr.Accordion("=== IMPORTANT: READ ME FIRST ===", open=False):
159
- guid = gr.Markdown(md.guide)
 
160
  with gr.Row():
161
  with gr.Column(scale=1):
162
- file_input = gr.File(file_count="single", type='filepath', label="Step-1: Upload document")
163
- # gr.Markdown("Dont know what to select check out in Intro tab")
164
- embed_model_dropdown = gr.Dropdown(embed_models, label="Step-2: Select Embedding", interactive=True)
165
- with gr.Row():
166
- btn = gr.Button("Submit", variant='primary')
167
- clear = gr.ClearButton()
168
- output = gr.Text(label='Vector Index')
169
- llm_model_dropdown = gr.Dropdown(llm_models, label="Step-3: Select LLM", interactive=True)
170
- with gr.Column(scale=3):
171
- gr.ChatInterface(
172
- fn=respond,
173
- chatbot=gr.Chatbot(height=500),
174
- # theme = "soft",
175
- show_progress='full',
176
- # cache_mode='lazy',
177
- textbox=gr.Textbox(placeholder="Step-4: Ask me questions on the uploaded document!", container=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  )
179
- gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
180
- # Set up Gradio interactions
181
- llm_model_dropdown.change(fn=set_llm_model, inputs=llm_model_dropdown)
182
- btn.click(fn=load_files, inputs=[file_input, embed_model_dropdown], outputs=output)
183
- clear.click(lambda: [None] * 3, outputs=[file_input, embed_model_dropdown, output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
- # Launch the demo with a public link option
186
  if __name__ == "__main__":
187
- demo.launch(share=True)
 
 
 
 
 
 
6
  import os
7
  from dotenv import load_dotenv
8
  import gradio as gr
 
9
  import base64
10
 
11
  # Load environment variables
 
16
  "meta-llama/Meta-Llama-3-8B-Instruct",
17
  "mistralai/Mistral-7B-Instruct-v0.2",
18
  "tiiuae/falcon-7b-instruct",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  ]
20
 
21
  embed_models = [
 
32
 
33
  # Initialize the parser
34
  parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')
35
+
36
  # Define file extractor with various common extensions
37
  file_extractor = {
38
  '.pdf': parser, # PDF documents
 
40
  '.doc': parser, # Older Microsoft Word documents
41
  '.txt': parser, # Plain text files
42
  '.csv': parser, # Comma-separated values files
43
+ '.xlsx': parser, # Microsoft Excel files
44
+ '.pptx': parser, # Microsoft PowerPoint files
45
+ '.html': parser, # HTML files
 
 
 
 
 
46
  '.jpg': parser, # JPEG images
47
  '.jpeg': parser, # JPEG images
48
  '.png': parser, # PNG images
 
 
 
 
 
 
49
  '.webp': parser, # WebP images
50
+ '.svg': parser, # SVG files
51
  }
52
 
53
+ # Markdown content definitions
54
+ description = """
55
+ ## Welcome to DocBot 📄🤖
56
+
57
+ DocBot is an intelligent document analysis tool that can help you extract insights from various document formats including:
58
+ - PDF documents
59
+ - Word documents (.docx, .doc)
60
+ - Text files
61
+ - CSV files
62
+ - Excel files
63
+ - PowerPoint presentations
64
+ - HTML files
65
+ - Images with text (JPG, PNG, WebP, SVG)
66
+
67
+ Simply upload your document, select your preferred embedding model and LLM, then start asking questions!
68
+ """
69
+
70
+ guide = """
71
+ ### How to Use DocBot:
72
+
73
+ 1. **Upload Document**: Choose any supported file format
74
+ 2. **Select Embedding Model**: Choose from available embedding models (BAAI/bge-small-en-v1.5 is recommended for most cases)
75
+ 3. **Submit**: Click submit to process your document
76
+ 4. **Select LLM**: Choose your preferred language model
77
+ 5. **Ask Questions**: Start chatting with your document!
78
+
79
+ ### Tips:
80
+ - Smaller embedding models (like bge-small-en-v1.5) are faster but may be less accurate
81
+ - Larger models provide better understanding but take more time
82
+ - Be specific in your questions for better results
83
+ """
84
+
85
+ footer = """
86
+ <div style="text-align: center; margin-top: 20px; padding: 20px; border-top: 1px solid #ddd;">
87
+ <p>Built with ❤️ using LlamaIndex and Gradio</p>
88
+ <div style="display: flex; justify-content: center; gap: 20px; margin-top: 10px;">
89
+ <a href="https://github.com" target="_blank">
90
+ <img src="data:image/png;base64,{0}" alt="GitHub" style="width: 24px; height: 24px;">
91
+ </a>
92
+ <a href="https://linkedin.com" target="_blank">
93
+ <img src="data:image/png;base64,{1}" alt="LinkedIn" style="width: 24px; height: 24px;">
94
+ </a>
95
+ <a href="https://your-website.com" target="_blank">
96
+ <img src="data:image/png;base64,{2}" alt="Website" style="width: 24px; height: 24px;">
97
+ </a>
98
+ </div>
99
+ </div>
100
+ """
101
 
102
  # File processing function
103
def load_files(file_path: str, embed_model_name: str):
    """Parse one uploaded file and rebuild the module-level vector index from it.

    Args:
        file_path: Path of the uploaded document. A falsy value (no upload)
            is rejected with a prompt instead of being passed to the reader.
        embed_model_name: Name of the embedding model handed to
            ``HuggingFaceEmbedding``. A falsy value is rejected the same way.

    Returns:
        A human-readable status string: a prompt when an input is missing,
        a success message naming the file, or an error message carrying the
        text of whatever exception the parsing/indexing step raised.

    Side effects:
        On success, rebinds the global ``vector_index``. On failure the
        previous value of ``vector_index`` is left untouched.
    """
    global vector_index

    # Input checks cannot raise, so they live outside the try block; the
    # catch-all below is reserved for the parsing/embedding pipeline.
    if not file_path:
        return "Please select a file first."
    if not embed_model_name:
        return "Please select an embedding model."

    try:
        documents = SimpleDirectoryReader(
            input_files=[file_path],
            file_extractor=file_extractor,
        ).load_data()
        embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
        vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
        print(f"Parsing done for {file_path}")
        filename = os.path.basename(file_path)
        return f"✅ Ready to answer questions about: {filename}"
    except Exception as e:
        # UI boundary: report the failure in the status box rather than
        # letting the exception escape the event handler.
        return f"❌ An error occurred: {e}"
 
120
 
121
  # Function to handle the selected model from dropdown
122
def set_llm_model(selected_model):
    """Record the chosen LLM name in the module-level ``selected_llm_model_name``.

    Args:
        selected_model: Model identifier picked by the user. A falsy value
            (nothing selected) leaves the current model unchanged.

    Returns:
        A status string on every path. Returning a string even when nothing
        was selected avoids handing ``None`` to whatever displays the status.
    """
    global selected_llm_model_name

    if not selected_model:
        # Empty selection: keep the previously chosen model and say so.
        return "No LLM selected; keeping the current model."

    selected_llm_model_name = selected_model
    return f"LLM set to: {selected_model}"
 
127
 
128
  # Respond function that uses the globally set selected model
129
  def respond(message, history):
130
  try:
131
+ if not vector_index:
132
+ return "Please upload and process a document first."
133
+
134
+ if not message.strip():
135
+ return "Please enter a question."
136
+
137
  # Initialize the LLM with the selected model
138
  llm = HuggingFaceInferenceAPI(
139
  model_name=selected_llm_model_name,
140
+ contextWindow=8192,
141
+ maxTokens=1024,
142
+ temperature=0.3,
143
+ topP=0.9,
144
+ frequencyPenalty=0.5,
145
+ presencePenalty=0.5,
146
  token=os.getenv("TOKEN")
147
  )
148
 
 
151
  bot_message = query_engine.query(message)
152
 
153
  print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {str(bot_message)}\n")
154
+ return f"**{selected_llm_model_name}:**\n\n{str(bot_message)}"
155
  except Exception as e:
156
+ return f"❌ An error occurred: {str(e)}"
 
 
157
 
158
def encode_image_safe(image_path):
    """Return the file at *image_path* as a base64 string, or "" if unreadable.

    Args:
        image_path: Path of the image file to read in binary mode.

    Returns:
        The base64 encoding of the file's bytes decoded to ``str``, or an
        empty string when the path is missing, is not a readable file, or is
        not a valid path value.
    """
    try:
        # Open directly instead of checking os.path.exists first: one
        # operation, no window between the check and the read.
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")
    except (OSError, TypeError, ValueError):
        # OSError: missing/unreadable/directory; TypeError: non-path value
        # such as None; ValueError: malformed path (e.g. embedded NUL).
        return ""
167
 
168
+ # Encode the images (with fallback for missing images)
169
+ github_logo_encoded = encode_image_safe("Images/github-logo.png")
170
+ linkedin_logo_encoded = encode_image_safe("Images/linkedin-logo.png")
171
+ website_logo_encoded = encode_image_safe("Images/ai-logo.png")
172
 
173
  # UI Setup
174
+ with gr.Blocks(
175
+ theme=gr.themes.Soft(),
176
+ css='footer {visibility: hidden}',
177
+ title="DocBot - Document Analysis Assistant"
178
+ ) as demo:
179
+
180
+ gr.Markdown("# DocBot πŸ“„πŸ€–")
181
+ gr.Markdown("*Intelligent Document Analysis Assistant*")
182
+
183
  with gr.Tabs():
184
+ with gr.TabItem("πŸ“– Introduction"):
185
+ gr.Markdown(description)
186
 
187
+ with gr.TabItem("πŸ€– DocBot"):
188
+ with gr.Accordion("πŸ“‹ Quick Start Guide", open=False):
189
+ gr.Markdown(guide)
190
+
191
  with gr.Row():
192
  with gr.Column(scale=1):
193
+ with gr.Group():
194
+ gr.Markdown("### Document Processing")
195
+ file_input = gr.File(
196
+ file_count="single",
197
+ type='filepath',
198
+ label="Step 1: Upload Document",
199
+ file_types=['.pdf', '.docx', '.doc', '.txt', '.csv', '.xlsx', '.pptx', '.html', '.jpg', '.jpeg', '.png', '.webp', '.svg']
200
+ )
201
+
202
+ embed_model_dropdown = gr.Dropdown(
203
+ choices=embed_models,
204
+ label="Step 2: Select Embedding Model",
205
+ interactive=True,
206
+ value=embed_models[0]
207
+ )
208
+
209
+ with gr.Row():
210
+ btn = gr.Button("πŸš€ Process Document", variant='primary', size="lg")
211
+ clear = gr.ClearButton("πŸ—‘οΈ Clear", size="lg")
212
+
213
+ output = gr.Textbox(
214
+ label='Processing Status',
215
+ interactive=False,
216
+ placeholder="Upload a document and click 'Process Document' to begin..."
217
+ )
218
+
219
+ with gr.Group():
220
+ gr.Markdown("### Model Selection")
221
+ llm_model_dropdown = gr.Dropdown(
222
+ choices=llm_models,
223
+ label="Step 3: Select Language Model",
224
+ interactive=True,
225
+ value=llm_models[0]
226
+ )
227
+ llm_status = gr.Textbox(
228
+ label="Selected Model",
229
+ interactive=False,
230
+ value=f"LLM set to: {llm_models[0]}"
231
+ )
232
+
233
+ with gr.Column(scale=2):
234
+ gr.Markdown("### Chat with Your Document")
235
+ chatbot = gr.Chatbot(
236
+ height=600,
237
+ placeholder="Process a document first, then start asking questions!",
238
+ show_label=False
239
+ )
240
+
241
+ msg = gr.Textbox(
242
+ placeholder="Step 4: Ask questions about your document...",
243
+ container=False,
244
+ scale=7
245
  )
246
+
247
+ with gr.Row():
248
+ submit_btn = gr.Button("Send", variant="primary")
249
+ clear_chat = gr.ClearButton([msg, chatbot], value="Clear Chat")
250
+
251
+ # Add footer if images exist
252
+ if any([github_logo_encoded, linkedin_logo_encoded, website_logo_encoded]):
253
+ gr.HTML(footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
254
+
255
+ # Set up event handlers
256
def chat_respond(message, history):
    """Run one chat turn and clear the input box.

    A message that is empty or only whitespace is ignored. Otherwise the
    message is answered via ``respond`` and the ``[message, answer]`` pair is
    appended to ``history`` in place.

    Returns:
        A ``(history, "")`` tuple: the (possibly extended) history and an
        empty string for the message box.
    """
    has_text = bool(message.strip())
    if has_text:
        answer = respond(message, history)
        history.append([message, answer])
    return history, ""
263
+
264
+ # Event bindings
265
+ llm_model_dropdown.change(
266
+ fn=set_llm_model,
267
+ inputs=[llm_model_dropdown],
268
+ outputs=[llm_status]
269
+ )
270
+
271
+ btn.click(
272
+ fn=load_files,
273
+ inputs=[file_input, embed_model_dropdown],
274
+ outputs=[output]
275
+ )
276
+
277
+ submit_btn.click(
278
+ fn=chat_respond,
279
+ inputs=[msg, chatbot],
280
+ outputs=[chatbot, msg]
281
+ )
282
+
283
+ msg.submit(
284
+ fn=chat_respond,
285
+ inputs=[msg, chatbot],
286
+ outputs=[chatbot, msg]
287
+ )
288
+
289
+ clear.click(
290
+ lambda: [None, None, ""],
291
+ outputs=[file_input, embed_model_dropdown, output]
292
+ )
293
 
294
+ # Launch the demo
295
  if __name__ == "__main__":
296
+ demo.launch(
297
+ share=True,
298
+ server_name="0.0.0.0",
299
+ server_port=7860,
300
+ show_error=True
301
+ )