aidevhund committed on
Commit
6fc434e
·
verified ·
1 Parent(s): fc74eab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -35
app.py CHANGED
@@ -17,24 +17,6 @@ llm_models = [
17
  "meta-llama/Meta-Llama-3-8B-Instruct",
18
  "mistralai/Mistral-7B-Instruct-v0.2",
19
  "tiiuae/falcon-7b-instruct",
20
- # "mistralai/Mixtral-8x22B-Instruct-v0.1", ## 281GB>10GB
21
- # "NousResearch/Yarn-Mistral-7b-64k", ## 14GB>10GB
22
- # "impira/layoutlm-document-qa", ## ERR
23
- # "Qwen/Qwen1.5-7B", ## 15GB
24
- # "Qwen/Qwen2.5-3B", ## high response time
25
- # "google/gemma-2-2b-jpn-it", ## high response time
26
- # "impira/layoutlm-invoices", ## bad req
27
- # "google/pix2struct-docvqa-large", ## bad req
28
- # "google/gemma-7b-it", ## 17GB > 10GB
29
- # "google/gemma-2b-it", ## high response time
30
- # "HuggingFaceH4/zephyr-7b-beta", ## high response time
31
- # "HuggingFaceH4/zephyr-7b-gemma-v0.1", ## bad req
32
- # "microsoft/phi-2", ## high response time
33
- # "TinyLlama/TinyLlama-1.1B-Chat-v1.0", ## high response time
34
- # "mosaicml/mpt-7b-instruct", ## 13GB>10GB
35
- # "google/flan-t5-xxl" ## high respons time
36
- # "NousResearch/Yarn-Mistral-7b-128k", ## 14GB>10GB
37
- # "Qwen/Qwen2.5-7B-Instruct", ## 15GB>10GB
38
  ]
39
 
40
  embed_models = [
@@ -61,18 +43,12 @@ file_extractor = {
61
  '.xlsx': parser, # Microsoft Excel files (requires additional processing for tables)
62
  '.pptx': parser, # Microsoft PowerPoint files (for slides)
63
  '.html': parser, # HTML files (web pages)
64
- # '.rtf': parser, # Rich Text Format files
65
- # '.odt': parser, # OpenDocument Text files
66
- # '.epub': parser, # ePub files (e-books)
67
 
68
  # Image files for OCR processing
69
  '.jpg': parser, # JPEG images
70
  '.jpeg': parser, # JPEG images
71
  '.png': parser, # PNG images
72
- # '.bmp': parser, # Bitmap images
73
- # '.tiff': parser, # TIFF images
74
- # '.tif': parser, # TIFF images (alternative extension)
75
- # '.gif': parser, # GIF images (can contain text)
76
 
77
  # Scanned documents in image formats
78
  '.webp': parser, # WebP images
@@ -132,20 +108,15 @@ def encode_image(image_path):
132
  with open(image_path, "rb") as image_file:
133
  return base64.b64encode(image_file.read()).decode('utf-8')
134
 
135
- # Encode the images
136
- github_logo_encoded = encode_image("Images/github-logo.png")
137
- linkedin_logo_encoded = encode_image("Images/linkedin-logo.png")
138
- website_logo_encoded = encode_image("Images/ai-logo.png")
139
-
140
  # UI Setup
141
  with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
142
- gr.Markdown("# DocBot📄🤖")
143
  with gr.Tabs():
144
- with gr.TabItem("Intro"):
145
  gr.Markdown(md.description)
146
 
147
- with gr.TabItem("DocBot"):
148
- with gr.Accordion("=== IMPORTANT: READ ME FIRST ===", open=False):
149
  guid = gr.Markdown(md.guide)
150
  with gr.Row():
151
  with gr.Column(scale=1):
@@ -164,7 +135,7 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]),
164
  theme = "soft",
165
  show_progress='full',
166
  # cache_mode='lazy',
167
- textbox=gr.Textbox(placeholder="Step-4: Ask me questions on the uploaded document!", container=False)
168
  )
169
  gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
170
  # Set up Gradio interactions
 
17
  "meta-llama/Meta-Llama-3-8B-Instruct",
18
  "mistralai/Mistral-7B-Instruct-v0.2",
19
  "tiiuae/falcon-7b-instruct",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  ]
21
 
22
  embed_models = [
 
43
  '.xlsx': parser, # Microsoft Excel files (requires additional processing for tables)
44
  '.pptx': parser, # Microsoft PowerPoint files (for slides)
45
  '.html': parser, # HTML files (web pages)
 
 
 
46
 
47
  # Image files for OCR processing
48
  '.jpg': parser, # JPEG images
49
  '.jpeg': parser, # JPEG images
50
  '.png': parser, # PNG images
51
+
 
 
 
52
 
53
  # Scanned documents in image formats
54
  '.webp': parser, # WebP images
 
108
  with open(image_path, "rb") as image_file:
109
  return base64.b64encode(image_file.read()).decode('utf-8')
110
 
 
 
 
 
 
111
  # UI Setup
112
  with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
113
+ gr.Markdown("# HundAI QA📄")
114
  with gr.Tabs():
115
+ with gr.TabItem("Introduction"):
116
  gr.Markdown(md.description)
117
 
118
+ with gr.TabItem("Chatbot"):
119
+ with gr.Accordion("IMPORTANT: READ ME FIRST", open=False):
120
  guid = gr.Markdown(md.guide)
121
  with gr.Row():
122
  with gr.Column(scale=1):
 
135
  theme = "soft",
136
  show_progress='full',
137
  # cache_mode='lazy',
138
+ textbox=gr.Textbox(placeholder="Ask me any questions on the uploaded document!", container=False)
139
  )
140
  gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
141
  # Set up Gradio interactions