Update app.py
Browse files
app.py
CHANGED
@@ -17,24 +17,6 @@ llm_models = [
|
|
17 |
"meta-llama/Meta-Llama-3-8B-Instruct",
|
18 |
"mistralai/Mistral-7B-Instruct-v0.2",
|
19 |
"tiiuae/falcon-7b-instruct",
|
20 |
-
# "mistralai/Mixtral-8x22B-Instruct-v0.1", ## 281GB>10GB
|
21 |
-
# "NousResearch/Yarn-Mistral-7b-64k", ## 14GB>10GB
|
22 |
-
# "impira/layoutlm-document-qa", ## ERR
|
23 |
-
# "Qwen/Qwen1.5-7B", ## 15GB
|
24 |
-
# "Qwen/Qwen2.5-3B", ## high response time
|
25 |
-
# "google/gemma-2-2b-jpn-it", ## high response time
|
26 |
-
# "impira/layoutlm-invoices", ## bad req
|
27 |
-
# "google/pix2struct-docvqa-large", ## bad req
|
28 |
-
# "google/gemma-7b-it", ## 17GB > 10GB
|
29 |
-
# "google/gemma-2b-it", ## high response time
|
30 |
-
# "HuggingFaceH4/zephyr-7b-beta", ## high response time
|
31 |
-
# "HuggingFaceH4/zephyr-7b-gemma-v0.1", ## bad req
|
32 |
-
# "microsoft/phi-2", ## high response time
|
33 |
-
# "TinyLlama/TinyLlama-1.1B-Chat-v1.0", ## high response time
|
34 |
-
# "mosaicml/mpt-7b-instruct", ## 13GB>10GB
|
35 |
-
# "google/flan-t5-xxl" ## high response time
|
36 |
-
# "NousResearch/Yarn-Mistral-7b-128k", ## 14GB>10GB
|
37 |
-
# "Qwen/Qwen2.5-7B-Instruct", ## 15GB>10GB
|
38 |
]
|
39 |
|
40 |
embed_models = [
|
@@ -61,18 +43,12 @@ file_extractor = {
|
|
61 |
'.xlsx': parser, # Microsoft Excel files (requires additional processing for tables)
|
62 |
'.pptx': parser, # Microsoft PowerPoint files (for slides)
|
63 |
'.html': parser, # HTML files (web pages)
|
64 |
-
# '.rtf': parser, # Rich Text Format files
|
65 |
-
# '.odt': parser, # OpenDocument Text files
|
66 |
-
# '.epub': parser, # ePub files (e-books)
|
67 |
|
68 |
# Image files for OCR processing
|
69 |
'.jpg': parser, # JPEG images
|
70 |
'.jpeg': parser, # JPEG images
|
71 |
'.png': parser, # PNG images
|
72 |
-
|
73 |
-
# '.tiff': parser, # TIFF images
|
74 |
-
# '.tif': parser, # TIFF images (alternative extension)
|
75 |
-
# '.gif': parser, # GIF images (can contain text)
|
76 |
|
77 |
# Scanned documents in image formats
|
78 |
'.webp': parser, # WebP images
|
@@ -132,20 +108,15 @@ def encode_image(image_path):
|
|
132 |
with open(image_path, "rb") as image_file:
|
133 |
return base64.b64encode(image_file.read()).decode('utf-8')
|
134 |
|
135 |
-
# Encode the images
|
136 |
-
github_logo_encoded = encode_image("Images/github-logo.png")
|
137 |
-
linkedin_logo_encoded = encode_image("Images/linkedin-logo.png")
|
138 |
-
website_logo_encoded = encode_image("Images/ai-logo.png")
|
139 |
-
|
140 |
# UI Setup
|
141 |
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
|
142 |
-
gr.Markdown("#
|
143 |
with gr.Tabs():
|
144 |
-
with gr.TabItem("
|
145 |
gr.Markdown(md.description)
|
146 |
|
147 |
-
with gr.TabItem("
|
148 |
-
with gr.Accordion("
|
149 |
guid = gr.Markdown(md.guide)
|
150 |
with gr.Row():
|
151 |
with gr.Column(scale=1):
|
@@ -164,7 +135,7 @@ with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]),
|
|
164 |
theme = "soft",
|
165 |
show_progress='full',
|
166 |
# cache_mode='lazy',
|
167 |
-
textbox=gr.Textbox(placeholder="
|
168 |
)
|
169 |
gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
|
170 |
# Set up Gradio interactions
|
|
|
17 |
"meta-llama/Meta-Llama-3-8B-Instruct",
|
18 |
"mistralai/Mistral-7B-Instruct-v0.2",
|
19 |
"tiiuae/falcon-7b-instruct",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
]
|
21 |
|
22 |
embed_models = [
|
|
|
43 |
'.xlsx': parser, # Microsoft Excel files (requires additional processing for tables)
|
44 |
'.pptx': parser, # Microsoft PowerPoint files (for slides)
|
45 |
'.html': parser, # HTML files (web pages)
|
|
|
|
|
|
|
46 |
|
47 |
# Image files for OCR processing
|
48 |
'.jpg': parser, # JPEG images
|
49 |
'.jpeg': parser, # JPEG images
|
50 |
'.png': parser, # PNG images
|
51 |
+
|
|
|
|
|
|
|
52 |
|
53 |
# Scanned documents in image formats
|
54 |
'.webp': parser, # WebP images
|
|
|
108 |
with open(image_path, "rb") as image_file:
|
109 |
return base64.b64encode(image_file.read()).decode('utf-8')
|
110 |
|
|
|
|
|
|
|
|
|
|
|
111 |
# UI Setup
|
112 |
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
|
113 |
+
gr.Markdown("# HundAI QA📄")
|
114 |
with gr.Tabs():
|
115 |
+
with gr.TabItem("Introduction"):
|
116 |
gr.Markdown(md.description)
|
117 |
|
118 |
+
with gr.TabItem("Chatbot"):
|
119 |
+
with gr.Accordion("IMPORTANT: READ ME FIRST", open=False):
|
120 |
guid = gr.Markdown(md.guide)
|
121 |
with gr.Row():
|
122 |
with gr.Column(scale=1):
|
|
|
135 |
theme = "soft",
|
136 |
show_progress='full',
|
137 |
# cache_mode='lazy',
|
138 |
+
textbox=gr.Textbox(placeholder="Ask me any questions on the uploaded document!", container=False)
|
139 |
)
|
140 |
gr.HTML(md.footer.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded))
|
141 |
# Set up Gradio interactions
|