Sarath0x8f committed on
Commit
7463824
·
verified ·
1 Parent(s): 930f0f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -110
app.py CHANGED
@@ -6,132 +6,73 @@ from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
6
  import os
7
  from dotenv import load_dotenv
8
  import gradio as gr
9
- import markdowm as md
10
- import base64
11
 
12
  # Load environment variables
13
  load_dotenv()
14
 
15
- llm_models = [
16
- "mistralai/Mixtral-8x7B-Instruct-v0.1",
17
- "meta-llama/Meta-Llama-3-8B-Instruct",
18
- "mistralai/Mistral-7B-Instruct-v0.2",
19
- "tiiuae/falcon-7b-instruct",
20
- ]
21
 
22
- embed_models = [
23
- "BAAI/bge-small-en-v1.5",
24
- "NeuML/pubmedbert-base-embeddings",
25
- "BAAI/llm-embedder",
26
- "BAAI/bge-large-en"
27
- ]
28
 
29
- # Global state
30
- selected_llm_model_name = llm_models[0]
31
- selected_embed_model_name = embed_models[0]
32
- vector_index = None
33
 
34
- # Parser setup
35
- parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')
36
- file_extractor = {ext: parser for ext in ['.pdf', '.docx', '.doc', '.txt', '.csv', '.xlsx', '.pptx', '.html', '.jpg', '.jpeg', '.png', '.webp', '.svg']}
37
 
38
- def load_files(file_path: str, embed_model_name: str):
39
- global vector_index
40
  try:
 
41
  document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
42
- embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
43
  vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
 
44
  filename = os.path.basename(file_path)
45
- return f"Ready to give response on {filename}"
46
  except Exception as e:
47
- return f"An error occurred: {e}"
48
-
49
- def set_llm_model(selected_model):
50
- global selected_llm_model_name
51
- selected_llm_model_name = selected_model
52
- return f"Model set to: {selected_model}"
53
 
54
  def respond(message, history):
55
  try:
56
- llm = HuggingFaceInferenceAPI(
57
- model_name=selected_llm_model_name,
58
- contextWindow=8192,
59
- maxTokens=1024,
60
- temperature=0.3,
61
- topP=0.9,
62
- frequencyPenalty=0.5,
63
- presencePenalty=0.5,
64
- token=os.getenv("TOKEN")
65
- )
66
- if vector_index is not None:
67
- query_engine = vector_index.as_query_engine(llm=llm)
68
- bot_message = str(query_engine.query(message))
69
- history.append((message, bot_message))
70
- print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {bot_message}\n")
71
- return bot_message, history
72
- else:
73
- return "Please upload a file first.", history
74
  except Exception as e:
75
- return f"An error occurred: {e}", history
76
-
77
- def encode_image(image_path):
78
- with open(image_path, "rb") as image_file:
79
- return base64.b64encode(image_file.read()).decode('utf-8')
80
-
81
- # Encoded logos
82
- github_logo_encoded = encode_image("Images/github-logo.png")
83
- linkedin_logo_encoded = encode_image("Images/linkedin-logo.png")
84
- website_logo_encoded = encode_image("Images/ai-logo.png")
85
-
86
- # Markdown placeholders
87
- description = "### Welcome to **DocBot** - Ask Questions Based on Your Uploaded Documents"
88
- guide = "> Step 1: Upload\n> Step 2: Select Embedding\n> Step 3: Select LLM\n> Step 4: Ask Questions"
89
- footer = """
90
- <center>
91
- <a href="https://github.com" target="_blank"><img src="data:image/png;base64,{}" height="30"/></a>&nbsp;
92
- <a href="https://linkedin.com" target="_blank"><img src="data:image/png;base64,{}" height="30"/></a>&nbsp;
93
- <a href="https://yourwebsite.com" target="_blank"><img src="data:image/png;base64,{}" height="30"/></a>
94
- </center>
95
- """.format(github_logo_encoded, linkedin_logo_encoded, website_logo_encoded)
96
-
97
- # Gradio UI
98
- with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
99
- gr.Markdown("# DocBot")
100
- with gr.Tabs():
101
- with gr.TabItem("Intro"):
102
- gr.Markdown(description)
103
-
104
- with gr.TabItem("DocBot"):
105
- with gr.Accordion("=== IMPORTANT: READ ME FIRST ===", open=False):
106
- gr.Markdown(guide)
107
-
108
- with gr.Row():
109
- with gr.Column(scale=1):
110
- file_input = gr.File(file_count="single", type='filepath', label="Step-1: Upload document")
111
- embed_model_dropdown = gr.Dropdown(embed_models, label="Step-2: Select Embedding", interactive=True)
112
- with gr.Row():
113
- btn = gr.Button("Submit", variant='primary')
114
- clear = gr.ClearButton()
115
- output = gr.Text(label='Vector Index')
116
- llm_model_dropdown = gr.Dropdown(llm_models, label="Step-3: Select LLM", interactive=True)
117
- model_selected_output = gr.Text(label="Model selected")
118
-
119
- with gr.Column(scale=3):
120
- chatbot_ui = gr.Chatbot(height=500)
121
- message = gr.Textbox(placeholder="Step-4: Ask me questions on the uploaded document!", container=False)
122
- submit_btn = gr.Button("Send")
123
-
124
- # Bind logic
125
- llm_model_dropdown.change(fn=set_llm_model, inputs=llm_model_dropdown, outputs=model_selected_output)
126
- btn.click(fn=load_files, inputs=[file_input, embed_model_dropdown], outputs=output)
127
- clear.click(lambda: [None] * 3, outputs=[file_input, embed_model_dropdown, output])
128
-
129
- # Chat logic
130
- state = gr.State([])
131
- submit_btn.click(fn=respond, inputs=[message, state], outputs=[chatbot_ui, state])
132
- message.submit(fn=respond, inputs=[message, state], outputs=[chatbot_ui, state])
133
-
134
- gr.HTML(footer)
135
-
136
  if __name__ == "__main__":
137
  demo.launch(share=True)
 
6
  import os
7
  from dotenv import load_dotenv
8
  import gradio as gr
 
 
9
 
10
  # Load environment variables
11
  load_dotenv()
12
 
13
# LLM client passed to the query engine; its API token comes from the TOKEN
# environment variable.
llm = HuggingFaceInferenceAPI(
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",
    token=os.environ.get("TOKEN"),
)

# A single LlamaParse instance (markdown result type) is shared by every
# supported file extension.
parser = LlamaParse(
    api_key=os.environ.get("LLAMA_INDEX_API"),
    result_type="markdown",
)
file_extractor = dict.fromkeys((".pdf", ".docx", ".doc"), parser)

# Embedding model handed to VectorStoreIndex when an uploaded file is indexed.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Vector index built from the uploaded file; None until load_files() assigns it.
vector_index = None
 
27
 
28
+ # File processing function
29
def load_files(file_path: str):
    """Parse an uploaded file and build the module-level vector index from it.

    Args:
        file_path: Path of the uploaded file, as delivered by the file input
            wired to the "Submit" button. May be empty or None when nothing
            has been uploaded.

    Returns:
        A status string for the UI: a ready message naming the file on
        success, a prompt to upload when no path was given, or the error
        text if parsing or indexing raised.
    """
    global vector_index

    # Presumably the file input yields None when "Submit" is clicked with no
    # file selected; answer that case directly instead of letting the reader
    # fail with an opaque exception message.
    if not file_path:
        return "Please upload a file first."

    try:
        documents = SimpleDirectoryReader(
            input_files=[file_path],
            file_extractor=file_extractor,
        ).load_data()
        vector_index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
        print(f"parsing done {file_path}")
        filename = os.path.basename(file_path)
        return f"Ready to give response on {filename}"
    except Exception as e:
        # This is a UI callback: report the failure as text rather than raising.
        return f"An error occurred {e}"
 
 
 
 
 
39
 
40
def respond(message, history):
    """Answer a chat message by querying the index built from the uploaded file.

    Args:
        message: The user's question text.
        history: Prior chat turns supplied by the chat UI; not used here.

    Returns:
        The query engine's response as a string, "upload file" when no index
        has been built yet, or an error message if the query raised.
    """
    # Detect the missing index up front. An exception object never compares
    # equal to a string, so matching on the AttributeError message text cannot
    # be used to recognise this case.
    if vector_index is None:
        return "upload file"

    try:
        query_engine = vector_index.as_query_engine(llm=llm)
        answer = str(query_engine.query(message))
        print(f"{datetime.now()}::message=>{answer}")
        return answer
    except Exception as e:
        # This is a UI callback: report the failure as text rather than raising.
        return f"an error occurred {e}"
54
+
55
# UI setup: a row with an upload/indexing column followed by a chat column.
# NOTE(review): the nesting below is reconstructed from a listing that had
# lost its indentation — confirm against app.py.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(file_count="single", type="filepath")
            with gr.Column():
                clear = gr.ClearButton()
                btn = gr.Button("Submit", variant="primary")
                output = gr.Text(label="Vector Index")
        with gr.Column(scale=2):
            gr.ChatInterface(
                fn=respond,
                chatbot=gr.Chatbot(height=500),
                textbox=gr.Textbox(
                    placeholder="Ask me a yes or no question",
                    container=False,
                    scale=7,
                ),
                examples=["summarize the document"],
            )

    # "Submit" runs load_files on the uploaded path and shows its status text;
    # "Clear" resets both the file input and the status box to None.
    btn.click(fn=load_files, inputs=file_input, outputs=output)
    clear.click(lambda: [None, None], outputs=[file_input, output])


# Launch the demo with the public-link option enabled (share=True).
if __name__ == "__main__":
    demo.launch(share=True)