from datetime import datetime
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_parse import LlamaParse
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import os
from dotenv import load_dotenv
import gradio as gr
import markdowm as md  # local module providing the UI copy (md.description, md.guide)
import base64
# Load environment variables
load_dotenv()
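# The .env file is expected to define TOKEN (the Hugging Face API token read
# further below); if LlamaParse is used for document parsing, its API key would
# also live here.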
# Map backend model identifiers to the display names shown in the UI
llm_display_names = {
"tiiuae/falcon-7b-instruct": "HundAI",
"mistralai/Mixtral-8x7B-Instruct-v0.1": "Mixtral-8x7B",
"meta-llama/Meta-Llama-3-8B-Instruct": "Meta-Llama-3",
"mistralai/Mistral-7B-Instruct-v0.2": "Mistral-7B",
}
embed_models = [
"BAAI/bge-small-en-v1.5", # 33.4M
"NeuML/pubmedbert-base-embeddings",
"BAAI/llm-embedder", # 109M
"BAAI/bge-large-en" # 335M
]
# Reverse mapping to retrieve original names
llm_reverse_mapping = {v: k for k, v in llm_display_names.items()}
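# set_llm_model() and respond() below rely on these globals, but this section
# does not define them elsewhere; the defaults here are a minimal sketch (assumed values).
selected_llm_model_name = list(llm_display_names.keys())[0]  # assumed default: first LLM in the mapping
vector_index = None  # populated by load_files() once a document has been indexed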
# Resolve the display name chosen in the UI back to the backend model identifier
def set_llm_model(display_name):
global selected_llm_model_name
# Retrieve the original model name using the reverse mapping
selected_llm_model_name = llm_reverse_mapping.get(display_name, display_name)
print(f"Model selected: {selected_llm_model_name}")
# Answer a chat message by querying the indexed document with the currently selected LLM
def respond(message, history):
try:
# Initialize the LLM with the selected model
llm = HuggingFaceInferenceAPI(
model_name=selected_llm_model_name, # Use the backend model name
contextWindow=8192,
maxTokens=1024,
temperature=0.3,
topP=0.9,
frequencyPenalty=0.5,
presencePenalty=0.5,
token=os.getenv("TOKEN")
)
# Set up the query engine with the selected LLM
query_engine = vector_index.as_query_engine(llm=llm)
bot_message = query_engine.query(message)
print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {str(bot_message)}\n")
return f"{selected_llm_model_name}:\n{str(bot_message)}"
except Exception as e:
        # No vector index yet means no document has been uploaded and indexed
        if vector_index is None:
            return "Please upload a file."
return f"An error occurred: {e}"
# UI Setup
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
gr.Markdown("")
with gr.Tabs():
with gr.TabItem("Introduction"):
gr.Markdown(md.description)
with gr.TabItem("Chatbot"):
with gr.Accordion("IMPORTANT: READ ME FIRST", open=False):
guid = gr.Markdown(md.guide)
with gr.Row():
with gr.Column(scale=1):
file_input = gr.File(file_count="single", type='filepath', label="Upload document")
embed_model_dropdown = gr.Dropdown(embed_models, label="Select Embedding", interactive=True)
with gr.Row():
btn = gr.Button("Submit", variant='primary')
clear = gr.ClearButton()
output = gr.Text(label='Vector Index')
# Use display names for LLM dropdown
llm_model_dropdown = gr.Dropdown(
list(llm_display_names.values()), # Display names
label="Select LLM",
interactive=True
)
with gr.Column(scale=3):
gr.ChatInterface(
fn=respond,
chatbot=gr.Chatbot(height=500),
show_progress='full',
textbox=gr.Textbox(placeholder="Ask me any questions on the uploaded document!", container=False)
)
# Set up Gradio interactions
llm_model_dropdown.change(fn=set_llm_model, inputs=llm_model_dropdown)
btn.click(fn=load_files, inputs=[file_input, embed_model_dropdown], outputs=output)
clear.click(lambda: [None] * 3, outputs=[file_input, embed_model_dropdown, output])
# Launch the demo (pass share=True to launch() for a public link)
if __name__ == "__main__":
demo.launch()