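"""Gradio app for chatting with uploaded documents.

Uploaded files are parsed with LlamaParse, embedded with a selectable
Hugging Face embedding model, indexed with LlamaIndex, and queried with a
selectable LLM served through the Hugging Face Inference API.
"""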
from datetime import datetime
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_parse import LlamaParse
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import os
from dotenv import load_dotenv
import gradio as gr
import markdowm as md  # local module (markdowm.py) providing the app's description and guide text
import base64

# Load environment variables
load_dotenv()

llm_models = [
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "tiiuae/falcon-7b-instruct",
]

embed_models = [
    "BAAI/bge-small-en-v1.5",  # 33.4M
    "NeuML/pubmedbert-base-embeddings",
    "BAAI/llm-embedder", # 109M
    "BAAI/bge-large-en" # 335M
]

# Global state shared across the Gradio callbacks
selected_llm_model_name = llm_models[0]  # Default to the first model in the list
selected_embed_model_name = embed_models[0] # Default to the first model in the list
vector_index = None

# Initialize the LlamaParse parser (expects the LlamaCloud API key in the LLAMA_INDEX_API env var)
parser = LlamaParse(api_key=os.getenv("LLAMA_INDEX_API"), result_type='markdown')
# Define file extractor with various common extensions
file_extractor = {
    '.pdf': parser,  # PDF documents
    '.docx': parser,  # Microsoft Word documents
    '.doc': parser,  # Older Microsoft Word documents
    '.txt': parser,  # Plain text files
    '.csv': parser,  # Comma-separated values files
    '.xlsx': parser,  # Microsoft Excel files (requires additional processing for tables)
    '.pptx': parser,  # Microsoft PowerPoint files (for slides)
    '.html': parser,  # HTML files (web pages)

    # Image files for OCR processing
    '.jpg': parser,  # JPEG images
    '.jpeg': parser,  # JPEG images
    '.png': parser,  # PNG images


    # Scanned documents in image formats
    '.webp': parser,  # WebP images
    '.svg': parser,  # SVG files (vector format, may contain embedded text)
}


# File processing function
def load_files(file_path: str, embed_model_name: str):
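    """Parse the uploaded file, embed it, and build the global VectorStoreIndex.

    Returns a status message for the UI instead of raising on failure.
    """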
    try:
        global vector_index
        document = SimpleDirectoryReader(input_files=[file_path], file_extractor=file_extractor).load_data()
        embed_model = HuggingFaceEmbedding(model_name=embed_model_name)
        vector_index = VectorStoreIndex.from_documents(document, embed_model=embed_model)
        print(f"Parsing done for {file_path}")
        filename = os.path.basename(file_path)
        return f"Ready to give response on {filename}"
    except Exception as e:
        return f"An error occurred: {e}"


# Function to handle the selected model from dropdown
def set_llm_model(selected_model):
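    """Store the LLM chosen in the dropdown in the module-level global."""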
    global selected_llm_model_name
    selected_llm_model_name = selected_model  # Update the global variable
    # print(f"Model selected: {selected_model_name}")
    # return f"Model set to: {selected_model_name}"


# Respond function that uses the globally set selected model
def respond(message, history):
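    """Answer a chat message by querying the global vector index with the currently selected LLM."""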
    try:
        # Answer only when a document has already been indexed
        if vector_index is None:
            return "Please upload a file first."

        # Initialize the LLM with the selected model
        llm = HuggingFaceInferenceAPI(
            model_name=selected_llm_model_name,
            context_window=8192,  # Context window size (typically the max length of the model)
            num_output=1024,  # Tokens per response generation (512-1024 works well for detailed answers)
            temperature=0.3,  # Lower temperature for more focused answers (0.2-0.4 for factual info)
            top_p=0.9,  # Top-p sampling to control diversity while retaining quality
            frequency_penalty=0.5,  # Slight penalty to avoid repetition (ignored if the backend does not support it)
            presence_penalty=0.5,  # Encourages exploration without digressing too much
            token=os.getenv("TOKEN")  # Hugging Face API token from the environment
        )

        # Set up the query engine with the selected LLM
        query_engine = vector_index.as_query_engine(llm=llm)
        bot_message = query_engine.query(message)

        print(f"\n{datetime.now()}:{selected_llm_model_name}:: {message} --> {str(bot_message)}\n")
        return f"{selected_llm_model_name}:\n{str(bot_message)}"
    except Exception as e:
        return f"An error occurred: {e}"

def encode_image(image_path):
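    """Return the contents of an image file as a base64-encoded UTF-8 string."""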
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

# UI Setup
with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto Mono")]), css='footer {visibility: hidden}') as demo:
    gr.Markdown("# HundAI QA📄")
    with gr.Tabs():
        with gr.TabItem("Introduction"):
            gr.Markdown(md.description)

        with gr.TabItem("Chatbot"):
            with gr.Accordion("IMPORTANT: READ ME FIRST", open=False):
                guid = gr.Markdown(md.guide)
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(file_count="single", type='filepath', label="Upload document")
                    # gr.Markdown("Dont know what to select check out in Intro tab")
                    embed_model_dropdown = gr.Dropdown(embed_models, label="Select Embedding", interactive=True)
                    with gr.Row():
                        btn = gr.Button("Submit", variant='primary')
                        clear = gr.ClearButton()
                    output = gr.Text(label='Vector Index')
                    llm_model_dropdown = gr.Dropdown(llm_models, label="Select LLM", interactive=True)
                with gr.Column(scale=3):
                    gr.ChatInterface(
                        fn=respond,
                        chatbot=gr.Chatbot(height=500),
                        theme = "soft",
                        show_progress='full',
                        # cache_mode='lazy',
                        textbox=gr.Textbox(placeholder="Ask me any questions on the uploaded document!", container=False)
                    )
    
    # Set up Gradio interactions
    llm_model_dropdown.change(fn=set_llm_model, inputs=llm_model_dropdown)
    btn.click(fn=load_files, inputs=[file_input, embed_model_dropdown], outputs=output)
    clear.click(lambda: [None] * 3, outputs=[file_input, embed_model_dropdown, output])

# Launch the Gradio app
if __name__ == "__main__":
    demo.launch()