File size: 11,889 Bytes
f0df96e
 
 
 
 
 
 
e71e68a
 
f0df96e
 
 
 
 
 
 
 
 
 
46d0a50
e71e68a
 
 
 
46d0a50
e71e68a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbab016
 
 
 
 
 
 
 
 
e71e68a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbab016
e71e68a
 
 
 
 
cbab016
e71e68a
 
 
 
 
 
 
 
 
 
 
 
cbab016
e71e68a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbab016
e71e68a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
import gradio as gr # used for UI dev
import os # Built-in module, used to read the Hugging Face API token (HF_TOKEN) from the environment
from typing import List, Dict # Built-in module providing type hints
from langchain.text_splitter import ( # Text splitting strategies 
    RecursiveCharacterTextSplitter,#Text splitting strategies  
    CharacterTextSplitter,#Text splitting strategies 
    TokenTextSplitter#Text splitting strategies 
)

from langchain_community.vectorstores import FAISS, Chroma, Qdrant # Vector database
from langchain_community.document_loaders import PyPDFLoader # Convert PDF to TEXT
from langchain.chains import ConversationalRetrievalChain # Retrieval chain for multi-turn conversation
from langchain_community.embeddings import HuggingFaceEmbeddings # Embedding model that turns text into numeric vectors
from langchain_huggingface import HuggingFaceEndpoint # API for generative model
from langchain.memory import ConversationBufferMemory # Chat History

# Hugging Face repo ids of the chat models offered in the UI.
list_llm = [
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.2",
]
# Short display names: the repo id without its organisation prefix.
list_llm_simple = list(map(os.path.basename, list_llm))
# Hugging Face access token read from the environment (None when HF_TOKEN is unset).
api_token = os.environ.get("HF_TOKEN")
# Chunk size per preset, keyed by splitting strategy; the value is passed as
# ``chunk_size`` to the splitter of the same name.
CHUNK_SIZES = {
    "small": dict(recursive=512, fixed=512, token=256),
    "medium": dict(recursive=1024, fixed=1024, token=512),
}
# passing Strategy , Chunk size , overlap
def get_text_splitter(strategy: str, chunk_size: int = 1024, chunk_overlap: int = 64):
    """Build the text splitter for the requested splitting strategy.

    Only the requested splitter is constructed. Building all three on every
    call meant that choosing a character-based strategy still paid for (and
    depended on) the tokenizer setup done by ``TokenTextSplitter``.

    Args:
        strategy: One of ``"recursive"``, ``"fixed"`` or ``"token"``.
        chunk_size: Maximum chunk size handed to the splitter.
        chunk_overlap: Overlap between consecutive chunks handed to the splitter.

    Returns:
        The configured splitter, or ``None`` when ``strategy`` is not recognised.
    """
    options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    # Factories (not instances) so that nothing is built until a strategy is picked.
    factories = {
        "recursive": lambda: RecursiveCharacterTextSplitter(**options),
        "fixed": lambda: CharacterTextSplitter(**options),
        "token": lambda: TokenTextSplitter(**options),
    }
    factory = factories.get(strategy)
    return factory() if factory is not None else None

# def get_text_splitter(strategy, chunk_size=1024, chunk_overlap=64):
#     if strategy == "recursive":
#         return RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
#     elif strategy == "fixed":
#         return CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
#     elif strategy == "token":
#         return TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
#     return None

def load_doc(list_file_path: List[str], splitting_strategy: str, chunk_size: str):
    """Load the given PDF files and split their pages into chunks.

    Args:
        list_file_path: Paths of the PDF files to load.
        splitting_strategy: Splitter name; also the inner key into ``CHUNK_SIZES``.
        chunk_size: Size preset; the outer key into ``CHUNK_SIZES``.

    Returns:
        The chunks returned by the splitter's ``split_documents``.

    Raises:
        KeyError: If the preset or strategy is missing from ``CHUNK_SIZES``.
    """
    # Resolve the numeric size first so a bad preset fails before any file is read.
    size_for_strategy = CHUNK_SIZES[chunk_size][splitting_strategy]

    pdf_loaders = [PyPDFLoader(path) for path in list_file_path]
    all_pages = [page for pdf_loader in pdf_loaders for page in pdf_loader.load()]

    splitter = get_text_splitter(splitting_strategy, size_for_strategy)
    return splitter.split_documents(all_pages)

def create_db(splits, db_choice: str = "faiss"):
    """Embed the document chunks and store them in the chosen vector database.

    The backend is validated before the embedding model is instantiated, so an
    unsupported ``db_choice`` fails immediately instead of after the model load.

    Args:
        splits: Document chunks to index.
        db_choice: ``"faiss"``, ``"chroma"`` or ``"qdrant"``.

    Returns:
        The populated vector store created by the backend's ``from_documents``.

    Raises:
        KeyError: If ``db_choice`` is not one of the supported backends.
    """
    import uuid  # local import: only used to name the Chroma collection

    db_creators = {
        "faiss": lambda embeddings: FAISS.from_documents(splits, embeddings),
        # NOTE(review): Chroma's in-memory client appears to keep collections for
        # the lifetime of the process, so reusing the default collection name
        # would mix in chunks from earlier uploads. A unique name per database
        # keeps each upload isolated.
        "chroma": lambda embeddings: Chroma.from_documents(
            splits,
            embeddings,
            collection_name=f"pdf_docs_{uuid.uuid4().hex}",
        ),
        "qdrant": lambda embeddings: Qdrant.from_documents(
            splits,
            embeddings,
            location=":memory:",  # in-memory database for Qdrant
            collection_name="pdf_docs",
        ),
    }
    creator = db_creators[db_choice]  # KeyError here, before the embeddings load
    return creator(HuggingFaceEmbeddings())

# Initialize Vector DB
def initialize_database(list_file_obj, splitting_strategy, chunk_size, db_choice, progress=gr.Progress()):
    """Create the vector database from the uploaded files.

    Any exception is converted into a status message so the UI always gets a
    ``(database, status)`` pair back.

    Args:
        list_file_obj: Uploaded file objects; each non-``None`` entry's ``.name``
            is used as the path to load.
        splitting_strategy: Splitter name forwarded to ``load_doc``.
        chunk_size: Size preset forwarded to ``load_doc``.
        db_choice: Backend name forwarded to ``create_db``.
        progress: Gradio progress tracker (not used inside this function).

    Returns:
        ``(vector_db, status_message)``; ``vector_db`` is ``None`` on any failure.
    """
    try:
        if not list_file_obj:
            return None, "No files uploaded. Please upload PDF documents first."

        pdf_paths = [uploaded.name for uploaded in list_file_obj if uploaded is not None]
        if not pdf_paths:
            return None, "No valid files found. Please upload PDF documents."

        chunks = load_doc(pdf_paths, splitting_strategy, chunk_size)
        if not chunks:
            return None, "No content extracted from documents."

        store = create_db(chunks, db_choice)
        status = (
            f"Database created successfully using {splitting_strategy} splitting "
            f"and {db_choice} vector database!"
        )
        return store, status
    except Exception as exc:
        return None, f"Error creating database: {exc}"

def initialize_llmchain(llm_choice, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
    """Build the conversational retrieval chain for the selected LLM.

    Any exception is converted into a status message so the UI always gets a
    ``(chain, status)`` pair back.

    Args:
        llm_choice: Index into ``list_llm`` of the model to use.
        temperature: Sampling temperature passed to the endpoint.
        max_tokens: Passed to the endpoint as ``max_new_tokens``.
        top_k: Passed to the endpoint as ``top_k``.
        vector_db: Vector store whose retriever feeds the chain.
        progress: Gradio progress tracker (not used inside this function).

    Returns:
        ``(qa_chain, status_message)``; ``qa_chain`` is ``None`` on any failure.
    """
    if vector_db is None:
        return None, "Please create vector database first."

    try:
        endpoint = HuggingFaceEndpoint(
            repo_id=list_llm[llm_choice],
            huggingfacehub_api_token=api_token,
            temperature=temperature,
            max_new_tokens=max_tokens,
            top_k=top_k,
        )
        # Conversation memory attached to the chain; it stores the chain's
        # "answer" output under the "chat_history" key.
        chat_memory = ConversationBufferMemory(
            memory_key="chat_history",
            output_key='answer',
            return_messages=True,
        )
        chain = ConversationalRetrievalChain.from_llm(
            endpoint,
            retriever=vector_db.as_retriever(),
            memory=chat_memory,
            return_source_documents=True,
        )
    except Exception as exc:
        return None, f"Error initializing LLM: {exc}"
    return chain, "LLM initialized successfully!"

def conversation(qa_chain, message, history):
    """Answer ``message`` with the retrieval chain and collect up to three sources.

    Args:
        qa_chain: The retrieval chain to query; ``None`` until the LLM is initialized.
        message: The user's question.
        history: Chat history as ``(user, bot)`` pairs; ``None`` is treated as empty.

    Returns:
        A 9-tuple matching the Gradio outputs: the chain, an update that clears
        the message box, the extended history, then ``(content, page)`` for each
        of three source slots. Unused slots are ``""`` and ``0``.

    Raises:
        gr.Error: If the LLM chain has not been initialized yet.
    """
    if qa_chain is None:
        # Show a readable message in the UI instead of an AttributeError on None.
        raise gr.Error("Please initialize the LLM first.")

    history = history or []
    response = qa_chain.invoke({
        "question": message,
        "chat_history": [(turn[0], turn[1]) for turn in history],
    })

    response_answer = response["answer"]
    # Keep only the text after the prompt's "Helpful Answer:" marker, if present.
    if "Helpful Answer:" in response_answer:
        response_answer = response_answer.split("Helpful Answer:")[-1]

    max_sources = 3
    sources = response["source_documents"][:max_sources]
    source_contents = [doc.page_content.strip() for doc in sources]
    # The stored "page" value is incremented by one for display; a missing value shows as 1.
    source_pages = [doc.metadata.get("page", 0) + 1 for doc in sources]

    # Pad to exactly three slots so the fixed set of UI outputs is always filled.
    missing = max_sources - len(sources)
    source_contents += [""] * missing
    source_pages += [0] * missing

    return (
        qa_chain,
        gr.update(value=""),
        history + [(message, response_answer)],
        source_contents[0],
        source_pages[0],
        source_contents[1],
        source_pages[1],
        source_contents[2],
        source_pages[2],
    )

def demo():
    """Build the Gradio UI for the RAG PDF chatbot and launch it.

    The page has three steps: upload PDFs and create the vector database,
    choose and initialize the LLM, then chat with the documents. The vector
    database and the chain are kept in per-session ``gr.State`` objects.
    """
    with gr.Blocks(theme=gr.themes.Default(primary_hue="red", secondary_hue="pink", neutral_hue="sky")) as app:
        vector_db = gr.State()
        qa_chain = gr.State()

        gr.HTML("<center><h1>RAG PDF Chatbot</h1></center>")

        with gr.Column(scale=86):
            gr.Markdown("<b>Step 1 - Configure and Initialize RAG Pipeline</b>")
            with gr.Row():
                document = gr.Files(
                    height=300,
                    file_count="multiple",
                    # Extensions need the leading dot; a bare "pdf" is treated
                    # as a file category rather than an extension filter.
                    file_types=[".pdf"],
                    interactive=True,
                    label="Upload PDF documents"
                )

            with gr.Row():
                splitting_strategy = gr.Radio(
                    ["recursive", "fixed", "token"],
                    label="Text Splitting Strategy",
                    value="recursive"
                )
                db_choice = gr.Radio(
                    ["faiss", "chroma", "qdrant"],
                    label="Vector Database",
                    value="faiss"
                )
                chunk_size = gr.Radio(
                    ["small", "medium"],
                    label="Chunk Size",
                    value="medium"
                )

            with gr.Row():
                db_btn = gr.Button("Create vector database")
                db_progress = gr.Textbox(
                    value="Not initialized",
                    show_label=False
                )

            gr.Markdown("<b>Step 2 - Configure LLM</b>")
            with gr.Row():
                llm_choice = gr.Radio(
                    list_llm_simple,
                    label="Available LLMs",
                    value=list_llm_simple[0],
                    type="index"  # handlers receive the position in list_llm
                )

            with gr.Row():
                with gr.Accordion("LLM Parameters", open=False):
                    temperature = gr.Slider(
                        minimum=0.01,
                        maximum=1.0,
                        value=0.5,
                        step=0.1,
                        label="Temperature"
                    )
                    max_tokens = gr.Slider(
                        minimum=128,
                        maximum=4096,
                        value=2048,
                        step=128,
                        label="Max Tokens"
                    )
                    top_k = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=3,
                        step=1,
                        label="Top K"
                    )

            with gr.Row():
                init_llm_btn = gr.Button("Initialize LLM")
                llm_progress = gr.Textbox(
                    value="Not initialized",
                    show_label=False
                )

        with gr.Column(scale=200):
            gr.Markdown("<b>Step 3 - Chat with Documents</b>")
            chatbot = gr.Chatbot(height=505)

            with gr.Accordion("Source References", open=False):
                with gr.Row():
                    source1 = gr.Textbox(label="Source 1", lines=2)
                    page1 = gr.Number(label="Page")
                with gr.Row():
                    source2 = gr.Textbox(label="Source 2", lines=2)
                    page2 = gr.Number(label="Page")
                with gr.Row():
                    source3 = gr.Textbox(label="Source 3", lines=2)
                    page3 = gr.Number(label="Page")

            with gr.Row():
                msg = gr.Textbox(
                    placeholder="Ask a question",
                    show_label=False
                )
            with gr.Row():
                submit_btn = gr.Button("Submit")
                clear_btn = gr.ClearButton(
                    [msg, chatbot],
                    value="Clear Chat"
                )

        # Event handlers
        db_btn.click(
            initialize_database,
            inputs=[document, splitting_strategy, chunk_size, db_choice],
            outputs=[vector_db, db_progress]
        ).then(
            # The handler receives the state value itself (not a tuple), so test
            # it directly; indexing it with [0] raised TypeError on every run.
            lambda db: gr.update(interactive=db is not None),
            inputs=[vector_db],
            outputs=[init_llm_btn]
        )

        init_llm_btn.click(
            initialize_llmchain,
            inputs=[llm_choice, temperature, max_tokens, top_k, vector_db],
            outputs=[qa_chain, llm_progress]
        ).then(
            # Same as above: enable the message box only once a chain exists.
            lambda chain: gr.update(interactive=chain is not None),
            inputs=[qa_chain],
            outputs=[msg]
        )

        # Pressing Enter in the message box and clicking Submit do the same thing.
        source_outputs = [source1, page1, source2, page2, source3, page3]
        chat_inputs = [qa_chain, msg, chatbot]
        chat_outputs = [qa_chain, msg, chatbot] + source_outputs
        msg.submit(conversation, inputs=chat_inputs, outputs=chat_outputs)
        submit_btn.click(conversation, inputs=chat_inputs, outputs=chat_outputs)

        # ClearButton already resets msg and chatbot; also reset the source panels.
        clear_btn.click(
            lambda: [None, "", 0, "", 0, "", 0],
            outputs=[chatbot] + source_outputs
        )

    app.queue().launch(debug=True)

# Build and launch the Gradio app only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo()