File size: 5,315 Bytes
55db43f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import os
import time

import gradio as gr
import requests
from bs4 import BeautifulSoup
from groq import Groq
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS


# The Groq API key is read from the environment so that the secret never lives
# in source control. A key that was previously committed to this file must be
# treated as compromised and revoked.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    raise RuntimeError(
        "The GROQ_API_KEY environment variable must be set to a valid Groq API key."
    )
client = Groq(api_key=_groq_api_key)

# Sentence-embedding model used to embed PDF chunks for the FAISS index.
embedding_model = SentenceTransformerEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

# Single module-level conversation memory: every call to generate_response
# reads from and writes to this one object.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

def process_pdf_with_langchain(pdf_path, progress_callback=None):
    """Build a retriever over the text of a PDF file.

    The PDF is loaded with ``PyPDFLoader``, split by ``CharacterTextSplitter``
    (``chunk_size=500``, ``chunk_overlap=50``), embedded with the module-level
    ``embedding_model`` and indexed in a FAISS vector store.

    Args:
        pdf_path: Path of the PDF file to index.
        progress_callback: Optional callable invoked once, with a single
            status string, after the index has been built. Pass ``None``
            (the default) to disable reporting.

    Returns:
        A retriever over the new vector store, configured with
        ``search_kwargs={"k": 3}``.

    Raises:
        ValueError: If the PDF produced no text chunks to index.
    """
    documents = PyPDFLoader(pdf_path).load()

    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    split_documents = splitter.split_documents(documents)
    if not split_documents:
        # Fail with a readable message instead of whatever low-level error the
        # vector store would raise when given nothing to index.
        raise ValueError("No extractable text was found in the PDF.")

    vectorstore = FAISS.from_documents(split_documents, embedding_model)
    pdf_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    if progress_callback is not None:
        progress_callback("Processing complete! 100%")
    return pdf_retriever


def scrape_google_search(query, num_results=3):
    """Scrape Google's HTML results page for a query.

    Args:
        query: Free-text search query. It is passed through ``params`` so
            that spaces, ``&``, ``#`` and non-ASCII text are URL-encoded
            rather than corrupting the request URL.
        num_results: Maximum number of results to return.

    Returns:
        A newline-separated string of ``"<title>: <link>"`` entries. The
        string is empty when the page contains no matching result blocks.

    Raises:
        requests.RequestException: If the HTTP request fails or does not
            complete within the timeout.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(
        "https://www.google.com/search",
        params={"q": query},
        headers=headers,
        # Without a timeout a stalled connection would block the chat forever.
        timeout=10,
    )
    soup = BeautifulSoup(response.text, "html.parser")

    results = []
    for result_block in soup.find_all("div", class_="tF2Cxc"):
        if len(results) >= num_results:
            break
        heading = result_block.find("h3")
        anchor = result_block.find("a")
        # Skip blocks lacking a title or link instead of crashing on None.
        if heading is None or anchor is None or not anchor.get("href"):
            continue
        results.append(f"{heading.text}: {anchor['href']}")
    return "\n".join(results)


def _format_chat_history(chat_history):
    """Render stored conversation history as a plain-text transcript."""
    if not chat_history:
        return ""
    if isinstance(chat_history, str):
        return chat_history

    speaker_names = {"human": "You", "ai": "ParvizGPT"}
    lines = []
    for message in chat_history:
        # Fall back gracefully if an entry is not a message object.
        role = getattr(message, "type", "")
        content = getattr(message, "content", message)
        speaker = speaker_names.get(role, role or "Message")
        lines.append(f"{speaker}: {content}")
    return "\n".join(lines)


def generate_response(query, retriever=None, use_web_search=False):
    """Answer a user query with the Groq chat model.

    The prompt is assembled from a fixed persona description, optional
    knowledge (retrieved PDF chunks and/or scraped web results), the stored
    chat history and the new query. The exchange is saved to the module-level
    ``memory`` before returning.

    Args:
        query: The user's message.
        retriever: Optional retriever; when given, the page content of the
            documents it returns for ``query`` is added to the prompt.
        use_web_search: When true, results from ``scrape_google_search`` are
            appended to the knowledge section.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    knowledge = ""

    if retriever is not None:
        relevant_docs = retriever.get_relevant_documents(query)
        knowledge += "\n".join(doc.page_content for doc in relevant_docs)

    if use_web_search:
        web_results = scrape_google_search(query)
        knowledge += f"\n\nWeb Search Results:\n{web_results}"

    # The memory is created with return_messages=True, so the history is a
    # list of message objects; render it as text rather than embedding the
    # list's repr in the prompt.
    stored_history = memory.load_memory_variables({}).get("chat_history", "")
    chat_history = _format_chat_history(stored_history)

    context = (
        f"This is a conversation with ParvizGPT, an AI model designed by Amir Mahdi Parviz from Kermanshah University of Technology (KUT), "
        f"to help with tasks like answering questions in Persian, providing recommendations, and decision-making."
    )
    if knowledge:
        context += f"\n\nRelevant Knowledge:\n{knowledge}"
    if chat_history:
        context += f"\n\nChat History:\n{chat_history}"

    context += f"\n\nYou: {query}\nParvizGPT:"

    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": context}],
        model="llama-3.3-70b-versatile",
    )
    # Guard against a missing content field so .strip() cannot fail on None.
    response = (chat_completion.choices[0].message.content or "").strip()

    memory.save_context({"input": query}, {"output": response})
    return response

def upload_and_process(file, progress_display):
    """Index an uploaded PDF and publish its retriever for later chats.

    Args:
        file: Uploaded file object; its ``name`` attribute is used as the
            path handed to ``process_pdf_with_langchain``.
        progress_display: Current value of the progress component. Unused.

    Returns:
        A ``(progress_text, status_text)`` tuple. On success the progress
        text holds the collected progress messages joined by newlines; on
        any failure it is empty and the status text describes the error.
    """
    global retriever
    collected_messages = []
    try:
        # The module-level retriever is only replaced if processing succeeds.
        retriever = process_pdf_with_langchain(
            file.name,
            lambda message: collected_messages.append(message),
        )
        progress_text = "\n".join(collected_messages)
        return progress_text, "File uploaded and processed successfully."
    except Exception as error:
        return "", f"Error processing file: {error}"

def gradio_interface(user_message, chat_box, enable_web_search=False):
    """Handle one chat turn and return the updated chat history.

    Args:
        user_message: Text typed by the user.
        chat_box: Current chat history as a list of
            ``(user_message, bot_message)`` pairs; ``None`` is treated as
            an empty history.
        enable_web_search: Whether to add web search results to the prompt.

    Returns:
        The chat history list, extended with one ``(user_message, response)``
        pair. Blank messages leave the history unchanged.
    """
    history = chat_box if chat_box is not None else []

    # Do not send an empty prompt to the model.
    if not user_message or not user_message.strip():
        return history

    response = generate_response(
        user_message,
        retriever=retriever,
        use_web_search=enable_web_search,
    )
    # One (user, bot) pair per turn, so each message is rendered on the
    # correct side of the Chatbot component.
    history.append((user_message, response))
    return history

def clear_memory():
    """Erase the stored conversation memory.

    Returns:
        A new empty list, used as the cleared chat history value.
    """
    memory.clear()
    empty_history = []
    return empty_history

# Retriever for the most recently uploaded PDF; None until a PDF is processed.
retriever = None

with gr.Blocks() as interface:
    gr.Markdown("## ParvizGPT")
    with gr.Row():
        chat_box = gr.Chatbot(label="Chat History", value=[])
    with gr.Row():
        user_message = gr.Textbox(
            label="Your Message",
            placeholder="Type your message here and press Enter...",
            lines=1,
            interactive=True,
        )
    with gr.Row():
        clear_memory_btn = gr.Button("Clear Memory", interactive=True)
        enable_web_search = gr.Checkbox(label="🌐Enable Web Search", value=False, interactive=True)
    with gr.Row():
        pdf_upload = gr.UploadButton(label="📄 Upload Your PDF", file_types=[".pdf"])
        progress_display = gr.Textbox(label="Progress", placeholder="Progress updates will appear here", interactive=True)
        # Shows the success or error message returned by upload_and_process.
        upload_status = gr.Textbox(label="Status", interactive=False)
    with gr.Row():
        submit_btn = gr.Button("Submit")

    # upload_and_process returns (progress_text, status_text), so the event
    # must declare two outputs; with a single output the status or error
    # message never reaches the user.
    pdf_upload.upload(
        upload_and_process,
        inputs=[pdf_upload, progress_display],
        outputs=[progress_display, upload_status],
    )

    # The Submit button and pressing Enter in the textbox run the same handler.
    submit_btn.click(gradio_interface, inputs=[user_message, chat_box, enable_web_search], outputs=chat_box)
    user_message.submit(gradio_interface, inputs=[user_message, chat_box, enable_web_search], outputs=chat_box)
    clear_memory_btn.click(clear_memory, inputs=[], outputs=chat_box)

interface.launch()