File size: 4,820 Bytes
ec95782
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import os

import gradio as gr
import requests
from bs4 import BeautifulSoup
from groq import Groq
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# Groq chat client. The API key is taken from the GROQ_API_KEY environment
# variable instead of a literal in this file: the key that used to be
# hard-coded here was committed to source control and should be revoked.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Sentence-embedding model used to vectorise PDF chunks for similarity search.
embedding_model = SentenceTransformerEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

# Single module-level conversation memory; history is stored under the
# "chat_history" key and returned as message objects (return_messages=True).
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

def process_pdf_with_langchain(pdf_path):
    """Index one PDF and return a retriever over its contents.

    Args:
        pdf_path: Path of the PDF file to load.

    Returns:
        A retriever created from a FAISS vector store of the PDF's chunks,
        configured with ``k=3`` results per query.
    """
    # Load the PDF, then split it using chunk_size=500 / chunk_overlap=50.
    pages = PyPDFLoader(pdf_path).load()
    chunker = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = chunker.split_documents(pages)

    # Embed the chunks with the module-level embedding model.
    index = FAISS.from_documents(chunks, embedding_model)
    return index.as_retriever(search_kwargs={"k": 3})

# SerpAPI credential used for web search. Read from the SERPAPI_KEY environment
# variable so the secret is not stored in the repository (the literal that used
# to live here was committed to source control and should be revoked). Falls
# back to an empty string when the variable is unset.
SERPAPI_KEY = os.environ.get("SERPAPI_KEY", "")

def scrape_google_search(query, num_results=3):
    """Search Google through SerpAPI and return the organic hits as text.

    The request is sent to SerpAPI's HTTP endpoint with ``requests``. The
    previous implementation called ``GoogleSearch``, a name this module never
    imports, so enabling web search always raised ``NameError``.

    Args:
        query: Search phrase.
        num_results: Number of results requested from SerpAPI.

    Returns:
        One ``"title: link"`` line per organic result, joined by newlines;
        ``"No results found"`` when there are none; or a string starting with
        ``"Error: "`` when the request fails or the response carries an
        ``"error"`` field.
    """
    params = {
        "engine": "google",
        "q": query,
        "hl": "fa",
        "gl": "ir",
        "num": num_results,
        "api_key": SERPAPI_KEY,
    }
    try:
        # Bounded wait so a stalled request cannot hang the chat handler.
        reply = requests.get("https://serpapi.com/search.json", params=params, timeout=15)
        # Parse the body before looking at the status code: the "error" field
        # checked below lives in the JSON payload.
        results = reply.json()
    except (requests.RequestException, ValueError) as exc:
        # Keep the existing convention of reporting failures as text so the
        # caller can still build a prompt.
        return f"Error: {exc}"

    if "error" in results:
        return f"Error: {results['error']}"

    search_results = [
        f"{hit.get('title', 'No Title')}: {hit.get('link', 'No Link')}"
        for hit in results.get("organic_results", [])
    ]
    return "\n".join(search_results) if search_results else "No results found"

def _format_chat_history(history):
    """Render stored conversation turns as plain ``Speaker: text`` lines."""
    if not history:
        return ""
    if isinstance(history, str):
        return history

    # Labels match the "You:" / "ParvizGPT:" markers used at the end of the prompt.
    # Assumes each stored message exposes `.type` ("human" / "ai") and
    # `.content`; anything else is rendered with a generic label.
    speakers = {"human": "You", "ai": "ParvizGPT"}
    lines = []
    for message in history:
        role = getattr(message, "type", "")
        text = getattr(message, "content", message)
        lines.append(f"{speakers.get(role, role or 'Message')}: {text}")
    return "\n".join(lines)


def generate_response(query, retriever=None, use_web_search=False):
    """Answer ``query`` with the Groq model, using optional PDF and web context.

    The prompt is assembled from a fixed persona description, any retrieved
    PDF passages, optional web search results, the conversation so far and
    finally the new question. The exchange is saved to the module-level
    ``memory`` afterwards.

    Args:
        query: The user's message.
        retriever: Optional retriever; when given, its relevant documents for
            ``query`` are added to the prompt.
        use_web_search: When true, results of ``scrape_google_search(query)``
            are added to the prompt.

    Returns:
        The model's reply with surrounding whitespace removed.
    """
    knowledge = ""

    if retriever:
        relevant_docs = retriever.get_relevant_documents(query)
        knowledge += "\n".join(doc.page_content for doc in relevant_docs)

    if use_web_search:
        web_results = scrape_google_search(query)
        knowledge += f"\n\nWeb Search Results:\n{web_results}"

    # The memory is configured with return_messages=True, so this is a list of
    # message objects; format it as readable turns instead of interpolating
    # the list's Python repr into the prompt.
    stored_history = memory.load_memory_variables({}).get("chat_history", "")
    chat_history = _format_chat_history(stored_history)

    context = (
        "This is a conversation with ParvizGPT, an AI model designed by Amir Mahdi Parviz from Kermanshah University of Technology (KUT), "
        "to help with tasks like answering questions in Persian, providing recommendations, and decision-making."
    )
    if knowledge:
        context += f"\n\nRelevant Knowledge:\n{knowledge}"
    if chat_history:
        context += f"\n\nChat History:\n{chat_history}"

    context += f"\n\nYou: {query}\nParvizGPT:"

    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": context}],
        model="llama-3.3-70b-versatile",
    )
    # Guard against a missing content field so .strip() cannot fail on None.
    response = (chat_completion.choices[0].message.content or "").strip()

    memory.save_context({"input": query}, {"output": response})
    return response

# Path of the PDF the current retriever was built from, so a file that stays
# attached in the upload box is not re-indexed on every message.
_indexed_pdf_path = None


def gradio_interface(user_message, chat_box, pdf_file=None, enable_web_search=False):
    """Handle one chat submission from the UI.

    Args:
        user_message: Text the user typed.
        chat_box: Current chat transcript as a list of
            ``(user_text, bot_text)`` pairs.
        pdf_file: Optional uploaded PDF. The upload widget is declared with
            ``type="filepath"``, which delivers a plain path string; objects
            exposing ``.name`` are accepted as well.
        enable_web_search: Whether to add web search results to the prompt.

    Returns:
        A new transcript list with this exchange appended as one
        ``(user_message, reply)`` pair. If the PDF cannot be processed, the
        reply slot carries the error text instead.
    """
    global retriever, _indexed_pdf_path
    history = list(chat_box or [])

    if pdf_file is not None:
        # A str has no `.name`, so this yields the path itself for filepath
        # uploads and the temp-file name for file-like uploads.
        pdf_path = getattr(pdf_file, "name", pdf_file)
        if pdf_path != _indexed_pdf_path:
            try:
                retriever = process_pdf_with_langchain(pdf_path)
            except Exception as e:
                # UI boundary: report the failure in the chat rather than crash.
                return history + [(user_message, f"Error processing PDF: {e}")]
            _indexed_pdf_path = pdf_path

    response = generate_response(user_message, retriever=retriever, use_web_search=enable_web_search)
    # One row per exchange: user text on the user side, reply on the bot side.
    history.append((user_message, response))
    return history

def clear_memory():
    """Forget the stored conversation and return an empty chat transcript.

    Returns:
        An empty list.
    """
    memory.clear()
    blank_transcript = []
    return blank_transcript

# Retriever over the most recently indexed PDF; stays None until one is uploaded.
retriever = None

with gr.Blocks() as interface:
    gr.Markdown("## ParvizGPT")

    # Conversation transcript.
    chat_box = gr.Chatbot(value=[], label="Chat History")

    # Message entry and the web-search toggle.
    user_message = gr.Textbox(
        lines=1,
        label="Your Message",
        placeholder="Type your message here and press Enter...",
        interactive=True,
    )
    enable_web_search = gr.Checkbox(value=False, label="🌐Enable Web Search")

    # Memory reset, optional PDF context and the submit button.
    clear_memory_btn = gr.Button("Clear Memory", interactive=True)
    pdf_file = gr.File(
        label="Upload PDF for Context (Optional)",
        type="filepath",
        interactive=True,
        scale=1,
    )
    submit_btn = gr.Button("Submit")

    # The Submit button and pressing Enter in the textbox run the same handler
    # with the same inputs.
    handler_inputs = [user_message, chat_box, pdf_file, enable_web_search]
    for trigger in (submit_btn.click, user_message.submit):
        trigger(gradio_interface, inputs=handler_inputs, outputs=chat_box)
    clear_memory_btn.click(clear_memory, inputs=[], outputs=chat_box)

interface.launch()