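"""ParvizGPT: a Gradio chat assistant for Persian-language Q&A.

Answers are generated with the Groq chat API; context can optionally come from an
uploaded PDF (LangChain + FAISS retrieval), Google search results (SerpAPI), or
webpages scraped from links in the user's message.
"""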
import os

import gradio as gr
# NOTE: on newer LangChain releases these classes live in langchain_community /
# langchain_text_splitters; the paths below assume the legacy package layout.
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings  # Persian (Farsi) BERT embeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from groq import Groq
import requests
from bs4 import BeautifulSoup
from serpapi import GoogleSearch
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The Groq API key is read from the environment (GROQ_API_KEY) rather than hard-coded in source.
client = Groq(api_key=os.environ["GROQ_API_KEY"])

embedding_model = HuggingFaceEmbeddings(model_name="HooshvareLab/bert-fa-base-uncased")

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

def process_pdf_with_langchain(pdf_path):
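    """Load a PDF, split it into overlapping 500-character chunks, embed them with the
    Persian BERT model, and return a FAISS retriever over the 3 most similar chunks."""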
    try:
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        split_documents = text_splitter.split_documents(documents)

        vectorstore = FAISS.from_documents(split_documents, embedding_model)
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
        return retriever
    except Exception as e:
        logger.error(f"Error processing PDF: {e}")
        raise

# The SerpAPI key is likewise read from the environment (SERPAPI_KEY is assumed to be set).
SERPAPI_KEY = os.environ["SERPAPI_KEY"]

def scrape_google_search(query, num_results=3):
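    """Search Google through SerpAPI with a Persian locale and return up to
    `num_results` organic results as "title: link" lines, or an error string."""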
    try:
        params = {
            "q": query,
            "hl": "fa",
            "gl": "ir",
            "num": num_results,
            "api_key": SERPAPI_KEY,
        }
        search = GoogleSearch(params)
        results = search.get_dict()

        if "error" in results:
            return f"Error: {results['error']}"

        search_results = []
        for result in results.get("organic_results", []):
            title = result.get("title", "No Title")
            link = result.get("link", "No Link")
            search_results.append(f"{title}: {link}")
        return "\n".join(search_results) if search_results else "No results found"
    except Exception as e:
        logger.error(f"Error scraping Google search: {e}")
        return f"Error: {e}"

def scrape_webpage(url):
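    """Fetch `url` with a desktop browser User-Agent and return its visible text,
    or an error string if the request or parsing fails."""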
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=15)  # avoid hanging on unresponsive hosts
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")
        text = soup.get_text(separator="\n")
        return text.strip()
    except Exception as e:
        logger.error(f"Error scraping webpage {url}: {e}")
        return f"Error: {e}"

def generate_response(query, retriever=None, use_web_search=False, scrape_web=False):
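    """Build a prompt from optional PDF retrieval, web search, and scraped URLs,
    send it to the Groq chat model, and record the exchange in conversation memory."""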
    try:
        knowledge = ""

        if retriever:
            relevant_docs = retriever.get_relevant_documents(query)
            knowledge += "\n".join([doc.page_content for doc in relevant_docs])

        if use_web_search:
            web_results = scrape_google_search(query)
            knowledge += f"\n\nWeb Search Results:\n{web_results}"

        if scrape_web:
            urls = [word for word in query.split() if word.startswith("http://") or word.startswith("https://")]
            for url in urls:
                webpage_content = scrape_webpage(url)
                knowledge += f"\n\nWebpage Content from {url}:\n{webpage_content}"

        chat_history = memory.load_memory_variables({}).get("chat_history", "")
        context = (
            f"This is a conversation with ParvizGPT, an AI model designed by Amir Mahdi Parviz from Kermanshah University of Technology (KUT), "
            f"to help with tasks like answering questions in Persian, providing recommendations, and decision-making."
        )
        if knowledge:
            context += f"\n\nRelevant Knowledge:\n{knowledge}"
        if chat_history:
            context += f"\n\nChat History:\n{chat_history}"

        context += f"\n\nYou: {query}\nParvizGPT:"

        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": context}],
            model="gemma2-9b-it",  # alternative: "llama-3.3-70b-versatile"
        )
        response = chat_completion.choices[0].message.content.strip()

        memory.save_context({"input": query}, {"output": response})
        return response
    except Exception as e:
        logger.error(f"Error generating response: {e}")
        return f"Error: {e}"

def gradio_interface(user_message, chat_box, pdf_file=None, enable_web_search=False, scrape_web=False):
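    """Gradio callback: rebuild the PDF retriever when a file is uploaded, generate a
    reply for `user_message`, and append the exchange to the chatbot history."""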
    global retriever
    if pdf_file is not None:
        try:
            retriever = process_pdf_with_langchain(pdf_file)  # type="filepath" passes the upload as a path string
        except Exception as e:
            return chat_box + [(user_message, f"Error processing PDF: {e}")]

    response = generate_response(user_message, retriever=retriever, use_web_search=enable_web_search, scrape_web=scrape_web)
    chat_box.append(("You", user_message))
    chat_box.append(("ParvizGPT", response))
    return chat_box

def clear_memory():
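    """Clear the conversation memory and reset the chatbot display."""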
    memory.clear()
    return []

retriever = None  # module-level PDF retriever, rebuilt whenever a new PDF is uploaded

with gr.Blocks() as interface:
    gr.Markdown("## ParvizGPT")
    chat_box = gr.Chatbot(label="Chat History", value=[])

    user_message = gr.Textbox(
        label="Your Message",
        placeholder="Type your message here and press Enter...",
        lines=1,
        interactive=True,
    )
    enable_web_search = gr.Checkbox(label="🌐 Enable Web Search", value=False)
    scrape_web = gr.Checkbox(label="🌍 Scrape Webpages", value=False)

    clear_memory_btn = gr.Button("Clear Memory", interactive=True)
    pdf_file = gr.File(label="Upload PDF for Context (Optional)", type="filepath", interactive=True, scale=1)

    submit_btn = gr.Button("Submit")
    submit_btn.click(gradio_interface, inputs=[user_message, chat_box, pdf_file, enable_web_search, scrape_web], outputs=chat_box)
    user_message.submit(gradio_interface, inputs=[user_message, chat_box, pdf_file, enable_web_search, scrape_web], outputs=chat_box)
    clear_memory_btn.click(clear_memory, inputs=[], outputs=chat_box)

interface.launch()
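# When running locally (rather than e.g. on Hugging Face Spaces), a temporary public
# link can be created by launching with interface.launch(share=True) instead.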