# ParvizGPT — Gradio chatbot app: PDF question-answering (LangChain + FAISS RAG)
# with optional Google web-search context, answered by a Groq-hosted LLM.
import os
import time

import gradio as gr
import requests
from bs4 import BeautifulSoup
from groq import Groq
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
client = Groq(api_key="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")
embedding_model = SentenceTransformerEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
def process_pdf_with_langchain(pdf_path, progress_callback):
# progress_callback("Initializing PDF processing... 0%")
time.sleep(0.5)
loader = PyPDFLoader(pdf_path)
# progress_callback("Loading PDF... 20%")
documents = loader.load()
time.sleep(0.5)
# progress_callback("Splitting documents... 50%")
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
split_documents = text_splitter.split_documents(documents)
time.sleep(0.5)
# progress_callback("Creating vector store... 80%")
vectorstore = FAISS.from_documents(split_documents, embedding_model)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
progress_callback("Processing complete! 100%")
return retriever
def scrape_google_search(query, num_results=3):
headers = {"User-Agent": "Mozilla/5.0"}
search_url = f"https://www.google.com/search?q={query}"
response = requests.get(search_url, headers=headers)
soup = BeautifulSoup(response.text, "html.parser")
results = []
for g in soup.find_all('div', class_='tF2Cxc')[:num_results]:
title = g.find('h3').text
link = g.find('a')['href']
results.append(f"{title}: {link}")
return "\n".join(results)
def generate_response(query, retriever=None, use_web_search=False):
knowledge = ""
if retriever:
relevant_docs = retriever.get_relevant_documents(query)
knowledge += "\n".join([doc.page_content for doc in relevant_docs])
if use_web_search:
web_results = scrape_google_search(query)
knowledge += f"\n\nWeb Search Results:\n{web_results}"
chat_history = memory.load_memory_variables({}).get("chat_history", "")
context = (
f"This is a conversation with ParvizGPT, an AI model designed by Amir Mahdi Parviz from Kermanshah University of Technology (KUT), "
f"to help with tasks like answering questions in Persian, providing recommendations, and decision-making."
)
if knowledge:
context += f"\n\nRelevant Knowledge:\n{knowledge}"
if chat_history:
context += f"\n\nChat History:\n{chat_history}"
context += f"\n\nYou: {query}\nParvizGPT:"
chat_completion = client.chat.completions.create(
messages=[{"role": "user", "content": context}],
model="llama-3.3-70b-versatile",
)
response = chat_completion.choices[0].message.content.strip()
memory.save_context({"input": query}, {"output": response})
return response
def upload_and_process(file, progress_display):
try:
global retriever
progress_updates = []
retriever = process_pdf_with_langchain(file.name, lambda msg: progress_updates.append(msg))
return "\n".join(progress_updates), "File uploaded and processed successfully."
except Exception as e:
return "", f"Error processing file: {e}"
def gradio_interface(user_message, chat_box, enable_web_search=False):
global retriever
response = generate_response(user_message, retriever=retriever, use_web_search=enable_web_search)
chat_box.append(("You", user_message))
chat_box.append(("ParvizGPT", response))
return chat_box
def clear_memory():
memory.clear()
return []
retriever = None
with gr.Blocks() as interface:
gr.Markdown("## ParvizGPT")
with gr.Row():
chat_box = gr.Chatbot(label="Chat History", value=[])
with gr.Row():
user_message = gr.Textbox(
label="Your Message",
placeholder="Type your message here and press Enter...",
lines=1,
interactive=True,
)
with gr.Row():
clear_memory_btn = gr.Button("Clear Memory", interactive=True)
enable_web_search = gr.Checkbox(label="🌐Enable Web Search", value=False, interactive=True)
with gr.Row():
pdf_upload = gr.UploadButton(label="📄 Upload Your PDF", file_types=[".pdf"])
progress_display = gr.Textbox(label="Progress", placeholder="Progress updates will appear here", interactive=True)
with gr.Row():
submit_btn = gr.Button("Submit")
pdf_upload.upload(upload_and_process, inputs=[pdf_upload, progress_display], outputs=[progress_display])
submit_btn.click(gradio_interface, inputs=[user_message, chat_box, enable_web_search], outputs=chat_box)
user_message.submit(gradio_interface, inputs=[user_message, chat_box, enable_web_search], outputs=chat_box)
clear_memory_btn.click(clear_memory, inputs=[], outputs=chat_box)
interface.launch() |