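"""ParvizGPT: a small retrieval-augmented Gradio app that answers questions in Persian
about an uploaded PDF or TXT file, using HuggingFace embeddings, an in-memory vector
store, and a Groq-hosted chat model."""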
import os
import gradio as gr
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Persian sentence embeddings, an in-memory vector store, and a character splitter
# for chunking uploaded documents.
embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
vector_store = InMemoryVectorStore(embeddings)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Read the Groq API key from the environment rather than hard-coding it in the source.
model = ChatGroq(api_key=os.environ.get("GROQ_API_KEY"), model_name="deepseek-r1-distill-llama-70b")

def process_file(file_path):
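    """Extract raw text from the uploaded file (PDF or TXT); return None if no file was given."""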

    if not file_path:
        return None

    file_extension = os.path.splitext(file_path)[1].lower()

    try:
        if file_extension == ".pdf":
            from pypdf import PdfReader
            reader = PdfReader(file_path)
            return "\n".join(page.extract_text() for page in reader.pages)
        elif file_extension == ".txt":
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read()
        else:
            raise ValueError(f"Unsupported file type: {file_extension}")
    except Exception as e:
        raise RuntimeError(f"Error processing file: {str(e)}")

def answer_query(query, file_path):
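    """Answer a query against the uploaded file: chunk and index the file (if provided),
    retrieve the two most similar chunks, and prompt the Groq model with them.

    Note: the file is re-chunked and re-embedded into the vector store on every call.
    """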

    try:
        file_content = process_file(file_path) if file_path else None
        if file_content:
            file_docs = [Document(page_content=file_content, metadata={"source": "uploaded_file"})]
            file_splits = text_splitter.split_documents(file_docs)
            vector_store.add_documents(file_splits)

        retrieved_docs = vector_store.similarity_search(query, k=2)
        knowledge = "\n\n".join(doc.page_content for doc in retrieved_docs)

        response = model.invoke(
            f"You are ParvizGPT, an AI assistant created by Amir Mahdi Parviz, a student at Kermanshah University of Technology (KUT). "
            f"Your primary purpose is to assist users by answering their questions in **Persian (Farsi)**. "
            f"Always respond in Persian unless explicitly asked to respond in another language."
            f"Related Information:\n{knowledge}\n\nQuestion:{query}\nAnswer:"
        )

        return response.content

    except Exception as e:
        return f"Error: {str(e)}"

def chat_with_bot(query, file):
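    """Gradio callback: resolve the uploaded file's path (if any) and delegate to answer_query."""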

    file_path = file.name if file else None
    response = answer_query(query, file_path)
    return response

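# Gradio UI: a file-upload box and a question box; labels, placeholder, and button text are in
# Persian ("upload your file (PDF or TXT)", "enter your question", "submit", "answer").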
with gr.Blocks() as demo:
    gr.Markdown("Parviz Rager")
    gr.Markdown("فایل خود را آپلود کنید (PDF یا TXT) و سوالات خود را بپرسید.")

    with gr.Row():
        file_input = gr.File(label="فایل خود را آپلود کنید (PDF یا TXT)", file_types=[".pdf", ".txt"])
        query_input = gr.Textbox(label="سوال خود را وارد کنید", placeholder="مثلاً: معایب سرمایه‌گذاری در صندوق فیروزه موفقیت چیست؟")

    submit_button = gr.Button("ارسال")
    output = gr.Textbox(label="پاسخ", interactive=False)

    submit_button.click(fn=chat_with_bot, inputs=[query_input, file_input], outputs=output)

demo.launch()