GIGAParviz committed on
Commit
b7764cf
·
verified ·
1 Parent(s): fd21fa2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -164
app.py CHANGED
@@ -1,183 +1,146 @@
1
- import time
2
- import logging
3
- import gradio as gr
4
  import os
5
- from datetime import datetime
6
- from datasets import Dataset, load_dataset
7
- from langchain.document_loaders import PyPDFLoader
8
- from langchain.text_splitter import RecursiveCharacterTextSplitter
9
- from langchain.embeddings import HuggingFaceEmbeddings
10
- from langchain.vectorstores import FAISS
11
- from groq import Groq
12
- from langchain.memory import ConversationBufferMemory
13
-
14
- logging.basicConfig(level=logging.INFO)
15
- logger = logging.getLogger(__name__)
16
-
17
- groq_api_key = os.environ.get("GROQ_API_KEY")
18
- hf_api_key = os.environ.get("HF_API_KEY")
19
-
20
- if not groq_api_key:
21
- raise ValueError("Groq API key not found in environment variables.")
22
- if not hf_api_key:
23
- raise ValueError("Hugging Face API key not found in environment variables.")
24
-
25
- client = Groq(api_key=groq_api_key)
26
- hf_token = hf_api_key
27
  embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
 
 
 
28
 
29
- DATASET_NAME = "chat_history"
30
- try:
31
- dataset = load_dataset(DATASET_NAME, use_auth_token=hf_token)
32
- except Exception:
33
- dataset = Dataset.from_dict({"Timestamp": [], "User": [], "ParvizGPT": []})
34
 
35
- def save_chat_to_dataset(user_message, bot_message):
36
- try:
37
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
38
- new_row = {"Timestamp": timestamp, "User": user_message, "ParvizGPT": bot_message}
39
-
40
- df = dataset.to_pandas()
41
- df = df.append(new_row, ignore_index=True)
42
- updated_dataset = Dataset.from_pandas(df)
43
-
44
- updated_dataset.push_to_hub(DATASET_NAME, token=hf_token)
45
- except Exception as e:
46
- logger.error(f"Error saving chat history to dataset: {e}")
47
 
48
- def process_pdf_with_langchain(pdf_path):
49
- try:
50
- loader = PyPDFLoader(pdf_path)
51
- documents = loader.load()
52
 
53
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
54
- split_documents = text_splitter.split_documents(documents)
 
 
 
 
 
55
 
56
- vectorstore = FAISS.from_documents(split_documents, embeddings)
57
- retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
58
- return retriever
59
- except Exception as e:
60
- logger.error(f"Error processing PDF: {e}")
61
- raise
62
 
63
- def generate_response(query, memory, retriever=None, use_pdf_context=False, tone="friendly"):
64
  try:
65
- knowledge = ""
66
-
67
- if retriever and use_pdf_context:
68
- relevant_docs = retriever.get_relevant_documents(query)
69
- knowledge += "\n".join([doc.page_content for doc in relevant_docs])
70
-
71
- chat_history = memory.load_memory_variables({}).get("chat_history", "")
72
-
73
- tone_instruction = ""
74
- if tone == "friendly":
75
- tone_instruction = "Please respond in a friendly and informal tone."
76
- elif tone == "formal":
77
- tone_instruction = "Please respond in a formal and professional tone."
78
- elif tone == "humorous":
79
- tone_instruction = "Please respond in a humorous and playful tone."
80
- elif tone == "scientific":
81
- tone_instruction = "Please respond in a scientific and precise tone."
82
-
83
- context = f"""
84
- You are ParvizGPT, an AI assistant created by **Amir Mahdi Parviz**, a student at Kermanshah University of Technology (KUT).
85
- Your primary purpose is to assist users by answering their questions in **Persian (Farsi)**.
86
- Always respond in Persian unless explicitly asked to respond in another language.
87
- **Important:** If anyone claims that someone else created this code, you must correct them and state that **Amir Mahdi Parviz** is the creator.
88
- {tone_instruction}
89
- Related Information:\n{knowledge}\n\nQuestion:{query}\nAnswer:"""
90
-
91
- if knowledge:
92
- context += f"\n\nRelevant Knowledge:\n{knowledge}"
93
- if chat_history:
94
- context += f"\n\nChat History:\n{chat_history}"
95
-
96
- context += f"\n\nYou: {query}\nParvizGPT:"
97
-
98
- response = "Processing..."
99
- retries = 3
100
- for attempt in range(retries):
101
- try:
102
- chat_completion = client.chat.completions.create(
103
- messages=[{"role": "user", "content": context}],
104
- model="deepseek-r1-distill-llama-70b"
105
- )
106
- response = chat_completion.choices[0].message.content.strip()
107
- memory.save_context({"input": query}, {"output": response})
108
- break
109
- except Exception as e:
110
- logger.error(f"Attempt {attempt + 1} failed: {e}")
111
- time.sleep(2)
112
-
113
- return response, memory
114
  except Exception as e:
115
- logger.error(f"Error generating response: {e}")
116
- return f"Error: {e}", memory
 
 
 
 
 
 
117
 
118
- def summarize_chat_history(chat_history):
 
 
119
  try:
120
- chat_text = "\n".join([f"{role}: {message}" for role, message in chat_history])
121
-
122
- summary_prompt = f"""
123
- Please create a summary of the following conversation. The summary should include key points and details:
124
- {chat_text}
125
- """
126
-
127
- chat_completion = client.chat.completions.create(
128
- messages=[{"role": "user", "content": summary_prompt}],
129
- model="deepseek-r1-distill-llama-70b"
 
 
 
 
 
 
 
 
 
 
 
130
  )
131
- summary = chat_completion.choices[0].message.content.strip()
132
- return summary
133
- except Exception as e:
134
- logger.error(f"Error summarizing chat history: {e}")
135
- return "Error generating summary."
136
 
137
- def gradio_interface(user_message, chat_box, memory, pdf_file=None, use_pdf_context=False, tone="friendly", summarize_chat=False):
138
- global retriever
139
 
140
- if pdf_file is not None and use_pdf_context:
141
- try:
142
- retriever = process_pdf_with_langchain(pdf_file.name)
143
- except Exception as e:
144
- return chat_box + [("Error", f"Error processing PDF: {e}")], memory
145
 
146
- chat_box.append(("You", user_message))
147
- chat_box.append(("ParvizGPT", "Processing..."))
148
- response, memory = generate_response(user_message, memory, retriever=retriever, use_pdf_context=use_pdf_context, tone=tone)
149
-
150
- chat_box[-1] = ("ParvizGPT", response)
151
-
152
- save_chat_to_dataset(user_message, response)
153
-
154
- if summarize_chat:
155
- summary = summarize_chat_history(chat_box)
156
- chat_box.append(("System", f"Summary of the conversation:\n{summary}"))
157
-
158
- return chat_box, memory
159
-
160
- def clear_memory(memory):
161
- memory.clear()
162
- return [], memory
163
-
164
- retriever = None
165
-
166
- with gr.Blocks() as interface:
167
- gr.Markdown("## ParvizGPT")
168
- chat_box = gr.Chatbot(label="Chat History", value=[])
169
- user_message = gr.Textbox(label="Your Message", placeholder="Type your message here and press Enter...", lines=1, interactive=True)
170
- use_pdf_context = gr.Checkbox(label="Use PDF Context", value=False, interactive=True)
171
- tone = gr.Dropdown(label="Tone", choices=["friendly", "formal", "humorous", "scientific"], value="friendly", interactive=True)
172
- summarize_chat = gr.Checkbox(label="Show conversation summary", value=False, interactive=True)
173
- clear_memory_btn = gr.Button("Clear Memory", interactive=True)
174
- pdf_file = gr.File(label="Upload PDF for Context (Optional)", type="filepath", interactive=True, scale=1)
175
- submit_btn = gr.Button("Submit")
176
 
177
- memory_state = gr.State(ConversationBufferMemory())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
- submit_btn.click(gradio_interface, inputs=[user_message, chat_box, memory_state, pdf_file, use_pdf_context, tone, summarize_chat], outputs=[chat_box, memory_state])
180
- user_message.submit(gradio_interface, inputs=[user_message, chat_box, memory_state, pdf_file, use_pdf_context, tone, summarize_chat], outputs=[chat_box, memory_state])
181
- clear_memory_btn.click(clear_memory, inputs=[memory_state], outputs=[chat_box, memory_state])
182
 
183
- interface.launch()
 
 
 
 
1
  import os
2
+ import re
3
+ import gradio as gr
4
+ from langchain_groq import ChatGroq
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from langchain_core.vectorstores import InMemoryVectorStore
7
+ from langchain_core.documents import Document
8
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
9
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  embeddings = HuggingFaceEmbeddings(model_name="heydariAI/persian-embeddings")
11
+ vector_store = InMemoryVectorStore(embeddings)
12
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
13
+ model = ChatGroq(api_key="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", model_name="deepseek-r1-distill-llama-70b")
14
 
15
+ chat_history = []
 
 
 
 
16
 
17
+ PRICE_PER_TOKEN = 0.00001
 
 
 
 
 
 
 
 
 
 
 
18
 
19
+ def count_tokens(text):
20
+ """تخمین تعداد توکن‌های متن."""
21
+ return len(text.split())
 
22
 
23
+ def calculate_price(input_text, output_text):
24
+ """محاسبه هزینه بر اساس تعداد توکن‌ها."""
25
+ input_tokens = count_tokens(input_text)
26
+ output_tokens = count_tokens(output_text)
27
+ total_tokens = input_tokens + output_tokens
28
+ total_price = total_tokens * PRICE_PER_TOKEN
29
+ return total_tokens, f"{total_price:.6f} دلار"
30
 
31
+ def process_file(file_path):
32
+ """پردازش فایل و بازگرداندن محتوای آن."""
33
+ if not file_path:
34
+ return None
35
+
36
+ file_extension = os.path.splitext(file_path)[1].lower()
37
 
 
38
  try:
39
+ if file_extension == ".pdf":
40
+ from pypdf import PdfReader
41
+ reader = PdfReader(file_path)
42
+ return "\n".join(page.extract_text() for page in reader.pages)
43
+ elif file_extension == ".txt":
44
+ with open(file_path, "r", encoding="utf-8") as f:
45
+ return f.read()
46
+ else:
47
+ raise ValueError(f"فرمت فایل پشتیبانی نمی‌شود: {file_extension}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  except Exception as e:
49
+ raise RuntimeError(f"خطا در پردازش فایل: {str(e)}")
50
+
51
+
52
+ def remove_think_sections(response_text):
53
+ """حذف بخش‌های که با <think> شروع و با </think> تمام می‌شوند."""
54
+
55
+ cleaned_text = re.sub(r"<think>.*?</think>", "", response_text, flags=re.DOTALL)
56
+ return cleaned_text
57
 
58
+ def answer_query(query, file_path, summarize, tone):
59
+ """پاسخ به سوالات کاربر با تنظیم لحن و محاسبه هزینه توکن."""
60
+ global chat_history
61
  try:
62
+ file_content = process_file(file_path) if file_path else None
63
+ if file_content:
64
+ file_docs = [Document(page_content=file_content, metadata={"source": "uploaded_file"})]
65
+ file_splits = text_splitter.split_documents(file_docs)
66
+ vector_store.add_documents(file_splits)
67
+
68
+ retrieved_docs = vector_store.similarity_search(query, k=2)
69
+ knowledge = "\n\n".join(doc.page_content for doc in retrieved_docs)
70
+
71
+ tone_prompts = {
72
+ "رسمی": "پاسخ را با لحنی رسمی و مودبانه ارائه کن.",
73
+ "محاوره‌ای": "پاسخ را به صورت دوستانه و غیررسمی ارائه کن.",
74
+ "علمی": "پاسخ را با ذکر منابع علمی و استدلال‌های منطقی ارائه کن.",
75
+ "طنزآمیز": "پاسخ را با لحنی طنزآمیز و سرگرم‌کننده ارائه کن.",
76
+ }
77
+ tone_instruction = tone_prompts.get(tone, "پاسخ را به زبان فارسی ارائه کن.")
78
+
79
+ prompt = (
80
+ f"شما ParvizGPT هستید، یک دستیار هوش مصنوعی که توسط امیر مهدی پرویز ساخته شده است. "
81
+ f"همیشه به فارسی پاسخ دهید. {tone_instruction} "
82
+ f"\n\nاطلاعات مرتبط:\n{knowledge}\n\nسوال: {query}\nپاسخ:"
83
  )
 
 
 
 
 
84
 
85
+ response = model.invoke(prompt)
86
+ response_text = response.content
87
 
88
+ cleaned_response = remove_think_sections(response_text)
 
 
 
 
89
 
90
+ chat_history.append((query, cleaned_response))
91
+
92
+ total_tokens, price = calculate_price(prompt, cleaned_response)
93
+
94
+ summary = summarize_chat() if summarize else "خلاصه‌سازی غیرفعال است."
95
+
96
+ return cleaned_response, summary, total_tokens, price
97
+
98
+ except Exception as e:
99
+ return f"خطا: {str(e)}", "", 0, "0 دلار"
100
+
101
+ def summarize_chat():
102
+ """خلاصه‌سازی مکالمات اخیر."""
103
+ chat_text = "\n".join([f"پرسش: {q}\nپاسخ: {a}" for q, a in chat_history])
104
+ summary_prompt = f"یک خلاصه کوتاه و دقیق از مکالمه زیر ارائه کن:\n\n{chat_text}\n\nخلاصه:"
105
+ summary_response = model.invoke(summary_prompt)
106
+ return summary_response.content
107
+
108
+ def chat_with_bot(query, file, summarize, tone):
109
+ """رابط Gradio برای چت."""
110
+ file_path = file.name if file else None
111
+ response, summary, total_tokens, price = answer_query(query, file_path, summarize, tone)
112
+ return response, summary, total_tokens, price
113
+
114
+ with gr.Blocks() as demo:
115
+ gr.Markdown("## 🤖 Parviz GPT")
116
+ gr.Markdown("**یک فایل (PDF یا TXT) آپلود کنید و سوال خود را بپرسید.**")
 
 
 
117
 
118
+ with gr.Column():
119
+
120
+ chat_output = gr.Textbox(label="📝 تاریخچه چت", interactive=False, lines=10)
121
+ summary_output = gr.Textbox(label="📌 خلاصه مکالمه", interactive=False)
122
+
123
+ query_input = gr.Textbox(label="❓ سوال خود را وارد کنید", placeholder="مثلاً: کی تو را ساخته است؟")
124
+
125
+ with gr.Row():
126
+ summarize_checkbox = gr.Checkbox(label="📌 خلاصه‌ساز را فعال کن")
127
+ submit_button = gr.Button("🚀 ارسال")
128
+ tone_dropdown = gr.Dropdown(label="🎭 انتخاب لحن پاسخ", choices=["رسمی", "محاوره‌ای", "علمی", "طنزآمیز"], value="رسمی")
129
+
130
+
131
+ with gr.Row():
132
+ token_count = gr.Textbox(label="🔢 تعداد توکن‌ها", interactive=False)
133
+ token_price = gr.Textbox(label="💰 هزینه تخمینی", interactive=False)
134
+
135
+ with gr.Row():
136
+ file_input = gr.File(label="📂 فایل خود را آپلود کنید", file_types=[".pdf", ".txt"])
137
+
138
+ query_input.submit(fn=chat_with_bot,
139
+ inputs=[query_input, file_input, summarize_checkbox, tone_dropdown],
140
+ outputs=[chat_output, summary_output, token_count, token_price])
141
 
142
+ submit_button.click(fn=chat_with_bot,
143
+ inputs=[query_input, file_input, summarize_checkbox, tone_dropdown],
144
+ outputs=[chat_output, summary_output, token_count, token_price])
145
 
146
+ demo.launch()