Create app.py
app.py ADDED
@@ -0,0 +1,93 @@
import warnings
warnings.filterwarnings('ignore')

import torch
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
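
# Dependencies assumed for the Space (not pinned in this commit): transformers,
# torch, gradio, and accelerate, which device_map="auto" requires.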

# Load the chat model (gated on Hugging Face; the Space needs approved access
# to download it)
model_id = "meta-llama/Llama-2-7b-chat-hf"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",   # place layers on GPU/CPU automatically (needs accelerate)
    torch_dtype="auto"   # use the checkpoint's native precision
)

# Load a second LLM for summarization
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def generate_response(user_input: str, context_summary: str = "") -> str:
    # Llama-2 chat prompt format, with the running summary injected as context
    prompt = f"""<s>[INST] <<SYS>>
You are a helpful assistant.
<</SYS>>

Previous summary of conversation:
{context_summary}

Current user input:
{user_input}
[/INST]
"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    output = model.generate(
        **inputs,
        max_new_tokens=256,
        pad_token_id=tokenizer.eos_token_id,
        temperature=0.7,  # controls how random the replies are
        top_p=0.9,        # nucleus sampling
        do_sample=True    # enable sampling
    )

    # Decode only the newly generated tokens; slicing the decoded string with
    # len(prompt) is unreliable because skip_special_tokens drops the <s> tag.
    generated = output[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(generated, skip_special_tokens=True).strip()
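
# Illustrative call sequence (hypothetical values, not part of the app flow):
#   reply1 = generate_response("What is nucleus sampling?")            # first turn, no context
#   reply2 = generate_response("And temperature?", summary_of_reply1)  # prior summary as memory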

# chat_history acts as the in-process memory store
chat_history = []

def gradio_chat(user_input):
    global chat_history  # refer to the module-level store

    # Pull the previous summary, if there is one
    context_summary = chat_history[-1]["summary"] if chat_history else ""

    # Generate the model's reply
    response = generate_response(user_input, context_summary)

    # Summarize the reply so the next turn can use it as context
    summary = summarizer(
        response,
        max_length=100,
        min_length=30,
        do_sample=False
    )[0]["summary_text"]

    # Store the turn in memory
    chat_history.append({
        "user_input": user_input,
        "response": response,
        "summary": summary
    })

    # Format the history as (user, bot) pairs for display
    chat_display = [(entry["user_input"], entry["response"]) for entry in chat_history]

    return chat_display
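
# Design note: the "memory" is a rolling summary of only the assistant's last
# reply, so earlier turns fade once they drop out of that summary; summarizing
# the full history each turn would keep more context at the cost of an extra
# summarization pass.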

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 AI Chatbot with Memory\nKeep chatting and watch it remember!")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your message and press Enter...")

    # On submit, run the chat function and update the chatbot display
    msg.submit(fn=gradio_chat, inputs=msg, outputs=chatbot, scroll_to_output=True)

    # Then clear the textbox automatically
    msg.submit(lambda: "", None, msg)

# Launch the interface
demo.launch()