Spaces:
Sleeping
Sleeping
switched to chat engine
Browse files- backend.py +16 -5
backend.py
CHANGED
@@ -12,6 +12,7 @@ from huggingface_hub import hf_hub_download
|
|
12 |
from llama_cpp import Llama
|
13 |
import spaces
|
14 |
from huggingface_hub import login
|
|
|
15 |
|
16 |
|
17 |
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
|
@@ -78,17 +79,15 @@ def handle_query(query_str, chathistory):
|
|
78 |
|
79 |
try:
|
80 |
# Create a streaming query engine
|
81 |
-
query_engine = index.as_query_engine(text_qa_template=text_qa_template, streaming=False, similarity_top_k=1)
|
82 |
|
83 |
# Execute the query
|
84 |
streaming_response = query_engine.query(query_str)
|
85 |
|
86 |
r = streaming_response.response
|
87 |
cleaned_result = r.replace("<end_of_turn>", "").strip()
|
88 |
-
yield cleaned_result
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
# Stream the response
|
93 |
"""outputs = []
|
94 |
for text in streaming_response.response_gen:
|
@@ -96,6 +95,18 @@ def handle_query(query_str, chathistory):
|
|
96 |
outputs.append(str(text))
|
97 |
yield "".join(outputs)"""
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
except Exception as e:
|
101 |
yield f"Error processing query: {str(e)}"
|
|
|
12 |
from llama_cpp import Llama
|
13 |
import spaces
|
14 |
from huggingface_hub import login
|
15 |
+
from llama_index.core.memory import ChatMemoryBuffer
|
16 |
|
17 |
|
18 |
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
|
|
|
79 |
|
80 |
try:
|
81 |
# Create a streaming query engine
|
82 |
+
"""query_engine = index.as_query_engine(text_qa_template=text_qa_template, streaming=False, similarity_top_k=1)
|
83 |
|
84 |
# Execute the query
|
85 |
streaming_response = query_engine.query(query_str)
|
86 |
|
87 |
r = streaming_response.response
|
88 |
cleaned_result = r.replace("<end_of_turn>", "").strip()
|
89 |
+
yield cleaned_result"""
|
90 |
+
|
|
|
|
|
91 |
# Stream the response
|
92 |
"""outputs = []
|
93 |
for text in streaming_response.response_gen:
|
|
|
95 |
outputs.append(str(text))
|
96 |
yield "".join(outputs)"""
|
97 |
|
98 |
+
memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
|
99 |
+
chat_engine = index.as_chat_engine(
|
100 |
+
chat_mode="context",
|
101 |
+
memory=memory,
|
102 |
+
system_prompt=(
|
103 |
+
"Sei un assistente italiano di nome Ossy che risponde solo alle domande o richieste pertinenti. "
|
104 |
+
),
|
105 |
+
)
|
106 |
+
|
107 |
+
response = chat_engine.chat(query_str)
|
108 |
+
yield response
|
109 |
+
|
110 |
|
111 |
except Exception as e:
|
112 |
yield f"Error processing query: {str(e)}"
|