update

routes/llm.py +22 -25
CHANGED
@@ -2,7 +2,6 @@ import json
 import logging
 import os
 from typing import Annotated, AsyncGenerator, List, Optional
-import asyncio
 
 from fastapi import APIRouter, Depends, HTTPException
 from fastapi.responses import StreamingResponse
@@ -71,16 +70,13 @@ def insert_search_results_to_message(
     return False
 
 def try_insert_search_results(
-    chat_request: ChatRequest, search_results:
+    chat_request: ChatRequest, search_results: str,
 ) -> bool:
-    i = 0
     for msg in reversed(chat_request.history):
-        if msg.role == "user"
-            msg.searchResults = search_results
-            msg.searchEntities =
-
-        if i == len(search_results):
-            return True
+        if msg.role == "user":
+            msg.searchResults = search_results
+            msg.searchEntities = []
+            return True
     return False
 
 def try_insert_reasoning(
@@ -125,7 +121,6 @@ def collapse_history_to_first_message(chat_request: ChatRequest) -> ChatRequest:
         searchResults=''
     )
     return ChatRequest(history=[new_message])
-
 
 async def sse_generator(request: ChatRequest, llm_api: DeepInfraApi, system_prompt: str,
                         predict_params: LlmPredictParams,
@@ -136,6 +131,17 @@ async def sse_generator(request: ChatRequest, llm_api: DeepInfraApi, system_prom
     Generator for streaming the LLM response via SSE.
     """
     try:
+        old_history = request.history
+        new_history = [Message(
+            role=msg.role,
+            content=msg.content,
+            reasoning=msg.reasoning,
+            searchResults=msg.searchResults[:500] + "..." if msg.searchResults else None,
+            searchEntities=[],
+        ) for msg in old_history]
+        request.history = new_history
+
+
         qe_result = await dialogue_service.get_qe_result(request.history)
         try_insert_reasoning(request, qe_result.debug_message)
 
@@ -164,22 +170,13 @@ async def sse_generator(request: ChatRequest, llm_api: DeepInfraApi, system_prom
         dataset = dataset_service.get_current_dataset()
         if dataset is None:
             raise HTTPException(status_code=400, detail="Dataset not found")
-
-
-
+        _, chunk_ids, scores = entity_service.search_similar(
+            qe_result.search_query,
+            dataset.id,
+            [],
+        )
         text_chunks = entity_service.build_text(chunk_ids, scores)
 
-
-        async def build_text_async(entities):
-            return await asyncio.to_thread(entity_service.build_text, entities)
-
-        # all_text_chunks = [text_chunks] + [entity_service.build_text(entities) for entities in previous_entities]
-        tasks = [build_text_async(entities) for entities in previous_entities]
-        built_texts = await asyncio.gather(*tasks)
-
-        all_text_chunks = [text_chunks] + built_texts
-        all_entities = [chunk_ids] + previous_entities
-
         search_results_event = {
             "event": "search_results",
             "data": {
@@ -191,7 +188,7 @@ async def sse_generator(request: ChatRequest, llm_api: DeepInfraApi, system_prom
 
         # new_message = f'<search-results>\n{text_chunks}\n</search-results>\n{last_query.content}'
 
-        try_insert_search_results(request,
+        try_insert_search_results(request, text_chunks)
     except Exception as e:
         logger.error(f"Error in SSE chat stream while searching: {str(e)}", stack_info=True)
         yield "data: {\"event\": \"error\", \"data\":\""+str(e)+"\" }\n\n"
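
For context, a minimal sketch of what the rewritten try_insert_search_results does: it walks the history from newest to oldest and attaches the search-results text to the first user message it finds, instead of distributing a list of results with a counter as the removed code did. The Message and ChatRequest classes below are simplified stand-ins, not the project's actual models.

from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class Message:
    # Stand-in for the project's Message model.
    role: str
    content: str
    reasoning: Optional[str] = None
    searchResults: Optional[str] = None
    searchEntities: List[str] = field(default_factory=list)

@dataclass
class ChatRequest:
    # Stand-in for the project's ChatRequest model.
    history: List[Message]

def try_insert_search_results(chat_request: ChatRequest, search_results: str) -> bool:
    # Newest-first scan: attach the results to the latest user message.
    for msg in reversed(chat_request.history):
        if msg.role == "user":
            msg.searchResults = search_results
            msg.searchEntities = []
            return True
    return False

# Usage: the latest user message gets the results attached.
req = ChatRequest(history=[
    Message(role="user", content="first question"),
    Message(role="assistant", content="an answer"),
    Message(role="user", content="follow-up"),
])
assert try_insert_search_results(req, "chunk A\nchunk B")
assert req.history[-1].searchResults == "chunk A\nchunk B"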
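The new block at the top of sse_generator compacts the incoming history before query expansion: each message is copied and any previously attached search results are cut to 500 characters. A standalone sketch of that step, reusing the stand-in Message above; note that the conditional expression covers the whole right-hand side, so a message without search results gets None, and that "..." is appended even when the results were already shorter than 500 characters.

def compact_history(history: List[Message]) -> List[Message]:
    # Copy each message, truncating stale search results so earlier
    # turns don't inflate the prompt; empty/None results stay None.
    return [Message(
        role=msg.role,
        content=msg.content,
        reasoning=msg.reasoning,
        searchResults=msg.searchResults[:500] + "..." if msg.searchResults else None,
        searchEntities=[],
    ) for msg in history]

msg = Message(role="user", content="q", searchResults="chunk A")
assert compact_history([msg])[0].searchResults == "chunk A..."  # "..." appended unconditionally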