gufett0 committed
Commit ed51056 · 1 Parent(s): 91d2747

switched to chat engine

Files changed (2)
  1. backend.py +48 -12
  2. interface.py +7 -3
backend.py CHANGED
@@ -55,27 +55,63 @@ def build_index():
 
 @spaces.GPU(duration=20)
 def handle_query(query_str, chathistory):
-    index = build_index()
 
-    memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
-    chat_engine = index.as_chat_engine(
+    index = build_index()
+
+    qa_prompt_str = (
+        "Context information is below.\n"
+        "---------------------\n"
+        "{context_str}\n"
+        "---------------------\n"
+        "Given the context information and not prior knowledge, "
+        "answer the question: {query_str}\n"
+    )
+
+    # Text QA Prompt
+    chat_text_qa_msgs = [
+        (
+            "system",
+            "Sei un assistente italiano di nome Ossy che risponde solo alle domande o richieste pertinenti. ",
+        ),
+        ("user", qa_prompt_str),
+    ]
+    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)
+
+    try:
+        # Create a streaming query engine
+        """query_engine = index.as_query_engine(text_qa_template=text_qa_template, streaming=False, similarity_top_k=1)
+
+        # Execute the query
+        streaming_response = query_engine.query(query_str)
+
+        r = streaming_response.response
+        cleaned_result = r.replace("<end_of_turn>", "").strip()
+        yield cleaned_result"""
+
+        # Stream the response
+        """outputs = []
+        for text in streaming_response.response_gen:
+
+            outputs.append(str(text))
+            yield "".join(outputs)"""
+
+        memory = ChatMemoryBuffer.from_defaults(token_limit=1500)
+        chat_engine = index.as_chat_engine(
         chat_mode="context",
         memory=memory,
         system_prompt=(
             "Sei un assistente italiano di nome Ossy che risponde solo alle domande o richieste pertinenti. "
-        ),
-    )
-
-    try:
+            ),
+        )
+
         response = chat_engine.stream_chat(query_str)
+        #response = chat_engine.chat(query_str)
         for token in response.response_gen:
-            if token.strip():  # Only yield non-empty tokens
-                yield token
+            yield token
+
+
     except Exception as e:
         yield f"Error processing query: {str(e)}"
-    finally:
-        # You might want to add any cleanup code here
-        pass
 
 
 
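Note: with this change, handle_query is a generator that yields raw tokens as the chat engine streams them. A minimal consumer sketch, assuming the Space front end is a Gradio ChatInterface; the import path, the respond helper, and the accumulation of partial text are assumptions for illustration, not taken from this commit:

# Sketch of a streaming consumer for handle_query (assumed front end: Gradio).
# The growing "partial" string is re-yielded so the UI re-renders each update.
import gradio as gr

from backend import handle_query  # assumed module layout


def respond(message, history):
    partial = ""
    for token in handle_query(message, history):
        partial += token
        yield partial


demo = gr.ChatInterface(fn=respond)

if __name__ == "__main__":
    demo.launch()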
interface.py CHANGED
@@ -69,7 +69,11 @@ class GemmaLLMInterface(CustomLLM):
     def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
         streamer, generate_kwargs = self._prepare_generation(prompt)
 
-        self.model.generate(**generate_kwargs)  # Run synchronously.
+        t = Thread(target=self.model.generate, kwargs=generate_kwargs)
+        t.start()
 
-        for new_token in streamer:
-            yield CompletionResponse(text=new_token)
+        try:
+            for new_token in streamer:
+                yield CompletionResponse(text=new_token)
+        except StopIteration:
+            return
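The interface.py change moves model.generate onto a worker thread so the streamer can be drained while generation is still running, instead of only after a blocking call returns. A standalone sketch of that thread-plus-streamer pattern, assuming _prepare_generation wraps a transformers TextIteratorStreamer; the model name and generation settings below are illustrative, not taken from this repo:

# Standalone sketch of the thread-plus-streamer pattern used above
# (assumption: generation is built around a transformers TextIteratorStreamer;
# model/tokenizer identifiers and max_new_tokens are illustrative).
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it")

inputs = tokenizer("Ciao, chi sei?", return_tensors="pt")
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=128)

# model.generate blocks, so it runs in a worker thread while the main
# thread consumes decoded text chunks from the streamer as they arrive.
thread = Thread(target=model.generate, kwargs=generate_kwargs)
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()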