Update app.py
app.py CHANGED

```diff
@@ -58,22 +58,34 @@ After writing the document, please provide a list of sources used in your respon
 
     # Use Hugging Face API
     client = InferenceClient(model, token=huggingface_token)
-
+    full_response = ""
+
     try:
-        for …
-            response …
+        for _ in range(num_calls):
+            for response in client.chat_completion(
                 messages=[{"role": "user", "content": prompt}],
                 max_tokens=6000,
                 temperature=temperature,
-            …
-            …
-            …
-            …
-            …
+                stream=True,
+                top_p=0.9,
+            ):
+                if isinstance(response, dict) and "choices" in response:
+                    for choice in response["choices"]:
+                        if "delta" in choice and "content" in choice["delta"]:
+                            chunk = choice["delta"]["content"]
+                            full_response += chunk
+                            yield full_response, ""
+                else:
+                    logging.error("Unexpected response format or missing attributes in the response object.")
+                    break
     except Exception as e:
         logging.error(f"Error in get_response_with_search: {str(e)}")
         yield f"An error occurred while processing your request: {str(e)}", ""
 
+    if not full_response:
+        logging.warning("No response generated from the model")
+        yield "No response generated from the model.", ""
+
 async def respond(message, history, model, temperature, num_calls, use_embeddings):
     logging.info(f"User Query: {message}")
     logging.info(f"Model Used: {model}")
```
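For reference, a minimal, self-contained sketch of the streaming pattern the added code relies on: consuming `client.chat_completion(..., stream=True)` chunk by chunk and accumulating the deltas. The model id, token, and prompt below are placeholder assumptions, not values from this commit, and note that current `huggingface_hub` releases yield typed chunk objects whose deltas are read as attributes rather than dict keys:

```python
# Minimal sketch (not from the commit): streaming a chat completion with
# huggingface_hub's InferenceClient. Model id, token, and prompt are
# placeholder assumptions.
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token="hf_...")

full_response = ""
for chunk in client.chat_completion(
    messages=[{"role": "user", "content": "Write one sentence about the sea."}],
    max_tokens=100,
    temperature=0.7,
    stream=True,
):
    # In current huggingface_hub releases each chunk is a
    # ChatCompletionStreamOutput; the incremental text lives at
    # chunk.choices[0].delta.content (it can be None for control chunks).
    delta = chunk.choices[0].delta.content
    if delta:
        full_response += delta
        print(delta, end="", flush=True)
```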