import asyncio
from typing import AsyncGenerator

import cohere


async def run_model_stream(
    api_key: str, model: str, prompt: str
) -> AsyncGenerator[str, None]:
    """
    Run the Cohere model with a streaming response.

    Args:
        api_key: The API key to use for this request
        model: The model name to use
        prompt: The user's input prompt

    Yields:
        str: Chunks of the generated response
    """
    try:
        client = cohere.Client(api_key=api_key)
        # client.chat() is a blocking call, so run it in the default
        # thread-pool executor to avoid stalling the event loop
        response = await asyncio.get_running_loop().run_in_executor(
            None,
            lambda: client.chat(
                chat_history=[],
                message=prompt,
                model=model,  # use the model name directly from MODELS.csv
                stream=True,
                temperature=0.7,
            ),
        )
        # Process each streamed event, yielding its text chunk
        for event in response:
            if hasattr(event, "text") and event.text:
                # Yield control back to the event loop between chunks
                await asyncio.sleep(0)
                yield event.text
    except Exception as e:
        raise RuntimeError(f"Error with Cohere API: {e}") from e
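

# Usage sketch (added for illustration, not part of the original module):
# consume the async generator chunk by chunk. The API key and model name
# below are placeholders, not values confirmed by this project.
async def demo_stream() -> None:
    async for chunk in run_model_stream(
        "YOUR_COHERE_API_KEY",  # placeholder key
        "command-r",            # assumed model name for illustration
        "Write one sentence about the sea.",
    ):
        print(chunk, end="", flush=True)
    print()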


async def run_model(api_key: str, model: str, prompt: str) -> str:
    """
    Run the Cohere model with the provided API key and prompt (non-streaming).

    Args:
        api_key: The API key to use for this request
        model: The model name to use
        prompt: The user's input prompt

    Returns:
        str: The generated response
    """
    response = ""
    async for chunk in run_model_stream(api_key, model, prompt):
        response += chunk
    return response
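

# Usage sketch (added for illustration): exercise both helpers end to end.
# Requires a valid Cohere API key in place of the placeholders above.
if __name__ == "__main__":
    async def _demo() -> None:
        await demo_stream()
        reply = await run_model(
            "YOUR_COHERE_API_KEY",  # placeholder key
            "command-r",            # assumed model name for illustration
            "Write one sentence about the sky.",
        )
        print(reply)

    asyncio.run(_demo())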