Spaces:
Running
Running
File size: 1,701 Bytes
6ff1f88 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import cohere
import asyncio
from typing import AsyncGenerator
async def run_model_stream(api_key: str, model: str, prompt: str) -> AsyncGenerator[str, None]:
    """
    Run the Cohere model with streaming response.

    The Cohere client is driven through blocking calls, so both the initial
    chat request and every subsequent chunk read are executed in the event
    loop's default executor. This keeps the event loop free to run other
    tasks while waiting on the network.

    Args:
        api_key: The API key to use for this request
        model: The model name to use
        prompt: The user's input prompt

    Yields:
        str: Chunks of the generated response

    Raises:
        Exception: If client creation, the request, or reading the stream
            fails. The message is prefixed with "Error with Cohere API: "
            and the original error is attached as ``__cause__``.
    """
    loop = asyncio.get_running_loop()
    # Unique marker returned by next() when the stream ends; a StopIteration
    # raised inside the executor would not propagate cleanly through a Future.
    exhausted = object()
    try:
        client = cohere.Client(api_key=api_key)
        # Create chat message with streaming.
        # NOTE(review): newer Cohere SDK releases may expect
        # client.chat_stream(...) instead of chat(stream=True) - confirm
        # against the installed SDK version.
        response = await loop.run_in_executor(
            None,
            lambda: client.chat(
                chat_history=[],
                message=prompt,
                model=model,  # Use model name directly from MODELS.csv
                stream=True,
                temperature=0.7,
            ),
        )
        events = iter(response)
        while True:
            # Fetch the next event off-loop: advancing the stream performs
            # blocking I/O that would otherwise stall every other task.
            event = await loop.run_in_executor(None, next, events, exhausted)
            if event is exhausted:
                break
            text = getattr(event, "text", None)
            if text:
                yield text
    except Exception as e:
        raise Exception(f"Error with Cohere API: {e}") from e
async def run_model(api_key: str, model: str, prompt: str) -> str:
    """
    Run the Cohere model and return the whole reply at once (non-streaming).

    Drains ``run_model_stream`` and concatenates every chunk it yields, in
    order, into a single string.

    Args:
        api_key: The API key to use for this request
        model: The model name to use
        prompt: The user's input prompt

    Returns:
        str: The generated response (empty string if no chunks were yielded)
    """
    pieces = [piece async for piece in run_model_stream(api_key, model, prompt)]
    return "".join(pieces)