import asyncio
from typing import AsyncGenerator

import cohere


async def run_model_stream(
    api_key: str, model: str, prompt: str
) -> AsyncGenerator[str, None]:
    """
    Run the Cohere model with a streaming response.

    Args:
        api_key: The API key to use for this request
        model: The model name to use
        prompt: The user's input prompt

    Yields:
        str: Chunks of the generated response
    """
    try:
        client = cohere.Client(api_key=api_key)
        # client.chat() is a blocking call, so run it in the default
        # thread-pool executor to avoid stalling the event loop
        response = await asyncio.get_running_loop().run_in_executor(
            None,
            lambda: client.chat(
                chat_history=[],
                message=prompt,
                model=model,  # use the model name directly from MODELS.csv
                stream=True,
                temperature=0.7,
            ),
        )
        # Process each streamed event, yielding its text chunk
        for event in response:
            if hasattr(event, "text") and event.text:
                # Yield control back to the event loop between chunks
                await asyncio.sleep(0)
                yield event.text
    except Exception as e:
        raise RuntimeError(f"Error with Cohere API: {e}") from e
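

# Usage sketch (added for illustration, not part of the original module):
# consume the async generator chunk by chunk. The API key and model name
# below are placeholders, not values confirmed by this project.
async def demo_stream() -> None:
    async for chunk in run_model_stream(
        "YOUR_COHERE_API_KEY",  # placeholder key
        "command-r",            # assumed model name for illustration
        "Write one sentence about the sea.",
    ):
        print(chunk, end="", flush=True)
    print()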


async def run_model(api_key: str, model: str, prompt: str) -> str:
    """
    Run the Cohere model with the provided API key and prompt (non-streaming).

    Args:
        api_key: The API key to use for this request
        model: The model name to use
        prompt: The user's input prompt

    Returns:
        str: The generated response
    """
    response = ""
    async for chunk in run_model_stream(api_key, model, prompt):
        response += chunk
    return response
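

# Usage sketch (added for illustration): exercise both helpers end to end.
# Requires a valid Cohere API key in place of the placeholders above.
if __name__ == "__main__":
    async def _demo() -> None:
        await demo_stream()
        reply = await run_model(
            "YOUR_COHERE_API_KEY",  # placeholder key
            "command-r",            # assumed model name for illustration
            "Write one sentence about the sky.",
        )
        print(reply)

    asyncio.run(_demo())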