Update app.py
app.py CHANGED
@@ -392,6 +392,92 @@ def get_ascii_weather(location: str):
     else:
         return {"error": f"Unable to fetch weather data. Status code: {response.status_code}"}
 
+
+
+class Model(BaseModel):
+    id: str
+    object: str
+    created: int
+    owned_by: str
+
+class Message(BaseModel):
+    role: str
+    content: str
+
+class CompletionRequest(BaseModel):
+    model: str
+    messages: List[Message]
+
+class CompletionResponse(BaseModel):
+    id: str
+    object: str
+    created: int
+    model: str
+    choices: List[Dict[str, Any]]
+    usage: Dict[str, int]
+
+models = [
+    {"id": "meta-llama/Meta-Llama-3-70B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"},
+    {"id": "google/gemma-2-27b-it", "object": "model", "created": 1686935002, "owned_by": "meta"},
+    {"id": "google/gemma-2-9b-it", "object": "model", "created": 1686935002, "owned_by": "ConsiousAI"},
+    {"id": "cognitivecomputations/dolphin-2.9.1-llama-3-70b", "object": "model", "created": 1686935002, "owned_by": "cognitivecomputations"},
+    {"id": "nvidia/Nemotron-4-340B-Instruct", "object": "model", "created": 1686935002, "owned_by": "nvidia"},
+    {"id": "Qwen/Qwen2-72B-Instruct", "object": "model", "created": 1686935002, "owned_by": "qwen"},
+    {"id": "microsoft/Phi-3-medium-4k-instruct", "object": "model", "created": 1686935002, "owned_by": "microsoft"},
+    {"id": "google/gemma-2-9b-it", "object": "model", "created": 1686935002, "owned_by": "ConsiousAI"},
+    {"id": "openchat/openchat-3.6-8b", "object": "model", "created": 1686935002, "owned_by": "unknown"},
+    {"id": "mistralai/Mistral-7B-Instruct-v0.3", "object": "model", "created": 1686935002, "owned_by": "mistral"},
+    {"id": "meta-llama/Meta-Llama-3-8B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"},
+    {"id": "mistralai/Mixtral-8x22B-Instruct-v0.1", "object": "model", "created": 1686935002, "owned_by": "mistral"},
+    {"id": "mistralai/Mixtral-8x7B-Instruct-v0.1", "object": "model", "created": 1686935002, "owned_by": "mistral"},
+    {"id": "Qwen/Qwen2-7B-Instruct", "object": "model", "created": 1686935002, "owned_by": "Qwen"},
+    {"id": "meta-llama/Meta-Llama-3.1-405B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"}
+
+]
+
+@app.post("/v1/chat/completions/")
+def handle_completions(completion_request: CompletionRequest):
+    system_prompt = next((message.content for message in completion_request.messages if message.role == 'system'), None)
+    user_query = next((message.content for message in completion_request.messages if message.role == 'user'), None)
+
+    response_text = generative(query=user_query, system_prompt=system_prompt, model=completion_request.model)
+
+    response = CompletionResponse(
+        id="chatcmpl-1",
+        object="chat.completion",
+        created=1234567890,
+        model=completion_request.model,
+        choices=[{"index": 0, "message": {"role": "assistant", "content": response_text}, "finish_reason": "stop"}],
+        usage={"prompt_tokens": sum(len(message.content.split()) for message in completion_request.messages), "total_tokens": sum(len(message.content.split()) for message in completion_request.messages) + len(response_text.split())}
+    )
+    return response
+
+@app.get("/v1/models/")
+def get_models():
+    return {"object": "list", "data": models}
+
+@app.post("/v1/completions/")
+def create_completion(prompt: str, model: str, best_of: int = 1, echo: bool = False, frequency_penalty: float = 0.0):
+    response_text = generative(query=prompt, system_prompt="You are a helpful assistant.", model=model)
+
+    response = {
+        "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
+        "object": "text_completion",
+        "created": 1589478378,
+        "model": model,
+        "system_fingerprint": "fp_44709d6fcb",
+        "choices": [{"text": response_text, "index": 0, "logprobs": None, "finish_reason": "length"}]
+    }
+    return response
+
+def generative(system_prompt, query, model):
+    llm = LLM(model=model, system_message=system_prompt)
+    messages = [{"role": "user", "content": query}]
+    response = llm.chat(messages)
+    return response
+
+
+
 # Run the API server if this script is executed
 if __name__ == "__main__":
     import uvicorn
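The added routes follow the OpenAI REST shapes, so any plain HTTP client can exercise them. Below is a minimal client sketch, assuming the server is reachable at http://localhost:8000; the host, port, example model IDs, and prompts are assumptions for illustration, since the uvicorn.run(...) call is outside this hunk.

# Minimal client sketch for the new OpenAI-style routes.
# Assumption: the app is served at BASE_URL (depends on the uvicorn.run call
# not shown in this hunk).
import requests

BASE_URL = "http://localhost:8000"  # assumed address

# GET /v1/models/ returns {"object": "list", "data": [...]}
models = requests.get(f"{BASE_URL}/v1/models/").json()
print([m["id"] for m in models["data"]])

# POST /v1/chat/completions/ expects a CompletionRequest body
payload = {
    "model": "meta-llama/Meta-Llama-3-70B-Instruct",
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Write a haiku about the weather."},
    ],
}
resp = requests.post(f"{BASE_URL}/v1/chat/completions/", json=payload).json()
print(resp["choices"][0]["message"]["content"])

# POST /v1/completions/ declares scalar parameters, so FastAPI reads them
# from the query string rather than the request body:
completion = requests.post(
    f"{BASE_URL}/v1/completions/",
    params={"prompt": "Say hello", "model": "mistralai/Mistral-7B-Instruct-v0.3"},
).json()
print(completion["choices"][0]["text"])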