Update main.py
main.py
CHANGED
@@ -41,13 +41,46 @@ def generate(item: Item):
     )

    formatted_prompt = format_prompt(f"{item.system_prompt}, {item.prompt}", item.history)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=item.stream, details=item.details, return_full_text=item.return_full_text)
-    output = ""
+    stream = client.text_generation(
+        formatted_prompt,
+        **generate_kwargs,
+        stream=item.stream,
+        details=item.details,
+        return_full_text=item.return_full_text
+    )

+    output = ""
     for response in stream:
-        output += response.token.text
+        # Check if response has the attribute 'token'
+        if hasattr(response, 'token'):
+            output += response.token.text
+        else:
+            output += response  # If not, treat it as a string
     return output

+# def generate(item: Item):
+#     temperature = float(item.temperature)
+#     if temperature < 1e-2:
+#         temperature = 1e-2
+#     top_p = float(item.top_p)
+
+#     generate_kwargs = dict(
+#         temperature=temperature,
+#         max_new_tokens=1048,
+#         top_p=top_p,
+#         repetition_penalty=1.0,
+#         do_sample=True,
+#         seed=42,
+#     )
+
+#     formatted_prompt = format_prompt(f"{item.system_prompt}, {item.prompt}", item.history)
+#     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=item.stream, details=item.details, return_full_text=item.return_full_text)
+#     output = ""
+
+#     for response in stream:
+#         output += response.token.text
+#     return output
+
 @app.post("/generate/")
 async def generate_text(item: Item):
     try:
@@ -56,162 +89,6 @@ async def generate_text(item: Item):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

-# from fastapi import FastAPI, HTTPException, Depends
-# from fastapi.security.api_key import APIKeyHeader
-# from pydantic import BaseModel
-# from huggingface_hub import InferenceClient, HfApi
-# from typing import List, Optional
-# import os
-# from dotenv import load_dotenv
-
-# # Load environment variables
-# load_dotenv()
-
-# # Initialize FastAPI app
-# app = FastAPI()
-
-# # Get HuggingFace token from environment variable
-# HF_TOKEN = os.getenv("HF_TOKEN")
-# if not HF_TOKEN:
-#     raise ValueError("HF_TOKEN environment variable is not set")
-
-# # Setup API key authorization
-# API_KEY_NAME = "Authorization"
-# api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=True)
-
-# # Initialize HuggingFace client
-# try:
-#     client = InferenceClient(
-#         "mistralai/Mixtral-8x7B-Instruct-v0.1",
-#         token=HF_TOKEN
-#     )
-#     # Verify token is valid
-#     hf_api = HfApi(token=HF_TOKEN)
-#     hf_api.whoami()
-# except Exception as e:
-#     raise ValueError(f"Failed to initialize HuggingFace client: {str(e)}")
-
-# class ChatMessage(BaseModel):
-#     role: str
-#     content: str
-
-# class GenerationRequest(BaseModel):
-#     prompt: str
-#     message: Optional[str] = None
-#     system_message: Optional[str] = None
-#     history: Optional[List[ChatMessage]] = None
-#     temperature: Optional[float] = 0.7
-#     top_p: Optional[float] = 0.95
-
-# def format_prompt(message: str, history: List[ChatMessage] = None, system_message: str = None) -> str:
-#     prompt = ""
-
-#     if system_message:
-#         prompt += f"<s>[INST] {system_message} [/INST]</s>"
-
-#     if history:
-#         for msg in history:
-#             if msg.role == "user":
-#                 prompt += f"<s>[INST] {msg.content} [/INST]"
-#             else:
-#                 prompt += f" {msg.content}</s>"
-
-#     prompt += f"<s>[INST] {message} [/INST]"
-#     return prompt
-
-# async def verify_token(api_key_header: str = Depends(api_key_header)):
-#     if not api_key_header.startswith("Bearer "):
-#         raise HTTPException(
-#             status_code=401,
-#             detail="Bearer token missing"
-#         )
-#     token = api_key_header.replace("Bearer ", "")
-#     if token != HF_TOKEN:
-#         raise HTTPException(
-#             status_code=401,
-#             detail="Invalid authentication credentials"
-#         )
-#     return token
-
-# @app.post("/generate/")
-# async def generate_text(
-#     request: GenerationRequest,
-#     token: str = Depends(verify_token)
-# ):
-#     try:
-#         message = request.prompt if request.prompt else request.message
-#         if not message:
-#             return [
-#                 {
-#                     "msg": "MSG!"
-#                 }
-#             ]
-
-#         formatted_prompt = format_prompt(
-#             message=message,
-#             history=request.history,
-#             system_message=request.system_message
-#         )
-
-#         response = client.text_generation(
-#             formatted_prompt,
-#             temperature=max(request.temperature, 0.01),
-#             top_p=request.top_p,
-#             max_new_tokens=1048,
-#             do_sample=True,
-#             return_full_text=False
-#         )
-
-#         if not response:
-#             return [
-#                 {
-#                     "detail": [
-#                         {
-#                             # "type": "server_error",
-#                             "loc": ["server"],
-#                             "msg": "No response received from model",
-#                             "input": None
-#                         }
-#                     ]
-#                 }
-#             ]
-
-#         # Construct the custom JSON response
-#         return [
-#             {
-#                 "msg": response
-#                 # "msg": [
-#                 #     {
-#                 #         # "type": "success",
-#                 #         # "loc": [
-#                 #         #     "body",
-#                 #         #     "prompt"
-#                 #         # ],
-#                 #         # "loc": ["body"],
-#                 #         # "msg": [
-#                 #         #     response,
-#                 #         #     formatted_prompt
-#                 #         # ],
-
-#                 #     }
-#                 # ]
-#             }
-#         ]
-
-#     except Exception as e:
-#         return [
-#             {
-#                 "detail": [
-#                     {
-#                         "type": "server_error",
-#                         "loc": ["server"],
-#                         "msg": f"Error generating response: {str(e)}",
-#                         "input": None
-#                     }
-#                 ]
-#             }
-#         ]
-
 # @app.get("/health")
 # async def health_check():
 #     return {
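Note on the patched loop: huggingface_hub's InferenceClient.text_generation changes its return type with the stream and details flags, so the loop cannot assume every chunk has a .token attribute. With stream=True and details=True it yields stream-response objects carrying the text in .token.text; with stream=True and details=False it yields plain str chunks; with stream=False it returns a single str, and iterating a str yields bare characters. The hasattr(response, 'token') check keeps the accumulator working across all of these instead of raising AttributeError. A minimal sketch of the two main shapes (not part of the commit; assumes HF_TOKEN is set in the environment, model as in this Space):

import os
from huggingface_hub import InferenceClient

client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=os.getenv("HF_TOKEN"))

# Streaming with details: each chunk is an object exposing .token.text
for chunk in client.text_generation("Hello", max_new_tokens=8, stream=True, details=True):
    print(chunk.token.text, end="")

# Non-streaming: a plain str comes back; iterating it would yield single
# characters with no .token attribute -- the case the else-branch handles.
text = client.text_generation("Hello", max_new_tokens=8)
print(text)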
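For reference, a minimal client-side sketch of calling the /generate/ endpoint this file exposes. The field names mirror the Item attributes visible in this diff (prompt, system_prompt, history, stream, details, return_full_text); the Space URL is a placeholder, and the history shape is an assumption, since the Item model definition sits outside these hunks.

import requests

API_URL = "https://your-space.hf.space/generate/"  # placeholder URL

payload = {
    "prompt": "Summarize what FastAPI is in one sentence.",
    "system_prompt": "You are a concise assistant.",
    "history": [],                 # assumed shape; Item's definition is not in this diff
    "stream": False,               # exercises the plain-str path in the patched loop
    "details": False,
    "return_full_text": False,
}

resp = requests.post(API_URL, json=payload, timeout=120)
resp.raise_for_status()
print(resp.json())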