# Spaces: Sleeping
import asyncio

from fastapi import FastAPI, HTTPException
from huggingface_hub import InferenceClient
from pydantic import BaseModel
# FastAPI application instance for this service.
app = FastAPI()

# Hugging Face Inference API URL of the model that requests are sent to.
API_URL = "https://api-inference.huggingface.co/models/meta-llama/CodeLlama-13b-hf"
# Request model for the text-generation endpoint.
class GenerateRequest(BaseModel):
    """Body of a text-generation request; all three fields are required strings."""

    # Message text that is placed last in the prompt sent to the model.
    prompt: str
    # Instructions that are placed before the message when the prompt is built.
    instructions: str
    # Token forwarded as the Bearer credential to the inference API.
    api_key: str
def format_prompt(message, custom_instructions=None) -> str:
    """Build the prompt text sent to the model.

    Args:
        message: The user's message; it is rendered with ``str.format``
            semantics, so non-string values are stringified.
        custom_instructions: Optional text placed on its own line before
            the message. ``None`` and the empty string are both treated as
            "no instructions".

    Returns:
        ``"<custom_instructions>\\n<message>"`` when instructions are given,
        otherwise just the message.
    """
    if custom_instructions:
        return f"{custom_instructions}\n{message}"
    return f"{message}"
def CodeLlama(prompt, instructions, api, temperature=0.1, max_new_tokens=2000,
              top_p=0.95, repetition_penalty=1.0, seed=69):
    """Generate text with the CodeLlama model through the HF Inference API.

    Args:
        prompt: Message text to send to the model.
        instructions: Text placed before the message (see ``format_prompt``).
        api: Token sent as ``Authorization: Bearer <api>``.
        temperature: Sampling temperature; coerced to ``float`` and raised
            to a floor of ``0.01``.
        max_new_tokens: Forwarded unchanged to ``text_generation``.
        top_p: Nucleus-sampling value; coerced to ``float``.
        repetition_penalty: Forwarded unchanged to ``text_generation``.
        seed: Sampling seed forwarded to ``text_generation``. The default
            (69) is the value that was previously hard-coded.

    Returns:
        Whatever ``InferenceClient.text_generation`` returns for the
        formatted prompt.

    Raises:
        HTTPException: Status 500 with detail ``"Error: <message>"`` for any
            failure, including invalid numeric parameters and client or
            network errors. The original exception is chained as the cause.
    """
    try:
        generate_kwargs = {
            # Floor the temperature at 0.01; presumably the backend rejects
            # smaller values when sampling is on -- TODO confirm.
            "temperature": max(float(temperature), 1e-2),
            "max_new_tokens": max_new_tokens,
            "top_p": float(top_p),
            "repetition_penalty": repetition_penalty,
            "do_sample": True,
            "seed": seed,
        }
        formatted_prompt = format_prompt(prompt, instructions)

        # The caller-supplied token is passed per request through the header.
        client = InferenceClient(
            API_URL, headers={"Authorization": f"Bearer {api}"}
        )
        return client.text_generation(formatted_prompt, **generate_kwargs)
    except Exception as e:
        # Single boundary: every failure surfaces as an HTTP 500, keeping the
        # underlying exception attached for logs and debugging.
        raise HTTPException(status_code=500, detail=f"Error: {e}") from e
async def generate_text(request: GenerateRequest):
    """Generate text for a request and return it as ``{"response": ...}``.

    Args:
        request: Payload carrying ``prompt``, ``instructions`` and ``api_key``.

    Returns:
        A dict with the single key ``"response"`` holding the value returned
        by ``CodeLlama``.

    Raises:
        HTTPException: Status 400 when any of the three fields is empty;
            status 500 when generation fails.
    """
    # NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on
    # this function in the source -- confirm it is registered with ``app``.
    if not (request.prompt and request.instructions and request.api_key):
        raise HTTPException(status_code=400, detail="Missing required fields")

    try:
        # CodeLlama performs a synchronous network call; run it in a worker
        # thread so this coroutine does not block the event loop.
        response = await asyncio.to_thread(
            CodeLlama, request.prompt, request.instructions, request.api_key
        )
    except HTTPException:
        # Already an HTTP error from CodeLlama; wrapping it again would turn
        # the detail into "Error: 500: Error: ...".
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}") from e

    return {"response": response}