from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from huggingface_hub import InferenceClient

app = FastAPI()

# Hugging Face Inference API URL for the CodeLlama model
API_URL = "https://api-inference.huggingface.co/models/meta-llama/CodeLlama-13b-hf"


# Request body schema for the /generate-text endpoint
class GenerateRequest(BaseModel):
    prompt: str
    instructions: str
    api_key: str


def format_prompt(message, custom_instructions=None):
    """Format the prompt, prepending optional custom instructions."""
    prompt = ""
    if custom_instructions:
        prompt += f"{custom_instructions}\n"
    prompt += f"{message}"
    return prompt


def CodeLlama(prompt, instructions, api, temperature=0.1, max_new_tokens=2000,
              top_p=0.95, repetition_penalty=1.0):
    """Generate text with the CodeLlama model using the given sampling parameters."""
    try:
        # Clamp temperature to a small positive value; 0 breaks sampling
        temperature = max(float(temperature), 1e-2)
        top_p = float(top_p)

        generate_kwargs = dict(
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            seed=69,
        )

        # Build the final prompt from the custom instructions and user message
        formatted_prompt = format_prompt(prompt, instructions)

        # Initialize the client with the caller's API key and run generation
        headers = {"Authorization": f"Bearer {api}"}
        client = InferenceClient(API_URL, headers=headers)
        return client.text_generation(formatted_prompt, **generate_kwargs)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")


@app.post("/generate-text")
async def generate_text(request: GenerateRequest):
    """FastAPI endpoint that generates text for the given request."""
    if not request.prompt or not request.instructions or not request.api_key:
        raise HTTPException(status_code=400, detail="Missing required fields")
    try:
        response = CodeLlama(request.prompt, request.instructions, request.api_key)
        return {"response": response}
    except HTTPException:
        # Propagate errors already raised as HTTPException without re-wrapping
        # them into a second, generic 500 response
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error: {str(e)}")
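
# Usage sketch (assumptions: this file is saved as main.py, the server runs
# locally on port 8000, and HF_TOKEN holds a valid Hugging Face API token;
# the module name and the token variable are illustrative, not from the
# original code):
#
#   uvicorn main:app --reload
#
# Example request with the `requests` library:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8000/generate-text",
#       json={
#           "prompt": "Write a Python function that reverses a string.",
#           "instructions": "Respond with code only.",
#           "api_key": HF_TOKEN,  # hypothetical token variable
#       },
#   )
#   print(resp.json()["response"])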