Do0rMaMu committed · Commit 2b25d5e · verified · 1 Parent(s): 5635287

Create main.py

Files changed (1): main.py +24 -0
main.py ADDED
@@ -0,0 +1,24 @@
+ from ctransformers import AutoModelForCausalLM  # ctransformers loads local GGUF files; the original transformers import cannot take these arguments
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+
+ # Model loading (GGUF via ctransformers)
+ llm = AutoModelForCausalLM.from_pretrained(
+     "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",
+     model_type="llama",
+     max_new_tokens=1096,
+     threads=3,
+ )
+
+ # Pydantic request schema
+ class Validation(BaseModel):
+     user_prompt: str    # User's prompt
+     system_prompt: str  # System instruction
+
+ # FastAPI application
+ app = FastAPI()
+
+ # Endpoint for generating responses
+ @app.post("/generate_response")
+ async def generate_response(item: Validation):
+     # Llama 3 chat template: system and user turns, then the assistant header to start generation
+     prompt = (
+         f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{item.system_prompt}<|eot_id|>"
+         f"<|start_header_id|>user<|end_header_id|>\n{item.user_prompt}<|eot_id|>"
+         f"<|start_header_id|>assistant<|end_header_id|>\n"
+     )
+     return llm(prompt)  # ctransformers: calling the model on a prompt string returns the generated text
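
For reference, a minimal sketch of exercising the endpoint once the app is served, assuming it is run with `uvicorn main:app --port 8000`; the host, port, and example prompts below are illustrative and not part of this commit:

# Minimal client sketch for POST /generate_response.
# Assumes the server was started with: uvicorn main:app --host 0.0.0.0 --port 8000
import requests

payload = {
    "system_prompt": "You are a helpful assistant.",                 # example value
    "user_prompt": "Summarize what FastAPI is in one sentence.",     # example value
}

resp = requests.post("http://localhost:8000/generate_response", json=payload)
resp.raise_for_status()
print(resp.json())  # the generated text returned by llm(prompt)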