# textgeneration/question_paper.py
"""Serve a small quantized Llama 2 chat model through a single FastAPI endpoint."""

from fastapi import FastAPI
from llama_cpp import Llama

app = FastAPI()

# Load the model once at import time rather than inside the request handler;
# the GGUF file is multi-gigabyte, so reloading it on every request would
# dominate latency. Q2_K is the smallest of the standard GGUF quantizations.
llm = Llama(
    model_path="./llama-2-7b-chat.Q2_K.gguf",
    # n_gpu_layers=-1,  # Uncomment to use GPU acceleration
    # seed=1337,        # Uncomment to set a specific seed
    # n_ctx=2048,       # Uncomment to increase the context window
)


@app.get("/")
def llama():
    # Generate a completion; llm.create_completion() is the equivalent long form.
    output = llm(
        "Q: Name the planets in the solar system? A: ",  # Prompt
        max_tokens=32,  # Generate up to 32 tokens; set to None to run to the end of the context window
        echo=True,  # Echo the prompt back in the output
    )
    return output["choices"][0]["text"].strip()
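

# A minimal way to run this service locally, assuming uvicorn is installed
# (`pip install uvicorn`). The module name "question_paper" comes from the
# file path above; host and port below are illustrative defaults.
#
#   uvicorn question_paper:app --host 0.0.0.0 --port 8000
#
# or, equivalently, run this file directly:
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request once the server is up (hypothetical local invocation):
#   curl http://127.0.0.1:8000/
# This returns the prompt echoed back plus up to 32 generated tokens, stripped.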