File size: 749 Bytes
9bf2007
 
bff48c8
dcd2d54
3e6fc0f
9bf2007
e48a0c0
 
e5e2748
3e6fc0f
9bf2007
e48a0c0
38fc296
e48a0c0
 
 
e5e2748
9bf2007
e48a0c0
 
 
 
 
a0347c2
52de7f3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import os
import threading

import torch
import transformers
from fastapi import FastAPI
from llama_cpp import Llama

# FastAPI application object; the route decorator below registers its handler on it.
app = FastAPI()
# Location of the quantized GGUF model, relative to the working directory.
_MODEL_PATH = "./llama-2-7b-chat.Q2_K.gguf"

# Fixed demo prompt sent to the model on every request.
_PROMPT = "Q: Name the planets in the solar system? A: "

# Lazily created, process-wide model instance (see _get_llm).
_llm = None

# Guards both the one-time model load and each inference call: FastAPI runs
# plain ``def`` handlers on a threadpool, so requests can arrive concurrently,
# and a single Llama instance should not be driven by several threads at once.
_llm_lock = threading.Lock()


def _get_llm():
    """Return the shared ``Llama`` instance, loading it on first use.

    The caller must hold ``_llm_lock``. Loading is deferred until the first
    request so that importing this module stays cheap and does not require
    the model file to be present.
    """
    global _llm
    if _llm is None:
        _llm = Llama(
            model_path=_MODEL_PATH,
            # n_gpu_layers=-1,  # Uncomment to use GPU acceleration
            # seed=1337,  # Uncomment to set a specific seed
            # n_ctx=2048,  # Uncomment to increase the context window
        )
    return _llm


@app.get("/")
def llama() -> str:
    """Run the demo prompt through the model and return the generated text.

    The model is loaded once and reused across requests instead of being
    re-read from disk on every call. Inference is serialized with a lock, so
    concurrent requests are answered one at a time.

    Returns:
        The text of the first completion choice with surrounding whitespace
        stripped. Because ``echo=True`` is passed, the text includes the
        prompt followed by the completion.
    """
    with _llm_lock:
        output = _get_llm()(
            _PROMPT,
            max_tokens=32,  # Generate up to 32 tokens; None runs to the end of the context window
            echo=True,  # Echo the prompt back in the output
        )  # Calling the instance generates a completion (same as create_completion)

    return output["choices"][0]["text"].strip()