from fastapi import FastAPI
from fastapi.responses import FileResponse
from pydantic import BaseModel

from transformers import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()
name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
customGen = False  # toggle for a custom generation path; currently unused in this file

# microsoft/DialoGPT-small
# microsoft/DialoGPT-medium
# microsoft/DialoGPT-large

# mistralai/Mixtral-8x7B-Instruct-v0.1

# Load the model and tokenizer from the Hugging Face Hub
model = AutoModelForCausalLM.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name)
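# Optional sketch (an assumption, not in the original): move the model to a
# GPU when one is available, e.g.
#
#   import torch
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model = model.to(device)
#
# The input_ids tensor below would then also need .to(device) before
# calling model.generate().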

class GenerateRequest(BaseModel):
    prompt: str
    length: int


@app.get("/")
def read_root():
    return FileResponse(path="templates/index.html", media_type="text/html")


@app.post("/api")
def generate_text(data: GenerateRequest):
    print("Prompt:", data.prompt)
    print("Length:", data.length)

    # Tokenize the input prompt
    input_ids = tokenizer.encode(data.prompt, return_tensors="pt")

    # Generate with beam search; max_length caps the total sequence length
    # (prompt + new tokens) and no_repeat_ngram_size=2 blocks repeated bigrams
    output_ids = model.generate(
        input_ids,
        max_length=data.length,
        num_beams=5,
        no_repeat_ngram_size=2,
    )
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    print("Answer:", generated_text)
    return {"answer": generated_text}
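
# Usage sketch (the filename "main.py", host, and port are assumptions, not
# part of the original file): start the server with
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# then call the /api endpoint:
#
#   curl -X POST http://localhost:8000/api \
#     -H "Content-Type: application/json" \
#     -d '{"prompt": "What is FastAPI?", "length": 128}'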