from fastapi import FastAPI
from fastapi.responses import FileResponse
from pydantic import BaseModel

from transformers import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()
name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
customGen = False  # toggle for a custom generation path; currently unused in this file

# microsoft/DialoGPT-small
# microsoft/DialoGPT-medium
# microsoft/DialoGPT-large

# mistralai/Mixtral-8x7B-Instruct-v0.1

# Load the model and tokenizer from the Hugging Face Hub
model = AutoModelForCausalLM.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name)
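# Optional sketch (an assumption, not in the original): move the model to a
# GPU when one is available, e.g.
#
#   import torch
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model = model.to(device)
#
# The input_ids tensor below would then also need .to(device) before
# calling model.generate().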

class GenerateRequest(BaseModel):
    prompt: str
    length: int


@app.get("/")
def read_root():
    return FileResponse(path="templates/index.html", media_type="text/html")


@app.post("/api")
def generate_text(data: GenerateRequest):
    print("Prompt:", data.prompt)
    print("Length:", data.length)

    # Tokenize the input prompt
    input_ids = tokenizer.encode(data.prompt, return_tensors="pt")

    # Generate with beam search; max_length caps the total sequence length
    # (prompt + new tokens) and no_repeat_ngram_size=2 blocks repeated bigrams
    output_ids = model.generate(
        input_ids,
        max_length=data.length,
        num_beams=5,
        no_repeat_ngram_size=2,
    )
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    print("Answer:", generated_text)
    return {"answer": generated_text}
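
# Usage sketch (the filename "main.py", host, and port are assumptions, not
# part of the original file): start the server with
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
# then call the /api endpoint:
#
#   curl -X POST http://localhost:8000/api \
#     -H "Content-Type: application/json" \
#     -d '{"prompt": "What is FastAPI?", "length": 128}'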