Spaces:
Runtime error
Runtime error
File size: 1,984 Bytes
849a8db e6ae614 849a8db e6ae614 849a8db e6ae614 d625244 e6ae614 d625244 e6ae614 d625244 e6ae614 d625244 e6ae614 faa48c9 e6ae614 c456ddf d625244 e6ae614 b282f54 d625244 e6ae614 d625244 e6ae614 d625244 e6ae614 d625244 e6ae614 d625244 e6ae614 d625244 40e75ca 1f2856e 40e75ca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import random
from typing import Optional
from fastapi import FastAPI
from pydantic import BaseModel
from peft import PeftModel
from transformers import LLaMATokenizer, LLaMAForCausalLM, GenerationConfig
# Hugging Face Hub identifiers for the base checkpoint and the LoRA adapter
# that is layered on top of it.
BASE_MODEL_ID = "decapoda-research/llama-7b-hf"
LORA_ADAPTER_ID = "tloen/alpaca-lora-7b"

# ASGI application object; routes are registered on it further down the file.
app = FastAPI()

# The tokenizer and model are created once at import time and shared by every
# request handled by this process.
# NOTE(review): released versions of transformers expose these classes as
# LlamaTokenizer / LlamaForCausalLM; the LLaMA* spellings appear to come from
# a pre-release build — confirm against the pinned transformers version.
tokenizer = LLaMATokenizer.from_pretrained(BASE_MODEL_ID)
base_model = LLaMAForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    load_in_8bit=True,  # request 8-bit weight loading
    device_map="auto",  # let the library decide where the weights are placed
)

# Wrap the base model with the adapter weights; `model` is what the endpoint uses.
model = PeftModel.from_pretrained(base_model, LORA_ADAPTER_ID)
class InputPrompt(BaseModel):
    """Request body accepted by the ``/evaluate`` endpoint."""

    # Instruction text that is inserted into the prompt template.
    instruction: str

    # Optional extra context; when missing or empty the prompt template
    # without an input section is used.
    input: Optional[str] = None
class OutputResponse(BaseModel):
    """Response body returned by the ``/evaluate`` endpoint."""

    # Text produced by the model for the submitted prompt.
    response: str
@app.post("/evaluate")
def evaluate(input_prompt: InputPrompt) -> OutputResponse:
    """Generate a completion for the request and return only the model's reply.

    The instruction (and optional input) are rendered into a prompt, the model
    samples up to 256 new tokens, and the text generated after the prompt is
    returned.

    Args:
        input_prompt: Request body carrying the instruction and the optional
            input context.

    Returns:
        An ``OutputResponse`` whose ``response`` is the generated text with
        surrounding whitespace stripped.
    """
    response_marker = "### Response:"

    # Sampling settings: single-beam nucleus sampling.
    generation_config = GenerationConfig(
        temperature=0.9,
        top_p=0.75,
        num_beams=1,
        do_sample=True,
    )

    prompt = generate_prompt(input_prompt.instruction, input_prompt.input)
    input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].cuda()

    # Per-step scores were never read, so they are no longer requested.
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        max_new_tokens=256,
    )

    # The returned sequence is the prompt tokens followed by the newly
    # generated ones. Decoding only the new tokens avoids searching the full
    # text for the response marker, which picked the wrong segment whenever
    # the caller's own text contained the marker and raised IndexError when
    # the marker was absent from the decoded text.
    prompt_length = input_ids.shape[-1]
    completion_ids = generation_output.sequences[0][prompt_length:]
    completion = tokenizer.decode(completion_ids)

    # As before, anything from a repeated marker onwards is discarded.
    reply = completion.partition(response_marker)[0].strip()
    return OutputResponse(response=reply)
def generate_prompt(instruction, input=None):
    """Render the instruction-following prompt sent to the model.

    Args:
        instruction: Text placed under the ``### Instruction:`` heading.
        input: Optional context placed under an ``### Input:`` heading. Any
            falsy value (``None`` or an empty string) selects the template
            without an input section.

    Returns:
        The prompt string, ending with the ``### Response:`` heading and no
        trailing newline.
    """
    # NOTE(review): the published Alpaca template separates these sections
    # with blank lines, whereas here they are joined by single newlines —
    # confirm this matches the format the adapter was trained on.
    if not input:
        return (
            "Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.\n"
            "### Instruction:\n"
            f"{instruction}\n"
            "### Response:"
        )

    return (
        "Below is an instruction that describes a task, "
        "paired with an input that provides further context. "
        "Write a response that appropriately completes the request.\n"
        "### Instruction:\n"
        f"{instruction}\n"
        "### Input:\n"
        f"{input}\n"
        "### Response:"
    )
if __name__ == "__main__":
    # uvicorn is only needed when this file is executed directly, so it is
    # imported here rather than at the top of the module.
    import uvicorn

    # Listen on every interface on port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)
|