#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @author: Kun
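
"""Streamed text-generation helper for the Baichuan HF backend.

Builds a language-specific system instruction, wraps the user prompt in the
"<human>: ... <bot>:" chat template, and streams tokens as they are generated.
"""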

from transformers import TextStreamer

from models.baichuan_hf import max_token, temperature, top_p
from common import torch_gc
from global_config import lang_opt


def get_api_response(model, tokenizer, content: str, max_tokens=None):
    # fall back to the module-level default when the caller gives no explicit limit
    if max_tokens is None:
        max_tokens = max_token

    if "en" == lang_opt:
        system_role_content = 'You are a helpful and creative assistant for writing novel.'
    elif "zh1" == lang_opt:
        system_role_content = 'You are a helpful and creative assistant for writing novel.\
                You are must always in Chinese.重要,你需要使用中文与我进行交流。'
    elif "zh2" == lang_opt:
        system_role_content = '你是写小说的好帮手,有创意的助手。'
    else:
        raise Exception(f"not supported language: {lang_opt}")

    print("===> Question:")
    print(content)
    print("<==="+"="*100)

    # stream decoded tokens to stdout as they are generated, hiding the prompt
    streamer = TextStreamer(tokenizer,
                            skip_prompt=True,
                            skip_special_tokens=True)

    # Wrap the question in the "<human>: ... <bot>:" template used by this repo
    # and prepend the system instruction so it is not silently dropped.
    prompt = "{}\n<human>:{}\n<bot>:".format(system_role_content, content)
    inputs = tokenizer(prompt, return_tensors='pt')
    # Keep the inputs on the model's device; otherwise .generate() warns about
    # input_ids and the model weights living on different devices.
    inputs = inputs.to(model.device)
    generate_ids = model.generate(**inputs,
                                  max_new_tokens=max_tokens,
                                  top_p=top_p,
                                  temperature=temperature,
                                  repetition_penalty=1.1,
                                  streamer=streamer)
    response = tokenizer.decode(
        generate_ids.cpu()[0], skip_special_tokens=True)

    torch_gc()

    print("===> Generated Text: ")
    print(response)
    print("<==="+"="*100)

    return response
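

if __name__ == "__main__":
    # Minimal usage sketch, assuming a Baichuan checkpoint is available locally
    # or on the Hub; the checkpoint name below is illustrative, not part of
    # this repo's configuration.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_path = "baichuan-inc/Baichuan-7B"  # hypothetical example checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)

    get_api_response(model, tokenizer,
                     "Write the opening paragraph of a mystery novel.")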