File size: 1,671 Bytes
591004d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!python
# -*- coding: utf-8 -*-
# @author: Kun


from models.vicuna_bin import max_token, temperature, top_p
from common import torch_gc
from global_config import lang_opt


def get_api_response(model, tokenizer, content: str, max_tokens=None):
    """Generate a completion for ``content`` and return it as text.

    Args:
        model: Object exposing ``tokenize``, ``generate``, ``detokenize`` and
            ``token_eos`` (presumably a ``llama_cpp.Llama`` instance --
            TODO confirm against the caller).
        tokenizer: Not used by this function.
        content: Prompt text; it is UTF-8 encoded before being tokenized.
        max_tokens: Upper bound on the number of tokens taken from the
            generator. ``None`` keeps the previous fixed limit of 10000.

    Returns:
        The generated text, decoded as UTF-8.

    Raises:
        Exception: If ``lang_opt`` is not one of ``"en"``, ``"zh1"``, ``"zh2"``.
    """
    # NOTE(review): a per-language system-role prompt used to be assigned here
    # but was never passed to the model; only the validation is kept.
    if lang_opt not in ("en", "zh1", "zh2"):
        raise Exception(f"not supported language: {lang_opt}")

    print("===> Question:")
    print(content)
    print("<==="+"="*100)

    prompt_tokens = model.tokenize(content.encode("utf-8"))

    # Honor the caller's limit; fall back to the historical cap otherwise.
    token_limit = 10000 if max_tokens is None else max_tokens
    top_k = 40
    repetition_penalty = 1.1

    pieces = []
    if token_limit > 0:
        eos_token = model.token_eos()
        token_stream = model.generate(prompt_tokens,
                                      top_k=top_k,
                                      top_p=top_p,
                                      temp=temperature,
                                      repeat_penalty=repetition_penalty)
        for generated, token in enumerate(token_stream, start=1):
            # Stop before detokenizing so the end-of-sequence marker never
            # leaks into the returned text.
            if token == eos_token:
                break
            pieces.append(model.detokenize([token]))
            if generated >= token_limit:
                break

    # A single token may hold only part of a multi-byte character, so decode
    # the joined bytes once. If the limit cut generation mid-character, drop
    # the dangling bytes instead of raising UnicodeDecodeError.
    response = b"".join(pieces).decode("utf-8", errors="ignore")

    torch_gc()

    print("===> Generated Text: ")
    print(response)
    print("<==="+"="*100)

    return response