File size: 1,662 Bytes
2177bd6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c5f382
866a6fe
0e27985
 
 
 
 
 
 
 
 
 
1c5f382
59fbd32
 
 
1c5f382
d8d22a8
 
 
 
 
 
 
 
 
 
 
59fbd32
 
0e27985
551328e
 
9cefaa1
95bd41e
551328e
 
 
 
 
620d0d1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import json
import os
import shutil
import requests

import gradio as gr
from huggingface_hub import Repository, InferenceClient

# Access token for the Hugging Face Inference API, read from the environment.
# It is None when the HF_TOKEN variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
# Inference endpoint of the hosted model that every request is sent to.
API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"

# Markers that end a model turn; they are sent to the endpoint as stop
# sequences and also trimmed from the end of the generated text.
STOP_SEQUENCES = ["\nUser:", "<|endoftext|>", " User:", "###"]

# Only attach an Authorization header when a token is actually configured;
# formatting a missing token would send the literal value "Bearer None".
_AUTH_HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else None

# Shared client used by all requests handled by this process.
client = InferenceClient(
    API_URL,
    headers=_AUTH_HEADERS,
)

def _coerce_number(value, cast, default):
    """Return ``cast(value)``, or *default* when *value* is ``None`` or blank."""
    if value is None:
        return default
    if isinstance(value, str):
        value = value.strip()
        if not value:
            return default
    return cast(value)


def _trim_stop_sequences(text, stop_sequences):
    """Strip trailing stop sequences from *text*; return ``(text, found)``."""
    found = False
    for stop_str in stop_sequences:
        if text.endswith(stop_str):
            text = text[: -len(stop_str)].rstrip()
            found = True
    return text, found


def query(bot_name, system_prompt, user_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    """Generate the bot's reply to a single user message.

    Builds a ``System / User / <bot_name>`` prompt, streams the completion
    from the module-level inference client and returns the accumulated text
    with any trailing stop sequence removed.

    Args:
        bot_name: Speaker label used to open the model's turn in the prompt.
        system_prompt: Text placed on the ``System:`` line of the prompt.
        user_prompt: Text placed on the ``User:`` line of the prompt.
        temperature: Sampling temperature; number or numeric string.
        max_new_tokens: Upper bound on generated tokens; number or numeric
            string.
        top_p: Nucleus-sampling threshold; number or numeric string.
        repetition_penalty: Repetition penalty; number or numeric string.

    Returns:
        The generated reply as a string.

    Raises:
        ValueError: If a sampling parameter is a non-blank string that cannot
            be parsed as a number.
    """
    # The UI delivers every input as text, so the sampling parameters may be
    # strings (possibly blank). Convert them here and fall back to the
    # signature defaults for blank values instead of forwarding raw strings.
    temperature = _coerce_number(temperature, float, 0.9)
    max_new_tokens = _coerce_number(max_new_tokens, lambda value: int(float(value)), 256)
    top_p = _coerce_number(top_p, float, 0.95)
    repetition_penalty = _coerce_number(repetition_penalty, float, 1.0)

    print(temperature, max_new_tokens, top_p, repetition_penalty)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        stop_sequences=STOP_SEQUENCES,
        do_sample=True,
        seed=42,  # fixed seed passed with every request
    )
    print(bot_name)
    print(system_prompt)
    print(user_prompt)
    print('-' * 20)
    prompt = f"System: {system_prompt}\nUser: {user_prompt}\n{bot_name}: "
    stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        output, stopped = _trim_stop_sequences(output, STOP_SEQUENCES)
        if stopped:
            # The turn is over: do not append tokens after a stop marker.
            break
    print(output)
    print('-' * 20)
    return output

# Gradio front end for `query`: one plain text input per function parameter
# (seven in total) and a single text output.
iface = gr.Interface(
    fn=query,
    inputs=["text"] * 7,
    outputs="text",
)

# Enable the request queue, then start serving the app.
iface.queue()
iface.launch()