andyfe committed on
Commit
551328e
·
1 Parent(s): 459810e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -111
app.py CHANGED
@@ -25,122 +25,136 @@ client = InferenceClient(
25
  headers={"Authorization": f"Bearer {HF_TOKEN}"},
26
  )
27
 
28
- def format_prompt(message, history, system_prompt):
29
- prompt = ""
30
- if system_prompt:
31
- prompt += f"System: {system_prompt}\n"
32
- for user_prompt, bot_response in history:
33
- prompt += f"User: {user_prompt}\n"
34
- prompt += f"Falcon: {bot_response}\n" # Response already contains "Falcon: "
35
- prompt += f"""User: {message}
36
- Falcon:"""
37
- return prompt
38
-
39
- seed = 42
40
-
41
- def generate(
42
- prompt, history, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
43
- ):
44
- temperature = float(temperature)
45
- if temperature < 1e-2:
46
- temperature = 1e-2
47
- top_p = float(top_p)
48
- global seed
49
- generate_kwargs = dict(
50
- temperature=temperature,
51
- max_new_tokens=max_new_tokens,
52
- top_p=top_p,
53
- repetition_penalty=repetition_penalty,
54
- stop_sequences=STOP_SEQUENCES,
55
- do_sample=True,
56
- seed=seed,
57
- )
58
- seed = seed + 1
59
- formatted_prompt = format_prompt(prompt, history, system_prompt)
60
-
61
- stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
62
- output = ""
63
-
64
- for response in stream:
65
- output += response.token.text
66
-
67
- for stop_str in STOP_SEQUENCES:
68
- if output.endswith(stop_str):
69
- output = output[:-len(stop_str)]
70
- output = output.rstrip()
71
- yield output
72
- yield output
73
- return output
74
-
75
-
76
- additional_inputs=[
77
- gr.Textbox("", label="Optional system prompt"),
78
- gr.Slider(
79
- label="Temperature",
80
- value=0.9,
81
- minimum=0.0,
82
- maximum=1.0,
83
- step=0.05,
84
- interactive=True,
85
- info="Higher values produce more diverse outputs",
86
- ),
87
- gr.Slider(
88
- label="Max new tokens",
89
- value=256,
90
- minimum=0,
91
- maximum=8192,
92
- step=64,
93
- interactive=True,
94
- info="The maximum numbers of new tokens",
95
- ),
96
- gr.Slider(
97
- label="Top-p (nucleus sampling)",
98
- value=0.90,
99
- minimum=0.0,
100
- maximum=1,
101
- step=0.05,
102
- interactive=True,
103
- info="Higher values sample more low-probability tokens",
104
- ),
105
- gr.Slider(
106
- label="Repetition penalty",
107
- value=1.2,
108
- minimum=1.0,
109
- maximum=2.0,
110
- step=0.05,
111
- interactive=True,
112
- info="Penalize repeated tokens",
113
- )
114
- ]
115
-
116
-
117
- with gr.Blocks() as demo:
118
- with gr.Row():
119
- with gr.Column(scale=0.4):
120
- gr.Image("better_banner.jpeg", elem_id="banner-image", show_label=False)
121
- with gr.Column():
122
- gr.Markdown(
123
- """# Falcon-180B Demo
124
-
125
- **Chat with [Falcon-180B-Chat](https://huggingface.co/tiiuae/falcon-180b-chat), brainstorm ideas, discuss your holiday plans, and more!**
126
 
127
- ✨ This demo is powered by [Falcon-180B](https://huggingface.co/tiiuae/falcon-180B) and finetuned on a mixture of [Ultrachat](https://huggingface.co/datasets/stingning/ultrachat), [Platypus](https://huggingface.co/datasets/garage-bAInd/Open-Platypus) and [Airoboros](https://huggingface.co/datasets/jondurbin/airoboros-2.1). [Falcon-180B](https://huggingface.co/tiiuae/falcon-180b) is a state-of-the-art large language model built by the [Technology Innovation Institute](https://www.tii.ae) in Abu Dhabi. It is trained on 3.5 trillion tokens (including [RefinedWeb](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)) and available under the [Falcon-180B TII License](https://huggingface.co/spaces/tiiuae/falcon-180b-license/blob/main/LICENSE.txt). It currently holds the 🥇 1st place on the [🤗 Open LLM leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) for a pretrained model.
128
 
129
- 🧪 This is only a **first experimental preview**: we intend to provide increasingly capable versions of Falcon in the future, based on improved datasets and RLHF/RLAIF.
130
 
131
- 👀 **Learn more about Falcon LLM:** [falconllm.tii.ae](https://falconllm.tii.ae/)
132
 
133
- ➡️️ **Intended Use**: this demo is intended to showcase an early finetuning of [Falcon-180B](https://huggingface.co/tiiuae/falcon-180b), to illustrate the impact (and limitations) of finetuning on a dataset of conversations and instructions. We encourage the community to further build upon the base model, and to create even better instruct/chat versions!
134
 
135
- ⚠️ **Limitations**: the model can and will produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so. Finally, this demo is limited to a session length of about 1,000 words.
136
- """
137
- )
138
 
139
- gr.ChatInterface(
140
- generate,
141
- examples=EXAMPLES,
142
- additional_inputs=additional_inputs,
143
- )
144
 
145
- demo.launch(show_api=True, share=True)
146
  #demo.queue(concurrency_count=100, api_open=False).launch(show_api=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  headers={"Authorization": f"Bearer {HF_TOKEN}"},
26
  )
27
 
28
+ # def format_prompt(message, history, system_prompt):
29
+ # prompt = ""
30
+ # if system_prompt:
31
+ # prompt += f"System: {system_prompt}\n"
32
+ # for user_prompt, bot_response in history:
33
+ # prompt += f"User: {user_prompt}\n"
34
+ # prompt += f"Falcon: {bot_response}\n" # Response already contains "Falcon: "
35
+ # prompt += f"""User: {message}
36
+ # Falcon:"""
37
+ # return prompt
38
+
39
+ # seed = 42
40
+
41
+ # def generate(
42
+ # prompt, history, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
43
+ # ):
44
+ # temperature = float(temperature)
45
+ # if temperature < 1e-2:
46
+ # temperature = 1e-2
47
+ # top_p = float(top_p)
48
+ # global seed
49
+ # generate_kwargs = dict(
50
+ # temperature=temperature,
51
+ # max_new_tokens=max_new_tokens,
52
+ # top_p=top_p,
53
+ # repetition_penalty=repetition_penalty,
54
+ # stop_sequences=STOP_SEQUENCES,
55
+ # do_sample=True,
56
+ # seed=seed,
57
+ # )
58
+ # seed = seed + 1
59
+ # formatted_prompt = format_prompt(prompt, history, system_prompt)
60
+
61
+ # stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
62
+ # output = ""
63
+
64
+ # for response in stream:
65
+ # output += response.token.text
66
+
67
+ # for stop_str in STOP_SEQUENCES:
68
+ # if output.endswith(stop_str):
69
+ # output = output[:-len(stop_str)]
70
+ # output = output.rstrip()
71
+ # yield output
72
+ # yield output
73
+ # return output
74
+
75
+
76
+ # additional_inputs=[
77
+ # gr.Textbox("", label="Optional system prompt"),
78
+ # gr.Slider(
79
+ # label="Temperature",
80
+ # value=0.9,
81
+ # minimum=0.0,
82
+ # maximum=1.0,
83
+ # step=0.05,
84
+ # interactive=True,
85
+ # info="Higher values produce more diverse outputs",
86
+ # ),
87
+ # gr.Slider(
88
+ # label="Max new tokens",
89
+ # value=256,
90
+ # minimum=0,
91
+ # maximum=8192,
92
+ # step=64,
93
+ # interactive=True,
94
+ # info="The maximum numbers of new tokens",
95
+ # ),
96
+ # gr.Slider(
97
+ # label="Top-p (nucleus sampling)",
98
+ # value=0.90,
99
+ # minimum=0.0,
100
+ # maximum=1,
101
+ # step=0.05,
102
+ # interactive=True,
103
+ # info="Higher values sample more low-probability tokens",
104
+ # ),
105
+ # gr.Slider(
106
+ # label="Repetition penalty",
107
+ # value=1.2,
108
+ # minimum=1.0,
109
+ # maximum=2.0,
110
+ # step=0.05,
111
+ # interactive=True,
112
+ # info="Penalize repeated tokens",
113
+ # )
114
+ # ]
115
+
116
+
117
+ # with gr.Blocks() as demo:
118
+ # with gr.Row():
119
+ # with gr.Column(scale=0.4):
120
+ # gr.Image("better_banner.jpeg", elem_id="banner-image", show_label=False)
121
+ # with gr.Column():
122
+ # gr.Markdown(
123
+ # """# Falcon-180B Demo
124
+
125
+ # **Chat with [Falcon-180B-Chat](https://huggingface.co/tiiuae/falcon-180b-chat), brainstorm ideas, discuss your holiday plans, and more!**
126
 
127
+ # ✨ This demo is powered by [Falcon-180B](https://huggingface.co/tiiuae/falcon-180B) and finetuned on a mixture of [Ultrachat](https://huggingface.co/datasets/stingning/ultrachat), [Platypus](https://huggingface.co/datasets/garage-bAInd/Open-Platypus) and [Airoboros](https://huggingface.co/datasets/jondurbin/airoboros-2.1). [Falcon-180B](https://huggingface.co/tiiuae/falcon-180b) is a state-of-the-art large language model built by the [Technology Innovation Institute](https://www.tii.ae) in Abu Dhabi. It is trained on 3.5 trillion tokens (including [RefinedWeb](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)) and available under the [Falcon-180B TII License](https://huggingface.co/spaces/tiiuae/falcon-180b-license/blob/main/LICENSE.txt). It currently holds the 🥇 1st place on the [🤗 Open LLM leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) for a pretrained model.
128
 
129
+ # 🧪 This is only a **first experimental preview**: we intend to provide increasingly capable versions of Falcon in the future, based on improved datasets and RLHF/RLAIF.
130
 
131
+ # 👀 **Learn more about Falcon LLM:** [falconllm.tii.ae](https://falconllm.tii.ae/)
132
 
133
+ # ➡️️ **Intended Use**: this demo is intended to showcase an early finetuning of [Falcon-180B](https://huggingface.co/tiiuae/falcon-180b), to illustrate the impact (and limitations) of finetuning on a dataset of conversations and instructions. We encourage the community to further build upon the base model, and to create even better instruct/chat versions!
134
 
135
+ # ⚠️ **Limitations**: the model can and will produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so. Finally, this demo is limited to a session length of about 1,000 words.
136
+ # """
137
+ # )
138
 
139
+ # gr.ChatInterface(
140
+ # generate,
141
+ # examples=EXAMPLES,
142
+ # additional_inputs=additional_inputs,
143
+ # )
144
 
145
+ #demo.launch(show_api=True, share=True)
146
  #demo.queue(concurrency_count=100, api_open=False).launch(show_api=True)
147
+
148
def query(text):
    """Echo handler for the Gradio interface.

    Prints the submitted text to stdout and returns it unchanged.

    Args:
        text: The value submitted through the interface's text input.

    Returns:
        The same value that was passed in.
    """
    print(text)
    return text


# Use `query` as the handler: it is the function defined for this interface.
# The earlier `transcribe` reference is not defined anywhere in the visible
# part of this file and would raise NameError when the script is loaded.
iface = gr.Interface(
    query,
    inputs=["text"],
    outputs="text",
)


# Turn on request queuing, then start the app.
iface.queue()
iface.launch()