andyfe committed on
Commit
511231c
·
1 Parent(s): 88b6839

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -134
app.py CHANGED
@@ -8,143 +8,15 @@ from huggingface_hub import Repository, InferenceClient
8
 
9
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
10
  API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
11
- BOT_NAME = "Falcon"
12
 
13
  STOP_SEQUENCES = ["\nUser:", "<|endoftext|>", " User:", "###"]
14
 
15
- EXAMPLES = [
16
- ["Hey Falcon! Any recommendations for my holidays in Abu Dhabi?"],
17
- ["What's the Everett interpretation of quantum mechanics?"],
18
- ["Give me a list of the top 10 dive sites you would recommend around the world."],
19
- ["Can you tell me more about deep-water soloing?"],
20
- ["Can you write a short tweet about the release of our latest AI model, Falcon LLM?"]
21
- ]
22
-
23
  client = InferenceClient(
24
  API_URL,
25
  headers={"Authorization": f"Bearer {HF_TOKEN}"},
26
  )
27
 
28
- # def format_prompt(message, history, system_prompt):
29
- # prompt = ""
30
- # if system_prompt:
31
- # prompt += f"System: {system_prompt}\n"
32
- # for user_prompt, bot_response in history:
33
- # prompt += f"User: {user_prompt}\n"
34
- # prompt += f"Falcon: {bot_response}\n" # Response already contains "Falcon: "
35
- # prompt += f"""User: {message}
36
- # Falcon:"""
37
- # return prompt
38
-
39
- # seed = 42
40
-
41
- # def generate(
42
- # prompt, history, system_prompt="", temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
43
- # ):
44
- # temperature = float(temperature)
45
- # if temperature < 1e-2:
46
- # temperature = 1e-2
47
- # top_p = float(top_p)
48
- # global seed
49
- # generate_kwargs = dict(
50
- # temperature=temperature,
51
- # max_new_tokens=max_new_tokens,
52
- # top_p=top_p,
53
- # repetition_penalty=repetition_penalty,
54
- # stop_sequences=STOP_SEQUENCES,
55
- # do_sample=True,
56
- # seed=seed,
57
- # )
58
- # seed = seed + 1
59
- # formatted_prompt = format_prompt(prompt, history, system_prompt)
60
-
61
- # stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
62
- # output = ""
63
-
64
- # for response in stream:
65
- # output += response.token.text
66
-
67
- # for stop_str in STOP_SEQUENCES:
68
- # if output.endswith(stop_str):
69
- # output = output[:-len(stop_str)]
70
- # output = output.rstrip()
71
- # yield output
72
- # yield output
73
- # return output
74
-
75
-
76
- # additional_inputs=[
77
- # gr.Textbox("", label="Optional system prompt"),
78
- # gr.Slider(
79
- # label="Temperature",
80
- # value=0.9,
81
- # minimum=0.0,
82
- # maximum=1.0,
83
- # step=0.05,
84
- # interactive=True,
85
- # info="Higher values produce more diverse outputs",
86
- # ),
87
- # gr.Slider(
88
- # label="Max new tokens",
89
- # value=256,
90
- # minimum=0,
91
- # maximum=8192,
92
- # step=64,
93
- # interactive=True,
94
- # info="The maximum numbers of new tokens",
95
- # ),
96
- # gr.Slider(
97
- # label="Top-p (nucleus sampling)",
98
- # value=0.90,
99
- # minimum=0.0,
100
- # maximum=1,
101
- # step=0.05,
102
- # interactive=True,
103
- # info="Higher values sample more low-probability tokens",
104
- # ),
105
- # gr.Slider(
106
- # label="Repetition penalty",
107
- # value=1.2,
108
- # minimum=1.0,
109
- # maximum=2.0,
110
- # step=0.05,
111
- # interactive=True,
112
- # info="Penalize repeated tokens",
113
- # )
114
- # ]
115
-
116
-
117
- # with gr.Blocks() as demo:
118
- # with gr.Row():
119
- # with gr.Column(scale=0.4):
120
- # gr.Image("better_banner.jpeg", elem_id="banner-image", show_label=False)
121
- # with gr.Column():
122
- # gr.Markdown(
123
- # """# Falcon-180B Demo
124
-
125
- # **Chat with [Falcon-180B-Chat](https://huggingface.co/tiiuae/falcon-180b-chat), brainstorm ideas, discuss your holiday plans, and more!**
126
-
127
- # ✨ This demo is powered by [Falcon-180B](https://huggingface.co/tiiuae/falcon-180B) and finetuned on a mixture of [Ultrachat](https://huggingface.co/datasets/stingning/ultrachat), [Platypus](https://huggingface.co/datasets/garage-bAInd/Open-Platypus) and [Airoboros](https://huggingface.co/datasets/jondurbin/airoboros-2.1). [Falcon-180B](https://huggingface.co/tiiuae/falcon-180b) is a state-of-the-art large language model built by the [Technology Innovation Institute](https://www.tii.ae) in Abu Dhabi. It is trained on 3.5 trillion tokens (including [RefinedWeb](https://huggingface.co/datasets/tiiuae/falcon-refinedweb)) and available under the [Falcon-180B TII License](https://huggingface.co/spaces/tiiuae/falcon-180b-license/blob/main/LICENSE.txt). It currently holds the 🥇 1st place on the [🤗 Open LLM leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) for a pretrained model.
128
-
129
- # 🧪 This is only a **first experimental preview**: we intend to provide increasingly capable versions of Falcon in the future, based on improved datasets and RLHF/RLAIF.
130
-
131
- # 👀 **Learn more about Falcon LLM:** [falconllm.tii.ae](https://falconllm.tii.ae/)
132
-
133
- # ➡️️ **Intended Use**: this demo is intended to showcase an early finetuning of [Falcon-180B](https://huggingface.co/tiiuae/falcon-180b), to illustrate the impact (and limitations) of finetuning on a dataset of conversations and instructions. We encourage the community to further build upon the base model, and to create even better instruct/chat versions!
134
-
135
- # ⚠️ **Limitations**: the model can and will produce factually incorrect information, hallucinating facts and actions. As it has not undergone any advanced tuning/alignment, it can produce problematic outputs, especially if prompted to do so. Finally, this demo is limited to a session length of about 1,000 words.
136
- # """
137
- # )
138
-
139
- # gr.ChatInterface(
140
- # generate,
141
- # examples=EXAMPLES,
142
- # additional_inputs=additional_inputs,
143
- # )
144
-
145
- #demo.launch(show_api=True, share=True)
146
- #demo.queue(concurrency_count=100, api_open=False).launch(show_api=True)
147
-
148
  def query(system_prompt, user_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
149
  print(temperature, max_new_tokens, top_p, repetition_penalty)
150
  seed = 42
@@ -158,10 +30,6 @@ def query(system_prompt, user_prompt, temperature=0.9, max_new_tokens=256, top_p
158
  seed=seed,
159
  )
160
  prompt = f"System: {system_prompt}\nUser: {user_prompt}\nFalcon:"
161
- print(prompt)
162
- print('-----')
163
- #output = client.text_generation(prompt, **generate_kwargs, details=True, return_full_text=False)
164
- #print(output)
165
  stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
166
  output = ""
167
  for response in stream:
@@ -173,7 +41,6 @@ def query(system_prompt, user_prompt, temperature=0.9, max_new_tokens=256, top_p
173
  output = output.rstrip()
174
  #yield output
175
  #yield output
176
- print(output)
177
  return output
178
 
179
  iface = gr.Interface(
 
8
 
9
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
10
  API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
11
+ BOT_NAME = "Medic"
12
 
13
  STOP_SEQUENCES = ["\nUser:", "<|endoftext|>", " User:", "###"]
14
 
 
 
 
 
 
 
 
 
15
  client = InferenceClient(
16
  API_URL,
17
  headers={"Authorization": f"Bearer {HF_TOKEN}"},
18
  )
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def query(system_prompt, user_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
21
  print(temperature, max_new_tokens, top_p, repetition_penalty)
22
  seed = 42
 
30
  seed=seed,
31
  )
32
  prompt = f"System: {system_prompt}\nUser: {user_prompt}\nFalcon:"
 
 
 
 
33
  stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
34
  output = ""
35
  for response in stream:
 
41
  output = output.rstrip()
42
  #yield output
43
  #yield output
 
44
  return output
45
 
46
  iface = gr.Interface(