pseudotensor committed on
Commit
80d4e55
·
1 Parent(s): 1b1628c

Update with h2oGPT hash 8fc21162cdbe751ad32abb13f4e15e090d7af7ce

Browse files
Files changed (2) hide show
  1. app.py +49 -39
  2. client_test.py +56 -28
app.py CHANGED
@@ -83,6 +83,7 @@ def main(
83
  # set to True to load --base_model after client logs in,
84
  # to be able to free GPU memory when model is swapped
85
  login_mode_if_model0: bool = False,
 
86
 
87
  sanitize_user_prompt: bool = True,
88
  sanitize_bot_response: bool = True,
@@ -116,6 +117,9 @@ def main(
116
  # must override share if in spaces
117
  share = False
118
  save_dir = os.getenv('SAVE_DIR', save_dir)
 
 
 
119
 
120
  # get defaults
121
  model_lower = base_model.lower()
@@ -726,12 +730,12 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
726
  placeholder=kwargs['placeholder_input'])
727
  submit_nochat = gr.Button("Submit")
728
  flag_btn_nochat = gr.Button("Flag")
729
- if kwargs['score_model']:
730
- if not kwargs['auto_score']:
731
- with gr.Column():
732
- score_btn_nochat = gr.Button("Score last prompt & response")
733
- score_text_nochat = gr.Textbox("Response Score: NA", show_label=False)
734
- else:
735
  score_text_nochat = gr.Textbox("Response Score: NA", show_label=False)
736
  col_chat = gr.Column(visible=kwargs['chat'])
737
  with col_chat:
@@ -751,19 +755,19 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
751
  with gr.Row():
752
  clear = gr.Button("New Conversation")
753
  flag_btn = gr.Button("Flag")
754
- if kwargs['score_model']:
755
- if not kwargs['auto_score']: # FIXME: For checkbox model2
756
- with gr.Column():
757
- with gr.Row():
758
- score_btn = gr.Button("Score last prompt & response").style(
759
- full_width=False, size='sm')
760
- score_text = gr.Textbox("Response Score: NA", show_label=False)
761
- score_res2 = gr.Row(visible=False)
762
- with score_res2:
763
- score_btn2 = gr.Button("Score last prompt & response 2").style(
764
- full_width=False, size='sm')
765
- score_text2 = gr.Textbox("Response Score2: NA", show_label=False)
766
- else:
767
  score_text = gr.Textbox("Response Score: NA", show_label=False)
768
  score_text2 = gr.Textbox("Response Score2: NA", show_label=False, visible=False)
769
  retry = gr.Button("Regenerate")
@@ -942,7 +946,6 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
942
  fun = partial(evaluate,
943
  **kwargs_evaluate)
944
  fun2 = partial(evaluate,
945
- model_state2,
946
  **kwargs_evaluate)
947
 
948
  dark_mode_btn = gr.Button("Dark Mode", variant="primary").style(
@@ -1042,25 +1045,31 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
1042
  os.environ['TOKENIZERS_PARALLELISM'] = 'true'
1043
  return 'Response Score: {:.1%}'.format(score)
1044
 
 
 
1045
  if kwargs['score_model']:
1046
- score_args = dict(fn=score_last_response,
1047
- inputs=inputs_list + [text_output],
1048
- outputs=[score_text],
1049
- )
1050
- score_args2 = dict(fn=partial(score_last_response, model2=True),
1051
- inputs=inputs_list + [text_output2],
1052
- outputs=[score_text2],
1053
- )
 
 
 
 
1054
 
1055
- score_args_nochat = dict(fn=partial(score_last_response, nochat=True),
1056
- inputs=inputs_list + [text_output_nochat],
1057
- outputs=[score_text_nochat],
1058
- )
1059
- if not kwargs['auto_score']:
1060
- score_event = score_btn.click(**score_args, queue=stream_output, api_name='score') \
1061
- .then(**score_args2, queue=stream_output, api_name='score2')
1062
- score_event_nochat = score_btn_nochat.click(**score_args_nochat, queue=stream_output,
1063
- api_name='score_nochat')
1064
 
1065
  def user(*args, undo=False, sanitize_user_prompt=True, model2=False):
1066
  """
@@ -1416,14 +1425,15 @@ body.dark{background:linear-gradient(#0d0d0d,#333333);}"""
1416
  stop_btn.click(lambda: None, None, None,
1417
  cancels=[submit_event_nochat, submit_event, submit_event2, submit_event3],
1418
  queue=False, api_name='stop').then(clear_torch_cache)
1419
- demo.load(None,None,None,_js=dark_js)
1420
 
1421
  demo.queue(concurrency_count=1)
1422
  favicon_path = "h2o-logo.svg"
1423
  demo.launch(share=kwargs['share'], server_name="0.0.0.0", show_error=True,
1424
  favicon_path=favicon_path, prevent_thread_lock=True) # , enable_queue=True)
1425
  print("Started GUI", flush=True)
1426
- demo.block_thread()
 
1427
 
1428
 
1429
  input_args_list = ['model_state']
 
83
  # set to True to load --base_model after client logs in,
84
  # to be able to free GPU memory when model is swapped
85
  login_mode_if_model0: bool = False,
86
+ block_gradio_exit: bool = True,
87
 
88
  sanitize_user_prompt: bool = True,
89
  sanitize_bot_response: bool = True,
 
117
  # must override share if in spaces
118
  share = False
119
  save_dir = os.getenv('SAVE_DIR', save_dir)
120
+ score_model = os.getenv('SCORE_MODEL', score_model)
121
+ if score_model == 'None':
122
+ score_model = ''
123
 
124
  # get defaults
125
  model_lower = base_model.lower()
 
730
  placeholder=kwargs['placeholder_input'])
731
  submit_nochat = gr.Button("Submit")
732
  flag_btn_nochat = gr.Button("Flag")
733
+ if not kwargs['auto_score']:
734
+ with gr.Column(visible=kwargs['score_model']):
735
+ score_btn_nochat = gr.Button("Score last prompt & response")
736
+ score_text_nochat = gr.Textbox("Response Score: NA", show_label=False)
737
+ else:
738
+ with gr.Column(visible=kwargs['score_model']):
739
  score_text_nochat = gr.Textbox("Response Score: NA", show_label=False)
740
  col_chat = gr.Column(visible=kwargs['chat'])
741
  with col_chat:
 
755
  with gr.Row():
756
  clear = gr.Button("New Conversation")
757
  flag_btn = gr.Button("Flag")
758
+ if not kwargs['auto_score']: # FIXME: For checkbox model2
759
+ with gr.Column(visible=kwargs['score_model']):
760
+ with gr.Row():
761
+ score_btn = gr.Button("Score last prompt & response").style(
762
+ full_width=False, size='sm')
763
+ score_text = gr.Textbox("Response Score: NA", show_label=False)
764
+ score_res2 = gr.Row(visible=False)
765
+ with score_res2:
766
+ score_btn2 = gr.Button("Score last prompt & response 2").style(
767
+ full_width=False, size='sm')
768
+ score_text2 = gr.Textbox("Response Score2: NA", show_label=False)
769
+ else:
770
+ with gr.Column(visible=kwargs['score_model']):
771
  score_text = gr.Textbox("Response Score: NA", show_label=False)
772
  score_text2 = gr.Textbox("Response Score2: NA", show_label=False, visible=False)
773
  retry = gr.Button("Regenerate")
 
946
  fun = partial(evaluate,
947
  **kwargs_evaluate)
948
  fun2 = partial(evaluate,
 
949
  **kwargs_evaluate)
950
 
951
  dark_mode_btn = gr.Button("Dark Mode", variant="primary").style(
 
1045
  os.environ['TOKENIZERS_PARALLELISM'] = 'true'
1046
  return 'Response Score: {:.1%}'.format(score)
1047
 
1048
+ def noop_score_last_response(*args, **kwargs):
1049
+ return "Response Score: Disabled"
1050
  if kwargs['score_model']:
1051
+ score_fun = score_last_response
1052
+ else:
1053
+ score_fun = noop_score_last_response
1054
+
1055
+ score_args = dict(fn=score_fun,
1056
+ inputs=inputs_list + [text_output],
1057
+ outputs=[score_text],
1058
+ )
1059
+ score_args2 = dict(fn=partial(score_fun, model2=True),
1060
+ inputs=inputs_list + [text_output2],
1061
+ outputs=[score_text2],
1062
+ )
1063
 
1064
+ score_args_nochat = dict(fn=partial(score_fun, nochat=True),
1065
+ inputs=inputs_list + [text_output_nochat],
1066
+ outputs=[score_text_nochat],
1067
+ )
1068
+ if not kwargs['auto_score']:
1069
+ score_event = score_btn.click(**score_args, queue=stream_output, api_name='score') \
1070
+ .then(**score_args2, queue=stream_output, api_name='score2')
1071
+ score_event_nochat = score_btn_nochat.click(**score_args_nochat, queue=stream_output,
1072
+ api_name='score_nochat')
1073
 
1074
  def user(*args, undo=False, sanitize_user_prompt=True, model2=False):
1075
  """
 
1425
  stop_btn.click(lambda: None, None, None,
1426
  cancels=[submit_event_nochat, submit_event, submit_event2, submit_event3],
1427
  queue=False, api_name='stop').then(clear_torch_cache)
1428
+ demo.load(None,None,None, _js=dark_js)
1429
 
1430
  demo.queue(concurrency_count=1)
1431
  favicon_path = "h2o-logo.svg"
1432
  demo.launch(share=kwargs['share'], server_name="0.0.0.0", show_error=True,
1433
  favicon_path=favicon_path, prevent_thread_lock=True) # , enable_queue=True)
1434
  print("Started GUI", flush=True)
1435
+ if kwargs['block_gradio_exit']:
1436
+ demo.block_thread()
1437
 
1438
 
1439
  input_args_list = ['model_state']
client_test.py CHANGED
@@ -13,43 +13,69 @@ Currently, this will force model to be on a single GPU.
13
  Then run this client as:
14
 
15
  python client_test.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  """
17
 
18
  debug = False
19
 
20
  import os
21
  os.environ['HF_HUB_DISABLE_TELEMETRY'] = '1'
22
- from gradio_client import Client
23
-
24
- client = Client("http://localhost:7860")
25
- if debug:
26
- print(client.view_api(all_endpoints=True))
27
-
28
- instruction = '' # only for chat=True
29
- iinput = '' # only for chat=True
30
- context = ''
31
- # streaming output is supported, loops over and outputs each generation in streaming mode
32
- # but leave stream_output=False for simple input/output mode
33
- stream_output = False
34
- prompt_type = 'human_bot'
35
- temperature = 0.1
36
- top_p = 0.75
37
- top_k = 40
38
- num_beams = 1
39
- max_new_tokens = 50
40
- min_new_tokens = 0
41
- early_stopping = False
42
- max_time = 20
43
- repetition_penalty = 1.0
44
- num_return_sequences = 1
45
- do_sample = True
46
- # only these 2 below used if pass chat=False
47
- chat = False
48
- instruction_nochat = "Who are you?"
49
- iinput_nochat = ''
50
 
51
 
52
  def test_client_basic():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  args = [instruction,
54
  iinput,
55
  context,
@@ -71,12 +97,14 @@ def test_client_basic():
71
  iinput_nochat,
72
  ]
73
  api_name = '/submit_nochat'
 
74
  res = client.predict(
75
  *tuple(args),
76
  api_name=api_name,
77
  )
78
  res_dict = dict(instruction_nochat=instruction_nochat, iinput_nochat=iinput_nochat, response=md_to_text(res))
79
  print(res_dict)
 
80
 
81
 
82
  import markdown # pip install markdown
 
13
  Then run this client as:
14
 
15
  python client_test.py
16
+
17
+
18
+
19
+ For HF spaces:
20
+
21
+ HOST="https://h2oai-h2ogpt-chatbot.hf.space" python client_test.py
22
+
23
+ Result:
24
+
25
+ Loaded as API: https://h2oai-h2ogpt-chatbot.hf.space ✔
26
+ {'instruction_nochat': 'Who are you?', 'iinput_nochat': '', 'response': 'I am h2oGPT, a large language model developed by LAION.'}
27
+
28
+
29
+ For demo:
30
+
31
+ HOST="https://gpt.h2o.ai" python client_test.py
32
+
33
+ Result:
34
+
35
+ Loaded as API: https://gpt.h2o.ai ✔
36
+ {'instruction_nochat': 'Who are you?', 'iinput_nochat': '', 'response': 'I am h2oGPT, a chatbot created by LAION.'}
37
+
38
  """
39
 
40
  debug = False
41
 
42
  import os
43
  os.environ['HF_HUB_DISABLE_TELEMETRY'] = '1'
44
+
45
+
46
+ def get_client():
47
+ from gradio_client import Client
48
+
49
+ client = Client(os.getenv('HOST', "http://localhost:7860"))
50
+ if debug:
51
+ print(client.view_api(all_endpoints=True))
52
+ return client
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
 
55
  def test_client_basic():
56
+ instruction = '' # only for chat=True
57
+ iinput = '' # only for chat=True
58
+ context = ''
59
+ # streaming output is supported, loops over and outputs each generation in streaming mode
60
+ # but leave stream_output=False for simple input/output mode
61
+ stream_output = False
62
+ prompt_type = 'human_bot'
63
+ temperature = 0.1
64
+ top_p = 0.75
65
+ top_k = 40
66
+ num_beams = 1
67
+ max_new_tokens = 50
68
+ min_new_tokens = 0
69
+ early_stopping = False
70
+ max_time = 20
71
+ repetition_penalty = 1.0
72
+ num_return_sequences = 1
73
+ do_sample = True
74
+ # only these 2 below used if pass chat=False
75
+ chat = False
76
+ instruction_nochat = "Who are you?"
77
+ iinput_nochat = ''
78
+
79
  args = [instruction,
80
  iinput,
81
  context,
 
97
  iinput_nochat,
98
  ]
99
  api_name = '/submit_nochat'
100
+ client = get_client()
101
  res = client.predict(
102
  *tuple(args),
103
  api_name=api_name,
104
  )
105
  res_dict = dict(instruction_nochat=instruction_nochat, iinput_nochat=iinput_nochat, response=md_to_text(res))
106
  print(res_dict)
107
+ return res_dict
108
 
109
 
110
  import markdown # pip install markdown