ramalMr committed (verified)
Commit fceacf5 · 1 Parent(s): 923f75f

Update app.py

Files changed (1):
  1. app.py +31 -12
app.py CHANGED
@@ -1,18 +1,31 @@
 from huggingface_hub import InferenceClient
 import gradio as gr
+import re
+from nltk.tokenize import sent_tokenize
 
 client = InferenceClient(
     "mistralai/Mixtral-8x7B-Instruct-v0.1"
 )
 
-
 def format_prompt(message, history):
-    prompt = "<s>"
-    for user_prompt, bot_response in history:
-        prompt += f"[INST] {user_prompt} [/INST]"
-        prompt += f" {bot_response}</s> "
-    prompt += f"[INST] {message} [/INST]"
-    return prompt
+    prompt = "<s>"
+    for user_prompt, bot_response in history:
+        prompt += f"[INST] {user_prompt} [/INST]"
+        prompt += f" {bot_response}</s> "
+    prompt += f"[INST] {message} [/INST]"
+    return prompt
+
+def tokenize_sentences(file_content):
+    sentences = sent_tokenize(file_content)  # content is decoded to str by the caller
+    return sentences
+
+def generate_synthetic_data(prompt, sentences):
+    synthetic_data = []
+    for sentence in sentences:
+        # Apply the prompt instructions to generate synthetic data from the sentence
+        synthetic_sentence = f"{prompt}: {sentence}"
+        synthetic_data.append(synthetic_sentence)
+    return "\n".join(synthetic_data)
 
 def generate(
     prompt, history, system_prompt, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, files=None
@@ -34,8 +47,12 @@ def generate(
     formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
 
     if files is not None:
-        for file in files:
-            formatted_prompt += f"\n\nFile content: {file.decode()}"
+        file_contents = [file.decode() for file in files]
+        sentences = []
+        for content in file_contents:
+            sentences.extend(tokenize_sentences(content))
+        synthetic_data = generate_synthetic_data(prompt, sentences)
+        formatted_prompt += f"\n\nSynthetic data: {synthetic_data}"
 
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     output = ""
@@ -45,13 +62,17 @@ def generate(
         yield output
     return output
 
-
 additional_inputs=[
     gr.Textbox(
         label="System Prompt",
         max_lines=1,
         interactive=True,
     ),
+    gr.Textbox(
+        label="Prompt for Synthetic Data Generation",
+        max_lines=1,
+        interactive=True,
+    ),
     gr.Slider(
         label="Temperature",
         value=0.9,
@@ -96,8 +117,6 @@ additional_inputs=[
     )
 ]
 
-
-
 gr.ChatInterface(
     fn=generate,
     chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
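For reference, format_prompt emits the Mixtral-8x7B-Instruct chat template: each past turn is wrapped in [INST] ... [/INST] with the reply closed by </s>, and the new message goes in a final [INST] block. The shape of the output:

    >>> format_prompt("What is 2+2?", [("Hello", "Hi there!")])
    '<s>[INST] Hello [/INST] Hi there!</s> [INST] What is 2+2? [/INST]'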
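One runtime caveat: sent_tokenize relies on NLTK's punkt tokenizer models, which do not ship with the nltk package itself. A small guard near the imports (a sketch, assuming the Space can download data at startup) avoids a LookupError on the first file upload:

    import nltk

    # Fetch the punkt sentence-tokenizer models once if they are missing.
    try:
        nltk.data.find("tokenizers/punkt")
    except LookupError:
        nltk.download("punkt")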
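gr.ChatInterface passes additional_inputs to fn positionally after (message, history), so the new textbox, inserted second in the list, is delivered where generate expects temperature, shifting every slider by one. A sketch of the matching signature change (synthetic_prompt is an illustrative name, not from this commit):

    # Hypothetical: give the new textbox its own parameter so the sliders
    # keep lining up with temperature, max_new_tokens, and the rest.
    def generate(prompt, history, system_prompt, synthetic_prompt,
                 temperature=0.9, max_new_tokens=256, top_p=0.95,
                 repetition_penalty=1.0, files=None):
        ...

generate_synthetic_data could then take synthetic_prompt instead of the chat message, which appears to be the textbox's intent.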
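Taken together, the new helpers only prefix each sentence of the uploaded files with the instruction text; the model sees the joined result inside the prompt. A quick check (assuming the punkt data is installed):

    sentences = tokenize_sentences("First point. Second point.")
    print(generate_synthetic_data("Paraphrase", sentences))
    # Paraphrase: First point.
    # Paraphrase: Second point.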