rishikasharma committed on
Commit
4d1b6b3
·
verified ·
1 Parent(s): 8ab9a1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -9
app.py CHANGED
@@ -1,10 +1,51 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
 
 
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
 
10
  def respond(
@@ -39,10 +80,6 @@ def respond(
39
  response += token
40
  yield response
41
 
42
-
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
  demo = gr.ChatInterface(
47
  respond,
48
  additional_inputs=[
@@ -61,4 +98,4 @@ demo = gr.ChatInterface(
61
 
62
 
63
  if __name__ == "__main__":
64
- demo.launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ from datasets import load_dataset
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM
5
+ from transformers import Trainer, TrainingArguments
6
 
7
+ model_name = "HuggingFaceH4/zephyr-7b-beta"
8
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
9
+ model = AutoModelForCausalLM.from_pretrained(model_name)
10
+
11
+ dataset = load_dataset("json", data_files="data.json", split = "train")
12
+
13
+ # Tokenize the dataset
14
+ def preprocess_function(examples):
15
+ inputs = examples['input']
16
+ targets = examples['output']
17
+ model_inputs = tokenizer(inputs, padding=True, truncation=True)
18
+ labels = tokenizer(targets, padding=True, truncation=True).input_ids
19
+ model_inputs['labels'] = labels
20
+ return model_inputs
21
+
22
+ tokenized_datasets = dataset.map(preprocess_function, batched = True)
23
+
24
+ training_args = TrainingArguments(
25
+ output_dir = "./results",
26
+ evaluation_strategy = "epoch",
27
+ learning_rate = 2e-5,
28
+ per_device_train_batch_size = 3,
29
+ weight_decay = 0.01,
30
+ )
31
+
32
+ trainer = Trainer(
33
+ model = model,
34
+ args = training_args,
35
+ train_dataset = tokenized_datasets["train"],
36
+ eval_dataset = tokenized_datasets["validation"],
37
+ )
38
+
39
+ # Start fine-tuning
40
+ trainer.train()
41
+
42
+ trainer.evaluate()
43
+
44
+ model.save_pretrained("./fine_tuned_model")
45
+ tokenizer.save_pretrained("./fine_tuned_model")
46
+
47
+
48
+ client = InferenceClient("./fine_tuned_model")
49
 
50
 
51
  def respond(
 
80
  response += token
81
  yield response
82
 
 
 
 
 
83
  demo = gr.ChatInterface(
84
  respond,
85
  additional_inputs=[
 
98
 
99
 
100
  if __name__ == "__main__":
101
+ demo.launch()