Text Generation
Transformers
English
Inference Endpoints
vivek2001123 committed on
Commit
18ac6c3
·
1 Parent(s): af215e8

Upload inference_lawyergpt_finetune_falcon7b_indian_law_data (1).py

Browse files
inference_lawyergpt_finetune_falcon7b_indian_law_data (1).py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Inference_LawyerGPT_Finetune_falcon7b_Indian_Law_Data.ipynb
3
+
4
+ Automatically generated by Colaboratory.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1NpBtrGAcXsmoSmM5Sr-INiE5-tU9D37n
8
+
9
+ ### Install requirements
10
+
11
+ First, run the cells below to install the requirements:
12
+ """
13
+
14
+ !nvidia-smi
15
+
16
+ !pip install -Uqqq pip --progress-bar off
17
+ !pip install -qqq bitsandbytes==0.39.0
18
+ !pip install -qqq torch--2.0.1 --progress-bar off
19
+ !pip install -qqq -U git+https://github.com/huggingface/transformers.git@e03a9cc --progress-bar off
20
+ !pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f --progress-bar off
21
+ !pip install -qqq -U git+https://github.com/huggingface/accelerate.git@c9fbb71 --progress-bar off
22
+ !pip install -qqq datasets==2.12.0 --progress-bar off
23
+ !pip install -qqq loralib==0.1.1 --progress-bar off
24
+ !pip install einops
25
+
26
+ import os
27
+ # from pprint import pprint
28
+ # import json
29
+
30
+ import bitsandbytes as bnb
31
+ import pandas as pd
32
+ import torch
33
+ import torch.nn as nn
34
+ import transformers
35
+ from datasets import load_dataset
36
+ from huggingface_hub import notebook_login
37
+ from peft import (
38
+ LoraConfig,
39
+ PeftConfig,
40
+ get_peft_model,
41
+ prepare_model_for_kbit_training,
42
+ )
43
+ from transformers import (
44
+ AutoConfig,
45
+ AutoModelForCausalLM,
46
+ AutoTokenizer,
47
+ BitsAndBytesConfig,
48
+ )
49
+
50
# Expose only the first GPU to CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Authenticate against the Hugging Face Hub. The token must be entered at the
# login prompt (or supplied through the environment) and never written into
# the source. SECURITY: an access token was previously recorded in a comment
# here; treat it as leaked and revoke it in the Hub account settings.
notebook_login()
54
+
55
+ """### Load dataset"""
56
+
57
from datasets import load_dataset

# Hub identifier of the dataset to fetch.
# Alternative identifier kept for reference: "patrick11434/TEST_LLM_DATASET"
dataset_name = "nisaar/Lawyer_GPT_India"

# Only the "train" split is loaded.
dataset = load_dataset(path=dataset_name, split="train")
62
+
63
+ """## Load adapters from the Hub
64
+
65
+ You can also directly load adapters from the Hub using the commands below:
66
+ """
67
+
68
+ from peft import *
69
+
70
+ #change peft_model_id
71
# 4-bit NF4 quantisation with bfloat16 compute. The double-quantisation
# option is spelled `bnb_4bit_use_double_quant`; the earlier
# `load_4bit_use_double_quant` spelling is not a BitsAndBytesConfig option.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# The adapter's PEFT config names the base model it was trained on, so the
# base checkpoint and tokenizer are resolved from it rather than hard-coded.
peft_model_id = "nisaar/falcon7b-Indian_Law_150Prompts"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

# Wrap the quantised base model with the fine-tuned adapter weights.
model = PeftModel.from_pretrained(model, peft_model_id)
92
+
93
+ """## Inference
94
+
95
+ You can then directly use the trained model or the model that you have loaded from the 🤗 Hub for inference as you would do it usually in `transformers`.
96
+ """
97
+
98
# Adjust the model's own generation config in place; generate_response()
# passes this object to model.generate().
generation_config = model.generation_config
generation_config.max_new_tokens = 200
# Set as attributes on the config. The previous `generation_config_temperature`
# and `generation_config_eod_token_id` assignments only created unused globals.
generation_config.temperature = 1
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
# NOTE(review): temperature/top_p normally apply only when sampling is enabled
# (do_sample=True); confirm whether greedy decoding is intended here.

# Device that tokenised prompts are moved to before generation.
DEVICE = "cuda:0"
107
+
108
+ # Commented out IPython magic to ensure Python compatibility.
109
+ # %%time
110
+ # prompt = f"""
111
+ # <human>: Who appoints the Chief Justice of India?
112
+ # <assistant>:
113
+ # """.strip()
114
+ #
115
+ # encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
116
+ # with torch.inference_mode():
117
+ # outputs = model.generate(
118
+ # input_ids=encoding.input_ids,
119
+ # generation_config=generation_config,
120
+ # )
121
+ # print(tokenizer.decode(outputs[0],skip_special_tokens=True))
122
+
123
def generate_response(question: str) -> str:
    """Return the model's answer to ``question``.

    The question is wrapped in the ``<human>:`` / ``<assistant>:`` prompt
    template, tokenised, moved to ``DEVICE`` and passed to ``model.generate``
    together with the module-level ``generation_config``. The first returned
    sequence is decoded and everything up to and including the first
    ``<assistant>:`` marker is dropped.

    Args:
        question: The user question to insert into the prompt template.

    Returns:
        The decoded text following the first ``<assistant>:`` marker, stripped
        of surrounding whitespace. If the marker is absent from the decoded
        text, the whole decoded text (stripped) is returned.
    """
    assistant_start = "<assistant>:"
    prompt = f"<human>: {question}\n{assistant_start}"

    encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # partition() splits on the first marker, like the earlier find()-based
    # slice, but reports a missing marker explicitly. find() returned -1 in
    # that case and the slice silently cut off the start of the response.
    _, marker, answer = response.partition(assistant_start)
    if not marker:
        return response.strip()
    return answer.strip()
140
+
141
# Sample questions, queried in this exact order. Repeated entries are kept so
# that each one is sent to the model again, as in the original sequence.
PROMPTS = (
    "Debate the merits and demerits of introducing simultaneous elections in India?",
    "What are the duties of the President of India as per the Constitution?",
    "Write a legal memo on the issue of manual scavenging in light of The Prohibition of Employment as Manual Scavengers and their Rehabilitation Act, 2013.",
    "Explain the concept of 'Separation of Powers' in the Indian Constitution",
    "Can you explain the steps for registration of a trademark in India?",
    "What are the potential implications of the proposed Personal Data Protection Bill on tech companies in India?",
    "Can you draft a non-disclosure agreement (NDA) under Indian law?",
    "Can you summarize the main points of Article 21 of the Indian Constitution?",
    "Can you summarize the main arguments of the Supreme Court of India judgment in Kesavananda Bharati v. State of Kerala?",
    "what is the mysterious case of Advocate Nisaar that was a famous in supreme court of india?",
    "what is the mysterious case of Advocate Nisaar that was a famous in supreme court of india?",
    "Can you draft a confidentiality clause for a contract under Indian law?",
    "How is the concept of 'Economic Justice' enshrined in the Preamble of the Indian Constitution??",
    "What is the role of the 'Supreme Court' in preserving the fundamental rights of citizens in India?",
    "Analyze the potential impact of 'Online Education Rights' for students in India?",
    "Analyze the potential impact of 'Online Education Rights' for students in India?",
    "Discuss the potential effects of a 'Universal Basic Income' policy in India",
    "Analyze the potential impact of 'Online Education Rights' for students in India?",
)

# Ask each question in turn and print the model's answer.
for prompt in PROMPTS:
    print(generate_response(prompt))