File size: 6,428 Bytes
18ac6c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
# -*- coding: utf-8 -*-
"""Inference_LawyerGPT_Finetune_falcon7b_Indian_Law_Data.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1NpBtrGAcXsmoSmM5Sr-INiE5-tU9D37n
### Install requirements
First, run the cells below to install the requirements:
"""
!nvidia-smi
!pip install -Uqqq pip --progress-bar off
!pip install -qqq bitsandbytes==0.39.0
!pip install -qqq torch--2.0.1 --progress-bar off
!pip install -qqq -U git+https://github.com/huggingface/transformers.git@e03a9cc --progress-bar off
!pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f --progress-bar off
!pip install -qqq -U git+https://github.com/huggingface/accelerate.git@c9fbb71 --progress-bar off
!pip install -qqq datasets==2.12.0 --progress-bar off
!pip install -qqq loralib==0.1.1 --progress-bar off
!pip install einops
import os
# from pprint import pprint
# import json
import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from huggingface_hub import notebook_login
from peft import (
LoraConfig,
PeftConfig,
get_peft_model,
prepare_model_for_kbit_training,
)
from transformers import (
AutoConfig,
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
)
# Restrict CUDA to device 0 for this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Interactive Hugging Face Hub login: enter the access token at the prompt.
# NOTE(security): never store access tokens in the source, not even in a
# comment. A token that was committed here has been removed and should be
# revoked in the Hub account settings.
notebook_login()
"""### Load dataset"""
from datasets import load_dataset

# Hub identifier of the dataset to load; the commented line is an alternative.
# dataset_name = "patrick11434/TEST_LLM_DATASET"
dataset_name = "nisaar/Lawyer_GPT_India"

# Only the "train" split is loaded.
dataset = load_dataset(path=dataset_name, split="train")
"""## Load adapters from the Hub
You can also directly load adapters from the Hub using the commands below:
"""
# PeftModel is the only extra name needed from peft here, so import it
# explicitly instead of using a wildcard import.
from peft import PeftModel

# 4-bit NF4 quantisation with bfloat16 compute. Double quantisation has to be
# requested through `bnb_4bit_use_double_quant`; a misspelled keyword is not
# applied, so the option would stay disabled.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Change peft_model_id to load a different adapter from the Hub.
peft_model_id = "nisaar/falcon7b-Indian_Law_150Prompts"

# The adapter config provides the name of the base model it was trained on.
config = PeftConfig.from_pretrained(peft_model_id)

# NOTE(security): trust_remote_code=True runs model code downloaded from the
# Hub; only use it with repositories you trust.
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Wrap the quantised base model with the fine-tuned adapter weights.
model = PeftModel.from_pretrained(model, peft_model_id)
"""## Inference
You can then use the trained model, or the model you loaded from the 🤗 Hub, for inference as you normally would in `transformers`.
"""
# Adjust the model's own generation config in place; generate_response passes
# this object to model.generate.
generation_config = model.generation_config
generation_config.max_new_tokens = 200
# Set the attributes on the config object (dot access). Assigning to names such
# as `generation_config_temperature` only creates unrelated globals and leaves
# the config untouched.
# NOTE(review): temperature and top_p are sampling parameters; presumably they
# only take effect when sampling is enabled (do_sample=True) - confirm.
generation_config.temperature = 1
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
# Pad and stop on the tokenizer's end-of-sequence token.
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

# Device the tokenised prompt is moved to before generation.
DEVICE = "cuda:0"
# Commented out IPython magic to ensure Python compatibility.
# %%time
# prompt = f"""
# <human>: Who appoints the Chief Justice of India?
# <assistant>:
# """.strip()
#
# encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
# with torch.inference_mode():
# outputs = model.generate(
# input_ids=encoding.input_ids,
# attention_mask=encoding.attention_mask,
# generation_config=generation_config,
# )
# print(tokenizer.decode(outputs[0],skip_special_tokens=True))
def generate_response(question: str) -> str:
    """Generate the assistant's answer to ``question``.

    The question is placed in the ``<human>: ... <assistant>:`` prompt
    template, tokenised, moved to ``DEVICE`` and passed to ``model.generate``
    together with the module-level ``generation_config``. The first returned
    sequence is decoded with special tokens skipped.

    Args:
        question: Text to insert into the ``<human>:`` turn of the prompt.

    Returns:
        The decoded text that follows the first ``<assistant>:`` marker, with
        surrounding whitespace stripped. If the marker does not appear in the
        decoded output, the whole decoded text (stripped) is returned.
    """
    prompt = f"""
<human>: {question}
<assistant>:
""".strip()
    encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    assistant_start = "<assistant>:"
    # partition() reports whether the marker was found. A find()-based slice
    # would get -1 for a missing marker and silently drop the first characters
    # of the response.
    _, marker, answer = response.partition(assistant_start)
    return (answer if marker else response).strip()
# Demo questions, answered one after another in this order. Repeated entries
# are kept so the printed output sequence stays the same.
_DEMO_PROMPTS = (
    "Debate the merits and demerits of introducing simultaneous elections in India?",
    "What are the duties of the President of India as per the Constitution?",
    "Write a legal memo on the issue of manual scavenging in light of The Prohibition of Employment as Manual Scavengers and their Rehabilitation Act, 2013.",
    "Explain the concept of 'Separation of Powers' in the Indian Constitution",
    "Can you explain the steps for registration of a trademark in India?",
    "What are the potential implications of the proposed Personal Data Protection Bill on tech companies in India?",
    "Can you draft a non-disclosure agreement (NDA) under Indian law?",
    "Can you summarize the main points of Article 21 of the Indian Constitution?",
    "Can you summarize the main arguments of the Supreme Court of India judgment in Kesavananda Bharati v. State of Kerala?",
    "what is the mysterious case of Advocate Nisaar that was a famous in supreme court of india?",
    "what is the mysterious case of Advocate Nisaar that was a famous in supreme court of india?",
    "Can you draft a confidentiality clause for a contract under Indian law?",
    "How is the concept of 'Economic Justice' enshrined in the Preamble of the Indian Constitution??",
    "What is the role of the 'Supreme Court' in preserving the fundamental rights of citizens in India?",
    "Analyze the potential impact of 'Online Education Rights' for students in India?",
    "Analyze the potential impact of 'Online Education Rights' for students in India?",
    "Discuss the potential effects of a 'Universal Basic Income' policy in India",
    "Analyze the potential impact of 'Online Education Rights' for students in India?",
)

# `prompt` stays a module-level name and ends up bound to the last question.
for prompt in _DEMO_PROMPTS:
    print(generate_response(prompt))