King-Harry committed on
Commit 1c9818f · verified · 1 Parent(s): 7ce6937

Update README.md

Files changed (1)
  1. README.md +40 -7
README.md CHANGED
@@ -60,18 +60,51 @@ The model is designed for responsible data management, ensuring that sensitive i
 To use this model, you can load it from the Hugging Face Hub and integrate it into your Python or API-based applications. Below is an example of how to load and use the model:
 
 ```python
+# Install necessary packages
+!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
+!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
+
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from unsloth import FastLanguageModel
 
+# Load the fine-tuned model from Hugging Face Hub
 model_name = "King-Harry/Ninja-Masker-2-PII-Redaction"
-model = AutoModelForCausalLM.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+model, tokenizer = FastLanguageModel.from_pretrained(model_name, load_in_4bit=True)
+
+# Ensure the model is ready for inference
+FastLanguageModel.for_inference(model)
+
+# Define the Alpaca-style prompt
+alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+### Instruction:
+{}
+
+### Input:
+{}
+
+### Response:
+{}"""
+
+# Define the input text using the Alpaca prompt
+inputs = tokenizer(
+    [
+        alpaca_prompt.format(
+            "Replace all the PII from this text and use only the following tags: [FULLNAME], [NAME], [EMAIL], [CITY], [JOBAREA], [FIRSTNAME], [STATE], [STREETADDRESS], [URL], [USERNAME], [NUMBER], [JOBTITLE], [LASTNAME], [ACCOUNTNUMBER], [AMOUNT], [BUILDINGNUMBER], [ZIPCODE], [CURRENCY], [STREET], [PASSWORD], [IPV4], [CURRENCYNAME], [ACCOUNTNAME], [GENDER], [COUNTY], [CREDITCARDNUMBER], [DISPLAYNAME], [IPV6], [USERAGENT], [BITCOINADDRESS], [CURRENCYCODE], [JOBTYPE], [IBAN], [ETHEREUMADDRESS], [MAC], [IP], [CREDITCARDISSUER], [CREDITCARDCVV], [MASKEDNUMBER], [SEX], [JOBDESCRIPTOR]",  # instruction
+            "Write an email to Kendra Harvey at [email protected] summarizing the key findings from a recent cognitive therapy conference they attended.",  # input
+            ""  # output - leave this blank for generation!
+        )
+    ],
+    return_tensors="pt"
+).to("cuda")
+
+# Generate the redacted output
+outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
 
-input_text = "Write an email to Kendra Harvey at [email protected] summarizing the key findings from a recent cognitive therapy conference."
-inputs = tokenizer(input_text, return_tensors="pt")
-outputs = model.generate(**inputs, max_new_tokens=64)
+# Decode and print the output
+redacted_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+print(redacted_text[0])
 
-redacted_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-print(redacted_text)
 ```
 
 ### Citation
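
One note for anyone adapting the new snippet: because this is a decoder-only model, `model.generate` returns the prompt tokens together with the completion, so `redacted_text[0]` still contains the full Alpaca prompt. A minimal post-processing sketch, assuming the Alpaca template above (the `extract_response` helper is hypothetical, not part of the committed README):

```python
def extract_response(decoded: str) -> str:
    # Hypothetical helper: keep only the text after the final
    # "### Response:" marker of the Alpaca-style prompt.
    return decoded.split("### Response:")[-1].strip()

# Print only the model's redacted output, without the echoed prompt
print(extract_response(redacted_text[0]))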