|
|
|
import gradio as gr |
|
import logging |
|
from llama_cpp import Llama |
|
from huggingface_hub import hf_hub_download |
|
import sys |
|
import argparse |
|
|
|
|
|
# Emit INFO-level (and above) log records for the whole application.
logging.basicConfig(level=logging.INFO)

# Hugging Face Hub repository that hosts the GGUF PII-transform model.
model_repo = "filipealmeida/open-llama-3b-v2-pii-transform"
|
|
|
|
|
|
|
def download_model(model_filename=None):
    """Download (or fetch from cache) the model file from the Hugging Face Hub.

    Args:
        model_filename: Name of the file inside ``model_repo`` to fetch.
            Defaults to the ``--model-filename`` CLI argument parsed at
            module level, preserving the original no-argument call.

    Returns:
        str: Local filesystem path of the downloaded file.
    """
    if model_filename is None:
        # Backward-compatible fallback to the module-level CLI args.
        model_filename = args.model_filename
    print("Downloading model...")
    sys.stdout.flush()
    # Fixed message: the previous text claimed to be "loading" a model
    # from a filename before anything had been downloaded.
    print(f"Downloading {model_filename} from {model_repo}")
    file_path = hf_hub_download(
        repo_id=model_repo, filename=model_filename
    )
    print("Downloaded " + file_path)
    return file_path
|
|
|
def generate_text(prompt, example):
    """Stream a PII-sanitized rewrite of *prompt* from the loaded model.

    Args:
        prompt: User-supplied text that may contain PII.
        example: Unused; present to match the Gradio input signature.

    Yields:
        str: The full text generated so far, updated after each streamed
        token chunk (Gradio renders successive yields as live output).
    """
    logging.debug(f"Received prompt: {prompt}")
    # Instruction-style prompt template. Renamed from `input`, which
    # shadowed the builtin. The exact whitespace is preserved because it
    # is part of the prompt the model was tuned on.
    instruction = f"""

### Instruction:

{prompt}

### Response:

"""

    logging.info(f"Input : {instruction}")

    # Stream tokens; generation stops at end-of-sequence or a new "###"
    # section marker. Temperature comes from the --temp CLI flag.
    output_stream = llm(instruction, max_tokens=-1, stop=["</s>", "###"], stream=True, temperature=args.temp)

    full_text = ""
    for output_chunk in output_stream:
        # Defensive access: tolerate chunks missing 'choices' or 'text'.
        text_chunk = output_chunk.get('choices', [{}])[0].get('text', "")
        full_text += text_chunk
        yield full_text

    logging.info(f"Generated text: {full_text}")
|
|
|
|
|
# ---- Command-line configuration ----------------------------------------
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--model-filename", help="Path to the model file", default="ggml-model-Q4_0.gguf")
parser.add_argument("--model-local", help="Path to the model file")
parser.add_argument("--gpu", help="How many GPU layers to use", default=0, type=int)
parser.add_argument("--ctx", help="How many context tokens to use", default=1024, type=int)
parser.add_argument("--temp", help="temperature", default=0.8, type=float)
args = parser.parse_args()

# Prefer an explicitly given local model path; otherwise pull the file
# down from the Hugging Face Hub.
model_path = args.model_local if args.model_local else download_model()
print(f"Loading model from {model_path}")

# Instantiate the llama.cpp model with the requested GPU-layer count and
# context window size.
llm = Llama(model_path=model_path, n_gpu_layers=args.gpu, n_ctx=args.ctx)
|
|
|
|
|
# ---- Gradio UI ----------------------------------------------------------
# One textbox in (text containing PII), one textbox out (sanitized text).
pii_input = gr.Textbox(
    lines=4,
    placeholder="Enter text to anonimize...",
    label="Text with PII",
    value="My name is Filipe and my phone number is 555-121-2234. How are you?\nWant to meet up in Los Angeles at 5pm by the Grove?\nI live in downtown LA.",
)
sanitized_output = gr.Textbox(label="PII Sanitized version of the text")

interface = gr.Interface(
    fn=generate_text,
    inputs=[pii_input],
    outputs=sanitized_output,
    title="PII Sanitization Model",
    description="This application assists you in transforming personally identifiable information (PII) present in the text you provide by replacing it with synthetic PII. Simply input a text containing PII in the textbox below, and the app will return a version of the text with the original PII replaced with synthetic, non-sensitive information. The process ensures the privacy and security of sensitive data while retaining the utility of the text. This is an EXPERIMENTAL application and should not be used in production. DO NOT TYPE real PII in the app as this is a public server and is only to be used for demonstration purposes."
)

# Queuing is required so the generator's incremental yields stream to the
# browser instead of only the final value being shown.
interface.queue()

interface.launch()
|
|