filipealmeida
committed on
Commit
•
be312ed
1
Parent(s):
68194a3
Add ability to configure model name
Browse files
app.py
CHANGED
@@ -12,13 +12,14 @@ logging.basicConfig(level=logging.INFO)
|
|
12 |
|
13 |
model_repo="filipealmeida/open-llama-3b-v2-pii-transform"
|
14 |
# model_filename="ggml-model-f16.gguf"
|
15 |
-
model_filename="ggml-model-Q8_0.gguf"
|
16 |
|
17 |
def download_model():
|
18 |
print("Downloading model...")
|
19 |
sys.stdout.flush()
|
|
|
20 |
file = hf_hub_download(
|
21 |
-
repo_id=model_repo, filename=model_filename
|
22 |
)
|
23 |
print("Downloaded " + file)
|
24 |
return file
|
@@ -44,17 +45,14 @@ def generate_text(prompt, example):
|
|
44 |
logging.info(f"Generated text: {full_text}")
|
45 |
|
46 |
|
47 |
-
parser = argparse.ArgumentParser()
|
48 |
-
parser.add_argument("--model", help="Path to the model file")
|
49 |
parser.add_argument("--gpu", help="How many GPU layers to use", default=0, type=int)
|
50 |
parser.add_argument("--ctx", help="How many context tokens to use", default=1024, type=int)
|
51 |
parser.add_argument("--temp", help="temperature", default=0.1, type=float)
|
52 |
args = parser.parse_args()
|
53 |
|
54 |
-
|
55 |
-
model_path = args.model
|
56 |
-
else:
|
57 |
-
model_path = download_model()
|
58 |
|
59 |
# TODO n_gpu_layers doesn't seem to be working
|
60 |
llm = Llama(model_path=model_path, n_gpu_layers=args.gpu, n_ctx=args.ctx)
|
|
|
12 |
|
13 |
model_repo="filipealmeida/open-llama-3b-v2-pii-transform"
|
14 |
# model_filename="ggml-model-f16.gguf"
|
15 |
+
# model_filename="ggml-model-Q8_0.gguf"
|
16 |
|
17 |
def download_model():
|
18 |
print("Downloading model...")
|
19 |
sys.stdout.flush()
|
20 |
+
print(f"Loading model from {args.model_filename}")
|
21 |
file = hf_hub_download(
|
22 |
+
repo_id=model_repo, filename=args.model_filename
|
23 |
)
|
24 |
print("Downloaded " + file)
|
25 |
return file
|
|
|
45 |
logging.info(f"Generated text: {full_text}")
|
46 |
|
47 |
|
48 |
+
parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
49 |
+
parser.add_argument("--model-filename", help="Path to the model file", default="ggml-model-Q8_0.gguf")
|
50 |
parser.add_argument("--gpu", help="How many GPU layers to use", default=0, type=int)
|
51 |
parser.add_argument("--ctx", help="How many context tokens to use", default=1024, type=int)
|
52 |
parser.add_argument("--temp", help="temperature", default=0.1, type=float)
|
53 |
args = parser.parse_args()
|
54 |
|
55 |
+
model_path = download_model()
|
|
|
|
|
|
|
56 |
|
57 |
# TODO n_gpu_layers doesn't seem to be working
|
58 |
llm = Llama(model_path=model_path, n_gpu_layers=args.gpu, n_ctx=args.ctx)
|