filipealmeida committed
Commit: 16cbd47
Parent(s): 69fd23e

Allow local models to be loaded
app.py CHANGED
@@ -47,12 +47,18 @@ def generate_text(prompt, example):
 
 parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
 parser.add_argument("--model-filename", help="Path to the model file", default="ggml-model-Q8_0.gguf")
+parser.add_argument("--model-local", help="Path to the model file")
 parser.add_argument("--gpu", help="How many GPU layers to use", default=0, type=int)
 parser.add_argument("--ctx", help="How many context tokens to use", default=1024, type=int)
 parser.add_argument("--temp", help="temperature", default=0.8, type=float)
 args = parser.parse_args()
 
-model_path =
+model_path = None
+if args.model_local:
+    model_path = args.model_local
+else:
+    model_path = download_model()
+print(f"Loading model from {model_path}")
 
 # TODO n_gpu_layers doesn't seem to be working
 llm = Llama(model_path=model_path, n_gpu_layers=args.gpu, n_ctx=args.ctx)
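
The else branch still relies on download_model(), whose implementation is not part of this diff. As a rough sketch of what such a helper might look like if the GGUF weights live on the Hugging Face Hub (the repo_id below is a placeholder, huggingface_hub is assumed to be installed, and the real function in app.py may differ):

# Hypothetical sketch, not the code from app.py: fetch the quantized GGUF
# from the Hugging Face Hub and return its local cache path, which is the
# string that Llama(model_path=...) expects.
from huggingface_hub import hf_hub_download

def download_model(repo_id="<owner>/<repo>", filename="ggml-model-Q8_0.gguf"):
    return hf_hub_download(repo_id=repo_id, filename=filename)

With the new flag the download is skipped entirely, e.g. python app.py --model-local /path/to/ggml-model-Q8_0.gguf --gpu 0 --ctx 1024.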