filipealmeida committed
Commit be312ed
1 Parent(s): 68194a3

Add ability to configure model name

Files changed (1):
  1. app.py +6 -8
app.py CHANGED
@@ -12,13 +12,14 @@ logging.basicConfig(level=logging.INFO)
 
 model_repo="filipealmeida/open-llama-3b-v2-pii-transform"
 # model_filename="ggml-model-f16.gguf"
-model_filename="ggml-model-Q8_0.gguf"
+# model_filename="ggml-model-Q8_0.gguf"
 
 def download_model():
     print("Downloading model...")
     sys.stdout.flush()
+    print(f"Loading model from {args.model_filename}")
     file = hf_hub_download(
-        repo_id=model_repo, filename=model_filename
+        repo_id=model_repo, filename=args.model_filename
     )
     print("Downloaded " + file)
     return file
@@ -44,17 +45,14 @@ def generate_text(prompt, example):
     logging.info(f"Generated text: {full_text}")
 
 
-parser = argparse.ArgumentParser()
-parser.add_argument("--model", help="Path to the model file")
+parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("--model-filename", help="Path to the model file", default="ggml-model-Q8_0.gguf")
 parser.add_argument("--gpu", help="How many GPU layers to use", default=0, type=int)
 parser.add_argument("--ctx", help="How many context tokens to use", default=1024, type=int)
 parser.add_argument("--temp", help="temperature", default=0.1, type=float)
 args = parser.parse_args()
 
-if args.model:
-    model_path = args.model
-else:
-    model_path = download_model()
+model_path = download_model()
 
 # TODO n_gpu_layers doesn't seem to be working
 llm = Llama(model_path=model_path, n_gpu_layers=args.gpu, n_ctx=args.ctx)
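For reference, the net effect of this commit: the GGUF filename moves from a hard-coded module constant into a --model-filename CLI flag (default ggml-model-Q8_0.gguf), and download_model() reads it via the global args. Below is a minimal standalone sketch of the resulting download path; the explicit function parameter is a simplification for illustration, since the committed code reads the module-level args instead:

    from huggingface_hub import hf_hub_download

    def download_model(model_filename: str = "ggml-model-Q8_0.gguf") -> str:
        # Same repo as in app.py; the filename is now caller-configurable
        # rather than a hard-coded module constant.
        print(f"Loading model from {model_filename}")
        path = hf_hub_download(
            repo_id="filipealmeida/open-llama-3b-v2-pii-transform",
            filename=model_filename,
        )
        print("Downloaded " + path)
        return path

With the committed version, the same selection happens from the command line, e.g. python app.py --model-filename ggml-model-f16.gguf to pull the f16 build mentioned in the commented-out line; using ArgumentDefaultsHelpFormatter also makes the Q8_0 default visible in the --help output.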