NicholasJohn committed on
Commit cf14dc1 · 1 Parent(s): c1dba53
Files changed (1)
  1. app.py +10 -37
app.py CHANGED
@@ -7,44 +7,17 @@ from huggingface_hub.file_download import http_get
 
 # huggingface-cli download microsoft/Phi-3-mini-4k-instruct-gguf Phi-3-mini-4k-instruct-q4.gguf --local-dir .
 # huggingface-cli download LoneStriker/OpenBioLLM-Llama3-8B-GGUF --local-dir ./llama3-gguf
-# Explicitly create the cache directory if it doesn't exist
 
-
-def load_model(
-    directory: str = ".",
-    model_name: str = "OpenBioLLM-Llama3-8B-Q5_K_M.gguf",
-    model_url: str = "https://huggingface.co/LoneStriker/OpenBioLLM-Llama3-8B-GGUF/resolve/main/OpenBioLLM-Llama3-8B-Q5_K_M.gguf"
-):
-    final_model_path = os.path.join(directory, model_name)
-
-    print("Downloading all files...")
-    if not os.path.exists(final_model_path):
-        with open(final_model_path, "wb") as f:
-            http_get(model_url, f)
-        os.chmod(final_model_path, 0o777)
-    print("Files downloaded!")
-
-    model = Llama(
-        model_path=final_model_path,
-        n_ctx=2000,
-        n_parts=1,
-    )
-
-    print("Model loaded!")
-    return model
-
-# llm = Llama(
-#     # model_path="./Phi-3-mini-4k-instruct-q4.gguf",
-#     # model_path="./llama3-gguf/OpenBioLLM-Llama3-8B-Q5_K_M.gguf",
-#     model_path=hf_hub_download(
-#         repo_id=os.environ.get("REPO_ID", "LoneStriker/OpenBioLLM-Llama3-8B-GGUF"),
-#         filename=os.environ.get("MODEL_FILE", "Llama3-8B-Q5_K_M.gguf"),
-#     ),
-#     n_ctx=2048,
-#     n_gpu_layers=50,  # change n_gpu_layers if you have more or less VRAM
-# )
-
-llm = load_model()
+llm = Llama(
+    # model_path="./Phi-3-mini-4k-instruct-q4.gguf",
+    # model_path="./llama3-gguf/OpenBioLLM-Llama3-8B-Q5_K_M.gguf",
+    model_path=hf_hub_download(
+        repo_id=os.environ.get("REPO_ID", "LoneStriker/OpenBioLLM-Llama3-8B-GGUF"),
+        filename=os.environ.get("MODEL_FILE", "Llama3-8B-Q5_K_M.gguf"),
+    ),
+    n_ctx=2048,
+    n_gpu_layers=50,  # change n_gpu_layers if you have more or less VRAM
+)
 
 # print("here")
 def generate_text(
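
For reference, the loading path this commit switches to reduces to the sketch below. It is a minimal, self-contained sketch, not the Space's full app.py: the repo_id, filename, and Llama arguments mirror the diff, while the prompt and max_tokens value are illustrative assumptions. hf_hub_download fetches the GGUF into the local Hugging Face cache on the first run and returns its path, which replaces the removed load_model helper's hand-rolled http_get download and chmod step.

import os

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Resolve the GGUF file through the HF cache; it is downloaded only once,
# and later restarts reuse the cached copy instead of re-fetching.
model_path = hf_hub_download(
    repo_id=os.environ.get("REPO_ID", "LoneStriker/OpenBioLLM-Llama3-8B-GGUF"),
    filename=os.environ.get("MODEL_FILE", "Llama3-8B-Q5_K_M.gguf"),
)

llm = Llama(
    model_path=model_path,
    n_ctx=2048,       # token budget shared by prompt and completion
    n_gpu_layers=50,  # layers offloaded to VRAM; set to 0 for CPU-only
)

# Illustrative completion call (not from the diff); llama-cpp-python
# returns an OpenAI-style response dict.
out = llm("Question: What are common symptoms of anemia?\nAnswer:", max_tokens=64)
print(out["choices"][0]["text"])

One practical consequence of the switch: the old helper wrote the file into the working directory and loaded it with n_ctx=2000 and n_parts=1, whereas the new call relies on the hub cache, raises the context window to 2048 tokens, and exposes GPU offload via n_gpu_layers.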