Commit
·
ec9b8d8
1
Parent(s):
9a0d878
6hb default again
Browse files — much bigger VRAM usage in newer models, not really worth it much anymore.
also removed fast safetensors because deprecated
exllamav2 scripts/auto-exl2-upload/auto-exl2-upload.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d5c346d6dd344f29c196b33bdb8e2d07cf8d70e1e9135e1f2bfab8550bcba4e
|
3 |
+
size 8717
|
exllamav2 scripts/auto-exl2-upload/exl2-quant.py
CHANGED
@@ -203,7 +203,7 @@ for bpw in bpwvalue:
|
|
203 |
os.makedirs(f"{model}-exl2-{bpw}bpw", exist_ok=True) #create compile full directory
|
204 |
subprocess.run(f"{oscp} models{slsh}{model}{slsh}config.json {model}-exl2-{bpw}bpw-WD", shell=True) #copy config to working directory
|
205 |
#more settings exist in the convert.py script, to view them go to docs/convert.md or https://github.com/turboderp/exllamav2/blob/master/doc/convert.md
|
206 |
-
result = subprocess.run(f"{pyt} exllamav2/convert.py -i models/{model} -o {model}-exl2-{bpw}bpw-WD -cf {model}-exl2-{bpw}bpw -b {bpw}{mskip}
|
207 |
if result.returncode != 0:
|
208 |
print("Quantization failed.")
|
209 |
sys.exit("Exiting...")
|
|
|
203 |
os.makedirs(f"{model}-exl2-{bpw}bpw", exist_ok=True) #create compile full directory
|
204 |
subprocess.run(f"{oscp} models{slsh}{model}{slsh}config.json {model}-exl2-{bpw}bpw-WD", shell=True) #copy config to working directory
|
205 |
#more settings exist in the convert.py script, to view them go to docs/convert.md or https://github.com/turboderp/exllamav2/blob/master/doc/convert.md
|
206 |
+
result = subprocess.run(f"{pyt} exllamav2/convert.py -i models/{model} -o {model}-exl2-{bpw}bpw-WD -cf {model}-exl2-{bpw}bpw -b {bpw}{mskip}", shell=True) #run quantization and exit if failed (Credit to turbo for his dedication to exl2)
|
207 |
if result.returncode != 0:
|
208 |
print("Quantization failed.")
|
209 |
sys.exit("Exiting...")
|
exllamav2 scripts/exl2-multi-quant-local/exl2-multi-quant-local.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd45a8c909552d9e4f1f8b2069d210e91a2ce0fd8043b45dcffef764dab370ba
|
3 |
+
size 7513
|
exllamav2 scripts/exl2-multi-quant-local/exl2-quant.py
CHANGED
@@ -150,7 +150,7 @@ for bpw in bpwvalue:
|
|
150 |
os.makedirs(f"{modelname}-exl2-quants{slsh}{modelname}-exl2-{bpw}bpw", exist_ok=True) #create compile full directory
|
151 |
subprocess.run(f"{oscp} models{slsh}{model}{slsh}config.json {model}-exl2-{bpw}bpw-WD", shell=True) #copy config to working directory
|
152 |
#more settings exist in the convert.py script, to view them go to docs/convert.md or https://github.com/turboderp/exllamav2/blob/master/doc/convert.md
|
153 |
-
result = subprocess.run(f"{pyt} exllamav2/convert.py -i models/{model} -o {model}-exl2-{bpw}bpw-WD -cf {modelname}-exl2-quants{slsh}{modelname}-exl2-{bpw}bpw -b {bpw}{mskip}
|
154 |
if result.returncode != 0:
|
155 |
print("Quantization failed.")
|
156 |
sys.exit("Exiting...")
|
|
|
150 |
os.makedirs(f"{modelname}-exl2-quants{slsh}{modelname}-exl2-{bpw}bpw", exist_ok=True) #create compile full directory
|
151 |
subprocess.run(f"{oscp} models{slsh}{model}{slsh}config.json {model}-exl2-{bpw}bpw-WD", shell=True) #copy config to working directory
|
152 |
#more settings exist in the convert.py script, to view them go to docs/convert.md or https://github.com/turboderp/exllamav2/blob/master/doc/convert.md
|
153 |
+
result = subprocess.run(f"{pyt} exllamav2/convert.py -i models/{model} -o {model}-exl2-{bpw}bpw-WD -cf {modelname}-exl2-quants{slsh}{modelname}-exl2-{bpw}bpw -b {bpw}{mskip}", shell=True) #run quantization and exit if failed (Credit to turbo for his dedication to exl2)
|
154 |
if result.returncode != 0:
|
155 |
print("Quantization failed.")
|
156 |
sys.exit("Exiting...")
|