Spaces:
Sleeping
Sleeping
Rúben Almeida
committed on
Commit
·
4b9e103
1
Parent(s):
fe22757
Typo in COPY requirements.txt relative path
Browse files- Dockerfile +1 -1
- load_gguf.py +0 -21
- main.py +1 -0
Dockerfile
CHANGED
@@ -14,7 +14,7 @@ RUN pip install -U setuptools wheel
|
|
14 |
RUN pip install --no-cache-dir -r requirements.txt
|
15 |
|
16 |
# Copy the requirements file into the container
|
17 |
-
COPY requirements.txt .
|
18 |
|
19 |
# Copy the rest of the application code into the container
|
20 |
COPY . .
|
|
|
14 |
RUN pip install --no-cache-dir -r requirements.txt
|
15 |
|
16 |
# Copy the requirements file into the container
|
17 |
+
COPY ./requirements.txt .
|
18 |
|
19 |
# Copy the rest of the application code into the container
|
20 |
COPY . .
|
load_gguf.py
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
from transformers import AutoModel
|
2 |
-
|
3 |
-
|
4 |
-
base_model = "ibm-research/granite-3.2-8b-instruct-GGUF"
|
5 |
-
GGUF_MODEL = "granite-3.2-8b-instruct-Q4_K_M.gguf"
|
6 |
-
#model = AutoModel.from_pretrained("ibm-research/granite-3.2-8b-instruct", device_map="auto")
|
7 |
-
|
8 |
-
model = AutoModel.from_pretrained(base_model, device_map="auto", torch_dtype="auto", quantization_config=None, gguf_file=GGUF_MODEL)
|
9 |
-
model.config
|
10 |
-
"""
|
11 |
-
# pip install gguf
|
12 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
13 |
-
|
14 |
-
model_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
|
15 |
-
filename = "tinyllama-1.1b-chat-v1.0.Q6_K.gguf"
|
16 |
-
|
17 |
-
torch_dtype = torch.float32 # could be torch.float16 or torch.bfloat16 too
|
18 |
-
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
|
19 |
-
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename, torch_dtype=torch_dtype)
|
20 |
-
|
21 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main.py
CHANGED
@@ -7,6 +7,7 @@ from starlette.responses import FileResponse
|
|
7 |
class ConvertRequest(BaseModel):
|
8 |
hf_model_name: str
|
9 |
hf_token: Optional[str] = Field(None, description="Hugging Face token for private models")
|
|
|
10 |
|
11 |
@asynccontextmanager
|
12 |
async def lifespan(app:FastAPI):
|
|
|
7 |
class ConvertRequest(BaseModel):
|
8 |
hf_model_name: str
|
9 |
hf_token: Optional[str] = Field(None, description="Hugging Face token for private models")
|
10 |
+
hf_push_repo: Optional[str] = Field(None, description="Hugging Face repo to push the converted model")
|
11 |
|
12 |
@asynccontextmanager
|
13 |
async def lifespan(app:FastAPI):
|