Spaces:
Sleeping
Sleeping
Rúben Almeida
committed on
Commit
·
4b9e103
1
Parent(s):
fe22757
Typo in COPY requirements.txt relative path
Browse files- Dockerfile +1 -1
- load_gguf.py +0 -21
- main.py +1 -0
Dockerfile
CHANGED
@@ -14,7 +14,7 @@ RUN pip install -U setuptools wheel
|
|
14 |
RUN pip install --no-cache-dir -r requirements.txt
|
15 |
|
16 |
# Copy the requirements file into the container
|
17 |
-
COPY requirements.txt .
|
18 |
|
19 |
# Copy the rest of the application code into the container
|
20 |
COPY . .
|
|
|
14 |
RUN pip install --no-cache-dir -r requirements.txt
|
15 |
|
16 |
# Copy the requirements file into the container
|
17 |
+
COPY ./requirements.txt .
|
18 |
|
19 |
# Copy the rest of the application code into the container
|
20 |
COPY . .
|
load_gguf.py
DELETED
@@ -1,21 +0,0 @@
|
|
1 |
-
from transformers import AutoModel
|
2 |
-
|
3 |
-
|
4 |
-
base_model = "ibm-research/granite-3.2-8b-instruct-GGUF"
|
5 |
-
GGUF_MODEL = "granite-3.2-8b-instruct-Q4_K_M.gguf"
|
6 |
-
#model = AutoModel.from_pretrained("ibm-research/granite-3.2-8b-instruct", device_map="auto")
|
7 |
-
|
8 |
-
model = AutoModel.from_pretrained(base_model, device_map="auto", torch_dtype="auto", quantization_config=None, gguf_file=GGUF_MODEL)
|
9 |
-
model.config
|
10 |
-
"""
|
11 |
-
# pip install gguf
|
12 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
13 |
-
|
14 |
-
model_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
|
15 |
-
filename = "tinyllama-1.1b-chat-v1.0.Q6_K.gguf"
|
16 |
-
|
17 |
-
torch_dtype = torch.float32 # could be torch.float16 or torch.bfloat16 too
|
18 |
-
tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
|
19 |
-
model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename, torch_dtype=torch_dtype)
|
20 |
-
|
21 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
main.py
CHANGED
@@ -7,6 +7,7 @@ from starlette.responses import FileResponse
|
|
7 |
class ConvertRequest(BaseModel):
|
8 |
hf_model_name: str
|
9 |
hf_token: Optional[str] = Field(None, description="Hugging Face token for private models")
|
|
|
10 |
|
11 |
@asynccontextmanager
|
12 |
async def lifespan(app:FastAPI):
|
|
|
7 |
class ConvertRequest(BaseModel):
|
8 |
hf_model_name: str
|
9 |
hf_token: Optional[str] = Field(None, description="Hugging Face token for private models")
|
10 |
+
hf_push_repo: Optional[str] = Field(None, description="Hugging Face repo to push the converted model")
|
11 |
|
12 |
@asynccontextmanager
|
13 |
async def lifespan(app:FastAPI):
|