Rúben Almeida committed on
Commit
4b9e103
·
1 Parent(s): fe22757

Typo in COPY requirements.txt relative path

Browse files
Files changed (3) hide show
  1. Dockerfile +1 -1
  2. load_gguf.py +0 -21
  3. main.py +1 -0
Dockerfile CHANGED
@@ -14,7 +14,7 @@ RUN pip install -U setuptools wheel
14
  RUN pip install --no-cache-dir -r requirements.txt
15
 
16
  # Copy the requirements file into the container
17
- COPY requirements.txt .
18
 
19
  # Copy the rest of the application code into the container
20
  COPY . .
 
14
  RUN pip install --no-cache-dir -r requirements.txt
15
 
16
  # Copy the requirements file into the container
17
+ COPY ./requirements.txt .
18
 
19
  # Copy the rest of the application code into the container
20
  COPY . .
load_gguf.py DELETED
@@ -1,21 +0,0 @@
1
- from transformers import AutoModel
2
-
3
-
4
- base_model = "ibm-research/granite-3.2-8b-instruct-GGUF"
5
- GGUF_MODEL = "granite-3.2-8b-instruct-Q4_K_M.gguf"
6
- #model = AutoModel.from_pretrained("ibm-research/granite-3.2-8b-instruct", device_map="auto")
7
-
8
- model = AutoModel.from_pretrained(base_model, device_map="auto", torch_dtype="auto", quantization_config=None, gguf_file=GGUF_MODEL)
9
- model.config
10
- """
11
- # pip install gguf
12
- from transformers import AutoTokenizer, AutoModelForCausalLM
13
-
14
- model_id = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
15
- filename = "tinyllama-1.1b-chat-v1.0.Q6_K.gguf"
16
-
17
- torch_dtype = torch.float32 # could be torch.float16 or torch.bfloat16 too
18
- tokenizer = AutoTokenizer.from_pretrained(model_id, gguf_file=filename)
19
- model = AutoModelForCausalLM.from_pretrained(model_id, gguf_file=filename, torch_dtype=torch_dtype)
20
-
21
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
main.py CHANGED
@@ -7,6 +7,7 @@ from starlette.responses import FileResponse
7
  class ConvertRequest(BaseModel):
8
  hf_model_name: str
9
  hf_token: Optional[str] = Field(None, description="Hugging Face token for private models")
 
10
 
11
  @asynccontextmanager
12
  async def lifespan(app:FastAPI):
 
7
  class ConvertRequest(BaseModel):
8
  hf_model_name: str
9
  hf_token: Optional[str] = Field(None, description="Hugging Face token for private models")
10
+ hf_push_repo: Optional[str] = Field(None, description="Hugging Face repo to push the converted model")
11
 
12
  @asynccontextmanager
13
  async def lifespan(app:FastAPI):