Spaces:

lilmeaty
/

aws

Sleeping

App Files Community

Hjgugugjhuhjggg committed on Dec 6, 2024

Commit

f2e20dd

verified ·

1 Parent(s): 99862b8

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -9

app.py CHANGED Viewed

@@ -16,11 +16,12 @@ from transformers import (
     StoppingCriteriaList
 )
 import boto3
-from huggingface_hub import hf_hub_download
 import soundfile as sf
 import numpy as np
 import torch
 import uvicorn
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s")
@@ -64,6 +65,7 @@ class S3ModelLoader:
     def __init__(self, bucket_name, s3_client):
         self.bucket_name = bucket_name
         self.s3_client = s3_client
     def _get_s3_uri(self, model_name):
         return f"s3://{self.bucket_name}/{model_name.replace('/', '-')}"
@@ -72,9 +74,9 @@ class S3ModelLoader:
         s3_uri = self._get_s3_uri(model_name)
         try:
             logging.info(f"Trying to load {model_name} from S3...")
-            config = AutoConfig.from_pretrained(s3_uri)
-            model = AutoModelForCausalLM.from_pretrained(s3_uri, config=config)
-            tokenizer = AutoTokenizer.from_pretrained(s3_uri, config=config)
             if tokenizer.eos_token_id is not None and tokenizer.pad_token_id is None:
                 tokenizer.pad_token_id = config.pad_token_id or tokenizer.eos_token_id
@@ -84,9 +86,18 @@ class S3ModelLoader:
         except EnvironmentError:
             logging.info(f"Model {model_name} not found in S3. Downloading...")
             try:
-                config = AutoConfig.from_pretrained(model_name)
-                tokenizer = AutoTokenizer.from_pretrained(model_name, config=config)
-                model = AutoModelForCausalLM.from_pretrained(model_name, config=config, token=HUGGINGFACE_HUB_TOKEN)
                 if tokenizer.eos_token_id is not None and tokenizer.pad_token_id is None:
                     tokenizer.pad_token_id = config.pad_token_id or tokenizer.eos_token_id
@@ -96,6 +107,9 @@ class S3ModelLoader:
                 model.save_pretrained(s3_uri)
                 tokenizer.save_pretrained(s3_uri)
                 logging.info(f"Saved {model_name} to S3 successfully.")
                 return model, tokenizer
             except Exception as e:
                 logging.exception(f"Error downloading/uploading model: {e}")
@@ -122,7 +136,7 @@ async def generate(request: Request, body: GenerateRequest):
                 top_k=validated_body.top_k,
                 repetition_penalty=validated_body.repetition_penalty,
                 do_sample=validated_body.do_sample,
-                num_return_sequences=validated_body.num_return_sequences
             )
             async def stream_text():
@@ -139,7 +153,6 @@ async def generate(request: Request, body: GenerateRequest):
                         break
                     generation_config.max_new_tokens = min(remaining_tokens, validated_body.max_new_tokens)
                     stopping_criteria = StoppingCriteriaList(
                         [lambda _, outputs: tokenizer.decode(outputs[0][-1], skip_special_tokens=True) in validated_body.stop_sequences] if validated_body.stop_sequences else []
                     )

     StoppingCriteriaList
 )
 import boto3
+from huggingface_hub import hf_hub_download, HfApi
 import soundfile as sf
 import numpy as np
 import torch
 import uvicorn
+import shutil
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s")
     def __init__(self, bucket_name, s3_client):
         self.bucket_name = bucket_name
         self.s3_client = s3_client
+        self.api = HfApi()
     def _get_s3_uri(self, model_name):
         return f"s3://{self.bucket_name}/{model_name.replace('/', '-')}"
         s3_uri = self._get_s3_uri(model_name)
         try:
             logging.info(f"Trying to load {model_name} from S3...")
+            config = AutoConfig.from_pretrained(s3_uri, local_files_only=False)
+            model = AutoModelForCausalLM.from_pretrained(s3_uri, config=config, local_files_only=False)
+            tokenizer = AutoTokenizer.from_pretrained(s3_uri, config=config, local_files_only=False)
             if tokenizer.eos_token_id is not None and tokenizer.pad_token_id is None:
                 tokenizer.pad_token_id = config.pad_token_id or tokenizer.eos_token_id
         except EnvironmentError:
             logging.info(f"Model {model_name} not found in S3. Downloading...")
             try:
+                model_info = self.api.model_info(model_name)
+                files_to_download = [f.rfilename for f in self.api.list_repo_files(model_name)]
+                temp_dir = "temp_model"
+                os.makedirs(temp_dir, exist_ok = True)
+                for file_name in files_to_download:
+                    hf_hub_download(repo_id=model_name, filename=file_name, local_dir=temp_dir, token=HUGGINGFACE_HUB_TOKEN)
+                config = AutoConfig.from_pretrained(temp_dir)
+                tokenizer = AutoTokenizer.from_pretrained(temp_dir, config=config)
+                model = AutoModelForCausalLM.from_pretrained(temp_dir, config=config)
                 if tokenizer.eos_token_id is not None and tokenizer.pad_token_id is None:
                     tokenizer.pad_token_id = config.pad_token_id or tokenizer.eos_token_id
                 model.save_pretrained(s3_uri)
                 tokenizer.save_pretrained(s3_uri)
                 logging.info(f"Saved {model_name} to S3 successfully.")
+                shutil.rmtree(temp_dir)
                 return model, tokenizer
             except Exception as e:
                 logging.exception(f"Error downloading/uploading model: {e}")
                 top_k=validated_body.top_k,
                 repetition_penalty=validated_body.repetition_penalty,
                 do_sample=validated_body.do_sample,
+                num_return_sequences=validated_body.num_return_sequences,
             )
             async def stream_text():
                         break
                     generation_config.max_new_tokens = min(remaining_tokens, validated_body.max_new_tokens)
                     stopping_criteria = StoppingCriteriaList(
                         [lambda _, outputs: tokenizer.decode(outputs[0][-1], skip_special_tokens=True) in validated_body.stop_sequences] if validated_body.stop_sequences else []
                     )