Somekindofa committed
Commit e507847 · Parent(s): 23ff81d
test/ not loading LLM for faster build.
app.py CHANGED
@@ -17,7 +17,6 @@ MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 2048
 # MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 MAX_INPUT_TOKEN_LENGTH = 100000
-
 DEFAULT_USER_QUERY = '''
 @prefix : <urn:webprotege:ontology:7272b2af-011f-4d40-8519-9fc3f830442e#> .
 @prefix geo: <http://www.w3.org/2003/01/geo/wgs84_pos#> .
@@ -793,8 +792,8 @@ DEFAULT_USER_QUERY = '''
 
 ### Generated by the OWL API (version 4.5.13) https://github.com/owlcs/owlapi
 '''
-
 DEFAULT_SYSTEM_PROMPT = "You answer the User's questions about the topic at hand."
+
 class HuggingFaceLogin:
     """Handles authentication to the Hugging Face Hub using environment variables or explicit tokens."""
     def __init__(self, env_token_key: str = "HF_TOKEN"):
@@ -835,11 +834,11 @@ model_config = BitsAndBytesConfig(
     bnb_4bit_compute_dtype=torch.float16
 )
 
-if torch.cuda.is_available():
-    model_id = "meta-llama/Llama-3.1-8B-Instruct"
-    model = AutoModelForCausalLM.from_pretrained(model_id,
-                                                 device_map="auto")
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
+# if torch.cuda.is_available():
+#     model_id = "meta-llama/Llama-3.1-8B-Instruct"
+#     model = AutoModelForCausalLM.from_pretrained(model_id,
+#                                                  device_map="auto")
+#     tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 @spaces.GPU
 def generate(
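Per the commit message, the point of commenting the block out is that nothing heavy runs at import time, so the Space builds and starts without loading the LLM. A common way to keep generation working while preserving that fast startup is to defer loading into the @spaces.GPU-decorated function. The sketch below assumes that pattern; the real generate signature and the rest of app.py are not shown in this diff.

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = None
tokenizer = None

def _ensure_model_loaded():
    """Load the model once, on first request, instead of at import/build time."""
    global model, tokenizer
    if model is None:
        model_id = "meta-llama/Llama-3.1-8B-Instruct"
        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
        tokenizer = AutoTokenizer.from_pretrained(model_id)

@spaces.GPU
def generate(prompt: str, max_new_tokens: int = 2048) -> str:
    # The first request pays the load cost; the build itself stays LLM-free.
    _ensure_model_loaded()
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)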