Somekindofa committed
Commit e507847 · Parent(s): 23ff81d
test/ not loading LLM for faster build.
app.py CHANGED
@@ -17,7 +17,6 @@ MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 2048
 # MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 MAX_INPUT_TOKEN_LENGTH = 100000
-
 DEFAULT_USER_QUERY = '''
 @prefix : <urn:webprotege:ontology:7272b2af-011f-4d40-8519-9fc3f830442e#> .
 @prefix geo: <http://www.w3.org/2003/01/geo/wgs84_pos#> .
@@ -793,8 +792,8 @@ DEFAULT_USER_QUERY = '''
 
 ### Generated by the OWL API (version 4.5.13) https://github.com/owlcs/owlapi
 '''
-
 DEFAULT_SYSTEM_PROMPT = "You answer the User's questions about the topic at hand."
+
 class HuggingFaceLogin:
     """Handles authentication to the Hugging Face Hub using environment variables or explicit tokens."""
     def __init__(self, env_token_key: str = "HF_TOKEN"):
@@ -835,11 +834,11 @@ model_config = BitsAndBytesConfig(
     bnb_4bit_compute_dtype=torch.float16
 )
 
-if torch.cuda.is_available():
-    model_id = "meta-llama/Llama-3.1-8B-Instruct"
-    model = AutoModelForCausalLM.from_pretrained(model_id,
-                                                 device_map="auto")
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
+# if torch.cuda.is_available():
+#     model_id = "meta-llama/Llama-3.1-8B-Instruct"
+#     model = AutoModelForCausalLM.from_pretrained(model_id,
+#                                                  device_map="auto")
+#     tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 @spaces.GPU
 def generate(
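Per the commit message, the point of commenting the block out is that nothing heavy runs at import time, so the Space builds and starts without loading the LLM. A common way to keep generation working while preserving that fast startup is to defer loading into the @spaces.GPU-decorated function. The sketch below assumes that pattern; the real generate signature and the rest of app.py are not shown in this diff.

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = None
tokenizer = None

def _ensure_model_loaded():
    """Load the model once, on first request, instead of at import/build time."""
    global model, tokenizer
    if model is None:
        model_id = "meta-llama/Llama-3.1-8B-Instruct"
        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
        tokenizer = AutoTokenizer.from_pretrained(model_id)

@spaces.GPU
def generate(prompt: str, max_new_tokens: int = 2048) -> str:
    # The first request pays the load cost; the build itself stays LLM-free.
    _ensure_model_loaded()
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)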