Somekindofa committed
Commit e507847 · 1 Parent(s): 23ff81d

test/ not loading LLM for faster build.

Files changed (1):
  1. app.py (+6 -7)
app.py CHANGED
@@ -17,7 +17,6 @@ MAX_MAX_NEW_TOKENS = 4096
 DEFAULT_MAX_NEW_TOKENS = 2048
 # MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 MAX_INPUT_TOKEN_LENGTH = 100000
-
 DEFAULT_USER_QUERY = '''
 @prefix : <urn:webprotege:ontology:7272b2af-011f-4d40-8519-9fc3f830442e#> .
 @prefix geo: <http://www.w3.org/2003/01/geo/wgs84_pos#> .
@@ -793,8 +792,8 @@ DEFAULT_USER_QUERY = '''
 
 ### Generated by the OWL API (version 4.5.13) https://github.com/owlcs/owlapi
 '''
-
 DEFAULT_SYSTEM_PROMPT = "You answer the User's questions about the topic at hand."
+
 class HuggingFaceLogin:
     """Handles authentication to the Hugging Face Hub using environment variables or explicit tokens."""
     def __init__(self, env_token_key: str = "HF_TOKEN"):
@@ -835,11 +834,11 @@ model_config = BitsAndBytesConfig(
     bnb_4bit_compute_dtype=torch.float16
 )
 
-if torch.cuda.is_available():
-    model_id = "meta-llama/Llama-3.1-8B-Instruct"
-    model = AutoModelForCausalLM.from_pretrained(model_id,
-                                                 device_map="auto")
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
+# if torch.cuda.is_available():
+#     model_id = "meta-llama/Llama-3.1-8B-Instruct"
+#     model = AutoModelForCausalLM.from_pretrained(model_id,
+#                                                  device_map="auto")
+#     tokenizer = AutoTokenizer.from_pretrained(model_id)
 
 @spaces.GPU
 def generate(
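
Note: a minimal sketch of an alternative to commenting the load block out, assuming one wants to keep a single code path and skip the download only for test builds. The LOAD_MODEL environment flag and the None fallbacks are assumptions for illustration; they are not part of this commit or of app.py.

    import os

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "meta-llama/Llama-3.1-8B-Instruct"
    model = None       # stays unloaded when the flag is off or no GPU is present
    tokenizer = None

    # LOAD_MODEL is a hypothetical flag, not an existing variable in app.py.
    if os.getenv("LOAD_MODEL", "1") == "1" and torch.cuda.is_available():
        model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
        tokenizer = AutoTokenizer.from_pretrained(model_id)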