Spaces:

giulio98
/

beyondrag

Runtime error

giulio98 committed on Mar 28

Commit

ba89be7

verified ·

1 Parent(s): 06b7f61

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -30,15 +30,15 @@ from utils import (
 # Initialize the model and tokenizer.
 api_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
-# model_name = "meta-llama/Llama-3.1-8B-Instruct"
-model_name = "google/gemma-3-27b-it"
 tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
-quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-# model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token, torch_dtype=torch.float16)
-model = Gemma3ForCausalLM.from_pretrained(model_name, token=api_token, quantization_config=quantization_config, torch_dtype="auto")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = model.eval()
-# model.to(device)
 embedding_model = HuggingFaceBgeEmbeddings(
     model_name="BAAI/bge-large-en-v1.5",
     model_kwargs={"device": str(device)},
@@ -577,14 +577,14 @@ def chat_response_stream(message: str, history: list, state: dict, compression_d
         streamer=streamer,
         use_cache=True,
         max_new_tokens=1024,
-        # num_beams=1,
-        # do_sample=False,
-        # top_p=1.0,
-        # top_k=None,
         temperature=1.0,
-        top_k=64,
-        top_p=0.95,
-        min_p=0.0
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()

 # Initialize the model and tokenizer.
 api_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
+model_name = "meta-llama/Llama-3.1-8B-Instruct"
+# model_name = "google/gemma-3-27b-it"
 tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
+# quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token, torch_dtype=torch.float16)
+# model = Gemma3ForCausalLM.from_pretrained(model_name, token=api_token, quantization_config=quantization_config, torch_dtype="auto")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = model.eval()
+model.to(device)
 embedding_model = HuggingFaceBgeEmbeddings(
     model_name="BAAI/bge-large-en-v1.5",
     model_kwargs={"device": str(device)},
         streamer=streamer,
         use_cache=True,
         max_new_tokens=1024,
+        num_beams=1,
+        do_sample=False,
+        top_p=1.0,
+        top_k=None,
         temperature=1.0,
+        # top_k=64,
+        # top_p=0.95,
+        # min_p=0.0
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()