giulio98 committed
Commit ba89be7 · verified · Parent: 06b7f61

Update app.py

Files changed (1): app.py (+13 -13)
app.py CHANGED
@@ -30,15 +30,15 @@ from utils import (
 
 # Initialize the model and tokenizer.
 api_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
-# model_name = "meta-llama/Llama-3.1-8B-Instruct"
-model_name = "google/gemma-3-27b-it"
+model_name = "meta-llama/Llama-3.1-8B-Instruct"
+# model_name = "google/gemma-3-27b-it"
 tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
-quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-# model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token, torch_dtype=torch.float16)
-model = Gemma3ForCausalLM.from_pretrained(model_name, token=api_token, quantization_config=quantization_config, torch_dtype="auto")
+# quantization_config = BitsAndBytesConfig(load_in_8bit=True)
+model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token, torch_dtype=torch.float16)
+# model = Gemma3ForCausalLM.from_pretrained(model_name, token=api_token, quantization_config=quantization_config, torch_dtype="auto")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = model.eval()
-# model.to(device)
+model.to(device)
 embedding_model = HuggingFaceBgeEmbeddings(
     model_name="BAAI/bge-large-en-v1.5",
     model_kwargs={"device": str(device)},
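For context, a minimal sketch of the two loading paths this hunk toggles between. The helper names load_fp16 and load_8bit are illustrative, not from app.py; it assumes torch plus a transformers release recent enough to ship Gemma3ForCausalLM, and bitsandbytes for the 8-bit path:

import os
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, Gemma3ForCausalLM

api_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def load_fp16(model_name="meta-llama/Llama-3.1-8B-Instruct"):
    # New path: fp16 weights load onto CPU first, so the explicit
    # model.to(device) that this commit re-enables is required.
    model = AutoModelForCausalLM.from_pretrained(
        model_name, token=api_token, torch_dtype=torch.float16
    )
    return model.to(device).eval()

def load_8bit(model_name="google/gemma-3-27b-it"):
    # Old path: bitsandbytes 8-bit quantization places the weights on the
    # GPU during from_pretrained itself; calling .to(device) on a
    # bitsandbytes-quantized model raises an error, which is why the old
    # code kept that line commented out.
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
    model = Gemma3ForCausalLM.from_pretrained(
        model_name, token=api_token,
        quantization_config=quantization_config, torch_dtype="auto",
    )
    return model.eval()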
@@ -577,14 +577,14 @@ def chat_response_stream(message: str, history: list, state: dict, compression_d
         streamer=streamer,
         use_cache=True,
         max_new_tokens=1024,
-        # num_beams=1,
-        # do_sample=False,
-        # top_p=1.0,
-        # top_k=None,
+        num_beams=1,
+        do_sample=False,
+        top_p=1.0,
+        top_k=None,
         temperature=1.0,
-        top_k=64,
-        top_p=0.95,
-        min_p=0.0
+        # top_k=64,
+        # top_p=0.95,
+        # min_p=0.0
     )
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
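This hunk flips generation from sampling to greedy decoding: do_sample=False with num_beams=1 is greedy search, so output for a given prompt becomes essentially deterministic, and the values it comments out (top_k=64, top_p=0.95, min_p=0.0 at temperature=1.0) are the sampling settings commonly recommended for Gemma 3. Below is a minimal sketch of the surrounding TextIteratorStreamer-plus-Thread pattern with the commit's new settings; stream_reply and the prompt handling are illustrative, and model and tokenizer are assumed from the loading code above:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(prompt: str):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        **inputs,
        streamer=streamer,
        use_cache=True,
        max_new_tokens=1024,
        num_beams=1,        # a single beam ...
        do_sample=False,    # ... with no sampling = greedy decoding
        top_p=1.0,          # neutral values; ignored when do_sample=False
        top_k=None,
        temperature=1.0,
    )
    # generate() blocks until completion, so it runs on a worker thread
    # while the caller consumes text chunks as the streamer yields them.
    Thread(target=model.generate, kwargs=generate_kwargs).start()
    for chunk in streamer:
        yield chunk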
 