Spaces:
Runtime error
Runtime error
changed to llama 3.1 model
Browse files
app.py
CHANGED
@@ -42,7 +42,7 @@ h1 {
|
|
42 |
"""
|
43 |
|
44 |
# Load the tokenizer and model with quantization
|
45 |
-
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
|
46 |
bnb_config = BitsAndBytesConfig(
|
47 |
load_in_4bit=True,
|
48 |
bnb_4bit_use_double_quant=True,
|
@@ -241,7 +241,7 @@ def chat_llama3_8b(message: str, history: list, temperature: float, max_new_toke
|
|
241 |
responses = []
|
242 |
count=0
|
243 |
for chunk in chunks:
|
244 |
-
logger.info(f"Processing chunk {count+1}/{len(
|
245 |
response = generate_response_for_chunk(chunk, history, temperature, max_new_tokens)
|
246 |
responses.append(response)
|
247 |
count+=1
|
|
|
42 |
"""
|
43 |
|
44 |
# Load the tokenizer and model with quantization
|
45 |
+
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
|
46 |
bnb_config = BitsAndBytesConfig(
|
47 |
load_in_4bit=True,
|
48 |
bnb_4bit_use_double_quant=True,
|
|
|
241 |
responses = []
|
242 |
count=0
|
243 |
for chunk in chunks:
|
244 |
+
logger.info(f"Processing chunk {count+1}/{len(chunk)}")
|
245 |
response = generate_response_for_chunk(chunk, history, temperature, max_new_tokens)
|
246 |
responses.append(response)
|
247 |
count+=1
|