Spaces:
Runtime error
Runtime error
BitsAndBytesConfig removed
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ from langchain.memory import ConversationBufferMemory
|
|
8 |
from langchain.chains import ConversationalRetrievalChain
|
9 |
from transformers import pipeline
|
10 |
from langchain import HuggingFacePipeline
|
11 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
12 |
import torch
|
13 |
|
14 |
|
@@ -25,15 +25,10 @@ def init():
|
|
25 |
def init_llm_pipeline():
|
26 |
if "llm" not in st.session_state:
|
27 |
model_id = "bigcode/starcoder2-15b"
|
28 |
-
quantization_config = BitsAndBytesConfig(
|
29 |
-
load_in_4bit=True,
|
30 |
-
bnb_4bit_compute_dtype=torch.float16
|
31 |
-
)
|
32 |
-
|
33 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
34 |
model = AutoModelForCausalLM.from_pretrained(
|
35 |
model_id,
|
36 |
-
quantization_config=quantization_config,
|
37 |
device_map="auto",
|
38 |
)
|
39 |
tokenizer.add_eos_token = True
|
@@ -44,7 +39,7 @@ def init_llm_pipeline():
|
|
44 |
model=model,
|
45 |
tokenizer=tokenizer,
|
46 |
task="text-generation",
|
47 |
-
temperature=0.
|
48 |
repetition_penalty=1.1,
|
49 |
return_full_text=True,
|
50 |
max_new_tokens=300,
|
|
|
8 |
from langchain.chains import ConversationalRetrievalChain
|
9 |
from transformers import pipeline
|
10 |
from langchain import HuggingFacePipeline
|
11 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
12 |
import torch
|
13 |
|
14 |
|
|
|
25 |
def init_llm_pipeline():
|
26 |
if "llm" not in st.session_state:
|
27 |
model_id = "bigcode/starcoder2-15b"
|
28 |
+
|
|
|
|
|
|
|
|
|
29 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
30 |
model = AutoModelForCausalLM.from_pretrained(
|
31 |
model_id,
|
|
|
32 |
device_map="auto",
|
33 |
)
|
34 |
tokenizer.add_eos_token = True
|
|
|
39 |
model=model,
|
40 |
tokenizer=tokenizer,
|
41 |
task="text-generation",
|
42 |
+
temperature=0.2,
|
43 |
repetition_penalty=1.1,
|
44 |
return_full_text=True,
|
45 |
max_new_tokens=300,
|