Spaces:
Runtime error
Runtime error
binqiangliu
committed on
Commit
·
498ddeb
1
Parent(s):
a837a98
Update app.py
Browse files
app.py
CHANGED
@@ -1,11 +1,9 @@
|
|
1 |
# import dependencies
|
2 |
import torch
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
|
4 |
-
|
5 |
import os
|
6 |
import gradio as gr
|
7 |
#from google.colab import drive
|
8 |
-
|
9 |
import chromadb
|
10 |
from langchain.llms import HuggingFacePipeline
|
11 |
from langchain.document_loaders import TextLoader
|
@@ -17,6 +15,9 @@ from langchain.document_loaders import PyPDFDirectoryLoader
|
|
17 |
from langchain.chains import ConversationalRetrievalChain
|
18 |
from langchain.memory import ConversationBufferMemory
|
19 |
|
|
|
|
|
|
|
20 |
# specify model huggingface mode name
|
21 |
model_name = "anakin87/zephyr-7b-alpha-sharded"
|
22 |
#https://huggingface.co/anakin87/zephyr-7b-alpha-sharded
|
@@ -31,10 +32,11 @@ def load_quantized_model(model_name: str):
|
|
31 |
:return: Loaded quantized model.
|
32 |
"""
|
33 |
bnb_config = BitsAndBytesConfig(
|
34 |
-
load_in_4bit=True,
|
|
|
35 |
#bnb_4bit_use_double_quant=True,
|
36 |
bnb_4bit_use_double_quant=False,
|
37 |
-
bnb_4bit_quant_type="nf4"
|
38 |
#bnb_4bit_compute_dtype=torch.bfloat16
|
39 |
)
|
40 |
|
@@ -42,7 +44,6 @@ def load_quantized_model(model_name: str):
|
|
42 |
model_name,
|
43 |
load_in_4bit=True,
|
44 |
#torch_dtype=torch.bfloat16,
|
45 |
-
#torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
46 |
quantization_config=bnb_config
|
47 |
)
|
48 |
return model
|
@@ -57,7 +58,7 @@ def initialize_tokenizer(model_name: str):
|
|
57 |
"""
|
58 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
59 |
tokenizer.bos_token_id = 1 # Set beginning of sentence token id
|
60 |
-
return tokenizer
|
61 |
|
62 |
# load model
|
63 |
model = load_quantized_model(model_name)
|
@@ -125,7 +126,6 @@ def create_conversation(query: str, chat_history: list) -> tuple:
|
|
125 |
chat_history.append((query, result['answer']))
|
126 |
return '', chat_history
|
127 |
|
128 |
-
|
129 |
except Exception as e:
|
130 |
chat_history.append((query, e))
|
131 |
return '', chat_history
|
|
|
1 |
# import dependencies
|
2 |
import torch
|
3 |
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
|
|
|
4 |
import os
|
5 |
import gradio as gr
|
6 |
#from google.colab import drive
|
|
|
7 |
import chromadb
|
8 |
from langchain.llms import HuggingFacePipeline
|
9 |
from langchain.document_loaders import TextLoader
|
|
|
15 |
from langchain.chains import ConversationalRetrievalChain
|
16 |
from langchain.memory import ConversationBufferMemory
|
17 |
|
18 |
+
#import locale
|
19 |
+
#locale.getpreferredencoding = lambda: "UTF-8"
|
20 |
+
|
21 |
# specify model huggingface mode name
|
22 |
model_name = "anakin87/zephyr-7b-alpha-sharded"
|
23 |
#https://huggingface.co/anakin87/zephyr-7b-alpha-sharded
|
|
|
32 |
:return: Loaded quantized model.
|
33 |
"""
|
34 |
bnb_config = BitsAndBytesConfig(
|
35 |
+
#load_in_4bit=True,
|
36 |
+
load_in_4bit=False,
|
37 |
#bnb_4bit_use_double_quant=True,
|
38 |
bnb_4bit_use_double_quant=False,
|
39 |
+
bnb_4bit_quant_type="nf4"
|
40 |
#bnb_4bit_compute_dtype=torch.bfloat16
|
41 |
)
|
42 |
|
|
|
44 |
model_name,
|
45 |
load_in_4bit=True,
|
46 |
#torch_dtype=torch.bfloat16,
|
|
|
47 |
quantization_config=bnb_config
|
48 |
)
|
49 |
return model
|
|
|
58 |
"""
|
59 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
60 |
tokenizer.bos_token_id = 1 # Set beginning of sentence token id
|
61 |
+
return tokenizer
|
62 |
|
63 |
# load model
|
64 |
model = load_quantized_model(model_name)
|
|
|
126 |
chat_history.append((query, result['answer']))
|
127 |
return '', chat_history
|
128 |
|
|
|
129 |
except Exception as e:
|
130 |
chat_history.append((query, e))
|
131 |
return '', chat_history
|