Wisdom Chen committed on
Commit
03d07e9
·
unverified ·
1 Parent(s): c0a17cf

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +12 -62
model.py CHANGED
@@ -47,67 +47,10 @@ embeddings_df: Optional[pd.DataFrame] = None
47
  text_faiss: Optional[object] = None
48
  image_faiss: Optional[object] = None
49
 
50
- # def initialize_models() -> bool:
51
- # global clip_model, clip_preprocess, clip_tokenizer, llm_tokenizer, llm_model, device
52
-
53
- # try:
54
- # print(f"Initializing models on device: {device}")
55
-
56
- # # Initialize CLIP model with error handling
57
- # try:
58
- # clip_model, _, clip_preprocess = open_clip.create_model_and_transforms(
59
- # 'hf-hub:Marqo/marqo-fashionCLIP'
60
- # )
61
- # clip_model = clip_model.to(device)
62
- # clip_model.eval()
63
- # clip_tokenizer = open_clip.get_tokenizer('hf-hub:Marqo/marqo-fashionCLIP')
64
- # print("CLIP model initialized successfully")
65
- # except Exception as e:
66
- # raise RuntimeError(f"Failed to initialize CLIP model: {str(e)}")
67
-
68
- # # Initialize LLM with optimized settings
69
- # try:
70
- # model_name = "mistralai/Mistral-7B-v0.1"
71
- # quantization_config = BitsAndBytesConfig(
72
- # load_in_4bit=True,
73
- # bnb_4bit_compute_dtype=torch.float16,
74
- # bnb_4bit_use_double_quant=True,
75
- # bnb_4bit_quant_type="nf4"
76
- # )
77
-
78
- # # Get token from Streamlit secrets
79
- # hf_token = st.secrets["HUGGINGFACE_TOKEN"]
80
-
81
- # llm_tokenizer = AutoTokenizer.from_pretrained(
82
- # model_name,
83
- # padding_side="left",
84
- # truncation_side="left",
85
- # token=hf_token # Add token here
86
- # )
87
- # llm_tokenizer.pad_token = llm_tokenizer.eos_token
88
-
89
- # llm_model = AutoModelForCausalLM.from_pretrained(
90
- # model_name,
91
- # quantization_config=quantization_config,
92
- # device_map="auto",
93
- # torch_dtype=torch.float16,
94
- # token=hf_token # Add token here
95
- # )
96
- # llm_model.eval()
97
- # print("LLM initialized successfully")
98
- # except Exception as e:
99
- # raise RuntimeError(f"Failed to initialize LLM: {str(e)}")
100
-
101
- # return True
102
-
103
- # except Exception as e:
104
- # raise RuntimeError(f"Model initialization failed: {str(e)}")
105
-
106
  def initialize_models() -> bool:
107
  global clip_model, clip_preprocess, clip_tokenizer, llm_tokenizer, llm_model, device
108
 
109
  try:
110
- device = "cpu" # Force CPU usage for Streamlit Cloud
111
  print(f"Initializing models on device: {device}")
112
 
113
  # Initialize CLIP model with error handling
@@ -122,10 +65,16 @@ def initialize_models() -> bool:
122
  except Exception as e:
123
  raise RuntimeError(f"Failed to initialize CLIP model: {str(e)}")
124
 
125
- # Initialize LLM with CPU-compatible settings
126
  try:
127
  model_name = "mistralai/Mistral-7B-v0.1"
128
-
 
 
 
 
 
 
129
  # Get token from Streamlit secrets
130
  hf_token = st.secrets["HUGGINGFACE_TOKEN"]
131
 
@@ -133,15 +82,16 @@ def initialize_models() -> bool:
133
  model_name,
134
  padding_side="left",
135
  truncation_side="left",
136
- token=hf_token
137
  )
138
  llm_tokenizer.pad_token = llm_tokenizer.eos_token
139
 
140
  llm_model = AutoModelForCausalLM.from_pretrained(
141
  model_name,
 
142
  device_map="auto",
143
- low_cpu_mem_usage=True,
144
- token=hf_token
145
  )
146
  llm_model.eval()
147
  print("LLM initialized successfully")
 
47
  text_faiss: Optional[object] = None
48
  image_faiss: Optional[object] = None
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  def initialize_models() -> bool:
51
  global clip_model, clip_preprocess, clip_tokenizer, llm_tokenizer, llm_model, device
52
 
53
  try:
 
54
  print(f"Initializing models on device: {device}")
55
 
56
  # Initialize CLIP model with error handling
 
65
  except Exception as e:
66
  raise RuntimeError(f"Failed to initialize CLIP model: {str(e)}")
67
 
68
+ # Initialize LLM with optimized settings
69
  try:
70
  model_name = "mistralai/Mistral-7B-v0.1"
71
+ quantization_config = BitsAndBytesConfig(
72
+ load_in_4bit=True,
73
+ bnb_4bit_compute_dtype=torch.float16,
74
+ bnb_4bit_use_double_quant=True,
75
+ bnb_4bit_quant_type="nf4"
76
+ )
77
+
78
  # Get token from Streamlit secrets
79
  hf_token = st.secrets["HUGGINGFACE_TOKEN"]
80
 
 
82
  model_name,
83
  padding_side="left",
84
  truncation_side="left",
85
+ token=hf_token # Add token here
86
  )
87
  llm_tokenizer.pad_token = llm_tokenizer.eos_token
88
 
89
  llm_model = AutoModelForCausalLM.from_pretrained(
90
  model_name,
91
+ quantization_config=quantization_config,
92
  device_map="auto",
93
+ torch_dtype=torch.float16,
94
+ token=hf_token # Add token here
95
  )
96
  llm_model.eval()
97
  print("LLM initialized successfully")