Wedyan2023 committed
Commit 80a2299 · verified · 1 parent: b51ed4e

Update app104.py

Files changed (1)
  1. app104.py +68 -68
app104.py CHANGED
@@ -244,84 +244,84 @@ with st.sidebar:
      mime="application/pdf"
  )

- # selected_model = st.selectbox(
- #     "Select Model",
- #     ["meta-llama/Meta-Llama-3-8B-Instruct-Turbo", "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.2-3B-Instruct","meta-llama/Llama-4-Scout-17B-16E-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct",
- #      "meta-llama/Llama-3.1-70B-Instruct"],
- #     key='model_select'
- # )
+ selected_model = st.selectbox(
+     "Select Model",
+     ["meta-llama/Meta-Llama-3-8B-Instruct-Turbo", "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.2-3B-Instruct","meta-llama/Llama-4-Scout-17B-16E-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct",
+      "meta-llama/Llama-3.1-70B-Instruct"],
+     key='model_select'
+ )

  #################new oooo

- # Model selection dropdown
- selected_model = st.selectbox(
-     "Select Model",
-     [#"meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
-      "meta-llama/Llama-3.2-3B-Instruct",
-      "meta-llama/Llama-3.3-70B-Instruct",
-      "meta-llama/Llama-3.2-3B-Instruct",
-      "meta-llama/Llama-4-Scout-17B-16E-Instruct",
-      "meta-llama/Meta-Llama-3-8B-Instruct",
-      "meta-llama/Llama-3.1-70B-Instruct"],
-     key='model_select'
- )
+ # # Model selection dropdown
+ # selected_model = st.selectbox(
+ #     "Select Model",
+ #     [#"meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
+ #      "meta-llama/Llama-3.2-3B-Instruct",
+ #      "meta-llama/Llama-3.3-70B-Instruct",
+ #      "meta-llama/Llama-3.2-3B-Instruct",
+ #      "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ #      "meta-llama/Meta-Llama-3-8B-Instruct",
+ #      "meta-llama/Llama-3.1-70B-Instruct"],
+ #     key='model_select'
+ # )

- @st.cache_resource  # Cache the model to prevent reloading
- def load_model(model_name):
-     try:
-         # Optimized model loading configuration
-         model = AutoModelForCausalLM.from_pretrained(
-             model_name,
-             torch_dtype=torch.float16,    # Use half precision
-             device_map="auto",            # Automatic device mapping
-             load_in_8bit=True,            # Enable 8-bit quantization
-             low_cpu_mem_usage=True,       # Optimize CPU memory usage
-             max_memory={0: "10GB"}        # Limit GPU memory usage
-         )
+ # @st.cache_resource  # Cache the model to prevent reloading
+ # def load_model(model_name):
+ #     try:
+ #         # Optimized model loading configuration
+ #         model = AutoModelForCausalLM.from_pretrained(
+ #             model_name,
+ #             torch_dtype=torch.float16,    # Use half precision
+ #             device_map="auto",            # Automatic device mapping
+ #             load_in_8bit=True,            # Enable 8-bit quantization
+ #             low_cpu_mem_usage=True,       # Optimize CPU memory usage
+ #             max_memory={0: "10GB"}        # Limit GPU memory usage
+ #         )

-         tokenizer = AutoTokenizer.from_pretrained(
-             model_name,
-             padding_side="left",
-             truncation_side="left"
-         )
+ #         tokenizer = AutoTokenizer.from_pretrained(
+ #             model_name,
+ #             padding_side="left",
+ #             truncation_side="left"
+ #         )

-         return model, tokenizer
+ #         return model, tokenizer

-     except Exception as e:
-         st.error(f"Error loading model: {str(e)}")
-         return None, None
-
- # Load the selected model with optimizations
- if selected_model:
-     model, tokenizer = load_model(selected_model)
-
-     # Check if model loaded successfully
-     if model is not None:
-         st.success(f"Successfully loaded {selected_model}")
-     else:
-         st.warning("Please select a different model or check your hardware capabilities")
-
- # Function to generate text
- def generate_response(prompt, model, tokenizer):
-     try:
-         inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
+ #     except Exception as e:
+ #         st.error(f"Error loading model: {str(e)}")
+ #         return None, None
+
+ # # Load the selected model with optimizations
+ # if selected_model:
+ #     model, tokenizer = load_model(selected_model)
+
+ #     # Check if model loaded successfully
+ #     if model is not None:
+ #         st.success(f"Successfully loaded {selected_model}")
+ #     else:
+ #         st.warning("Please select a different model or check your hardware capabilities")
+
+ # # Function to generate text
+ # def generate_response(prompt, model, tokenizer):
+ #     try:
+ #         inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

-         with torch.no_grad():
-             outputs = model.generate(
-                 inputs["input_ids"],
-                 max_length=256,
-                 num_return_sequences=1,
-                 temperature=0.7,
-                 do_sample=True,
-                 pad_token_id=tokenizer.pad_token_id
-             )
+ #         with torch.no_grad():
+ #             outputs = model.generate(
+ #                 inputs["input_ids"],
+ #                 max_length=256,
+ #                 num_return_sequences=1,
+ #                 temperature=0.7,
+ #                 do_sample=True,
+ #                 pad_token_id=tokenizer.pad_token_id
+ #             )

-         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-         return response
+ #         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+ #         return response

-     except Exception as e:
-         return f"Error generating response: {str(e)}"
- ################
+ #     except Exception as e:
+ #         return f"Error generating response: {str(e)}"
+ # ################

  # model = AutoModelForCausalLM.from_pretrained(
  #     "meta-llama/Meta-Llama-3-8B-Instruct",