Wedyan2023 committed
Commit 0b8c1dd · verified · 1 Parent(s): bca2f9e

Update app104.py

Files changed (1):
  1. app104.py +90 -6
app104.py CHANGED
@@ -12,6 +12,9 @@ from openai import OpenAI
 from dotenv import load_dotenv
 import warnings
 
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+
 warnings.filterwarnings('ignore')
 
 os.getenv("OAUTH_CLIENT_ID")
@@ -23,6 +26,79 @@ client = OpenAI(
     base_url="https://api-inference.huggingface.co/v1",
     api_key=os.environ.get('TOKEN2')  # Hugging Face API token
 )
+##########################################################
+# import streamlit as st
+# from transformers import AutoModelForCausalLM, AutoTokenizer
+# import torch
+
+# Model selection dropdown
+selected_model = st.selectbox(
+    "Select Model",
+    ["meta-llama/Meta-Llama-3-8B-Instruct-Turbo",
+     "meta-llama/Llama-3.3-70B-Instruct",
+     "meta-llama/Llama-3.2-3B-Instruct",
+     "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+     "meta-llama/Meta-Llama-3-8B-Instruct",
+     "meta-llama/Llama-3.1-70B-Instruct"],
+    key='model_select'
+)
+
+@st.cache_resource  # Cache the model to prevent reloading
+def load_model(model_name):
+    try:
+        # Optimized model loading configuration
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16,   # Use half precision
+            device_map="auto",           # Automatic device mapping
+            load_in_8bit=True,           # Enable 8-bit quantization
+            low_cpu_mem_usage=True,      # Optimize CPU memory usage
+            max_memory={0: "10GB"}       # Limit GPU memory usage
+        )
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            padding_side="left",
+            truncation_side="left"
+        )
+
+        return model, tokenizer
+
+    except Exception as e:
+        st.error(f"Error loading model: {str(e)}")
+        return None, None
+
+# Load the selected model with optimizations
+if selected_model:
+    model, tokenizer = load_model(selected_model)
+
+    # Check if model loaded successfully
+    if model is not None:
+        st.success(f"Successfully loaded {selected_model}")
+    else:
+        st.warning("Please select a different model or check your hardware capabilities")
+
+# Function to generate text
+def generate_response(prompt, model, tokenizer):
+    try:
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
+
+        with torch.no_grad():
+            outputs = model.generate(
+                inputs["input_ids"],
+                max_length=256,
+                num_return_sequences=1,
+                temperature=0.7,
+                do_sample=True,
+                pad_token_id=tokenizer.pad_token_id
+            )
+
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response
+
+    except Exception as e:
+        return f"Error generating response: {str(e)}"
+############################################################
 
 ####new
 # from openai import OpenAI
@@ -168,13 +244,21 @@ with st.sidebar:
         mime="application/pdf"
     )
 
-    selected_model = st.selectbox(
-        "Select Model",
-        ["meta-llama/Meta-Llama-3-8B-Instruct-Turbo", "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.2-3B-Instruct", "meta-llama/Llama-4-Scout-17B-16E-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct",
-         "meta-llama/Llama-3.1-70B-Instruct"],
-        key='model_select'
-    )
+    # selected_model = st.selectbox(
+    #     "Select Model",
+    #     ["meta-llama/Meta-Llama-3-8B-Instruct-Turbo", "meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.2-3B-Instruct", "meta-llama/Llama-4-Scout-17B-16E-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct",
+    #      "meta-llama/Llama-3.1-70B-Instruct"],
+    #     key='model_select'
+    # )
+
 
+
+    # model = AutoModelForCausalLM.from_pretrained(
+    #     "meta-llama/Meta-Llama-3-8B-Instruct",
+    #     torch_dtype=torch.float16,  # Use half precision
+    #     device_map="auto",          # Automatic device mapping
+    #     load_in_8bit=True           # Load in 8-bit precision
+    # )
     temperature = st.slider(
         "Temperature",
         0.0, 1.0, 0.7,
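
A note on the 8-bit load added above: passing load_in_8bit=True directly to from_pretrained is deprecated in recent transformers releases in favor of a BitsAndBytesConfig. A minimal sketch of the equivalent load, assuming bitsandbytes is installed and a CUDA GPU is available (the model name is one entry from the commit's selectbox list):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

quant_config = BitsAndBytesConfig(load_in_8bit=True)  # 8-bit weights via bitsandbytes

model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct",  # any entry from the model_select list above
    quantization_config=quant_config,    # replaces the bare load_in_8bit=True kwarg
    device_map="auto",                   # automatic device mapping, as in the commit
    low_cpu_mem_usage=True,              # optimize CPU memory during load
)
tokenizer = AutoTokenizer.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct",
    padding_side="left",
    truncation_side="left",
)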
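
The generate_response function added above passes pad_token_id=tokenizer.pad_token_id, but Llama tokenizers ship without a pad token, so that value is None. A hedged sketch of the same generation call with a pad-token fallback, an explicit attention mask, and inputs moved to the model's device (it continues from the model and tokenizer loaded in the previous sketch; the prompt string is a placeholder, not from the commit):

prompt = "Summarize what this app does."  # placeholder prompt (assumption)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as the pad token

inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(model.device)

with torch.no_grad():
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],  # explicit mask avoids pad/EOS ambiguity
        max_new_tokens=256,   # bounds new tokens; max_length=256 would also count the prompt
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.pad_token_id,
    )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))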
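
The commit defines load_model and generate_response but the diff does not show them being called from the UI. One possible wiring, purely illustrative: the text_area label, widget keys, and button below are hypothetical and not part of the commit.

user_prompt = st.text_area("Prompt", key="prompt_input")  # hypothetical widget

if st.button("Generate", key="generate_btn") and user_prompt:
    model, tokenizer = load_model(selected_model)  # cached by @st.cache_resource
    if model is not None:
        st.write(generate_response(user_prompt, model, tokenizer))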