Canstralian committed on
Commit
f067322
·
verified ·
1 Parent(s): 24ba913

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -17
app.py CHANGED
@@ -20,14 +20,13 @@ from huggingface_hub import login
20
 
21
  login(token=hf_token)
22
 
23
-
24
  # Load config.yaml
25
  with open("config.yaml", "r") as file:
26
  config = yaml.safe_load(file)
27
 
28
  # Streamlit page configuration
29
  st.set_page_config(
30
- page_title="( -_•)▄︻テ═一💥 (´༎ຶٹ༎ຶ)NCTC OSINT AGENT BY TRHACKNON ╭∩╮( •̀_•́ )╭∩╮",
31
  page_icon="𓃮",
32
  )
33
 
@@ -41,10 +40,9 @@ html_title = '''
41
  color: #00008B; /* Deep blue color */
42
  font-size: 36px; /* Adjust font size as desired */
43
  font-weight: bold; /* Add boldness (optional) */
44
- /* Add other font styling here (optional) */
45
  }
46
  </style>
47
- <h1 class="stTitle">( -_•)▄︻テ═一💥(´༎ຶٹ༎ຶ)NCTC OSINT AGENT💥╾━╤デ╦︻(•̀⤙•́)</h1>
48
  '''
49
 
50
  # Display HTML title
@@ -91,7 +89,6 @@ def get_github_workflow_status(owner, repo):
91
  def fetch_page_title(url):
92
  try:
93
  response = requests.get(url)
94
- st.write(f"Fetching URL: {url} - Status Code: {response.status_code}")
95
  if response.status_code == 200:
96
  soup = BeautifulSoup(response.text, 'html.parser')
97
  title = soup.title.string if soup.title else 'No title found'
@@ -142,27 +139,32 @@ def main():
142
  # Dataset Upload & Model Fine-Tuning Section
143
  st.write("### Dataset Upload & Model Fine-Tuning")
144
  dataset_file = st.file_uploader("Upload a CSV file for fine-tuning", type=["csv"])
 
145
  if dataset_file:
146
  df = pd.read_csv(dataset_file)
 
147
  st.dataframe(df.head())
148
 
 
149
  st.write("Select a model for fine-tuning:")
150
  model_name = st.selectbox("Model", ["bert-base-uncased", "distilbert-base-uncased"])
151
 
152
  if st.button("Fine-tune Model"):
153
  if dataset_file:
154
- dataset = Dataset.from_pandas(df)
155
- tokenizer = AutoTokenizer.from_pretrained(model_name)
156
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
157
-
158
- def tokenize_function(examples):
159
- return tokenizer(examples['text'], padding="max_length", truncation=True)
160
-
161
- tokenized_datasets = dataset.map(tokenize_function, batched=True)
162
- training_args = TrainingArguments(output_dir="./results", num_train_epochs=1, per_device_train_batch_size=8)
163
- trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_datasets)
164
- trainer.train()
165
- st.write("Model fine-tuned successfully!")
 
 
166
 
167
  # Load and display OSINT dataset
168
  st.write("### OSINT Dataset")
 
20
 
21
  login(token=hf_token)
22
 
 
23
  # Load config.yaml
24
  with open("config.yaml", "r") as file:
25
  config = yaml.safe_load(file)
26
 
27
  # Streamlit page configuration
28
  st.set_page_config(
29
+ page_title="NCTC OSINT AGENT - Fine-tuning Models",
30
  page_icon="𓃮",
31
  )
32
 
 
40
  color: #00008B; /* Deep blue color */
41
  font-size: 36px; /* Adjust font size as desired */
42
  font-weight: bold; /* Add boldness (optional) */
 
43
  }
44
  </style>
45
+ <h1 class="stTitle">NCTC OSINT AGENT - Fine-tuning AI Models</h1>
46
  '''
47
 
48
  # Display HTML title
 
89
  def fetch_page_title(url):
90
  try:
91
  response = requests.get(url)
 
92
  if response.status_code == 200:
93
  soup = BeautifulSoup(response.text, 'html.parser')
94
  title = soup.title.string if soup.title else 'No title found'
 
139
  # Dataset Upload & Model Fine-Tuning Section
140
  st.write("### Dataset Upload & Model Fine-Tuning")
141
  dataset_file = st.file_uploader("Upload a CSV file for fine-tuning", type=["csv"])
142
+
143
  if dataset_file:
144
  df = pd.read_csv(dataset_file)
145
+ st.write("Preview of the uploaded dataset:")
146
  st.dataframe(df.head())
147
 
148
+ # Select model for fine-tuning
149
  st.write("Select a model for fine-tuning:")
150
  model_name = st.selectbox("Model", ["bert-base-uncased", "distilbert-base-uncased"])
151
 
152
  if st.button("Fine-tune Model"):
153
  if dataset_file:
154
+ with st.spinner("Fine-tuning in progress..."):
155
+ dataset = Dataset.from_pandas(df)
156
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
157
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
158
+
159
+ def tokenize_function(examples):
160
+ return tokenizer(examples['text'], padding="max_length", truncation=True)
161
+
162
+ tokenized_datasets = dataset.map(tokenize_function, batched=True)
163
+ training_args = TrainingArguments(output_dir="./results", num_train_epochs=1, per_device_train_batch_size=8)
164
+ trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_datasets)
165
+ trainer.train()
166
+
167
+ st.success("Model fine-tuned successfully!")
168
 
169
  # Load and display OSINT dataset
170
  st.write("### OSINT Dataset")