Canstralian committed on
Commit
151aa67
·
verified ·
1 Parent(s): eaf7e5a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -174
app.py CHANGED
@@ -1,178 +1,114 @@
1
- import yaml
2
- import huggingface_hub
3
- import requests
4
- from bs4 import BeautifulSoup
5
- import pandas as pd
6
- from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
7
- from datasets import load_dataset, Dataset
8
- from components.sidebar import sidebar
9
- from components.chat_box import chat_box
10
- from components.chat_loop import chat_loop
11
- from components.init_state import init_state
12
- from components.prompt_engineering_dashboard import prompt_engineering_dashboard
13
  import streamlit as st
14
-
15
- # Access the Hugging Face token
16
- hf_token = st.secrets["HF_TOKEN"]
17
-
18
- # Example usage: if you're using the Hugging Face API
19
- from huggingface_hub import login
20
-
21
- login(token=hf_token)
22
-
23
- # Load config.yaml
24
- with open("config.yaml", "r") as file:
25
- config = yaml.safe_load(file)
26
-
27
- # Streamlit page configuration
28
- st.set_page_config(
29
- page_title="NCTC OSINT AGENT - Fine-tuning Models",
30
- page_icon="𓃮",
31
- )
32
-
33
- # Initialize session state
34
- init_state(st.session_state, config)
35
-
36
- # Custom HTML for title styling
37
- html_title = '''
38
- <style>
39
- .stTitle {
40
- color: #00008B; /* Deep blue color */
41
- font-size: 36px; /* Adjust font size as desired */
42
- font-weight: bold; /* Add boldness (optional) */
43
- }
44
- </style>
45
- <h1 class="stTitle">NCTC OSINT AGENT - Fine-tuning AI Models</h1>
46
- '''
47
-
48
- # Display HTML title
49
- st.write(html_title, unsafe_allow_html=True)
50
-
51
- # OSINT functions
52
- def get_github_stars_forks(owner, repo):
53
- url = f"https://api.github.com/repos/{owner}/{repo}"
54
- response = requests.get(url)
55
- data = response.json()
56
- return data['stargazers_count'], data['forks_count']
57
-
58
- def get_github_issues(owner, repo):
59
- url = f"https://api.github.com/repos/{owner}/{repo}/issues"
60
- response = requests.get(url)
61
- issues = response.json()
62
- return len(issues)
63
-
64
- def get_github_pull_requests(owner, repo):
65
- url = f"https://api.github.com/repos/{owner}/{repo}/pulls"
66
- response = requests.get(url)
67
- pulls = response.json()
68
- return len(pulls)
69
-
70
- def get_github_license(owner, repo):
71
- url = f"https://api.github.com/repos/{owner}/{repo}/license"
72
- response = requests.get(url)
73
- data = response.json()
74
- return data['license']['name']
75
-
76
- def get_last_commit(owner, repo):
77
- url = f"https://api.github.com/repos/{owner}/{repo}/commits"
78
- response = requests.get(url)
79
- commits = response.json()
80
- return commits[0]['commit']['committer']['date']
81
-
82
- def get_github_workflow_status(owner, repo):
83
- url = f"https://api.github.com/repos/{owner}/{repo}/actions/runs"
84
- response = requests.get(url)
85
- runs = response.json()
86
- return runs['workflow_runs'][0]['status'] if runs['workflow_runs'] else "No workflows found"
87
-
88
- # Function to fetch page title from a URL
89
- def fetch_page_title(url):
90
  try:
91
- response = requests.get(url)
92
- if response.status_code == 200:
93
- soup = BeautifulSoup(response.text, 'html.parser')
94
- title = soup.title.string if soup.title else 'No title found'
95
- return title
96
- else:
97
- return f"Error: Received status code {response.status_code}"
98
  except Exception as e:
99
- return f"An error occurred: {e}"
100
-
101
- # Main Streamlit app
102
- def main():
103
- # Display Prompt Engineering Dashboard (testing phase)
104
- prompt_engineering_dashboard(st.session_state, config)
105
-
106
- # Display sidebar and chat box
107
- sidebar(st.session_state, config)
108
- chat_box(st.session_state, config)
109
- chat_loop(st.session_state, config)
110
-
111
- # GitHub OSINT Analysis
112
- st.write("### GitHub Repository OSINT Analysis")
113
- st.write("Enter the GitHub repository owner and name:")
114
-
115
- owner = st.text_input("Repository Owner")
116
- repo = st.text_input("Repository Name")
117
-
118
- if owner and repo:
119
- stars, forks = get_github_stars_forks(owner, repo)
120
- open_issues = get_github_issues(owner, repo)
121
- open_pulls = get_github_pull_requests(owner, repo)
122
- license_type = get_github_license(owner, repo)
123
- last_commit = get_last_commit(owner, repo)
124
- workflow_status = get_github_workflow_status(owner, repo)
125
-
126
- st.write(f"Stars: {stars}, Forks: {forks}")
127
- st.write(f"Open Issues: {open_issues}, Open Pull Requests: {open_pulls}")
128
- st.write(f"License: {license_type}")
129
- st.write(f"Last Commit: {last_commit}")
130
- st.write(f"Workflow Status: {workflow_status}")
131
-
132
- # URL Title Fetcher
133
- st.write("### URL Title Fetcher")
134
- url = st.text_input("Enter a URL to fetch its title:")
135
- if url:
136
- title = fetch_page_title(url)
137
- st.write(f"Title: {title}")
138
-
139
- # Dataset Upload & Model Fine-Tuning Section
140
- st.write("### Dataset Upload & Model Fine-Tuning")
141
- dataset_file = st.file_uploader("Upload a CSV file for fine-tuning", type=["csv"])
142
-
143
- if dataset_file:
144
- df = pd.read_csv(dataset_file)
145
- st.write("Preview of the uploaded dataset:")
146
- st.dataframe(df.head())
147
-
148
- # Select model for fine-tuning
149
- st.write("Select a model for fine-tuning:")
150
- model_name = st.selectbox("Model", ["bert-base-uncased", "distilbert-base-uncased"])
151
-
152
- if st.button("Fine-tune Model"):
153
- if dataset_file:
154
- with st.spinner("Fine-tuning in progress..."):
155
- dataset = Dataset.from_pandas(df)
156
- tokenizer = AutoTokenizer.from_pretrained(model_name)
157
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
158
-
159
- def tokenize_function(examples):
160
- return tokenizer(examples['text'], padding="max_length", truncation=True)
161
-
162
- tokenized_datasets = dataset.map(tokenize_function, batched=True)
163
- training_args = TrainingArguments(output_dir="./results", num_train_epochs=1, per_device_train_batch_size=8)
164
- trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_datasets)
165
- trainer.train()
166
-
167
- st.success("Model fine-tuned successfully!")
168
-
169
- # Load and display OSINT dataset
170
- st.write("### OSINT Dataset")
171
- dataset = load_dataset("originalbox/osint") # Replace with the correct dataset name
172
 
173
- # Convert to pandas DataFrame for display
174
- df = dataset['train'].to_pandas() # Make sure to use the appropriate split ('train', 'test', etc.)
175
- st.write(df.head())
176
-
177
- if __name__ == "__main__":
178
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
3
+ from typing import List, Dict
4
+ import os
5
+
6
# Initialize the Hugging Face text-generation pipeline.
# NOTE: the value below is a placeholder and must be replaced with a real model id.
model_name = "your_huggingface_model_name"


@st.cache_resource(show_spinner=False)
def _load_generation_stack(name: str):
    """Load the tokenizer, causal LM and text-generation pipeline for *name*.

    Wrapped in ``st.cache_resource`` so the loaded objects are reused across
    Streamlit reruns instead of being rebuilt on every widget interaction.

    Args:
        name: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model, generator)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForCausalLM.from_pretrained(name)
    text_generator = pipeline(
        "text-generation", model=loaded_model, tokenizer=loaded_tokenizer
    )
    return loaded_tokenizer, loaded_model, text_generator


# Pre-bind the names so later code sees ``None`` rather than raising a
# NameError when loading fails.
tokenizer = model = generator = None
try:
    # The tokenizer load is inside the ``try`` as well: an invalid model id
    # fails there first and must be reported in the UI, not crash the script.
    tokenizer, model, generator = _load_generation_stack(model_name)
except Exception as e:
    st.error(f"Error initializing the model '{model_name}': {e}")
14
+
15
+ # Function to generate OSINT results
16
def generate_osint_results(prompt: str, history: List[Dict[str, str]]) -> List[str]:
    """Generate a text completion for the user's OSINT prompt.

    Only ``prompt`` is sent to the model. ``history`` is validated but is not
    part of the model input (the module-level ``generator`` pipeline is called
    with the prompt text alone).

    Args:
        prompt: The user's input; must be a non-blank string.
        history: Previous exchanges as a list of dictionaries.

    Returns:
        A single-element list holding either the pipeline's
        ``generated_text`` or an ``"Error..."`` message. Failures are
        returned as messages rather than raised.
    """
    # Validate inputs. The isinstance check keeps a non-string prompt (e.g.
    # None) from raising AttributeError on ``.strip()``.
    if not isinstance(prompt, str) or not prompt.strip():
        return ["Error: Prompt cannot be empty."]
    if not isinstance(history, list) or not all(isinstance(h, dict) for h in history):
        return ["Error: History must be a list of dictionaries."]

    # TODO(review): feed ``history`` to the model once a chat-capable model is
    # configured; until then only the current prompt is used.
    try:
        # ``max_new_tokens`` bounds the generated continuation only, whereas
        # ``max_length`` also counts the prompt and leaves long prompts with
        # no room for output.
        response = generator(prompt, max_new_tokens=100, num_return_sequences=1)
        return [response[0]["generated_text"]]
    except Exception as e:
        # Also covers a missing or failed pipeline (``generator`` undefined or None).
        return [f"Error generating response: {e}"]
48
+
49
+ # Function for fine-tuning the model with the uploaded dataset
50
def fine_tune_model(dataset: str) -> str:
    """Validate an uploaded dataset and save the current model to disk.

    NOTE: this is a placeholder. No training step is performed; the
    module-level ``model`` is saved unchanged to ``./fine_tuned_model``.

    Args:
        dataset: Path to a UTF-8 text file with one sample per line.

    Returns:
        ``"Model fine-tuned successfully!"`` when the dataset is readable and
        non-empty and the model was saved, otherwise a message starting with
        ``"Error fine-tuning the model:"``. Failures are returned, not raised.
    """
    try:
        # Explicit encoding so reading does not depend on the platform default.
        with open(dataset, "r", encoding="utf-8") as file:
            samples = [line.strip() for line in file if line.strip()]

        # Do not report success for a dataset with nothing to train on.
        if not samples:
            return "Error fine-tuning the model: dataset is empty."

        # TODO: train on ``samples`` here; currently the model is saved as-is.
        model.save_pretrained("./fine_tuned_model")
        return "Model fine-tuned successfully!"
    except Exception as e:
        return f"Error fine-tuning the model: {e}"
70
+
71
# Streamlit app interface
st.title("OSINT Tool")
st.write("This tool generates OSINT-based results and allows you to fine-tune the model with custom datasets.")

# User input for the prompt
prompt = st.text_area("Enter your OSINT prompt here...", placeholder="Type your prompt here...")

# The conversation is kept in session state so it survives Streamlit reruns.
if "history" not in st.session_state:
    st.session_state.history = []

# Display past conversation
st.write("### Message History:")
for msg in st.session_state.history:
    st.write(f"**User**: {msg['user']}")
    st.write(f"**Assistant**: {msg['assistant']}")

# Fine-tuning functionality
dataset_file = st.file_uploader("Upload a dataset for fine-tuning", type=["txt"])

if dataset_file is not None:
    # The script reruns on every widget interaction while the file stays in
    # the uploader, so process each distinct upload only once.
    upload_key = (dataset_file.name, dataset_file.size)
    if st.session_state.get("fine_tuned_upload") != upload_key:
        # The target directory does not exist on a fresh deployment.
        os.makedirs("uploads", exist_ok=True)
        # Keep only the final path component of the client-supplied name so
        # the file cannot be written outside ``uploads``.
        dataset_path = os.path.join("uploads", os.path.basename(dataset_file.name))
        with open(dataset_path, "wb") as f:
            # getvalue() returns the full content regardless of read position.
            f.write(dataset_file.getvalue())

        # Fine-tune the model and remember the outcome for later reruns
        st.session_state.fine_tuning_status = fine_tune_model(dataset_path)
        st.session_state.fine_tuned_upload = upload_key

    fine_tuning_status = st.session_state.fine_tuning_status
    # fine_tune_model reports failures as "Error ..." strings; do not show
    # those as a success.
    if fine_tuning_status.startswith("Error"):
        st.error(fine_tuning_status)
    else:
        st.success(fine_tuning_status)

# Generate OSINT response when the button is pressed
if st.button("Generate OSINT Results"):
    # A whitespace-only prompt counts as empty, so it is never stored in the
    # history as an error exchange.
    if prompt and prompt.strip():
        response = generate_osint_results(prompt, st.session_state.history)
        st.session_state.history.append({"user": prompt, "assistant": response[0]})
        st.write("### Generated OSINT Result:")
        st.write(response[0])
    else:
        st.error("Please enter a prompt.")

# Tell the user when a saved model directory is present on disk
if os.path.exists("./fine_tuned_model"):
    st.write("The model has been fine-tuned and saved as `fine_tuned_model`.")