Canstralian committed on
Commit
e511bc5
·
verified ·
1 Parent(s): 0f7a93c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -172
app.py CHANGED
@@ -1,176 +1,119 @@
1
  import streamlit as st
2
- from transformers import (
3
- AutoTokenizer,
4
- AutoModelForSequenceClassification,
5
- AutoModelForSeq2SeqLM,
6
- )
7
  import torch
8
- import os
9
-
10
# Lookup table from the model key shown in the UI to the Hugging Face
# checkpoint name that is loaded for it. Key order is preserved.
MODEL_MAPPING = dict(
    # Example seq2seq model for generating shell commands
    text2shellcommands="t5-small",
    # Example classification model for pentesting tasks
    pentest_ai="bert-base-uncased",
)
15
-
16
# Sidebar widget that lets the user pick which model to run
def select_model():
    """Render the model picker in the Streamlit sidebar.

    Returns:
        str: The MODEL_MAPPING key currently chosen in the dropdown.
    """
    sidebar = st.sidebar
    sidebar.header("Model Configuration")
    available_models = list(MODEL_MAPPING.keys())
    return sidebar.selectbox("Select a model", available_models)
26
-
27
-
28
# Cached loader: raises on failure so that an error result is never stored in
# the resource cache (only successful return values are cached).
@st.cache_resource
def _load_model_and_tokenizer_cached(model_name):
    """Load the tokenizer and model for ``model_name``; exceptions propagate.

    Args:
        model_name (str): The name of the Hugging Face model to load.

    Returns:
        tuple: ``(tokenizer, model)``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Pick the model class from the checkpoint name: names containing "t5" or
    # "seq2seq" are loaded as sequence-to-sequence models, everything else as
    # a sequence classifier.
    if "t5" in model_name or "seq2seq" in model_name:
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    else:
        model = AutoModelForSequenceClassification.from_pretrained(model_name)

    return tokenizer, model


# Function to load the model and tokenizer with caching
def load_model_and_tokenizer(model_name):
    """Load the tokenizer and model for the specified Hugging Face model name.

    Successful loads are cached by ``_load_model_and_tokenizer_cached``.
    Failures are reported in the app and are NOT cached, so a transient
    problem (e.g. a network error) is retried on the next rerun instead of
    permanently yielding ``(None, None)``.

    Args:
        model_name (str): The name of the Hugging Face model to load.

    Returns:
        tuple: ``(tokenizer, model)`` on success, ``(None, None)`` on failure.
    """
    try:
        return _load_model_and_tokenizer_cached(model_name)
    except Exception as e:
        # UI boundary: surface the problem to the user instead of crashing.
        st.error(f"An error occurred while loading the model or tokenizer: {e}")
        return None, None
58
-
59
-
60
# Run inference with whichever model the user selected
def predict_with_model(user_input, model, tokenizer, model_choice):
    """Run the loaded model on ``user_input`` and package the outcome.

    Args:
        user_input (str): Text input from the user.
        model: Loaded Hugging Face model.
        tokenizer: Loaded Hugging Face tokenizer.
        model_choice (str): Selected model key from MODEL_MAPPING.

    Returns:
        dict: ``{"Generated Shell Command": str}`` when ``model_choice`` is
        ``"text2shellcommands"``; otherwise ``{"Predicted Class": int,
        "Logits": list}``.
    """
    # Both tasks tokenize the input the same way.
    encoded = tokenizer(user_input, return_tensors="pt", padding=True, truncation=True)

    if model_choice != "text2shellcommands":
        # Classification: take the highest-scoring class from the logits.
        with torch.no_grad():
            scores = model(**encoded).logits
        return {
            "Predicted Class": torch.argmax(scores, dim=-1).item(),
            "Logits": scores.tolist(),
        }

    # Seq2Seq: generate token ids and decode the first sequence.
    with torch.no_grad():
        generated_ids = model.generate(**encoded)
    shell_command = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return {"Generated Shell Command": shell_command}
92
-
93
-
94
# Function to process uploaded files
def process_uploaded_file(uploaded_file):
    """Read an uploaded text or CSV file and return its content as a string.

    CSV detection runs before the plain-text check because the usual CSV MIME
    type is ``text/csv``, which also contains ``"text"``; testing for text
    first would make the CSV branch unreachable. The file extension is used
    as a fallback for clients that report a different MIME type for CSVs.

    Args:
        uploaded_file: The uploaded file object (exposes ``type``, ``read()``
            and, when available, ``name``), or None.

    Returns:
        str | None: For CSV files, the parsed DataFrame rendered with
        ``DataFrame.to_string()``; for text files, the UTF-8 decoded content;
        None when no file was given, the type is unsupported, or reading
        fails (the latter two are reported via ``st.error``).
    """
    if uploaded_file is None:
        return None

    try:
        mime_type = (uploaded_file.type or "").lower()
        file_name = (getattr(uploaded_file, "name", "") or "").lower()

        # CSV file processing (checked first, see docstring)
        if "csv" in mime_type or file_name.endswith(".csv"):
            import pandas as pd

            return pd.read_csv(uploaded_file).to_string()

        # Text file processing
        if "text" in mime_type or file_name.endswith(".txt"):
            return uploaded_file.read().decode("utf-8")

        st.error("Unsupported file type. Please upload a text or CSV file.")
        return None
    except Exception as e:
        # UI boundary: report the problem instead of crashing the app.
        st.error(f"Error processing file: {e}")
        return None
124
-
125
-
126
# Writes each entry of a prediction result dict as a bold "key: value" line
def _render_prediction(result):
    for key, value in result.items():
        st.write(f"**{key}:** {value}")


# Main function to define the Streamlit app
def main():
    """Build the dashboard: model selection, input collection and inference.

    The user picks a model in the sidebar, then supplies input either as
    typed text or as an uploaded text/CSV file. Pressing "Submit" runs the
    selected model on that input and renders the prediction.
    """
    st.title("AI Model Inference Dashboard")
    st.markdown(
        """
        This dashboard allows you to interact with different AI models for inference tasks,
        such as generating shell commands or performing text classification.
        """
    )

    # Model selection
    model_choice = select_model()
    model_name = MODEL_MAPPING.get(model_choice)
    tokenizer, model = load_model_and_tokenizer(model_name)

    # load_model_and_tokenizer returns (None, None) on failure; without this
    # guard a later Submit would crash inside predict_with_model.
    if tokenizer is None or model is None:
        return

    # Input text area or file upload
    input_choice = st.radio("Choose Input Method", ("Text Input", "Upload File"))

    if input_choice == "Text Input":
        user_input = st.text_area("Enter your text input:", placeholder="Type your text here...")

        # Handle prediction after submit
        submit_button = st.button("Submit")

        if submit_button and user_input:
            st.write("### Prediction Results:")
            result = predict_with_model(user_input, model, tokenizer, model_choice)
            _render_prediction(result)

    elif input_choice == "Upload File":
        uploaded_file = st.file_uploader("Choose a text or CSV file", type=["txt", "csv"])

        # Handle prediction after submit
        submit_button = st.button("Submit")

        if submit_button and uploaded_file:
            file_content = process_uploaded_file(uploaded_file)
            if file_content:
                st.write("### File Content:")
                st.write(file_content)
                result = predict_with_model(file_content, model, tokenizer, model_choice)
                st.write("### Prediction Results:")
                _render_prediction(result)
            else:
                st.info("No valid content found in the file.")


if __name__ == "__main__":
    main()
 
1
  import streamlit as st
2
+ import requests
3
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
 
 
4
  import torch
5
+ import pandas as pd
6
+ from datasets import Dataset
7
+
8
# Page heading and introductory blurb
st.title("OSINT Tool 🏢")
st.markdown("""
This tool performs **Open Source Intelligence (OSINT)** analysis on GitHub repositories and fetches titles from URLs.
It also allows uploading datasets (CSV format) for fine-tuning models like **DistilBERT**.
""")

# Sidebar navigation: the selected entry is stored in app_mode
mode_options = [
    "GitHub Repository Analysis",
    "URL Title Fetcher",
    "Dataset Upload & Fine-Tuning",
]
st.sidebar.title("Navigation")
app_mode = st.sidebar.radio("Choose the mode", mode_options)
18
+
19
# `re` is needed by the URL title fetcher below; the original code called
# re.search without importing it, so every lookup ended in a NameError that
# the generic handler reported as a fetch error.
import re

# Upper bound (seconds) for outbound HTTP requests, so an unresponsive server
# cannot block the script run indefinitely.
REQUEST_TIMEOUT_SECONDS = 10

# First <title> element of an HTML document. Tolerates attributes on the tag,
# any letter case, and titles that span several lines.
TITLE_PATTERN = re.compile(r"<title[^>]*>(.*?)</title>", re.IGNORECASE | re.DOTALL)

# GitHub Repository Analysis: query the public GitHub REST API for one repo
if app_mode == "GitHub Repository Analysis":
    st.header("GitHub Repository Analysis")
    repo_owner = st.text_input("Enter GitHub Repository Owner", "huggingface")
    repo_name = st.text_input("Enter GitHub Repository Name", "transformers")

    if st.button("Analyze Repository"):
        if repo_owner and repo_name:
            try:
                response = requests.get(
                    f"https://api.github.com/repos/{repo_owner}/{repo_name}",
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
                data = response.json()

                if response.status_code == 200:
                    st.subheader("Repository Details")
                    st.write(f"**Name**: {data['name']}")
                    st.write(f"**Owner**: {data['owner']['login']}")
                    st.write(f"**Stars**: {data['stargazers_count']}")
                    st.write(f"**Forks**: {data['forks_count']}")
                    st.write(f"**Language**: {data['language']}")
                    st.write(f"**Description**: {data['description']}")
                else:
                    st.error(f"Error: {data.get('message', 'Something went wrong with the request')}")
            except Exception as e:
                # UI boundary: network, JSON and schema errors are all shown
                # to the user rather than crashing the app.
                st.error(f"Error occurred: {e}")
        else:
            st.warning("Please enter both repository owner and name.")

# URL Title Fetcher: download a page and show the text of its <title> tag
elif app_mode == "URL Title Fetcher":
    st.header("URL Title Fetcher")
    url = st.text_input("Enter URL", "https://www.huggingface.co")

    if st.button("Fetch Title"):
        if url:
            try:
                # NOTE(review): the URL is user-supplied and fetched from the
                # machine running this app; if the app is hosted publicly,
                # consider restricting targets (internal addresses) - confirm
                # against the deployment.
                response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
                if response.status_code == 200:
                    # Try to extract the title from the HTML
                    match = TITLE_PATTERN.search(response.text)
                    if match:
                        # Collapse line breaks / runs of whitespace inside the tag
                        title = " ".join(match.group(1).split())
                        st.write(f"**Page Title**: {title}")
                    else:
                        st.warning("Title tag not found in the page")
                else:
                    st.error(f"Failed to retrieve the page. Status code: {response.status_code}")
            except Exception as e:
                st.error(f"Error occurred: {e}")
        else:
            st.warning("Please enter a valid URL.")

# Dataset Upload & Fine-Tuning: preview a CSV and run a demo preparation step
elif app_mode == "Dataset Upload & Fine-Tuning":
    st.header("Dataset Upload & Fine-Tuning")

    uploaded_file = st.file_uploader("Upload a CSV file for fine-tuning", type="csv")

    if uploaded_file is not None:
        # Load the CSV into a pandas DataFrame
        df = pd.read_csv(uploaded_file)

        # Display dataset preview
        st.subheader("Dataset Preview")
        st.write(df.head())

        # Convert CSV to Hugging Face dataset format
        dataset = Dataset.from_pandas(df)

        model_name = st.selectbox("Select model for fine-tuning", ["distilbert-base-uncased"])

        if st.button("Fine-tune Model"):
            if not model_name:
                st.warning("Please select a model for fine-tuning.")
            elif "text" not in df.columns:
                # Tokenization below reads the 'text' column; fail early with
                # a clear message instead of an opaque KeyError after the
                # model has already been downloaded.
                st.error("The uploaded CSV must contain a 'text' column.")
            else:
                try:
                    model = AutoModelForSequenceClassification.from_pretrained(model_name)
                    tokenizer = AutoTokenizer.from_pretrained(model_name)

                    # Prepare the dataset
                    def preprocess_function(examples):
                        return tokenizer(examples['text'], truncation=True, padding=True)

                    tokenized_datasets = dataset.map(preprocess_function, batched=True)

                    # Example training configuration. It is not consumed yet:
                    # no training is performed in this demo.
                    train_args = {
                        "output_dir": "./results",
                        "num_train_epochs": 3,
                        "per_device_train_batch_size": 16,
                        "logging_dir": "./logs",
                    }

                    # Fine-tuning logic is a placeholder (demonstration only);
                    # real fine-tuning would need e.g. the Hugging Face Trainer.

                    st.success("Fine-tuning started (demo)!")
                except Exception as e:
                    st.error(f"Error during fine-tuning: {e}")

    else:
        st.warning("Please upload a dataset.")