Canstralian committed
Commit 0920957 · verified · 1 Parent(s): 91e2c5b

Update app.py

Files changed (1)
  1. app.py +87 -13
app.py CHANGED
@@ -1,19 +1,52 @@
 import streamlit as st
 import requests
 from bs4 import BeautifulSoup
+import pandas as pd
+from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
+from datasets import load_dataset, Dataset
 
-def fetch_page_title(url):
-    """
-    Fetches the title of the given URL.
-
-    Args:
-        url (str): The URL of the webpage.
-
-    Returns:
-        str: The title of the webpage or an error message.
-    """
+# OSINT functions
+def get_github_stars_forks(owner, repo):
+    url = f"https://api.github.com/repos/{owner}/{repo}"
+    response = requests.get(url)
+    data = response.json()
+    return data['stargazers_count'], data['forks_count']
+
+def get_github_issues(owner, repo):
+    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
+    response = requests.get(url)
+    issues = response.json()
+    return len(issues)
+
+def get_github_pull_requests(owner, repo):
+    url = f"https://api.github.com/repos/{owner}/{repo}/pulls"
+    response = requests.get(url)
+    pulls = response.json()
+    return len(pulls)
+
+def get_github_license(owner, repo):
+    url = f"https://api.github.com/repos/{owner}/{repo}/license"
+    response = requests.get(url)
+    data = response.json()
+    return data['license']['name']
+
+def get_last_commit(owner, repo):
+    url = f"https://api.github.com/repos/{owner}/{repo}/commits"
+    response = requests.get(url)
+    commits = response.json()
+    return commits[0]['commit']['committer']['date']
+
+def get_github_workflow_status(owner, repo):
+    url = f"https://api.github.com/repos/{owner}/{repo}/actions/runs"
+    response = requests.get(url)
+    runs = response.json()
+    return runs['workflow_runs'][0]['status'] if runs['workflow_runs'] else "No workflows found"
+
+# Function to fetch page title from a URL
+def fetch_page_title(url):
     try:
         response = requests.get(url)
+        st.write(f"Fetching URL: {url} - Status Code: {response.status_code}")
         if response.status_code == 200:
             soup = BeautifulSoup(response.text, 'html.parser')
             title = soup.title.string if soup.title else 'No title found'
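
Reviewer note on the new helpers in this hunk: every one of them indexes response.json() without checking the status code, so a typo in the repo name, a repo with no license, or GitHub's anonymous rate limit (60 requests per hour) surfaces as a raw KeyError. The /issues endpoint is also paginated at 30 items per page and counts open pull requests as issues, so len(issues) is only a first-page count; the open_issues_count field on the repository object avoids pagination (though it, too, includes PRs). A hardened variant of one helper might look like the sketch below; github_get and its token parameter are illustrative additions, not part of this commit, and the same timeout= argument would help fetch_page_title as well.

import requests

def github_get(path, token=None):
    # Hypothetical shared helper, not in the commit: one place for headers,
    # a timeout, and HTTP error handling.
    headers = {"Accept": "application/vnd.github+json"}
    if token:  # a personal access token lifts the 60 req/hour anonymous limit
        headers["Authorization"] = f"Bearer {token}"
    response = requests.get(f"https://api.github.com{path}", headers=headers, timeout=10)
    response.raise_for_status()  # 404/403 become a clear exception, not a KeyError later
    return response.json()

def get_github_stars_forks(owner, repo, token=None):
    data = github_get(f"/repos/{owner}/{repo}", token)
    return data["stargazers_count"], data["forks_count"]

# Usage: print(get_github_stars_forks("huggingface", "transformers"))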
@@ -23,17 +56,58 @@ def fetch_page_title(url):
     except Exception as e:
         return f"An error occurred: {e}"
 
+# Main Streamlit app
 def main():
-    """
-    Main function to run the Streamlit application.
-    """
     st.title("OSINT Tool")
-    st.write("Enter a URL to fetch its title:")
+
+    st.write("### GitHub Repository OSINT Analysis")
+    st.write("Enter the GitHub repository owner and name:")
 
-    url = st.text_input("URL")
+    owner = st.text_input("Repository Owner")
+    repo = st.text_input("Repository Name")
+
+    if owner and repo:
+        stars, forks = get_github_stars_forks(owner, repo)
+        open_issues = get_github_issues(owner, repo)
+        open_pulls = get_github_pull_requests(owner, repo)
+        license_type = get_github_license(owner, repo)
+        last_commit = get_last_commit(owner, repo)
+        workflow_status = get_github_workflow_status(owner, repo)
+
+        st.write(f"Stars: {stars}, Forks: {forks}")
+        st.write(f"Open Issues: {open_issues}, Open Pull Requests: {open_pulls}")
+        st.write(f"License: {license_type}")
+        st.write(f"Last Commit: {last_commit}")
+        st.write(f"Workflow Status: {workflow_status}")
+
+    st.write("### URL Title Fetcher")
+    url = st.text_input("Enter a URL to fetch its title:")
     if url:
         title = fetch_page_title(url)
         st.write(f"Title: {title}")
+
+    st.write("### Dataset Upload & Model Fine-Tuning")
+    dataset_file = st.file_uploader("Upload a CSV file for fine-tuning", type=["csv"])
+    if dataset_file:
+        df = pd.read_csv(dataset_file)
+        st.dataframe(df.head())
+
+    st.write("Select a model for fine-tuning:")
+    model_name = st.selectbox("Model", ["bert-base-uncased", "distilbert-base-uncased"])
+    if st.button("Fine-tune Model"):
+        if dataset_file:
+            dataset = Dataset.from_pandas(df)
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            model = AutoModelForSequenceClassification.from_pretrained(model_name)
+
+            def tokenize_function(examples):
+                return tokenizer(examples['text'], padding="max_length", truncation=True)
+
+            tokenized_datasets = dataset.map(tokenize_function, batched=True)
+            training_args = TrainingArguments(output_dir="./results", num_train_epochs=1, per_device_train_batch_size=8)
+            trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_datasets)
+            trainer.train()
+            st.write("Model fine-tuned successfully!")
 
 if __name__ == "__main__":
     main()
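
Reviewer note on the fine-tuning block above: Trainer only computes a loss when the training dataset carries a 'label' (or 'labels') column, so a CSV with just a 'text' column makes trainer.train() stop with a "model did not return a loss" error rather than fine-tune anything. Below is a minimal sketch of the same step with that requirement made explicit, using a toy DataFrame in place of the upload; the column names 'text' and 'label' and the num_labels choice are assumptions, not something the commit enforces.

import pandas as pd
from datasets import Dataset
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          Trainer, TrainingArguments)

# Toy stand-in for the uploaded CSV: the commit's tokenize_function assumes a
# 'text' column, and supervised fine-tuning additionally needs a 'label' column.
df = pd.DataFrame({"text": ["great tool", "does not work"], "label": [1, 0]})
model_name = "distilbert-base-uncased"  # one of the two selectbox choices

dataset = Dataset.from_pandas(df)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=df["label"].nunique()  # match the head to the data
)

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

# Trainer reads the 'label' column to compute the loss; without it, training
# fails instead of silently doing nothing.
tokenized = dataset.map(tokenize_function, batched=True)
training_args = TrainingArguments(output_dir="./results", num_train_epochs=1,
                                  per_device_train_batch_size=8)
trainer = Trainer(model=model, args=training_args, train_dataset=tokenized)
trainer.train()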
 
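One more operational note: Streamlit reruns the whole script on every widget interaction, so once owner and repo are filled in, all six GitHub calls fire again on each keystroke elsewhere in the page, which drains the anonymous rate limit quickly. On recent Streamlit versions (1.18+), st.cache_data can memoize the lookups across reruns; the sketch below assumes it lives in app.py next to the new helpers, and the 10-minute TTL is an arbitrary choice.

import streamlit as st

@st.cache_data(ttl=600)  # reuse results for 10 minutes instead of refetching per rerun
def repo_summary(owner, repo):
    # Bundles the six app.py helpers so one cache entry covers a repository.
    return {
        "stars_forks": get_github_stars_forks(owner, repo),
        "open_issues": get_github_issues(owner, repo),
        "open_pulls": get_github_pull_requests(owner, repo),
        "license": get_github_license(owner, repo),
        "last_commit": get_last_commit(owner, repo),
        "workflow": get_github_workflow_status(owner, repo),
    }

main() would then call repo_summary(owner, repo) once and render from the returned dict.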