wuhp committed on
Commit
53b94aa
·
verified ·
1 Parent(s): 7bdec5f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from internetarchive import search_items, get_item
3
+ import requests
4
+ import time
5
+
6
+ # --- VirusTotal helper functions ---
7
def scan_url_vt(url, api_key, *, poll_interval=5, max_polls=24, request_timeout=30):
    """Submit *url* to VirusTotal and report whether the analysis came back clean.

    The URL is posted to the v3 ``/urls`` endpoint, then the returned analysis
    id is polled on ``/analyses/{id}`` until its status is ``"completed"``.

    Args:
        url: URL to submit for scanning.
        api_key: VirusTotal API key, sent in the ``x-apikey`` header.
        poll_interval: Seconds to sleep before each status poll.
        max_polls: Maximum number of status polls before giving up.
        request_timeout: Timeout in seconds applied to every HTTP request.

    Returns:
        ``True`` if the completed analysis reports zero ``"malicious"``
        verdicts in its stats, ``False`` otherwise.

    Raises:
        requests.HTTPError: If either endpoint answers with an error status.
        requests.RequestException: On connection failures or request timeouts.
        TimeoutError: If the analysis is still not completed after
            ``max_polls`` polls.
    """
    headers = {"x-apikey": api_key}

    # Submit the URL for analysis.
    submit_resp = requests.post(
        "https://www.virustotal.com/api/v3/urls",
        headers=headers,
        data={"url": url},
        timeout=request_timeout,
    )
    submit_resp.raise_for_status()
    analysis_id = submit_resp.json()["data"]["id"]
    analysis_url = f"https://www.virustotal.com/api/v3/analyses/{analysis_id}"

    # Poll a bounded number of times so a stuck analysis cannot hang the caller.
    for _ in range(max_polls):
        time.sleep(poll_interval)
        status_resp = requests.get(
            analysis_url,
            headers=headers,
            timeout=request_timeout,
        )
        status_resp.raise_for_status()
        attr = status_resp.json()["data"]["attributes"]
        if attr.get("status") == "completed":
            stats = attr.get("stats", {})
            return stats.get("malicious", 0) == 0

    raise TimeoutError(
        f"VirusTotal analysis {analysis_id} did not complete "
        f"after {max_polls} polls"
    )
27
+
28
+ # --- Core search & scan logic ---
29
def fetch_clean_videos(keywords, api_key, scan_enabled):
    """Search the Internet Archive for videos and return their download URLs.

    Args:
        keywords: Comma-separated search terms. Blank entries are ignored.
        api_key: VirusTotal API key; scanning only happens when this is truthy.
        scan_enabled: When true (and ``api_key`` is set), each candidate URL is
            passed to ``scan_url_vt`` and kept only if it is reported clean.

    Returns:
        A list of ``https://archive.org/download/...`` URLs for files whose
        format starts with one of the recognised video prefixes. Returns an
        empty list when no usable keyword is supplied.
    """
    # NOTE(review): spaces inside a keyword are replaced with '+', as in the
    # original code; confirm this is the intended Internet Archive query syntax.
    terms = [kw.strip().replace(' ', '+') for kw in (keywords or "").split(",")]
    terms = [term for term in terms if term]
    if not terms:
        # Nothing to search for; avoid sending a malformed "AND ()" query.
        return []

    query = " OR ".join(terms)
    ia_query = f"mediatype:(movies) AND ({query})"

    video_prefixes = ('mpeg', 'mp4', 'avi', 'mov', 'webm')
    should_scan = bool(scan_enabled and api_key)

    clean_urls = []
    # The row limit goes through ``params``; search_items has no ``rows`` keyword.
    for res in search_items(ia_query, params={"rows": 50}):
        identifier = res.get('identifier')
        if not identifier:
            # Skip rows that carry no identifier (e.g. an error payload).
            continue
        item = get_item(identifier)
        for f in item.files:
            fmt = f.get('format', '').lower()
            if not fmt.startswith(video_prefixes):
                continue
            url = f"https://archive.org/download/{identifier}/{f['name']}"
            if should_scan:
                try:
                    is_clean = scan_url_vt(url, api_key)
                except Exception:
                    # Best effort: a URL that cannot be scanned is left out
                    # rather than aborting the whole search.
                    continue
            else:
                is_clean = True
            if is_clean:
                clean_urls.append(url)
    return clean_urls
50
+
51
+ # --- Gradio UI setup ---
52
def gui(keywords, api_key, scan_enabled):
    """Run the search (with the optional VT scan) and format it for the output box.

    Returns the found URLs joined one per line, or a fixed message when the
    search produced nothing.
    """
    found = fetch_clean_videos(keywords, api_key, scan_enabled)
    return "\n".join(found) if found else "No clean videos found."
58
+
59
# NOTE(review): the original indentation was lost in this copy of the file; the
# nesting below (three inputs inside the row, button and output beneath it) is
# a reconstruction and should be confirmed against the real app.py.
with gr.Blocks() as demo:
    gr.Markdown("# 📼 IA Drone‑Strike Video Finder with VT Scan")

    # Input controls.
    with gr.Row():
        kw_input = gr.Textbox(
            label="Search keywords (comma-separated)",
            value="drone strike, military uav, kamikaze drone",
        )
        key_input = gr.Textbox(
            label="VirusTotal API Key",
            type="password",
        )
        scan_toggle = gr.Checkbox(
            label="Enable VirusTotal scan",
            value=True,
        )

    # Trigger button and result area.
    run_btn = gr.Button("Search & Scan")
    output = gr.Textbox(label="Results (URLs)", lines=10)

    # Clicking the button runs gui() on the three inputs and shows its text.
    run_btn.click(
        fn=gui,
        inputs=[kw_input, key_input, scan_toggle],
        outputs=output,
    )

if __name__ == "__main__":
    # Per the original author's note: on HuggingFace Spaces, this binds to
    # 0.0.0.0 automatically.
    demo.launch()