import streamlit as st
from transformers import pipeline
import re
# Load the zero-shot classification model once and cache it so Streamlit
# does not reload it on every rerun
@st.cache_resource
def load_classifier():
    return pipeline("zero-shot-classification", model="cross-encoder/nli-distilroberta-base")

classifier = load_classifier()
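# For reference, the pipeline returns one dict per input, with labels sorted
# by descending score, e.g. (illustrative values):
#   {'sequence': 'buy cheap shoes', 'labels': ['shop', 'stream', 'game'],
#    'scores': [0.91, 0.06, 0.03]}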
# App title
st.title("Keyword & URL Analysis App")

# Choose which operation to run
operation = st.radio("Choose an operation:", ["Filter Keywords", "Analyze URLs"])

# Upload the input text file
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
if uploaded_file is not None:
    # Read the file and keep non-empty lines (one keyword or URL per line)
    content = uploaded_file.read().decode("utf-8")
    items = [line.strip() for line in content.splitlines() if line.strip()]

    # Candidate categories for zero-shot classification
    categories = ["shop", "game", "stream"]

    # Lists for keyword classification results
    shopping_items = []
    gaming_items = []
    streaming_items = []
    unknown_items = []

    # Lists for URL analysis results
    parameters = []
    domains = []
    full_page_types = []
    file_extensions = []
    # Classify keywords into the categories above
    def classify_keywords(items, categories):
        results = classifier(items, categories)
        # Normalize: the pipeline may return a single dict for a single input
        if isinstance(results, dict):
            results = [results]
        for i, result in enumerate(results):
            best_category = result['labels'][0]
            score = result['scores'][0]
            if best_category == "shop" and score > 0.5:
                shopping_items.append(items[i])
            elif best_category == "game" and score > 0.5:
                gaming_items.append(items[i])
            elif best_category == "stream" and score > 0.5:
                streaming_items.append(items[i])
            else:
                unknown_items.append(items[i])
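    # Example: classify_keywords(["buy running shoes online"], categories)
    # would append the keyword to shopping_items if "shop" scores above 0.5,
    # and to unknown_items otherwise.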
    # Analyze URLs: collect parameter names, domains, page types, extensions
    def analyze_urls(urls):
        for url in urls:
            # Extract query-parameter names with a regex (without the '=')
            params = re.findall(r'(\w+)=', url)
            parameters.extend(params)

            # Extract the top-level domain (e.g. com, org); allow the URL to
            # continue with a path or query string after it
            domain_match = re.search(r'\.([a-zA-Z]{2,})(?:[/?#]|$)', url)
            if domain_match:
                domain = domain_match.group(1)
                if domain not in domains:
                    domains.append(domain)

            # Extract full page patterns (e.g. product_detail.php?, index.php?)
            page_type_match = re.search(r'(\w+\.[a-z]+)\?', url)
            if page_type_match:
                page_type = page_type_match.group(1)
                if page_type not in full_page_types:
                    full_page_types.append(page_type)

            # Extract the script extension alone (e.g. php, phtml, asp)
            extension_match = re.search(r'\.([a-z]+)(\?|$)', url)
            if extension_match:
                extension = extension_match.group(1)
                if extension not in file_extensions:
                    file_extensions.append(extension)
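    # Example: for "https://example.com/product_detail.php?id=7&ref=home" this
    # collects parameters ["id", "ref"], domain "com",
    # page type "product_detail.php", and extension "php".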
    # Start button: run the selected operation
    if st.button("Start"):
        if operation == "Filter Keywords":
            classify_keywords(items, categories)
        elif operation == "Analyze URLs":
            analyze_urls(items)
            # Remove duplicates from the URL result lists
            parameters = list(set(parameters))
            domains = list(set(domains))
            full_page_types = list(set(full_page_types))
            file_extensions = list(set(file_extensions))
    # Export a text area's contents (read from session_state) to a file
    def export_results(key, filename):
        with open(filename, "w") as f:
            # session_state[key] is already a newline-joined string, so write
            # it directly rather than join()-ing it character by character
            f.write(st.session_state[key])
        st.success(f"Results exported to {filename}")
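    # Note: Streamlit runs on_click callbacks before the script reruns, so the
    # text areas' current contents are still in session_state when exporting.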
    # Display results for the selected operation
    if operation == "Filter Keywords":
        # Keyword classification results
        st.header("Shopping Keywords")
        st.text_area("Copy the shopping keywords here:", value="\n".join(shopping_items), height=200, key="shopping")
        st.button("Export Shopping Keywords", on_click=export_results, args=("shopping", "shopping_keywords.txt"))

        st.header("Gaming Keywords")
        st.text_area("Copy the gaming keywords here:", value="\n".join(gaming_items), height=200, key="gaming")
        st.button("Export Gaming Keywords", on_click=export_results, args=("gaming", "gaming_keywords.txt"))

        st.header("Streaming Keywords")
        st.text_area("Copy the streaming keywords here:", value="\n".join(streaming_items), height=200, key="streaming")
        st.button("Export Streaming Keywords", on_click=export_results, args=("streaming", "streaming_keywords.txt"))

        st.header("Unknown Keywords")
        st.text_area("Copy the unknown keywords here:", value="\n".join(unknown_items), height=200, key="unknown")
        st.button("Export Unknown Keywords", on_click=export_results, args=("unknown", "unknown_keywords.txt"))
    elif operation == "Analyze URLs":
        # URL analysis results
        st.header("Parameters")
        st.text_area("Copy the parameters here:", value="\n".join(parameters), height=200, key="parameters")
        st.button("Export Parameters", on_click=export_results, args=("parameters", "parameters.txt"))

        st.header("Domains")
        st.text_area("Copy the domains here:", value="\n".join(domains), height=200, key="domains")
        st.button("Export Domains", on_click=export_results, args=("domains", "domains.txt"))

        st.header("Full Page Types")
        st.text_area("Copy the full page types here:", value="\n".join(full_page_types), height=200, key="full_page_types")
        st.button("Export Full Page Types", on_click=export_results, args=("full_page_types", "full_page_types.txt"))

        st.header("File Extensions")
        st.text_area("Copy the file extensions here:", value="\n".join(file_extensions), height=200, key="file_extensions")
        st.button("Export File Extensions", on_click=export_results, args=("file_extensions", "file_extensions.txt"))
else:
    st.warning("Please upload a text file to start analysis.")
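# Run locally with: streamlit run app.py
# (assuming the file is saved as app.py, the conventional entry-point name
# for Streamlit apps on Hugging Face Spaces)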