Mohzen321 committed on
Commit
4e37c20
·
verified ·
1 Parent(s): d064c14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -23
app.py CHANGED
@@ -40,6 +40,9 @@ if uploaded_file is not None:
40
  # قائمة لتحليل الصيغ (Extensions)
41
  extensions = {}
42
 
 
 
 
43
  # متغيرات للتحكم في العملية
44
  progress_bar = st.progress(0)
45
  pause_button = st.button("Pause")
@@ -50,20 +53,6 @@ if uploaded_file is not None:
50
  current_index = 0 # مؤشر للكلمة الحالية
51
  batch_size = 10 # عدد العناصر التي يتم معالجتها في الدفعة الواحدة
52
 
53
- # دالة التحقق من صحة الملف
54
- def validate_file(items, operation):
55
- if operation == "Filter Keywords":
56
- # التحقق من أن الملف يحتوي على كلمات مفتاحية فقط
57
- for item in items:
58
- if re.match(r'https?://', item):
59
- return False, "File should contain keywords only."
60
- elif operation == "Extra & Filter Param (URLs)":
61
- # التحقق من أن الملف يحتوي على روابط فقط
62
- for item in items:
63
- if not re.match(r'https?://', item):
64
- return False, "File should contain URLs only."
65
- return True, ""
66
-
67
  # دالة تصنيف الكلمات باستخدام الدفعات
68
  def classify_keywords_batch(items, categories, start_index=0):
69
  global paused, stopped, current_index
@@ -144,6 +133,13 @@ if uploaded_file is not None:
144
  extensions[ext] = 0
145
  extensions[ext] += 1
146
 
 
 
 
 
 
 
 
147
  # تحديث المؤشر الحالي
148
  current_index = i + batch_size
149
 
@@ -174,21 +170,25 @@ if uploaded_file is not None:
174
  # تحديث محتوى المربع الخاص بالصيغ
175
  st.session_state.extensions_text = "\n".join(extensions.keys())
176
 
 
 
 
 
 
 
 
 
 
177
  # زر البدء
178
  if st.button("Start"):
179
  stopped = False
180
  paused = False
181
  current_index = 0
182
 
183
- # التحقق من صحة الملف
184
- is_valid, error_message = validate_file(items, operation)
185
- if not is_valid:
186
- st.error(error_message)
187
- else:
188
- if operation == "Filter Keywords":
189
- classify_keywords_batch(items, categories, start_index=current_index)
190
- elif operation == "Extra & Filter Param (URLs)":
191
- classify_parameters(items, categories, start_index=current_index)
192
 
193
  # زر الإيقاف المؤقت
194
  if pause_button:
@@ -216,21 +216,25 @@ if uploaded_file is not None:
216
  if 'shopping_text' not in st.session_state:
217
  st.session_state.shopping_text = ""
218
  st.text_area("Copy the shopping keywords here:", value=st.session_state.shopping_text, height=200, key="shopping")
 
219
 
220
  st.header("Gaming Keywords")
221
  if 'gaming_text' not in st.session_state:
222
  st.session_state.gaming_text = ""
223
  st.text_area("Copy the gaming keywords here:", value=st.session_state.gaming_text, height=200, key="gaming")
 
224
 
225
  st.header("Streaming Keywords")
226
  if 'streaming_text' not in st.session_state:
227
  st.session_state.streaming_text = ""
228
  st.text_area("Copy the streaming keywords here:", value=st.session_state.streaming_text, height=200, key="streaming")
 
229
 
230
  st.header("Unknown Keywords")
231
  if 'unknown_text' not in st.session_state:
232
  st.session_state.unknown_text = ""
233
  st.text_area("Copy the unknown keywords here:", value=st.session_state.unknown_text, height=200, key="unknown")
 
234
 
235
  elif operation == "Extra & Filter Param (URLs)":
236
  # عرض النتائج للباراميترات
@@ -238,27 +242,39 @@ if uploaded_file is not None:
238
  if 'shop_params' not in st.session_state:
239
  st.session_state.shop_params = ""
240
  st.text_area("Copy the shop parameters here:", value=st.session_state.shop_params, height=200, key="shop_params")
 
241
 
242
  st.header("Game Parameters")
243
  if 'game_params' not in st.session_state:
244
  st.session_state.game_params = ""
245
  st.text_area("Copy the game parameters here:", value=st.session_state.game_params, height=200, key="game_params")
 
246
 
247
  st.header("Stream Parameters")
248
  if 'stream_params' not in st.session_state:
249
  st.session_state.stream_params = ""
250
  st.text_area("Copy the stream parameters here:", value=st.session_state.stream_params, height=200, key="stream_params")
 
251
 
252
  st.header("Unknown Parameters")
253
  if 'unknown_params' not in st.session_state:
254
  st.session_state.unknown_params = ""
255
  st.text_area("Copy the unknown parameters here:", value=st.session_state.unknown_params, height=200, key="unknown_params")
 
256
 
257
  # عرض الصيغ (Extensions)
258
  st.header("File Extensions")
259
  if 'extensions_text' not in st.session_state:
260
  st.session_state.extensions_text = ""
261
  st.text_area("Copy the file extensions here:", value=st.session_state.extensions_text, height=200, key="extensions")
 
 
 
 
 
 
 
 
262
 
263
  else:
264
  st.warning("Please upload a text file to start classification.")
 
40
  # قائمة لتحليل الصيغ (Extensions)
41
  extensions = {}
42
 
43
+ # قائمة لتحليل أنماط الصفحات الكاملة (Full PageType)
44
+ full_page_types = []
45
+
46
  # متغيرات للتحكم في العملية
47
  progress_bar = st.progress(0)
48
  pause_button = st.button("Pause")
 
53
  current_index = 0 # مؤشر للكلمة الحالية
54
  batch_size = 10 # عدد العناصر التي يتم معالجتها في الدفعة الواحدة
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  # دالة تصنيف الكلمات باستخدام الدفعات
57
  def classify_keywords_batch(items, categories, start_index=0):
58
  global paused, stopped, current_index
 
133
  extensions[ext] = 0
134
  extensions[ext] += 1
135
 
136
+ # استخراج أنماط الصفحات الكاملة (Full PageType)
137
+ page_type_match = re.search(r'(\w+\.php|\w+\.html)\?', url)
138
+ if page_type_match:
139
+ page_type = page_type_match.group(1)
140
+ if page_type not in full_page_types:
141
+ full_page_types.append(page_type)
142
+
143
  # تحديث المؤشر الحالي
144
  current_index = i + batch_size
145
 
 
170
  # تحديث محتوى المربع الخاص بالصيغ
171
  st.session_state.extensions_text = "\n".join(extensions.keys())
172
 
173
+ # تحديث محتوى المربع الخاص بأنماط الصفحات الكاملة
174
+ st.session_state.full_page_types = "\n".join(full_page_types)
175
+
176
+ # دالة تصدير النتائج
177
+ def export_results(key, filename):
178
+ with open(filename, "w") as f:
179
+ f.write(st.session_state[key])
180
+ st.success(f"Results exported to {filename}")
181
+
182
  # زر البدء
183
  if st.button("Start"):
184
  stopped = False
185
  paused = False
186
  current_index = 0
187
 
188
+ if operation == "Filter Keywords":
189
+ classify_keywords_batch(items, categories, start_index=current_index)
190
+ elif operation == "Extra & Filter Param (URLs)":
191
+ classify_parameters(items, categories, start_index=current_index)
 
 
 
 
 
192
 
193
  # زر الإيقاف المؤقت
194
  if pause_button:
 
216
  if 'shopping_text' not in st.session_state:
217
  st.session_state.shopping_text = ""
218
  st.text_area("Copy the shopping keywords here:", value=st.session_state.shopping_text, height=200, key="shopping")
219
+ st.button("Export Shopping Keywords", on_click=export_results, args=("shopping_text", "shopping_keywords.txt"))
220
 
221
  st.header("Gaming Keywords")
222
  if 'gaming_text' not in st.session_state:
223
  st.session_state.gaming_text = ""
224
  st.text_area("Copy the gaming keywords here:", value=st.session_state.gaming_text, height=200, key="gaming")
225
+ st.button("Export Gaming Keywords", on_click=export_results, args=("gaming_text", "gaming_keywords.txt"))
226
 
227
  st.header("Streaming Keywords")
228
  if 'streaming_text' not in st.session_state:
229
  st.session_state.streaming_text = ""
230
  st.text_area("Copy the streaming keywords here:", value=st.session_state.streaming_text, height=200, key="streaming")
231
+ st.button("Export Streaming Keywords", on_click=export_results, args=("streaming_text", "streaming_keywords.txt"))
232
 
233
  st.header("Unknown Keywords")
234
  if 'unknown_text' not in st.session_state:
235
  st.session_state.unknown_text = ""
236
  st.text_area("Copy the unknown keywords here:", value=st.session_state.unknown_text, height=200, key="unknown")
237
+ st.button("Export Unknown Keywords", on_click=export_results, args=("unknown_text", "unknown_keywords.txt"))
238
 
239
  elif operation == "Extra & Filter Param (URLs)":
240
  # عرض النتائج للباراميترات
 
242
  if 'shop_params' not in st.session_state:
243
  st.session_state.shop_params = ""
244
  st.text_area("Copy the shop parameters here:", value=st.session_state.shop_params, height=200, key="shop_params")
245
+ st.button("Export Shop Parameters", on_click=export_results, args=("shop_params", "shop_params.txt"))
246
 
247
  st.header("Game Parameters")
248
  if 'game_params' not in st.session_state:
249
  st.session_state.game_params = ""
250
  st.text_area("Copy the game parameters here:", value=st.session_state.game_params, height=200, key="game_params")
251
+ st.button("Export Game Parameters", on_click=export_results, args=("game_params", "game_params.txt"))
252
 
253
  st.header("Stream Parameters")
254
  if 'stream_params' not in st.session_state:
255
  st.session_state.stream_params = ""
256
  st.text_area("Copy the stream parameters here:", value=st.session_state.stream_params, height=200, key="stream_params")
257
+ st.button("Export Stream Parameters", on_click=export_results, args=("stream_params", "stream_params.txt"))
258
 
259
  st.header("Unknown Parameters")
260
  if 'unknown_params' not in st.session_state:
261
  st.session_state.unknown_params = ""
262
  st.text_area("Copy the unknown parameters here:", value=st.session_state.unknown_params, height=200, key="unknown_params")
263
+ st.button("Export Unknown Parameters", on_click=export_results, args=("unknown_params", "unknown_params.txt"))
264
 
265
  # عرض الصيغ (Extensions)
266
  st.header("File Extensions")
267
  if 'extensions_text' not in st.session_state:
268
  st.session_state.extensions_text = ""
269
  st.text_area("Copy the file extensions here:", value=st.session_state.extensions_text, height=200, key="extensions")
270
+ st.button("Export File Extensions", on_click=export_results, args=("extensions_text", "file_extensions.txt"))
271
+
272
+ # عرض أنماط الصفحات الكاملة (Full PageType)
273
+ st.header("Full PageType")
274
+ if 'full_page_types' not in st.session_state:
275
+ st.session_state.full_page_types = ""
276
+ st.text_area("Copy the full page types here:", value=st.session_state.full_page_types, height=200, key="full_page_types")
277
+ st.button("Export Full PageTypes", on_click=export_results, args=("full_page_types", "full_page_types.txt"))
278
 
279
  else:
280
  st.warning("Please upload a text file to start classification.")