eljanmahammadli committed on
Commit
aaa4e80
·
1 Parent(s): 523c717

added include and exclude websites

Browse files
Files changed (2) hide show
  1. app.py +22 -9
  2. plagiarism.py +1 -0
app.py CHANGED
@@ -416,7 +416,8 @@ def generate_and_format(
416
  month_to,
417
  day_to,
418
  domains_to_include,
419
- search_keywords,
 
420
  pdf_file_input,
421
  generated_article: str = None,
422
  user_comments: str = None,
@@ -426,10 +427,14 @@ def generate_and_format(
426
  sorted_date = f"date:r:{date_from}:{date_to}"
427
  content_string = ""
428
  final_query = topic
429
- if search_keywords != "":
430
- quoted_keywords = [f'"{keyword.strip()}"' for keyword in search_keywords.split(",")]
431
- final_query = final_query + " " + " ".join(quoted_keywords)
432
- print(final_query)
 
 
 
 
433
  if google_search_check:
434
  url_content = google_search(final_query, sorted_date, domains_to_include)
435
  content_string = "\n".join(
@@ -630,8 +635,14 @@ def create_interface():
630
  label="Domains To Include",
631
  )
632
  with gr.Row():
633
- search_keywords = gr.Textbox(
634
- label="Keywords",
 
 
 
 
 
 
635
  placeholder="Enter comma-separated keywords",
636
  elem_classes="input-highlight-yellow",
637
  )
@@ -747,7 +758,8 @@ def create_interface():
747
  month_to,
748
  day_to,
749
  domains_to_include,
750
- search_keywords,
 
751
  pdf_file_input,
752
  ],
753
  outputs=[output_article],
@@ -781,7 +793,8 @@ def create_interface():
781
  domains_to_include,
782
  pdf_file_input,
783
  output_article,
784
- search_keywords,
 
785
  ai_comments,
786
  ],
787
  outputs=[output_article],
 
416
  month_to,
417
  day_to,
418
  domains_to_include,
419
+ include_sites,
420
+ exclude_sites,
421
  pdf_file_input,
422
  generated_article: str = None,
423
  user_comments: str = None,
 
427
  sorted_date = f"date:r:{date_from}:{date_to}"
428
  content_string = ""
429
  final_query = topic
430
+ if include_sites:
431
+ site_queries = [f"site:{site.strip()}" for site in include_sites.split(",")]
432
+ final_query += " " + " OR ".join(site_queries)
433
+ if exclude_sites:
434
+ exclude_queries = [f"-site:{site.strip()}" for site in exclude_sites.split(",")]
435
+ final_query += " " + " ".join(exclude_queries)
436
+ print(f"Final Query: {final_query}")
437
+
438
  if google_search_check:
439
  url_content = google_search(final_query, sorted_date, domains_to_include)
440
  content_string = "\n".join(
 
635
  label="Domains To Include",
636
  )
637
  with gr.Row():
638
+ include_sites = gr.Textbox(
639
+ label="Include Specific Websites",
640
+ placeholder="Enter comma-separated keywords",
641
+ elem_classes="input-highlight-yellow",
642
+ )
643
+ with gr.Row():
644
+ exclude_sites = gr.Textbox(
645
+ label="Exclude Specific Websites",
646
  placeholder="Enter comma-separated keywords",
647
  elem_classes="input-highlight-yellow",
648
  )
 
758
  month_to,
759
  day_to,
760
  domains_to_include,
761
+ include_sites,
762
+ exclude_sites,
763
  pdf_file_input,
764
  ],
765
  outputs=[output_article],
 
793
  domains_to_include,
794
  pdf_file_input,
795
  output_article,
796
+ include_sites,
797
+ exclude_sites,
798
  ai_comments,
799
  ],
800
  outputs=[output_article],
plagiarism.py CHANGED
@@ -114,6 +114,7 @@ def google_search(
114
  api_key,
115
  cse_id,
116
  )
 
117
  print("GOOGLE SEARCH PROCESSING TIME: ", time.perf_counter() - start_time)
118
  # Scrape URLs in list
119
  start_time = time.perf_counter()
 
114
  api_key,
115
  cse_id,
116
  )
117
+ print("URLS: ", url_list)
118
  print("GOOGLE SEARCH PROCESSING TIME: ", time.perf_counter() - start_time)
119
  # Scrape URLs in list
120
  start_time = time.perf_counter()