Spaces:
Runtime error
Runtime error
Commit
·
aaa4e80
1
Parent(s):
523c717
added include and exclude websites
Browse files
- app.py +22 -9
- plagiarism.py +1 -0
app.py
CHANGED
@@ -416,7 +416,8 @@ def generate_and_format(
|
|
416 |
month_to,
|
417 |
day_to,
|
418 |
domains_to_include,
|
419 |
-
|
|
|
420 |
pdf_file_input,
|
421 |
generated_article: str = None,
|
422 |
user_comments: str = None,
|
@@ -426,10 +427,14 @@ def generate_and_format(
|
|
426 |
sorted_date = f"date:r:{date_from}:{date_to}"
|
427 |
content_string = ""
|
428 |
final_query = topic
|
429 |
-
if
|
430 |
-
|
431 |
-
final_query
|
432 |
-
|
|
|
|
|
|
|
|
|
433 |
if google_search_check:
|
434 |
url_content = google_search(final_query, sorted_date, domains_to_include)
|
435 |
content_string = "\n".join(
|
@@ -630,8 +635,14 @@ def create_interface():
|
|
630 |
label="Domains To Include",
|
631 |
)
|
632 |
with gr.Row():
|
633 |
-
|
634 |
-
label="
|
|
|
|
|
|
|
|
|
|
|
|
|
635 |
placeholder="Enter comma-separated keywords",
|
636 |
elem_classes="input-highlight-yellow",
|
637 |
)
|
@@ -747,7 +758,8 @@ def create_interface():
|
|
747 |
month_to,
|
748 |
day_to,
|
749 |
domains_to_include,
|
750 |
-
|
|
|
751 |
pdf_file_input,
|
752 |
],
|
753 |
outputs=[output_article],
|
@@ -781,7 +793,8 @@ def create_interface():
|
|
781 |
domains_to_include,
|
782 |
pdf_file_input,
|
783 |
output_article,
|
784 |
-
|
|
|
785 |
ai_comments,
|
786 |
],
|
787 |
outputs=[output_article],
|
|
|
416 |
month_to,
|
417 |
day_to,
|
418 |
domains_to_include,
|
419 |
+
include_sites,
|
420 |
+
exclude_sites,
|
421 |
pdf_file_input,
|
422 |
generated_article: str = None,
|
423 |
user_comments: str = None,
|
|
|
427 |
sorted_date = f"date:r:{date_from}:{date_to}"
|
428 |
content_string = ""
|
429 |
final_query = topic
|
430 |
+
if include_sites:
|
431 |
+
site_queries = [f"site:{site.strip()}" for site in include_sites.split(",")]
|
432 |
+
final_query += " " + " OR ".join(site_queries)
|
433 |
+
if exclude_sites:
|
434 |
+
exclude_queries = [f"-site:{site.strip()}" for site in exclude_sites.split(",")]
|
435 |
+
final_query += " " + " ".join(exclude_queries)
|
436 |
+
print(f"Final Query: {final_query}")
|
437 |
+
|
438 |
if google_search_check:
|
439 |
url_content = google_search(final_query, sorted_date, domains_to_include)
|
440 |
content_string = "\n".join(
|
|
|
635 |
label="Domains To Include",
|
636 |
)
|
637 |
with gr.Row():
|
638 |
+
include_sites = gr.Textbox(
|
639 |
+
label="Include Specific Websites",
|
640 |
+
placeholder="Enter comma-separated keywords",
|
641 |
+
elem_classes="input-highlight-yellow",
|
642 |
+
)
|
643 |
+
with gr.Row():
|
644 |
+
exclude_sites = gr.Textbox(
|
645 |
+
label="Exclude Specific Websites",
|
646 |
placeholder="Enter comma-separated keywords",
|
647 |
elem_classes="input-highlight-yellow",
|
648 |
)
|
|
|
758 |
month_to,
|
759 |
day_to,
|
760 |
domains_to_include,
|
761 |
+
include_sites,
|
762 |
+
exclude_sites,
|
763 |
pdf_file_input,
|
764 |
],
|
765 |
outputs=[output_article],
|
|
|
793 |
domains_to_include,
|
794 |
pdf_file_input,
|
795 |
output_article,
|
796 |
+
include_sites,
|
797 |
+
exclude_sites,
|
798 |
ai_comments,
|
799 |
],
|
800 |
outputs=[output_article],
|
plagiarism.py
CHANGED
@@ -114,6 +114,7 @@ def google_search(
|
|
114 |
api_key,
|
115 |
cse_id,
|
116 |
)
|
|
|
117 |
print("GOOGLE SEARCH PROCESSING TIME: ", time.perf_counter() - start_time)
|
118 |
# Scrape URLs in list
|
119 |
start_time = time.perf_counter()
|
|
|
114 |
api_key,
|
115 |
cse_id,
|
116 |
)
|
117 |
+
print("URLS: ", url_list)
|
118 |
print("GOOGLE SEARCH PROCESSING TIME: ", time.perf_counter() - start_time)
|
119 |
# Scrape URLs in list
|
120 |
start_time = time.perf_counter()
|