Spaces:

polygraf-ai
/

copyright_checker

Runtime error

App Files Community

aliasgerovs committed on Jun 18, 2024

Commit

8fb8d86

1 Parent(s): 0eaca07

Added fix on plagiarism

Browse files

Files changed (2) hide show

plagiarism.py +6 -5
predictors.py +1 -0

plagiarism.py CHANGED Viewed

@@ -290,9 +290,8 @@ def plagiarism_check(
     # api_key = "AIzaSyCLyCCpOPLZWuptuPAPSg8cUIZhdEMVf6g"
     # api_key = "AIzaSyCS1WQDMl1IMjaXtwSd_2rA195-Yc4psQE"
     # api_key = "AIzaSyCB61O70B8AC3l5Kk3KMoLb6DN37B7nqIk"
-    # api_key = "AIzaSyCg1IbevcTAXAPYeYreps6wYWDbU0Kz8tg"
     # api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
-    api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
     cse_id = "851813e81162b4ed4"
     url_scores = []
@@ -374,7 +373,7 @@ def plagiarism_check(
     print("SNIPPETS: ", snippets)
     snippets = [[item for item in sublist if item] for sublist in snippets]
     for ind in index_descending:
-        if url_source[ind] > 0.35:
             matched_sentence_array = [
                 [item for item in sublist if item]
                 for sublist in matched_sentence_array
@@ -470,15 +469,16 @@ def html_highlight(
     combined_sentence = ""
     total_score = 0
     total_count = 0
-    category_scores = defaultdict(list)
     for sentence, score, url, idx in sentence_scores:
         category = check_url_category(url)
         if score is None:
             total_score += 0
         else:
             total_score += score
         total_count += 1
-        category_scores[category].append(score)
         if idx != prev_idx and prev_idx is not None:
             color = color_map[prev_idx - 1]
             index_part = f"<span>[{prev_idx}]</span>"
@@ -488,6 +488,7 @@ def html_highlight(
         combined_sentence += " " + sentence
         prev_idx = idx
     total_average_score = round(total_score / total_count, 2)
     category_averages = {
         category: round((sum(scores) / len(scores)), 2)

     # api_key = "AIzaSyCLyCCpOPLZWuptuPAPSg8cUIZhdEMVf6g"
     # api_key = "AIzaSyCS1WQDMl1IMjaXtwSd_2rA195-Yc4psQE"
     # api_key = "AIzaSyCB61O70B8AC3l5Kk3KMoLb6DN37B7nqIk"
+    api_key = "AIzaSyCg1IbevcTAXAPYeYreps6wYWDbU0Kz8tg"
     # api_key = "AIzaSyA5VVwY1eEoIoflejObrxFDI0DJvtbmgW8"
     cse_id = "851813e81162b4ed4"
     url_scores = []
     print("SNIPPETS: ", snippets)
     snippets = [[item for item in sublist if item] for sublist in snippets]
     for ind in index_descending:
+        if url_source[ind] > 0.1:
             matched_sentence_array = [
                 [item for item in sublist if item]
                 for sublist in matched_sentence_array
     combined_sentence = ""
     total_score = 0
     total_count = 0
+    category_scores = defaultdict(set)
     for sentence, score, url, idx in sentence_scores:
         category = check_url_category(url)
         if score is None:
             total_score += 0
         else:
             total_score += score
+            category_scores[category].add(score)
         total_count += 1
         if idx != prev_idx and prev_idx is not None:
             color = color_map[prev_idx - 1]
             index_part = f"<span>[{prev_idx}]</span>"
         combined_sentence += " " + sentence
         prev_idx = idx
+    print(category_scores)
     total_average_score = round(total_score / total_count, 2)
     category_averages = {
         category: round((sum(scores) / len(scores)), 2)

predictors.py CHANGED Viewed

@@ -26,6 +26,7 @@ nltk.download("punkt")
 nltk.download("stopwords")
 device_needed = "cuda" if torch.cuda.is_available() else "cpu"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 text_bc_model_path = params["TEXT_BC_MODEL_PATH"]
 text_mc_model_path = params["TEXT_MC_MODEL_PATH"]

 nltk.download("stopwords")
 device_needed = "cuda" if torch.cuda.is_available() else "cpu"
 device = "cuda" if torch.cuda.is_available() else "cpu"
+print('DEVICE IS :' , device)
 text_bc_model_path = params["TEXT_BC_MODEL_PATH"]
 text_mc_model_path = params["TEXT_MC_MODEL_PATH"]