minko186 committed on
Commit
fffd7f2
1 Parent(s): 3c25106

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -1
app.py CHANGED
@@ -21,7 +21,8 @@ import plotly.graph_objects as go
21
  import torch.nn.functional as F
22
  import nltk
23
  from unidecode import unidecode
24
-
 
25
  nltk.download('punkt')
26
 
27
  from writing_analysis import (
@@ -57,6 +58,8 @@ def plagiarism_check(
57
  # api_key = "AIzaSyCg1IbevcTAXAPYeYreps6wYWDbU0Kz8tg"
58
  cse_id = "851813e81162b4ed4"
59
 
 
 
60
  sentences = getSentences(input)
61
  urlCount = {}
62
  ScoreArray = []
@@ -78,12 +81,18 @@ def plagiarism_check(
78
  api_key,
79
  cse_id,
80
  )
 
 
 
81
  print("Number of URLs: ", len(urlCount))
82
  print(urlList)
83
 
84
  # Scrape URLs in list
85
  formatted_tokens = []
86
  soups = asyncio.run(parallel_scrap(urlList))
 
 
 
87
  print(len(soups))
88
  print(
89
  "Successful scraping: "
@@ -102,6 +111,9 @@ def plagiarism_check(
102
  score = matchingScoreWithTimeout(sent, page_content)
103
  ScoreArray[i][j] = score
104
 
 
 
 
105
  # ScoreArray = asyncio.run(parallel_analyze_2(soups, sentences, ScoreArray))
106
  # print("New Score Array:\n")
107
  # print2D(ScoreArray)
@@ -177,6 +189,8 @@ def plagiarism_check(
177
 
178
  print(f"Formatted Tokens: {formatted_tokens}")
179
 
 
 
180
  return formatted_tokens
181
 
182
 
 
21
  import torch.nn.functional as F
22
  import nltk
23
  from unidecode import unidecode
24
+ import time
25
+
26
  nltk.download('punkt')
27
 
28
  from writing_analysis import (
 
58
  # api_key = "AIzaSyCg1IbevcTAXAPYeYreps6wYWDbU0Kz8tg"
59
  cse_id = "851813e81162b4ed4"
60
 
61
+ time1 = time.perf_counter()
62
+ start = time.perf_counter()
63
  sentences = getSentences(input)
64
  urlCount = {}
65
  ScoreArray = []
 
81
  api_key,
82
  cse_id,
83
  )
84
+ print(f"Time for google search: {time.perf_counter()-time1}")
85
+ time1 = time.perf_counter()
86
+
87
  print("Number of URLs: ", len(urlCount))
88
  print(urlList)
89
 
90
  # Scrape URLs in list
91
  formatted_tokens = []
92
  soups = asyncio.run(parallel_scrap(urlList))
93
+
94
+ print(f"Time for scraping: {time.perf_counter()-time1}")
95
+ time1 = time.perf_counter()
96
  print(len(soups))
97
  print(
98
  "Successful scraping: "
 
111
  score = matchingScoreWithTimeout(sent, page_content)
112
  ScoreArray[i][j] = score
113
 
114
+ print(f"Time for matching score: {time.perf_counter()-time1}")
115
+ time1 = time.perf_counter()
116
+
117
  # ScoreArray = asyncio.run(parallel_analyze_2(soups, sentences, ScoreArray))
118
  # print("New Score Array:\n")
119
  # print2D(ScoreArray)
 
189
 
190
  print(f"Formatted Tokens: {formatted_tokens}")
191
 
192
+ print(f"Time for plagiarism check: {time.perf_counter()-start}")
193
+
194
  return formatted_tokens
195
 
196