Update app.py
app.py CHANGED
@@ -21,7 +21,8 @@ import plotly.graph_objects as go
 import torch.nn.functional as F
 import nltk
 from unidecode import unidecode
-
+import time
+
 nltk.download('punkt')
 
 from writing_analysis import (
@@ -57,6 +58,8 @@ def plagiarism_check(
     # api_key = "AIzaSyCg1IbevcTAXAPYeYreps6wYWDbU0Kz8tg"
     cse_id = "851813e81162b4ed4"
 
+    time1 = time.perf_counter()
+    start = time.perf_counter()
     sentences = getSentences(input)
     urlCount = {}
     ScoreArray = []
@@ -78,12 +81,18 @@ def plagiarism_check(
         api_key,
         cse_id,
     )
+    print(f"Time for google search: {time.perf_counter()-time1}")
+    time1 = time.perf_counter()
+
     print("Number of URLs: ", len(urlCount))
     print(urlList)
 
     # Scrape URLs in list
     formatted_tokens = []
     soups = asyncio.run(parallel_scrap(urlList))
+
+    print(f"Time for scraping: {time.perf_counter()-time1}")
+    time1 = time.perf_counter()
     print(len(soups))
     print(
         "Successful scraping: "
@@ -102,6 +111,9 @@ def plagiarism_check(
             score = matchingScoreWithTimeout(sent, page_content)
             ScoreArray[i][j] = score
 
+    print(f"Time for matching score: {time.perf_counter()-time1}")
+    time1 = time.perf_counter()
+
     # ScoreArray = asyncio.run(parallel_analyze_2(soups, sentences, ScoreArray))
     # print("New Score Array:\n")
     # print2D(ScoreArray)
@@ -177,6 +189,8 @@ def plagiarism_check(
 
     print(f"Formatted Tokens: {formatted_tokens}")
 
+    print(f"Time for plagiarism check: {time.perf_counter()-start}")
+
     return formatted_tokens
 
 
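The commit instruments each stage of plagiarism_check with the same two-line pattern: print the time elapsed since time1, then reset time1 via time.perf_counter(). Below is a minimal sketch of how that bookkeeping could be factored into a reusable context manager; stage_timer is a hypothetical helper for illustration, not something this commit adds to app.py:

import time
from contextlib import contextmanager

@contextmanager
def stage_timer(label):
    # Print the wall-clock duration of the wrapped block, matching the
    # format of the print calls added in this commit.
    start = time.perf_counter()
    try:
        yield
    finally:
        print(f"Time for {label}: {time.perf_counter() - start}")

Each instrumented stage then becomes a with block, for example:

with stage_timer("scraping"):
    soups = asyncio.run(parallel_scrap(urlList))

and the manual time1 reassignments disappear. perf_counter is a reasonable clock for this: it is monotonic and offers the highest available resolution for measuring intervals, unlike time.time().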