Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,10 @@ from sklearn.feature_extraction.text import TfidfVectorizer
|
|
8 |
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
import re
|
10 |
from collections import Counter
|
|
|
|
|
|
|
|
|
11 |
|
12 |
def load_data():
|
13 |
try:
|
@@ -52,14 +56,13 @@ tfidf_vectorizer = TfidfVectorizer(stop_words='english')
|
|
52 |
tfidf_matrix = tfidf_vectorizer.fit_transform(texts)
|
53 |
|
54 |
def extract_key_features(text):
|
55 |
-
#
|
56 |
-
|
57 |
-
|
58 |
-
feature_phrases = re.
|
59 |
|
60 |
all_features = noun_phrases + feature_phrases
|
61 |
-
|
62 |
-
return list(set(feature.lower() for feature in all_features))
|
63 |
|
64 |
def compare_features(query_features, patent_features):
|
65 |
common_features = set(query_features) & set(patent_features)
|
|
|
8 |
from sklearn.metrics.pairwise import cosine_similarity
|
9 |
import re
|
10 |
from collections import Counter
|
11 |
+
import spacy
|
12 |
+
|
13 |
+
# Load the spaCy model (en_core_web_sm) for advanced NLP
|
14 |
+
nlp = spacy.load("en_core_web_sm")
|
15 |
|
16 |
def load_data():
|
17 |
try:
|
|
|
56 |
tfidf_matrix = tfidf_vectorizer.fit_transform(texts)
|
57 |
|
58 |
def extract_key_features(text):
|
59 |
+
# Use spaCy to extract noun phrases and sentences containing "comprising", "including", or "consisting of"
|
60 |
+
doc = nlp(text)
|
61 |
+
noun_phrases = [chunk.text.lower() for chunk in doc.noun_chunks]
|
62 |
+
feature_phrases = [sent.text.lower() for sent in doc.sents if re.search(r'(comprising|including|consisting of)', sent.text, re.IGNORECASE)]
|
63 |
|
64 |
all_features = noun_phrases + feature_phrases
|
65 |
+
return list(set(all_features))
|
|
|
66 |
|
67 |
def compare_features(query_features, patent_features):
|
68 |
common_features = set(query_features) & set(patent_features)
|