Spaces:
Runtime error
Runtime error
Commit
·
4515305
1
Parent(s):
97632b5
Update app.py
Browse files
app.py
CHANGED
@@ -18,6 +18,9 @@ nltk.download('stopwords')
|
|
18 |
nltk.download('punkt')
|
19 |
import string
|
20 |
|
|
|
|
|
|
|
21 |
########### DATA CLEANER VARIABLES #############
|
22 |
all_stopwords = stopwords.words('english') # Making sure to only use English stopwords
|
23 |
extra_stopwords = ['ii', 'iii'] # Can add extra stopwords to be removed from dataset/input abstracts
|
@@ -93,20 +96,6 @@ def bot(history):
|
|
93 |
|
94 |
########## LOADING PRE-COMPUTED EMBEDDINGS ##########
|
95 |
def clean_data(input, type='Dataframe'):
|
96 |
-
"""
|
97 |
-
As preparation for use with the text similarity model, this function removes superfluous data from either a dataframe full of
|
98 |
-
classifications, or an input string, in order for embeddings to be calculated for them. Removes:
|
99 |
-
• Entries with missing abstracts/descriptions/classifications/typos
|
100 |
-
• Duplicate entries
|
101 |
-
• Unnecessary punctuation
|
102 |
-
• Stop words (e.g., by, a, an, he, she, it)
|
103 |
-
• URLs
|
104 |
-
• All entries are in the same language
|
105 |
-
:param input: Either a dataframe or an individual string
|
106 |
-
:param type: Tells function whether input is a dataframe or an individual string
|
107 |
-
:return: (if dataframe), returns a dataframe containing CPC classification codes and their associated 'cleaned' description
|
108 |
-
:return: (if string), returns a 'cleaned' version of the input string
|
109 |
-
"""
|
110 |
if type == 'Dataframe':
|
111 |
cleaneddf = pd.DataFrame(columns=['Class', 'Description'])
|
112 |
for i in range(0, len(input)):
|
@@ -173,7 +162,7 @@ def clean_data(input, type='Dataframe'):
|
|
173 |
def classifier(userin):
|
174 |
clean_in = clean_data(userin, type='String')
|
175 |
in_emb = sentence_embedder(clean_in, 'Model_bert')
|
176 |
-
|
177 |
Number = 10
|
178 |
broad_scope_predictions = broad_scope_class_predictor(class_embeddings, in_emb, Number, Sensitivity='High')
|
179 |
|
@@ -225,9 +214,6 @@ def generateresponse(history):#, task):
|
|
225 |
print(history)
|
226 |
return history
|
227 |
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
############# GRADIO APP ###############
|
232 |
theme = gr.themes.Base(
|
233 |
primary_hue="indigo",
|
@@ -240,7 +226,7 @@ with gr.Blocks(title='Claimed', theme=theme) as demo:
|
|
240 |
gr.Markdown("""
|
241 |
# CLAIMED - A GENERATIVE TOOLKIT FOR PATENT ATTORNEYS
|
242 |
|
243 |
-
The patenting process can
|
244 |
|
245 |
Welcome to our demo! We've trained Meta's Llama on over 200k entries, with a focus on tasks related to the intellectual property domain.
|
246 |
|
|
|
18 |
nltk.download('punkt')
|
19 |
import string
|
20 |
|
21 |
+
########### Import Classifier Embeddings #########
|
22 |
+
class_embeddings = pd.read_csv('Embeddings/MainClassEmbeddings.csv')
|
23 |
+
|
24 |
########### DATA CLEANER VARIABLES #############
|
25 |
all_stopwords = stopwords.words('english') # Making sure to only use English stopwords
|
26 |
extra_stopwords = ['ii', 'iii'] # Can add extra stopwords to be removed from dataset/input abstracts
|
|
|
96 |
|
97 |
########## LOADING PRE-COMPUTED EMBEDDINGS ##########
|
98 |
def clean_data(input, type='Dataframe'):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
if type == 'Dataframe':
|
100 |
cleaneddf = pd.DataFrame(columns=['Class', 'Description'])
|
101 |
for i in range(0, len(input)):
|
|
|
162 |
def classifier(userin):
|
163 |
clean_in = clean_data(userin, type='String')
|
164 |
in_emb = sentence_embedder(clean_in, 'Model_bert')
|
165 |
+
|
166 |
Number = 10
|
167 |
broad_scope_predictions = broad_scope_class_predictor(class_embeddings, in_emb, Number, Sensitivity='High')
|
168 |
|
|
|
214 |
print(history)
|
215 |
return history
|
216 |
|
|
|
|
|
|
|
217 |
############# GRADIO APP ###############
|
218 |
theme = gr.themes.Base(
|
219 |
primary_hue="indigo",
|
|
|
226 |
gr.Markdown("""
|
227 |
# CLAIMED - A GENERATIVE TOOLKIT FOR PATENT ATTORNEYS
|
228 |
|
229 |
+
The patenting process can be incredibly time-consuming and expensive. We're on a mission to change that.
|
230 |
|
231 |
Welcome to our demo! We've trained Meta's Llama on over 200k entries, with a focus on tasks related to the intellectual property domain.
|
232 |
|