Spaces:
Build error
Build error
abdulmatinomotoso
committed on
Commit
·
4dad73d
1
Parent(s):
6f7c5de
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import the necessary libraries
|
2 |
+
|
3 |
+
import logging

import gradio as gr
import numpy as np
import pandas as pd
from keybert import KeyBERT
from keyphrase_vectorizers import KeyphraseCountVectorizer
from sentence_transformers import SentenceTransformer
|
8 |
+
|
9 |
+
# Defining a function to read in the text file
|
10 |
+
|
11 |
+
def read_in_text(url):
    """Return the full contents of the text file at ``url``.

    Args:
        url: Path of the file to read; it is passed straight to ``open``.

    Returns:
        The file's contents as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # host's locale default encoding.
    with open(url, 'r', encoding='utf-8') as file:
        return file.read()
|
15 |
+
|
16 |
+
# Load the sentence-embedding model once at import time and wrap it in a
# KeyBERT extractor that the request handler reuses.
tmp_model = SentenceTransformer("valurank/MiniLM-L6-Keyword-Extraction")
kw_extractor = KeyBERT(model=tmp_model)
|
18 |
+
|
19 |
+
def get_keybert_results_with_vectorizer(file, number_of_results=20, min_score=0.25):
    """Extract keyphrases from an uploaded text file.

    Args:
        file: Object whose ``name`` attribute is the path of the text file
            to analyse.
        number_of_results: Maximum number of keyphrases to request from the
            extractor and to return.
        min_score: Keyphrases whose score is below this value are dropped.

    Returns:
        A DataFrame with a single ``keyword`` column holding the retained
        keyphrases, or the string ``"Error"`` if anything fails.
    """
    try:
        text = read_in_text(file.name)
        keywords = kw_extractor.extract_keywords(
            text,
            vectorizer=KeyphraseCountVectorizer(),
            stop_words=None,
            top_n=number_of_results,
        )
        # Each entry is a (phrase, score) pair; keep only the phrases that
        # clear the score threshold.
        phrases = [phrase for phrase, score in keywords if score >= min_score]
        output_df = pd.DataFrame({'keyword': phrases})
        # Cap at the requested count rather than a fixed 20 so that larger
        # requests are not silently truncated.
        return output_df.head(number_of_results)
    except Exception:
        # Keep the best-effort "Error" result, but record what went wrong.
        # NOTE(review): the UI output is a Dataframe component; confirm it
        # can render this string.
        logging.exception("Keyword extraction failed")
        return "Error"
|
34 |
+
|
35 |
+
# Gradio UI: upload a text file and show the extracted keyphrases as a table.
# `gr` comes from the top-of-file `import gradio as gr`; without that import
# this line fails with a NameError as soon as the module is loaded.
# NOTE(review): the `gr.inputs` / `gr.outputs` namespaces are deprecated in
# newer Gradio releases; confirm against the Gradio version this Space pins.
demo = gr.Interface(
    get_keybert_results_with_vectorizer,
    inputs=gr.inputs.File(),
    outputs=gr.outputs.Dataframe(),
    title="Keyword Extraction",
)

if __name__ == "__main__":
    demo.launch(debug=True)
|