Spaces:
Runtime error
Runtime error
import glob, os, sys; | |
sys.path.append('../udfPreprocess') | |
#import helper | |
import udfPreprocess.docPreprocessing as pre | |
import udfPreprocess.cleaning as clean | |
#import needed libraries | |
import seaborn as sns | |
from pandas import DataFrame | |
from keybert import KeyBERT | |
from transformers import pipeline | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import streamlit as st | |
import pandas as pd | |
import docx | |
from docx.shared import Inches | |
from docx.shared import Pt | |
from docx.enum.style import WD_STYLE_TYPE | |
import tempfile | |
import sqlite3 | |
import logging | |
logger = logging.getLogger(__name__) | |
import configparser | |
def load_sdgClassifier():
    """Build and return the SDG text-classification pipeline.

    Creates a ``transformers`` "text-classification" pipeline backed by the
    "jonas/sdg_classifier_osdg" model.

    Returns
    -------
    The pipeline object returned by ``transformers.pipeline``; calling it
    with text(s) yields the classification result(s).
    """
    # Log through the module-level logger, and do so *before* the load starts
    # so the message marks the beginning of the operation.
    logger.info("Loading classifier")
    classifier = pipeline("text-classification", model="jonas/sdg_classifier_osdg")
    return classifier
def sdg_classification(par_list):
    """Classify paragraphs by SDG and keep those scoring above the threshold.

    The relevancy threshold is read from the ``THRESHOLD`` option of the
    ``[sdg]`` section in ``udfPreprocess/paramconfig.cfg``.

    Parameters
    ----------
    par_list :
        Texts handed to the classifier in one call; presumably a list of
        paragraph strings (confirm against callers). The classifier is
        expected to return one ``{'label': ..., 'score': ...}`` mapping per
        entry, in the same order.

    Returns
    -------
    df3 : pandas.DataFrame
        Columns ``"SDG"`` and ``"text"`` for the rows whose score is strictly
        greater than the threshold, ordered by descending score. The index is
        the 1-based rank assigned after sorting.
    x : pandas.Series
        Number of retained rows per SDG label (``value_counts``).
    """
    logger.info("running SDG classifiication")

    config = configparser.ConfigParser()
    # Use a context manager so the config file handle is always closed.
    with open('udfPreprocess/paramconfig.cfg') as config_file:
        config.read_file(config_file)
    threshold = float(config.get('sdg', 'THRESHOLD'))

    classifier = load_sdgClassifier()
    labels = classifier(par_list)
    labels_ = [(l['label'], l['score']) for l in labels]

    df2 = DataFrame(labels_, columns=["SDG", "Relevancy"])
    df2['text'] = par_list
    # Rank by score (best first) and number the rows from 1.
    df2 = df2.sort_values(by="Relevancy", ascending=False).reset_index(drop=True)
    df2.index += 1
    df2 = df2[df2['Relevancy'] > threshold]

    x = df2['SDG'].value_counts()
    # drop() returns a new frame, so df2 is left untouched.
    df3 = df2.drop(['Relevancy'], axis=1)
    return df3, x