from lime.lime_text import LimeTextExplainer
from nltk.tokenize import sent_tokenize

from predictors import predict_for_explainanility


def explainer(text, model_type):
    """Run LIME at sentence granularity and return per-sentence weights."""

    def predictor_wrapper(texts):
        # LIME calls this with a batch of perturbed texts and expects an
        # (n_samples, n_classes) array of class probabilities back.
        return predict_for_explainanility(text=texts, model_type=model_type)

    class_names = ["negative", "positive"]
    explainer_ = LimeTextExplainer(
        class_names=class_names, split_expression=sent_tokenize
    )
    sentences = sent_tokenize(text)
    num_sentences = len(sentences)
    exp = explainer_.explain_instance(
        text, predictor_wrapper, num_features=num_sentences, num_samples=500
    )
    # as_map() is keyed by class label; 1 is the "positive" class. Each entry
    # is a (feature_index, weight) pair, where the index points into `sentences`.
    weights_mapping = exp.as_map()[1]
    sentences_weights = {sentence: 0 for sentence in sentences}
    for idx, weight in weights_mapping:
        if 0 <= idx < num_sentences:
            sentences_weights[sentences[idx]] = weight
    print(sentences_weights, model_type)  # debug: per-sentence LIME weights
    return sentences_weights, exp


def analyze_and_highlight(text, model_type):
    """Wrap each sentence in a <span> whose background color encodes its LIME weight."""
    highlighted_text = ""
    sentences_weights, _ = explainer(text, model_type)
    min_weight = min(sentences_weights.values())
    max_weight = max(sentences_weights.values())
    weight_range = max_weight - min_weight

    for sentence, weight in sentences_weights.items():
        # Guard against division by zero when all weights are identical.
        normalized_weight = (
            (weight - min_weight) / weight_range if weight_range else 0.0
        )
        if weight >= 0:
            # Positive contribution: shade toward red.
            channel = int(255 * (1 - normalized_weight))
            color = f"rgba(255, {channel}, {channel}, 1)"
        else:
            # Negative contribution: shade toward green.
            channel = int(255 * normalized_weight)
            color = f"rgba({channel}, 255, {channel}, 1)"

        sentence = sentence.strip()
        if not sentence:
            continue

        highlighted_sentence = (
            f'<span style="background-color: {color};">{sentence}</span> '
        )
        highlighted_text += highlighted_sentence

    return highlighted_text
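
# --- Sketch: the predictor contract LIME expects ------------------------------
# `predict_for_explainanility` is imported from this repo's `predictors` module
# and is not shown here. LIME only requires that the callback accept a list of
# perturbed texts and return an (n_samples, n_classes) array of probabilities.
# The stand-in below is a hypothetical illustration of that contract, not the
# repo's actual predictor; the keyword scoring inside it is entirely made up.

import numpy as np


def _sketch_predictor(texts, model_type="baseline"):
    """Hypothetical predictor: returns [P(negative), P(positive)] per text."""
    # A real implementation would run `texts` through the model selected by
    # `model_type`; here we fake a score from a trivial keyword count.
    scores = np.array(
        [[t.lower().count("good") - t.lower().count("bad")] for t in texts],
        dtype=float,
    )
    probs_pos = 1.0 / (1.0 + np.exp(-scores))  # squash scores into (0, 1)
    return np.hstack([1.0 - probs_pos, probs_pos])  # shape (n_samples, 2)

# To experiment without the real `predictors` module, `predictor_wrapper`
# could call `_sketch_predictor(texts)` instead.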
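
# --- Usage sketch --------------------------------------------------------------
# One way the module might be driven end to end. Assumptions: NLTK's punkt
# tokenizer data is installed, and "baseline" is a placeholder for whatever
# model_type values `predictors.predict_for_explainanility` actually accepts.

if __name__ == "__main__":
    sample = (
        "The plot was engaging and the acting superb. "
        "However, the pacing dragged badly in the second act."
    )
    html = analyze_and_highlight(sample, model_type="baseline")
    # The returned string is HTML; render it in a browser or, if this feeds a
    # Streamlit app, via st.markdown(html, unsafe_allow_html=True).
    print(html)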