# -*- coding: utf-8 -*-
"""
Constants for the license summarizer: vocabulary weights and UI strings.

Created on Tue Jun 14 00:29:28 2022

@author: UTKARSH
"""

import spacy

# Loaded once at import time; ``vocab`` reads the model's default stop words.
nlp = spacy.load("en_core_web_sm")


class vocab:
    """Word sets and per-word scores used when weighting license text."""

    # We will add extra weight to negation words
    NEGATION_WEIGHT = 0.2

    # Strong modal verbs are given very high weight
    strong_modal_verbs = {
        "must",
        "shall",
    }
    other_modal_verbs = {
        "may",
        "should",
        "would",
    }
    other_relevant_stopwords = {
        "without",
        "however",
    }

    # Stopwords we would not be considering while normalizing
    # We do not need stopwords if we don't normalize, but just in case
    #
    # Built from license boilerplate plus spaCy's default stop words, minus
    # the modal verbs and the other relevant stopwords defined above (those
    # carry scores in ``properties_dict`` below).
    license_stopwords = (
        {
            ",",
            "(",
            ")",
            ".",
            "\"",
            "software",
            "license",
            "work",
            "program",
            "source",
            "code",
            "rights",
            "notice",
            "provided",
            "version",
            "library",
            "covered",
            "public",
            "disclaimer",
            "documentation",
        }.union(nlp.Defaults.stop_words)
        - strong_modal_verbs
        - other_modal_verbs
        - other_relevant_stopwords
    )

    negation_words = {
        "no",
        "not",
        "non",
    }

    # These words will have a high weightage while ranking sentences
    high_imp_verbs = {
        "permit",
        "copy",
        "modify",
        "change",
        "sell",
        "reproduce",
        "transfer",
        "rent",
        "lease",
        "assign",
        "sublet",
        "distribute",
        "redistribute",
        "allow",
        "require",
        "use",
    }
    low_imp_verbs = {
        "merge",
        "publish",
        "include",
        "grant",
        "run",
        "affirm",
        "propagate",
        "acknowledge",
        "limit",
        "retain",
        "associate",
    }

    # Negated forms are spelled "not-<verb>".
    high_imp_neg_verbs = {f"not-{verb}" for verb in high_imp_verbs}
    low_imp_neg_verbs = {f"not-{verb}" for verb in low_imp_verbs}

    # Maps a score bucket (as a string key) to the words in that bucket.
    # The same keys appear in ``properties_scores`` with their float value.
    properties_dict = {
        "0.1": {
            "investigative",
            "contract",
            "contribution",
        },
        "0.2": {
            "everyone",
            "hereby",
            "claim",
        },
        "0.3": {
            "termination",
            "terminate",
            "meet",
            "tort",
            "files",
            "author",
            "available",
            "apply",
            "material",
            "user",
        },
        "0.4": {
            "liable",
            "contributors",
        },
        "0.5": low_imp_verbs.union({
            "restriction",
            "however",
            "without",
        }),
        "0.6": {
            "distribution",
            "redistribution",
            "attribution",
            "permission",
            "modification",
            "copyright",
            "limitation",
            "free",
            "charge",
            "warranty",
            "term",
            "terms",
            "condition",
            "right",
            "sublicense",
            "commercial",
            "non-commercial",
            "exception",
            "liability",
            "irrevocable",
        },
        "0.7": low_imp_neg_verbs.union({
            "no-charge",
        }),
        "0.8": high_imp_verbs.union({
            "patent",
        }),
        # NOTE(review): the "" entries below look like placeholders for
        # buckets with no extra words -- confirm that an empty token can
        # never reach the scorer before relying on this.
        "0.9": {
            "",
        },
        "1.0": high_imp_neg_verbs.union({
            "",
        }),
        "2.0": other_modal_verbs,
        "3.0": strong_modal_verbs,
    }

    properties_scores = {
        "0.1": 0.1,
        "0.2": 0.2,
        "0.3": 0.3,
        "0.4": 0.4,
        "0.5": 0.5,
        "0.6": 0.6,
        "0.7": 0.7,
        "0.8": 0.8,
        "0.9": 0.9,
        "1.0": 1.0,
        "2.0": 2.0,
        "3.0": 3.0,
    }


class color:
    """Hex colour codes."""

    GREEN = "#03AC13"
    RED = "#D22B2B"
    BLACK = "#000000"
    GRAY = "#AAAAAA"


class captions:
    """Static captions, headings and disclaimers.

    Long strings are built from adjacent literals rather than a backslash
    continuation inside the literal, so no source indentation ends up in
    the text.
    """

    APP_TITLE = "Clearly Defined: License Summarizer"
    APP_DISCLAIMER = (
        "DISCLAIMER: This app is the result of a Capstone "
        "Project and further development is required before productive use."
    )
    LICENSE_TEXT = "License text"
    ENTER_LICENSE_CONTENT = "Enter contents of the license"
    LOADING = "Loading..."
    SUMMARY = "Summary"
    SIMILARITY_INDEX = "Similarity Index"
    SIMILARITY_INDEX_DISCLAIMER = (
        "The following list of licenses are from "
        "choosealicense.com and consist of 41 known open source licenses."
    )
    PROPERTIES = "Properties"
    PROPERTIES_DISCLAIMER = (
        "The properties defined below are from "
        "choosealicense.com. For more information, visit "
        "choosealicense.com/appendix."
    )
    DEFINITIONS = "Definitions"
    EXCEPTIONS = "Exceptions"
    SUMMARY_BY_T5 = "Summary will be generated by a T5 Transformer Model"
    WARNING_ABSTRACTIVE = (
        "WARNING: The results generated by the abstractive "
        "summarizer might not be as expected"
    )
    SUMMARY_BY_TEXTRANK = (
        "Summary will be generated by a custom TextRank "
        "Algorithm"
    )
    SUMMARY_BY_BOTH = (
        "The License text will be first passed through the "
        "custom TextRank algorithm and then passed on to the T5 Transformer "
        "Model to generate a summary."
    )
    WARNING_BOTH = (
        "WARNING: The results generated by the abstractive "
        "summarizer might not be as expected"
    )
    SUMMARY_LENGTH_PERCENTAGE = "Summary length percentage"
    SELECT_SUMMARIZATION_TYPE = "Select summarization type"
    SUMMARY_VIEW = "Summary View"
    DISPLAY_SUMMARY_ONLY_DESC = (
        "Shows the important sentences from the "
        "license"
    )
    DISPLAY_HIGHLIGHTED_SUMMARY_DESC = (
        "Highlights the important sentences in "
        "the license"
    )
    CLEANED_LICENSE_ONLY = "Shows the cleaned license text only"
    CLEANED_LICENSE_WITH_DIFF = (
        "Shows the cleaned license text with "
        "highlighted diffs"
    )
    HIDE_CLEANED_LICENSE = "Hides the cleaned license text"
    NO_SIMILAR_LICENSE_FOUND = "No similar license found"
    CLEANED_LICENSE_VIEW = "Cleaned License View"
    CLEANED_LICENSE_TEXT = "Cleaned License Text"
    CLEANED_LICENSE_DIFF = "Cleaned License Diff"


class options:
    """Labels of the selectable options (also quoted in ``help_messages``)."""

    ABSTRACTIVE = "Abstractive"
    EXTRACTIVE = "Extractive"
    BOTH = "Both"
    DISPLAY_SUMMARY_ONLY = "Display Summary Only"
    DISPLAY_HIGHLIGHTED_SUMMARY = "Display Highlighted Summary"
    HIDE_CLEANED_LICENSE = "Hide Cleaned License"
    DISPLAY_CLEANED_LICENSE = "Display Cleaned License"
    DISPLAY_CLEANED_DIFF = "Display Cleaned License + Diff"
    SHOW_LICENSE_PROPERTIES = "Show license properties"
    SHOW_LICENSE_DEFINITIONS = "Show license definitions"
    SHOW_LICENSE_EXCEPTIONS = "Show license exceptions"


class help_messages:
    """Help texts; option names are interpolated from ``options``."""

    SUMMARIZATION_TYPE = (
        "Select the type of summarization to perform. "
        f'"{options.EXTRACTIVE}" would select the most important sentences '
        "to generate a summary. "
        f'"{options.ABSTRACTIVE}" would try and paraphrase '
        "the meaning of the license and form a summary. "
        f'"{options.BOTH}" would '
        'first pass the license through "extractive" and then "abstractive" '
        "to generate a summary."
    )
    SLIDER = (
        "Slide to vary the size of the summary. 1 will result in the "
        "smallest summary possible, whereas 100 will display the complete "
        "(cleaned) license text."
    )
    # The opening quote belongs around the option name, not before
    # "Select" (the original literal started with four quote characters).
    SUMMARY_VIEW = (
        "Select the type of summary view desired. "
        f'"{options.DISPLAY_SUMMARY_ONLY}" will show only the '
        "summary text. "
        f'"{options.DISPLAY_HIGHLIGHTED_SUMMARY}" will show the '
        "complete (cleaned) license text with the summary highlighted."
    )
    # Same quote placement fix as SUMMARY_VIEW.
    CLEANED_LICENSE_VIEW = (
        "Select the type of cleaned license view desired. "
        f'"{options.HIDE_CLEANED_LICENSE}" will not show '
        "the cleaned license text. "
        f'"{options.DISPLAY_CLEANED_LICENSE}" will '
        "show the cleaned license text. "
        f'"{options.DISPLAY_CLEANED_DIFF}" will '
        "show the cleaned license text and the diff between the input text "
        "and the closest matching SPDX license (from the similarity index "
        "table)."
    )
    PROPERTIES_CHECKBOX = (
        "Select this checkbox to view the properties of the "
        "license that shares the highest similarity with the input license "
        "text. This checkbox would be disabled if no known license crosses "
        "the similarity threshold."
    )
    DEFINITIONS_CHECKBOX = (
        "Select this checkbox to view definitions within "
        "the license. This checkbox would be disabled if no definitions are "
        "found within the license."
    )
    EXCEPTIONS_CHECKBOX = (
        "Select this checkbox to view exceptions within "
        "the license. This checkbox would be disabled if no exceptions are "
        "found within the license."
    )