File size: 1,815 Bytes
074115a 2020d9c 074115a 2020d9c c04970e 074115a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import streamlit as st
from keybert import KeyBERT
# Shared KeyBERT instance, built with the library's default settings; main()
# below uses it for every keyword extraction.
# NOTE(review): this statement runs at module level, so it presumably executes
# again on each Streamlit script rerun — confirm, and consider caching the
# model if start-up cost is noticeable.
kw_model = KeyBERT()
# Define the Streamlit app
def main():
    """Render the keyword-extraction page and list keywords for the entered text.

    The page shows a text area for the document, a checkbox to remove English
    stop words, a checkbox to enable Maximal Marginal Relevance (MMR), and an
    "Extract Keywords" button. When the button is pressed, the module-level
    ``kw_model`` extracts the keywords, and each one is written out together
    with its score.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title("Keyword Extraction")
    st.write("Enter your document below:")

    # Get user input
    doc = st.text_area("Document")

    # Every option is created *before* the button. A widget created inside the
    # button branch only exists on the run triggered by the click; toggling it
    # reruns the script with the button reporting False, so its value would
    # never reach the extraction code.
    remove_stopwords = st.checkbox("Remove Stopwords")
    apply_mmr = st.checkbox("Apply Maximal Marginal Relevance (MMR)")

    if not st.button("Extract Keywords"):
        return

    # Nothing to analyse: say so instead of failing on an empty result.
    if not doc or not doc.strip():
        st.warning("Please enter a document first.")
        return

    # MMR hyperparameters
    lambda_param = 0.7  # Weight for the trade-off between relevance and diversity
    num_keywords = 5  # Number of keywords to select

    # KeyBERT implements MMR inside extract_keywords (use_mmr / diversity).
    # Its `diversity` is the weight on the diversity term, i.e. the complement
    # of the relevance weight `lambda_param`; it is ignored when use_mmr is
    # False.
    keywords = kw_model.extract_keywords(
        doc,
        # Checked box => filter English stop words; unchecked => keep them.
        stop_words="english" if remove_stopwords else None,
        top_n=num_keywords,
        use_mmr=apply_mmr,
        diversity=1.0 - lambda_param,
    )

    if not keywords:
        st.write("No keywords found.")
        return

    st.write("Keywords:")
    for keyword, score in keywords:
        st.write(f"- {keyword} (Score: {score})")
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported from elsewhere.
if __name__ == "__main__":
    main()
|