# keybert / app.py
# Uploaded by varun500 -- "Update app.py" (commit 5ea6ee6, 1.97 kB).
import streamlit as st
from keybert import KeyBERT
# Create a single KeyBERT instance with default constructor arguments at
# import time; main() uses this module-level object for keyword extraction.
# NOTE(review): Streamlit presumably re-executes this script on every widget
# interaction, which would rebuild the model each time -- consider wrapping
# the construction in a Streamlit resource cache; confirm against the
# installed Streamlit version.
kw_model = KeyBERT()
# Define the Streamlit app
def main():
    """Render the keyword-extraction page and display results on demand.

    The page collects a document, a stop-word toggle, an MMR toggle and the
    desired number of results. When the "Extract Keywords" button is
    pressed, the module-level ``kw_model`` extracts the keywords and each
    (keyword, score) pair is written to the page.

    Returns:
        None. All output goes through Streamlit widgets.
    """
    st.title("Keyword Extraction")
    st.write("Enter your document below:")

    # Collect the user's input and options.
    doc = st.text_area("Document")
    remove_stopwords = st.checkbox("Remove Stopwords", value=True)
    apply_mmr = st.checkbox("Apply Maximal Marginal Relevance (MMR)", value=True)
    num_results = st.slider("Number of Results", min_value=1, max_value=30, value=5, step=1)

    # Nothing to do until the user explicitly asks for extraction.
    if not st.button("Extract Keywords"):
        return

    # Guard against empty / whitespace-only input, which yields no candidates.
    if not doc or not doc.strip():
        st.warning("Please enter some text to extract keywords from.")
        return

    # Diversity weight for MMR. The original intent was a 0.7 weight on
    # relevance, which corresponds to a diversity of 0.3 in KeyBERT's
    # parameterization (higher diversity -> less redundant keywords).
    diversity = 0.3

    keywords = kw_model.extract_keywords(
        doc,
        # Passing "english" removes English stop words; None keeps them.
        stop_words="english" if remove_stopwords else None,
        # Honor the slider on both the MMR and the plain ranking paths.
        top_n=num_results,
        # Use KeyBERT's built-in MMR rather than re-implementing it here.
        use_mmr=apply_mmr,
        diversity=diversity,
    )

    if not keywords:
        st.warning("No keywords could be extracted from this document.")
        return

    # Report the number actually returned; short documents may produce fewer
    # keywords than requested.
    st.write(f"Top {len(keywords)} Keywords:")
    for keyword, score in keywords:
        st.write(f"- {keyword} (Score: {score})")
# Run the app only when this file is executed as a script (not on import).
if __name__ == "__main__":
    main()