"""Gradio demo comparing WordLlama and TF-IDF sentence similarity scores."""

import gradio as gr
from wordllama import WordLlama
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load the default WordLlama model
wl = WordLlama.load()

# Initialize TF-IDF vectorizer (re-fitted on each submitted sentence pair)
tfidf_vectorizer = TfidfVectorizer()


def calculate_similarities(sentence1, sentence2):
    """Score two sentences with WordLlama and with TF-IDF cosine similarity.

    Args:
        sentence1: First sentence as entered in the UI (may be empty or None).
        sentence2: Second sentence as entered in the UI (may be empty or None).

    Returns:
        A ``(wordllama_score, tfidf_score)`` tuple of plain Python floats.
        ``(0.0, 0.0)`` is returned when either sentence is blank, and the
        TF-IDF score is ``0.0`` when the pair yields no usable tokens.
    """
    # The button can be pressed before anything is typed; a blank sentence
    # has nothing to compare, so report zero instead of raising in the UI.
    if not (sentence1 or "").strip() or not (sentence2 or "").strip():
        return 0.0, 0.0

    # WordLlama similarity
    wordllama_score = wl.similarity(sentence1, sentence2)

    # TF-IDF similarity
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform([sentence1, sentence2])
    except ValueError:
        # scikit-learn raises "empty vocabulary" when no token survives the
        # default tokenizer (e.g. input made only of one-character words or
        # punctuation). No shared terms means no lexical similarity.
        tfidf_score = 0.0
    else:
        tfidf_score = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

    return float(wordllama_score), float(tfidf_score)


# Examples combining original and new homophone-based examples
examples = [
    # Original examples
    ["I went to the car", "I went to the pawn shop"],
    ["The cat is on the roof", "A dog is in the yard"],
    ["She loves playing tennis", "She enjoys sports"],
    ["This is a bright day", "It's a sunny morning"],
    ["I bought a new phone", "I got a new mobile"],
    ["The restaurant serves delicious food", "This place has great cuisine"],
    ["Python is a programming language", "Java is used for coding"],
    ["The movie was entertaining", "I enjoyed watching the film"],
    ["Climate change affects our planet", "Global warming is a serious issue"],
    ["Students study in the library", "People read books in the library"],
    # New examples with similar words but different meanings
    ["The executive board met this morning", "I was so bored during the meeting"],
    ["Don't waste your time on this", "The dress fits perfectly at the waist"],
    ["The principal called a meeting", "It's a matter of principle"],
    ["The weather is beautiful today", "I don't know whether to go or stay"],
    ["I need a piece of the cake", "The world needs peace"],
    ["The bass was swimming in the lake", "Turn up the bass in the speaker"],
    ["The fair is in town this weekend", "That decision wasn't fair at all"],
    ["I need to address this letter", "What's your new address?"],
    ["The bank of the river is muddy", "I need to go to the bank for money"],
    ["Can you bear this weight?", "I saw a bear in the woods"],
]

# Define Gradio interface with updated layout
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown("# Advanced Text Similarity Comparison")
    gr.Markdown("""
    Compare sentences using both WordLlama and TF-IDF similarity metrics.
    This tool includes examples of similar words with different meanings to demonstrate semantic understanding.
    """)

    # Two side-by-side input boxes
    with gr.Row():
        with gr.Column():
            sentence1 = gr.Textbox(
                lines=2,
                placeholder="Enter first sentence...",
                label="First Sentence",
                info="Type or select from examples below",
            )
        with gr.Column():
            sentence2 = gr.Textbox(
                lines=2,
                placeholder="Enter second sentence...",
                label="Second Sentence",
                info="Type or select from examples below",
            )

    button = gr.Button("Calculate Similarities", variant="primary")

    # Two side-by-side score readouts
    with gr.Row():
        wordllama_output = gr.Number(
            label="WordLlama Similarity",
            info="Contextual similarity score (0-1)",
            value=0.0,
        )
        tfidf_output = gr.Number(
            label="TF-IDF Similarity",
            info="Term frequency-based similarity score (0-1)",
            value=0.0,
        )

    gr.Markdown("""
    ### Understanding the Scores
    - **WordLlama Similarity**: Measures semantic similarity considering context and meaning
    - **TF-IDF Similarity**: Measures similarity based on word frequency and importance
    """)

    gr.Markdown("### Example Sentence Pairs")
    gr.Markdown("""
    The examples include:
    - Regular sentence pairs
    - Sentences with similar words but different meanings (homophones)
    - Contextually related sentences
    """)

    # Wire the button to the scoring function
    button.click(
        calculate_similarities,
        inputs=[sentence1, sentence2],
        outputs=[wordllama_output, tfidf_output],
    )

    # Clicking an example fills both textboxes
    gr.Examples(
        examples=examples,
        inputs=[sentence1, sentence2],
        label="Click on any example to load it",
    )

# Launch the interface
iface.launch(share=True)