blazingbunny
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -1,82 +1,74 @@
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
import spacy
|
|
|
3 |
import json
|
4 |
-
import os
|
5 |
-
import subprocess # Added import statement for subprocess
|
6 |
|
7 |
# Download the spaCy model if it is not already downloaded
|
8 |
-
subprocess.run(["python", "-m", "spacy", "download", "
|
9 |
-
|
10 |
-
# Load the spaCy model for POS tagging
|
11 |
-
nlp = spacy.load("en_core_web_sm")
|
12 |
-
|
13 |
-
# Load the list of nouns and verbs from the JSON file
|
14 |
-
json_file_path = "/mnt/data/ED-input_list.json"
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
input_nouns = set(input_list["Nouns"])
|
20 |
-
input_verbs = set(input_list["Verbs"])
|
21 |
|
22 |
def identify_nouns_verbs(text):
|
23 |
# Process the text with spaCy
|
24 |
doc = nlp(text)
|
25 |
|
26 |
-
# Extract nouns and verbs with
|
27 |
-
nouns = [{"
|
28 |
-
verbs = [{"
|
29 |
|
30 |
return {"Nouns": nouns, "Verbs": verbs}
|
31 |
|
32 |
-
def calculate_similarity(
|
33 |
-
|
34 |
-
|
35 |
-
input_nouns = set(input_list["Nouns"])
|
36 |
-
input_verbs = set(input_list["Verbs"])
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
-
|
50 |
-
for noun in output["Nouns"]:
|
51 |
-
token = nlp(noun["word"])
|
52 |
-
similar_words = []
|
53 |
-
for input_word in input_nouns:
|
54 |
-
input_token = nlp(input_word)
|
55 |
-
similarity = token.similarity(input_token)
|
56 |
-
if similarity > 0.7:
|
57 |
-
similar_words.append((input_word, similarity))
|
58 |
-
output["Similarities"]["Nouns"][noun["word"]] = similar_words
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
similar_words.append((input_word, similarity))
|
69 |
-
output["Similarities"]["Verbs"][verb["word"]] = similar_words
|
70 |
-
|
71 |
-
return output
|
72 |
|
73 |
# Create the Gradio interface
|
74 |
iface = gr.Interface(
|
75 |
-
fn=
|
76 |
-
inputs=[
|
|
|
|
|
|
|
77 |
outputs=gr.JSON(),
|
78 |
-
title="Noun and Verb Similarity
|
79 |
-
description="Enter a document
|
80 |
)
|
81 |
|
82 |
if __name__ == "__main__":
|
|
|
1 |
+
# app.py
|
2 |
+
|
3 |
import gradio as gr
|
4 |
import spacy
|
5 |
+
import subprocess
|
6 |
import json
|
|
|
|
|
7 |
|
8 |
# Download the spaCy model if it is not already downloaded
|
9 |
+
subprocess.run(["python", "-m", "spacy", "download", "en_core_web_md"])
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
+
# Load the spaCy model for POS tagging and similarity
|
12 |
+
nlp = spacy.load("en_core_web_md")
|
|
|
|
|
|
|
13 |
|
14 |
def identify_nouns_verbs(text):
    """Extract the nouns and verbs from *text* using the spaCy pipeline.

    Returns a dict of the form
    ``{"Nouns": [...], "Verbs": [...]}`` where each item is
    ``{"text": <token text>, "begin_offset": <character offset>}``.
    """
    doc = nlp(text)

    def _collect(pos_tag):
        # Gather tokens of a single part of speech with their offsets.
        return [
            {"text": tok.text, "begin_offset": tok.idx}
            for tok in doc
            if tok.pos_ == pos_tag
        ]

    return {"Nouns": _collect("NOUN"), "Verbs": _collect("VERB")}
|
23 |
|
24 |
+
def calculate_similarity(nouns_verbs, input_list):
    """Score extracted nouns/verbs against reference word lists.

    Parameters
    ----------
    nouns_verbs : dict
        Output of ``identify_nouns_verbs``: ``{"Nouns": [...], "Verbs": [...]}``,
        each item carrying a ``"text"`` key.
    input_list : dict
        Reference words, ``{"Nouns": [...], "Verbs": [...]}`` (parsed from the
        uploaded JSON file).

    Returns
    -------
    dict
        ``{"Nouns": {word: [(ref_word, score), ...]}, "Verbs": {...}}``
        containing only pairs whose vector similarity exceeds the threshold.
        Words with no match above the threshold get no entry at all.
    """
    SIMILARITY_THRESHOLD = 0.7  # adjust as needed
    similarities = {"Nouns": {}, "Verbs": {}}

    for pos in ("Nouns", "Verbs"):
        # Hoist the pipeline runs for the reference words out of the inner
        # loop. Previously nlp() was invoked for every (token, word) pair,
        # i.e. O(tokens * words) full pipeline runs; now each reference word
        # is processed exactly once per call.
        reference = [(word, nlp(word)) for word in input_list.get(pos, [])]

        for item in nouns_verbs[pos]:
            word_text = item["text"]
            token = nlp(word_text)
            for ref_word, ref_token in reference:
                score = token.similarity(ref_token)
                if score > SIMILARITY_THRESHOLD:
                    entries = similarities[pos].setdefault(word_text, [])
                    # Avoid duplicate entries for the same reference word
                    # (the same surface form can appear multiple times).
                    if ref_word not in (w for w, _ in entries):
                        entries.append((ref_word, score))

    return similarities
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
+
def process_inputs(text, json_file):
    """Gradio callback: extract nouns/verbs from *text* and score them
    against the word lists contained in the uploaded JSON file.

    *json_file* is the tempfile wrapper Gradio passes for a File input;
    its ``.name`` attribute is the path on disk.
    """
    with open(json_file.name, 'r') as handle:
        reference_words = json.load(handle)

    extracted = identify_nouns_verbs(text)
    return {
        "Nouns and Verbs": extracted,
        "Similarities": calculate_similarity(extracted, reference_words),
    }
|
|
|
|
|
|
|
|
|
61 |
|
62 |
# Build the Gradio UI: free-text input plus a JSON upload, JSON output.
iface = gr.Interface(
    fn=process_inputs,
    inputs=[
        gr.Textbox(lines=10, placeholder="Enter your text here..."),
        gr.File(label="Upload JSON File"),
    ],
    outputs=gr.JSON(),
    title="Noun and Verb Identifier with Similarity Check",
    description=(
        "Enter a document or text to identify the nouns and verbs, "
        "and check for similarities with a given list of words."
    ),
)
|
73 |
|
74 |
if __name__ == "__main__":
|