import streamlit as st import spacy import graphviz import pandas as pd import base64 import shutil import subprocess # Load English language model for spaCy nlp = spacy.load('en_core_web_md') def check_graphviz_installation(): """ Check if Graphviz is installed and accessible """ if shutil.which('dot') is None: return False try: subprocess.run(['dot', '-V'], capture_output=True, check=True) return True except (subprocess.SubprocessError, OSError): return False def identify_clauses(doc): """ Identify clauses in the sentence using spaCy, correctly separating dependent and independent clauses """ clauses = [] # First identify all subordinate clauses and their spans subordinate_spans = [] for token in doc: if token.dep_ in ["ccomp", "xcomp", "advcl", "relcl"]: span = doc[token.left_edge.i:token.right_edge.i + 1] subordinate_spans.append({ "span": span, "type": { "ccomp": "Complement Clause", "xcomp": "Open Complement Clause", "advcl": "Adverbial Clause", "relcl": "Adjective Clause" }[token.dep_] }) # Find the root and construct the main clause by excluding subordinate spans root = None for token in doc: if token.dep_ == "ROOT": root = token break if root: # Get all tokens in the root's subtree main_clause_tokens = set(token for token in root.subtree) # Remove tokens that are part of subordinate clauses for sub_clause in subordinate_spans: for token in sub_clause["span"]: if token in main_clause_tokens: main_clause_tokens.remove(token) # Construct the main clause text from remaining tokens main_clause_text = " ".join(sorted([token.text for token in main_clause_tokens], key=lambda x: [t.i for t in doc if t.text == x][0])) main_clause_text = main_clause_text.strip().replace(",","").replace(".","") clauses.append({"Type": "Independent Clause", "Text": main_clause_text}) # Add the subordinate clauses for sub_clause in subordinate_spans: clauses.append({ "Type": sub_clause["type"], "Text": sub_clause["span"].text }) return clauses def analyze_clause_functions(doc): """ Analyze the function of each clause """ functions = [] for token in doc: if token.dep_ == "ROOT": functions.append({"Type": "Independent Clause", "Function": "Express the primary action or state"}) elif token.dep_ == "ccomp": functions.append({"Type": "Complement Clause", "Function": "Acts as object of the main verb"}) elif token.dep_ == "xcomp": functions.append({"Type": "Open Complement Clause", "Function": "Predicate complement without its own subject"}) elif token.dep_ == "advcl": functions.append({"Type": "Adverbial Clause", "Function": "Modifies the verb like an adverb"}) elif token.dep_ == "relcl": functions.append({"Type": "Adjective Clause", "Function": "Modifies a noun like an adjective"}) return functions def create_dependency_graph(doc): """ Create a graphviz visualization of the dependency tree """ if not check_graphviz_installation(): return None dot = graphviz.Digraph(comment='Dependency Tree') # Add nodes for token in doc: dot.node(str(token.i), f"{token.text}\n({token.pos_})") # Add edges for token in doc: if token.head is not token: # Skip root dot.edge(str(token.head.i), str(token.i), token.dep_) return dot def get_graph_download_link(dot): """ Generate a download link for the graph image """ try: # Create PDF in memory pdf = dot.pipe(format='pdf') # Encode to base64 b64 = base64.b64encode(pdf).decode() href = f'Download Syntax Tree (PDF)' return href except Exception as e: return f"Error generating download link: {str(e)}" def main(): # Set page to wide mode for better visualization st.set_page_config(layout="wide") st.markdown("