"""Streamlit app that analyzes an English sentence's clauses with spaCy.

Shows independent vs. dependent clauses, their grammatical functions,
a Graphviz dependency tree (with PDF download), and POS tags.
"""

import base64
import shutil
import subprocess

import graphviz
import pandas as pd
import spacy
import streamlit as st

# Load English language model for spaCy (medium model, includes vectors).
nlp = spacy.load('en_core_web_md')

# Dependency labels that mark subordinate clauses, mapped to display names.
CLAUSE_TYPES = {
    "ccomp": "Complement Clause",
    "xcomp": "Open Complement Clause",
    "advcl": "Adverbial Clause",
    "relcl": "Adjective Clause",
}

# Clause type -> one-line description of its grammatical function.
CLAUSE_FUNCTIONS = {
    "ROOT": ("Independent Clause", "Express the primary action or state"),
    "ccomp": ("Complement Clause", "Acts as object of the main verb"),
    "xcomp": ("Open Complement Clause", "Predicate complement without its own subject"),
    "advcl": ("Adverbial Clause", "Modifies the verb like an adverb"),
    "relcl": ("Adjective Clause", "Modifies a noun like an adjective"),
}

# Styling applied to every result table (dark translucent rows).
TABLE_STYLE = {
    'background-color': 'rgba(0,0,0,0.1)',
    'color': 'white',
}


def check_graphviz_installation():
    """Return True if the Graphviz 'dot' executable is installed and runnable."""
    if shutil.which('dot') is None:
        return False
    try:
        # 'dot -V' is a cheap sanity check that the binary actually executes.
        subprocess.run(['dot', '-V'], capture_output=True, check=True)
        return True
    except (subprocess.SubprocessError, OSError):
        return False


def identify_clauses(doc):
    """Identify clauses in the sentence, separating dependent and independent.

    Args:
        doc: a spaCy ``Doc`` (assumed to hold a single sentence).

    Returns:
        list[dict]: dicts with "Type" and "Text" keys; the independent
        clause (if a ROOT is found) comes first, then each subordinate clause.
    """
    clauses = []

    # First identify all subordinate clauses and their token spans.
    subordinate_spans = []
    for token in doc:
        if token.dep_ in CLAUSE_TYPES:
            span = doc[token.left_edge.i:token.right_edge.i + 1]
            subordinate_spans.append({"span": span, "type": CLAUSE_TYPES[token.dep_]})

    # Find the sentence root; the main clause is its subtree minus any
    # tokens that belong to a subordinate clause span.
    root = next((token for token in doc if token.dep_ == "ROOT"), None)

    if root:
        main_clause_tokens = set(root.subtree)
        for sub_clause in subordinate_spans:
            main_clause_tokens.difference_update(sub_clause["span"])

        # Reassemble remaining tokens in document order. Sorting by token.i
        # (rather than matching on surface text) stays correct when the same
        # word appears more than once in the sentence.
        ordered = sorted(main_clause_tokens, key=lambda t: t.i)
        main_clause_text = " ".join(t.text for t in ordered)
        main_clause_text = main_clause_text.strip().replace(",", "").replace(".", "")
        clauses.append({"Type": "Independent Clause", "Text": main_clause_text})

    # Append the subordinate clauses after the main clause.
    for sub_clause in subordinate_spans:
        clauses.append({"Type": sub_clause["type"], "Text": sub_clause["span"].text})

    return clauses


def analyze_clause_functions(doc):
    """Describe the grammatical function of each clause-heading token in doc.

    Returns:
        list[dict]: dicts with "Type" and "Function" keys, one per token
        whose dependency label heads a clause (ROOT/ccomp/xcomp/advcl/relcl).
    """
    functions = []
    for token in doc:
        if token.dep_ in CLAUSE_FUNCTIONS:
            clause_type, description = CLAUSE_FUNCTIONS[token.dep_]
            functions.append({"Type": clause_type, "Function": description})
    return functions


def create_dependency_graph(doc):
    """Build a graphviz Digraph of the dependency tree, or None if Graphviz
    is not installed.

    Nodes are tokens labeled "text\\n(POS)"; edges run head -> child and are
    labeled with the child's dependency relation.
    """
    if not check_graphviz_installation():
        return None

    dot = graphviz.Digraph(comment='Dependency Tree')

    # Add one node per token, keyed by its index to keep names unique.
    for token in doc:
        dot.node(str(token.i), f"{token.text}\n({token.pos_})")

    # Add edges; the root is its own head, so it gets no incoming edge.
    for token in doc:
        if token.head is not token:  # Skip root
            dot.edge(str(token.head.i), str(token.i), token.dep_)

    return dot


def get_graph_download_link(dot):
    """Return an HTML download link (data URI) for the graph as a PDF.

    On failure, returns a human-readable error string instead of raising.
    """
    try:
        # Render the PDF in memory and embed it as a base64 data URI so the
        # link works without writing a temp file.
        pdf = dot.pipe(format='pdf')
        b64 = base64.b64encode(pdf).decode()
        href = (
            f'<a href="data:application/pdf;base64,{b64}" '
            f'download="syntax_tree.pdf">Download Syntax Tree (PDF)</a>'
        )
        return href
    except Exception as e:
        return f"Error generating download link: {str(e)}"


def _styled_table(records):
    """Render a list of dicts as a styled Streamlit table."""
    df = pd.DataFrame(records)
    st.table(df.style.set_properties(**TABLE_STYLE))


def main():
    """Streamlit entry point: input form, clause tables, tree, POS table."""
    # Set page to wide mode for better visualization.
    st.set_page_config(layout="wide")

    # NOTE(review): the original heading markup was garbled in this copy of
    # the file; reconstructed as a centered <h1> — confirm against upstream.
    st.markdown(
        "<h1 style='text-align: center;'>English Clause Analyzer</h1>",
        unsafe_allow_html=True,
    )
    st.write("Enter an English sentence to analyze its clauses, their functions, and syntax tree.")

    # Input text
    text = st.text_area(
        "Enter your sentence:",
        "When I arrived at the station, the train had already left.",
        height=100,
    )

    if st.button("Analyze"):
        if text:
            # Process the text once; all panels share the same Doc.
            doc = nlp(text)

            # Two columns: tables on the left, tree on the right.
            col1, col2 = st.columns(2)

            with col1:
                clauses = identify_clauses(doc)
                st.subheader("Clauses Analysis")
                _styled_table(clauses)

                functions = analyze_clause_functions(doc)
                st.subheader("Clause Functions")
                _styled_table(functions)

            with col2:
                st.subheader("Syntax Tree Visualization")
                if not check_graphviz_installation():
                    st.error("Graphviz is not installed. Please install it using:")
                    st.code("sudo apt-get install graphviz")
                    st.markdown("After installation, restart the application.")
                else:
                    dot = create_dependency_graph(doc)
                    st.graphviz_chart(dot)
                    # Add download button for the graph
                    st.markdown(get_graph_download_link(dot), unsafe_allow_html=True)

            # Display part-of-speech tags in a table.
            st.subheader("Part-of-Speech Analysis")
            pos_data = [
                {
                    "Word": token.text,
                    "Part of Speech": token.pos_,
                    "Description": spacy.explain(token.pos_),
                }
                for token in doc
            ]
            _styled_table(pos_data)


if __name__ == "__main__":
    main()