import streamlit as st
import spacy
import graphviz
import pandas as pd
import base64
import shutil
import subprocess

# Load the spaCy English pipeline once, when this module is first executed.
# main() passes the user's text through this shared `nlp` object.
# NOTE(review): presumably the 'en_core_web_md' model package must already be
# installed for this call to succeed — confirm the deployment installs it.
nlp = spacy.load('en_core_web_md')

def check_graphviz_installation():
    """
    Report whether the Graphviz `dot` executable can be located and run.

    Returns:
        bool: True when `dot` is found on the search path and `dot -V`
        exits successfully; False in every other case.
    """
    dot_is_on_path = shutil.which('dot') is not None
    if dot_is_on_path:
        try:
            subprocess.run(['dot', '-V'], capture_output=True, check=True)
        except (subprocess.SubprocessError, OSError):
            # The binary was found but could not be started or exited non-zero
            return False
        return True
    return False

# Dependency labels that head a subordinate clause, mapped to display names.
_SUBORDINATE_CLAUSE_TYPES = {
    "ccomp": "Complement Clause",
    "xcomp": "Open Complement Clause",
    "advcl": "Adverbial Clause",
    "relcl": "Adjective Clause",
}


def identify_clauses(doc):
    """
    Split a parsed text into independent and subordinate clauses.

    A subordinate clause is the contiguous span from the left edge to the
    right edge of any token whose dependency label is ccomp, xcomp, advcl or
    relcl. An independent clause is built for every ROOT token from the
    tokens of its subtree that do not fall inside any subordinate span.

    Args:
        doc: A parsed spaCy Doc (or an object exposing the same token
            attributes: ``i``, ``text``, ``dep_``, ``left_edge``,
            ``right_edge``, ``subtree`` and slice indexing).

    Returns:
        list[dict]: One ``{"Type": ..., "Text": ...}`` entry per clause.
        Independent clauses come first, followed by the subordinate clauses,
        each group in document order.
    """
    # Collect every subordinate clause together with its display name
    subordinate_spans = []
    for token in doc:
        clause_type = _SUBORDINATE_CLAUSE_TYPES.get(token.dep_)
        if clause_type is not None:
            span = doc[token.left_edge.i:token.right_edge.i + 1]
            subordinate_spans.append({"span": span, "type": clause_type})

    # Positions (not texts) of all tokens that belong to a subordinate clause
    subordinate_indices = {
        token.i
        for sub_clause in subordinate_spans
        for token in sub_clause["span"]
    }

    clauses = []

    # One independent clause per ROOT, so multi-sentence input is not
    # silently truncated to its first sentence
    for root in doc:
        if root.dep_ != "ROOT":
            continue

        # Order by token position; ordering by text would misplace any word
        # that occurs more than once in the input
        main_tokens = sorted(
            (t for t in root.subtree if t.i not in subordinate_indices),
            key=lambda t: t.i,
        )

        # Drop tokens made up only of commas, periods or whitespace. Filtering
        # whole tokens keeps words such as "3.5" or "U.S." intact and leaves
        # no stray spaces behind.
        words = [t.text for t in main_tokens if t.text.strip(",. \t\r\n")]
        clauses.append({"Type": "Independent Clause", "Text": " ".join(words)})

    # Add the subordinate clauses
    for sub_clause in subordinate_spans:
        clauses.append({
            "Type": sub_clause["type"],
            "Text": sub_clause["span"].text
        })

    return clauses

def analyze_clause_functions(doc):
    """
    Describe the grammatical role of every clause-heading token in `doc`.

    Tokens are visited in order. Each token whose dependency label is ROOT,
    ccomp, xcomp, advcl or relcl contributes one
    ``{"Type": ..., "Function": ...}`` entry; all other tokens are ignored.
    """
    # Dependency label -> (clause type, description of its function)
    role_by_dep = {
        "ROOT": ("Independent Clause", "Express the primary action or state"),
        "ccomp": ("Complement Clause", "Acts as object of the main verb"),
        "xcomp": ("Open Complement Clause", "Predicate complement without its own subject"),
        "advcl": ("Adverbial Clause", "Modifies the verb like an adverb"),
        "relcl": ("Adjective Clause", "Modifies a noun like an adjective"),
    }

    # Unknown labels map to None and are filtered out
    matched_roles = (role_by_dep.get(token.dep_) for token in doc)
    return [
        {"Type": clause_type, "Function": purpose}
        for clause_type, purpose in filter(None, matched_roles)
    ]

def create_dependency_graph(doc):
    """
    Create a graphviz visualization of the dependency tree

    Every token becomes a node labelled with its text and part-of-speech
    tag; every non-root token gets an edge from its head, labelled with the
    dependency relation.

    Args:
        doc: Parsed document whose tokens expose ``i``, ``text``, ``pos_``,
            ``dep_`` and ``head``.

    Returns:
        A ``graphviz.Digraph``, or None when check_graphviz_installation()
        reports that Graphviz is not usable.
    """
    if not check_graphviz_installation():
        return None

    dot = graphviz.Digraph(comment='Dependency Tree')

    # Add nodes, keyed by token position so repeated words stay distinct
    for token in doc:
        dot.node(str(token.i), f"{token.text}\n({token.pos_})")

    # Add edges
    for token in doc:
        head = token.head
        # The root is the token that is its own head. Compare positions, not
        # object identity: `token.head` is not guaranteed to be the very same
        # Python object as `token`, and an identity test would then draw a
        # self-loop on the root.
        if head.i != token.i:
            dot.edge(str(head.i), str(token.i), token.dep_)

    return dot

def get_graph_download_link(dot):
    """
    Build an HTML anchor that downloads the graph as a PDF file.

    The graph is rendered to PDF in memory and embedded in the link as a
    base64 data URI. If rendering or encoding fails for any reason, a plain
    error message string is returned instead of the anchor.
    """
    try:
        # Render in memory and encode in one step; nothing is written to disk
        encoded_pdf = base64.b64encode(dot.pipe(format='pdf')).decode()
    except Exception as error:
        return f"Error generating download link: {str(error)}"

    return f'<a href="data:application/pdf;base64,{encoded_pdf}" download="syntax_tree.pdf">Download Syntax Tree (PDF)</a>'

# Cell styling shared by every result table in the app
_TABLE_CELL_STYLE = {
    'background-color': 'rgba(0,0,0,0.1)',
    'color': 'white',
}


def _show_table(rows):
    """Render a list of row dicts as a styled static Streamlit table."""
    st.table(pd.DataFrame(rows).style.set_properties(**_TABLE_CELL_STYLE))


def main():
    """
    Run the Streamlit page: read a sentence and display its analysis.

    When the "Analyze" button is pressed, the left column shows the clause
    table and the clause-function table. The right column shows the
    dependency graph with a PDF download link (or installation instructions
    when Graphviz is unavailable), followed by the part-of-speech table.
    """
    # Set page to wide mode for better visualization
    st.set_page_config(layout="wide")
    st.markdown("<h1 style='text-align: center; color: white;'>English Clause Analyzer</h1>", unsafe_allow_html=True)
    st.write("Enter an English sentence to analyze its clauses, their functions, and syntax tree.")

    # Input text
    text = st.text_area("Enter your sentence:", "When I arrived at the station, the train had already left.", height=100)

    if not st.button("Analyze"):
        return

    # Whitespace-only input is treated as empty instead of being parsed
    sentence = text.strip()
    if not sentence:
        st.warning("Please enter a sentence to analyze.")
        return

    # Process the text
    doc = nlp(sentence)

    # Create two columns for layout
    col1, col2 = st.columns(2)

    with col1:
        st.subheader("Clauses Analysis")
        _show_table(identify_clauses(doc))

        st.subheader("Clause Functions")
        _show_table(analyze_clause_functions(doc))

    with col2:
        st.subheader("Syntax Tree Visualization")
        # create_dependency_graph() returns None when Graphviz is unusable,
        # so the installation check is not run a second time here
        dot = create_dependency_graph(doc)
        if dot is None:
            st.error("Graphviz is not installed. Please install it using:")
            st.code("sudo apt-get install graphviz")
            st.markdown("After installation, restart the application.")
        else:
            st.graphviz_chart(dot)

            # Add download link for the graph
            st.markdown(get_graph_download_link(dot), unsafe_allow_html=True)

        # The part-of-speech table does not need Graphviz, so it is shown
        # whether or not the graph could be drawn
        st.subheader("Part-of-Speech Analysis")
        _show_table([
            {
                "Word": token.text,
                "Part of Speech": token.pos_,
                "Description": spacy.explain(token.pos_),
            }
            for token in doc
        ])

# Start the app only when this file is executed as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    main()