File size: 5,369 Bytes
99dc100
 
feab938
99dc100
 
feab938
a949995
 
d97dd70
99dc100
b76b3d8
a949995
b76b3d8
 
99dc100
 
feab938
a949995
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
feab938
97905e6
 
 
 
 
52dcb43
99dc100
52dcb43
99dc100
52dcb43
 
99dc100
52dcb43
feab938
99dc100
a949995
 
99dc100
 
 
 
 
 
 
 
d97dd70
 
 
 
 
 
a949995
 
 
 
 
 
 
99dc100
a949995
 
 
 
 
 
 
 
 
 
 
99dc100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d97dd70
b76b3d8
 
 
 
 
 
d97dd70
 
52dcb43
ba4d8fc
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# app.py

import streamlit as st
import base64
from annotations import analyze_pdf

def display_pdf_iframe(pdf_bytes):
    """Render the PDF inline through an HTML ``<iframe>``.

    The bytes are base64-encoded into a ``data:application/pdf`` URI and the
    resulting markup is passed to ``st.components.v1.html``. When
    ``pdf_bytes`` is ``None`` or empty, an informational message is shown
    instead and nothing is embedded.
    """
    # Guard clause: nothing to embed.
    if not pdf_bytes:
        st.info("No annotated PDF to display.")
        return

    encoded = base64.b64encode(pdf_bytes).decode('utf-8')
    # Assembled piecewise so the emitted markup (including its surrounding
    # whitespace) does not depend on how this function is indented.
    markup = (
        "\n"
        f'            <iframe src="data:application/pdf;base64,{encoded}" width="100%" height="800px" type="application/pdf"></iframe>\n'
        "        "
    )
    st.components.v1.html(markup, height=800, width=700, scrolling=True)

def display_pdf_object(pdf_bytes):
    """Render the PDF inline through an HTML ``<object>`` element.

    The bytes are base64-encoded into a ``data:application/pdf`` URI that is
    used both as the object's data and as the target of a fallback download
    link inside the element. When ``pdf_bytes`` is ``None`` or empty, an
    informational message is shown instead.
    """
    # Guard clause: nothing to embed.
    if not pdf_bytes:
        st.info("No annotated PDF to display.")
        return

    encoded = base64.b64encode(pdf_bytes).decode('utf-8')
    markup_lines = [
        "",
        f'            <object data="data:application/pdf;base64,{encoded}" type="application/pdf" width="100%" height="800px">',
        "                <p>Your browser does not support PDFs.",
        f'                <a href="data:application/pdf;base64,{encoded}">Download the PDF</a>.</p>',
        "            </object>",
        "        ",
    ]
    st.components.v1.html(
        "\n".join(markup_lines), height=800, width=700, scrolling=True
    )

def display_pdf_pdfjs(pdf_bytes):
    """Display the PDF through the hosted PDF.js viewer inside an iframe.

    The bytes are base64-encoded into a ``data:application/pdf`` URI, which is
    passed to the viewer via its ``file`` query parameter. When ``pdf_bytes``
    is ``None`` or empty, an informational message is shown instead.
    """
    from urllib.parse import quote

    if not pdf_bytes:
        st.info("No annotated PDF to display.")
        return

    base64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
    data_uri = f"data:application/pdf;base64,{base64_pdf}"
    # The data URI travels inside a query string, so it must be
    # percent-encoded: the base64 alphabet contains '+' (decoded as a space
    # in query strings), '/' and '=', and the URI prefix contains ':', ';'
    # and ','. Left raw, the payload reaches the viewer corrupted.
    pdfjs_viewer_url = (
        "https://mozilla.github.io/pdf.js/web/viewer.html?file="
        + quote(data_uri, safe="")
    )
    pdf_display = f"""
        <iframe src="{pdfjs_viewer_url}" width="100%" height="800px"></iframe>
    """
    st.components.v1.html(pdf_display, height=800, width=700, scrolling=True)

def display_pdf_new_tab(pdf_bytes):
    """Show a hyperlink that opens the PDF in a new browser tab.

    The link target is a base64 ``data:application/pdf`` URI rendered with
    ``st.markdown(..., unsafe_allow_html=True)``. When ``pdf_bytes`` is
    ``None`` or empty, an informational message is shown instead.
    """
    # NOTE(review): some browsers may refuse to open ``data:`` URLs in a new
    # tab -- confirm this option works in the browsers you target.
    if not pdf_bytes:
        st.info("No annotated PDF to display.")
        return

    encoded = base64.b64encode(pdf_bytes).decode('utf-8')
    target = "data:application/pdf;base64," + encoded
    link_markup = (
        "\n"
        f'            <a href="{target}" target="_blank">🔍 View Annotated PDF</a>\n'
        "        "
    )
    st.markdown(link_markup, unsafe_allow_html=True)

def main():
    """Run the PDF Analyzer Streamlit page.

    Configures the page, accepts a PDF upload, runs ``analyze_pdf`` on it and
    then renders: the annotated PDF using the display method chosen by the
    user, the per-issue details in the sidebar, and a download button for the
    annotated file. If the analysis result contains an ``"error"`` key, only
    the error is shown.
    """
    st.set_page_config(
        page_title="PDF Analyzer",
        page_icon="📄",
        layout="wide",
    )

    st.title("📄 PDF Analyzer")
    st.markdown("""
    Upload a PDF to analyze its language, highlight errors, and view detailed error reports.
    """)

    uploaded_file = st.file_uploader("Upload your PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    language_results, annotated_pdf = _analyze_uploaded_pdf(uploaded_file)

    if "error" in language_results:
        st.error("An error occurred during analysis:")
        st.code(language_results["error"])
        return

    st.success("Analysis complete!")

    # Debugging: show the size of the annotated PDF.
    if annotated_pdf:
        st.write(f"Annotated PDF size: {len(annotated_pdf)} bytes")
    else:
        st.write("Annotated PDF is empty.")

    # Map each selectable label to its renderer; the selectbox can only
    # return one of these keys, so no fallback branch is needed.
    renderers = {
        "Iframe": display_pdf_iframe,
        "Object Tag": display_pdf_object,
        "PDF.js": display_pdf_pdfjs,
        "Open in New Tab": display_pdf_new_tab,
    }
    embedding_option = st.selectbox(
        "Select PDF Display Method",
        options=list(renderers),
        index=0,
    )

    st.subheader("📄 Annotated PDF")
    renderers[embedding_option](annotated_pdf)

    # Sidebar for error details
    st.sidebar.header("📝 Error Details")

    if language_results.get("total_issues", 0) > 0:
        # .get() so a result that reports a count but carries no "issues"
        # list does not crash the page with a KeyError.
        for idx, issue in enumerate(language_results.get("issues", []), 1):
            with st.sidebar.expander(f"Issue {idx}"):
                suggestions = issue['suggestions']
                st.markdown(f"**Message:** {issue['message']}")
                st.markdown(f"**Category:** {issue['category']}")
                st.markdown(f"**Suggestions:** {', '.join(suggestions) if suggestions else 'No suggestions'}")
                st.markdown(f"**Sentence:** {issue['context']}")
    else:
        st.sidebar.success("No language issues found!")

    # Option to download the annotated PDF
    if annotated_pdf:
        st.download_button(
            label="📥 Download Annotated PDF",
            data=annotated_pdf,
            file_name="annotated.pdf",
            mime="application/pdf",
        )
    else:
        st.info("No annotated PDF available for download.")


def _analyze_uploaded_pdf(uploaded_file):
    """Return ``(language_results, annotated_pdf)`` for the uploaded file.

    Streamlit re-executes the script on every widget interaction, so without
    caching, merely switching the display method would re-run the whole
    analysis. The last successful result is therefore kept in
    ``st.session_state`` keyed by a SHA-256 digest of the file contents, and
    reused while the same file stays uploaded. Results containing an
    ``"error"`` key are not cached, so a later rerun retries the analysis.
    """
    import hashlib

    cache_key = "_pdf_analysis_cache"
    digest = hashlib.sha256(uploaded_file.getvalue()).hexdigest()

    cached = st.session_state.get(cache_key)
    if cached is not None and cached[0] == digest:
        return cached[1], cached[2]

    with st.spinner("Analyzing PDF..."):
        # Reset file pointer before reading
        uploaded_file.seek(0)
        language_results, annotated_pdf = analyze_pdf(uploaded_file)

    if "error" not in language_results:
        st.session_state[cache_key] = (digest, language_results, annotated_pdf)
    return language_results, annotated_pdf

# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()