textmetric-stramlit-1

Sleeping

App Files Files Community

samyak152002 committed on Nov 3, 2024

Commit

52dcb43

verified ·

1 Parent(s): 3410c51

Update app.py

Browse files

Files changed (1) hide show

app.py +198 -459

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import fitz  # PyMuPDF
 from pdfminer.high_level import extract_text
 from pdfminer.layout import LAParams
 import language_tool_python
-from typing import List, Dict, Any, Tuple, Optional
 from collections import Counter
 import json
 import traceback
@@ -12,32 +12,10 @@ import io
 import tempfile
 import os
 import base64
-from dataclasses import dataclass
 # Set JAVA_HOME environment variable
 os.environ['JAVA_HOME'] = '/usr/lib/jvm/java-11-openjdk-amd64'
-# ------------------------------
-# Data Classes
-# ------------------------------
-@dataclass
-class Highlight:
-    page: int
-    rect: Tuple[float, float, float, float]
-    color: str
-    message: str
-    category: str
-@dataclass
-class AnalysisResult:
-    highlights: List[Highlight]
-    messages: List[Dict[str, Any]]
-    summary: Dict[str, Any]
-# ------------------------------
-# PDF Processing Functions
-# ------------------------------
 # ------------------------------
 # Analysis Functions
 # ------------------------------
@@ -63,451 +41,145 @@ def check_text_presence(full_text: str, search_terms: List[str]) -> Dict[str, bo
     """Checks for the presence of required terms in the text."""
     return {term: term.lower() in full_text.lower() for term in search_terms}
-def label_authors(full_text: str) -> str:
-    """Label authors in the text with 'Authors:' if not already labeled."""
-    author_line_regex = r"^(?:.*\n)(.*?)(?:\n\n)"
-    match = re.search(author_line_regex, full_text, re.MULTILINE)
-    if match:
-        authors = match.group(1).strip()
-        return full_text.replace(authors, f"Authors: {authors}")
-    return full_text
 def check_metadata(full_text: str) -> Dict[str, Any]:
     """Check for metadata elements."""
     return {
         "author_email": bool(re.search(r'\b[\w.-]+?@\w+?\.\w+?\b', full_text)),
         "list_of_authors": bool(re.search(r'Authors?:', full_text, re.IGNORECASE)),
         "keywords_list": bool(re.search(r'Keywords?:', full_text, re.IGNORECASE)),
-        "word_count": len(full_text.split()) or "Missing"
-    }
-def check_disclosures(full_text: str) -> Dict[str, bool]:
-    """Check for disclosure statements."""
-    search_terms = [
-        "author contributions statement",
-        "conflict of interest statement",
-        "ethics statement",
-        "funding statement",
-        "data access statement"
-    ]
-    return check_text_presence(full_text, search_terms)
-def check_figures_and_tables(full_text: str) -> Dict[str, bool]:
-    """Check for figures and tables."""
-    return {
-        "figures_with_citations": bool(re.search(r'Figure \d+.*?citation', full_text, re.IGNORECASE)),
-        "figures_legends": bool(re.search(r'Figure \d+.*?legend', full_text, re.IGNORECASE)),
-        "tables_legends": bool(re.search(r'Table \d+.*?legend', full_text, re.IGNORECASE))
-    }
-def check_references(full_text: str) -> Dict[str, Any]:
-    """Check for references."""
-    return {
-        "old_references": bool(re.search(r'\b19[0-9]{2}\b', full_text)),
-        "citations_in_abstract": bool(re.search(r'\b(citation|reference)\b', full_text[:1000], re.IGNORECASE)),
-        "reference_count": len(re.findall(r'\[.*?\]', full_text)),
-        "self_citations": bool(re.search(r'Self-citation', full_text, re.IGNORECASE))
-    }
-def check_structure(full_text: str) -> Dict[str, bool]:
-    """Check document structure."""
-    return {
-        "imrad_structure": all(section in full_text for section in ["Introduction", "Methods", "Results", "Discussion"]),
-        "abstract_structure": "structured abstract" in full_text.lower()
     }
 def check_language_issues(full_text: str) -> Dict[str, Any]:
-    """Check for issues with capitalization, hyphenation, punctuation, spacing, etc."""
-    language_tool = language_tool_python.LanguageTool('en-US')
-    matches = language_tool.check(full_text)
-    word_count = len(full_text.split())
-    issues_count = len(matches)
-    issues_per_1000 = (issues_count / word_count) * 1000 if word_count else 0
-    serializable_matches = [
-        {
-            "message": match.message,
-            "replacements": match.replacements,
-            "offset": match.offset,
-            "errorLength": match.errorLength,
-            "category": match.category,
-            "ruleIssueType": match.ruleIssueType,
-            "sentence": match.sentence
-        }
-        for match in matches
-    ]
-    return {
-        "issues_count": issues_count,
-        "issues_per_1000": issues_per_1000,
-        "failed": issues_per_1000 > 20,
-        "matches": serializable_matches
-    }
-def check_language(full_text: str) -> Dict[str, Any]:
-    """Check language quality."""
-    return {
-        "plain_language": bool(re.search(r'plain language summary', full_text, re.IGNORECASE)),
-        "readability_issues": False,  # Placeholder for future implementation
-        "language_issues": check_language_issues(full_text)
-    }
-def check_figure_order(full_text: str) -> Dict[str, Any]:
-    """Check if figures are referred to in sequential order."""
-    figure_pattern = r'(?:Fig(?:ure)?\.?|Figure)\s*(\d+)'
-    figure_references = re.findall(figure_pattern, full_text, re.IGNORECASE)
-    figure_numbers = sorted(set(int(num) for num in figure_references))
-    is_sequential = all(a + 1 == b for a, b in zip(figure_numbers, figure_numbers[1:]))
-    if figure_numbers:
-        expected_figures = set(range(1, max(figure_numbers) + 1))
-        missing_figures = list(expected_figures - set(figure_numbers))
-    else:
-        missing_figures = None
-    duplicates = [num for num, count in Counter(figure_references).items() if count > 1]
-    duplicate_numbers = [int(num) for num in duplicates]
-    not_mentioned = list(set(figure_references) - set(duplicates))
-    return {
-        "sequential_order": is_sequential,
-        "figure_count": len(figure_numbers),
-        "missing_figures": missing_figures,
-        "figure_order": figure_numbers,
-        "duplicate_references": duplicates,
-        "not_mentioned": not_mentioned
-    }
-def check_reference_order(full_text: str) -> Dict[str, Any]:
-    """Check if references in the main body text are in order."""
-    reference_pattern = r'\[(\d+)\]'
-    references = re.findall(reference_pattern, full_text)
-    ref_numbers = [int(ref) for ref in references]
-    max_ref = 0
-    out_of_order = []
-    for i, ref in enumerate(ref_numbers):
-        if ref > max_ref + 1:
-            out_of_order.append((i+1, ref))
-        max_ref = max(max_ref, ref)
-    all_refs = set(range(1, max_ref + 1))
-    used_refs = set(ref_numbers)
-    missing_refs = list(all_refs - used_refs)
-    return {
-        "max_reference": max_ref,
-        "out_of_order": out_of_order,
-        "missing_references": missing_refs,
-        "is_ordered": len(out_of_order) == 0 and len(missing_refs) == 0
-    }
-def check_reference_style(full_text: str) -> Dict[str, Any]:
-    """Check the reference style used in the paper and identify inconsistencies."""
-    reference_section_match = re.search(r'References\b([\s\S]*?)(?:\n\S|\Z)', full_text, re.IGNORECASE)
-    if not reference_section_match:
-        return {"style": "Unknown", "reason": "References section not found", "inconsistent_refs": []}
-    references_text = reference_section_match.group(1)
-    reference_list = re.split(r'\n(?=\[\d+\]|\d+\.\s|\(\w+,\s*\d{4}\))', references_text)
-    references = [ref.strip() for ref in reference_list if ref.strip()]
-    styles = []
-    inconsistent_refs = []
-    patterns = {
-        "IEEE": r'^\[\d+\]',
-        "Harvard": r'^[A-Z][a-z]+,?\s[A-Z]\.\s\(?\d{4}\)?',
-        "APA": r'^[A-Z][a-z]+,?\s[A-Z]\.\s\(?\d{4}\)?',
-        "MLA": r'^[A-Z][a-z]+,\s[A-Z][a-z]+\.',
-        "Vancouver": r'^\d+\.\s',
-        "Chicago": r'^\d+\s[A-Z][a-z]+\s[A-Z]',
-    }
-    for i, ref in enumerate(references, 1):
-        matched = False
-        for style, pattern in patterns.items():
-            if re.match(pattern, ref):
-                styles.append(style)
-                matched = True
-                break
-        if not matched:
-            styles.append("Unknown")
-            inconsistent_refs.append((i, ref, "Unknown"))
-    if not styles:
-        return {"style": "Unknown", "reason": "No references found", "inconsistent_refs": []}
-    style_counts = Counter(styles)
-    majority_style, majority_count = style_counts.most_common(1)[0]
-    for i, style in enumerate(styles, 1):
-        if style != majority_style and style != "Unknown":
-            inconsistent_refs.append((i, references[i-1], style))
-    consistency = majority_count / len(styles)
-    return {
-        "majority_style": majority_style,
-        "inconsistent_refs": inconsistent_refs,
-        "consistency": consistency
-    }
-# ------------------------------
-# Annotation Functions
-# ------------------------------
-def highlight_text(page, words, text, annotation):
-    """Highlight text and add annotation."""
-    text_instances = find_text_instances(words, text)
-    highlighted = False
-    for inst in text_instances:
-        highlight = page.add_highlight_annot(inst)
-        highlight.update()
-        comment = page.add_text_annot(inst[:2], annotation)
-        comment.update()
-        highlighted = True
-    return highlighted
-def find_text_instances(words, text):
-    """Find all instances of text in words."""
-    text_lower = text.lower()
-    text_words = text_lower.split()
-    instances = []
-    for i in range(len(words) - len(text_words) + 1):
-        if all(words[i+j][4].lower() == text_words[j] for j in range(len(text_words))):
-            inst = fitz.Rect(words[i][:4])
-            for j in range(1, len(text_words)):
-                inst = inst | fitz.Rect(words[i+j][:4])
-            instances.append(inst)
-    return instances
-def highlight_issues_in_pdf(file, inconsistent_refs: List[Tuple[int, str, str]], language_matches: List[Dict[str, Any]]) -> bytes:
-    """Highlight inconsistent references and add notes for language issues in a single PDF."""
     try:
-        if isinstance(file, str):
-            doc = fitz.open(file)
-        else:
-            doc = fitz.open(stream=file.read(), filetype="pdf")
-        added_notes = set()
-        for page_number, page in enumerate(doc, start=1):
-            words = page.get_text("words")
-            if inconsistent_refs:
-                for ref_num, ref_text, ref_style in inconsistent_refs:
-                    annotation_text = f"Reference {ref_num}: Inconsistent style ({ref_style}). Should be consolidated to {ref_style}."
-                    highlight_text(page, words, ref_text, annotation_text)
-            if language_matches:
-                for match in language_matches:
-                    issue_text = match['sentence']
-                    error_message = f"{match['message']}\nSuggested correction: {match['replacements'][0] if match['replacements'] else 'No suggestion'}"
-                    issue_key = (issue_text, error_message)
-                    if issue_key not in added_notes:
-                        if highlight_text(page, words, issue_text, error_message):
-                            added_notes.add(issue_key)
-        annotated_pdf_bytes = doc.write()
-        doc.close()
-        return annotated_pdf_bytes
     except Exception as e:
-        print(f"An error occurred while annotating the PDF: {str(e)}")
-        traceback.print_exc()
-        return b""
-# ------------------------------
-# Main Analysis Function
-# ------------------------------
-def analyze_pdf(file) -> Tuple[Dict[str, Any], bytes]:
-    """
-    Analyze the uploaded PDF and return analysis results and annotated PDF bytes.
-    """
     try:
-        pages_text = extract_pdf_text_by_page(file)
         full_text = extract_pdf_text(file)
-        full_text = label_authors(full_text)
-        # Perform analyses
-        metadata = check_metadata(full_text)
-        disclosures = check_disclosures(full_text)
-        figures_and_tables = check_figures_and_tables(full_text)
-        figure_order = check_figure_order(full_text)
-        references = check_references(full_text)
-        reference_order = check_reference_order(full_text)
-        reference_style = check_reference_style(full_text)
-        structure = check_structure(full_text)
-        language = check_language(full_text)
-        # Compile results
         results = {
-            "metadata": metadata,
-            "disclosures": disclosures,
-            "figures_and_tables": figures_and_tables,
-            "figure_order": figure_order,
-            "references": references,
-            "reference_order": reference_order,
-            "reference_style": reference_style,
-            "structure": structure,
-            "language": language
         }
-        # Handle annotations
-        inconsistent_refs = reference_style.get("inconsistent_refs", [])
-        language_matches = language.get("language_issues", {}).get("matches", [])
-        if inconsistent_refs or language_matches:
-            annotated_pdf_bytes = highlight_issues_in_pdf(file, inconsistent_refs, language_matches)
-        else:
-            annotated_pdf_bytes = None
-        return results, annotated_pdf_bytes
     except Exception as e:
-        error_message = {
-            "error": str(e),
-            "traceback": traceback.format_exc()
-        }
-        return error_message, None
 # ------------------------------
-# Highlight Processing Functions
 # ------------------------------
-def get_word_coordinates(doc: fitz.Document) -> Dict[int, List[Dict[str, Any]]]:
-    """Extract word coordinates from each page of the PDF."""
-    word_coordinates = {}
-    for page_num, page in enumerate(doc):
-        words = page.get_text("words")
-        word_coordinates[page_num] = [
-            {
-                "text": word[4],
-                "rect": fitz.Rect(word[:4]),
-                "origin": word[5:],
-            }
-            for word in words
-        ]
-    return word_coordinates
-def find_text_location(text: str, word_coordinates: Dict[int, List[Dict[str, Any]]]) -> Optional[Highlight]:
-    """Find the location of text in the PDF and return a Highlight object."""
-    text_lower = text.lower()
-    for page_num, words in word_coordinates.items():
-        for i in range(len(words)):
-            if words[i]["text"].lower() in text_lower:
-                # Find the complete phrase
-                rect = words[i]["rect"]
-                j = i + 1
-                while j < len(words) and j - i < len(text.split()):
-                    rect = rect | words[j]["rect"]
-                    j += 1
-                return Highlight(
-                    page=page_num,
-                    rect=(rect.x0, rect.y0, rect.x1, rect.y1),
-                    color="yellow",
-                    message=text,
-                    category="text"
-                )
-    return None
-# ------------------------------
-# Streamlit Interface
-# ------------------------------
-def create_sidebar():
-    """Create the sidebar with upload and analysis options."""
-    st.sidebar.title("PDF Analyzer")
-    uploaded_file = st.sidebar.file_uploader("Upload PDF", type=['pdf'])
-    analysis_options = st.sidebar.expander("Analysis Options", expanded=False)
-    with analysis_options:
-        options = {
-            "check_language": st.checkbox("Language Analysis", value=True),
-            "check_references": st.checkbox("Reference Analysis", value=True),
-            "check_structure": st.checkbox("Structure Analysis", value=True),
-        }
-    return uploaded_file, options
-def display_pdf_viewer(pdf_bytes: bytes, highlights: List[Highlight]):
-    """Display the PDF with highlights using a custom viewer."""
-    # Convert PDF bytes to base64
-    b64_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
-    # Create custom HTML for PDF viewer
-    html_content = f"""
         <div style="position: relative; width: 100%; height: 800px;">
-            <iframe src="data:application/pdf;base64,{b64_pdf}"
                     width="100%"
                     height="100%"
                     style="border: none;">
             </iframe>
-            <div id="highlight-container">
-                {generate_highlight_overlays(highlights)}
-            </div>
         </div>
-        <style>
-            .highlight {{
-                position: absolute;
-                opacity: 0.3;
-                pointer-events: all;
-                cursor: pointer;
-                transition: opacity 0.2s;
-            }}
-            .highlight:hover {{
-                opacity: 0.5;
-            }}
-        </style>
     """
-    st.components.v1.html(html_content, height=800)
-def generate_highlight_overlays(highlights: List[Highlight]) -> str:
-    """Generate HTML for highlight overlays."""
-    overlay_html = ""
-    for i, highlight in enumerate(highlights):
-        overlay_html += f"""
-            <div class="highlight"
-                 style="left: {highlight.rect[0]}px;
-                        top: {highlight.rect[1]}px;
-                        width: {highlight.rect[2] - highlight.rect[0]}px;
-                        height: {highlight.rect[3] - highlight.rect[1]}px;
-                        background-color: {highlight.color};"
-                 onclick="showMessage({i})"
-                 title="{highlight.message}">
-            </div>
-        """
-    return overlay_html
-def display_analysis_results(results: AnalysisResult):
-    """Display analysis results in the sidebar."""
     st.sidebar.markdown("## Analysis Results")
-    # Display summary statistics
-    st.sidebar.markdown("### Summary")
-    for key, value in results.summary.items():
-        st.sidebar.metric(key, value)
-    # Display messages grouped by category
-    messages_by_category = {}
-    for message in results.messages:
-        category = message.get("category", "Other")
-        if category not in messages_by_category:
-            messages_by_category[category] = []
-        messages_by_category[category].append(message)
-    for category, messages in messages_by_category.items():
-        with st.sidebar.expander(f"{category} ({len(messages)})"):
-            for msg in messages:
-                st.markdown(f"**{msg['title']}**")
-                st.markdown(msg['description'])
-                st.markdown("---")
 def main():
     st.set_page_config(
@@ -516,45 +188,112 @@ def main():
         layout="wide",
         initial_sidebar_state="expanded"
     )
-    # Create sidebar and get user input
-    uploaded_file, options = create_sidebar()
     if uploaded_file is not None:
-        # Read PDF file
-        pdf_bytes = uploaded_file.read()
-        # Analyze PDF
         try:
-            results, annotated_pdf = analyze_pdf(io.BytesIO(pdf_bytes))
-            # Create two columns
             col1, col2 = st.columns([0.7, 0.3])
             with col1:
                 st.markdown("### Document Preview")
-                # Display PDF with highlights
-                if annotated_pdf:
-                    display_pdf_viewer(annotated_pdf, results.get("highlights", []))
-                else:
-                    display_pdf_viewer(pdf_bytes, [])
             with col2:
-                st.markdown("### Analysis Details")
-                display_analysis_results(results)
         except Exception as e:
-            st.error(f"Error analyzing PDF: {str(e)}")
             st.code(traceback.format_exc())
     else:
-        st.markdown("### Upload a PDF to begin analysis")
         st.markdown("""
-        This tool will analyze your PDF document for:
-        - Language issues and grammar
-        - Reference formatting and consistency
-        - Document structure
-        - Figure and table placement
         """)
 if __name__ == "__main__":
     main()

 from pdfminer.high_level import extract_text
 from pdfminer.layout import LAParams
 import language_tool_python
+from typing import List, Dict, Any, Tuple
 from collections import Counter
 import json
 import traceback
 import tempfile
 import os
 import base64
 # Set JAVA_HOME environment variable
 os.environ['JAVA_HOME'] = '/usr/lib/jvm/java-11-openjdk-amd64'
 # ------------------------------
 # Analysis Functions
 # ------------------------------
     """Checks for the presence of required terms in the text."""
     return {term: term.lower() in full_text.lower() for term in search_terms}
 def check_metadata(full_text: str) -> Dict[str, Any]:
     """Report which metadata elements appear in the document text.
     Args:
         full_text: Plain text extracted from the document.
     Returns:
         A dict with three booleans (``author_email``, ``list_of_authors``,
         ``keywords_list``) and the integer ``word_count`` (number of
         whitespace-separated tokens; 0 for empty text).
     """
     # The domain part accepts hyphens and any number of dot-separated labels,
     # so addresses such as "jane@my-lab.org" are recognised as well.
     email_pattern = r'\b[\w.+-]+@[\w-]+(?:\.[\w-]+)+\b'
     return {
         "author_email": bool(re.search(email_pattern, full_text)),
         "list_of_authors": bool(re.search(r'Authors?:', full_text, re.IGNORECASE)),
         "keywords_list": bool(re.search(r'Keywords?:', full_text, re.IGNORECASE)),
         "word_count": len(full_text.split())
     }
 def check_language_issues(full_text: str) -> Dict[str, Any]:
     """Run LanguageTool (en-US) over the text and summarise the matches.
     Args:
         full_text: Plain text to check.
     Returns:
         A dict with ``total_issues`` (int) and ``issues`` (list of dicts with
         ``message``, ``context``, ``suggestions`` (at most three),
         ``category`` and ``rule_id``). If the check fails, ``total_issues``
         is 0, ``issues`` is empty and ``error`` holds the exception text.
         This function does not raise.
     """
     # Nothing to check: skip starting the tool altogether.
     if not full_text or not full_text.strip():
         return {"total_issues": 0, "issues": []}
     language_tool = None
     try:
         language_tool = language_tool_python.LanguageTool('en-US')
         matches = language_tool.check(full_text)
         issues = [
             {
                 "message": match.message,
                 "context": match.context,
                 "suggestions": match.replacements[:3] if match.replacements else [],
                 "category": match.category,
                 "rule_id": match.ruleId
             }
             for match in matches
         ]
         return {
             "total_issues": len(issues),
             "issues": issues
         }
     except Exception as e:
         return {
             "total_issues": 0,
             "issues": [],
             "error": str(e)
         }
     finally:
         # Release the tool after every call; otherwise each analysis run
         # leaves its LanguageTool instance (and whatever backs it) alive.
         if language_tool is not None:
             try:
                 language_tool.close()
             except Exception:
                 # Best-effort cleanup: a failing close must not mask the
                 # result or break the "never raises" contract.
                 pass
def analyze_pdf(file) -> Dict[str, Any]:
    """Extract the PDF text and run the metadata, language and structure checks.

    Args:
        file: The PDF source, passed unchanged to ``extract_pdf_text``.

    Returns:
        On success, a dict with the keys ``metadata``, ``language`` (holding
        the language report under ``issues``) and ``structure`` (booleans
        ``has_abstract``, ``has_introduction``, ``has_conclusion``).
        On any failure, a dict with ``error`` (exception text) and
        ``traceback`` (formatted traceback) instead; nothing is raised.
    """
    try:
        # Extract text
        full_text = extract_pdf_text(file)

        def has_heading(pattern: str) -> bool:
            # Case-insensitive whole-word search for a section name.
            return bool(re.search(pattern, full_text, re.IGNORECASE))

        # Perform analysis
        results = {
            "metadata": check_metadata(full_text),
            "language": {
                "issues": check_language_issues(full_text)
            },
            "structure": {
                "has_abstract": has_heading(r'\bAbstract\b'),
                "has_introduction": has_heading(r'\bIntroduction\b'),
                # Optional plural: a "Conclusions" heading would otherwise be
                # missed because \b cannot match between "n" and "s".
                "has_conclusion": has_heading(r'\bConclusions?\b')
            }
        }
        return results
    except Exception as e:
        # Top-level boundary: report the failure to the caller as data.
        return {"error": str(e), "traceback": traceback.format_exc()}
 # ------------------------------
+# PDF Display Functions
 # ------------------------------
def display_pdf(pdf_bytes):
    """Render the given PDF bytes inline as a base64 data-URI iframe."""
    encoded = base64.b64encode(pdf_bytes).decode('utf-8')
    st.markdown(
        f'<iframe src="data:application/pdf;base64,{encoded}" width="100%" height="800" type="application/pdf"></iframe>',
        unsafe_allow_html=True,
    )
def get_pdf_display_html(pdf_bytes):
    """Build the HTML snippet that embeds the PDF next to a highlight container.

    The PDF bytes are base64-encoded into a data-URI iframe; an empty
    ``<div id="highlight-container">`` follows it inside the same wrapper.
    """
    encoded_pdf = base64.b64encode(pdf_bytes).decode('utf-8')
    html = f"""
        <div style="position: relative; width: 100%; height: 800px;">
            <iframe src="data:application/pdf;base64,{encoded_pdf}"
                    width="100%"
                    height="100%"
                    style="border: none;">
            </iframe>
            <div id="highlight-container"></div>
        </div>
    """
    return html
+# ------------------------------
+# Streamlit Interface Functions
+# ------------------------------
def render_sidebar():
    """Draw the sidebar title and option checkboxes; return their states.

    Returns:
        A dict mapping ``check_language``, ``check_structure`` and
        ``check_metadata`` to the current value of the matching checkbox.
    """
    sidebar = st.sidebar
    sidebar.title("PDF Analysis Options")
    language_on = sidebar.checkbox("Check Language", value=True)
    structure_on = sidebar.checkbox("Check Structure", value=True)
    metadata_on = sidebar.checkbox("Check Metadata", value=True)
    return {
        "check_language": language_on,
        "check_structure": structure_on,
        "check_metadata": metadata_on,
    }
def display_analysis_results(results: Dict[str, Any]):
    """Show the metadata, language and structure sections in the sidebar.

    Each section is rendered in its own expanded sidebar expander and only
    when the corresponding key is present in ``results``.
    """
    def tick(flag):
        # Check mark for truthy values, cross otherwise.
        return '✅' if flag else '❌'

    st.sidebar.markdown("## Analysis Results")

    # Metadata section
    if "metadata" in results:
        with st.sidebar.expander("📋 Metadata Analysis", expanded=True):
            meta = results["metadata"]
            st.markdown(f"**Word Count:** {meta['word_count']}")
            st.markdown(f"**Has Author List:** {tick(meta['list_of_authors'])}")
            st.markdown(f"**Has Keywords:** {tick(meta['keywords_list'])}")

    # Language section: total first, then one entry per reported issue
    if "language" in results and "issues" in results["language"]:
        with st.sidebar.expander("🔤 Language Issues", expanded=True):
            report = results["language"]["issues"]
            st.markdown(f"**Total Issues Found:** {report['total_issues']}")
            if report['total_issues'] > 0:
                for number, entry in enumerate(report['issues'], 1):
                    category = entry['category']
                    message = entry['message']
                    context = entry['context']
                    suggestions = ', '.join(entry['suggestions']) if entry['suggestions'] else 'None'
                    st.markdown(f"""
                    **Issue {number}:**
                    - Type: {category}
                    - Message: {message}
                    - Context: {context}
                    - Suggestions: {suggestions}
                    ---
                    """)

    # Structure section
    if "structure" in results:
        with st.sidebar.expander("🏗️ Structure Analysis", expanded=True):
            sections = results["structure"]
            st.markdown(f"**Has Abstract:** {tick(sections['has_abstract'])}")
            st.markdown(f"**Has Introduction:** {tick(sections['has_introduction'])}")
            st.markdown(f"**Has Conclusion:** {tick(sections['has_conclusion'])}")
+# ------------------------------
+# Main Application
+# ------------------------------
 def main():
     st.set_page_config(
         layout="wide",
         initial_sidebar_state="expanded"
     )
+    # Main title
+    st.title("PDF Document Analyzer")
+    st.markdown("""
+    Upload a PDF document to analyze its structure, language, and metadata.
+    The analysis results will appear in the sidebar, and any issues found will be highlighted in the document.
+    """)
+    # Get analysis options from sidebar
+    options = render_sidebar()
+    # File uploader
+    uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
     if uploaded_file is not None:
         try:
+            # Read PDF file
+            pdf_bytes = uploaded_file.read()
+            # Create two columns for layout
             col1, col2 = st.columns([0.7, 0.3])
             with col1:
                 st.markdown("### Document Preview")
+                # Display PDF
+                display_pdf(pdf_bytes)
             with col2:
+                st.markdown("### Analysis Progress")
+                # Show progress bar while analyzing
+                with st.spinner("Analyzing PDF..."):
+                    # Analyze PDF
+                    results = analyze_pdf(io.BytesIO(pdf_bytes))
+                    if "error" in results:
+                        st.error("Error during analysis:")
+                        st.code(results["error"])
+                        if "traceback" in results:
+                            with st.expander("Show error details"):
+                                st.code(results["traceback"])
+                    else:
+                        st.success("Analysis complete!")
+                        # Display summary metrics
+                        col2_1, col2_2 = st.columns(2)
+                        with col2_1:
+                            st.metric(
+                                "Language Issues",
+                                results.get("language", {}).get("issues", {}).get("total_issues", 0)
+                            )
+                        with col2_2:
+                            st.metric(
+                                "Word Count",
+                                results.get("metadata", {}).get("word_count", 0)
+                            )
+                        # Display detailed results in sidebar
+                        display_analysis_results(results)
         except Exception as e:
+            st.error(f"An error occurred: {str(e)}")
             st.code(traceback.format_exc())
     else:
+        # Show instructions when no file is uploaded
         st.markdown("""
+        ### Instructions
+        1. Use the sidebar to select which aspects of the document you want to analyze
+        2. Upload a PDF file using the file uploader above
+        3. View the analysis results in the sidebar
+        4. Issues found will be highlighted in the document preview
+        ### Features
+        - **Language Analysis**: Checks for grammar, style, and clarity issues
+        - **Structure Analysis**: Verifies the presence of key document sections
+        - **Metadata Analysis**: Examines document metadata and formatting
         """)
+# ------------------------------
+# CSS Styles
+# ------------------------------
def load_css():
    """Inject the app's custom ``<style>`` block into the page."""
    stylesheet = """
        <style>
        .highlight {
            background-color: yellow;
            opacity: 0.3;
            position: absolute;
            pointer-events: none;
        }
        .stButton>button {
            width: 100%;
        }
        .sidebar .sidebar-content {
            width: 100%;
        }
        </style>
    """
    # Raw HTML is required here, otherwise the <style> tag would be escaped.
    st.markdown(stylesheet, unsafe_allow_html=True)
+# ------------------------------
+# Run Application
+# ------------------------------
 if __name__ == "__main__":
     # main() begins with st.set_page_config, which Streamlit (in releases
     # from when this was written) requires to be the first Streamlit
     # command of the script. load_css() emits st.markdown, so it has to run
     # after main(); the injected <style> still applies to the whole page.
     main()
     load_css()