Spaces:

ambrosfitz
/

history_map

Sleeping

App Files Community

ambrosfitz committed on Nov 8, 2024

Commit

1ef8711

verified ·

1 Parent(s): 43524e1

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -104

app.py CHANGED Viewed

@@ -1,156 +1,149 @@
 import gradio as gr
 import networkx as nx
 import matplotlib.pyplot as plt
 import spacy
-import pandas as pd
-import numpy as np
 from pathlib import Path
-# Load SpaCy model
 nlp = spacy.load("en_core_web_sm")
-# Categories and their colors
 CATEGORIES = {
-    "Main Themes": "#004d99",
-    "Events": "#006400",
-    "People": "#8b4513",
-    "Laws/Policies": "#4b0082",
-    "Concepts": "#800000"
 }
-def load_historical_data():
-    """Load and process the Unit 5 text data."""
     try:
         with open("Unit5_OCR.txt", "r", encoding="utf-8") as f:
-            content = f.read()
-        return content
     except FileNotFoundError:
-        return "Historical data file not found."
-def extract_entities(text):
-    """Extract named entities and important terms from text."""
-    doc = nlp(text)
-    entities = {}
-    # Extract named entities
-    for ent in doc.ents:
-        if ent.label_ in ["PERSON", "EVENT", "DATE", "LAW", "ORG"]:
-            if ent.text not in entities:
-                entities[ent.text] = {
-                    "type": ent.label_,
-                    "count": 1,
-                    "context": []
-                }
-            else:
-                entities[ent.text]["count"] += 1
-    return entities
-def find_related_terms(term, text, window_size=100):
-    """Find terms that appear near the search term."""
-    term = term.lower()
-    text = text.lower()
-    related = {}
-    # Find all occurrences of the term
-    index = text.find(term)
-    while index != -1:
-        # Get surrounding context
-        start = max(0, index - window_size)
-        end = min(len(text), index + len(term) + window_size)
-        context = text[start:end]
-        # Process context to find other entities
-        doc = nlp(context)
-        for ent in doc.ents:
-            if ent.text.lower() != term:
-                if ent.text not in related:
-                    related[ent.text] = {
-                        "type": ent.label_,
-                        "count": 1,
-                        "relevance": 1.0
-                    }
-                else:
-                    related[ent.text]["count"] += 1
-                    related[ent.text]["relevance"] += 0.5
-        index = text.find(term, index + 1)
-    return related
 def generate_context_map(term):
     """Generate a network visualization for the given term."""
-    if not term.strip():
         return None
-    # Load historical data
-    content = load_historical_data()
-    if content == "Historical data file not found.":
         return None
-    # Create network graph
-    G = nx.Graph()
-    # Find related terms
-    related_items = find_related_terms(term, content)
-    # Add central node
-    G.add_node(term, category="Main Themes")
-    # Add related nodes (limit to top 10 by relevance)
-    sorted_items = sorted(related_items.items(),
-                         key=lambda x: x[1]["relevance"],
-                         reverse=True)[:10]
-    for item_name, item_data in sorted_items:
-        G.add_node(item_name, category=item_data["type"])
-        G.add_edge(term, item_name,
-                  weight=item_data["relevance"],
-                  length=2.0/item_data["relevance"])
     # Create visualization
     plt.figure(figsize=(12, 12))
     plt.clf()
-    # Set up the layout
-    pos = nx.spring_layout(G, k=1, iterations=50)
-    # Draw nodes
     for category, color in CATEGORIES.items():
-        nodes = [node for node, attr in G.nodes(data=True)
-                if attr.get("category", "") == category]
-        nx.draw_networkx_nodes(G, pos, nodelist=nodes,
-                             node_color=color,
-                             node_size=2000)
     # Draw edges
-    nx.draw_networkx_edges(G, pos, edge_color='white',
-                          width=1, alpha=0.5)
     # Add labels
-    labels = {node: node for node in G.nodes()}
-    nx.draw_networkx_labels(G, pos, labels, font_size=8,
-                          font_color='white')
-    # Set dark background
-    plt.gca().set_facecolor('#1a1a1a')
-    plt.gcf().set_facecolor('#1a1a1a')
     # Add title
     plt.title(f"Historical Context Map for '{term}'",
-              color='white', pad=20)
     return plt.gcf()
 # Create Gradio interface
 iface = gr.Interface(
     fn=generate_context_map,
-    inputs=gr.Textbox(label="Enter a historical term from Unit 5",
-                     placeholder="e.g., Civil War, Abraham Lincoln, Reconstruction"),
     outputs=gr.Plot(),
     title="Historical Context Mapper",
-    description="This tool generates a network visualization showing the historical context and connections for terms from Unit 5 (1844-1877).",
-    theme="darkhuggingface",
     examples=[
         ["Civil War"],
         ["Abraham Lincoln"],

 import gradio as gr
 import networkx as nx
 import matplotlib.pyplot as plt
+from transformers import pipeline
 import spacy
+import torch
 from pathlib import Path
+# Load models
 nlp = spacy.load("en_core_web_sm")
+summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+# Define categories and their colors
 CATEGORIES = {
+    "Main Theme": "#004d99",
+    "Event": "#006400",
+    "Person": "#8b4513",
+    "Law": "#4b0082",
+    "Concept": "#800000"
 }
+def load_content():
+    """Load the Unit 5 content."""
     try:
         with open("Unit5_OCR.txt", "r", encoding="utf-8") as f:
+            return f.read()
     except FileNotFoundError:
+        return None
+def find_context(term, text, window_size=500):
+    """Find the relevant context around a term."""
+    term_lower = term.lower()
+    text_lower = text.lower()
+    # Find the term in text
+    index = text_lower.find(term_lower)
+    if index == -1:
+        return ""
+    # Get surrounding context
+    start = max(0, index - window_size)
+    end = min(len(text), index + len(term) + window_size)
+    return text[start:end]
+def categorize_term(term, doc):
+    """Categorize a term based on NER and custom rules."""
+    for ent in doc.ents:
+        if term.lower() in ent.text.lower():
+            if ent.label_ == "PERSON":
+                return "Person"
+            elif ent.label_ == "EVENT" or ent.label_ == "DATE":
+                return "Event"
+            elif ent.label_ == "LAW" or ent.label_ == "ORG":
+                return "Law"
+    # Custom categorization for common terms
+    themes = ["manifest destiny", "reconstruction", "civil war", "slavery"]
+    if term.lower() in themes:
+        return "Main Theme"
+    return "Concept"
 def generate_context_map(term):
     """Generate a network visualization for the given term."""
+    if not term or not term.strip():
         return None
+    # Load content
+    content = load_content()
+    if not content:
         return None
+    # Get context
+    context = find_context(term, content)
+    if not context:
+        return None
+    # Process context
+    doc = nlp(context)
+    # Create graph
+    G = nx.Graph()
+    # Add main term
+    term_category = categorize_term(term, doc)
+    G.add_node(term, category=term_category)
+    # Find related entities
+    related_entities = []
+    for ent in doc.ents:
+        if ent.text.lower() != term.lower():
+            related_entities.append({
+                'text': ent.text,
+                'category': categorize_term(ent.text, doc)
+            })
+    # Add top related entities (limit to 8)
+    for entity in related_entities[:8]:
+        G.add_node(entity['text'], category=entity['category'])
+        G.add_edge(term, entity['text'])
     # Create visualization
     plt.figure(figsize=(12, 12))
     plt.clf()
+    # Set dark background
+    plt.gca().set_facecolor('#1a1a1a')
+    plt.gcf().set_facecolor('#1a1a1a')
+    # Create layout
+    pos = nx.spring_layout(G, k=1)
+    # Draw nodes for each category
     for category, color in CATEGORIES.items():
+        node_list = [node for node, attr in G.nodes(data=True)
+                    if attr.get('category') == category]
+        if node_list:
+            nx.draw_networkx_nodes(G, pos,
+                                 nodelist=node_list,
+                                 node_color=color,
+                                 node_size=2000)
     # Draw edges
+    nx.draw_networkx_edges(G, pos, edge_color='white', width=1)
     # Add labels
+    nx.draw_networkx_labels(G, pos, font_size=8, font_color='white')
     # Add title
     plt.title(f"Historical Context Map for '{term}'",
+              color='white',
+              pad=20)
     return plt.gcf()
 # Create Gradio interface
 iface = gr.Interface(
     fn=generate_context_map,
+    inputs=gr.Textbox(
+        label="Enter a historical term from Unit 5",
+        placeholder="e.g., Civil War, Abraham Lincoln, Reconstruction"
+    ),
     outputs=gr.Plot(),
     title="Historical Context Mapper",
+    description="Enter a term from Unit 5 (1844-1877) to see its historical context and connections.",
     examples=[
         ["Civil War"],
         ["Abraham Lincoln"],