# Standard library
import enum
import os
import re
from collections import defaultdict

# Third-party
import streamlit as st
from PIL import Image
from pytesseract import pytesseract
import PyPDF2
import folium
from streamlit_folium import st_folium
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import wikipedia
from transformers import pipeline
from openai import OpenAI

# Install Tesseract at runtime (hosted Linux container). Skipped when the
# binary is already present so reruns don't repeat the apt-get work.
if not os.path.exists("/usr/bin/tesseract"):
    os.system("apt-get update && apt-get install -y tesseract-ocr libtesseract-dev")

# NVIDIA OpenAI-compatible API client.
# SECURITY: read the key from the NVIDIA_API_KEY environment variable when set.
# The hardcoded value is kept only as a backward-compatible fallback — it is a
# committed secret and should be revoked/rotated and removed.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ.get(
        "NVIDIA_API_KEY",
        "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",
    ),
)

# Named Entity Recognition model used by ImageReader.extract_key_details.
nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")

st.set_page_config(page_title="OCR & Historical Analysis", page_icon="π", layout="wide")
# Custom Styling
def style_text(text):
    """Pad *text* with a leading and trailing newline for markdown rendering."""
    return "\n{}\n".format(text)
def find_related_documents(query):
    """Return up to five Wikipedia page URLs related to *query*.

    BUG FIX: the original resolved all pages inside one try block, so a single
    DisambiguationError/PageError discarded every result. Each page lookup is
    now attempted independently and failures are skipped.

    Returns a list of URL strings; on a total failure (e.g. network error
    during the search itself) returns a one-element list with an error
    message, matching the original best-effort contract.
    """
    try:
        search_results = wikipedia.search(query, results=5)
        links = []
        for result in search_results:
            try:
                links.append(wikipedia.page(result).url)
            except Exception:
                # Skip pages that fail to resolve (ambiguous, missing, ...).
                continue
        return links
    except Exception as e:
        return [f"Error retrieving related documents: {str(e)}"]
def geocode_location(location):
    """Best-effort geocode of a place name to a (latitude, longitude) tuple.

    ROBUSTNESS FIX: the original caught only GeocoderTimedOut, so any other
    geocoder failure (service unavailable, quota, parse error) crashed the
    Streamlit page. All geopy service errors now yield None instead.

    Returns None when the location cannot be resolved.
    """
    # Local import keeps the file-level import block unchanged; GeocoderTimedOut
    # is a subclass of GeocoderServiceError in geopy.
    from geopy.exc import GeocoderServiceError

    geolocator = Nominatim(user_agent="streamlit_app")
    try:
        loc = geolocator.geocode(location, timeout=10)
    except (GeocoderTimedOut, GeocoderServiceError):
        return None
    return (loc.latitude, loc.longitude) if loc else None
def generate_historical_context_nvidia(text):
    """Use NVIDIA OpenAI API to generate a structured, summarized historical context.

    Two-step chat pipeline against deepseek-ai/deepseek-r1:
      1. Ask for a detailed historical analysis of *text*.
      2. Feed that analysis back with a summarization prompt to get at most
         five structured bullet points.

    Returns the cleaned summary string, a fallback message when the cleanup
    leaves nothing, or an error string if either API call raises.
    """
    prompt_analysis = f"""
    Analyze the following text and provide a historical context. Identify:
    - Key historical events
    - Significant figures involved
    - The broader historical significance
    Text: {text}
    Provide a detailed response.
    """
    prompt_summary = """
    Summarize the historical context provided above in a concise and structured format:
    - Limit to 5 bullet points
    - Each bullet point should be under 100 words
    - Avoid unnecessary explanations or preambleβreturn only the summary
    """
    try:
        # Step 1: Generate Detailed Historical Context
        completion = client.chat.completions.create(
            model="deepseek-ai/deepseek-r1",
            messages=[
                {"role": "system", "content": "You are a historian providing detailed historical insights."},
                {"role": "user", "content": prompt_analysis}
            ],
            temperature=0.4,
            top_p=0.9,
            max_tokens=4096,
            stream=False
        )
        detailed_response = completion.choices[0].message.content.strip()
        # Step 2: Summarize the Historical Context **without Monologue** —
        # the detailed answer is passed back as user content with the
        # summarization instructions appended.
        summary_completion = client.chat.completions.create(
            model="deepseek-ai/deepseek-r1",
            messages=[
                {"role": "system", "content": "You are an expert summarizer."},
                {"role": "user", "content": f"{detailed_response}\n\n{prompt_summary}"}
            ],
            temperature=0.4,
            top_p=0.9,
            max_tokens=2048,
            stream=False
        )
        # Extract only the structured summary
        summary_response = summary_completion.choices[0].message.content.strip()
        # Remove AI-generated explanations or redundant preamble.
        # NOTE(review): this strips everything up to the FIRST blank line. If
        # the model returns the summary with no preamble (or a reasoning-model
        # monologue containing blank lines), legitimate content may be lost —
        # confirm against real deepseek-r1 output.
        clean_summary = re.sub(r"^.*?\n\n", "", summary_response, flags=re.DOTALL)
        return clean_summary if clean_summary else "No historical context found."
    except Exception as e:
        # Broad catch keeps the UI alive on any API/network failure; the error
        # text is rendered directly to the user.
        return f"Error retrieving AI-generated historical context: {str(e)}"
class OS(enum.Enum):
    """Host operating system choices offered in the sidebar selectbox.

    Only the Windows member triggers an explicit tesseract path in
    ImageReader.__init__ (see the note there).
    """
    Mac = 0
    Windows = 1
class Languages(enum.Enum):
    """OCR language choices; values are Tesseract language codes (`-l` codes)."""
    English = "eng"
    Filipino = "fil"
    Spanish = "spa"
class ImageReader:
    """Extracts text from images (Tesseract OCR) and PDFs, plus key details
    (dates via regex, names/locations via the module-level NER pipeline)."""

    def __init__(self, os):
        # NOTE: the parameter is named `os` by design of the original API and
        # shadows the os module inside this method only.
        # NOTE(review): the Windows branch points at a Linux path
        # ('/usr/bin/tesseract'); presumably intentional for the hosted
        # container — confirm before changing.
        if os == OS.Windows:
            pytesseract.tesseract_cmd = '/usr/bin/tesseract'

    def extract_text(self, image: Image, lang: Languages):
        """OCR *image* and collapse all runs of whitespace to single spaces."""
        raw = pytesseract.image_to_string(image, lang=lang.value)
        return ' '.join(raw.split())

    def extract_text_from_pdf(self, pdf_file, lang: Languages):
        """Concatenate the extracted text of every page of *pdf_file*.

        *lang* is accepted for signature symmetry with extract_text but is
        not used by the PDF text layer.
        """
        reader = PyPDF2.PdfReader(pdf_file)
        page_texts = (page.extract_text() or "" for page in reader.pages)
        return "".join(page_texts)

    def extract_key_details(self, text):
        """Return {'dates', 'names', 'locations'} sets mined from *text*.

        Dates come from a regex (d/m/y variants or bare 4-digit years);
        names and locations come from NER entity tags containing PER / LOC.
        """
        details = {"dates": set(), "names": set(), "locations": set()}
        date_pattern = r'\b(?:\d{1,2}[/\-]\d{1,2}[/\-]\d{2,4}|\d{4})\b'
        details['dates'] = set(re.findall(date_pattern, text))
        # NOTE(review): tokenized sub-words ('##x') are added verbatim —
        # presumably acceptable for this app's display purposes.
        for ent in nlp(text):
            tag = ent['entity']
            if "PER" in tag:
                details['names'].add(ent['word'])
            elif "LOC" in tag:
                details['locations'].add(ent['word'])
        return details
# UI Layout — all widget labels keep their original (mojibake) byte sequences
# so rendered text is unchanged; only structure/comments and one broken
# literal are fixed here.
st.title("π OCR & Historical Context Analyzer")
st.markdown("Extract text from images and PDFs, analyze named entities, and retrieve historical context.")

col1, col2 = st.columns([1, 2])
with col1:
    selected_os = st.selectbox("π₯οΈ Select your OS", [OS.Windows, OS.Mac], format_func=lambda x: x.name)
    selected_lang = st.selectbox("π Select language", list(Languages), format_func=lambda x: x.name)

uploaded_file = st.file_uploader("π Upload an image or PDF", type=["png", "jpg", "jpeg", "pdf"])

if uploaded_file:
    ir = ImageReader(selected_os)
    extracted_text = ""
    if uploaded_file.type in ["image/png", "image/jpeg"]:
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image", use_column_width=True)
        extracted_text = ir.extract_text(image, selected_lang)
    else:
        # Any accepted upload that is not a PNG/JPEG is treated as a PDF.
        extracted_text = ir.extract_text_from_pdf(uploaded_file, selected_lang)

    st.markdown("### π Extracted Text:")
    st.markdown(style_text(extracted_text), unsafe_allow_html=True)

    key_details = ir.extract_key_details(extracted_text)
    st.markdown("### π Extracted Key Details")
    # BUG FIX: the Dates f-string was split across two physical lines in the
    # source (an unterminated string literal — a syntax error); rejoined here.
    st.write(f"**π Dates:** {', '.join(key_details['dates']) if key_details['dates'] else 'None found'}")
    st.write(f"**π€ Names:** {', '.join(key_details['names']) if key_details['names'] else 'None found'}")
    st.write(f"**π Locations:** {', '.join(key_details['locations']) if key_details['locations'] else 'None found'}")

    # AI historical context for all extracted entities combined.
    combined_terms = ' '.join(key_details['dates'].union(key_details['locations']).union(key_details['names']))
    historical_context = generate_historical_context_nvidia(combined_terms)
    st.markdown("### ποΈ Historical Context")
    st.markdown(style_text(historical_context), unsafe_allow_html=True)

    # Free-form follow-up query routed through the same AI helper.
    st.markdown("### π Search the Web")
    search_query = st.text_input("Enter a keyword or phrase:")
    if search_query:
        search_results = generate_historical_context_nvidia(search_query)
        st.markdown(style_text(search_results), unsafe_allow_html=True)

    related_docs = find_related_documents(combined_terms)
    st.markdown("### π Related Historical Documents")
    for link in related_docs:
        st.markdown(f"[π {link}]({link})")

    # Map of geocodable extracted locations; markers are best-effort
    # (geocode_location returns None on failure and the location is skipped).
    st.markdown("### πΊοΈ Map of Key Locations")
    map_center = [10.0, 10.0]
    map_obj = folium.Map(location=map_center, zoom_start=2)
    for loc in key_details['locations']:
        coords = geocode_location(loc)
        if coords:
            folium.Marker(coords, popup=loc).add_to(map_obj)
    st_folium(map_obj, width=700, height=500)