import os
import re
import enum

import streamlit as st
from PIL import Image
from pytesseract import pytesseract
import PyPDF2
import folium
from streamlit_folium import st_folium
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import wikipedia
from transformers import pipeline
from openai import OpenAI

# Install Tesseract at runtime if it is not already present (e.g. on Streamlit Cloud)
if not os.path.exists("/usr/bin/tesseract"):
    os.system("apt-get update && apt-get install -y tesseract-ocr libtesseract-dev")

# NVIDIA OpenAI-compatible API setup
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key="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"  # Replace with actual API key
)

# Load Named Entity Recognition (NER) model
nlp = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")

st.set_page_config(page_title="OCR & Historical Analysis", page_icon="📜", layout="wide")


# Custom styling: wrap text in a simple HTML block for st.markdown(..., unsafe_allow_html=True)
def style_text(text):
    return f"""<div style="background-color:#f8f9fa; padding:1rem; border-radius:8px; line-height:1.6;">
{text}
</div>"""


def find_related_documents(query):
    """Search Wikipedia and return up to five article URLs related to the query."""
    try:
        search_results = wikipedia.search(query, results=5)
        links = [wikipedia.page(result).url for result in search_results]
        return links
    except Exception as e:
        return [f"Error retrieving related documents: {str(e)}"]


def geocode_location(location):
    """Resolve a place name to (latitude, longitude), or None if it cannot be resolved."""
    geolocator = Nominatim(user_agent="streamlit_app")
    try:
        loc = geolocator.geocode(location, timeout=10)
        return (loc.latitude, loc.longitude) if loc else None
    except GeocoderTimedOut:
        return None


def generate_historical_context_nvidia(text):
    """Use the NVIDIA OpenAI-compatible API to generate a structured, summarized historical context."""
    prompt_analysis = f"""
    Analyze the following text and provide a historical context. Identify:
    - Key historical events
    - Significant figures involved
    - The broader historical significance

    Text: {text}

    Provide a detailed response.
    """

    prompt_summary = """
    Summarize the historical context provided above in a concise and structured format:
    - Limit to 5 bullet points
    - Each bullet point should be under 100 words
    - Avoid unnecessary explanations or preamble; return only the summary
    """

    try:
        # Step 1: Generate a detailed historical context
        completion = client.chat.completions.create(
            model="deepseek-ai/deepseek-r1",
            messages=[
                {"role": "system", "content": "You are a historian providing detailed historical insights."},
                {"role": "user", "content": prompt_analysis}
            ],
            temperature=0.4,
            top_p=0.9,
            max_tokens=4096,
            stream=False
        )
        detailed_response = completion.choices[0].message.content.strip()

        # Step 2: Summarize the historical context without any model monologue
        summary_completion = client.chat.completions.create(
            model="deepseek-ai/deepseek-r1",
            messages=[
                {"role": "system", "content": "You are an expert summarizer."},
                {"role": "user", "content": f"{detailed_response}\n\n{prompt_summary}"}
            ],
            temperature=0.4,
            top_p=0.9,
            max_tokens=2048,
            stream=False
        )

        # Extract only the structured summary
        summary_response = summary_completion.choices[0].message.content.strip()

        # Remove AI-generated explanations or redundant preamble (everything before the first blank line)
        clean_summary = re.sub(r"^.*?\n\n", "", summary_response, flags=re.DOTALL)

        return clean_summary if clean_summary else "No historical context found."
    except Exception as e:
        return f"Error retrieving AI-generated historical context: {str(e)}"


class OS(enum.Enum):
    Mac = 0
    Windows = 1


class Languages(enum.Enum):
    English = "eng"
    Filipino = "fil"
    Spanish = "spa"


class ImageReader:
    def __init__(self, selected_os):
        # On Windows, Tesseract is usually not on PATH, so point pytesseract at the
        # default install location. On Mac/Linux the binary is found on PATH
        # (or at /usr/bin/tesseract after the runtime install above).
        if selected_os == OS.Windows:
            pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

    def extract_text(self, image: Image.Image, lang: Languages):
        extracted_text = pytesseract.image_to_string(image, lang=lang.value)
        return ' '.join(extracted_text.split())

    def extract_text_from_pdf(self, pdf_file, lang: Languages):
        # PyPDF2 reads embedded text directly (no OCR), so the language setting is not used here.
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
        return text

    def extract_key_details(self, text):
        details = {"dates": set(), "names": set(), "locations": set()}

        # Dates such as 12/31/1898, 12-31-1898, or a bare four-digit year
        date_pattern = r'\b(?:\d{1,2}[/\-]\d{1,2}[/\-]\d{2,4}|\d{4})\b'
        details['dates'] = set(re.findall(date_pattern, text))

        # Named entities: PER -> names, LOC -> locations
        entities = nlp(text)
        for entity in entities:
            if "PER" in entity['entity']:
                details['names'].add(entity['word'])
            elif "LOC" in entity['entity']:
                details['locations'].add(entity['word'])
        return details


# UI Layout
st.title("📜 OCR & Historical Context Analyzer")
st.markdown("Extract text from images and PDFs, analyze named entities, and retrieve historical context.")

col1, col2 = st.columns([1, 2])

with col1:
    selected_os = st.selectbox("🖥️ Select your OS", [OS.Windows, OS.Mac], format_func=lambda x: x.name)
    selected_lang = st.selectbox("🌍 Select language", list(Languages), format_func=lambda x: x.name)
    uploaded_file = st.file_uploader("📂 Upload an image or PDF", type=["png", "jpg", "jpeg", "pdf"])

if uploaded_file:
    ir = ImageReader(selected_os)
    extracted_text = ""

    if uploaded_file.type in ["image/png", "image/jpeg"]:
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image", use_column_width=True)
        extracted_text = ir.extract_text(image, selected_lang)
    else:
        extracted_text = ir.extract_text_from_pdf(uploaded_file, selected_lang)

    st.markdown("### 📝 Extracted Text:")
    st.markdown(style_text(extracted_text), unsafe_allow_html=True)

    key_details = ir.extract_key_details(extracted_text)
    st.markdown("### 🔍 Extracted Key Details")
    st.write(f"**📅 Dates:** {', '.join(key_details['dates']) if key_details['dates'] else 'None found'}")
    st.write(f"**👤 Names:** {', '.join(key_details['names']) if key_details['names'] else 'None found'}")
    st.write(f"**📍 Locations:** {', '.join(key_details['locations']) if key_details['locations'] else 'None found'}")

    combined_terms = ' '.join(key_details['dates'].union(key_details['locations']).union(key_details['names']))

    historical_context = generate_historical_context_nvidia(combined_terms)
    st.markdown("### 🏛️ Historical Context")
    st.markdown(style_text(historical_context), unsafe_allow_html=True)

    st.markdown("### 🌐 Search the Web")
    search_query = st.text_input("Enter a keyword or phrase:")
    if search_query:
        search_results = generate_historical_context_nvidia(search_query)
        st.markdown(style_text(search_results), unsafe_allow_html=True)

    related_docs = find_related_documents(combined_terms)
    st.markdown("### 📚 Related Historical Documents")
    for link in related_docs:
        st.markdown(f"[🔗 {link}]({link})")

    st.markdown("### 🗺️ Map of Key Locations")
    map_center = [10.0, 10.0]
    map_obj = folium.Map(location=map_center, zoom_start=2)
    for loc in key_details['locations']:
        coords = geocode_location(loc)
        if coords:
            folium.Marker(coords, popup=loc).add_to(map_obj)
    st_folium(map_obj, width=700, height=500)
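# ---------------------------------------------------------------------------
# Usage sketch (assumptions: this file is saved as app.py, and the entries below
# are the usual PyPI distribution names; neither is confirmed by this file):
#
#   pip install streamlit pillow pytesseract PyPDF2 folium streamlit-folium \
#       geopy wikipedia transformers torch openai
#   streamlit run app.py
#
# Note: torch is listed only as one possible backend for the transformers NER
# pipeline. The runtime apt-get install above works only on Debian/Ubuntu hosts
# (such as Streamlit Community Cloud); on a local machine, install Tesseract
# with your system package manager before running the app.
# ---------------------------------------------------------------------------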