import logging
from concurrent.futures import ThreadPoolExecutor

import fitz
import streamlit as st
import torch
from PIL import Image
from transformers import (
    TrOCRProcessor,
    VisionEncoderDecoderModel,
    ViTImageProcessor,
    pipeline,
)

# Setup logging
def setup_logging():
    """Configure the root logger with timestamped, INFO-level output."""
    log_format = "%(asctime)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

# Load models globally for faster performance
@st.cache_resource
def load_models():
    """Load and cache every Hugging Face model the app needs.

    Returns:
        tuple: (processor, model, translator_hi, translator_ur, summarizer)
            - processor/model: TrOCR OCR pair for extracting printed text
              from uploaded images.
            - translator_hi / translator_ur: English→Hindi / English→Urdu
              translation pipelines.
            - summarizer: BART-based summarization pipeline.

    Note: the original referenced "microsoft/vision-transformation-transformer",
    which is not a published model id, and paired it with ViTImageProcessor,
    which has no ``decode`` method (extract_text_from_image calls
    ``processor.decode``). TrOCR's processor bundles image preprocessing AND a
    tokenizer, so decoding works and the model actually performs OCR.
    """
    logging.info("Loading Hugging Face models...")
    # OCR model for printed text; processor provides both preprocessing and decode().
    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-printed")
    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-printed")

    # Translation pipelines (English -> Hindi, English -> Urdu).
    translator_hi = pipeline("translation", model="Helsinki-NLP/opus-mt-en-hi")
    translator_ur = pipeline("translation", model="Helsinki-NLP/opus-mt-en-ur")

    # Summarization model for the English analysis summary.
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    return processor, model, translator_hi, translator_ur, summarizer

# Function to extract text from images using Hugging Face model
def extract_text_from_image(image, processor, model):
    logging.info("Extracting text from image...")
    # Preprocess image
    inputs = processor(images=image, return_tensors="pt")
    # Use the model to generate captions
    out = model.generate(**inputs)
    return processor.decode(out[0], skip_special_tokens=True)

# Function to extract text from PDFs
def extract_text_from_pdf(pdf_file):
    logging.info("Extracting text from PDF...")
    doc = fitz.open(pdf_file)
    text = ""
    for page in doc:
        text += page.get_text()
    return text

# Function to process text in chunks for better performance
def process_chunks(text, model, chunk_size=500):
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    results = []
    with ThreadPoolExecutor() as executor:
        results = list(executor.map(lambda chunk: model(chunk, max_length=200), chunks))
    return " ".join([result[0]["translation_text"] for result in results])

# Main app logic
def main():
    setup_logging()
    st.title("Lab Report Analyzer")
    st.write("Upload a file (Image, PDF, or Text) to analyze and summarize the lab report in English, Hindi, and Urdu.")
    
    # Load models
    processor, model, translator_hi, translator_ur, summarizer = load_models()
    
    file = st.file_uploader("Upload a file (Image, PDF, or Text):", type=["jpg", "png", "jpeg", "pdf", "txt"])
    if file:
        text = ""
        try:
            if file.type in ["image/jpeg", "image/png", "image/jpg"]:
                image = Image.open(file)
                text = extract_text_from_image(image, processor, model)
            elif file.type == "application/pdf":
                text = extract_text_from_pdf(file)
            elif file.type == "text/plain":
                text = file.read().decode("utf-8")

            if text:
                with st.spinner("Analyzing the report..."):
                    # Generate summary
                    summary = summarizer(text, max_length=130, min_length=30)[0]["summary_text"]

                    # Generate translations
                    hindi_translation = process_chunks(text, translator_hi)
                    urdu_translation = process_chunks(text, translator_ur)

                    # Display results
                    st.subheader("Analysis Summary (English):")
                    st.write(summary)

                    st.subheader("Hindi Translation:")
                    st.write(hindi_translation)

                    st.subheader("Urdu Translation:")
                    st.write(urdu_translation)
            else:
                st.warning("No text could be extracted. Please check the file and try again.")
        except Exception as e:
            logging.error(f"Error processing the file: {e}")
            st.error("An error occurred while processing the file. Please try again.")
    else:
        st.info("Please upload a file to begin.")

# Standard script entry guard: launch the Streamlit app when run directly.
if __name__ == "__main__":
    main()