import os
import subprocess
import sys

import gradio as gr
import pandas as pd
import pytesseract
from PIL import Image
from transformers import AutoModelForCausalLM, pipeline

# Install missing dependencies without flash_attn.
# NOTE(review): this runs after the imports at the top of the file, so a
# package that is genuinely missing makes those imports fail before this
# statement is reached -- consider a requirements file instead.
_RUNTIME_PACKAGES = (
    "torch",
    "transformers",
    "pandas",
    "pillow",
    "pytesseract",
    "einops",
    "timm",
)
# Invoke pip through the running interpreter so the packages land in the
# environment this script executes in; an argument list avoids the shell.
# check=False keeps the step best-effort: a failed install was never fatal.
subprocess.run(
    [sys.executable, "-m", "pip", "install", *_RUNTIME_PACKAGES],
    check=False,
)

# Load models
# Checkpoint identifiers are named up front so they are easy to spot and swap.
_FLORENCE_CHECKPOINT = "microsoft/Florence-2-large"
_PARLER_TTS_CHECKPOINT = "parler-tts/parler-tts-large-v1"

# trust_remote_code=True allows the checkpoint's own modelling code to run.
# NOTE(review): text_model is not referenced anywhere else in the visible
# part of this file -- confirm it is still needed before paying the load cost.
text_model = AutoModelForCausalLM.from_pretrained(
    _FLORENCE_CHECKPOINT,
    trust_remote_code=True,
)

# Speech synthesizer used by handle_files to voice the extracted text.
# NOTE(review): confirm this checkpoint can be loaded through the generic
# text-to-speech pipeline.
tts_pipeline = pipeline(task="text-to-speech", model=_PARLER_TTS_CHECKPOINT)

# Function to process PDF files
def process_pdf(pdf):
    """Return the OCR text of every page in ``pdf``, joined in page order.

    ``pdf`` must expose an iterable ``pages`` attribute whose items are
    accepted by ``pytesseract.image_to_string``. An object with no pages
    yields an empty string.
    """
    # NOTE(review): handle_files passes the uploaded file object straight in;
    # confirm that object really provides ``.pages`` with page images.
    page_texts = (pytesseract.image_to_string(page) for page in pdf.pages)
    return "".join(page_texts)

# Function to process CSV files
def process_csv(csv):
    """Read ``csv`` with pandas and return the table rendered as plain text.

    ``csv`` is handed unchanged to ``pandas.read_csv``; the result is the
    ``DataFrame.to_string()`` rendering of the parsed table.
    """
    return pd.read_csv(csv).to_string()

# Function to process images
def process_image(image):
    """Run OCR on ``image`` via pytesseract and return the recognised text."""
    recognised_text = pytesseract.image_to_string(image)
    return recognised_text

# Main function that handles all file types
def handle_files(file):
    """Extract text from an uploaded file and synthesize speech for it.

    Args:
        file: The upload handed over by Gradio: either an object with a
            ``name`` attribute holding the file path, or a plain path
            string. ``None`` (nothing uploaded) is tolerated.

    Returns:
        A ``(text, audio)`` tuple. ``text`` is the extracted text. ``audio``
        is ``(sampling_rate, waveform)`` -- the tuple form ``gr.Audio``
        accepts -- or ``None`` when there is nothing to speak.
    """
    if file is None:
        # Submitting without an upload should yield empty outputs, not crash.
        return "", None

    # Dispatch on the extension case-insensitively so "REPORT.PDF" is not
    # mistaken for an image. Fall back to the value itself for plain paths.
    filename = str(getattr(file, "name", file)).lower()
    if filename.endswith(".pdf"):
        text = process_pdf(file)
    elif filename.endswith(".csv"):
        text = process_csv(file)
    else:
        # Anything else is treated as an image; the context manager releases
        # the image's resources once OCR has finished.
        with Image.open(file) as image:
            text = process_image(image)

    if not text.strip():
        # Blank extraction: there is nothing meaningful to synthesize.
        return text, None

    # Generate audio from the text.
    speech = tts_pipeline(text)

    # gr.Audio cannot render a bare array; it needs the sample rate too.
    # NOTE(review): transformers documents the waveform as
    # (nb_channels, audio_length) while Gradio expects (samples,) or
    # (samples, channels) -- confirm whether a squeeze/transpose is needed
    # for this model's output.
    return text, (speech["sampling_rate"], speech["audio"])

# Gradio interface
demo = gr.Interface(
    fn=handle_files,
    # ``type`` only selects how the upload is delivered to ``fn``; the set of
    # accepted formats is restricted through ``file_types`` instead.
    inputs=gr.File(file_types=[".pdf", ".csv", "image"]),
    outputs=[gr.Textbox(label="Extracted Text"), gr.Audio(label="Generated Audio")],
    title="AuditBidden - Public Procurement Auditor",
)

# Start the web UI only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()