"""Gradio app: extract text from an uploaded PDF, CSV, or image and read it aloud.

The extracted text is shown in a textbox and also passed to a text-to-speech
pipeline; the resulting waveform is returned to a Gradio audio component.
"""

import os
import subprocess
import sys

import gradio as gr
import pandas as pd
import pytesseract
from PIL import Image
from transformers import AutoModelForCausalLM, pipeline

# Install missing dependencies without flash_attn.
# This runs after the imports above, so it cannot rescue a missing top-level
# import; it only helps packages that are loaded lazily afterwards
# (presumably einops/timm for the remote model code -- TODO confirm).
# `sys.executable -m pip` targets the interpreter running this script, and
# check=False keeps the original best-effort behaviour (a failed install does
# not abort start-up).
subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "torch",
        "transformers",
        "pandas",
        "pillow",
        "pytesseract",
        "einops",
        "timm",
    ],
    check=False,
)

# Load models.
# NOTE(review): text_model is not referenced anywhere else in this file; it is
# kept because other code may import it -- confirm before removing.
text_model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Florence-2-large", trust_remote_code=True
)
# NOTE(review): verify that this checkpoint loads through the plain
# transformers "text-to-speech" pipeline in the deployed environment.
tts_pipeline = pipeline("text-to-speech", model="parler-tts/parler-tts-large-v1")


def process_pdf(pdf):
    """Return the OCR text of every page of *pdf*, concatenated in page order.

    Args:
        pdf: Object exposing a ``pages`` iterable whose items are accepted by
            ``pytesseract.image_to_string``.

    Returns:
        The OCR output of all pages joined without a separator.

    Raises:
        AttributeError: If *pdf* has no ``pages`` attribute.
    """
    # NOTE(review): the upload object handed over by handle_files is not
    # expected to expose ``pages``; a PDF-to-image rendering step (which needs
    # an extra dependency) is still required for this branch to work.
    return "".join(pytesseract.image_to_string(page) for page in pdf.pages)


def process_csv(csv):
    """Return the contents of a CSV file rendered as a plain-text table.

    Args:
        csv: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        The parsed table formatted with ``DataFrame.to_string``.
    """
    df = pd.read_csv(csv)
    return df.to_string()


def process_image(image):
    """Return the text pytesseract recognises in *image*.

    Args:
        image: Image (or image path) accepted by
            ``pytesseract.image_to_string``.
    """
    return pytesseract.image_to_string(image)


def handle_files(file):
    """Extract text from an uploaded file and synthesise speech from it.

    The file kind is chosen from the extension (case-insensitive): ``.pdf``
    goes to ``process_pdf``, ``.csv`` to ``process_csv``, and anything else is
    opened as an image and OCR'd.

    Args:
        file: The upload from the Gradio file component: either a filesystem
            path (``str`` / ``os.PathLike``) or an object with a ``name``
            attribute holding the path. ``None`` means nothing was uploaded.

    Returns:
        A ``(text, audio)`` tuple. ``audio`` is ``(sampling_rate, waveform)``
        as expected by ``gr.Audio``, or ``None`` when there is no file or no
        text to speak.
    """
    if file is None:
        # Submitted without an upload: nothing to extract or speak.
        return "", None

    # Depending on the Gradio version/config the component yields a path or a
    # temp-file wrapper; normalise to a path string.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name
    suffix = os.path.splitext(path)[1].lower()

    if suffix == ".pdf":
        text = process_pdf(file)
    elif suffix == ".csv":
        text = process_csv(path)
    else:
        # Context manager releases the underlying file handle after OCR.
        with Image.open(path) as image:
            text = process_image(image)

    if not text.strip():
        # Nothing to read aloud; skip the TTS model instead of feeding it
        # an empty prompt.
        return text, None

    # Generate audio from the text.
    speech = tts_pipeline(text)
    waveform = speech["audio"]
    # The pipeline documents its output as (channels, samples) while gr.Audio
    # wants (samples,) or (samples, channels), so flip 2-D output.
    if getattr(waveform, "ndim", 1) == 2:
        waveform = waveform.T
    return text, (speech["sampling_rate"], waveform)


# Gradio interface.
demo = gr.Interface(
    fn=handle_files,
    # `file_types` restricts what can be uploaded; `type` (left at its
    # default) only controls how the upload is passed to `fn`.
    inputs=gr.File(file_types=[".pdf", ".csv", "image"]),
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Audio(label="Generated Audio"),
    ],
    title="AuditBidden - Public Procurement Auditor",
)

if __name__ == "__main__":
    demo.launch()