hellodav committed
Commit ec854f5 · verified · 1 Parent(s): c6382bb

Update app.py

Files changed (1)
  app.py (+51, -42)
app.py CHANGED
@@ -1,56 +1,65 @@
- import os
  import gradio as gr
- from transformers import AutoModelForCausalLM, pipeline
- from PIL import Image
- import pandas as pd
- import pytesseract

- # Install missing dependencies without flash_attn
- os.system("pip install torch transformers pandas pillow pytesseract einops timm")

- # Load models
- text_model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True)
- tts_pipeline = pipeline("text-to-speech", model="parler-tts/parler-tts-large-v1")

- # Function to process PDF files
- def process_pdf(pdf):
      text = ""
-     # Assuming each page in the PDF is processed into text
-     for page in pdf.pages:
-         text += pytesseract.image_to_string(page)
      return text

- # Function to process CSV files
- def process_csv(csv):
-     df = pd.read_csv(csv)
-     return df.to_string()

- # Function to process images
- def process_image(image):
-     return pytesseract.image_to_string(image)

- # Main function that handles all file types
- def handle_files(file):
      if file.name.endswith('.pdf'):
-         text = process_pdf(file)
      elif file.name.endswith('.csv'):
          text = process_csv(file)
      else:
-         image = Image.open(file)
-         text = process_image(image)

-     # Generate audio from the text
-     audio = tts_pipeline(text)

-     return text, audio["audio"]

- # Gradio interface
- demo = gr.Interface(
-     fn=handle_files,
-     inputs=gr.File(type=["pdf", "csv", "image"]),
-     outputs=[gr.Textbox(label="Extracted Text"), gr.Audio(label="Generated Audio")],
-     title="AuditBidden - Public Procurement Auditor"
- )

- if __name__ == "__main__":
-     demo.launch()
  import gradio as gr
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import PyPDF2
+ import csv
+ import io

+ # Load the model and tokenizer
+ model_name = "your_fine_tuned_model_name"  # Replace with your actual model name
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name)

+ def process_text(text):
+     inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
+     with torch.no_grad():
+         outputs = model.generate(**inputs, max_length=1000)
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)

+ def extract_text_from_pdf(file):
+     pdf_reader = PyPDF2.PdfFileReader(file)
      text = ""
+     for page in range(pdf_reader.numPages):
+         text += pdf_reader.getPage(page).extractText()
      return text

+ def process_csv(file):
+     content = file.read().decode('utf-8')
+     csv_reader = csv.reader(io.StringIO(content))
+     rows = list(csv_reader)
+     return "\n".join([",".join(row) for row in rows])

+ def analyze_document(file):
      if file.name.endswith('.pdf'):
+         text = extract_text_from_pdf(file)
      elif file.name.endswith('.csv'):
          text = process_csv(file)
      else:
+         return "Unsupported file format. Please upload a PDF or CSV file."

+     prompt = f"Analyze the following procurement document and provide a detailed audit report:\n\n{text}"
+     return process_text(prompt)

+ def answer_question(question, context):
+     prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
+     return process_text(prompt)

+ with gr.Blocks() as demo:
+     gr.Markdown("# AuditBidden: AI-Powered Public Procurement Auditor")

+     with gr.Tab("Document Analysis"):
+         file_input = gr.File(label="Upload Procurement Document (PDF or CSV)")
+         analyze_button = gr.Button("Analyze Document")
+         analysis_output = gr.Textbox(label="Audit Report")

+         analyze_button.click(analyze_document, inputs=file_input, outputs=analysis_output)

+     with gr.Tab("Q&A"):
+         context_input = gr.Textbox(label="Context (paste relevant procurement information)")
+         question_input = gr.Textbox(label="Question")
+         answer_button = gr.Button("Get Answer")
+         answer_output = gr.Textbox(label="Answer")

+         answer_button.click(answer_question, inputs=[question_input, context_input], outputs=answer_output)

+ demo.launch()
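
Note on the new PDF helper: the added extract_text_from_pdf relies on the legacy PyPDF2 interface (PdfFileReader, numPages, getPage, extractText), which PyPDF2 3.x and its successor pypdf no longer expose. A minimal sketch of the same helper against the current pypdf API, assuming pypdf is listed in the Space's requirements, could look like this:

from pypdf import PdfReader

def extract_text_from_pdf(file):
    # Open the uploaded file and concatenate the text of every page.
    reader = PdfReader(file)
    text = ""
    for page in reader.pages:
        text += page.extract_text() or ""  # guard against pages with no extractable text
    return text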