hellodav committed
Commit ec854f5 · verified · 1 Parent(s): c6382bb

Update app.py

Files changed (1)
  app.py (+51, -42)
app.py CHANGED
@@ -1,56 +1,65 @@
- import os
  import gradio as gr
- from transformers import AutoModelForCausalLM, pipeline
- from PIL import Image
- import pandas as pd
- import pytesseract

- # Install missing dependencies without flash_attn
- os.system("pip install torch transformers pandas pillow pytesseract einops timm")

- # Load models
- text_model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True)
- tts_pipeline = pipeline("text-to-speech", model="parler-tts/parler-tts-large-v1")

- # Function to process PDF files
- def process_pdf(pdf):
      text = ""
-     # Assuming each page in the PDF is processed into text
-     for page in pdf.pages:
-         text += pytesseract.image_to_string(page)
      return text

- # Function to process CSV files
- def process_csv(csv):
-     df = pd.read_csv(csv)
-     return df.to_string()

- # Function to process images
- def process_image(image):
-     return pytesseract.image_to_string(image)

- # Main function that handles all file types
- def handle_files(file):
      if file.name.endswith('.pdf'):
-         text = process_pdf(file)
      elif file.name.endswith('.csv'):
          text = process_csv(file)
      else:
-         image = Image.open(file)
-         text = process_image(image)

-     # Generate audio from the text
-     audio = tts_pipeline(text)

-     return text, audio["audio"]

- # Gradio interface
- demo = gr.Interface(
-     fn=handle_files,
-     inputs=gr.File(type=["pdf", "csv", "image"]),
-     outputs=[gr.Textbox(label="Extracted Text"), gr.Audio(label="Generated Audio")],
-     title="AuditBidden - Public Procurement Auditor"
- )

- if __name__ == "__main__":
-     demo.launch()
  import gradio as gr
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import PyPDF2
+ import csv
+ import io

+ # Load the model and tokenizer
+ model_name = "your_fine_tuned_model_name"  # Replace with your actual model name
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name)

+ def process_text(text):
+     inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
+     with torch.no_grad():
+         outputs = model.generate(**inputs, max_length=1000)
+     return tokenizer.decode(outputs[0], skip_special_tokens=True)

+ def extract_text_from_pdf(file):
+     pdf_reader = PyPDF2.PdfFileReader(file)
      text = ""
+     for page in range(pdf_reader.numPages):
+         text += pdf_reader.getPage(page).extractText()
      return text

+ def process_csv(file):
+     content = file.read().decode('utf-8')
+     csv_reader = csv.reader(io.StringIO(content))
+     rows = list(csv_reader)
+     return "\n".join([",".join(row) for row in rows])

+ def analyze_document(file):
      if file.name.endswith('.pdf'):
+         text = extract_text_from_pdf(file)
      elif file.name.endswith('.csv'):
          text = process_csv(file)
      else:
+         return "Unsupported file format. Please upload a PDF or CSV file."

+     prompt = f"Analyze the following procurement document and provide a detailed audit report:\n\n{text}"
+     return process_text(prompt)

+ def answer_question(question, context):
+     prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
+     return process_text(prompt)

+ with gr.Blocks() as demo:
+     gr.Markdown("# AuditBidden: AI-Powered Public Procurement Auditor")

+     with gr.Tab("Document Analysis"):
+         file_input = gr.File(label="Upload Procurement Document (PDF or CSV)")
+         analyze_button = gr.Button("Analyze Document")
+         analysis_output = gr.Textbox(label="Audit Report")

+         analyze_button.click(analyze_document, inputs=file_input, outputs=analysis_output)

+     with gr.Tab("Q&A"):
+         context_input = gr.Textbox(label="Context (paste relevant procurement information)")
+         question_input = gr.Textbox(label="Question")
+         answer_button = gr.Button("Get Answer")
+         answer_output = gr.Textbox(label="Answer")

+         answer_button.click(answer_question, inputs=[question_input, context_input], outputs=answer_output)

+ demo.launch()
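
Note on the new PDF helper: the added extract_text_from_pdf relies on the legacy PyPDF2 interface (PdfFileReader, numPages, getPage, extractText), which PyPDF2 3.x and its successor pypdf no longer expose. A minimal sketch of the same helper against the current pypdf API, assuming pypdf is listed in the Space's requirements, could look like this:

from pypdf import PdfReader

def extract_text_from_pdf(file):
    # Open the uploaded file and concatenate the text of every page.
    reader = PdfReader(file)
    text = ""
    for page in reader.pages:
        text += page.extract_text() or ""  # guard against pages with no extractable text
    return text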