import base64
import os
import re
from io import BytesIO
from pathlib import Path

import gradio as gr
import pandas as pd
from langchain.schema.output_parser import OutputParserException
from PIL import Image

import categories
from categories import Category
from main import process_image, process_pdf

HF_TOKEN = os.getenv("HF_TOKEN")

# HTML template for previewing an uploaded PDF. The iframe below is minimal,
# assumed markup: it embeds the base64-encoded document passed positionally
# via .format() in display_file().
PDF_IFRAME = """
""" hf_writer_normal = gr.HuggingFaceDatasetSaver( HF_TOKEN, "automatic-reimbursement-tool-demo", separate_dirs=False ) hf_writer_incorrect = gr.HuggingFaceDatasetSaver( HF_TOKEN, "automatic-reimbursement-tool-demo-incorrect", separate_dirs=False ) # with open("examples/example1.pdf", "rb") as pdf_file: # base64_pdf = base64.b64encode(pdf_file.read()) # example_paths = [] # current_file_path = None # def ignore_examples(function): # def new_function(*args, **kwargs): # global example_paths, current_file_path # if current_file_path not in example_paths: # return function(*args, **kwargs) def display_file(input_files): global current_file_paths # Initialize the list of current file paths current_file_paths = [file.name for file in input_files] if not input_files: return gr.HTML.update(visible=False), gr.Image.update(visible=False) # Check if there's any PDF file among the uploaded files pdf_base64 = None for input_file in input_files: if input_file.name.endswith(".pdf"): with open(input_file.name, "rb") as pdf_file: pdf_base64 = base64.b64encode(pdf_file.read()).decode() break # Assuming only one PDF is present if pdf_base64: return gr.HTML.update(PDF_IFRAME.format(pdf_base64), visible=True), gr.Image.update(visible=False) else: # You can choose to display the first image in the list or handle multiple images differently image = Image.open(input_files[0].name) return gr.HTML.update(visible=False), gr.Image.update(image, visible=True) def show_intermediate_outputs(show_intermediate): if show_intermediate: return gr.Accordion.update(visible=True) else: return gr.Accordion.update(visible=False) def show_share_contact(share_result): return gr.Textbox.update(visible=share_result) def clear_inputs(): return gr.File.update(value=None) def clear_outputs(input_file): if input_file: return None, None, None, None def extract_text(input_file): """Takes the input file and updates the extracted text""" if not input_file: gr.Error("Please upload a file to continue!") return gr.Textbox.update() # Send change to preprocessed image or to extracted text if input_file.name.endswith(".pdf"): text = process_pdf(Path(input_file.name), extract_only=True) else: text = process_image(Path(input_file.name), extract_only=True) return text def categorize_text(text): """Takes the extracted text and updates the category""" category = categories.categorize_text(text) return category def query(category, text): """Takes the extracted text and category and updates the chatbot in two steps: 1. Construct a prompt 2. 
def query(category, text):
    """Takes the extracted text and category and updates the chatbot in two steps:
    1. Construct a prompt
    2. Generate a response
    """
    # category = Category[category]
    chain = categories.category_modules[category].chain
    formatted_prompt = chain.prompt.format_prompt(
        text=text,
        format_instructions=chain.output_parser.get_format_instructions(),
    )

    question = ""
    if len(formatted_prompt.messages) > 1:
        question += f"**System:**\n{formatted_prompt.messages[0].content}"
    question += f"\n\n**Human:**\n{formatted_prompt.messages[-1].content}"
    yield gr.Chatbot.update([[question, "Generating..."]])

    result = chain.generate(
        input_list=[
            {
                "text": text,
                "format_instructions": chain.output_parser.get_format_instructions(),
            }
        ]
    )
    answer = result.generations[0][0].text
    yield gr.Chatbot.update([[question, answer]])


PARSING_REGEXP = r"\*\*System:\*\*\n([\s\S]+)\n\n\*\*Human:\*\*\n([\s\S]+)"


def parse(category, chatbot):
    """Takes the chatbot prompt and response and updates the extracted information"""
    global PARSING_REGEXP

    chatbot_responses = [response[1] for response in chatbot]
    if not chatbot_responses:
        # Handle the case when there are no chatbot responses
        yield {"status": "No responses available"}
        return
    answer = chatbot_responses[-1]

    # try:
    #     answer = next(chatbot)[1]
    # except StopIteration:
    #     answer = ""

    if category not in Category.__members__:
        # Handle the case when an invalid category is provided
        answer = "test"

    # category = Category[category]
    chain = categories.category_modules[category].chain
    yield {"status": "Parsing response..."}

    try:
        information = chain.output_parser.parse(answer)
        information = information.json() if information else {}
    except OutputParserException as e:
        information = {
            "details": str(e),
            "output": e.llm_output,
        }
    yield information


def activate_flags():
    return gr.Button.update(interactive=True), gr.Button.update(interactive=True)


def deactivate_flags():
    return gr.Button.update(interactive=False), gr.Button.update(interactive=False)


def flag_if_shared(flag_method):
    def proxy(share_result, request: gr.Request, *args, **kwargs):
        if share_result:
            return flag_method(request, *args, **kwargs)

    return proxy


def process_and_output_files(input_files):
    output_data = []
    for file in input_files:
        # Extract and categorize the text of each file
        text = extract_text(file)
        category = categorize_text(text)
        # Consume the chatbot-update generator so the final response is available
        chatbot_response = list(query(category, text))
        # parsed_info = parse(category, chatbot_response)

        # Append the relevant data for this file to the output_data list
        output_data.append({
            "File Name": file.name,
            "Extracted Text": text,
            "Category": category,
            "Chatbot Response": chatbot_response,
            # "Parsed Information": parsed_info,
        })
    return output_data


with gr.Blocks(title="Automatic Reimbursement Tool Demo") as page:
    gr.Markdown("