Update app.py
app.py
CHANGED
@@ -1,153 +1,201 @@
-import
-import
-import
-import
-from
-
-#
-
-)
-
-qwen_pipeline = load_qwen()
-phi_pipeline = load_phi()
-
-# ------------------------------
-# Utility Functions
-# ------------------------------
-def summarize_document(document_text):
-    prompt = f"Summarize the following document and highlight key insights:\n\n{document_text}"
-    summary = qwen_pipeline(prompt, max_new_tokens=1024)[0]['generated_text']
-    return summary
-
-def answer_question(summary, question):
-    prompt = f"Based on the following summary:\n\n{summary}\n\nAnswer the question: {question}"
-    answer = phi_pipeline(prompt, max_new_tokens=256)[0]['generated_text']
-    return answer
-
-def find_similar_chunks(original, output):
-    matcher = SequenceMatcher(None, original, output)
-    segments = []
-    left = 0
-    for _, j, n in matcher.get_matching_blocks():
-        if left < j:
-            segments.append({'text': output[left:j], 'match': False})
-        segments.append({'text': output[j:j+n], 'match': True})
-        left = j+n
-    return segments
-
-# ------------------------------
-# Streamlit App Layout
-# ------------------------------
-st.title("SmartDoc Analyzer")
-st.markdown("Analyze Financial & Health Documents with AI")
-
-# Tabs for different functionalities
-tabs = st.tabs(["Document Summarization", "Interactive Q&A", "Visualization & Data Extraction"])
-
-# -------- Document Summarization Tab --------
-with tabs[0]:
-    st.header("Document Summarization")
-    document_text = st.text_area("Paste Document Text:", height=300)
-    if st.button("Summarize Document"):
-        if document_text:
-            summary = summarize_document(document_text)
-            st.subheader("Summary")
-            st.write(summary)
-            # Save summary in session for use in Q&A tab
-            st.session_state['last_summary'] = summary
-        else:
-            st.warning("Please paste document text to summarize.")
-
-# -------- Interactive Q&A Tab --------
-with tabs[1]:
-    st.header("Interactive Q&A")
-    default_summary = st.session_state.get('last_summary', '')
-    summary_context = st.text_area("Summary Context:", value=default_summary, height=150)
-    question = st.text_input("Enter your question about the document:")
-    if st.button("Get Answer"):
-        if summary_context and question:
-            answer = answer_question(summary_context, question)
-            st.subheader("Answer")
-            st.write(answer)
-        else:
-            st.warning("Please provide both a summary context and a question.")
-
-# -------- Visualization & Data Extraction Tab --------
-with tabs[2]:
-    st.header("Visualization & Data Extraction")
-
-    st.subheader("Visualization Placeholder")
-    st.markdown("An interactive chart can be displayed here using Altair or Plotly.")
-
-    # Example static Altair chart (replace with dynamic data extraction logic)
-    data = pd.DataFrame({
-        'Year': [2019, 2020, 2021, 2022],
-        'Revenue': [150, 200, 250, 300]
-    })
-    chart = alt.Chart(data).mark_line(point=True).encode(
-        x='Year:O',
-        y='Revenue:Q',
-        tooltip=['Year', 'Revenue']
-    ).interactive()
-    st.altair_chart(chart, use_container_width=True)
-
-    st.subheader("Data Extraction Placeholder")
-    st.markdown("Implement NLP techniques or model prompts to extract structured data here.")
-
-    uploaded_file = st.file_uploader("Upload a document file for extraction", type=["pdf", "docx", "txt"])
-    if uploaded_file is not None:
-        st.info("File uploaded successfully. Data extraction logic would process this file.")
-        # Add logic to extract tables, key figures, etc. from the uploaded file.
-
-# ------------------------------
-# Safety & Compliance Layer (Placeholder)
-# ------------------------------
-st.sidebar.markdown("### Safety & Compliance")
-st.sidebar.info(
-    "This tool provides AI-driven insights. "
-    "Please note that summaries and answers are for informational purposes only and should not be "
-    "considered professional financial or medical advice."
-)
-
+import gradio as gr
+import base64
+import os
+import re
+from io import BytesIO
+from PIL import Image
+from huggingface_hub import InferenceClient
+from mistralai import Mistral
+from feifeilib.feifeichat import feifeichat  # Assuming this utility is still relevant; replace with SmartDocAnalyzer logic as needed.
+
+# Initialize the Hugging Face inference client
+client = InferenceClient(api_key=os.getenv('HF_TOKEN'))
+client.headers["x-use-cache"] = "0"
+
+api_key = os.getenv("MISTRAL_API_KEY")
+Mistralclient = Mistral(api_key=api_key)
+
+# Gradio interface setup for SmartDocAnalyzer
+SmartDocAnalyzer = gr.ChatInterface(
+    feifeichat,  # This should be replaced with a suitable function for SmartDocAnalyzer if needed.
+    type="messages",
+    multimodal=True,
+    additional_inputs=[
+        gr.Checkbox(label="Enable Analyzer Mode", value=True),
+        gr.Dropdown(
+            [
+                "meta-llama/Llama-3.3-70B-Instruct",
+                "CohereForAI/c4ai-command-r-plus-08-2024",
+                "Qwen/Qwen2.5-72B-Instruct",
+                "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
+                "NousResearch/Hermes-3-Llama-3.1-8B",
+                "mistralai/Mistral-Nemo-Instruct-2411",
+                "microsoft/phi-4"
+            ],
+            value="mistralai/Mistral-Nemo-Instruct-2411",
+            show_label=False,
+            container=False
+        ),
+        gr.Radio(
+            ["pixtral", "Vision"],
+            value="pixtral",
+            show_label=False,
+            container=False
+        )
+    ],
+    title="SmartDocAnalyzer",
+    description="An advanced document analysis tool powered by AI."
+)
+
+SmartDocAnalyzer.launch()
+
+def encode_image(image_path):
+    """
+    Encode the image at the given path to a base64 JPEG.
+    Resizes image height to 512 pixels while maintaining aspect ratio.
+    """
+    try:
+        image = Image.open(image_path).convert("RGB")
+        base_height = 512
+        h_percent = (base_height / float(image.size[1]))
+        w_size = int((float(image.size[0]) * float(h_percent)))
+        image = image.resize((w_size, base_height), Image.LANCZOS)
+        buffered = BytesIO()
+        image.save(buffered, format="JPEG")
+        return base64.b64encode(buffered.getvalue()).decode("utf-8")
+    except FileNotFoundError:
+        print(f"Error: The file {image_path} was not found.")
+    except Exception as e:
+        print(f"Error: {e}")
+    return None
+
+def feifeiprompt(feifei_select=True, message_text="", history=""):
+    """
+    Constructs a prompt for the chatbot based on the message text and history.
+    Enhancements for SmartDocAnalyzer context can be added here.
+    """
+    input_prompt = []
+    # Special handling for drawing requests ("画" means "draw")
+    if message_text.startswith("画") or message_text.startswith("draw"):
+        feifei_photo = (
+            "You are FeiFei. Background: FeiFei was born in Tokyo and is a natural-born photographer, "
+            "hailing from a family with a long history in photography... [truncated for brevity]"
+        )
+        message_text = message_text.replace("画", "").replace("draw", "")
+        # Chinese prompt, roughly: "The prompt is '{message_text}'; based on it, reply with one English sentence for generating a high-quality photo."
+        message_text = f"提示词是'{message_text}',根据提示词帮我生成一张高质量照片的一句话英文回复"
+        system_prompt = {"role": "system", "content": feifei_photo}
+        user_input_part = {"role": "user", "content": str(message_text)}
+        return [system_prompt, user_input_part]
+
+    # Default prompt construction for the FeiFei character
+    if feifei_select:
+        feifei = (
+            "[Character Name]: Aifeifei (AI Feifei) [Gender]: Female [Age]: 19 years old ... "
+            "[Identity]: User's virtual girlfriend"
+        )
+        system_prompt = {"role": "system", "content": feifei}
+        user_input_part = {"role": "user", "content": str(message_text)}
+
+        pattern = re.compile(r"gradio")
+        if history:
+            history = [item for item in history if not pattern.search(str(item["content"]))]
+            input_prompt = [system_prompt] + history + [user_input_part]
+        else:
+            input_prompt = [system_prompt, user_input_part]
+    else:
+        input_prompt = [{"role": "user", "content": str(message_text)}]
+
+    return input_prompt
+
+def feifeiimgprompt(message_files, message_text, image_mod):
+    """
+    Handles image-based prompts for either 'Vision' or 'pixtral' mode.
+    """
+    message_file = message_files[0]
+    base64_image = encode_image(message_file)
+    if base64_image is None:
+        return
+
+    # Vision mode using the meta-llama model
+    if image_mod == "Vision":
+        messages = [{
+            "role": "user",
+            "content": [
+                {"type": "text", "text": message_text},
+                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
+            ]
+        }]
+        stream = client.chat.completions.create(
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            messages=messages,
+            max_tokens=500,
+            stream=True
+        )
+        temp = ""
+        for chunk in stream:
+            if chunk.choices[0].delta.content is not None:
+                temp += chunk.choices[0].delta.content
+                yield temp
+    # Pixtral mode using the Mistral model
+    else:
+        model = "pixtral-large-2411"
+        messages = [{
+            "role": "user",
+            "content": [
+                {"type": "text", "text": message_text},
+                {"type": "image_url", "image_url": f"data:image/jpeg;base64,{base64_image}"}
+            ]
+        }]
+        partial_message = ""
+        for chunk in Mistralclient.chat.stream(model=model, messages=messages):
+            if chunk.data.choices[0].delta.content is not None:
+                partial_message += chunk.data.choices[0].delta.content
+                yield partial_message
+
+def feifeichatmod(additional_dropdown, input_prompt):
+    """
+    Chooses the appropriate chat backend based on the dropdown selection.
+    """
+    if additional_dropdown == "mistralai/Mistral-Nemo-Instruct-2411":
+        model = "mistral-large-2411"
+        stream_response = Mistralclient.chat.stream(model=model, messages=input_prompt)
+        partial_message = ""
+        for chunk in stream_response:
+            if chunk.data.choices[0].delta.content is not None:
+                partial_message += chunk.data.choices[0].delta.content
+                yield partial_message
+    else:
+        stream = client.chat.completions.create(
+            model=additional_dropdown,
+            messages=input_prompt,
+            temperature=0.5,
+            max_tokens=1024,
+            top_p=0.7,
+            stream=True
+        )
+        temp = ""
+        for chunk in stream:
+            if chunk.choices[0].delta.content is not None:
+                temp += chunk.choices[0].delta.content
+                yield temp
+
+def feifeichat(message, history, feifei_select, additional_dropdown, image_mod):
+    """
+    Main chat function that decides between image-based and text-based handling.
+    This function can be further enhanced with SmartDocAnalyzer-specific logic.
+    """
+    message_text = message.get("text", "")
+    message_files = message.get("files", [])
+
+    if message_files:
+        # Process image input
+        yield from feifeiimgprompt(message_files, message_text, image_mod)
+    else:
+        # Process text input
+        input_prompt = feifeiprompt(feifei_select, message_text, history)
+        yield from feifeichatmod(additional_dropdown, input_prompt)
+
+# Enhancement Note:
+# For the SmartDocAnalyzer space, consider integrating document parsing,
+# OCR functionality, semantic analysis of documents, and more advanced
+# error handling as needed. This template serves as a starting point.
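
The enhancement note above suggests wiring document parsing into this chat flow. The snippet below is a minimal, untested sketch of one way that could look: a document-aware handler that inlines the text of uploaded .txt files into the prompt and otherwise falls back to the committed behaviour. The name analyzer_chat, the .txt-only handling, and the reuse of feifeiprompt/feifeichatmod are illustrative assumptions, not part of this commit; PDF or OCR support would need additional libraries.

# Hypothetical document-aware handler (illustration only; assumes the helpers defined in app.py above).
def analyzer_chat(message, history, feifei_select, additional_dropdown, image_mod):
    """Like feifeichat, but inlines the text of uploaded .txt files into the prompt."""
    message_text = message.get("text", "")
    message_files = message.get("files", [])

    # Collect plain-text documents from the uploaded files, if any.
    doc_texts = []
    for path in message_files:
        if str(path).lower().endswith(".txt"):
            with open(path, "r", encoding="utf-8", errors="ignore") as f:
                doc_texts.append(f.read())

    if doc_texts:
        # Prepend the document content to the user's request and route it
        # through the existing text pipeline.
        joined = "\n\n".join(doc_texts)
        message_text = f"Document:\n{joined}\n\nTask: {message_text}"
        input_prompt = feifeiprompt(feifei_select, message_text, history)
        yield from feifeichatmod(additional_dropdown, input_prompt)
    else:
        # Fall back to the committed behaviour for images and plain chat.
        yield from feifeichat(message, history, feifei_select, additional_dropdown, image_mod)

Passing analyzer_chat instead of feifeichat to gr.ChatInterface would then route text-file uploads through the document path while leaving image and plain-chat handling unchanged.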