Spaces:

wvsuaidev
/

vqa_authorship_verification

Sleeping

App Files Files Community

louiecerv committed on Feb 14

Commit

bbc9651

1 Parent(s): e10e233

sync with remote

Browse files

Files changed (2) hide show

app.py +144 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,144 @@

+import streamlit as st
+import os
+import google.generativeai as genai
+from huggingface_hub import hf_hub_download
+import base64
+MODEL_ID = "gemini-2.0-flash-exp"  # Keep the model ID as is
+# Configure the Gemini client from the GEMINI_API_KEY environment variable.
+# If configuration raises, show the error in the app and halt this script run.
+try:
+    api_key = os.getenv("GEMINI_API_KEY")
+    model_id = MODEL_ID
+    genai.configure(api_key=api_key)
+except Exception as e:
+    st.error(f"Error: {e}")
+    # st.stop must be *called*; a bare `st.stop` is only an attribute
+    # reference and would let the script carry on after the error.
+    st.stop()
+# Module-level model and chat session used by multimodal_prompt().
+# NOTE(review): Streamlit re-executes this script on each interaction, so
+# `chat` looks like a fresh session on every run - confirm that is intended.
+model = genai.GenerativeModel(MODEL_ID)
+chat = model.start_chat()
+def download_pdf():
+    """
+    Fetch the source PDF from the Hugging Face Hub dataset repo.
+
+    The access token is read from the HF_TOKEN environment variable. If the
+    download raises, the error is shown in the app and the script run is
+    stopped via st.stop().
+    Returns:
+        The value returned by hf_hub_download for the requested file.
+    """
+    dataset_repo = "wvsuaidev/authorship_verfication_dataset"
+    pdf_name = "Authorship_Verification_Linguistic_Divergence.pdf"
+    try:
+        return hf_hub_download(
+            repo_id=dataset_repo,
+            filename=pdf_name,
+            token=os.getenv("HF_TOKEN"),
+            repo_type="dataset",
+        )
+    except Exception as e:
+        st.error(f"Failed to download PDF from Hugging Face Hub: {e}")
+        st.stop()  # Stop if the download fails
+# Seed the per-session state on first run; existing values are left untouched.
+# "uploaded_file_part" holds the uploaded file *part*, not the local path.
+for state_key, initial_value in (("conversation_history", []), ("uploaded_file_part", None)):
+    if state_key not in st.session_state:
+        st.session_state[state_key] = initial_value
+# The PDF is only downloaded when no path has been stored for this session yet.
+if "uploaded_pdf_path" not in st.session_state:
+    st.session_state.uploaded_pdf_path = download_pdf()
+def multimodal_prompt(pdf_path, text_prompt):
+    """
+    Ask Gemini a question about the PDF and record the exchange.
+
+    The PDF is uploaded with genai.upload_file only when no uploaded file
+    part is stored in the session state; the stored part is then sent along
+    with the text on every call.
+    Args:
+        pdf_path: The path to the PDF file.
+        text_prompt: The text prompt for the model.
+    Returns:
+        The model's response text, or an "An error occurred: ..." string if
+        anything in the exchange raised.
+    """
+    try:
+        # Upload lazily and cache the resulting part for later calls.
+        if st.session_state.uploaded_file_part is None:
+            st.session_state.uploaded_file_part = genai.upload_file(pdf_path, mime_type="application/pdf")
+        message_parts = [text_prompt, st.session_state.uploaded_file_part]
+        reply = chat.send_message(message_parts)
+        # Record both sides of the exchange, user turn first.
+        history = st.session_state.conversation_history
+        history.append({"role": "user", "content": text_prompt, "has_pdf": True})
+        answer = reply.text
+        history.append({"role": "assistant", "content": answer})
+        return answer
+    except Exception as e:
+        return f"An error occurred: {e}"
+def display_download_button(file_path, file_name):
+    """
+    Render a download link for the file at file_path.
+
+    The file is read in full, base64-encoded and embedded in a data: URI
+    inside an HTML anchor that is rendered with st.markdown.
+    Args:
+        file_path: Path of the file to offer for download.
+        file_name: Name placed in the anchor's download attribute.
+    Errors are reported with st.error instead of being raised.
+    """
+    try:
+        with open(file_path, "rb") as pdf_file:
+            encoded = base64.b64encode(pdf_file.read()).decode()
+        link_html = f'<a href="data:application/pdf;base64,{encoded}" download="{file_name}">Download the source document (PDF)</a>'
+        # Raw HTML is needed for the anchor, hence unsafe_allow_html.
+        st.markdown(link_html, unsafe_allow_html=True)
+    except FileNotFoundError:
+        st.error("File not found for download.")
+    except Exception as e:
+        st.error(f"Error during download: {e}")
+# --- Main Page ---
+# Page title followed by a collapsible panel holding the usage notes.
+st.title("📚 Authorship Attribution and Verification")
+about = """
+**How to use this App**
+This app leverages Gemini 2.0 to provide insights on the provided document.
+Select a question from the dropdown menu or enter your own question to get
+Gemini's generated response based on the provided document.
+"""
+help_panel = st.expander("How to use this App")
+help_panel.markdown(about)
+# --- Q and A Tab ---
+st.header("Questions and Answers")
+# Fixed list of suggested questions about the source document.
+questions = [
+    "What are the key differences between Authorship Attribution (AA) and Authorship Verification (AV)?",
+    "What is the 'non-comparability problem' in authorship verification, and why is it significant?",
+    "How does the proposed DV-Distance metric address the non-comparability problem?",
+    "Explain the concept of Normal Writing Style (NWS) and its role in the proposed method.",
+    "How are Deviation Vectors (DVs) calculated, and what do they represent?",
+    "Describe the two main methods proposed in the paper: DV-Distance and DV-Projection.",
+    "What are the advantages and limitations of the unsupervised DV-Distance method?",
+    "How does the supervised DV-Projection method improve upon the DV-Distance method?",
+    "What language models were used in the study, and why?",
+    "What datasets were used to evaluate the proposed methods?",
+    "What evaluation metrics were used in the study?",
+    "How did the proposed methods perform compared to the baselines and state-of-the-art methods?",
+    "What were the key findings and trends observed in the experiments?",
+    "Why did the AWD-LSTM based DV-Distance method consistently outperform the RoBERTa based DV-Distance method?",
+    "For what types of documents were the proposed methods most suitable?",
+    "How do the authors explain the performance differences across different document types?",
+    "What are the potential real-world applications of this research?",
+    "What are the limitations of the proposed methods?",
+    "What are some possible directions for future research in this area?",
+    "How does this research contribute to the broader field of Natural Language Processing (NLP)?"
+]
+# The dropdown provides the question unless the user opts into free text.
+selected_question = st.selectbox("Choose a question", questions)
+wants_custom_question = st.checkbox('Check this box to ask a question not listed above')
+if wants_custom_question:
+    # The typed text replaces the dropdown choice.
+    selected_question = st.text_input('Enter a question')
+# Send the chosen question to Gemini when the button is pressed.
+if st.button("Ask AI"):
+    # The free-text box can be empty; do not send a blank question to the model.
+    question = (selected_question or "").strip()
+    if not question:
+        st.warning("Please enter a question before asking the AI.")
+    else:
+        with st.spinner("AI is thinking..."):
+            if st.session_state.uploaded_pdf_path is None:
+                st.session_state.uploaded_pdf_path = download_pdf()
+            filepath = st.session_state.uploaded_pdf_path
+            # Ask for citations from the supplied document (the earlier wording
+            # referred to an "IRR", which this app does not provide).
+            text_prompt = f"Use the provided document to answer the following question: {question}.  Cite the relevant sections of the document."
+            response = multimodal_prompt(filepath, text_prompt)  # Use the downloaded filepath
+            st.markdown(f"**Response:** {response}")
+# Offer the source PDF under the name of the downloaded file rather than a
+# hard-coded, unrelated file name.
+if st.session_state.uploaded_pdf_path:
+    display_download_button(
+        st.session_state.uploaded_pdf_path,
+        os.path.basename(st.session_state.uploaded_pdf_path),
+    )
+# Footer links.
+st.markdown("[Visit our Hugging Face Space!](https://huggingface.co/wvsuaidev)")
+st.markdown("© 2025 WVSU AI Dev Team 🤖 ✨")

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+streamlit
+huggingface_hub
+google-generativeai