sync with remote
Browse files- app.py +144 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
import google.generativeai as genai
|
4 |
+
from huggingface_hub import hf_hub_download
|
5 |
+
import base64
|
6 |
+
|
7 |
+
MODEL_ID = "gemini-2.0-flash-exp"  # Keep the model ID as is

# Configure the Gemini client with the key read from GEMINI_API_KEY.
try:
    api_key = os.getenv("GEMINI_API_KEY")
    model_id = MODEL_ID
    genai.configure(api_key=api_key)
except Exception as e:
    st.error(f"Error: {e}")
    # st.stop has to be *called*: the bare attribute reference used before was
    # a no-op, so the script kept running after a configuration failure.
    st.stop()

# NOTE(review): Streamlit re-executes this script on every interaction, so the
# chat session below appears to be recreated on each run -- confirm whether it
# should be kept in st.session_state to preserve multi-turn context.
model = genai.GenerativeModel(MODEL_ID)
chat = model.start_chat()
|
18 |
+
|
19 |
+
def download_pdf():
    """
    Fetch the source PDF from the Hugging Face Hub dataset repository.

    The access token is read from the HF_TOKEN environment variable.

    Returns:
        The local path reported by hf_hub_download for the fetched file.
        On any failure an error is shown in the app and st.stop() is called.
    """
    try:
        return hf_hub_download(
            repo_id="wvsuaidev/authorship_verfication_dataset",
            filename="Authorship_Verification_Linguistic_Divergence.pdf",
            token=os.getenv("HF_TOKEN"),
            repo_type="dataset",
        )
    except Exception as e:
        st.error(f"Failed to download PDF from Hugging Face Hub: {e}")
        st.stop()  # Nothing useful can be shown without the document
|
32 |
+
|
33 |
+
# Seed session state, touching only the keys that are not already present.
if "conversation_history" not in st.session_state:
    st.session_state["conversation_history"] = []
if "uploaded_file_part" not in st.session_state:
    # Filled in by multimodal_prompt after the first upload to Gemini.
    st.session_state["uploaded_file_part"] = None
if "uploaded_pdf_path" not in st.session_state:
    st.session_state["uploaded_pdf_path"] = download_pdf()
|
40 |
+
|
41 |
+
def multimodal_prompt(pdf_path, text_prompt):
    """
    Send a text prompt together with the PDF to the Gemini chat session.

    The PDF is uploaded only when no upload is stored in session state yet;
    afterwards the stored upload is reused. Each successful exchange is
    appended to st.session_state.conversation_history.

    Args:
        pdf_path: The path to the PDF file.
        text_prompt: The text prompt for the model.

    Returns:
        The model's response text, or an "An error occurred: ..." message
        if anything in the exchange raises.
    """
    try:
        state = st.session_state

        # Upload once, then keep reusing the stored file handle.
        if state.uploaded_file_part is None:
            state.uploaded_file_part = genai.upload_file(pdf_path, mime_type="application/pdf")

        reply = chat.send_message([text_prompt, state.uploaded_file_part])

        # Record the user turn first, then the assistant turn.
        history = state.conversation_history
        history.append({"role": "user", "content": text_prompt, "has_pdf": True})
        answer = reply.text
        history.append({"role": "assistant", "content": answer})
        return answer
    except Exception as e:
        return f"An error occurred: {e}"
|
68 |
+
|
69 |
+
def display_download_button(file_path, file_name):
    """
    Render a link that lets the user download the file at *file_path*.

    The file content is embedded in the page as a base64 ``data:`` URI.
    Failures are reported with st.error instead of being raised.

    Args:
        file_path: Path of the PDF file to offer for download.
        file_name: File name suggested to the browser for the saved copy.
    """
    from html import escape  # local import: only this function needs it

    try:
        with open(file_path, "rb") as f:
            file_bytes = f.read()
        b64 = base64.b64encode(file_bytes).decode()
        # The markup is rendered with unsafe_allow_html, so escape the name to
        # keep quotes or ampersands from breaking the download="..." attribute.
        safe_name = escape(str(file_name), quote=True)
        href = f'<a href="data:application/pdf;base64,{b64}" download="{safe_name}">Download the source document (PDF)</a>'
        st.markdown(href, unsafe_allow_html=True)
    except FileNotFoundError:
        st.error("File not found for download.")
    except Exception as e:
        st.error(f"Error during download: {e}")
|
80 |
+
|
81 |
+
|
82 |
+
# --- Main Page ---
st.title("📚 Authorship Attribution and Verification")
about = """
**How to use this App**
This app leverages Gemini 2.0 to provide insights on the provided document.
Select a question from the dropdown menu or enter your own question to get
Gemini's generated response based on the provided document.
"""

with st.expander("How to use this App"):
    st.markdown(about)

# --- Q and A Tab ---
st.header("Questions and Answers")

# Predefined questions about the source document, offered in the dropdown.
questions = [
    "What are the key differences between Authorship Attribution (AA) and Authorship Verification (AV)?",
    "What is the 'non-comparability problem' in authorship verification, and why is it significant?",
    "How does the proposed DV-Distance metric address the non-comparability problem?",
    "Explain the concept of Normal Writing Style (NWS) and its role in the proposed method.",
    "How are Deviation Vectors (DVs) calculated, and what do they represent?",
    "Describe the two main methods proposed in the paper: DV-Distance and DV-Projection.",
    "What are the advantages and limitations of the unsupervised DV-Distance method?",
    "How does the supervised DV-Projection method improve upon the DV-Distance method?",
    "What language models were used in the study, and why?",
    "What datasets were used to evaluate the proposed methods?",
    "What evaluation metrics were used in the study?",
    "How did the proposed methods perform compared to the baselines and state-of-the-art methods?",
    "What were the key findings and trends observed in the experiments?",
    "Why did the AWD-LSTM based DV-Distance method consistently outperform the RoBERTa based DV-Distance method?",
    "For what types of documents were the proposed methods most suitable?",
    "How do the authors explain the performance differences across different document types?",
    "What are the potential real-world applications of this research?",
    "What are the limitations of the proposed methods?",
    "What are some possible directions for future research in this area?",
    "How does this research contribute to the broader field of Natural Language Processing (NLP)?",
]

# The dropdown supplies the question unless the user opts into free text.
selected_question = st.selectbox("Choose a question", questions)

use_custom_question = st.checkbox('Check this box to ask a question not listed above')
if use_custom_question:
    # The typed question replaces the dropdown choice.
    selected_question = st.text_input('Enter a question')
|
129 |
+
|
130 |
+
# Handle the "Ask AI" button: validate the question, query Gemini and render
# the reply.
if st.button("Ask AI"):
    question = (selected_question or "").strip()
    if not question:
        # The free-text box can be left empty; do not send a blank prompt.
        st.warning("Please enter a question.")
    else:
        with st.spinner("AI is thinking..."):
            if st.session_state.uploaded_pdf_path is None:
                st.session_state.uploaded_pdf_path = download_pdf()

            filepath = st.session_state.uploaded_pdf_path
            # The question goes on its own line so its punctuation is kept
            # as typed, and the citation request names the provided document.
            text_prompt = (
                "Use the provided document to answer the following question: "
                f"{question}\nCite the relevant sections of the document."
            )
            response = multimodal_prompt(filepath, text_prompt)  # Use the downloaded filepath
            st.markdown(f"**Response:** {response}")

if st.session_state.uploaded_pdf_path:
    # Offer the file under its own name instead of an unrelated hard-coded one.
    display_download_button(
        st.session_state.uploaded_pdf_path,
        os.path.basename(st.session_state.uploaded_pdf_path),
    )

st.markdown("[Visit our Hugging Face Space!](https://huggingface.co/wvsuaidev)")
st.markdown("© 2025 WVSU AI Dev Team 🤖 ✨")
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
huggingface_hub
|
3 |
+
google-generativeai
|