louiecerv committed
Commit bbc9651 · 1 Parent(s): e10e233

sync with remote

Files changed (2)
  1. app.py +144 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,144 @@
+ import streamlit as st
+ import os
+ import google.generativeai as genai
+ from huggingface_hub import hf_hub_download
+ import base64
+
+ MODEL_ID = "gemini-2.0-flash-exp"  # Keep the model ID as is
+ try:
+     api_key = os.getenv("GEMINI_API_KEY")
+     model_id = MODEL_ID
+     genai.configure(api_key=api_key)
+ except Exception as e:
+     st.error(f"Error: {e}")
+     st.stop()
+
+ model = genai.GenerativeModel(MODEL_ID)
+ chat = model.start_chat()
+
+ def download_pdf():
+     """
+     Downloads the PDF file from the Hugging Face Hub using the correct repo path and filename.
+     """
+     try:
+         hf_token = os.getenv("HF_TOKEN")
+         repo_id = "wvsuaidev/authorship_verfication_dataset"  # Corrected dataset repo path
+         filename = "Authorship_Verification_Linguistic_Divergence.pdf"
+         filepath = hf_hub_download(repo_id=repo_id, filename=filename, token=hf_token, repo_type="dataset")
+         return filepath
+     except Exception as e:
+         st.error(f"Failed to download PDF from Hugging Face Hub: {e}")
+         st.stop()  # Stop if the download fails
+
+ # Initialize conversation history in Streamlit session state
+ if "conversation_history" not in st.session_state:
+     st.session_state.conversation_history = []
+ if "uploaded_file_part" not in st.session_state:  # Store the file *part*
+     st.session_state.uploaded_file_part = None
+ if "uploaded_pdf_path" not in st.session_state:
+     st.session_state.uploaded_pdf_path = download_pdf()
+
+ def multimodal_prompt(pdf_path, text_prompt):
+     """
+     Sends a multimodal prompt to Gemini, handling file uploads efficiently.
+     Args:
+         pdf_path: The path to the PDF file.
+         text_prompt: The text prompt for the model.
+     Returns:
+         The model's response as a string, or an error message.
+     """
+     try:
+         if st.session_state.uploaded_file_part is None:  # First time, upload
+             pdf_part = genai.upload_file(pdf_path, mime_type="application/pdf")
+             st.session_state.uploaded_file_part = pdf_part
+             prompt = [text_prompt, pdf_part]  # First turn includes the actual file
+         else:  # Subsequent turns, reference the file
+             prompt = [text_prompt, st.session_state.uploaded_file_part]  # Subsequent turns include the file reference
+
+         response = chat.send_message(prompt)
+
+         # Update conversation history
+         st.session_state.conversation_history.append({"role": "user", "content": text_prompt, "has_pdf": True})
+         st.session_state.conversation_history.append({"role": "assistant", "content": response.text})
+         return response.text
+
+     except Exception as e:
+         return f"An error occurred: {e}"
+
+ def display_download_button(file_path, file_name):
+     try:
+         with open(file_path, "rb") as f:
+             file_bytes = f.read()
+         b64 = base64.b64encode(file_bytes).decode()
+         href = f'<a href="data:application/pdf;base64,{b64}" download="{file_name}">Download the source document (PDF)</a>'
+         st.markdown(href, unsafe_allow_html=True)
+     except FileNotFoundError:
+         st.error("File not found for download.")
+     except Exception as e:
+         st.error(f"Error during download: {e}")
+
+
+ # --- Main Page ---
+ st.title("📚 Authorship Attribution and Verification")
+ about = """
+ **How to use this App**
+ This app leverages Gemini 2.0 to provide insights on the provided document.
+ Select a question from the dropdown menu or enter your own question to get
+ Gemini's generated response based on the provided document.
+ """
+
+ with st.expander("How to use this App"):
+     st.markdown(about)
+
+ # --- Q and A Tab ---
+ st.header("Questions and Answers")
+
+ # Predefined questions about the source document
+
+ questions = [
+     "What are the key differences between Authorship Attribution (AA) and Authorship Verification (AV)?",
+     "What is the 'non-comparability problem' in authorship verification, and why is it significant?",
+     "How does the proposed DV-Distance metric address the non-comparability problem?",
+     "Explain the concept of Normal Writing Style (NWS) and its role in the proposed method.",
+     "How are Deviation Vectors (DVs) calculated, and what do they represent?",
+     "Describe the two main methods proposed in the paper: DV-Distance and DV-Projection.",
+     "What are the advantages and limitations of the unsupervised DV-Distance method?",
+     "How does the supervised DV-Projection method improve upon the DV-Distance method?",
+     "What language models were used in the study, and why?",
+     "What datasets were used to evaluate the proposed methods?",
+     "What evaluation metrics were used in the study?",
+     "How did the proposed methods perform compared to the baselines and state-of-the-art methods?",
+     "What were the key findings and trends observed in the experiments?",
+     "Why did the AWD-LSTM based DV-Distance method consistently outperform the RoBERTa based DV-Distance method?",
+     "For what types of documents were the proposed methods most suitable?",
+     "How do the authors explain the performance differences across different document types?",
+     "What are the potential real-world applications of this research?",
+     "What are the limitations of the proposed methods?",
+     "What are some possible directions for future research in this area?",
+     "How does this research contribute to the broader field of Natural Language Processing (NLP)?"
+ ]
+
+ # Create a selection box
+ selected_question = st.selectbox("Choose a question", questions)
+
+ # Display a checkbox
+ if st.checkbox('Check this box to ask a question not listed above'):
+     # If the checkbox is checked, display a text box
+     selected_question = st.text_input('Enter a question')
+
+ if st.button("Ask AI"):
+     with st.spinner("AI is thinking..."):
+         if st.session_state.uploaded_pdf_path is None:
+             st.session_state.uploaded_pdf_path = download_pdf()
+
+         filepath = st.session_state.uploaded_pdf_path
+         text_prompt = f"Use the provided document to answer the following question: {selected_question}. Cite the relevant sections of the IRR."
+         response = multimodal_prompt(filepath, text_prompt)  # Use the downloaded filepath
+         st.markdown(f"**Response:** {response}")
+
+ if st.session_state.uploaded_pdf_path:
+     display_download_button(st.session_state.uploaded_pdf_path, "Visual_Understanding.pdf")
+
+ st.markdown("[Visit our Hugging Face Space!](https://huggingface.co/wvsuaidev)")
+ st.markdown("© 2025 WVSU AI Dev Team 🤖 ✨")
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ streamlit
+ huggingface_hub
+ google-generativeai