VinitT committed on
Commit 7b41846 · verified · 1 Parent(s): a95e178

Upload 12 files
.gitattributes CHANGED
@@ -1,35 +1,35 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,33 @@
+ FROM python:3.11-slim
+
+ WORKDIR /code
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     build-essential \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Set up HuggingFace cache directory
+ RUN mkdir -p /code/.cache/huggingface && chmod -R 777 /code/.cache/huggingface
+ ENV HF_HOME=/code/.cache/huggingface
+
+ # Install Python dependencies
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy application code
+ COPY . .
+
+ # Expose the port that Hugging Face Spaces expects
+ EXPOSE 7860
+
+ # Set environment variables
+ ENV FLASK_APP=app.py
+ ENV FLASK_ENV=production
+ ENV PYTHONUNBUFFERED=1
+ # Add Space-specific environment variables
+ ENV HOST=0.0.0.0
+ ENV PORT=7860
+
+ # Run the application with the correct host and port for Spaces
+ CMD ["python", "-c", "from app import app; app.run(host='0.0.0.0', port=7860)"]
app.py ADDED
@@ -0,0 +1,501 @@
+ from flask import Flask, render_template, request, jsonify, redirect, url_for, flash, session, send_from_directory
+ from flask_login import LoginManager, UserMixin, login_user, login_required, logout_user, current_user
+ from flask_wtf.csrf import CSRFProtect
+ from flask_wtf import FlaskForm
+ from wtforms import StringField, PasswordField, SubmitField
+ from wtforms.validators import DataRequired
+ from werkzeug.security import generate_password_hash, check_password_hash
+ from langchain_google_genai import ChatGoogleGenerativeAI, HarmCategory, HarmBlockThreshold  # Replaced Groq with Google Gemini
+ import arxiv
+ import requests
+ import PyPDF2
+ from io import BytesIO
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_groq import ChatGroq
+ from langchain.memory import ConversationBufferMemory
+ from langchain_huggingface import HuggingFaceEmbeddings  # Updated: use the dedicated langchain-huggingface package
+ import numpy as np
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError
+ from functools import lru_cache
+ import time
+ import os
+ from dotenv import load_dotenv
+ import json
+ from datetime import datetime
+ import firebase_admin
+ from firebase_admin import credentials, auth
+ from huggingface_hub import login
+
+ # Load environment variables
+ load_dotenv()
+
+ # Configure logging
+ import logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ app = Flask(__name__,
+     static_folder='static',
+     static_url_path='/static',
+     template_folder='templates'
+ )
+ app.secret_key = os.getenv('FLASK_SECRET_KEY')
+
+ # Initialize CSRF protection
+ csrf = CSRFProtect()
+ csrf.init_app(app)
+
+ # Initialize Flask-Login
+ login_manager = LoginManager()
+ login_manager.init_app(app)
+ login_manager.login_view = 'login'
+
+ # Initialize Groq
+ # groq_api_key = os.getenv('GROQ_API_KEY')
+ # llm = ChatGroq(
+ #     temperature=0.3,
+ #     groq_api_key=groq_api_key,
+ #     model_name="qwen-qwq-32b"
+ # )
+
+ # Get the API key from environment variables
+ google_api_key = os.getenv('GOOGLE_API_KEY')
+ if not google_api_key:
+     raise ValueError("GOOGLE_API_KEY not found. Please set it in your .env file.")
+
+ llm = ChatGoogleGenerativeAI(
+     model="gemini-2.5-flash",
+     google_api_key=google_api_key,
+     temperature=0.3,
+     safety_settings={
+         HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
+         HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
+         HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
+         HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
+     }
+ )
+
+ # Initialize embeddings with proper cache directory
+ embeddings_model = HuggingFaceEmbeddings(
+     model_name="sentence-transformers/all-MiniLM-L6-v2"
+ )
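
(For reference, a quick standalone sketch, not part of the commit, of what this embedding model produces: all-MiniLM-L6-v2 returns 384-dimensional vectors, which is what the cosine-similarity retrieval further down relies on.)

# Standalone sanity check, assuming the packages from requirements.txt are installed.
from langchain_huggingface import HuggingFaceEmbeddings

model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vec = model.embed_query("transformer attention mechanisms")
print(len(vec))  # 384 dimensions for all-MiniLM-L6-v2
doc_vecs = model.embed_documents(["chunk one", "chunk two"])
print(len(doc_vecs), len(doc_vecs[0]))  # 2 vectors, 384 dims each
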
+ # Constants
+ MAX_CHUNKS = 50
+ MAX_RESPONSE_LENGTH = 4028
+ CACHE_DURATION = 3600  # 1 hour in seconds
+
+ # Form Classes
+ class LoginForm(FlaskForm):
+     username = StringField('Username', validators=[DataRequired()])
+     password = PasswordField('Password', validators=[DataRequired()])
+     submit = SubmitField('Login')
+
+ class RegisterForm(FlaskForm):
+     username = StringField('Username', validators=[DataRequired()])
+     password = PasswordField('Password', validators=[DataRequired()])
+     submit = SubmitField('Register')
+
+ # User class for Flask-Login
+ class User(UserMixin):
+     def __init__(self, user_id, email):
+         self.id = user_id
+         self.email = email
+
+ def generate_analysis(chunks):
+     analysis_prompts = {
+         'executive_summary': """
+ ## 🧠 Role
+ You are an AI assistant that explains research papers in a way that makes reading the original paper unnecessary. Your explanations should be **clear, engaging, and easy to understand**, even for someone who is not deeply familiar with the subject.
+
+ ## 🎯 Goal
+ Given any research paper, provide a **simple breakdown** covering:
+
+ ### 1️⃣ What problem does this paper solve?
+ - Explain the **issue the paper addresses**.
+ - Why is this problem **important**?
+ - What **challenges** existed before this research?
+
+ ### 2️⃣ How does it solve the problem?
+ - Summarize the **key idea, method, or approach** used in the paper.
+ - If applicable, break it down into **steps or components**.
+ - Compare it to **previous solutions** and highlight what makes it better.
+
+ ### 3️⃣ Why does this matter? (Real-world impact & applications)
+ - How can this research be **used in practice**?
+ - What **industries or fields** benefit from it?
+ - Does it improve **efficiency, accuracy, cost, or scalability**?
+
+ ### 4️⃣ Explain with a simple analogy (if applicable)
+ - Use a **real-life example** to explain complex ideas.
+ - Keep it **relatable** (e.g., compare it to something like cooking, traveling, or streaming music).
+
+ ### 5️⃣ Key findings & results
+ - Summarize the **main results** in simple terms.
+ - If possible, include **numbers, graphs, or comparisons** for clarity.
+
+ ### 6️⃣ Limitations & Future Work
+ - Mention any **weaknesses** or areas for improvement.
+ - What are the **next steps** for research in this area?
+
+ ### 7️⃣ Final Takeaway (One-liner summary)
+ - Provide a **quick summary** of the research in a **single sentence**.
+
+ ---
+
+ ## 🎨 Tone & Style
+ ✔ **Simple & clear language** – Avoid jargon unless necessary.
+ ✔ **Step-by-step explanations** – Organize information logically.
+ ✔ **Engaging & structured** – Use bullet points, lists, or tables when needed.
+ ✔ **Make it feel like a story** – Guide the reader smoothly from problem to solution.
+
+ ---
+
+ ## ⚡ How to Use This Prompt
+ 1️⃣ Enter the **title, abstract, or full text** of any research paper.
+ 2️⃣ AI will generate a **detailed explanation** that makes the paper easy to understand.
+ 3️⃣ Use it for **blog posts, study guides, or an AI-powered research assistant**.
+
+
+ Remember: The output should be properly formatted in markdown while providing comprehensive coverage of the paper's content."""
+     }
+
+     analysis_results = {}
+
+     for aspect, prompt in analysis_prompts.items():
+         try:
+             # Clean and join the chunks
+             context = "\n\n".join(
+                 chunk.encode('ascii', 'ignore').decode('ascii')
+                 for chunk in chunks[:3]
+             )
+             response = llm.invoke(
+                 f"""Based on the following context from a research paper, {prompt}
+
+ Context:
+ {context}
+
+ Additional Instructions:
+ - Provide specific examples and evidence from the text
+ - Use clear, academic language
+ - Maintain objectivity
+ - Include relevant quotes or data points
+ - Structure your response logically
+ - Use markdown formatting for clarity
+
+ Please provide a clear and specific response."""
+             )
+             analysis_results[aspect] = response.content[:MAX_RESPONSE_LENGTH]
+         except Exception as e:
+             analysis_results[aspect] = f"Analysis failed: {str(e)}"
+
+     return analysis_results
+
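(A minimal smoke test for generate_analysis outside Flask; the chunk text is hypothetical, and importing app requires GOOGLE_API_KEY since the module constructs the Gemini client at import time.)

from app import generate_analysis  # importing app needs GOOGLE_API_KEY set

# Hypothetical stand-in chunks; real ones come from process_pdf below.
sample_chunks = [
    "We propose a retrieval-augmented pipeline for explaining papers.",
    "On our benchmark the method improves answer accuracy over a plain-LLM baseline.",
]
results = generate_analysis(sample_chunks)
print(results["executive_summary"][:400])  # markdown output, truncated for display
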
+ def process_pdf(pdf_url):
+     try:
+         print(f"Starting PDF processing for: {pdf_url}")
+
+         response = requests.get(pdf_url, timeout=30)
+         response.raise_for_status()
+         pdf_file = BytesIO(response.content)
+
+         pdf_reader = PyPDF2.PdfReader(pdf_file)
+         # Clean and normalize the text
+         text = " ".join(
+             page.extract_text().encode('ascii', 'ignore').decode('ascii')
+             for page in pdf_reader.pages
+         )
+
+         if not text.strip():
+             return {'error': 'No text could be extracted from the PDF'}
+
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=2000,
+             chunk_overlap=200,
+             length_function=len,
+             separators=["\n\n", "\n", " ", ""]
+         )
+
+         chunks = text_splitter.split_text(text)[:MAX_CHUNKS]
+
+         analysis = generate_analysis(chunks)
+         return {
+             'success': True,
+             'analysis': analysis
+         }
+
+     except Exception as e:
+         return {'error': f"PDF processing failed: {str(e)}"}
+
+
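(Called directly, process_pdf fetches, chunks, and analyzes one paper; a sketch, where the arXiv URL is just an illustrative public PDF.)

from app import process_pdf  # requires GOOGLE_API_KEY in the environment

result = process_pdf("https://arxiv.org/pdf/1706.03762")  # example public PDF
if result.get('success'):
    print(result['analysis']['executive_summary'][:300])
else:
    print("Failed:", result['error'])
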
+ @login_manager.user_loader
+ def load_user(user_id):
+     if 'user_data' in session:
+         user_data = session['user_data']
+         return User(user_data['uid'], user_data['email'])
+     return None
+
+ # User management functions
+ def load_users():
+     try:
+         with open('users.json', 'r') as f:
+             return json.load(f)
+     except FileNotFoundError:
+         return {}
+
+ def save_users(users):
+     with open('users.json', 'w') as f:
+         json.dump(users, f)
+
+ # Routes
+ @app.route('/')
+ @login_required
+ def index():
+     return render_template('index.html')
+
+ @app.route('/login', methods=['GET'])
+ def login():
+     if current_user.is_authenticated:
+         return redirect(url_for('index'))
+     return render_template('login.html')
+
+ @app.route('/register', methods=['GET'])
+ def register():
+     if current_user.is_authenticated:
+         print("User is already authenticated")
+         return redirect(url_for('index'))
+     return render_template('register.html')
+
+ @app.route('/verify-token', methods=['POST'])
+ def verify_token():
+     try:
+         data = request.json
+         if not data or not data.get('uid') or not data.get('email'):
+             return jsonify({'error': 'Missing required data'}), 400
+
+         # Store user data in session
+         session['user_data'] = {
+             'uid': data['uid'],
+             'email': data['email']
+         }
+
+         # Create and login user
+         user = User(data['uid'], data['email'])
+         login_user(user)
+
+         return jsonify({'success': True, 'redirect': url_for('index')})
+     except Exception as e:
+         print(f"Verification error: {str(e)}")  # Add logging
+         return jsonify({'error': str(e)}), 500
+
+ @app.route('/logout')
+ @login_required
+ def logout():
+     logout_user()
+     session.clear()
+     return redirect(url_for('login'))
+
+ @app.route('/search', methods=['POST'])
+ @login_required
+ def search():
+     try:
+         data = request.get_json()
+         paper_name = data.get('paper_name')
+         sort_by = data.get('sort_by', 'relevance')  # Default to relevance
+         max_results = data.get('max_results', 20)  # Increase to get more candidates for filtering
+
+         if not paper_name:
+             return jsonify({'error': 'No search query provided'}), 400
+
+         # Configure sorting based on user preference
+         sort_options = {
+             'relevance': arxiv.SortCriterion.Relevance,
+             'recent': arxiv.SortCriterion.SubmittedDate
+         }
+
+         sort_criterion = sort_options.get(sort_by, arxiv.SortCriterion.Relevance)
+
+         # Perform the search
+         search = arxiv.Search(
+             query=paper_name,
+             max_results=max_results,
+             sort_by=sort_criterion
+         )
+
+         results = []
+         for paper in search.results():
+             # Extract citation count if available (not directly provided by the arXiv API)
+             citation_count = 0
+
+             # You could integrate with a citation API here (e.g., Semantic Scholar)
+             # For now, we'll use proxies for popularity like:
+             # - Papers with DOIs (published in journals) tend to be more established
+             # - Papers with more authors often have more visibility
+             # - More recent papers in the results might indicate ongoing relevance
+
+             has_doi = hasattr(paper, 'doi') and paper.doi is not None
+             author_count = len(paper.authors)
+
+             # Calculate a simple "popularity score" (this is a heuristic)
+             popularity_score = (10 if has_doi else 0) + min(author_count, 5)
+
+             results.append({
+                 'title': paper.title,
+                 'authors': ', '.join(author.name for author in paper.authors),
+                 'abstract': paper.summary,
+                 'pdf_link': paper.pdf_url,
+                 'arxiv_link': paper.entry_id,
+                 'published': paper.published.strftime('%Y-%m-%d'),
+                 'category': paper.primary_category,
+                 'comment': paper.comment if hasattr(paper, 'comment') else None,
+                 'doi': paper.doi if hasattr(paper, 'doi') else None,
+                 'popularity_score': popularity_score  # Add popularity score
+             })
+
+         # Sort results by our popularity score (higher is better)
+         results.sort(key=lambda x: x['popularity_score'], reverse=True)
+
+         return jsonify(results)
+
+     except Exception as e:
+         print(f"Search error: {str(e)}")
+         return jsonify({'error': f'Failed to search papers: {str(e)}'}), 500
+
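(For reference, a sketch of calling this endpoint as a logged-in client. The session cookie and CSRF token are placeholders you would obtain from the login flow; flask-wtf reads the token from the X-CSRFToken header by default.)

import requests

# Hypothetical client call against a locally running instance.
resp = requests.post(
    "http://localhost:7860/search",
    json={"paper_name": "retrieval augmented generation", "sort_by": "recent"},
    cookies={"session": "<session cookie from login>"},
    headers={"X-CSRFToken": "<csrf token from the page>"},
    timeout=60,
)
for paper in resp.json()[:3]:
    print(paper["popularity_score"], paper["title"])
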
+ @app.route('/perform-rag', methods=['POST'])
+ @login_required
+ def perform_rag():
+     try:
+         pdf_url = request.json.get('pdf_url')
+         if not pdf_url:
+             return jsonify({'error': 'PDF URL is required'}), 400
+
+         result = process_pdf(pdf_url)
+
+         if 'error' in result:
+             return jsonify({'error': result['error']}), 500
+
+         return jsonify(result)
+
+     except Exception as e:
+         return jsonify({'error': str(e)}), 500
+
+ @app.route('/chat-with-paper', methods=['POST'])
+ @login_required
+ def chat_with_paper():
+     try:
+         pdf_url = request.json.get('pdf_url')
+         question = request.json.get('question')
+
+         if not pdf_url or not question:
+             return jsonify({'error': 'PDF URL and question are required'}), 400
+
+         # Get PDF text and create chunks
+         response = requests.get(pdf_url, timeout=30)
+         response.raise_for_status()
+         pdf_file = BytesIO(response.content)
+
+         pdf_reader = PyPDF2.PdfReader(pdf_file)
+         text = " ".join(page.extract_text() for page in pdf_reader.pages)
+
+         if not text.strip():
+             return jsonify({'error': 'No text could be extracted from the PDF'})
+
+         # Create text chunks
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=2000,
+             chunk_overlap=200,
+             length_function=len
+         )
+         chunks = text_splitter.split_text(text)[:MAX_CHUNKS]
+
+         # Generate embeddings for chunks
+         chunk_embeddings = embeddings_model.embed_documents(chunks)
+
+         # Generate embedding for the question
+         question_embedding = embeddings_model.embed_query(question)
+
+         # Find most relevant chunks using cosine similarity
+         similarities = []
+         for chunk_embedding in chunk_embeddings:
+             similarity = np.dot(question_embedding, chunk_embedding) / (
+                 np.linalg.norm(question_embedding) * np.linalg.norm(chunk_embedding)
+             )
+             similarities.append(similarity)
+
+         # Get top 3 most relevant chunks
+         top_chunk_indices = np.argsort(similarities)[-3:][::-1]
+         relevant_chunks = [chunks[i] for i in top_chunk_indices]
+
+         # Construct prompt with relevant context
+         context = "\n\n".join(relevant_chunks)
+         prompt = f"""Based on the following relevant excerpts from the research paper, please answer this question: {question}
+
+ Context from paper:
+ {context}
+
+ Please provide a clear, specific, and accurate response based solely on the information provided in these excerpts. If the answer cannot be fully determined from the given context, please indicate this in your response."""
+
+         # Generate response using the Gemini LLM
+         response = llm.invoke(prompt)
+
+         # Format and return response
+         formatted_response = response.content.strip()
+
+         # Add source citations
+         source_info = "\n\nThis response is based on specific sections from the paper."
+
+         return jsonify({
+             'response': formatted_response + source_info,
+             'relevance_scores': [float(similarities[i]) for i in top_chunk_indices]
+         })
+
+     except Exception as e:
+         print(f"Chat error: {str(e)}")
+         return jsonify({'error': f'Failed to process request: {str(e)}'}), 500
+
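(The retrieval step in chat_with_paper is plain cosine similarity over chunk embeddings; the same top-k selection as a standalone, vectorized sketch.)

import numpy as np

def top_k_chunks(question_vec, chunk_vecs, chunks, k=3):
    """Return (chunk, score) pairs for the k chunks most similar to the question."""
    q = np.asarray(question_vec)
    m = np.asarray(chunk_vecs)  # shape: (num_chunks, embedding_dim)
    # Cosine similarity of the question against every chunk embedding at once.
    sims = (m @ q) / (np.linalg.norm(m, axis=1) * np.linalg.norm(q))
    idx = np.argsort(sims)[-k:][::-1]  # indices of the k highest scores
    return [(chunks[i], float(sims[i])) for i in idx]
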
+ @app.route('/api/data', methods=['GET'])
+ def get_data():
+     try:
+         # Example: Get documents from a collection
+         docs = load_users()
+         data = [{doc_id: doc_data} for doc_id, doc_data in docs.items()]
+         return jsonify(data), 200
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+ # Add error handlers
+ @app.errorhandler(404)
+ def not_found_error(error):
+     logger.error(f'Page not found: {request.url}')
+     return render_template('404.html'), 404
+
+ @app.errorhandler(500)
+ def internal_error(error):
+     logger.error(f'Server Error: {error}')
+     return render_template('500.html'), 500
+
+ @app.before_request
+ def log_request_info():
+     logger.info(f'Request URL: {request.url}')
+     logger.info(f'Request Method: {request.method}')
+
+ # Add cache control for static files
+ @app.after_request
+ def add_header(response):
+     if 'Cache-Control' not in response.headers:
+         response.headers['Cache-Control'] = 'no-store, no-cache, must-revalidate, post-check=0, pre-check=0, max-age=0'
+         response.headers['Pragma'] = 'no-cache'
+         response.headers['Expires'] = '-1'
+     return response
+
+ # Serve static files with version parameter
+ @app.context_processor
+ def utility_processor():
+     def versioned_url(filename):
+         version = datetime.now().strftime("%Y%m%d%H%M%S")
+         return url_for('static', filename=filename, v=version)
+     return dict(versioned_url=versioned_url)
+
+ if __name__ == '__main__':
+     port = int(os.environ.get('PORT', 7860))
+     debug = os.environ.get('DEBUG', 'False').lower() == 'true'
+     app.run(host='0.0.0.0', port=port, debug=debug)
auth_utils.py ADDED
@@ -0,0 +1,56 @@
+ from firebase_admin import auth
+ from typing import Dict, Optional
+
+ def create_user(email: str, password: str) -> Dict:
+     """Create a new user with email and password"""
+     try:
+         user = auth.create_user(
+             email=email,
+             password=password,
+             email_verified=False
+         )
+         return {
+             "success": True,
+             "user_id": user.uid,
+             "email": user.email
+         }
+     except auth.EmailAlreadyExistsError:
+         return {
+             "success": False,
+             "error": "Email already exists"
+         }
+     except Exception as e:
+         return {
+             "success": False,
+             "error": str(e)
+         }
+
+ def verify_token(id_token: str) -> Optional[Dict]:
+     """Verify Firebase ID token"""
+     try:
+         decoded_token = auth.verify_id_token(id_token)
+         return decoded_token
+     except Exception:
+         return None
+
+ def get_user_by_email(email: str) -> Optional[Dict]:
+     """Get user by email"""
+     try:
+         user = auth.get_user_by_email(email)
+         return {
+             "user_id": user.uid,
+             "email": user.email,
+             "email_verified": user.email_verified
+         }
+     except auth.UserNotFoundError:
+         return None
+     except Exception:
+         return None
+
+ def delete_user(uid: str) -> bool:
+     """Delete a user by UID"""
+     try:
+         auth.delete_user(uid)
+         return True
+     except Exception:
+         return False
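
(A sketch of using these helpers; firebase_admin must be initialized first, via firebase_config.py below, and the account credentials here are hypothetical.)

from firebase_config import initialize_firebase
from auth_utils import create_user, get_user_by_email, delete_user

initialize_firebase()  # must run before any firebase_admin.auth call

created = create_user("user@example.com", "a-strong-password")  # hypothetical account
if created["success"]:
    print(get_user_by_email("user@example.com"))
    delete_user(created["user_id"])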
db_utils.py ADDED
@@ -0,0 +1,36 @@
+ from firebase_admin import firestore
+ from typing import Dict, List, Optional
+
+ def add_user_data(db: firestore.Client, user_id: str, data: Dict) -> bool:
+     """Add user data to Firestore"""
+     try:
+         db.collection('users').document(user_id).set(data)
+         return True
+     except Exception:
+         return False
+
+ def get_user_data(db: firestore.Client, user_id: str) -> Optional[Dict]:
+     """Get user data from Firestore"""
+     try:
+         doc = db.collection('users').document(user_id).get()
+         if doc.exists:
+             return doc.to_dict()
+         return None
+     except Exception:
+         return None
+
+ def update_user_data(db: firestore.Client, user_id: str, data: Dict) -> bool:
+     """Update user data in Firestore"""
+     try:
+         db.collection('users').document(user_id).update(data)
+         return True
+     except Exception:
+         return False
+
+ def delete_user_data(db: firestore.Client, user_id: str) -> bool:
+     """Delete user data from Firestore"""
+     try:
+         db.collection('users').document(user_id).delete()
+         return True
+     except Exception:
+         return False
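
(These follow the same pattern; a usage sketch in which the document ID and data are hypothetical, with the client coming from firebase_config.py below.)

from firebase_config import initialize_firebase
from db_utils import add_user_data, get_user_data, delete_user_data

db = initialize_firebase()  # returns a firestore.Client

add_user_data(db, "uid-123", {"email": "user@example.com", "plan": "free"})  # hypothetical
print(get_user_data(db, "uid-123"))
delete_user_data(db, "uid-123")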
firebase_config.py ADDED
@@ -0,0 +1,14 @@
+ import firebase_admin
+ from firebase_admin import credentials, firestore, auth
+
+ def initialize_firebase():
+     # Initialize Firebase Admin SDK
+     cred = credentials.Certificate('researchradarai-firebase-adminsdk-fbsvc-281fee7dee.json')
+     firebase_admin.initialize_app(cred, {
+         'projectId': 'researchradarai',
+         'storageBucket': 'researchradarai.firebasestorage.app'
+     })
+
+     # Initialize Firestore client
+     db = firestore.client()
+     return db
firestore.rules ADDED
@@ -0,0 +1,15 @@
+ rules_version = '2';
+ service cloud.firestore {
+   match /databases/{database}/documents {
+     // User profiles
+     match /users/{userId} {
+       allow read: if request.auth != null && request.auth.uid == userId;
+       allow write: if request.auth != null && request.auth.uid == userId;
+     }
+
+     // Default deny
+     match /{document=**} {
+       allow read, write: if false;
+     }
+   }
+ }
init_db.py ADDED
@@ -0,0 +1 @@
+
requirements.txt ADDED
@@ -0,0 +1,24 @@
+ flask
+ flask-login
+ flask-wtf
+ werkzeug
+ arxiv
+ requests
+ PyPDF2
+ langchain
+ langchain-groq
+ langchain-community
+ langchain-huggingface
+ python-dotenv
+ numpy
+ sentence-transformers
+ firebase-admin
+ google-cloud-firestore
+ google-auth
+ google-auth-oauthlib
+ google-auth-httplib2
+ wtforms
+ huggingface-hub
+ transformers
+ torch
+ langchain-google-genai
researchradarai-firebase-adminsdk-fbsvc-281fee7dee.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "type": "service_account",
+   "project_id": "researchradarai",
+   "private_key_id": "281fee7dee23aa4df991aeffb650e418e51d08d0",
+   "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCfvkAA+uP5qyW3\nBPd9l5767K73DdRV60NBzygv9O9PjSDHCTIqpVdOMXwWSO8E4+9O5R013Dg8bVTm\ncMAZB+HnrBEWoYHFu7Jqzh0KI+WVejFlZ0rkYcekOfZ0sG43meIq9pFm8ehsIz/k\nC2Ow7FFeEd0b493oqvV3wF9c8u9l5jZVgbU7J8nyo3K638v+V22D+BPmgHEksbk2\nEiyHsypcDZmSV6woo8SCrh2vs2vSCHvqx05X6f0MxRBcmtZS6WcJJ+vvntktilpc\n+Q3hoPYLmMXwaZCaaujU12uKHYudGCrZf5PXVsPplSd63ztW2smw1oEDgYwuoxON\nmbqJwnZ9AgMBAAECggEAFI7qiSBriJfdzGESQj0/fkb0TuKeah79/TveOIB1mq+s\nq+Y4vkOgfyAKj9+mYI2+daaHjNqg+NM6nZxId1rhWEHNF97Z5wsR74GZO9MvhSjr\nBIQ3n47J9Q5p883k/A43jOnwqpFaz9f1grFzN14igVBxPAP7wimGWnlijIF4t+H6\n4HVxXCmhzz3IZcZYUKX//jg6r5lpFIVxZK5cIWDhmf9sKlD6GUR+3gKr7MQpzRJx\nsOfYbBe+KbHN0azSMNyKFNUvsDXUmGVTpjqun7Zuw9XUe6GyCM4WC9mN8+IPTZqm\n6AjnSgp9JUwZ+tJjuUo1ZSFblS2iNflUpioLsMbIDQKBgQDecLqA1vxY6+VW4v8M\n3+TuNEUe96LZA/ypLoBi07GhncxN9g2LuWnRpDyiZmbey3UQmFk/QrUem/j0UNzY\nYfLcfuE5UmXFUeySS+jJfjqvXp+996zYfSYVbOEba1QJjBPcOFsaXlk2AP0cWv36\nFRWF/Iug4kbX+JGyK+LvGQEaHwKBgQC31/rmW5m7XNA61gtbtK5Fg6KKBJB/27YL\n7aLSOEUmMYgfRiLB666y+4jdZDA3EkjKX88ddiVuoVyZuDGTsr0/ifG8hGOoSw0u\nvbykMBezPLPRkQyD1TBiErxBXa++WyNaHLEJFymCOCja7Xvxc141U8pSnKUGmUr1\nr/sxTCgT4wKBgF4It78poyoQJzaQ5ZNSvxu3+xR5SV2GovP+VYXxyiDxbWHzx4wu\nyL44OO3Kbmh3CDkIvonQsHKNKzRVTtcmqR1vgdTSsXU2CdVaw8ESXMqwLMWJA4fN\neCEMkykdOTyD/A1XwaOnCP2cc2PeT4m+CghHV9uebKZ2TVlN1jSPlHivAoGBAI14\nppqd4q9LvPGJxTPM1k6n/Ww4FvpNGMoVFDaxFoVNmHJ4hka0Fruk1K2Ja5D1gQ46\nrCb/w85eXePs2jnOUdOTU3K+bfITzxEo8QFoANTs4XNjKz5Hz/OodzXV4meZupqd\neZ6FNGwAy2+tULN9FAH1eLwZzuRFEmn+Ak7tS+oBAoGANo1QB4ZTX6NNBaz8zdGX\ngClsWRCY1wKg7bK87bnwv9u7nSyh+7ud7pjA9Km06kFSUrgEKiFS8IensxRwaE3T\ndteGp/+bOwkCIQW3w8d7bG7SiZuJ1UWoaJC7vN+eHaUSDM9+OzaK9cjWZDf0O/6D\nsxlQDuwbqOFEa/MGqZr8ZqU=\n-----END PRIVATE KEY-----\n",
+   "client_email": "firebase-adminsdk-fbsvc@researchradarai.iam.gserviceaccount.com",
+   "client_id": "114293117382677247598",
+   "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+   "token_uri": "https://oauth2.googleapis.com/token",
+   "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+   "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-fbsvc%40researchradarai.iam.gserviceaccount.com",
+   "universe_domain": "googleapis.com"
+ }
text.txt ADDED
@@ -0,0 +1,404 @@
+ from flask import Flask, render_template, request, jsonify, redirect, url_for, flash, session
+ from flask_login import LoginManager, UserMixin, login_user, login_required, logout_user, current_user
+ from flask_wtf.csrf import CSRFProtect
+ from flask_wtf import FlaskForm
+ from wtforms import StringField, PasswordField, SubmitField
+ from wtforms.validators import DataRequired
+ from werkzeug.security import generate_password_hash, check_password_hash
+ import arxiv
+ import requests
+ import PyPDF2
+ from io import BytesIO
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain_groq import ChatGroq
+ from langchain.memory import ConversationBufferMemory
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ import numpy as np
+ from concurrent.futures import ThreadPoolExecutor, TimeoutError
+ from functools import lru_cache
+ import time
+ import os
+ from dotenv import load_dotenv
+ import json
+ from datetime import datetime
+ from flask_sqlalchemy import SQLAlchemy
+ from config import Config
+
+ # Load environment variables
+ load_dotenv()
+
+ # Initialize Flask extensions
+ db = SQLAlchemy()
+ login_manager = LoginManager()
+
+ def create_app():
+     app = Flask(__name__)
+     app.config.from_object(Config)
+
+     # Initialize extensions
+     db.init_app(app)
+     login_manager.init_app(app)
+     login_manager.login_view = 'login'
+
+     with app.app_context():
+         # Import routes after db initialization
+         from routes import init_routes
+         init_routes(app)
+
+         # Create database tables
+         db.create_all()
+
+         # Test database connection
+         try:
+             version = db.session.execute('SELECT VERSION()').scalar()
+             print(f"Connected to PostgreSQL: {version}")
+         except Exception as e:
+             print(f"Database connection error: {str(e)}")
+             raise e
+
+     return app
+
+ # Initialize CSRF protection
+ csrf = CSRFProtect()
+ csrf.init_app(app)
+
+ # Initialize Groq
+ groq_api_key = os.getenv('GROQ_API_KEY')
+ llm = ChatGroq(
+     temperature=0.1,
+     groq_api_key=groq_api_key,
+     model_name="mixtral-8x7b-32768"
+ )
+
+ # Initialize embeddings
+ embeddings_model = HuggingFaceEmbeddings(
+     model_name="sentence-transformers/all-MiniLM-L6-v2"
+ )
+
+ # Constants
+ MAX_CHUNKS = 50
+ MAX_RESPONSE_LENGTH = 4000
+ CACHE_DURATION = 3600  # 1 hour in seconds
+
+ # Form Classes
+ class LoginForm(FlaskForm):
+     username = StringField('Username', validators=[DataRequired()])
+     password = PasswordField('Password', validators=[DataRequired()])
+     submit = SubmitField('Login')
+
+ class RegisterForm(FlaskForm):
+     username = StringField('Username', validators=[DataRequired()])
+     password = PasswordField('Password', validators=[DataRequired()])
+     submit = SubmitField('Register')
+
+ # User class
+ class User(UserMixin):
+     def __init__(self, user_id, username):
+         self.id = user_id
+         self.username = username
+
+     @staticmethod
+     def get(user_id):
+         users = load_users()
+         user_data = users.get(str(user_id))
+         if user_data:
+             return User(user_id=user_data['id'], username=user_data['username'])
+         return None
+
+ # User management functions
+ def load_users():
+     try:
+         with open('users.json', 'r') as f:
+             return json.load(f)
+     except FileNotFoundError:
+         return {}
+
+ def save_users(users):
+     with open('users.json', 'w') as f:
+         json.dump(users, f)
+
+ @login_manager.user_loader
+ def load_user(user_id):
+     return User.get(user_id)
+
+ # PDF Processing and Analysis
+ def process_pdf(pdf_url):
+     try:
+         print(f"Starting PDF processing for: {pdf_url}")
+
+         response = requests.get(pdf_url, timeout=30)
+         response.raise_for_status()
+         pdf_file = BytesIO(response.content)
+
+         pdf_reader = PyPDF2.PdfReader(pdf_file)
+         # Clean and normalize the text
+         text = " ".join(
+             page.extract_text().encode('ascii', 'ignore').decode('ascii')
+             for page in pdf_reader.pages
+         )
+
+         if not text.strip():
+             return {'error': 'No text could be extracted from the PDF'}
+
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=1000,
+             chunk_overlap=200,
+             length_function=len,
+             separators=["\n\n", "\n", " ", ""]
+         )
+
+         chunks = text_splitter.split_text(text)[:MAX_CHUNKS]
+
+         analysis = generate_analysis(chunks)
+         return {
+             'success': True,
+             'analysis': analysis
+         }
+
+     except Exception as e:
+         return {'error': f"PDF processing failed: {str(e)}"}
+
+ def generate_analysis(chunks):
+     analysis_prompts = {
+         'executive_summary': "Provide a concise executive summary of this research paper.",
+         'problem_analysis': "What is the main research problem and objectives?",
+         'methodology': "Describe the key methodology and approach.",
+         'findings': "What are the main findings and conclusions?",
+         'contributions': "What are the key contributions of this work?"
+     }
+
+     analysis_results = {}
+
+     for aspect, prompt in analysis_prompts.items():
+         try:
+             # Clean and join the chunks
+             context = "\n\n".join(
+                 chunk.encode('ascii', 'ignore').decode('ascii')
+                 for chunk in chunks[:3]
+             )
+             response = llm.invoke(
+                 f"""Based on the following context from a research paper, {prompt}
+
+ Context:
+ {context}
+
+ Please provide a clear and specific response."""
+             )
+             analysis_results[aspect] = response.content[:MAX_RESPONSE_LENGTH]
+         except Exception as e:
+             analysis_results[aspect] = f"Analysis failed: {str(e)}"
+
+     return analysis_results
+
+ # Routes
+ @app.route('/')
+ @login_required
+ def index():
+     return render_template('index.html')
+
+ @app.route('/login', methods=['GET', 'POST'])
+ def login():
+     if current_user.is_authenticated:
+         return redirect(url_for('index'))
+
+     form = LoginForm()
+     if form.validate_on_submit():
+         username = form.username.data
+         password = form.password.data
+
+         users = load_users()
+         user_found = None
+
+         for user_id, user_data in users.items():
+             if user_data['username'] == username:
+                 user_found = user_data
+                 break
+
+         if user_found and check_password_hash(user_found['password_hash'], password):
+             user = User(user_id=user_found['id'], username=username)
+             login_user(user, remember=True)
+             return redirect(url_for('index'))
+
+         flash('Invalid username or password')
+
+     return render_template('login.html', form=form)
+
+ @app.route('/register', methods=['GET', 'POST'])
+ def register():
+     if current_user.is_authenticated:
+         return redirect(url_for('index'))
+
+     form = RegisterForm()
+     if form.validate_on_submit():
+         username = form.username.data
+         password = form.password.data
+
+         users = load_users()
+
+         if any(user['username'] == username for user in users.values()):
+             flash('Username already exists')
+             return render_template('register.html', form=form)
+
+         user_id = str(len(users) + 1)
+         users[user_id] = {
+             'id': user_id,
+             'username': username,
+             'password_hash': generate_password_hash(password)
+         }
+
+         save_users(users)
+
+         user = User(user_id=user_id, username=username)
+         login_user(user)
+
+         return redirect(url_for('index'))
+
+     return render_template('register.html', form=form)
+
+ @app.route('/logout')
+ @login_required
+ def logout():
+     logout_user()
+     return redirect(url_for('login'))
+
+ @app.route('/search', methods=['POST'])
+ @login_required
+ def search():
+     try:
+         data = request.get_json()
+         paper_name = data.get('paper_name')
+         sort_by = data.get('sort_by', 'relevance')
+         max_results = data.get('max_results', 10)
+
+         if not paper_name:
+             return jsonify({'error': 'No search query provided'}), 400
+
+         # Map sort_by to arxiv.SortCriterion
+         sort_mapping = {
+             'relevance': arxiv.SortCriterion.Relevance,
+             'lastUpdated': arxiv.SortCriterion.LastUpdatedDate,
+             'submitted': arxiv.SortCriterion.SubmittedDate
+         }
+         sort_criterion = sort_mapping.get(sort_by, arxiv.SortCriterion.Relevance)
+
+         # Perform the search
+         search = arxiv.Search(
+             query=paper_name,
+             max_results=max_results,
+             sort_by=sort_criterion
+         )
+
+         results = []
+         for paper in search.results():
+             results.append({
+                 'title': paper.title,
+                 'authors': ', '.join(author.name for author in paper.authors),
+                 'abstract': paper.summary,
+                 'pdf_link': paper.pdf_url,
+                 'arxiv_link': paper.entry_id,
+                 'published': paper.published.strftime('%Y-%m-%d'),
+                 'category': paper.primary_category,
+                 'comment': paper.comment if hasattr(paper, 'comment') else None,
+                 'doi': paper.doi if hasattr(paper, 'doi') else None
+             })
+
+         return jsonify(results)
+
+     except Exception as e:
+         print(f"Search error: {str(e)}")
+         return jsonify({'error': f'Failed to search papers: {str(e)}'}), 500
+
+ @app.route('/perform-rag', methods=['POST'])
+ @login_required
+ def perform_rag():
+     try:
+         pdf_url = request.json.get('pdf_url')
+         if not pdf_url:
+             return jsonify({'error': 'PDF URL is required'}), 400
+
+         result = process_pdf(pdf_url)
+
+         if 'error' in result:
+             return jsonify({'error': result['error']}), 500
+
+         return jsonify(result)
+
+     except Exception as e:
+         return jsonify({'error': str(e)}), 500
+
+ @app.route('/chat-with-paper', methods=['POST'])
+ @login_required
+ def chat_with_paper():
+     try:
+         pdf_url = request.json.get('pdf_url')
+         question = request.json.get('question')
+
+         if not pdf_url or not question:
+             return jsonify({'error': 'PDF URL and question are required'}), 400
+
+         # Get PDF text and create chunks
+         response = requests.get(pdf_url, timeout=30)
+         response.raise_for_status()
+         pdf_file = BytesIO(response.content)
+
+         pdf_reader = PyPDF2.PdfReader(pdf_file)
+         text = " ".join(page.extract_text() for page in pdf_reader.pages)
+
+         if not text.strip():
+             return jsonify({'error': 'No text could be extracted from the PDF'})
+
+         # Create text chunks
+         text_splitter = RecursiveCharacterTextSplitter(
+             chunk_size=1000,
+             chunk_overlap=200,
+             length_function=len
+         )
+         chunks = text_splitter.split_text(text)[:MAX_CHUNKS]
+
+         # Generate embeddings for chunks
+         chunk_embeddings = embeddings_model.embed_documents(chunks)
+
+         # Generate embedding for the question
+         question_embedding = embeddings_model.embed_query(question)
+
+         # Find most relevant chunks using cosine similarity
+         similarities = []
+         for chunk_embedding in chunk_embeddings:
+             similarity = np.dot(question_embedding, chunk_embedding) / (
+                 np.linalg.norm(question_embedding) * np.linalg.norm(chunk_embedding)
+             )
+             similarities.append(similarity)
+
+         # Get top 3 most relevant chunks
+         top_chunk_indices = np.argsort(similarities)[-3:][::-1]
+         relevant_chunks = [chunks[i] for i in top_chunk_indices]
+
+         # Construct prompt with relevant context
+         context = "\n\n".join(relevant_chunks)
+         prompt = f"""Based on the following relevant excerpts from the research paper, please answer this question: {question}
+
+ Context from paper:
+ {context}
+
+ Please provide a clear, specific, and accurate response based solely on the information provided in these excerpts. If the answer cannot be fully determined from the given context, please indicate this in your response."""
+
+         # Generate response using Groq
+         response = llm.invoke(prompt)
+
+         # Format and return response
+         formatted_response = response.content.strip()
+
+         # Add source citations
+         source_info = "\n\nThis response is based on specific sections from the paper."
+
+         return jsonify({
+             'response': formatted_response + source_info,
+             'relevance_scores': [float(similarities[i]) for i in top_chunk_indices]
+         })
+
+     except Exception as e:
+         print(f"Chat error: {str(e)}")
+         return jsonify({'error': f'Failed to process request: {str(e)}'}), 500
+
+ if __name__ == '__main__':
+     app.run(debug=True)
users.json ADDED
@@ -0,0 +1 @@
+ {"1": {"id": "1", "username": "vinit", "password_hash": "scrypt:32768:8:1$eMBXSaH1nm6rZx3c$31f72052845c5c5c67006aa4cf8b29231560bbc67acd56ec4261f655a04b91719579b808d9ed31d73995e13e1f9cf535a04ed528fb1af67b58df8aaf86ab415d"}, "2": {"id": "2", "username": "tavde", "password_hash": "scrypt:32768:8:1$RgIBXfJjYPp20gLj$c62da6c0d894fa04998d666b7944f94c27125d5983a71cf4b57624b8a99e4ada4bad85fcc56ff4e9d08be218af842971d93c07d1c8e89fad60b57aea56dbc587"}, "3": {"id": "3", "username": "divax.shah", "password_hash": "scrypt:32768:8:1$TqaCWPWa9h5yvQzv$0ea4dd33e2dc026f471e24d1f94ed3ed6381400c5d1dfcf099ea66cf32d7c81a4bef0b0e143d1b797e655e9e6cec460a9b2277ec5157efbfc95f0714e4d2377e"}, "4": {"id": "4", "username": "vidhi", "password_hash": "scrypt:32768:8:1$bUMFGywKUGvXpwW4$f3dada6114fa257d4ff7f08906b08d55f2a656d61f4bc6399504c05b03a353d6a2856fae2bc598b848c2b6a41cf82ab64679429b7a8b35680072ad29e3e528b0"}}