Commit cd14485
Parent(s): 55a0b15
Add logging for query results and improve data path handling
app.py
CHANGED
@@ -1,5 +1,7 @@
+import datetime
 import json
 import os
+import uuid
 
 import gradio as gr
 import pandas as pd
@@ -19,6 +21,11 @@ persistent_storage = os.path.exists("/data")
 if persistent_storage:
     # Use persistent storage
     print("Using persistent storage")
+    data_path = "/data"
+else:
+    # Use local storage
+    print("Using local storage")
+    data_path = "./data"
 
 # Embedding model details
 embedding_model_name = "nomadicsynth/research-compass-arxiv-abstracts-embedding-model"
@@ -360,10 +367,29 @@ Return only the JSON object. All key names and string values must be in double q
 # })
 
 
+def log_query_and_results(query_id: str, query: str, results: list[dict]):
+    """Log the query and results to a file."""
+    log_entry = {
+        "timestamp": datetime.now().isoformat(),
+        "query_id": query_id,
+        "query": query,
+        "results": results,
+    }
+    log_file = os.path.join(data_path, "query_results_log.jsonl")
+    with open(log_file, "a") as f:
+        f.write(json.dumps(log_entry) + "\n")
+
+    # print a short summary of the log entry with timestamp
+    print(f"[{log_entry['timestamp']}] Query ID: {query_id}, Results Count: {len(results)}")
+
+
 def find_synergistic_papers(abstract: str, limit=25) -> list[dict]:
     """Find papers synergistic with the given abstract using FAISS with cosine similarity"""
     global dataset
 
+    # Generate a unique ID for the query
+    query_id = str(uuid.uuid4())
+
     # Normalize the abstract for cosine similarity
     abstract = abstract.replace("\n", " ")
     # Replace multiple whitespaces with a single space
@@ -397,6 +423,9 @@ def find_synergistic_papers(abstract: str, limit=25) -> list[dict]:
         }
         papers.append(paper_dict)
 
+    # Log the query and results
+    log_query_and_results(query_id, abstract, papers)
+
     return papers
 
 
@@ -626,14 +655,15 @@ def create_interface():
     )
 
     # Set up logging directories
-
-
-    os.makedirs(
+    flagged_paper_matches_path = data_path + "/flagged_paper_matches"
+    flagged_analyses_path = data_path + "/flagged_analyses"
+    os.makedirs(flagged_paper_matches_path, exist_ok=True)
+    os.makedirs(flagged_analyses_path, exist_ok=True)
 
     # Set up loggers
     paper_match_logger.setup(
         [abstract_input, paper_details_output, paper_feedback, paper_expert, paper_comment],
-
+        flagged_paper_matches_path,
     )
     analysis_logger.setup(
         [
@@ -644,7 +674,7 @@ def create_interface():
             analysis_expert,
             analysis_comment,
         ],
-
+        flagged_analyses_path,
    )
 
     # Display paper details when row is selected
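For reference, a minimal sketch of how the query_results_log.jsonl written by log_query_and_results could be read back for offline analysis. The file name and the entry fields ("timestamp", "query_id", "query", "results") come from the diff above; the load_query_log helper and the ./data default path are illustrative assumptions, not part of this commit.

import json

def load_query_log(log_path: str = "./data/query_results_log.jsonl") -> list[dict]:
    """Read the JSONL query log, one JSON object per line."""
    entries = []
    with open(log_path) as f:
        for line in f:
            line = line.strip()
            if line:  # skip blank lines
                entries.append(json.loads(line))
    return entries

if __name__ == "__main__":
    # Print a one-line summary per logged query (field names assumed from the diff)
    for entry in load_query_log():
        print(entry["timestamp"], entry["query_id"], "results:", len(entry["results"]))

Since each call to log_query_and_results appends one line, the file grows with every query; swapping the list for a generator would keep memory use constant when the log gets large.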