Spaces:

OrganizedProgrammers
/

FastAPI_Neo4j

Running

App Files Community

adrienbrdne committed 15 days ago

Commit

fbf2452

verified ·

1 Parent(s): 8ac0b12

Update api.py

Browse files

Files changed (1) hide show

api.py +246 -255

api.py CHANGED Viewed

@@ -1,255 +1,246 @@
-import os
-import requests
-from bs4 import BeautifulSoup
-from fastapi import FastAPI, HTTPException
-from neo4j import GraphDatabase, basic_auth
-import google.generativeai as genai
-import logging # Import du module logging
-# --- Configuration du Logging ---
-# Configuration de base du logger pour afficher les messages INFO et supérieurs.
-# Le format inclut le timestamp, le niveau du log, et le message.
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[
-        logging.StreamHandler() # Affichage des logs dans la console (stderr par défaut)
-        # Vous pourriez ajouter ici un logging.FileHandler("app.log") pour écrire dans un fichier
-    ]
-)
-logger = logging.getLogger(__name__) # Création d'une instance de logger pour ce module
-# --- Configuration des variables d'environnement ---
-NEO4J_URI = os.getenv("NEO4J_URI")
-NEO4J_USER = os.getenv("NEO4J_USER")
-NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
-GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
-# Validation des configurations essentielles
-if not NEO4J_URI or not NEO4J_USER or not NEO4J_PASSWORD:
-    logger.critical("ERREUR CRITIQUE: Les variables d'environnement NEO4J_URI, NEO4J_USER, et NEO4J_PASSWORD doivent être définies.")
-    # Dans une application réelle, vous pourriez vouloir quitter ou empêcher FastAPI de démarrer.
-    # Pour l'instant, nous laissons l'application essayer et échouer lors de l'exécution si elles manquent.
-# Initialisation de l'application FastAPI
-app = FastAPI(
-    title="Arxiv to Neo4j Importer",
-    description="API pour récupérer les données d'articles de recherche depuis Arxiv, les résumer avec Gemini, et les ajouter à Neo4j.",
-    version="1.0.0"
-)
-# --- Initialisation du client API Gemini ---
-gemini_model = None
-if GEMINI_API_KEY:
-    try:
-        genai.configure(api_key=GEMINI_API_KEY)
-        gemini_model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-05-20") # Modèle spécifié
-        logger.info("Client API Gemini initialisé avec succès.")
-    except Exception as e:
-        logger.warning(f"AVERTISSEMENT: Échec de l'initialisation du client API Gemini: {e}. La génération de résumés sera affectée.")
-else:
-    logger.warning("AVERTISSEMENT: La variable d'environnement GEMINI_API_KEY n'est pas définie. La génération de résumés sera désactivée.")
-# --- Fonctions Utilitaires (Adaptées de votre script) ---
-def get_content(number: str, node_type: str) -> str:
-    """Récupère le contenu HTML brut depuis Arxiv ou d'autres sources."""
-    redirect_links = {
-        "Patent": f"https://patents.google.com/patent/{number}/en",
-        "ResearchPaper": f"https://arxiv.org/abs/{number}"
-    }
-    url = redirect_links.get(node_type)
-    if not url:
-        logger.warning(f"Type de noeud inconnu: {node_type} pour le numéro {number}")
-        return ""
-    try:
-        response = requests.get(url, timeout=10) # Ajout d'un timeout
-        response.raise_for_status() # Lève une HTTPError pour les mauvaises réponses (4XX ou 5XX)
-        return response.content.decode('utf-8', errors='replace').replace("\n", "")
-    except requests.exceptions.RequestException as e:
-        logger.error(f"Erreur de requête pour {node_type} numéro: {number} à l'URL {url}: {e}")
-        return ""
-    except Exception as e:
-        logger.error(f"Une erreur inattendue est survenue dans get_content pour {number}: {e}")
-        return ""
-def extract_research_paper_arxiv(rp_number: str, node_type: str) -> dict:
-    """Extrait les informations d'un article de recherche Arxiv et génère un résumé."""
-    raw_content = get_content(rp_number, node_type)
-    rp_data = {
-        "document": f"Arxiv {rp_number}", # ID pour l'article
-        "arxiv_id": rp_number,
-        "title": "Erreur lors de la récupération du contenu ou contenu non trouvé",
-        "abstract": "Erreur lors de la récupération du contenu ou contenu non trouvé",
-        "summary": "Résumé non généré" # Résumé par défaut
-    }
-    if not raw_content:
-        logger.warning(f"Aucun contenu récupéré pour l'ID Arxiv: {rp_number}")
-        return rp_data # Retourne les données d'erreur par défaut
-    try:
-        soup = BeautifulSoup(raw_content, 'html.parser')
-        # Extraction du Titre
-        title_tag = soup.find('h1', class_='title')
-        if title_tag and title_tag.find('span', class_='descriptor'):
-            title_text_candidate = title_tag.find('span', class_='descriptor').next_sibling
-            if title_text_candidate and isinstance(title_text_candidate, str):
-                 rp_data["title"] = title_text_candidate.strip()
-            else:
-                rp_data["title"] = title_tag.get_text(separator=" ", strip=True).replace("Title:", "").strip()
-        elif title_tag : # Fallback si le span descriptor n'est pas là mais h1.title existe
-             rp_data["title"] = title_tag.get_text(separator=" ", strip=True).replace("Title:", "").strip()
-        # Extraction de l'Abstract
-        abstract_tag = soup.find('blockquote', class_='abstract')
-        if abstract_tag:
-            abstract_text = abstract_tag.get_text(strip=True)
-            if abstract_text.lower().startswith('abstract'):
-                abstract_text = abstract_text[len('abstract'):].strip()
-            rp_data["abstract"] = abstract_text
-        # Marquer si le titre ou l'abstract ne sont toujours pas trouvés
-        if rp_data["title"] == "Erreur lors de la récupération du contenu ou contenu non trouvé" and not title_tag:
-            rp_data["title"] = "Titre non trouvé sur la page"
-        if rp_data["abstract"] == "Erreur lors de la récupération du contenu ou contenu non trouvé" and not abstract_tag:
-            rp_data["abstract"] = "Abstract non trouvé sur la page"
-        # Génération du résumé avec l'API Gemini si disponible et si l'abstract existe
-        if gemini_model and rp_data["abstract"] and \
-           not rp_data["abstract"].startswith("Erreur lors de la récupération du contenu") and \
-           not rp_data["abstract"].startswith("Abstract non trouvé"):
-            prompt = f"""Vous êtes un expert en standardisation 3GPP. Résumez les informations clés du document fourni en anglais technique simple, pertinent pour identifier les problèmes clés potentiels.
-            Concentrez-vous sur les défis, les lacunes ou les aspects nouveaux.
-            Voici le document: <document>{rp_data['abstract']}<document>"""
-            try:
-                response = gemini_model.generate_content(prompt)
-                rp_data["summary"] = response.text
-                logger.info(f"Résumé généré pour l'ID Arxiv: {rp_number}")
-            except Exception as e:
-                logger.error(f"Erreur lors de la génération du résumé avec Gemini pour l'ID Arxiv {rp_number}: {e}")
-                rp_data["summary"] = "Erreur lors de la génération du résumé (échec API)"
-        elif not gemini_model:
-            rp_data["summary"] = "Résumé non généré (client API Gemini non disponible)"
-        else:
-            rp_data["summary"] = "Résumé non généré (Abstract indisponible ou problématique)"
-    except Exception as e:
-        logger.error(f"Erreur lors de l'analyse du contenu pour l'ID Arxiv {rp_number}: {e}")
-    return rp_data
-def add_nodes_to_neo4j(driver, data_list: list, node_label: str):
-    """Ajoute une liste de noeuds à Neo4j dans une seule transaction."""
-    if not data_list:
-        logger.warning("Aucune donnée fournie à add_nodes_to_neo4j.")
-        return 0
-    query = (
-        f"UNWIND $data as properties "
-        f"MERGE (n:{node_label} {{arxiv_id: properties.arxiv_id}}) " # Utilise MERGE pour l'idempotence
-        f"ON CREATE SET n = properties "
-        f"ON MATCH SET n += properties" # Met à jour les propriétés si le noeud existe déjà
-    )
-    try:
-        with driver.session(database="neo4j") as session: # Spécifier la base de données si non défaut
-            result = session.execute_write(lambda tx: tx.run(query, data=data_list).consume())
-            nodes_created = result.counters.nodes_created
-            nodes_updated = result.counters.properties_set - (nodes_created * len(data_list[0])) if data_list and nodes_created >=0 else result.counters.properties_set # Estimation
-            if nodes_created > 0:
-                logger.info(f"{nodes_created} nouveau(x) noeud(s) {node_label} ajouté(s) avec succès.")
-            # properties_set compte toutes les propriétés définies, y compris sur les noeuds créés.
-            # Pour les noeuds mis à jour, il faut une logique plus fine si on veut un compte exact des noeuds *juste* mis à jour.
-            # Le plus simple est de regarder si des propriétés ont été mises à jour au-delà de la création.
-            # Note: result.counters.properties_set compte le nombre total de propriétés définies ou mises à jour.
-            # Si un noeud est créé, toutes ses propriétés sont "set". Si un noeud est matché, les propriétés sont "set" via ON MATCH.
-            # Un compte plus précis des "noeuds mis à jour (non créés)" est plus complexe avec UNWIND et MERGE.
-            # On peut se contenter de savoir combien de noeuds ont été affectés au total.
-            summary = result.summary
-            affected_nodes = summary.counters.nodes_created + summary.counters.nodes_deleted # ou autre logique selon la requête
-            logger.info(f"Opération MERGE pour {node_label}: {summary.counters.nodes_created} créé(s), {summary.counters.properties_set} propriétés affectées.")
-            return nodes_created # Retourne le nombre de noeuds effectivement créés
-    except Exception as e:
-        logger.error(f"Erreur Neo4j - Échec de l'ajout/mise à jour des noeuds {node_label}: {e}")
-        raise HTTPException(status_code=500, detail=f"Erreur base de données Neo4j: {e}")
-# --- Endpoint FastAPI ---
-@app.post("/add_research_paper/{arxiv_id}", status_code=201) # 201 Created pour la création réussie
-async def add_single_research_paper(arxiv_id: str):
-    """
-    Récupère un article de recherche d'Arxiv par son ID, extrait les informations,
-    génère un résumé, et l'ajoute/met à jour comme un noeud 'ResearchPaper' dans Neo4j.
-    """
-    node_type = "ResearchPaper"
-    logger.info(f"Traitement de la requête pour l'ID Arxiv: {arxiv_id}")
-    if not NEO4J_URI or not NEO4J_USER or not NEO4J_PASSWORD:
-        logger.error("Les détails de connexion à la base de données Neo4j ne sont pas configurés sur le serveur.")
-        raise HTTPException(status_code=500, detail="Les détails de connexion à la base de données Neo4j ne sont pas configurés sur le serveur.")
-    # Étape 1: Extraire les données de l'article
-    paper_data = extract_research_paper_arxiv(arxiv_id, node_type)
-    if paper_data["title"].startswith("Erreur lors de la récupération du contenu") or paper_data["title"] == "Titre non trouvé sur la page":
-        logger.warning(f"Impossible de récupérer ou d'analyser le contenu pour l'ID Arxiv {arxiv_id}. Titre: {paper_data['title']}")
-        raise HTTPException(status_code=404, detail=f"Impossible de récupérer ou d'analyser le contenu pour l'ID Arxiv {arxiv_id}. Titre: {paper_data['title']}")
-    # Étape 2: Ajouter/Mettre à jour dans Neo4j
-    driver = None # Initialisation pour le bloc finally
-    try:
-        auth_token = basic_auth(NEO4J_USER, NEO4J_PASSWORD)
-        driver = GraphDatabase.driver(NEO4J_URI, auth=auth_token)
-        driver.verify_connectivity()
-        logger.info("Connecté avec succès à Neo4j.")
-        nodes_created_count = add_nodes_to_neo4j(driver, [paper_data], node_type)
-        if nodes_created_count > 0 :
-            message = f"L'article de recherche {arxiv_id} a été ajouté avec succès à Neo4j."
-            status_code = 201 # Created
-        else:
-            # Si MERGE a trouvé un noeud existant et l'a mis à jour, nodes_created_count sera 0.
-            # On considère cela comme un succès (idempotence).
-            message = f"L'article de recherche {arxiv_id} a été traité (potentiellement mis à jour s'il existait déjà)."
-            status_code = 200 # OK (car pas de nouvelle création, mais opération réussie)
-        logger.info(message)
-        return {
-            "message": message,
-            "data": paper_data,
-            "status_code_override": status_code # Pour information, FastAPI utilisera le status_code de l'endpoint ou celui de l'HTTPException
-        }
-    except HTTPException as e: # Re-lever les HTTPExceptions
-        logger.error(f"HTTPException lors de l'opération Neo4j pour {arxiv_id}: {e.detail}")
-        raise e
-    except Exception as e:
-        logger.error(f"Une erreur inattendue est survenue lors de l'opération Neo4j pour {arxiv_id}: {e}", exc_info=True)
-        raise HTTPException(status_code=500, detail=f"Une erreur serveur inattendue est survenue: {e}")
-    finally:
-        if driver:
-            driver.close()
-            logger.info("Connexion Neo4j fermée.")
-# --- Pour exécuter cette application (exemple avec uvicorn) ---
-# 1. Sauvegardez ce code sous main.py
-# 2. Définissez les variables d'environnement: NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD, GEMINI_API_KEY
-# 3. Installez les dépendances: pip install fastapi uvicorn requests beautifulsoup4 neo4j google-generativeai python-dotenv
-#    (python-dotenv est utile pour charger les fichiers .env localement)
-# 4. Exécutez avec Uvicorn: uvicorn main:app --reload
-#
-# Exemple d'utilisation avec curl après avoir démarré le serveur:
-# curl -X POST http://127.0.0.1:8000/add_research_paper/2305.12345
-# (Remplacez 2305.12345 par un ID Arxiv valide)

+import os
+import requests
+from bs4 import BeautifulSoup
+from fastapi import FastAPI, HTTPException
+from neo4j import GraphDatabase, basic_auth
+import google.generativeai as genai
+import logging # Import logging module
+# --- Logging Configuration ---
+# Root logger configuration: emit INFO and above through a single stream handler.
+# Each record is formatted as: timestamp - logger name - level - message.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.StreamHandler() # Console output (StreamHandler writes to stderr by default)
+        # A logging.FileHandler("app.log") could be added here to also write to a file
+    ]
+)
+logger = logging.getLogger(__name__) # Module-level logger used by every function below
+# --- Environment Variable Configuration ---
+NEO4J_URI = os.getenv("NEO4J_URI")
+NEO4J_USER = os.getenv("NEO4J_USER")
+NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+# The three Neo4j settings are mandatory; os.getenv returns None when one is unset
+if not NEO4J_URI or not NEO4J_USER or not NEO4J_PASSWORD:
+    logger.critical("CRITICAL ERROR: NEO4J_URI, NEO4J_USER, and NEO4J_PASSWORD environment variables must be set.")
+    # Startup deliberately continues: this only logs the problem.
+    # The endpoint re-checks these values per request and answers 500 while they are missing.
+# FastAPI application object; title, description and version are passed to the FastAPI constructor
+app = FastAPI(
+    title="Arxiv to Neo4j Importer",
+    description="API to fetch research paper data from Arxiv, summarize it with Gemini, and add it to Neo4j.",
+    version="1.0.0"
+)
+# --- Gemini API Client Initialization ---
+gemini_model = None
+if GEMINI_API_KEY:
+    try:
+        genai.configure(api_key=GEMINI_API_KEY)
+        gemini_model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-05-20") # Hard-coded model name; stays None if this raises
+        logger.info("Gemini API client initialized successfully.")
+    except Exception as e:
+        logger.warning(f"WARNING: Failed to initialize Gemini API client: {e}. Summary generation will be affected.")
+else:
+    logger.warning("WARNING: GEMINI_API_KEY environment variable not set. Summary generation will be disabled.")
+# --- Utility Functions ---
+def get_content(number: str, node_type: str) -> str:
+    """Fetch the raw HTML page for a document and return it as a single line of text.
+    Args:
+        number: Document identifier interpolated into the source URL
+            (arXiv ID for "ResearchPaper", patent number for "Patent").
+        node_type: Source selector; only "Patent" and "ResearchPaper" are known.
+    Returns:
+        The response body decoded as UTF-8 (undecodable bytes replaced) with
+        every newline turned into a space, or "" when the node type is unknown
+        or the request fails. Failures are logged, never raised.
+    """
+    redirect_links = {
+        "Patent": f"https://patents.google.com/patent/{number}/en",
+        "ResearchPaper": f"https://arxiv.org/abs/{number}"
+    }
+    url = redirect_links.get(node_type)
+    if not url:
+        logger.warning(f"Unknown node type: {node_type} for number {number}")
+        return ""
+    try:
+        response = requests.get(url, timeout=10) # Bounded wait so a stalled server cannot hang the request
+        response.raise_for_status() # Turn 4XX/5XX answers into RequestException subclasses handled below
+        html = response.content.decode('utf-8', errors='replace')
+        # Newlines become spaces instead of being deleted: text wrapped across
+        # lines in the page (e.g. an abstract) would otherwise have the last word
+        # of one line glued to the first word of the next.
+        return html.replace("\n", " ")
+    except requests.exceptions.RequestException as e:
+        logger.error(f"Request error for {node_type} number: {number} at URL {url}: {e}")
+        return ""
+    except Exception as e:
+        # Deliberate best-effort boundary: callers treat "" as "nothing fetched".
+        logger.error(f"An unexpected error occurred in get_content for {number}: {e}")
+        return ""
+def extract_research_paper_arxiv(rp_number: str, node_type: str) -> dict:
+    """Scrape an arXiv abstract page and build the property dict for its graph node.
+    Args:
+        rp_number: arXiv identifier of the paper.
+        node_type: Forwarded to get_content() to select the source URL.
+    Returns:
+        A dict with the keys "document", "arxiv_id", "title", "abstract" and
+        "summary". A field that could not be obtained holds a human-readable
+        placeholder ("Error fetching content ...", "... not found on page",
+        "Summary not generated ...") instead of an exception being raised;
+        callers detect failure by inspecting those placeholders.
+    """
+    not_fetched = "Error fetching content or content not found"
+    rp_data = {
+        "document": f"Arxiv {rp_number}", # ID for the paper
+        "arxiv_id": rp_number,
+        "title": not_fetched,
+        "abstract": not_fetched,
+        "summary": "Summary not generated" # Default summary
+    }
+    raw_content = get_content(rp_number, node_type)
+    if not raw_content:
+        logger.warning(f"No content fetched for Arxiv ID: {rp_number}")
+        return rp_data # Still carries the "Error fetching content" placeholders
+    try:
+        soup = BeautifulSoup(raw_content, 'html.parser')
+        # Title: prefer the text node that directly follows the descriptor span.
+        title_tag = soup.find('h1', class_='title')
+        if title_tag:
+            descriptor = title_tag.find('span', class_='descriptor')
+            title_text = descriptor.next_sibling if descriptor else None
+            if isinstance(title_text, str) and title_text.strip():
+                rp_data["title"] = title_text.strip()
+            else:
+                # No usable sibling text (span missing, nested markup, or whitespace
+                # only): fall back to the whole heading so the title is never stored empty.
+                rp_data["title"] = title_tag.get_text(separator=" ", strip=True).replace("Title:", "").strip()
+        else:
+            rp_data["title"] = "Title not found on page"
+        # Abstract: drop a leading "Abstract" label and the colon that may follow it.
+        abstract_tag = soup.find('blockquote', class_='abstract')
+        if abstract_tag:
+            abstract_text = abstract_tag.get_text(strip=True)
+            if abstract_text.lower().startswith('abstract'):
+                abstract_text = abstract_text[len('abstract'):]
+                if abstract_text.startswith(':'):
+                    abstract_text = abstract_text[1:]
+            rp_data["abstract"] = abstract_text.strip()
+        else:
+            rp_data["abstract"] = "Abstract not found on page"
+        # Summary: needs both a configured Gemini client and a real abstract.
+        abstract = rp_data["abstract"]
+        abstract_usable = bool(abstract) and not abstract.startswith(("Error fetching content", "Abstract not found"))
+        if not gemini_model:
+            rp_data["summary"] = "Summary not generated (Gemini API client not available)"
+        elif not abstract_usable:
+            rp_data["summary"] = "Summary not generated (Abstract unavailable or problematic)"
+        else:
+            # The abstract is wrapped in a properly closed <document>...</document> pair.
+            prompt = f"""You are a 3GPP standardization expert. Summarize the key information in the provided document in simple technical English relevant to identifying potential Key Issues.
+            Focus on challenges, gaps, or novel aspects.
+            Here is the document: <document>{rp_data['abstract']}</document>"""
+            try:
+                response = gemini_model.generate_content(prompt)
+                rp_data["summary"] = response.text
+                logger.info(f"Summary generated for Arxiv ID: {rp_number}")
+            except Exception as e:
+                # A failed summary must not discard the scraped title/abstract.
+                logger.error(f"Error generating summary with Gemini for Arxiv ID {rp_number}: {e}")
+                rp_data["summary"] = "Error generating summary (API failure)"
+    except Exception as e:
+        # Parsing is best-effort: whatever was filled in so far is still returned.
+        logger.error(f"Error parsing content for Arxiv ID {rp_number}: {e}")
+    return rp_data
+def add_nodes_to_neo4j(driver, data_list: list, node_label: str):
+    """Create or update one node per dict in data_list inside a single write transaction.
+    Nodes are matched on their arxiv_id property with MERGE: a new node receives
+    exactly the given properties, an existing node has them merged in (+=).
+    Args:
+        driver: Neo4j driver used to open the session on the "neo4j" database.
+        data_list: Property dicts; the query merges on properties.arxiv_id.
+        node_label: Label of the nodes. It is interpolated into the Cypher text
+            (the query does not pass it as a parameter), so it must come from
+            trusted code, never from request input.
+    Returns:
+        The number of nodes newly created; 0 when data_list is empty or no
+        node was created.
+    Raises:
+        HTTPException: status 500 when the session or the query fails.
+    """
+    if not data_list:
+        logger.warning("No data provided to add_nodes_to_neo4j.")
+        return 0
+    query = (
+        f"UNWIND $data as properties "
+        f"MERGE (n:{node_label} {{arxiv_id: properties.arxiv_id}}) " # Use MERGE for idempotency
+        f"ON CREATE SET n = properties "
+        f"ON MATCH SET n += properties" # Update properties if the node already exists
+    )
+    try:
+        with driver.session(database="neo4j") as session: # Specify database if not default
+            # consume() already returns the ResultSummary; the counters hang directly
+            # off it (there is no nested ".summary" attribute to go through).
+            summary = session.execute_write(lambda tx: tx.run(query, data=data_list).consume())
+        counters = summary.counters
+        nodes_created = counters.nodes_created
+        if nodes_created > 0:
+            logger.info(f"{nodes_created} new {node_label} node(s) added successfully.")
+        logger.info(f"MERGE operation for {node_label}: {counters.nodes_created} created, {counters.properties_set} properties affected.")
+        return nodes_created # Return the number of nodes actually created
+    except Exception as e:
+        logger.error(f"Neo4j Error - Failed to add/update {node_label} nodes: {e}")
+        raise HTTPException(status_code=500, detail=f"Neo4j database error: {e}") from e
+# --- FastAPI Endpoint ---
+@app.post("/add_research_paper/{arxiv_id}", status_code=201) # 201 Created for successful creation
+async def add_single_research_paper(arxiv_id: str):
+    """
+    Fetches a research paper from Arxiv by its ID, extracts information,
+    generates a summary, and adds/updates it as a 'ResearchPaper' node in Neo4j.
+    Responds with 404 when the paper page cannot be fetched or parsed and with
+    500 when Neo4j is not configured or the database operation fails. The HTTP
+    status of a successful call is always 201; the body field
+    "response_status_info" tells a creation (201) from an update (200).
+    """
+    import asyncio # Local import: only this handler needs it
+    node_type = "ResearchPaper"
+    logger.info(f"Processing request for Arxiv ID: {arxiv_id}")
+    if not NEO4J_URI or not NEO4J_USER or not NEO4J_PASSWORD:
+        logger.error("Neo4j database connection details are not configured on the server.")
+        raise HTTPException(status_code=500, detail="Neo4j database connection details are not configured on the server.")
+    # The scraping, Gemini and Neo4j calls below are all blocking. Running them
+    # directly in this coroutine would stall the event loop (and every other
+    # request) for their whole duration, so they are pushed to the default executor.
+    loop = asyncio.get_running_loop()
+    # Step 1: Extract paper data
+    paper_data = await loop.run_in_executor(None, extract_research_paper_arxiv, arxiv_id, node_type)
+    if paper_data["title"].startswith("Error fetching content") or paper_data["title"] == "Title not found on page":
+        logger.warning(f"Could not fetch or parse content for Arxiv ID {arxiv_id}. Title: {paper_data['title']}")
+        raise HTTPException(status_code=404, detail=f"Could not fetch or parse content for Arxiv ID {arxiv_id}. Title: {paper_data['title']}")
+    # Step 2: Add/Update in Neo4j
+    def _store_in_neo4j():
+        """Open a driver, MERGE the paper node, and always close the driver again."""
+        driver_instance = None # Initialize for the finally block
+        try:
+            auth_token = basic_auth(NEO4J_USER, NEO4J_PASSWORD)
+            driver_instance = GraphDatabase.driver(NEO4J_URI, auth=auth_token)
+            driver_instance.verify_connectivity()
+            logger.info("Successfully connected to Neo4j.")
+            return add_nodes_to_neo4j(driver_instance, [paper_data], node_type)
+        finally:
+            if driver_instance:
+                driver_instance.close()
+                logger.info("Neo4j connection closed.")
+    try:
+        nodes_created_count = await loop.run_in_executor(None, _store_in_neo4j)
+    except HTTPException as e: # Already carries the right status; log and re-raise untouched
+        logger.error(f"HTTPException during Neo4j operation for {arxiv_id}: {e.detail}")
+        raise
+    except Exception as e:
+        logger.error(f"An unexpected error occurred during Neo4j operation for {arxiv_id}: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"An unexpected server error occurred: {e}") from e
+    if nodes_created_count > 0:
+        message = f"Research paper {arxiv_id} was successfully added to Neo4j."
+        status_code_response = 201 # Created
+    else:
+        # MERGE matched an existing node and updated it, so nothing was created.
+        # This is still a success (idempotency).
+        message = f"Research paper {arxiv_id} was processed (potentially updated if it already existed)."
+        status_code_response = 200 # OK (no new creation, but operation successful)
+    logger.info(message)
+    # status_code_response is informational only: the actual HTTP status stays
+    # the decorator's 201 unless an HTTPException was raised above.
+    return {
+        "message": message,
+        "data": paper_data,
+        "response_status_info": status_code_response
+    }