adrienbrdne committed on
Commit
ee947c3
·
verified ·
1 Parent(s): 12ab024

Update api.py

Browse files
Files changed (1) hide show
  1. api.py +61 -7
api.py CHANGED
@@ -80,7 +80,7 @@ def get_content(number: str, node_type: str) -> str:
80
  logger.error(f"An unexpected error occurred in get_content for {number}: {e}")
81
  return ""
82
 
83
- def extract_research_paper_arxiv(rp_number: str, node_type: str = "ResearchPaper") -> dict:
84
  """Extracts information from an Arxiv research paper and generates a summary."""
85
 
86
  rp_data = {
@@ -154,7 +154,7 @@ def extract_research_paper_arxiv(rp_number: str, node_type: str = "ResearchPaper
154
  rp_data["summary"] = "Summary not generated (Abstract unavailable or problematic)"
155
  return rp_data
156
 
157
- def extract_patent_data(patent_number: str, node_type: str = "Patent"):
158
  """
159
  Extracts information from a Google Patents page with robust error handling.
160
  """
@@ -223,19 +223,19 @@ def extract_patent_data(patent_number: str, node_type: str = "Patent"):
223
 
224
  prompt = f"""You are a 3GPP standardization expert. Summarize the key information in the provided document in simple technical English relevant to identifying potential Key Issues.
225
  Focus on challenges, gaps, or novel aspects.
226
- Here is the document: <document>{rp_data['abstract']}<document>"""
227
 
228
  try:
229
  model = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")
230
  response = model.generate_content(prompt)
231
 
232
- rp_data["summary"] = response.text
233
  logger.info(f"Summary generated for Patent ID: {patent_number}")
234
  except Exception as e:
235
  logger.error(f"Error generating summary with Gemini for Patent ID {patent_number}: {e}")
236
- rp_data["summary"] = "Error generating summary (API failure)"
237
  else:
238
- rp_data["summary"] = "Summary not generated (Abstract unavailable or problematic)"
239
  return patent_data
240
 
241
  def add_nodes_to_neo4j(driver, data_list: list, node_type: str):
@@ -291,7 +291,7 @@ async def add_single_research_paper(arxiv_id: str):
291
  raise HTTPException(status_code=500, detail="Neo4j database connection details are not configured on the server.")
292
 
293
  # Step 1: Extract paper data
294
- paper_data = extract_research_paper_arxiv(arxiv_id, node_type)
295
 
296
  if paper_data["title"].startswith("Error fetching content") or paper_data["title"] == "Title not found on page":
297
  logger.warning(f"Could not fetch or parse content for Arxiv ID {arxiv_id}. Title: {paper_data['title']}")
@@ -325,6 +325,60 @@ async def add_single_research_paper(arxiv_id: str):
325
  except Exception as e:
326
  logger.error(f"An unexpected error occurred during Neo4j operation for {arxiv_id}: {e}", exc_info=True)
327
  raise HTTPException(status_code=500, detail=f"An unexpected server error occurred: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  finally:
329
  if driver_instance:
330
  driver_instance.close()
 
80
  logger.error(f"An unexpected error occurred in get_content for {number}: {e}")
81
  return ""
82
 
83
+ def extract_arxiv(rp_number: str, node_type: str = "ResearchPaper") -> dict:
84
  """Extracts information from an Arxiv research paper and generates a summary."""
85
 
86
  rp_data = {
 
154
  rp_data["summary"] = "Summary not generated (Abstract unavailable or problematic)"
155
  return rp_data
156
 
157
+ def extract_google_patents(patent_number: str, node_type: str = "Patent"):
158
  """
159
  Extracts information from a Google Patents page with robust error handling.
160
  """
 
223
 
224
  prompt = f"""You are a 3GPP standardization expert. Summarize the key information in the provided document in simple technical English relevant to identifying potential Key Issues.
225
  Focus on challenges, gaps, or novel aspects.
226
+ Here is the document: <document>{patent_data['description']}<document>"""
227
 
228
  try:
229
  model = genai.GenerativeModel("gemini-2.5-flash-preview-05-20")
230
  response = model.generate_content(prompt)
231
 
232
+ patent_data["summary"] = response.text
233
  logger.info(f"Summary generated for Patent ID: {patent_number}")
234
  except Exception as e:
235
  logger.error(f"Error generating summary with Gemini for Patent ID {patent_number}: {e}")
236
+ patent_data["summary"] = "Error generating summary (API failure)"
237
  else:
238
+ rp_data["summary"] = "Summary not generated (Description unavailable or problematic)"
239
  return patent_data
240
 
241
  def add_nodes_to_neo4j(driver, data_list: list, node_type: str):
 
291
  raise HTTPException(status_code=500, detail="Neo4j database connection details are not configured on the server.")
292
 
293
  # Step 1: Extract paper data
294
+ paper_data = extract_arxiv(arxiv_id, node_type)
295
 
296
  if paper_data["title"].startswith("Error fetching content") or paper_data["title"] == "Title not found on page":
297
  logger.warning(f"Could not fetch or parse content for Arxiv ID {arxiv_id}. Title: {paper_data['title']}")
 
325
  except Exception as e:
326
  logger.error(f"An unexpected error occurred during Neo4j operation for {arxiv_id}: {e}", exc_info=True)
327
  raise HTTPException(status_code=500, detail=f"An unexpected server error occurred: {e}")
328
+ finally:
329
+ if driver_instance:
330
+ driver_instance.close()
331
+ logger.info("Neo4j connection closed.")
332
+
333
+
334
+ @app.post("/add_patent/{patent_id}", status_code=201) # 201 Created for successful creation
335
+ async def add_single_patent(patent_id: str):
336
+ """
337
+ Fetches a patent from Google Patents by its ID, extracts information,
338
+ generates a summary, and adds/updates it as a 'Patent' node in Neo4j.
339
+ """
340
+ node_type = "Patent"
341
+ logger.info(f"Processing request for Patent ID: {patent_id}")
342
+
343
+ if not NEO4J_URI or not NEO4J_USER or not NEO4J_PASSWORD:
344
+ logger.error("Neo4j database connection details are not configured on the server.")
345
+ raise HTTPException(status_code=500, detail="Neo4j database connection details are not configured on the server.")
346
+
347
+ # Step 1: Extract patent data
348
+ patent_data = extract_google_patents(patent_id, node_type)
349
+
350
+ if patent_data["title"].startswith("Error fetching content") or patent_data["title"] == "Title not found on page":
351
+ logger.warning(f"Could not fetch or parse content for Patent ID {patent_id}. Title: {patent_data['title']}")
352
+ raise HTTPException(status_code=404, detail=f"Could not fetch or parse content for Patent ID {patent_id}. Title: {patent_data['title']}")
353
+
354
+ # Step 2: Add/Update in Neo4j
355
+ driver_instance = None # Initialize for the finally block
356
+ try:
357
+ auth_token = basic_auth(NEO4J_USER, NEO4J_PASSWORD)
358
+ driver_instance = GraphDatabase.driver(NEO4J_URI, auth=auth_token)
359
+ driver_instance.verify_connectivity()
360
+ logger.info("Successfully connected to Neo4j.")
361
+
362
+ nodes_created_count = add_nodes_to_neo4j(driver_instance, [patent_data], node_type)
363
+
364
+ if nodes_created_count > 0 :
365
+ logger.info(f"Patent {patent_id} was successfully added to Neo4j.")
366
+ status_code_response = 201 # Created
367
+
368
+ # Note: FastAPI uses the status_code from the decorator or HTTPException.
369
+ # This custom status_code_response is for the JSON body if needed, but the actual HTTP response status
370
+ # will be 201 (from decorator) unless an HTTPException overrides it or we change the decorator based on logic.
371
+ # For simplicity here, we'll return it in the body and let the decorator's 201 stand if no error.
372
+ # A more advanced setup might change the response status dynamically.
373
+
374
+ return {"data": patent_data}
375
+
376
+ except HTTPException as e: # Re-raise HTTPExceptions
377
+ logger.error(f"HTTPException during Neo4j operation for {patent_id}: {e.detail}")
378
+ raise e
379
+ except Exception as e:
380
+ logger.error(f"An unexpected error occurred during Neo4j operation for {patent_id}: {e}", exc_info=True)
381
+ raise HTTPException(status_code=500, detail=f"An unexpected server error occurred: {e}")
382
  finally:
383
  if driver_instance:
384
  driver_instance.close()