VyLala committed on
Commit
543a1c1
·
verified ·
1 Parent(s): 688427b

Update mtdna_backend.py

Browse files
Files changed (1) hide show
  1. mtdna_backend.py +34 -28
mtdna_backend.py CHANGED
@@ -265,50 +265,56 @@ def summarize_results(accession):
265
  # except Exception as e:
266
  # print(f"⚠️ Failed to save known output to Google Sheets: {e}")
267
  try:
268
- df_new = pd.DataFrame(save_rows, columns=[
269
- "Sample ID", "Predicted Country", "Country Explanation",
270
- "Predicted Sample Type", "Sample Type Explanation",
 
271
  "Sources", "Query_cost", "Time cost"
272
  ])
273
-
274
- # ✅ Google Sheets API setup
275
  creds_dict = json.loads(os.environ["GCP_CREDS_JSON"])
276
  scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
277
  creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope)
278
  client = gspread.authorize(creds)
279
-
280
- # ✅ Open the known_samples sheet
281
  spreadsheet = client.open("known_samples")
282
  sheet = spreadsheet.sheet1
283
- required_columns = [
284
- "Sample ID", "Predicted Country", "Country Explanation",
285
- "Predicted Sample Type", "Sample Type Explanation",
286
- "Sources", "Query_cost", "Time cost"
287
- ]
288
-
289
- # ✅ Read old data
290
  existing_data = sheet.get_all_values()
291
- if existing_data and existing_data[0] == required_columns:
292
  df_old = pd.DataFrame(existing_data[1:], columns=existing_data[0])
293
  else:
294
- df_old = pd.DataFrame(columns=required_columns)
 
 
 
 
295
 
296
-
297
- # ✅ Set index and update old values with new ones
298
  df_old.set_index("Sample ID", inplace=True)
299
  df_new.set_index("Sample ID", inplace=True)
300
- df_old.update(df_new) # overwrite old values for same Sample ID
301
-
302
- # ✅ Append any new Sample ID not already in old
303
- combined = pd.concat([df_old, df_new[~df_new.index.isin(df_old.index)]])
304
- df_combined = combined.reset_index()
305
-
306
- # ✅ Clear and write back
 
 
 
 
 
 
 
 
307
  sheet.clear()
308
- sheet.update([df_combined.columns.tolist()] + df_combined.values.tolist())
309
-
 
310
  except Exception as e:
311
- print(f"⚠️ Failed to save known output to Google Sheets: {e}")
312
 
313
 
314
  return rows#, summary, labelAncient_Modern, explain_label
 
265
  # except Exception as e:
266
  # print(f"⚠️ Failed to save known output to Google Sheets: {e}")
267
  try:
268
+ # Prepare as DataFrame
269
+ df_new = pd.DataFrame(match_rows, columns=[
270
+ "Sample ID", "Predicted Country", "Country Explanation",
271
+ "Predicted Sample Type", "Sample Type Explanation",
272
  "Sources", "Query_cost", "Time cost"
273
  ])
274
+
275
+ # ✅ Set up Google Sheets
276
  creds_dict = json.loads(os.environ["GCP_CREDS_JSON"])
277
  scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
278
  creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope)
279
  client = gspread.authorize(creds)
 
 
280
  spreadsheet = client.open("known_samples")
281
  sheet = spreadsheet.sheet1
282
+
283
+ # ✅ Read existing data
 
 
 
 
 
284
  existing_data = sheet.get_all_values()
285
+ if existing_data:
286
  df_old = pd.DataFrame(existing_data[1:], columns=existing_data[0])
287
  else:
288
+ df_old = pd.DataFrame(columns=[
289
+ "Sample ID", "Actual_country", "Actual_sample_type", "Country Explanation",
290
+ "Match_country", "Match_sample_type", "Predicted Country", "Predicted Sample Type",
291
+ "Query_cost", "Sample Type Explanation", "Sources", "Time cost"
292
+ ])
293
 
294
+ # ✅ Index by Sample ID
 
295
  df_old.set_index("Sample ID", inplace=True)
296
  df_new.set_index("Sample ID", inplace=True)
297
+
298
+ # ✅ Update only matching fields
299
+ update_columns = [
300
+ "Predicted Country", "Predicted Sample Type", "Country Explanation",
301
+ "Sample Type Explanation", "Sources", "Query_cost", "Time cost"
302
+ ]
303
+ for idx, row in df_new.iterrows():
304
+ if idx not in df_old.index:
305
+ df_old.loc[idx] = "" # new row, fill empty first
306
+ for col in update_columns:
307
+ if pd.notna(row[col]) and row[col] != "":
308
+ df_old.at[idx, col] = row[col]
309
+
310
+ # ✅ Reset and write back
311
+ df_old.reset_index(inplace=True)
312
  sheet.clear()
313
+ sheet.update([df_old.columns.values.tolist()] + df_old.values.tolist())
314
+ print("βœ… Match results saved to known_samples.")
315
+
316
  except Exception as e:
317
+ print(f"❌ Failed to update known_samples: {e}")
318
 
319
 
320
  return rows#, summary, labelAncient_Modern, explain_label