Spaces:

VyLala
/

mtDNALocation

Running

App Files Files Community

VyLala committed on Jul 7

Commit

543a1c1

verified ·

1 Parent(s): 688427b

Update mtdna_backend.py

Browse files

Files changed (1) hide show

mtdna_backend.py +34 -28

mtdna_backend.py CHANGED Viewed

@@ -265,50 +265,56 @@ def summarize_results(accession):
     # except Exception as e:
     #     print(f"⚠️ Failed to save known output to Google Sheets: {e}")
     try:
-        df_new = pd.DataFrame(save_rows, columns=[
-            "Sample ID", "Predicted Country", "Country Explanation",
-            "Predicted Sample Type", "Sample Type Explanation",
             "Sources", "Query_cost", "Time cost"
         ])
-        # ✅ Google Sheets API setup
         creds_dict = json.loads(os.environ["GCP_CREDS_JSON"])
         scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
         creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope)
         client = gspread.authorize(creds)
-        # ✅ Open the known_samples sheet
         spreadsheet = client.open("known_samples")
         sheet = spreadsheet.sheet1
-        required_columns = [
-            "Sample ID", "Predicted Country", "Country Explanation",
-            "Predicted Sample Type", "Sample Type Explanation",
-            "Sources", "Query_cost", "Time cost"
-        ]
-        # ✅ Read old data
         existing_data = sheet.get_all_values()
-        if existing_data and existing_data[0] == required_columns:
             df_old = pd.DataFrame(existing_data[1:], columns=existing_data[0])
         else:
-            df_old = pd.DataFrame(columns=required_columns)
-        # ✅ Set index and update old values with new ones
         df_old.set_index("Sample ID", inplace=True)
         df_new.set_index("Sample ID", inplace=True)
-        df_old.update(df_new)  # overwrite old values for same Sample ID
-        # ✅ Append any new Sample ID not already in old
-        combined = pd.concat([df_old, df_new[~df_new.index.isin(df_old.index)]])
-        df_combined = combined.reset_index()
-        # ✅ Clear and write back
         sheet.clear()
-        sheet.update([df_combined.columns.tolist()] + df_combined.values.tolist())
     except Exception as e:
-        print(f"⚠️ Failed to save known output to Google Sheets: {e}")
     return rows#, summary, labelAncient_Modern, explain_label

     # except Exception as e:
     #     print(f"⚠️ Failed to save known output to Google Sheets: {e}")
     try:
+        # Prepare as DataFrame
+        df_new = pd.DataFrame(match_rows, columns=[
+            "Sample ID", "Predicted Country", "Country Explanation",
+            "Predicted Sample Type", "Sample Type Explanation",
             "Sources", "Query_cost", "Time cost"
         ])
+        # ✅ Setup Google Sheets
         creds_dict = json.loads(os.environ["GCP_CREDS_JSON"])
         scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
         creds = ServiceAccountCredentials.from_json_keyfile_dict(creds_dict, scope)
         client = gspread.authorize(creds)
         spreadsheet = client.open("known_samples")
         sheet = spreadsheet.sheet1
+        # ✅ Read existing data
         existing_data = sheet.get_all_values()
+        if existing_data:
             df_old = pd.DataFrame(existing_data[1:], columns=existing_data[0])
         else:
+            df_old = pd.DataFrame(columns=[
+                "Sample ID", "Actual_country", "Actual_sample_type", "Country Explanation",
+                "Match_country", "Match_sample_type", "Predicted Country", "Predicted Sample Type",
+                "Query_cost", "Sample Type Explanation", "Sources", "Time cost"
+            ])
+        # ✅ Index by Sample ID
         df_old.set_index("Sample ID", inplace=True)
         df_new.set_index("Sample ID", inplace=True)
+        # ✅ Update only matching fields
+        update_columns = [
+            "Predicted Country", "Predicted Sample Type", "Country Explanation",
+            "Sample Type Explanation", "Sources", "Query_cost", "Time cost"
+        ]
+        for idx, row in df_new.iterrows():
+            if idx not in df_old.index:
+                df_old.loc[idx] = ""  # new row, fill empty first
+            for col in update_columns:
+                if pd.notna(row[col]) and row[col] != "":
+                    df_old.at[idx, col] = row[col]
+        # ✅ Reset and write back
+        df_old.reset_index(inplace=True)
         sheet.clear()
+        sheet.update([df_old.columns.values.tolist()] + df_old.values.tolist())
+        print("✅ Match results saved to known_samples.")
     except Exception as e:
+        print(f"❌ Failed to update known_samples: {e}")
     return rows#, summary, labelAncient_Modern, explain_label