{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "### Create db structure\n",
    "\n",
    "Build one document per session entry in `edit_sessions.json` and save the resulting list to `mongo_init.json`."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "outputs": [],
   "source": [
    "import json\n",
    "import re"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T19:49:05.706326Z",
     "start_time": "2025-01-07T19:49:05.697814Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "outputs": [],
   "source": [
    "base_url = \"https://raw.githubusercontent.com/piadonabauer/magicbrush-dev/main/images\""
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T19:49:06.010163Z",
     "start_time": "2025-01-07T19:49:06.006475Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-01-07T19:49:06.347366Z",
     "start_time": "2025-01-07T19:49:06.324433Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data saved at mongo_init.json\n"
     ]
    }
   ],
   "source": [
    "def extract_turn(output_filename):\n",
    "    \"\"\"Return the turn number embedded in an output image filename.\n",
    "\n",
    "    The turn is the run of digits that directly follows the text \"output\"\n",
    "    (a name containing \"output2\" yields 2). Returns None when the filename\n",
    "    contains no such pattern.\n",
    "    \"\"\"\n",
    "    match = re.search(r\"output(\\d+)\", output_filename)\n",
    "    return int(match.group(1)) if match else None\n",
    "\n",
    "\n",
    "with open(\"edit_sessions.json\", \"r\", encoding=\"utf-8\") as file:\n",
    "    edit_sessions = json.load(file)\n",
    "\n",
    "# One document per session entry; entries whose output filename carries no\n",
    "# turn number are reported and skipped.\n",
    "output_data = []\n",
    "for image_id, sessions in edit_sessions.items():\n",
    "    for session in sessions:\n",
    "        input_link = f\"{base_url}/{image_id}/{session['input']}\"\n",
    "        output_link = f\"{base_url}/{image_id}/{session['output']}\"\n",
    "\n",
    "        turn = extract_turn(session[\"output\"])\n",
    "        if turn is None:\n",
    "            print(f\"No turn value found in {session['output']} - skip.\")\n",
    "            continue\n",
    "\n",
    "        output_data.append(\n",
    "            {\n",
    "                \"meta_information\": {\n",
    "                    \"id\": int(image_id),\n",
    "                    \"turn\": turn,\n",
    "                    \"input_img_link\": input_link,\n",
    "                    \"output_img_link\": output_link,\n",
    "                    \"instruction\": session[\"instruction\"],\n",
    "                },\n",
    "                # Starts empty; nothing in this notebook fills it.\n",
    "                \"ratings\": [],\n",
    "            }\n",
    "        )\n",
    "\n",
    "output_json_path = \"mongo_init.json\"\n",
    "with open(output_json_path, \"w\", encoding=\"utf-8\") as outfile:\n",
    "    json.dump(output_data, outfile, indent=4)\n",
    "\n",
    "print(f\"Data saved at {output_json_path}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "### Upload structure to MongoDB\n",
    "\n",
    "Insert the generated documents into the `labeling` collection of the `thesis` database. Connection credentials are read from environment variables, which `load_dotenv()` loads from a local `.env` file."
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "outputs": [],
   "source": [
    "import os\n",
    "from urllib.parse import quote_plus\n",
    "\n",
    "import pymongo\n",
    "from dotenv import load_dotenv\n",
    "from pymongo import MongoClient"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T19:49:07.701178Z",
     "start_time": "2025-01-07T19:49:07.700478Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4.8.0\n"
     ]
    }
   ],
   "source": [
    "print(pymongo.__version__)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T19:49:07.987410Z",
     "start_time": "2025-01-07T19:49:07.983489Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "outputs": [],
   "source": [
    "def require_env(name):\n",
    "    \"\"\"Return environment variable `name`; raise RuntimeError if it is unset or empty.\"\"\"\n",
    "    value = os.getenv(name)\n",
    "    if not value:\n",
    "        raise RuntimeError(\n",
    "            f\"Environment variable {name} is not set; add it to the .env file or the environment.\"\n",
    "        )\n",
    "    return value\n",
    "\n",
    "\n",
    "# Read variables from a local .env file into the process environment.\n",
    "load_dotenv()\n",
    "\n",
    "# Store the raw (not percent-encoded) user name and password in .env;\n",
    "# they are escaped when the connection URI is built.\n",
    "mongo_user = require_env(\"MONGO_USER\")\n",
    "mongo_password = require_env(\"MONGO_PASSWORD\")\n",
    "cluster_url = require_env(\"MONGO_CLUSTER_URL\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T19:50:05.798336Z",
     "start_time": "2025-01-07T19:50:05.794142Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data added.\n"
     ]
    }
   ],
   "source": [
    "# User name and password must be percent-encoded: reserved characters such as\n",
    "# '@', ':' or '/' would otherwise make the connection URI invalid.\n",
    "connection_url = (\n",
    "    f\"mongodb+srv://{quote_plus(mongo_user)}:{quote_plus(mongo_password)}@{cluster_url}\"\n",
    ")\n",
    "\n",
    "with open(output_json_path, \"r\", encoding=\"utf-8\") as infile:\n",
    "    documents = json.load(infile)\n",
    "\n",
    "# NOTE: insert_many only appends, so re-running this cell inserts the same\n",
    "# documents a second time.\n",
    "with MongoClient(connection_url) as client:\n",
    "    db = client[\"thesis\"]\n",
    "    collection = db[\"labeling\"]\n",
    "\n",
    "    if documents:\n",
    "        collection.insert_many(documents)\n",
    "        print(\"Data added.\")\n",
    "    else:\n",
    "        # insert_many rejects an empty list, so skip the call entirely.\n",
    "        print(\"No documents to insert.\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T19:50:08.311238Z",
     "start_time": "2025-01-07T19:50:06.147140Z"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "name": "enviro",
   "language": "python",
   "display_name": "Python (enviro)"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}