Upload folder using huggingface_hub
Browse files- requirements.txt +5 -0
- .DS_Store +0 -0
- .idea/gradio.iml +10 -0
- .idea/inspectionProfiles/profiles_settings.xml +6 -0
- .idea/misc.xml +4 -0
- .idea/modules.xml +8 -0
- .idea/workspace.xml +91 -0
- README.md +5 -8
- app.py +193 -0
- edit_sessions.json +0 -0
- mongo_init.json +0 -0
- mongodb.ipynb +236 -0
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==4.44.1
|
2 |
+
pymongo==4.8.0
|
3 |
+
requests==2.32.3
|
4 |
+
python-dotenv==1.0.1
|
5 |
+
pillow==10.4.0
|
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
.idea/gradio.iml
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<module type="PYTHON_MODULE" version="4">
|
3 |
+
<component name="NewModuleRootManager">
|
4 |
+
<content url="file://$MODULE_DIR$">
|
5 |
+
<excludeFolder url="file://$MODULE_DIR$/enviro" />
|
6 |
+
</content>
|
7 |
+
<orderEntry type="inheritedJdk" />
|
8 |
+
<orderEntry type="sourceFolder" forTests="false" />
|
9 |
+
</component>
|
10 |
+
</module>
|
.idea/inspectionProfiles/profiles_settings.xml
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<component name="InspectionProjectProfileManager">
|
2 |
+
<settings>
|
3 |
+
<option name="USE_PROJECT_PROFILE" value="false" />
|
4 |
+
<version value="1.0" />
|
5 |
+
</settings>
|
6 |
+
</component>
|
.idea/misc.xml
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (gradio)" project-jdk-type="Python SDK" />
|
4 |
+
</project>
|
.idea/modules.xml
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="ProjectModuleManager">
|
4 |
+
<modules>
|
5 |
+
<module fileurl="file://$PROJECT_DIR$/.idea/gradio.iml" filepath="$PROJECT_DIR$/.idea/gradio.iml" />
|
6 |
+
</modules>
|
7 |
+
</component>
|
8 |
+
</project>
|
.idea/workspace.xml
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
2 |
+
<project version="4">
|
3 |
+
<component name="AutoImportSettings">
|
4 |
+
<option name="autoReloadType" value="SELECTIVE" />
|
5 |
+
</component>
|
6 |
+
<component name="ChangeListManager">
|
7 |
+
<list default="true" id="8697917c-9f93-4f94-859f-bab053b785ea" name="Changes" comment="" />
|
8 |
+
<option name="SHOW_DIALOG" value="false" />
|
9 |
+
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
10 |
+
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
11 |
+
<option name="LAST_RESOLUTION" value="IGNORE" />
|
12 |
+
</component>
|
13 |
+
<component name="MarkdownSettingsMigration">
|
14 |
+
<option name="stateVersion" value="1" />
|
15 |
+
</component>
|
16 |
+
<component name="ProjectId" id="2lbyoOwwUV5Z6jiUjzA95ejUu03" />
|
17 |
+
<component name="ProjectViewState">
|
18 |
+
<option name="hideEmptyMiddlePackages" value="true" />
|
19 |
+
<option name="showLibraryContents" value="true" />
|
20 |
+
</component>
|
21 |
+
<component name="PropertiesComponent"><![CDATA[{
|
22 |
+
"keyToString": {
|
23 |
+
"RunOnceActivity.ShowReadmeOnStart": "true",
|
24 |
+
"WebServerToolWindowFactoryState": "false",
|
25 |
+
"last_opened_file_path": "/Users/piadonabauer/PycharmProjects/gradio",
|
26 |
+
"node.js.detected.package.eslint": "true",
|
27 |
+
"node.js.detected.package.tslint": "true",
|
28 |
+
"node.js.selected.package.eslint": "(autodetect)",
|
29 |
+
"node.js.selected.package.tslint": "(autodetect)",
|
30 |
+
"settings.editor.selected.configurable": "com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable",
|
31 |
+
"vue.rearranger.settings.migration": "true"
|
32 |
+
}
|
33 |
+
}]]></component>
|
34 |
+
<component name="RecentsManager">
|
35 |
+
<key name="CopyFile.RECENT_KEYS">
|
36 |
+
<recent name="$PROJECT_DIR$" />
|
37 |
+
</key>
|
38 |
+
<key name="MoveFile.RECENT_KEYS">
|
39 |
+
<recent name="$PROJECT_DIR$" />
|
40 |
+
<recent name="$PROJECT_DIR$/data" />
|
41 |
+
</key>
|
42 |
+
</component>
|
43 |
+
<component name="RunManager">
|
44 |
+
<configuration name="main" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
|
45 |
+
<module name="gradio" />
|
46 |
+
<option name="INTERPRETER_OPTIONS" value="" />
|
47 |
+
<option name="PARENT_ENVS" value="true" />
|
48 |
+
<envs>
|
49 |
+
<env name="PYTHONUNBUFFERED" value="1" />
|
50 |
+
</envs>
|
51 |
+
<option name="SDK_HOME" value="" />
|
52 |
+
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
|
53 |
+
<option name="IS_MODULE_SDK" value="true" />
|
54 |
+
<option name="ADD_CONTENT_ROOTS" value="true" />
|
55 |
+
<option name="ADD_SOURCE_ROOTS" value="true" />
|
56 |
+
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
57 |
+
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/main.py" />
|
58 |
+
<option name="PARAMETERS" value="" />
|
59 |
+
<option name="SHOW_COMMAND_LINE" value="false" />
|
60 |
+
<option name="EMULATE_TERMINAL" value="false" />
|
61 |
+
<option name="MODULE_MODE" value="false" />
|
62 |
+
<option name="REDIRECT_INPUT" value="false" />
|
63 |
+
<option name="INPUT_FILE" value="" />
|
64 |
+
<method v="2" />
|
65 |
+
</configuration>
|
66 |
+
</component>
|
67 |
+
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
|
68 |
+
<component name="TaskManager">
|
69 |
+
<task active="true" id="Default" summary="Default task">
|
70 |
+
<changelist id="8697917c-9f93-4f94-859f-bab053b785ea" name="Changes" comment="" />
|
71 |
+
<created>1725463244919</created>
|
72 |
+
<option name="number" value="Default" />
|
73 |
+
<option name="presentableId" value="Default" />
|
74 |
+
<updated>1725463244919</updated>
|
75 |
+
<workItem from="1725463246928" duration="18213000" />
|
76 |
+
<workItem from="1725897141437" duration="4873000" />
|
77 |
+
<workItem from="1727438790253" duration="7494000" />
|
78 |
+
<workItem from="1727876890592" duration="1494000" />
|
79 |
+
<workItem from="1730908139516" duration="823000" />
|
80 |
+
<workItem from="1730928642179" duration="5000" />
|
81 |
+
<workItem from="1732371650265" duration="677000" />
|
82 |
+
<workItem from="1736170501354" duration="2227000" />
|
83 |
+
<workItem from="1736236372042" duration="13000" />
|
84 |
+
<workItem from="1736236479831" duration="15221000" />
|
85 |
+
</task>
|
86 |
+
<servers />
|
87 |
+
</component>
|
88 |
+
<component name="TypeScriptGeneratedFilesManager">
|
89 |
+
<option name="version" value="3" />
|
90 |
+
</component>
|
91 |
+
</project>
|
README.md
CHANGED
@@ -1,12 +1,9 @@
|
|
1 |
---
|
2 |
-
title: Image
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 5.10.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
-
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Image-Edit-Annotation
|
3 |
+
emoji: 🔍
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: pink
|
6 |
sdk: gradio
|
|
|
7 |
app_file: app.py
|
8 |
pinned: false
|
9 |
+
---
|
|
|
|
app.py
ADDED
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import os
|
3 |
+
from PIL import Image
|
4 |
+
from pymongo import MongoClient
|
5 |
+
import requests
|
6 |
+
from io import BytesIO
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
|
9 |
+
instruction_beginning = """
|
10 |
+
## 🔍 Evaluation of AI Quality
|
11 |
+
|
12 |
+
\n**Background**
|
13 |
+
|
14 |
+
\nIn this task, you will evaluate the quality of image edits based on a textual instruction and two input images regarding three aspects: **Instruction-Edit Alignment**, **Visual Quality** and **Consistency**.
|
15 |
+
Each aspect should be rated on a scale from 1 to 10, where 1 indicates 'very poor' and 10 represents 'excellent'.
|
16 |
+
|
17 |
+
Please ensure you have read the detailed instructions provided in this [document](https://www.canva.com/design/DAGP0UTTygI/rYkYZtLUipuKbXPbRcj9kQ/edit?utm_content=DAGP0UTTygI&utm_campaign=designshare&utm_medium=link2&utm_source=sharebutton) before starting the labeling process.
|
18 |
+
|
19 |
+
\n**Labeling**
|
20 |
+
|
21 |
+
\nPlease enter a nickname to avoid repeating triplets. Make sure to remember this nickname for future sessions. If you choose not to enter a nickname, your ratings will not be saved.
|
22 |
+
"""
|
23 |
+
|
24 |
+
alignment_info = """
|
25 |
+
How well does the edited area align with the text instruction? (e.g. numbers, colors, and objects)
|
26 |
+
"""
|
27 |
+
|
28 |
+
quality_info = """
|
29 |
+
How realistic and aesthetically pleasing is the edited area? (e.g. color realism and overall aesthetics)
|
30 |
+
"""
|
31 |
+
|
32 |
+
consistency_info = """
|
33 |
+
How seamlessly does the edit integrate with the rest of the original image? (e.g. consistency in style, lighting, logic, and spatial coherence)
|
34 |
+
"""
|
35 |
+
|
36 |
+
overall_info = """
|
37 |
+
How do you perceive and like the edit as a whole, how well does it meet your expectations and complements the original image?
|
38 |
+
"""
|
39 |
+
|
40 |
+
# load_dotenv()
|
41 |
+
|
42 |
+
mongo_user = os.getenv('MONGO_USER')
|
43 |
+
mongo_password = os.getenv('MONGO_PASSWORD')
|
44 |
+
cluster_url = os.getenv('MONGO_CLUSTER_URL')
|
45 |
+
gradio_user = os.getenv('GRADIO_USER')
|
46 |
+
gradio_password = os.getenv('GRADIO_PASSWORD')
|
47 |
+
|
48 |
+
connection_url = f"mongodb+srv://{mongo_user}:{mongo_password}@{cluster_url}"
|
49 |
+
client = MongoClient(connection_url)
|
50 |
+
db = client["thesis"]
|
51 |
+
collection = db["labeling"]
|
52 |
+
|
53 |
+
|
54 |
+
def download_image(url, timeout=10):
    """Download an image from ``url`` and open it with PIL.

    Args:
        url: HTTP(S) address of the image.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely, which would
            stall the UI callback that triggered the download.

    Returns:
        The image opened by ``PIL.Image.open`` from the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return Image.open(BytesIO(response.content))
|
59 |
+
|
60 |
+
|
61 |
+
def fetch_random_entry(annotator, max_ratings=5):
    """Fetch one random entry that still needs ratings from ``annotator``.

    An entry qualifies when it has fewer than ``max_ratings`` ratings and none
    of its ratings was submitted by ``annotator``.

    Args:
        annotator: Nickname of the current rater; entries this rater already
            rated are excluded.
        max_ratings: Upper bound (exclusive) on the number of ratings an entry
            may already have. Defaults to 5, the limit the query always used.

    Returns:
        The matching document (with an added ``ratings_count`` field), or
        ``None`` when no entry qualifies.
    """
    pipeline = [
        {
            "$addFields": {
                # $size fails on a missing field, so treat a document without
                # a "ratings" array as having no ratings yet.
                "ratings_count": {"$size": {"$ifNull": ["$ratings", []]}}
            }
        },
        {
            "$match": {
                "ratings_count": {"$lt": max_ratings},
                # exclude entries where rater is the specified annotator
                "ratings.rater": {"$ne": annotator},
            }
        },
        {"$sample": {"size": 1}},  # randomly select one entry
    ]

    results = list(collection.aggregate(pipeline))
    return results[0] if results else None
|
80 |
+
|
81 |
+
|
82 |
+
def save_rating(entry_id, turn, annotator, alignment, quality, consistency, overall):
    """Append one rating to the entry identified by ``entry_id`` and ``turn``.

    Nothing is written when ``annotator`` is empty, or when no entry is
    currently loaded (``entry_id`` or ``turn`` is ``None`` or an empty
    string). The latter happens when "Save & Continue" is pressed before an
    entry was fetched or after the entries ran out; converting such a value
    with ``int()`` would otherwise raise.

    Args:
        entry_id: Value of ``meta_information.id``; converted with ``int()``.
        turn: Value of ``meta_information.turn``; converted with ``int()``.
        annotator: Nickname stored as the ``rater`` of the rating.
        alignment: Score stored under ``alignment``.
        quality: Score stored under ``quality``.
        consistency: Score stored under ``consistency``.
        overall: Score stored under ``overall``.

    Returns:
        None.
    """
    if not annotator:
        return

    # Compare against None/"" explicitly so that a legitimate id of 0 is kept.
    if entry_id in (None, "") or turn in (None, ""):
        return

    rating = {
        "rater": annotator,
        "alignment": alignment,
        "quality": quality,
        "consistency": consistency,
        "overall": overall
    }

    collection.update_one(
        {"meta_information.id": int(entry_id), "meta_information.turn": int(turn)},
        {"$push": {"ratings": rating}}
    )
|
97 |
+
|
98 |
+
|
99 |
+
def count_labeled_images(annotator):
    """Return how many documents contain a rating whose rater is ``annotator``."""
    stages = [
        # keep only documents where 'rater' is the given annotator
        {"$match": {"ratings.rater": annotator}},
        # collapse the matches into a single {"labeled_images": <n>} document
        {"$count": "labeled_images"},
    ]

    rows = list(collection.aggregate(stages))
    if not rows:
        return 0
    return rows[0]['labeled_images']
|
115 |
+
|
116 |
+
|
117 |
+
def prepare_next_image(annotator):
    """Load the next entry for ``annotator`` and the values the UI displays.

    Returns:
        A 6-tuple ``(id, input image, output image, instruction, progress
        message, turn)``. When no entry is available, every element is
        ``None`` except the message, which reads "No more images to rate!".
    """
    entry = fetch_random_entry(annotator)
    if not entry:
        return None, None, None, None, "No more images to rate!", None

    meta = entry["meta_information"]

    original = download_image(meta["input_img_link"])
    edited = download_image(meta["output_img_link"])
    text = meta["instruction"]

    labeled = count_labeled_images(annotator)
    status = f"Rate this image! ({labeled}/528 labeled)"

    return meta["id"], original, edited, text, status, meta["turn"]
|
131 |
+
|
132 |
+
|
133 |
+
def start(annotator):
    """Callback of the "Start" button: load the first entry for ``annotator``."""
    first_entry = prepare_next_image(annotator)
    return first_entry
|
135 |
+
|
136 |
+
|
137 |
+
def record_input(id, turn, annotator, alignment, quality, consistency, overall):
    """Callback of "Save & Continue": store the scores, then load the next entry."""
    scores = (alignment, quality, consistency, overall)
    save_rating(id, turn, annotator, *scores)

    next_entry = prepare_next_image(annotator)
    return next_entry
|
140 |
+
|
141 |
+
|
142 |
+
# Gradio Interface
|
143 |
+
def create_interface():
    """Build the Gradio Blocks UI for the labeling task.

    The page shows the instructions, a nickname field, the original and the
    edited image, the edit instruction, four rating sliders, and the buttons
    that fetch the first entry ("Start") and store a rating before fetching
    the next one ("Save & Continue").

    The sliders start at 1 because the on-page instructions define the scale
    as 1 ('very poor') to 10 ('excellent'); a minimum of 0 allowed scores
    outside that documented range.

    Returns:
        The configured ``gr.Blocks`` instance (not yet launched).
    """
    # NOTE(review): the nesting below (what sits inside each gr.Row) was
    # reconstructed from the blank-line grouping of the statements — confirm
    # that the "Overall Impression" slider and the instruction textbox are
    # meant to sit below their rows rather than inside them.
    with gr.Blocks() as demo:
        gr.Markdown(instruction_beginning)

        annotator = gr.Textbox(label="Annotator Nickname")

        start_btn = gr.Button("Start")
        progress_text = gr.Markdown("Waiting to start.")

        with gr.Row():
            img_block1 = gr.Image(visible=True, width=300, height=300, label="Original Image", interactive=False)
            img_block2 = gr.Image(visible=True, width=300, height=300, label="Edited Image", interactive=False)

        prompt = gr.Textbox(label="Instruction", visible=True, interactive=False)
        # Hidden fields carry the identity of the displayed entry between callbacks.
        img_id = gr.Textbox(visible=False)
        turn = gr.Textbox(visible=False)

        with gr.Row():
            slider_alignment = gr.Slider(label="Instruction-Edit Alignment", minimum=1, maximum=10, step=1, value=5,
                                         info=alignment_info)
            slider_quality = gr.Slider(label="Visual Quality", minimum=1, maximum=10, step=1, value=5,
                                       info=quality_info)
            slider_consistency = gr.Slider(label="Consistency", minimum=1, maximum=10, step=1, value=5,
                                           info=consistency_info)

        slider_overall = gr.Slider(label="Overall Impression", minimum=1, maximum=10, step=1, value=5,
                                   info=overall_info)

        save_and_continue_btn = gr.Button("Save & Continue")

        start_btn.click(
            fn=start,
            inputs=[annotator],
            outputs=[img_id, img_block1, img_block2, prompt, progress_text, turn]
        )

        save_and_continue_btn.click(
            fn=record_input,
            inputs=[img_id, turn, annotator, slider_alignment, slider_quality, slider_consistency, slider_overall],
            outputs=[img_id, img_block1, img_block2, prompt, progress_text, turn]
        )

    return demo
|
188 |
+
|
189 |
+
|
190 |
+
if __name__ == "__main__":
    # Build the UI, enable request queuing, then serve it behind the
    # username/password pair read from the environment.
    demo = create_interface()
    demo.queue()

    credentials = (gradio_user, gradio_password)
    demo.launch(share=True, debug=True, auth=credentials)
|
edit_sessions.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
mongo_init.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
mongodb.ipynb
ADDED
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"source": [
|
6 |
+
"### Create db structure"
|
7 |
+
],
|
8 |
+
"metadata": {
|
9 |
+
"collapsed": false
|
10 |
+
}
|
11 |
+
},
|
12 |
+
{
|
13 |
+
"cell_type": "code",
|
14 |
+
"execution_count": 22,
|
15 |
+
"outputs": [],
|
16 |
+
"source": [
|
17 |
+
"import json\n",
|
18 |
+
"import re"
|
19 |
+
],
|
20 |
+
"metadata": {
|
21 |
+
"collapsed": false,
|
22 |
+
"ExecuteTime": {
|
23 |
+
"end_time": "2025-01-07T19:49:05.706326Z",
|
24 |
+
"start_time": "2025-01-07T19:49:05.697814Z"
|
25 |
+
}
|
26 |
+
}
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"cell_type": "code",
|
30 |
+
"execution_count": 23,
|
31 |
+
"outputs": [],
|
32 |
+
"source": [
|
33 |
+
"base_url = \"https://raw.githubusercontent.com/piadonabauer/magicbrush-dev/main/images\""
|
34 |
+
],
|
35 |
+
"metadata": {
|
36 |
+
"collapsed": false,
|
37 |
+
"ExecuteTime": {
|
38 |
+
"end_time": "2025-01-07T19:49:06.010163Z",
|
39 |
+
"start_time": "2025-01-07T19:49:06.006475Z"
|
40 |
+
}
|
41 |
+
}
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"cell_type": "code",
|
45 |
+
"execution_count": 24,
|
46 |
+
"metadata": {
|
47 |
+
"collapsed": true,
|
48 |
+
"ExecuteTime": {
|
49 |
+
"end_time": "2025-01-07T19:49:06.347366Z",
|
50 |
+
"start_time": "2025-01-07T19:49:06.324433Z"
|
51 |
+
}
|
52 |
+
},
|
53 |
+
"outputs": [
|
54 |
+
{
|
55 |
+
"name": "stdout",
|
56 |
+
"output_type": "stream",
|
57 |
+
"text": [
|
58 |
+
"Data saved at mongo_init.json\n"
|
59 |
+
]
|
60 |
+
}
|
61 |
+
],
|
62 |
+
"source": [
|
63 |
+
"output_data = []\n",
|
64 |
+
"\n",
|
65 |
+
"def extract_turn(output_filename):\n",
|
66 |
+
" match = re.search(r\"output(\\d+)\", output_filename)\n",
|
67 |
+
" return int(match.group(1)) if match else None\n",
|
68 |
+
"\n",
|
69 |
+
"with open(\"edit_sessions.json\", \"r\") as file:\n",
|
70 |
+
" edit_sessions = json.load(file)\n",
|
71 |
+
"\n",
|
72 |
+
"for id, sessions in edit_sessions.items():\n",
|
73 |
+
" for session in sessions:\n",
|
74 |
+
" input_link = f\"{base_url}/{id}/{session['input']}\"\n",
|
75 |
+
" output_link = f\"{base_url}/{id}/{session['output']}\"\n",
|
76 |
+
"\n",
|
77 |
+
" turn = extract_turn(session['output'])\n",
|
78 |
+
" if turn is None:\n",
|
79 |
+
" print(f\"No turn value found in {session['output']} - skip.\")\n",
|
80 |
+
" continue\n",
|
81 |
+
"\n",
|
82 |
+
" document = {\n",
|
83 |
+
" \"meta_information\": {\n",
|
84 |
+
" \"id\": int(id),\n",
|
85 |
+
" \"turn\": int(turn),\n",
|
86 |
+
" \"input_img_link\": input_link,\n",
|
87 |
+
" \"output_img_link\": output_link,\n",
|
88 |
+
" \"instruction\": session[\"instruction\"]\n",
|
89 |
+
" },\n",
|
90 |
+
" \"ratings\": []\n",
|
91 |
+
" }\n",
|
92 |
+
" output_data.append(document)\n",
|
93 |
+
"\n",
|
94 |
+
"output_json_path = \"mongo_init.json\"\n",
|
95 |
+
"with open(output_json_path, \"w\") as outfile:\n",
|
96 |
+
" json.dump(output_data, outfile, indent=4)\n",
|
97 |
+
"\n",
|
98 |
+
"print(f\"Data saved at {output_json_path}\")"
|
99 |
+
]
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"cell_type": "markdown",
|
103 |
+
"source": [
|
104 |
+
"### Upload structure to mongo db"
|
105 |
+
],
|
106 |
+
"metadata": {
|
107 |
+
"collapsed": false
|
108 |
+
}
|
109 |
+
},
|
110 |
+
{
|
111 |
+
"cell_type": "code",
|
112 |
+
"execution_count": 25,
|
113 |
+
"outputs": [],
|
114 |
+
"source": [
|
115 |
+
"from pymongo import MongoClient\n",
|
116 |
+
"from dotenv import load_dotenv\n",
|
117 |
+
"import os"
|
118 |
+
],
|
119 |
+
"metadata": {
|
120 |
+
"collapsed": false,
|
121 |
+
"ExecuteTime": {
|
122 |
+
"end_time": "2025-01-07T19:49:07.701178Z",
|
123 |
+
"start_time": "2025-01-07T19:49:07.700478Z"
|
124 |
+
}
|
125 |
+
}
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"cell_type": "code",
|
129 |
+
"execution_count": 26,
|
130 |
+
"outputs": [
|
131 |
+
{
|
132 |
+
"name": "stdout",
|
133 |
+
"output_type": "stream",
|
134 |
+
"text": [
|
135 |
+
"4.8.0\n"
|
136 |
+
]
|
137 |
+
}
|
138 |
+
],
|
139 |
+
"source": [
|
140 |
+
"import pymongo\n",
|
141 |
+
"print(pymongo.__version__)"
|
142 |
+
],
|
143 |
+
"metadata": {
|
144 |
+
"collapsed": false,
|
145 |
+
"ExecuteTime": {
|
146 |
+
"end_time": "2025-01-07T19:49:07.987410Z",
|
147 |
+
"start_time": "2025-01-07T19:49:07.983489Z"
|
148 |
+
}
|
149 |
+
}
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"cell_type": "code",
|
153 |
+
"execution_count": 29,
|
154 |
+
"outputs": [],
|
155 |
+
"source": [
|
156 |
+
"#os.environ.pop('MONGO_PASSWORD', None)\n",
|
157 |
+
"load_dotenv() # load gitignore\n",
|
158 |
+
"\n",
|
159 |
+
"mongo_user = os.getenv('MONGO_USER')\n",
|
160 |
+
"mongo_password = os.getenv('MONGO_PASSWORD')\n",
|
161 |
+
"cluster_url = os.getenv('MONGO_CLUSTER_URL')\n",
|
162 |
+
"#print(mongo_user, mongo_password, cluster_url)"
|
163 |
+
],
|
164 |
+
"metadata": {
|
165 |
+
"collapsed": false,
|
166 |
+
"ExecuteTime": {
|
167 |
+
"end_time": "2025-01-07T19:50:05.798336Z",
|
168 |
+
"start_time": "2025-01-07T19:50:05.794142Z"
|
169 |
+
}
|
170 |
+
}
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"cell_type": "code",
|
174 |
+
"execution_count": 30,
|
175 |
+
"outputs": [
|
176 |
+
{
|
177 |
+
"name": "stdout",
|
178 |
+
"output_type": "stream",
|
179 |
+
"text": [
|
180 |
+
"Data added.\n"
|
181 |
+
]
|
182 |
+
}
|
183 |
+
],
|
184 |
+
"source": [
|
185 |
+
"connection_url = f\"mongodb+srv://{mongo_user}:{mongo_password}@{cluster_url}\"\n",
|
186 |
+
"client = MongoClient(connection_url)\n",
|
187 |
+
"\n",
|
188 |
+
"db = client[\"thesis\"]\n",
|
189 |
+
"collection = db[\"labeling\"]\n",
|
190 |
+
"\n",
|
191 |
+
"with open(output_json_path, \"r\") as infile:\n",
|
192 |
+
" documents = json.load(infile)\n",
|
193 |
+
" collection.insert_many(documents)\n",
|
194 |
+
"\n",
|
195 |
+
"print(\"Data added.\")"
|
196 |
+
],
|
197 |
+
"metadata": {
|
198 |
+
"collapsed": false,
|
199 |
+
"ExecuteTime": {
|
200 |
+
"end_time": "2025-01-07T19:50:08.311238Z",
|
201 |
+
"start_time": "2025-01-07T19:50:06.147140Z"
|
202 |
+
}
|
203 |
+
}
|
204 |
+
},
|
205 |
+
{
|
206 |
+
"cell_type": "code",
|
207 |
+
"execution_count": null,
|
208 |
+
"outputs": [],
|
209 |
+
"source": [],
|
210 |
+
"metadata": {
|
211 |
+
"collapsed": false
|
212 |
+
}
|
213 |
+
}
|
214 |
+
],
|
215 |
+
"metadata": {
|
216 |
+
"kernelspec": {
|
217 |
+
"name": "enviro",
|
218 |
+
"language": "python",
|
219 |
+
"display_name": "Python (enviro)"
|
220 |
+
},
|
221 |
+
"language_info": {
|
222 |
+
"codemirror_mode": {
|
223 |
+
"name": "ipython",
|
224 |
+
"version": 2
|
225 |
+
},
|
226 |
+
"file_extension": ".py",
|
227 |
+
"mimetype": "text/x-python",
|
228 |
+
"name": "python",
|
229 |
+
"nbconvert_exporter": "python",
|
230 |
+
"pygments_lexer": "ipython2",
|
231 |
+
"version": "2.7.6"
|
232 |
+
}
|
233 |
+
},
|
234 |
+
"nbformat": 4,
|
235 |
+
"nbformat_minor": 0
|
236 |
+
}
|