audio_img / text_data.py
pengdaqian
add more
171f55b
raw
history blame contribute delete
747 Bytes
import pandas as pd
import json
# Merge the caption columns of several audio-caption CSV files into one flat
# list and save that list as a JSON array in "audio_text.json".


def _read_captions(csv_path, column, **read_csv_kwargs):
    """Return the non-missing values of *column* from the CSV at *csv_path*.

    Extra keyword arguments are forwarded unchanged to ``pd.read_csv``.

    Missing cells are dropped: pandas represents them as float NaN, which
    ``json.dump`` would write as the bare token ``NaN`` (not valid JSON) and
    which would put floats into what is meant to be a list of captions.
    """
    frame = pd.read_csv(csv_path, **read_csv_kwargs)
    return frame[column].dropna().tolist()


# NOTE: the column names differ between the files (some carry a leading
# space, one is named " caption2"); they are kept exactly as written.
# The first file is read without on_bad_lines='skip', so a malformed row
# there still raises instead of being silently skipped.
captions = _read_captions(
    '/root/autodl-tmp/audioset_balanced_train.csv', " caption")
captions_2 = _read_captions(
    '/root/autodl-tmp/Epidemic_all_debiased.csv', ' caption2',
    on_bad_lines='skip')
captions_3 = _read_captions(
    '/root/autodl-tmp/audioset_eval.csv', 'caption',
    on_bad_lines='skip')
captions_4 = _read_captions(
    '/root/autodl-tmp/audioset_unbalanced_train.csv', ' caption',
    on_bad_lines='skip')

# Concatenate in a fixed order: balanced train, Epidemic, eval, unbalanced.
captions = captions + captions_2 + captions_3 + captions_4
print(len(captions))

with open("audio_text.json", "w") as outfile:
    # write the merged captions to the file as a single JSON array
    json.dump(captions, outfile)