File size: 1,711 Bytes
0632cd1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import json
import pandas as pd
# Default locations used when the file is executed as a script.
INPUT_PATH = '../../data.json'
OUTPUT_PATH = 'chunks_javea.csv'


def _format_comment(index: int, comment: dict) -> str:
    """Render one comment and its replies as a numbered text fragment.

    Args:
        index: 1-based position of the comment within its post.
        comment: Mapping whose first key is the comment text and whose
            value is the iterable of reply strings for that comment.
            Only the first key is read; any further keys are ignored.

    Returns:
        ``"<index>. <comment text>\\n"``, followed by a ``Replies:`` section
        with one bulleted line per reply when there are replies.
    """
    comment_text, replies = next(iter(comment.items()))
    rendered = f'{index}. {comment_text}\n'
    if replies:
        # join() puts the newline *between* bullets; the trailing newline
        # that closes the replies section is added once, below.
        bullets = '\n'.join([f' • {reply}' for reply in replies])
        rendered += f'Replies:\n{bullets}\n'
    return rendered


def format_chunk(post: dict) -> str:
    """Build the text chunk for a single post.

    Args:
        post: Mapping with a ``'text'`` entry (the post body) and a
            ``'comments'`` entry (a list of single-comment dicts).

    Returns:
        ``"Post: <text>\\n"`` and, when the post has comments, a
        ``Comments:`` section listing every comment numbered from 1.

    Raises:
        KeyError: If ``'text'`` or ``'comments'`` is missing from ``post``.
    """
    post_text = post['text']
    comments: list[dict] = post['comments']
    chunk = f"Post: {post_text}\n"
    if comments:
        comments_str = ''.join(
            [_format_comment(i, comment) for i, comment in enumerate(comments, start=1)]
        )
        chunk += f'Comments:\n{comments_str}'
    return chunk


def build_chunks(data) -> list[str]:
    """Return one formatted chunk per post in ``data``, in input order."""
    return [format_chunk(post) for post in data]


def main(input_path: str = INPUT_PATH, output_path: str = OUTPUT_PATH) -> None:
    """Read posts from ``input_path`` (JSON) and write chunks to ``output_path`` (CSV).

    The CSV has a single ``chunks`` column and no index column.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # locale encoding, which can mis-decode non-ASCII text.
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    df = pd.DataFrame({"chunks": build_chunks(data)})
    df.to_csv(output_path, index=False)


if __name__ == "__main__":
    main()
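# Disabled variant: "raw" export of the same posts without the
# "Post:" / "Comments:" / "Replies:" labels, numbering or bullets, written to
# chunks_javea_raw.csv. It reuses `data` and appends to `chunks` without
# re-initialising it, so start from an empty list if this is re-enabled.
# for post in data: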
# post_text = post['text']
# comments: list[dict] = post['comments']
# comments_str = ''
# for i, comment in enumerate(comments):
# comment_text = list(comment.keys())[0]
# replies = comment[comment_text]
# reply_str = '\n'
# for j, reply in enumerate(replies):
# if j + 1 == len(replies):
# reply_str += f'{reply}'
# else:
# reply_str += f'{reply}\n'
# comments_str += f'{comment_text}\n'
# if replies:
# comments_str += f'{reply_str}\n'
#
# chunk = f"{post_text}\n"
# if comments:
# chunk += f'\n{comments_str}'
# chunks.append(chunk)
# df = pd.DataFrame({"chunks": chunks})
# df.to_csv('chunks_javea_raw.csv', index=False)
|