"""Flatten posts with their comments and replies into one text chunk per post.

The input JSON is expected to be a list of post objects. Each post has a
``'text'`` entry and a ``'comments'`` entry; ``'comments'`` is a list of
dicts whose first key is the comment text and whose value is the collection
of replies to that comment. The resulting chunks are written to a CSV file
with a single ``chunks`` column.
"""

import json

import pandas as pd

# Default locations, relative to the working directory the script runs from.
INPUT_PATH = '../../data.json'
OUTPUT_PATH = 'chunks_javea.csv'


def format_comment(index: int, comment: dict) -> str:
    """Render one numbered comment, followed by its replies if it has any.

    Args:
        index: 1-based position of the comment within its post.
        comment: Mapping whose first key is the comment text and whose value
            is the collection of reply strings for that comment.

    Returns:
        The rendered comment, always terminated by a newline.

    Raises:
        ValueError: If ``comment`` is empty, so there is no comment text.
    """
    if not comment:
        raise ValueError(f'comment #{index} is empty: expected {{text: replies}}')

    # Only the first key/value pair is used, as in the original script.
    comment_text, replies = next(iter(comment.items()))

    lines = [f'{index}. {comment_text}']
    if replies:
        # The header is emitted only when there is at least one reply.
        lines.append('Replies:')
        lines.extend(f' • {reply}' for reply in replies)
    return '\n'.join(lines) + '\n'


def build_chunk(post: dict) -> str:
    """Build the text chunk for a single post.

    Args:
        post: Mapping with a ``'text'`` entry and a ``'comments'`` list.

    Returns:
        ``"Post: <text>\\n"``, followed by a ``"Comments:\\n"`` section when the
        post has at least one comment.
    """
    comments: list[dict] = post['comments']

    parts = [f"Post: {post['text']}\n"]
    if comments:
        parts.append('Comments:\n')
        parts.extend(
            format_comment(number, comment)
            for number, comment in enumerate(comments, start=1)
        )
    return ''.join(parts)


def build_chunks(data: list[dict]) -> list[str]:
    """Return one text chunk per post, in input order."""
    return [build_chunk(post) for post in data]


def main(input_path: str = INPUT_PATH, output_path: str = OUTPUT_PATH) -> None:
    """Read posts from ``input_path`` and write their chunks to ``output_path``.

    Args:
        input_path: JSON file holding the list of posts.
        output_path: CSV file to create; it gets a single ``chunks`` column.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(input_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    df = pd.DataFrame({"chunks": build_chunks(data)})
    df.to_csv(output_path, index=False)


if __name__ == '__main__':
    main()