JaveaAI / project /bot /documents.py
brestok's picture
Upload 40 files
0632cd1 verified
raw
history blame
1.71 kB
import json
import pandas as pd
def _format_comment(number: int, comment: dict) -> str:
    """Render one comment and its replies as a numbered text entry.

    Args:
        number: 1-based position of the comment within its post.
        comment: Mapping whose first key is the comment text and whose
            value is the collection of replies to it. Any further keys
            are ignored.

    Returns:
        ``"<number>. <text>\\n"``, followed by a ``Replies:`` section with
        one bullet line per reply when the comment has replies.

    Raises:
        ValueError: If ``comment`` is empty, so there is no text to render.
    """
    if not comment:
        raise ValueError('comment entry is empty; expected {text: replies}')
    comment_text, replies = next(iter(comment.items()))
    entry = f'{number}. {comment_text}\n'
    if replies:
        # One bullet per reply, newline-separated, closed by a final newline.
        bullets = '\n'.join(f' • {reply}' for reply in replies)
        entry += f'Replies:\n{bullets}\n'
    return entry


def _format_post(post: dict) -> str:
    """Render a post and its comments as a single text chunk.

    Args:
        post: Mapping with a ``'text'`` entry (the post body) and a
            ``'comments'`` entry (a list of comment mappings).

    Returns:
        ``"Post: <text>\\n"``, followed by a ``Comments:`` section when the
        post has at least one comment.
    """
    chunk = f"Post: {post['text']}\n"
    comments: list[dict] = post['comments']
    if comments:
        entries = [
            _format_comment(number, comment)
            for number, comment in enumerate(comments, start=1)
        ]
        chunk += 'Comments:\n' + ''.join(entries)
    return chunk


# Both paths are relative to the current working directory.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale encoding.
with open('../../data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# One text chunk per post, in input order.
chunks = [_format_post(post) for post in data]

df = pd.DataFrame({"chunks": chunks})
df.to_csv('chunks_javea.csv', index=False)
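# Disabled alternative export: the same traversal without the "Post:",
# "Comments:" and "Replies:" labels, comment numbering, or bullets, written
# to chunks_javea_raw.csv.
# for post in data: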
# post_text = post['text']
# comments: list[dict] = post['comments']
# comments_str = ''
# for i, comment in enumerate(comments):
# comment_text = list(comment.keys())[0]
# replies = comment[comment_text]
# reply_str = '\n'
# for j, reply in enumerate(replies):
# if j + 1 == len(replies):
# reply_str += f'{reply}'
# else:
# reply_str += f'{reply}\n'
# comments_str += f'{comment_text}\n'
# if replies:
# comments_str += f'{reply_str}\n'
#
# chunk = f"{post_text}\n"
# if comments:
# chunk += f'\n{comments_str}'
# chunks.append(chunk)
# df = pd.DataFrame({"chunks": chunks})
# df.to_csv('chunks_javea_raw.csv', index=False)