File size: 1,711 Bytes
0632cd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import json
import pandas as pd

# Load the post data. JSON text is UTF-8, so decode it explicitly rather than
# relying on the platform's locale encoding, which would mis-decode (or fail
# on) non-ASCII characters on systems whose default is not UTF-8.
with open('../../data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

def _format_replies(replies) -> str:
    """Return a 'Replies:' header followed by one bulleted line per reply.

    The lines are newline-separated and the result has no trailing newline.
    `replies` is presumably a list of reply strings -- confirm against the
    schema of data.json.
    """
    bullets = [f'     • {reply}' for reply in replies]
    return 'Replies:\n' + '\n'.join(bullets)


def _format_post(post: dict) -> str:
    """Render one post and its comments as a single text chunk.

    Reads `post['text']` and `post['comments']`. Each comment is treated as a
    mapping whose first key is the comment text and whose value holds that
    comment's replies; any further keys are ignored.
    """
    parts = [f"Post: {post['text']}\n"]
    comments: list[dict] = post['comments']
    if comments:
        parts.append('Comments:\n')
    for number, comment in enumerate(comments, start=1):
        # Only the first (text -> replies) entry of each comment is used.
        comment_text, replies = next(iter(comment.items()))
        parts.append(f'{number}. {comment_text}\n')
        # The replies section is omitted when a comment has no replies.
        if replies:
            parts.append(f'{_format_replies(replies)}\n')
    return ''.join(parts)


# One formatted text chunk per post, in the order the posts appear in `data`.
chunks = [_format_post(post) for post in data]
#
# Write the chunks out as a single-column CSV ("chunks"), omitting the
# DataFrame index so the file contains only the header and the chunk text.
df = pd.DataFrame(data={"chunks": chunks})
df.to_csv(path_or_buf='chunks_javea.csv', index=False)

# for post in data:
#     post_text = post['text']
#     comments: list[dict] = post['comments']
#     comments_str = ''
#     for i, comment in enumerate(comments):
#         comment_text = list(comment.keys())[0]
#         replies = comment[comment_text]
#         reply_str = '\n'
#         for j, reply in enumerate(replies):
#             if j + 1 == len(replies):
#                 reply_str += f'{reply}'
#             else:
#                 reply_str += f'{reply}\n'
#         comments_str += f'{comment_text}\n'
#         if replies:
#             comments_str += f'{reply_str}\n'
#
#     chunk = f"{post_text}\n"
#     if comments:
#         chunk += f'\n{comments_str}'
#     chunks.append(chunk)

# df = pd.DataFrame({"chunks": chunks})
# df.to_csv('chunks_javea_raw.csv', index=False)