File size: 1,980 Bytes
41f97d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import openai 
import glob 
import os
import shutil
from tqdm import tqdm

def load_api_key(file_path='api_key.txt'):
    """Read the API key from a ``name=value`` style text file.

    The file is scanned line by line for an entry named ``api_key``; the
    text after the first ``=`` on that line is the key. Whitespace around
    the name and the value is ignored, so ``api_key=abc`` and
    ``  api_key = abc`` are both accepted.

    Args:
        file_path: Path of the text file that holds the key.

    Returns:
        The key as a string, or ``None`` when the file does not exist,
        contains no ``api_key`` entry, or the entry has an empty value.
    """
    try:
        with open(file_path, 'r') as f:
            for line in f:
                # partition() splits on the first '=' only, so a key that
                # itself contains '=' is returned intact.
                name, sep, value = line.partition('=')
                if not sep or name.strip() != 'api_key':
                    continue
                key = value.strip()
                # An empty value is treated as "no key" so that callers
                # checking for None also catch a blank entry.
                if key:
                    return key
    except FileNotFoundError:
        # A missing key file is reported the same way as a missing entry.
        return None
    return None

# Configure the OpenAI client with the key read from the default key file.
openai.api_key = load_api_key()

if openai.api_key is None:
    # Raising SystemExit with a message writes it to stderr and ends the
    # process with a non-zero status. The bare exit() helper is injected by
    # the `site` module (so it may be absent) and reports success (status 0).
    raise SystemExit("Error: API key not found.")

# Input description files and the directory that receives augmented copies.
# NOTE(review): both values look like placeholders to be edited before
# running; glob.glob() only expands wildcards, so a pattern such as
# '/path/to/txt/*.txt' is presumably intended — confirm.
files = glob.glob('/path/to/txt')
aug_dir = '/path/to/output'

# For every input file that has no augmented copy yet: build a prompt from
# its descriptions, request paraphrases, then write a copy of the file with
# the paraphrases appended.
for f in tqdm(files):

    file_id = os.path.basename(f)
    # Join with a separator so this is the same path shutil.copy() creates;
    # plain string concatenation pointed at a sibling of aug_dir instead.
    out_path = os.path.join(aug_dir, file_id)
    if os.path.exists(out_path):
        # Already processed on an earlier run.
        continue

    with open(f, 'r') as file:
        lines = file.readlines()

    # Keep only the text before the first '#' of each line and drop lines
    # that are empty after trimming (blank lines, or lines starting with '#').
    captions = []
    for l in lines:
        caption = l.split('#')[0].strip()
        if caption:
            captions.append(caption)
    if not captions:
        # Nothing to paraphrase; skip rather than send an empty prompt.
        continue

    # Number each description and make sure it ends with a period.
    text = []
    for i, caption in enumerate(captions):
        ending = ' ' if caption.endswith('.') else '. '
        text.append(str(i) + ': ' + caption + ending)
    text = ''.join(text)

    prompt = 'The following one or multiple descriptions are describing the same human activities: '
    prompt += text
    prompt += 'Generate 3 paraphrases to describe the same activities. One in a line in a plain text format ending with \n, without numbering or - at the beginning. Do not generate any other analysis except from the paraphrased descriptions.'

    # NOTE(review): ChatCompletion appears to be the pre-1.0 `openai` client
    # interface — confirm against the installed package version.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
                {"role": "user", "content": prompt}
            ]
    )
    pred = response.choices[0]['message']['content']

    # Create the output directory on demand; without it shutil.copy() would
    # create a regular file named after aug_dir.
    os.makedirs(aug_dir, exist_ok=True)
    shutil.copy(f, out_path)
    with open(out_path, 'a') as log_file:
        # Keep the first paraphrase from fusing with the last original line
        # when the source file does not end with a newline.
        if not lines[-1].endswith('\n'):
            log_file.write('\n')
        log_file.write(pred)

# Second pass over the augmented files: strip list markers the model may have
# prepended and drop empty lines.
# glob.glob() on a bare directory path returns the directory itself, which
# cannot be opened for reading, so enumerate the regular files inside aug_dir.
files = [p for p in glob.glob(os.path.join(aug_dir, '*')) if os.path.isfile(p)]
for f in tqdm(files):
    with open(f, 'r') as file:
        lines = file.readlines()

    cleaned = []
    for line in lines:
        # lstrip("- ") removes any leading run of '-' and space characters.
        stripped = line.lstrip("- ")
        # Filter after stripping so a line holding only a marker ("- ")
        # does not survive as an empty line.
        if stripped.strip():
            cleaned.append(stripped)
    lines = cleaned

    with open(f, 'w') as file:
        file.writelines(lines)