|
import openai |
|
import glob |
|
import os |
|
import shutil |
|
from tqdm import tqdm |
|
|
|
def load_api_key(file_path='api_key.txt'):
    """Read the API key from a ``name=value`` style text file.

    The file is scanned line by line and the value of the first ``api_key``
    entry that carries a non-empty value is returned. Whitespace around the
    entry name and around the value is ignored, so ``api_key = sk-...`` and
    indented entries are accepted as well.

    Args:
        file_path: Path of the text file to scan.

    Returns:
        The key string, or ``None`` when the file holds no usable
        ``api_key`` entry.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
    """
    with open(file_path, 'r') as f:
        for line in f:
            # Split on the first '=' only, so values containing '=' survive.
            name, sep, value = line.partition('=')
            if not sep or name.strip() != 'api_key':
                continue
            value = value.strip()
            # An empty value is not a key; keep looking instead of returning ''.
            if value:
                return value
    return None
|
|
|
# Configure the OpenAI client with the key returned by load_api_key()
# (called with its default file path).
openai.api_key = load_api_key()

# Abort before any request is made when no usable key is available. A falsy
# check also rejects an empty string. `raise SystemExit(message)` writes the
# message to stderr and exits with a non-zero status; the previous `exit()`
# call is an interactive helper from the `site` module and reported success
# (status 0) on this error path.
if not openai.api_key:
    raise SystemExit("Error: API key not found.")
|
|
|
# Source description files and the directory that receives the augmented
# copies.
# NOTE(review): both paths look like placeholders; the first is passed to
# glob.glob(), so it is presumably meant to become a pattern such as
# '<dir>/*.txt' - confirm before running.
files = glob.glob('/path/to/txt')
aug_dir = '/path/to/output'

for f in tqdm(files):
    file_id = os.path.basename(f)
    # Build the destination with os.path.join: plain `aug_dir + file_id`
    # drops the separator unless aug_dir ends with '/', which made the
    # existence check and the append target differ from the copied file.
    out_path = os.path.join(aug_dir, file_id)

    # Files that already have an output are skipped, so an interrupted run
    # can be resumed without repeating requests.
    if os.path.exists(out_path):
        continue

    with open(f, 'r') as file:
        lines = file.readlines()

    # Build "<index>: <caption>. " fragments from the text before the first
    # '#' on every line. Lines with no caption text are ignored (indexing
    # their last character used to raise IndexError).
    text = []
    for i, l in enumerate(lines):
        caption = l.split('#')[0].strip()
        if not caption:
            continue
        suffix = ' ' if caption.endswith('.') else '. '
        text.append(str(i) + ': ' + caption + suffix)
    text = ''.join(text)

    # Nothing to paraphrase: do not spend a request on an empty prompt.
    if not text:
        continue

    prompt = 'The following one or multiple descriptions are describing the same human activities: '
    prompt += text
    prompt += 'Generate 3 paraphrases to describe the same activities. One in a line in a plain text format ending with \n, without numbering or - at the beginning. Do not generate any other analysis except from the paraphrased descriptions.'

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": prompt}
        ]
    )
    pred = response.choices[0]['message']['content']

    # Make sure the target directory exists; otherwise shutil.copy would
    # create a regular file named like the directory.
    os.makedirs(aug_dir, exist_ok=True)
    shutil.copy(f, out_path)

    # Append the paraphrases after the original content of the copy.
    with open(out_path, 'a') as log_file:
        # Keep the first paraphrase from being glued onto the last caption
        # when the source file lacks a trailing newline.
        if lines and not lines[-1].endswith('\n'):
            log_file.write('\n')
        log_file.write(pred)
        if not pred.endswith('\n'):
            log_file.write('\n')
|
|
|
# Second pass: tidy every output file in place by removing leading bullet
# characters and blank lines.
# NOTE(review): the path looks like a placeholder; as written glob.glob()
# can only return that single path, so it is presumably meant to become a
# pattern matching the files inside the output directory - confirm.
files = glob.glob('/path/to/output')
for f in tqdm(files):
    with open(f, 'r') as file:
        lines = file.readlines()

    cleaned = []
    for line in lines:
        # lstrip("- ") removes any leading run of '-' and ' ' characters.
        entry = line.lstrip("- ")
        # Filter after stripping so that bullet-only lines ("- ") are dropped
        # too instead of being written back as blank lines.
        if not entry.strip():
            continue
        # Terminate every kept line, including a final one without newline.
        if not entry.endswith('\n'):
            entry += '\n'
        cleaned.append(entry)

    with open(f, 'w') as file:
        file.writelines(cleaned)