shangrilar committed on
Commit
df7eba9
·
1 Parent(s): df1bdb5

Upload utils.py

Browse files
Files changed (1) hide show
  1. utils.py +247 -0
utils.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ---
2
+ # jupyter:
3
+ # jupytext:
4
+ # formats: ipynb,py:light
5
+ # text_representation:
6
+ # extension: .py
7
+ # format_name: light
8
+ # format_version: '1.5'
9
+ # jupytext_version: 1.14.1
10
+ # kernelspec:
11
+ # display_name: Python 3 (ipykernel)
12
+ # language: python
13
+ # name: python3
14
+ # ---
15
+
16
+ # +
17
+ import os
18
+ import sys
19
+ import time
20
+ import urllib.request
21
+ import json
22
+ import random
23
+ import requests
24
+ from voice import voice_dict
25
+
26
# Credentials for the external services used in this module are read from the
# process environment; each name is None when its variable is not set.

# OpenAI chat-completions bearer token.
OPENAPI_KEY = os.environ.get('OPENAPI_KEY')

# Naver Cloud API gateway key pairs (text-to-speech, translation, summary).
CLOVA_VOICE_Client_ID = os.environ.get('CLOVA_VOICE_Client_ID')
CLOVA_VOICE_Client_Secret = os.environ.get('CLOVA_VOICE_Client_Secret')
PAPAGO_Translate_Client_ID = os.environ.get('PAPAGO_Translate_Client_ID')
PAPAGO_Translate_Client_Secret = os.environ.get('PAPAGO_Translate_Client_Secret')
SUMMARY_Client_ID = os.environ.get('SUMMARY_Client_ID')
SUMMARY_Client_Secret = os.environ.get('SUMMARY_Client_Secret')

# Mubert access token, sent as "pat" in the Mubert request payloads.
mubert_pat = os.environ.get('mubert_pat')
34
+
35
+
36
def get_story(first_sentence:str, num_sentences:int):
    """Ask the OpenAI chat-completions API to continue a story in Korean.

    NOTE: a second ``get_story`` (a streaming generator) is defined later in
    this file and rebinds the name, so this blocking variant is shadowed once
    the whole module has been executed.

    Args:
        first_sentence: Opening sentence of the novel.
        num_sentences: Number of follow-up sentences requested in the prompt.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer in time.
    """
    prompt = (
        "I will provide the first sentence of the novel, and please write "
        f"{num_sentences} sentences continuing the story in a first-person "
        "protagonist's perspective in Korean. Don't number the sentences.\n"
        f"\n\nFirst sentence: {first_sentence}"
    )
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers={"Content-Type": "application/json", "Authorization": f"Bearer {OPENAPI_KEY}"},
        data=json.dumps(payload),
        # Without a timeout a stalled connection blocks the caller forever.
        timeout=120,
    )
    # Surface API failures as an HTTP error instead of a KeyError on 'choices'.
    response.raise_for_status()
    return response.json()['choices'][0]['message']['content']
46
+
47
def get_voice(input_text:str, gender:str="female", age_group:str="youth", filename="voice.mp3"):
    """Synthesize ``input_text`` with the CLOVA Voice TTS API and save it as MP3.

    A speaker is picked at random from ``voice_dict[gender][age_group]``.

    Args:
        input_text: Text to be spoken.
        gender: "female" or "male".
        age_group: "child", "teenager", "youth" or "middle_aged".
        filename: Path the returned audio is written to.

    Returns:
        ``filename``. It is returned whether or not the request succeeded; on
        a non-200 answer nothing is written and the error is printed.

    Raises:
        KeyError: If ``gender`` / ``age_group`` is not present in ``voice_dict``.
    """
    speaker = random.choice(voice_dict[gender][age_group])
    response = requests.post(
        "https://naveropenapi.apigw.ntruss.com/tts-premium/v1/tts",
        headers={
            "X-NCP-APIGW-API-KEY-ID": CLOVA_VOICE_Client_ID,
            "X-NCP-APIGW-API-KEY": CLOVA_VOICE_Client_Secret,
        },
        data={"speaker": speaker, "text": input_text},
        # Without a timeout a stalled connection blocks the caller forever.
        timeout=60,
    )

    if response.status_code == 200:
        print("TTS mp3 저장")
        with open(filename, 'wb') as f:
            f.write(response.content)
    else:
        print("Error Code: " + str(response.status_code))
        # The error body is not guaranteed to be JSON; fall back to raw text
        # so that reporting the failure cannot itself raise.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        print("Error Message: " + str(detail))
    return filename
69
+
70
def translate_text(text:str):
    """Translate Korean ``text`` to English with the Papago NMT API.

    Args:
        text: Korean source text; it is percent-encoded before being sent.

    Returns:
        The translated text, or the string ``"Error Code: <status>"`` when the
        API answers with an HTTP error status.
    """
    payload = f"source=ko&target=en&text={urllib.parse.quote(text)}"

    request = urllib.request.Request("https://naveropenapi.apigw.ntruss.com/nmt/v1/translation")
    request.add_header("X-NCP-APIGW-API-KEY-ID", PAPAGO_Translate_Client_ID)
    request.add_header("X-NCP-APIGW-API-KEY", PAPAGO_Translate_Client_Secret)

    try:
        # The context manager closes the HTTP response (and its socket) even
        # if reading fails; the timeout prevents an indefinite hang.
        with urllib.request.urlopen(request, data=payload.encode("utf-8"), timeout=30) as response:
            response_body = response.read()
    except urllib.error.HTTPError as e:
        return f"Error Code: {e.code}"

    return json.loads(response_body.decode('utf-8'))['message']['result']['translatedText']
86
+
87
+
88
+ # -
89
+
90
def get_summary(input_text:str, summary_count:int = 5):
    """Summarize Korean text with the CLOVA text-summary API.

    Args:
        input_text: Text to summarize. Only the first 2000 characters are
            sent (presumably the service's input limit -- confirm), with
            surrounding whitespace stripped.
        summary_count: Value sent as the ``summaryCount`` option.

    Returns:
        The summary with its line breaks replaced by single spaces, or
        ``None`` when the API answers with a non-200 status (the status and
        body are printed in that case).
    """
    content = input_text[:2000].strip()

    payload = {
        "document": {
            "content": content,
        },
        "option": {
            "language": "ko",
            "model": "general",
            "tone": "0",
            "summaryCount": summary_count,
        },
    }
    response = requests.post(
        "https://naveropenapi.apigw.ntruss.com/text-summary/v1/summarize",
        headers={
            "X-NCP-APIGW-API-KEY-ID": SUMMARY_Client_ID,
            "X-NCP-APIGW-API-KEY": SUMMARY_Client_Secret,
            "Content-Type": "application/json",
        },
        data=json.dumps(payload),
        # Without a timeout a stalled connection blocks the caller forever.
        timeout=30,
    )

    if response.status_code != 200:
        print("Error Code: " + str(response.status_code))
        # The error body is not guaranteed to be JSON; fall back to raw text
        # so that reporting the failure cannot itself raise.
        try:
            detail = response.json()
        except ValueError:
            detail = response.text
        print("Error Message: " + str(detail))
        return None

    return ' '.join(response.json()['summary'].split('\n'))
118
+
119
+
120
def get_music(text, duration=300, max_wait=600):
    """Generate music for ``text`` with the Mubert API and download it.

    Steps: summarize the text (``get_summary``), translate the summary to
    English (``translate_text``), cut the prompt to 200 characters, request a
    track, poll until the track is reported as done, then download the file.

    Args:
        text: Source text the music should match.
        duration: Sent as the ``duration`` parameter of the track request.
        max_wait: Upper bound, in seconds, for each waiting phase (polling
            for generation and retrying the download). Prevents the function
            from looping forever when the service never becomes ready.

    Returns:
        ``"mubert_music.mp3"`` on success, ``None`` on any failure.
    """
    print('original text length: ', len(text))
    summary = get_summary(text, 3)
    if summary is None:
        # get_summary returns None on failure; len(None) would raise.
        print("Summary request failed; music was not generated.")
        return None
    print('summary text length: ', len(summary))
    translated_text = translate_text(summary)
    print('translated_text length: ', len(translated_text))
    translated_text = translated_text[:200]

    r = requests.post(
        'https://api-b2b.mubert.com/v2/TTMRecordTrack',
        json={
            "method": "TTMRecordTrack",
            "params": {
                "text": translated_text,
                "pat": mubert_pat,
                "mode": "track",
                "duration": duration,
                "bitrate": 128,
            },
        },
        timeout=30,
    )
    rdata = r.json()
    if rdata['status'] != 1:
        # Without a download link there is nothing to poll or fetch.
        print(f"Mubert track request failed: status {rdata['status']}")
        return None
    url = rdata['data']['tasks'][0]['download_link']

    # Phase 1: poll until the first task is reported as done.
    deadline = time.monotonic() + max_wait
    while True:
        r = requests.post(
            'https://api-b2b.mubert.com/v2/TrackStatus',
            json={
                "method": "TrackStatus",
                "params": {
                    "pat": mubert_pat,
                },
            },
            timeout=30,
        )
        if r.json()['data']['tasks'][0]['task_status_text'] == 'Done':
            break
        if time.monotonic() >= deadline:
            print("Mubert track generation timed out.")
            return None
        time.sleep(2)

    # Phase 2: fetch the file, retrying until it is served or time runs out.
    local_filename = "mubert_music.mp3"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    deadline = time.monotonic() + max_wait
    while True:
        response = requests.get(url, stream=True, headers=headers, timeout=30)
        if response.status_code == 200:
            break
        status = response.status_code
        # A streamed response holds its connection until it is closed.
        response.close()
        if time.monotonic() >= deadline:
            if status == 404:
                print("파일이 존재하지 않습니다.")
            else:
                print(f"파일 다운로드에 실패하였습니다. 에러 코드: {status}")
            return None
        time.sleep(1)

    try:
        with open(local_filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    finally:
        response.close()
    print(f"{local_filename} 파일이 저장되었습니다.")
    return local_filename
188
+
189
+
190
def get_story(first_sentence:str, num_sentences:int, chatbot=None, history=None):
    """Stream a Korean story continuation from the OpenAI chat-completions API.

    This is a generator. ``first_sentence`` is appended to ``history``; the
    first received content token appends a reply entry (prefixed with one
    space) and every later token overwrites that entry with the accumulated
    text (without the prefix).

    Args:
        first_sentence: Opening sentence of the novel.
        num_sentences: Number of follow-up sentences requested in the prompt.
        chatbot: Unused; kept so existing callers keep working.
        history: Flat list of alternating prompts and replies. It is mutated
            in place. A fresh list is created when omitted, so state is not
            shared between calls.

    Yields:
        ``(chat, history, response)`` after each content token, where ``chat``
        pairs up consecutive ``history`` entries as ``(prompt, reply)`` tuples
        and ``response`` is the underlying ``requests`` response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    if history is None:
        history = []
    history.append(first_sentence)

    prompt = (
        "I will provide the first sentence of the novel, and please write "
        f"{num_sentences} sentences continuing the story in a first-person "
        "protagonist's perspective in Korean. Don't number the sentences.\n"
        f"\n\nFirst sentence: {first_sentence}"
    )
    # Both the HTTP call and the API payload ask for streaming.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers={"Content-Type": "application/json", "Authorization": f"Bearer {OPENAPI_KEY}"},
        stream=True,
        data=json.dumps({
            "stream": True,
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        }),
        timeout=120,
    )
    # Fail with the HTTP error rather than a JSON/Key error while parsing.
    response.raise_for_status()

    token_counter = 0
    partial_words = ""
    for line_no, raw_line in enumerate(response.iter_lines()):
        # The first line of the stream is skipped.
        if line_no == 0:
            continue
        line = raw_line.decode()
        # Blank lines carry no data, and anything of 12 characters or fewer
        # (such as the "data: [DONE]" terminator) has no JSON payload.
        if not line or len(line) <= 12:
            continue
        # Drop the 6-character "data: " prefix and parse the event once.
        delta = json.loads(line[6:])['choices'][0]['delta']
        if "content" not in delta:
            continue

        partial_words = partial_words + delta["content"]
        if token_counter == 0:
            history.append(" " + partial_words)
        else:
            history[-1] = partial_words
        chat = [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)]
        token_counter += 1
        yield chat, history, response
225
+
226
+
227
def get_voice_filename(text, gender, age):
    """Synthesize ``text`` for a voice chosen by Korean gender/age labels.

    Args:
        text: Text passed on to ``get_voice``.
        gender: ``'남성'`` selects a male voice; any other value selects a
            female voice.
        age: One of ``"어린이"`` (child), ``"청소년"`` (teenager),
            ``"청년"`` (youth) or ``"중년"`` (middle_aged).

    Returns:
        The filename returned by ``get_voice`` (always requested as
        ``"voice.mp3"``), or ``None`` when ``age`` is not a known label, in
        which case ``get_voice`` is not called.
    """
    age_groups = {
        "어린이": "child",
        "청소년": "teenager",
        "청년": "youth",
        "중년": "middle_aged",
    }
    age_group = age_groups.get(age)
    if age_group is None:
        return None

    voice_gender = "male" if gender == '남성' else "female"
    return get_voice(text, gender=voice_gender, age_group=age_group, filename="voice.mp3")