Spaces:
Sleeping
Sleeping
File size: 1,168 Bytes
f23978c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import os
import subprocess
# Check whether Java is installed, and install it if it is missing.
try:
    subprocess.run(["java", "-version"], check=True)
except FileNotFoundError:
    # No `java` executable could be launched, so install a JDK through apt.
    print("Java is not installed. Installing Java...")
    subprocess.run(["apt-get", "update"], check=True)
    subprocess.run(["apt-get", "install", "-y", "default-jdk"], check=True)  # or 'openjdk-17-jdk'

# Set the JAVA_HOME environment variable.
# The OpenJDK 17 directory is tried first. Because the install step above uses
# `default-jdk`, which is not guaranteed to be OpenJDK 17 on amd64, fall back to
# the `default-java` symlink so a successful install does not end in an error.
java_home_candidates = (
    "/usr/lib/jvm/java-17-openjdk-amd64",
    "/usr/lib/jvm/default-java",
)
java_home = next(
    (candidate for candidate in java_home_candidates if os.path.exists(candidate)),
    None,
)
if java_home is None:
    raise EnvironmentError("JAVA_HOME could not be set because the path does not exist.")
os.environ['JAVA_HOME'] = java_home
print(f"JAVA_HOME is set to {java_home}")
from konlpy.tag import Okt, Komoran
# Tagger instances created once at module load; tokenize() calls okt.pos()
# and komoran.pos() on these shared objects.
komoran = Komoran()
okt = Okt()
# Morphological analysis for tokenization
def tokenize(data):
    """Tokenize *data* with Okt, splitting verbs and adjectives further with Komoran.

    The text is first tagged by ``okt.pos(data, norm=True, stem=False)``.
    Tokens tagged ``'Verb'`` or ``'Adjective'`` are passed to ``komoran.pos``
    and replaced by the first element of each item it returns; every other
    token is kept as Okt produced it.

    Args:
        data: Text to tokenize (passed straight to ``okt.pos``).

    Returns:
        A flat list of token strings, in the order they were produced.
    """
    tokens = []
    # Each tagged item is expected to be a (surface, tag) pair.
    for surface, tag in okt.pos(data, norm=True, stem=False):
        if tag not in ('Verb', 'Adjective'):
            tokens.append(surface)
            continue
        # Keep only the surface form of each Komoran result.
        tokens.extend(piece[0] for piece in komoran.pos(surface))
    return tokens