File size: 1,168 Bytes
f23978c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import os
import subprocess

# Make sure a Java runtime is available before the KoNLPy taggers are created
# further down (KoNLPy documents Java as an installation requirement).
try:
    subprocess.run(["java", "-version"], check=True)
except FileNotFoundError:
    # `java` is not on PATH: install a JDK through apt.
    print("Java is not installed. Installing Java...")
    subprocess.run(["apt-get", "update"], check=True)
    subprocess.run(["apt-get", "install", "-y", "default-jdk"], check=True)  # or 'openjdk-17-jdk'

# Set the JAVA_HOME environment variable.
# The OpenJDK 17 / amd64 directory stays the first choice. `default-jdk` may
# install a different version or architecture, so fall back to the
# `/usr/lib/jvm/default-java` symlink that Debian/Ubuntu's default JDK
# packages provide instead of failing right after a successful install.
_java_home_candidates = (
    "/usr/lib/jvm/java-17-openjdk-amd64",
    "/usr/lib/jvm/default-java",
)
java_home = next(
    (path for path in _java_home_candidates if os.path.exists(path)),
    None,
)
if java_home is None:
    # None of the known JDK locations exist, so there is nothing valid to export.
    raise EnvironmentError("JAVA_HOME could not be set because the path does not exist.")
os.environ['JAVA_HOME'] = java_home

print(f"JAVA_HOME is set to {java_home}")



from konlpy.tag import Okt, Komoran

# Build both taggers once at import time; tokenize() below reuses these
# module-level instances on every call instead of constructing new ones.
# NOTE(review): presumably these constructors start the JVM, which is why the
# Java/JAVA_HOME setup runs before this point — confirm against KoNLPy docs.
komoran = Komoran()
okt = Okt()

# 토큰화λ₯Ό μœ„ν•œ ν˜•νƒœμ†Œ 뢄석
def tokenize(data):
    """Tokenize ``data`` with Okt, re-analysing verbs and adjectives with Komoran.

    ``data`` is tagged by the module-level ``okt`` tagger with ``norm=True``
    and ``stem=False``. Each token tagged ``Verb`` or ``Adjective`` is handed
    to the module-level ``komoran`` tagger and replaced by the first element
    of every item Komoran returns (presumably the morpheme text). All other
    tokens are kept exactly as Okt produced them.

    Returns a flat list of the resulting tokens, in their original order.
    """
    predicate_tags = ('Verb', 'Adjective')
    tokens = []
    for tagged in okt.pos(data, norm=True, stem=False):
        surface, tag = tagged[0], tagged[1]
        if tag not in predicate_tags:
            # Non-predicate tokens pass through unchanged.
            tokens.append(surface)
            continue
        # Split the predicate further and keep only each piece's text.
        tokens.extend(morph[0] for morph in komoran.pos(surface))
    return tokens