Spaces:
Sleeping
Sleeping
# Copyright 2018 Google Inc. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License.
# Abseil wiring, selected by SPM_ABSL_PROVIDER.
#   "internal"           : compile the in-tree third_party/absl/flags/flag.cc.
#   "module" / "package" : compile no Abseil sources, link the imported absl::
#                          targets and define _USE_EXTERNAL_ABSL.
# Any other value leaves the variables below untouched.
if (SPM_ABSL_PROVIDER STREQUAL "internal")
  set(ABSL_FLAGS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/absl/flags/flag.cc)
elseif (SPM_ABSL_PROVIDER STREQUAL "module" OR SPM_ABSL_PROVIDER STREQUAL "package")
  # Nothing from third_party/absl goes into the source lists.
  set(ABSL_FLAGS_SRCS "")
  set(ABSL_STRINGS_SRCS "")

  list(APPEND SPM_LIBS
    absl::strings
    absl::flags
    absl::flags_parse
    absl::log
    absl::check)

  # Same macro on every toolchain; only the flag spelling differs.
  if (MSVC)
    add_definitions("/D_USE_EXTERNAL_ABSL")
  else()
    add_definitions("-D_USE_EXTERNAL_ABSL")
  endif()
endif()
# Protobuf wiring, selected by SPM_PROTOBUF_PROVIDER.
#   "internal" : use the pre-generated sources under builtin_pb/ and compile the
#                bundled protobuf-lite runtime from third_party/protobuf-lite.
#   "package"  : locate Protobuf with find_package(), generate the .pb.{h,cc}
#                files from the .proto sources and define
#                _USE_EXTERNAL_PROTOBUF.
if (SPM_PROTOBUF_PROVIDER STREQUAL "internal")
  set(SPM_PROTO_HDRS builtin_pb/sentencepiece.pb.h)
  set(SPM_PROTO_SRCS builtin_pb/sentencepiece.pb.cc)
  set(SPM_MODEL_PROTO_HDRS builtin_pb/sentencepiece_model.pb.h)
  set(SPM_MODEL_PROTO_SRCS builtin_pb/sentencepiece_model.pb.cc)

  # No external runtime library is linked; the runtime is compiled in instead.
  set(PROTOBUF_LITE_LIBRARY "")

  set(spm_protobuf_lite_dir ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite)
  set(PROTOBUF_LITE_SRCS "")
  foreach(spm_pb_src IN ITEMS
      arena.cc
      arenastring.cc
      bytestream.cc
      coded_stream.cc
      common.cc
      extension_set.cc
      generated_enum_util.cc
      generated_message_table_driven_lite.cc
      generated_message_util.cc
      implicit_weak_message.cc
      int128.cc
      io_win32.cc
      message_lite.cc
      parse_context.cc
      repeated_field.cc
      status.cc
      statusor.cc
      stringpiece.cc
      stringprintf.cc
      structurally_valid.cc
      strutil.cc
      time.cc
      wire_format_lite.cc
      zero_copy_stream.cc
      zero_copy_stream_impl.cc
      zero_copy_stream_impl_lite.cc)
    list(APPEND PROTOBUF_LITE_SRCS ${spm_protobuf_lite_dir}/${spm_pb_src})
  endforeach()

  # Each flag set is passed as one quoted string.
  if (MSVC)
    add_definitions("/DHAVE_PTHREAD /wd4018 /wd4514")
  else()
    add_definitions("-pthread -DHAVE_PTHREAD=1 -Wno-sign-compare -Wno-deprecated-declarations")
  endif()

  include_directories(${spm_protobuf_lite_dir})
  include_directories(builtin_pb)
elseif (SPM_PROTOBUF_PROVIDER STREQUAL "package")
  find_package(Protobuf REQUIRED)

  # Both spellings of the include-directory variable are added, in this order.
  include_directories(${Protobuf_INCLUDE_DIRS} ${PROTOBUF_INCLUDE_DIR})

  protobuf_generate_cpp(SPM_PROTO_SRCS SPM_PROTO_HDRS sentencepiece.proto)
  protobuf_generate_cpp(SPM_MODEL_PROTO_SRCS SPM_MODEL_PROTO_HDRS sentencepiece_model.proto)

  # The runtime comes from the located package, so nothing is compiled in-tree.
  set(PROTOBUF_LITE_SRCS "")

  if (MSVC)
    add_definitions("/D_USE_EXTERNAL_PROTOBUF")
  else()
    add_definitions("-D_USE_EXTERNAL_PROTOBUF")
  endif()
endif()
# Header search path shared by every target in this directory: the build
# directory first, then the in-tree third_party directory.
include_directories(
  ${CMAKE_CURRENT_BINARY_DIR}
  ${CMAKE_CURRENT_SOURCE_DIR}/../third_party)

# _USE_INTERNAL_STRING_VIEW is defined unconditionally; only the flag prefix
# depends on the compiler.
if (NOT MSVC)
  add_definitions("-D_USE_INTERNAL_STRING_VIEW")
else()
  add_definitions("/D_USE_INTERNAL_STRING_VIEW")
endif()
# Sources of the core `sentencepiece` library (used for both the shared and
# the static variant). Entry order is kept as-is.
set(SPM_SRCS
  # Protobuf runtime (empty unless bundled) and the protobuf message files.
  ${PROTOBUF_LITE_SRCS}
  ${SPM_PROTO_HDRS} ${SPM_PROTO_SRCS}
  ${SPM_MODEL_PROTO_HDRS} ${SPM_MODEL_PROTO_SRCS}

  # Headers.
  bpe_model.h common.h normalizer.h util.h freelist.h filesystem.h init.h
  sentencepiece_processor.h word_model.h model_factory.h char_model.h
  model_interface.h testharness.h unigram_model.h

  # Implementation files.
  bpe_model.cc char_model.cc error.cc filesystem.cc model_factory.cc
  model_interface.cc normalizer.cc sentencepiece_processor.cc
  unigram_model.cc util.cc word_model.cc

  # Bundled Abseil sources (empty when Abseil is provided externally).
  ${ABSL_STRINGS_SRCS}
  ${ABSL_FLAGS_SRCS})
# Sources of the `sentencepiece_train` library (shared and static variants).
# Entry order is kept as-is.
set(SPM_TRAIN_SRCS
  # Protobuf message headers.
  ${SPM_PROTO_HDRS} ${SPM_MODEL_PROTO_HDRS}

  # Headers.
  builder.h normalization_rule.h unicode_script.h unicode_script_map.h
  trainer_factory.h trainer_interface.h unigram_model_trainer.h
  word_model_trainer.h char_model_trainer.h bpe_model_trainer.h
  sentencepiece_trainer.h pretokenizer_for_training.h

  # Implementation files.
  builder.cc unicode_script.cc trainer_factory.cc trainer_interface.cc
  unigram_model_trainer.cc word_model_trainer.cc char_model_trainer.cc
  bpe_model_trainer.cc sentencepiece_trainer.cc pretokenizer_for_training.cc)
# Sources of the `spm_test` executable. Entry order is kept as-is.
set(SPM_TEST_SRCS
  # Protobuf message headers and the test harness header.
  ${SPM_PROTO_HDRS} ${SPM_MODEL_PROTO_HDRS}
  testharness.h

  # Test translation units, including the test entry point and harness.
  bpe_model_test.cc bpe_model_trainer_test.cc builder_test.cc
  char_model_test.cc char_model_trainer_test.cc filesystem_test.cc
  init_test.cc model_factory_test.cc model_interface_test.cc
  normalizer_test.cc sentencepiece_processor_test.cc
  sentencepiece_trainer_test.cc test_main.cc testharness.cc
  trainer_factory_test.cc trainer_interface_test.cc unicode_script_test.cc
  unigram_model_test.cc unigram_model_trainer_test.cc util_test.cc
  word_model_test.cc word_model_trainer_test.cc
  pretokenizer_for_training_test.cc)
# Libraries every sentencepiece target links: the external protobuf-lite
# library (the variable is empty with the bundled runtime) and the platform
# thread library.
find_package(Threads REQUIRED)
list(APPEND SPM_LIBS ${PROTOBUF_LITE_LIBRARY} Threads::Threads)

# Optional ICU dependency, enabled by SPM_ENABLE_NFKC_COMPILE. It also defines
# ENABLE_NFKC_COMPILE for the sources.
if (SPM_ENABLE_NFKC_COMPILE)
  find_package(ICU 4.4 REQUIRED COMPONENTS i18n data uc)
  include_directories(${ICU_INCLUDE_DIRS})
  add_definitions(-DENABLE_NFKC_COMPILE)
  list(APPEND SPM_LIBS
    ICU::i18n
    ICU::data
    ICU::uc)
endif()
# Optional TCMalloc support. SPM_TCMALLOC_STATIC selects the static archive
# name; otherwise the library is searched under its plain name. A missing
# library is reported but does not stop configuration.
if (SPM_ENABLE_TCMALLOC)
  set(spm_tcmalloc_name tcmalloc_minimal)
  if (SPM_TCMALLOC_STATIC)
    set(spm_tcmalloc_name libtcmalloc_minimal.a)
  endif()
  find_library(TCMALLOC_LIB NAMES ${spm_tcmalloc_name})

  if (NOT TCMALLOC_LIB)
    message(STATUS "Not Found TCMalloc: ${TCMALLOC_LIB}")
  else()
    message(STATUS "Found TCMalloc: ${TCMALLOC_LIB}")
    list(APPEND SPM_LIBS ${TCMALLOC_LIB})
    # Disable the compiler's builtin allocation functions.
    add_definitions(
      -fno-builtin-malloc
      -fno-builtin-calloc
      -fno-builtin-realloc
      -fno-builtin-free)
  endif()
endif()
# On the processor families matched below, look for libatomic and, when it is
# found, append it to SPM_LIBS so the sentencepiece targets link against it.
# (Presumably these architectures need libatomic for some atomic operations --
# confirm before extending the list.)
#
# CMAKE_SYSTEM_PROCESSOR is expanded inside quotes on purpose: when it is empty
# (for example a cross-compiling toolchain file that does not set it), an
# unquoted expansion leaves MATCHES without a left-hand operand and the
# configure step aborts. The single alternation matches exactly the same
# substrings as the previous chain of OR-ed MATCHES tests.
if ("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "arm|mips|m68k|ppc|powerpc|aarch|sh4")
  find_library(ATOMIC_LIB NAMES atomic libatomic.so libatomic.so.1)
  if (ATOMIC_LIB)
    message(STATUS "Found atomic: ${ATOMIC_LIB}")
    # The library is linked by name, not by the path that was found.
    list(APPEND SPM_LIBS "atomic")
  endif()
endif()
# Library targets.
#
# The static libraries (`sentencepiece-static`, `sentencepiece_train-static`)
# are always built. With SPM_ENABLE_SHARED, `sentencepiece` and
# `sentencepiece_train` are additional shared libraries; without it, those two
# names are aliases of the static libraries so the executables can link
# against the same names either way.
if (SPM_ENABLE_SHARED)
  add_library(sentencepiece SHARED ${SPM_SRCS})
  add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS})
  if (ANDROID)
    # On Android the shared libraries also link the `log` library.
    foreach(spm_shared_lib sentencepiece sentencepiece_train)
      target_link_libraries(${spm_shared_lib} log)
    endforeach()
  endif()
endif()

add_library(sentencepiece-static STATIC ${SPM_SRCS})
add_library(sentencepiece_train-static STATIC ${SPM_TRAIN_SRCS})

# Dependencies of the static libraries are passed on to whoever links them.
target_link_libraries(sentencepiece-static INTERFACE ${SPM_LIBS})
target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static ${SPM_LIBS})

if (NOT SPM_ENABLE_SHARED)
  add_library(sentencepiece ALIAS sentencepiece-static)
  add_library(sentencepiece_train ALIAS sentencepiece_train-static)
  set(SPM_INSTALLTARGETS sentencepiece-static sentencepiece_train-static)
else()
  target_link_libraries(sentencepiece ${SPM_LIBS})
  target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece)

  set(SPM_INSTALLTARGETS
    sentencepiece
    sentencepiece_train
    sentencepiece-static
    sentencepiece_train-static)

  set_target_properties(sentencepiece sentencepiece_train PROPERTIES
    SOVERSION 0
    VERSION 0.0.0
    WINDOWS_EXPORT_ALL_SYMBOLS YES)

  # Import libraries get a dedicated suffix on the Windows toolchains.
  if (MSVC)
    set_target_properties(sentencepiece sentencepiece_train PROPERTIES
      IMPORT_SUFFIX "_import.lib")
  elseif (MINGW)
    set_target_properties(sentencepiece sentencepiece_train PROPERTIES
      IMPORT_SUFFIX ".dll.a")
  endif()
endif()

# The static archives are written under the plain library names.
set_target_properties(sentencepiece-static PROPERTIES
  OUTPUT_NAME "sentencepiece")
set_target_properties(sentencepiece_train-static PROPERTIES
  OUTPUT_NAME "sentencepiece_train")
# Compiler settings for every toolchain except MSVC.
if (NOT MSVC)
  # Base flags go in front of whatever CMAKE_CXX_FLAGS already holds.
  # Coverage builds use -O0 and -coverage instead of -O3.
  if (SPM_COVERAGE)
    set(spm_base_cxx_flags "-O0 -Wall -fPIC -coverage")
  else()
    set(spm_base_cxx_flags "-O3 -Wall -fPIC")
  endif()
  set(CMAKE_CXX_FLAGS "${spm_base_cxx_flags} ${CMAKE_CXX_FLAGS}")

  if (SPM_ENABLE_TENSORFLOW_SHARED)
    add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
  endif()

  if (SPM_NO_THREADLOCAL)
    add_definitions(
      -DSPM_NO_THREADLOCAL=1
      -DGOOGLE_PROTOBUF_NO_THREADLOCAL=1)
  endif()

  # Per-file warning suppressions.
  set_source_files_properties(
    sentencepiece.pb.cc
    sentencepiece_model.pb.cc
    PROPERTIES COMPILE_FLAGS "-Wno-misleading-indentation")
  set_source_files_properties(
    ${SPM_TEST_SRCS}
    PROPERTIES COMPILE_FLAGS "-Wno-sign-compare")

  # The shared libraries are additionally compiled with PIC defined.
  if (SPM_ENABLE_SHARED)
    set_property(TARGET sentencepiece sentencepiece_train
      APPEND_STRING PROPERTY COMPILE_FLAGS " -DPIC")
  endif()
endif()
# Command-line tools. Each `spm_<name>` executable is built from
# `spm_<name>_main.cc`.
set(spm_cli_tools
  spm_encode
  spm_decode
  spm_normalize
  spm_train
  spm_export_vocab)

foreach(spm_cli_tool IN LISTS spm_cli_tools)
  add_executable(${spm_cli_tool} ${spm_cli_tool}_main.cc)
endforeach()

# spm_normalize and spm_train link the trainer library as well.
target_link_libraries(spm_encode sentencepiece)
target_link_libraries(spm_decode sentencepiece)
target_link_libraries(spm_normalize sentencepiece sentencepiece_train)
target_link_libraries(spm_train sentencepiece sentencepiece_train)
target_link_libraries(spm_export_vocab sentencepiece)

# compile_charsmap is built only when NFKC compilation is enabled; it is not
# added to the install set.
if (SPM_ENABLE_NFKC_COMPILE)
  add_executable(compile_charsmap compile_charsmap_main.cc)
  target_link_libraries(compile_charsmap sentencepiece sentencepiece_train)
endif()

list(APPEND SPM_INSTALLTARGETS ${spm_cli_tools})
# Install rules for the libraries and command-line tools. iOS builds also
# need a BUNDLE destination; on every other platform that clause is left out.
set(spm_bundle_clause "")
if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
  set(spm_bundle_clause BUNDLE DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()

install(TARGETS ${SPM_INSTALLTARGETS}
  ${spm_bundle_clause}
  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})

# Public headers.
install(
  FILES
    sentencepiece_trainer.h
    sentencepiece_processor.h
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

# The sentencepiece.proto header is installed too, unless the built-in
# protobuf provider is in use.
if (NOT SPM_PROTOBUF_PROVIDER STREQUAL "internal")
  install(FILES ${SPM_PROTO_HDRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
# Directory passed to the test binary as --test_srcdir, in native path form.
file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/data" data_dir)

# The unit-test executable is built for test builds and for coverage builds.
if (SPM_BUILD_TEST OR SPM_COVERAGE)
  enable_testing()

  add_executable(spm_test test_main.cc ${SPM_TEST_SRCS})
  target_link_libraries(spm_test sentencepiece sentencepiece_train)
  if (SPM_COVERAGE)
    # Coverage builds link gcov after the sentencepiece libraries.
    target_link_libraries(spm_test "-lgcov")
  endif()

  # Valgrind settings -- presumably picked up by CTest's memcheck mode
  # (TODO confirm).
  set(MEMORYCHECK_COMMAND_OPTIONS "--leak-check=full --show-leak-kinds=definite,possible --error-exitcode=1")
  find_program(CTEST_MEMORYCHECK_COMMAND NAMES valgrind)

  add_test(
    NAME sentencepiece_test
    COMMAND $<TARGET_FILE:spm_test> --test_srcdir=${data_dir})
endif()
# `coverage` target: run the unit tests, capture the counters with lcov, drop
# the paths matching the patterns below, and render an HTML report into
# lcov_html.
if (SPM_COVERAGE)
  add_custom_target(coverage
    COMMAND mkdir -p coverage
    COMMAND $<TARGET_FILE:spm_test> --test_srcdir=${data_dir}
    COMMAND lcov -c -d . -o coverage.info
    COMMAND lcov --remove coverage.info
            "include*" "/c++" "_test*" "testharness*" "third_party*" ".pb.*"
            -o coverage.info
    COMMAND mkdir -p lcov_html
    COMMAND genhtml -o lcov_html coverage.info)

  # The test binary has to be built before the target can run it.
  add_dependencies(coverage spm_test)
endif()
# iOS only: give every command-line tool the same bundle identifier.
# set_xcode_property is not defined in this file -- presumably it comes from
# the iOS toolchain file (TODO confirm).
if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
  foreach(spm_ios_tool
      spm_encode
      spm_decode
      spm_normalize
      spm_train
      spm_export_vocab)
    set_xcode_property(${spm_ios_tool} PRODUCT_BUNDLE_IDENTIFIER "SentencePiece" All)
  endforeach()
endif()