-
Notifications
You must be signed in to change notification settings - Fork 1
/
CMakeLists.txt
89 lines (78 loc) · 2.97 KB
/
CMakeLists.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved.
#
# This source code is licensed under the BSD-style license found in the LICENSE
# file in the root directory of this source tree.
#
# Build tokenizers.
#
# ### Editing this file ###
#
# This file should be formatted with
# ~~~
# cmake-format -i CMakeLists.txt
# ~~~
# It should also be cmake-lint clean.
#
cmake_minimum_required(VERSION 3.18)

# Default the C++ standard to C++17 for targets created in this directory and
# below (unless a target overrides it). CMAKE_CXX_STANDARD_REQUIRED makes CMake
# fail at configure time when the compiler cannot provide C++17 instead of
# silently decaying to an older standard and failing later with confusing
# compile errors.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

project(Tokenizers)

# TOKENIZERS_BUILD_TEST: when ON, googletest is fetched and one test executable
# is built per test/test_*.cpp file.
option(TOKENIZERS_BUILD_TEST "Build tests" OFF)
# TOKENIZERS_BUILD_TOOLS: when ON, the tools/tokenize_tool subdirectory is added
# to the build.
option(TOKENIZERS_BUILD_TOOLS "Build tools" OFF)
# Ignore weak attribute warning
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-attributes")

# Presumably read by the bundled abseil build under third-party/abseil-cpp
# (TODO confirm against that project's options).
set(ABSL_ENABLE_INSTALL ON)
set(ABSL_PROPAGATE_CXX_STD ON)

# Turn CMAKE_POSITION_INDEPENDENT_CODE on while the third-party projects are
# configured, then put it back to the value it had before. The save/restore is
# deliberately unquoted: if the variable was unset on entry, `_pic_flag` ends up
# unset too and the final set() unsets the variable again.
set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Paths are anchored at CMAKE_CURRENT_SOURCE_DIR (the directory of this file)
# rather than CMAKE_SOURCE_DIR (the top of the whole build), so the
# dependencies are still found when this project is itself pulled into a larger
# build with add_subdirectory().
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/abseil-cpp)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/re2)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/sentencepiece)

set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
# Collect the library sources: everything in src/ plus the bundled
# llama.cpp-unicode sources. CONFIGURE_DEPENDS asks the build to re-check the
# globs so that adding or removing a .cpp file triggers a re-configure instead
# of being silently missed.
file(GLOB tokenizers_source_files CONFIGURE_DEPENDS
     ${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp)
file(GLOB unicode_source_files CONFIGURE_DEPENDS
     ${CMAKE_CURRENT_SOURCE_DIR}/third-party/llama.cpp-unicode/src/*.cpp)

# The tokenizers static library.
add_library(tokenizers STATIC ${tokenizers_source_files}
                              ${unicode_source_files})

# Using abseil from sentencepiece/third_party
#
# The include directories are PUBLIC, so they are used to compile the library
# and are also propagated to every target that links against it. They are
# anchored at CMAKE_CURRENT_SOURCE_DIR so they stay valid when this project is
# consumed as a subdirectory of a larger build.
target_include_directories(
  tokenizers
  PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include
         ${CMAKE_CURRENT_SOURCE_DIR}/third-party/sentencepiece
         ${CMAKE_CURRENT_SOURCE_DIR}/third-party/sentencepiece/src
         ${CMAKE_CURRENT_SOURCE_DIR}/third-party/re2
         ${CMAKE_CURRENT_SOURCE_DIR}/third-party/json/single_include
         ${CMAKE_CURRENT_SOURCE_DIR}/third-party/llama.cpp-unicode/include)

# sentencepiece-static and re2::re2 are targets expected to come from the
# third-party subdirectories added earlier in this file.
target_link_libraries(tokenizers PUBLIC sentencepiece-static re2::re2)
# Build test
#
# When TOKENIZERS_BUILD_TEST is ON, fetch googletest and build one executable
# per test/test_*.cpp file, registering each with CTest under the file's base
# name.
if(TOKENIZERS_BUILD_TEST)
  enable_testing()
  include(FetchContent)

  # Download googletest at configure time, pinned to a fixed commit archive.
  # NOTE(review): the download has no URL_HASH, so its integrity is not
  # verified; consider adding one when the pinned commit is next updated.
  FetchContent_Declare(
    googletest
    # Specify the commit you depend on and update it regularly.
    URL https://github.com/google/googletest/archive/5376968f6948923e2411081fd9372e71a59d8e77.zip
  )
  # NOTE(review): this appears to be the setting recommended by the googletest
  # README so that gtest does not override the parent project's runtime-library
  # choice on Windows -- confirm.
  set(gtest_force_shared_crt
      ON
      CACHE BOOL "" FORCE)
  FetchContent_MakeAvailable(googletest)

  file(GLOB test_source_files CONFIGURE_DEPENDS
       ${CMAKE_CURRENT_SOURCE_DIR}/test/test_*.cpp)
  foreach(test_source_file ${test_source_files})
    # test/test_foo.cpp -> target and test name "test_foo".
    get_filename_component(test_name ${test_source_file} NAME_WE)
    message(STATUS "Configuring unit test ${test_name}")
    add_executable(${test_name} ${test_source_file})

    # The googletest headers are not listed here; linking gtest_main below is
    # expected to supply them through the target's usage requirements.
    target_include_directories(
      ${test_name}
      PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include
              ${CMAKE_CURRENT_SOURCE_DIR}/third-party/sentencepiece
              ${CMAKE_CURRENT_SOURCE_DIR}/third-party/re2
              ${CMAKE_CURRENT_SOURCE_DIR}/third-party/json/single_include)
    target_link_libraries(${test_name} PRIVATE gtest_main tokenizers)

    # Compile the location of the test fixtures into the test as the
    # RESOURCES_PATH string macro.
    target_compile_definitions(
      ${test_name}
      PRIVATE RESOURCES_PATH="${CMAKE_CURRENT_SOURCE_DIR}/test/resources")

    # NAME/COMMAND signature: CMake resolves the target name to the built
    # executable's location, which the legacy add_test(name exe) form does
    # not do.
    add_test(NAME ${test_name} COMMAND ${test_name})
  endforeach()
endif()
# Optional tools: only when TOKENIZERS_BUILD_TOOLS is ON is the
# tools/tokenize_tool subdirectory (path relative to this file) added to the
# build.
if(TOKENIZERS_BUILD_TOOLS)
  add_subdirectory(tools/tokenize_tool)
endif()