From a2a494d8194324be55e115546c1620a0c319429b Mon Sep 17 00:00:00 2001 From: lvhan028 Date: Sat, 24 Aug 2024 14:04:07 +0800 Subject: [PATCH] remove INIReader and change config to yaml format --- 3rdparty/INIReader.h | 501 ------------------ examples/cpp/llama/llama_config.ini | 82 --- examples/cpp/llama/llama_config.yaml | 81 +++ examples/cpp/llama/llama_triton_example.cc | 50 +- .../triton_backend/llama/LlamaTritonModel.cc | 38 +- src/turbomind/utils/cuda_utils.cc | 27 - src/turbomind/utils/cuda_utils.h | 4 +- 7 files changed, 130 insertions(+), 653 deletions(-) delete mode 100644 3rdparty/INIReader.h delete mode 100644 examples/cpp/llama/llama_config.ini create mode 100644 examples/cpp/llama/llama_config.yaml diff --git a/3rdparty/INIReader.h b/3rdparty/INIReader.h deleted file mode 100644 index 6ed9b5a5a..000000000 --- a/3rdparty/INIReader.h +++ /dev/null @@ -1,501 +0,0 @@ -// Read an INI file into easy-to-access name/value pairs. - -// inih and INIReader are released under the New BSD license. -// Go to the project home page for more info: -// -// https://github.com/benhoyt/inih (Initial repo) -// https://github.com/jtilly/inih (The reference of this header file) -/* inih -- simple .INI file parser -inih is released under the New BSD license (see LICENSE.txt). Go to the project -home page for more info: -https://github.com/benhoyt/inih -https://github.com/jtilly/inih -*/ - -#ifndef __INI_H__ -#define __INI_H__ - -/* Make this header file easier to include in C++ code */ -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* Typedef for prototype of handler function. */ -typedef int (*ini_handler)(void* user, const char* section, - const char* name, const char* value); - -/* Typedef for prototype of fgets-style reader function. */ -typedef char* (*ini_reader)(char* str, int num, void* stream); - -/* Parse given INI-style file. May have [section]s, name=value pairs - (whitespace stripped), and comments starting with ';' (semicolon). 
Section - is "" if name=value pair parsed before any section heading. name:value - pairs are also supported as a concession to Python's configparser. - For each name=value pair parsed, call handler function with given user - pointer as well as section, name, and value (data only valid for duration - of handler call). Handler should return nonzero on success, zero on error. - Returns 0 on success, line number of first error on parse error (doesn't - stop on first error), -1 on file open error, or -2 on memory allocation - error (only when INI_USE_STACK is zero). -*/ -int ini_parse(const char* filename, ini_handler handler, void* user); - -/* Same as ini_parse(), but takes a FILE* instead of filename. This doesn't - close the file when it's finished -- the caller must do that. */ -int ini_parse_file(FILE* file, ini_handler handler, void* user); - -/* Same as ini_parse(), but takes an ini_reader function pointer instead of - filename. Used for implementing custom or string-based I/O. */ -int ini_parse_stream(ini_reader reader, void* stream, ini_handler handler, - void* user); - -/* Nonzero to allow multi-line value parsing, in the style of Python's - configparser. If allowed, ini_parse() will call the handler with the same - name for each subsequent line parsed. */ -#ifndef INI_ALLOW_MULTILINE -#define INI_ALLOW_MULTILINE 1 -#endif - -/* Nonzero to allow a UTF-8 BOM sequence (0xEF 0xBB 0xBF) at the start of - the file. See http://code.google.com/p/inih/issues/detail?id=21 */ -#ifndef INI_ALLOW_BOM -#define INI_ALLOW_BOM 1 -#endif - -/* Nonzero to allow inline comments (with valid inline comment characters - specified by INI_INLINE_COMMENT_PREFIXES). Set to 0 to turn off and match - Python 3.2+ configparser behaviour. */ -#ifndef INI_ALLOW_INLINE_COMMENTS -#define INI_ALLOW_INLINE_COMMENTS 1 -#endif -#ifndef INI_INLINE_COMMENT_PREFIXES -#define INI_INLINE_COMMENT_PREFIXES ";" -#endif - -/* Nonzero to use stack, zero to use heap (malloc/free). 
*/ -#ifndef INI_USE_STACK -#define INI_USE_STACK 1 -#endif - -/* Stop parsing on first error (default is to keep parsing). */ -#ifndef INI_STOP_ON_FIRST_ERROR -#define INI_STOP_ON_FIRST_ERROR 0 -#endif - -/* Maximum line length for any line in INI file. */ -#ifndef INI_MAX_LINE -#define INI_MAX_LINE 200 -#endif - -#ifdef __cplusplus -} -#endif - -/* inih -- simple .INI file parser -inih is released under the New BSD license (see LICENSE.txt). Go to the project -home page for more info: -https://github.com/benhoyt/inih -*/ - -#if defined(_MSC_VER) && !defined(_CRT_SECURE_NO_WARNINGS) -#define _CRT_SECURE_NO_WARNINGS -#endif - -#include -#include -#include - -#if !INI_USE_STACK -#include -#endif - -#define MAX_SECTION 50 -#define MAX_NAME 50 - -/* Strip whitespace chars off end of given string, in place. Return s. */ -inline static char* rstrip(char* s) -{ - char* p = s + strlen(s); - while (p > s && isspace((unsigned char)(*--p))) - *p = '\0'; - return s; -} - -/* Return pointer to first non-whitespace char in given string. */ -inline static char* lskip(const char* s) -{ - while (*s && isspace((unsigned char)(*s))) - s++; - return (char*)s; -} - -/* Return pointer to first char (of chars) or inline comment in given string, - or pointer to null at end of string if neither found. Inline comment must - be prefixed by a whitespace character to register as a comment. */ -inline static char* find_chars_or_comment(const char* s, const char* chars) -{ -#if INI_ALLOW_INLINE_COMMENTS - int was_space = 0; - while (*s && (!chars || !strchr(chars, *s)) && - !(was_space && strchr(INI_INLINE_COMMENT_PREFIXES, *s))) { - was_space = isspace((unsigned char)(*s)); - s++; - } -#else - while (*s && (!chars || !strchr(chars, *s))) { - s++; - } -#endif - return (char*)s; -} - -/* Version of strncpy that ensures dest (size bytes) is null-terminated. 
*/ -inline static char* strncpy0(char* dest, const char* src, size_t size) -{ - strncpy(dest, src, size); - dest[size - 1] = '\0'; - return dest; -} - -/* See documentation in header file. */ -inline int ini_parse_stream(ini_reader reader, void* stream, ini_handler handler, - void* user) -{ - /* Uses a fair bit of stack (use heap instead if you need to) */ -#if INI_USE_STACK - char line[INI_MAX_LINE]; -#else - char* line; -#endif - char section[MAX_SECTION] = ""; - char prev_name[MAX_NAME] = ""; - - char* start; - char* end; - char* name; - char* value; - int lineno = 0; - int error = 0; - -#if !INI_USE_STACK - line = (char*)malloc(INI_MAX_LINE); - if (!line) { - return -2; - } -#endif - - /* Scan through stream line by line */ - while (reader(line, INI_MAX_LINE, stream) != NULL) { - lineno++; - - start = line; -#if INI_ALLOW_BOM - if (lineno == 1 && (unsigned char)start[0] == 0xEF && - (unsigned char)start[1] == 0xBB && - (unsigned char)start[2] == 0xBF) { - start += 3; - } -#endif - start = lskip(rstrip(start)); - - if (*start == ';' || *start == '#') { - /* Per Python configparser, allow both ; and # comments at the - start of a line */ - } -#if INI_ALLOW_MULTILINE - else if (*prev_name && *start && start > line) { - -#if INI_ALLOW_INLINE_COMMENTS - end = find_chars_or_comment(start, NULL); - if (*end) - *end = '\0'; - rstrip(start); -#endif - - /* Non-blank line with leading whitespace, treat as continuation - of previous name's value (as per Python configparser). 
*/ - if (!handler(user, section, prev_name, start) && !error) - error = lineno; - } -#endif - else if (*start == '[') { - /* A "[section]" line */ - end = find_chars_or_comment(start + 1, "]"); - if (*end == ']') { - *end = '\0'; - strncpy0(section, start + 1, sizeof(section)); - *prev_name = '\0'; - } - else if (!error) { - /* No ']' found on section line */ - error = lineno; - } - } - else if (*start) { - /* Not a comment, must be a name[=:]value pair */ - end = find_chars_or_comment(start, "=:"); - if (*end == '=' || *end == ':') { - *end = '\0'; - name = rstrip(start); - value = lskip(end + 1); -#if INI_ALLOW_INLINE_COMMENTS - end = find_chars_or_comment(value, NULL); - if (*end) - *end = '\0'; -#endif - rstrip(value); - - /* Valid name[=:]value pair found, call handler */ - strncpy0(prev_name, name, sizeof(prev_name)); - if (!handler(user, section, name, value) && !error) - error = lineno; - } - else if (!error) { - /* No '=' or ':' found on name[=:]value line */ - error = lineno; - } - } - -#if INI_STOP_ON_FIRST_ERROR - if (error) - break; -#endif - } - -#if !INI_USE_STACK - free(line); -#endif - - return error; -} - -/* See documentation in header file. */ -inline int ini_parse_file(FILE* file, ini_handler handler, void* user) -{ - return ini_parse_stream((ini_reader)fgets, file, handler, user); -} - -/* See documentation in header file. */ -inline int ini_parse(const char* filename, ini_handler handler, void* user) -{ - FILE* file; - int error; - - file = fopen(filename, "r"); - if (!file) - return -1; - error = ini_parse_file(file, handler, user); - fclose(file); - return error; -} - -#endif /* __INI_H__ */ - - -#ifndef __INIREADER_H__ -#define __INIREADER_H__ - -#include -#include -#include - -// Read an INI file into easy-to-access name/value pairs. (Note that I've gone -// for simplicity here rather than speed, but it should be pretty decent.) 
-class INIReader -{ -public: - // Empty Constructor - INIReader() {}; - - // Construct INIReader and parse given filename. See ini.h for more info - // about the parsing. - INIReader(std::string filename); - - // Construct INIReader and parse given file. See ini.h for more info - // about the parsing. - INIReader(FILE *file); - ~INIReader(); - // Return the result of ini_parse(), i.e., 0 on success, line number of - // first error on parse error, or -1 on file open error. - int ParseError() const; - - // Return the list of sections found in ini file - const std::set& Sections() const; - - // Get a string value from INI file, returning default_value if not found. - std::string Get(std::string section, std::string name, - std::string default_value) const; - std::string Get(std::string section, std::string name) const; - - // Get an integer (long) value from INI file, returning default_value if - // not found or not a valid integer (decimal "1234", "-1234", or hex "0x4d2"). - long GetInteger(std::string section, std::string name, long default_value) const; - long GetInteger(std::string section, std::string name) const; - - // Get a real (floating point double) value from INI file, returning - // default_value if not found or not a valid floating point value - // according to strtod(). - double GetReal(std::string section, std::string name, double default_value) const; - - // Get a single precision floating point number value from INI file, returning - // default_value if not found or not a valid floating point value - // according to strtof(). - float GetFloat(std::string section, std::string name, float default_value) const; - float GetFloat(std::string section, std::string name) const; - - // Get a boolean value from INI file, returning default_value if not found or if - // not a valid true/false value. Valid true values are "true", "yes", "on", "1", - // and valid false values are "false", "no", "off", "0" (not case sensitive). 
- bool GetBoolean(std::string section, std::string name, bool default_value) const; - -protected: - int _error; - std::map _values; - std::set _sections; - static std::string MakeKey(std::string section, std::string name); - static int ValueHandler(void* user, const char* section, const char* name, - const char* value); -}; - -#endif // __INIREADER_H__ - - -#ifndef __INIREADER__ -#define __INIREADER__ - -#include -#include -#include - -inline INIReader::INIReader(std::string filename) -{ - _error = ini_parse(filename.c_str(), ValueHandler, this); -} - -inline INIReader::INIReader(FILE *file) -{ - _error = ini_parse_file(file, ValueHandler, this); -} - -inline int INIReader::ParseError() const -{ - return _error; -} - -inline INIReader::~INIReader() { } - -inline const std::set& INIReader::Sections() const -{ - return _sections; -} - -inline std::string INIReader::Get(std::string section, std::string name, std::string default_value) const -{ - std::string key = MakeKey(section, name); - return _values.count(key) ? _values.at(key) : default_value; -} - -inline std::string INIReader::Get(std::string section, std::string name) const -{ - std::string key = MakeKey(section, name); - if(_values.count(key)) return _values.at(key); - else - { - printf("[ERROR] Does not find the section %s with name %s. \n", section.c_str(), name.c_str()); - exit(-1); - } -} - -inline long INIReader::GetInteger(std::string section, std::string name, long default_value) const -{ - std::string valstr = Get(section, name, ""); - const char* value = valstr.c_str(); - char* end; - // This parses "1234" (decimal) and also "0x4D2" (hex) - long n = strtol(value, &end, 0); - return end > value ? 
n : default_value; -} - -inline long INIReader::GetInteger(std::string section, std::string name) const -{ - std::string valstr = Get(section, name, ""); - const char* value = valstr.c_str(); - char* end; - // This parses "1234" (decimal) and also "0x4D2" (hex) - long n = strtol(value, &end, 0); - if(end <= value) - { - printf("[ERROR] Does not find the section %s with name %s. \n", section.c_str(), name.c_str()); - exit(-1); - } - return n; -} - -inline double INIReader::GetReal(std::string section, std::string name, double default_value) const -{ - std::string valstr = Get(section, name, ""); - const char* value = valstr.c_str(); - char* end; - double n = strtod(value, &end); - return end > value ? n : default_value; -} - -inline float INIReader::GetFloat(std::string section, std::string name, float default_value) const -{ - std::string valstr = Get(section, name, ""); - const char* value = valstr.c_str(); - char* end; - float n = strtof(value, &end); - return end > value ? n : default_value; -} - -inline float INIReader::GetFloat(std::string section, std::string name) const -{ - std::string valstr = Get(section, name, ""); - const char* value = valstr.c_str(); - char* end; - float n = strtof(value, &end); - if(end <= value) - { - printf("[ERROR] Does not find the section %s with name %s. 
\n", section.c_str(), name.c_str()); - exit(-1); - } - return n; -} - -inline bool INIReader::GetBoolean(std::string section, std::string name, bool default_value) const -{ - std::string valstr = Get(section, name, ""); - // Convert to lower case to make string comparisons case-insensitive - std::transform(valstr.begin(), valstr.end(), valstr.begin(), ::tolower); - if (valstr == "true" || valstr == "yes" || valstr == "on" || valstr == "1") - return true; - else if (valstr == "false" || valstr == "no" || valstr == "off" || valstr == "0") - return false; - else - return default_value; -} - -inline std::string INIReader::MakeKey(std::string section, std::string name) -{ - std::string key = section + "=" + name; - // Convert to lower case to make section/name lookups case-insensitive - std::transform(key.begin(), key.end(), key.begin(), ::tolower); - return key; -} - -inline int INIReader::ValueHandler(void* user, const char* section, const char* name, - const char* value) -{ - INIReader* reader = (INIReader*)user; - std::string key = MakeKey(section, name); - if (reader->_values[key].size() > 0) - reader->_values[key] += "\n"; - reader->_values[key] += value; - reader->_sections.insert(section); - return 1; -} - -#endif // __INIREADER__ diff --git a/examples/cpp/llama/llama_config.ini b/examples/cpp/llama/llama_config.ini deleted file mode 100644 index ed07d8038..000000000 --- a/examples/cpp/llama/llama_config.ini +++ /dev/null @@ -1,82 +0,0 @@ -[ft_instance_hyperparameter] -data_type=fp16 -enable_custom_all_reduce=0 -pipeline_para_size=1 -tensor_para_size=1 -; update model_dir path according to the actual situation -model_dir=/workspace/models/triton_models/weights/ - - -[request] -request_batch_size=8 -max_input_len=1 -request_output_len=2048 -beam_width=1 ; beam width for beam search -top_k=1 ; k value for top k sampling -top_p=0.0 ; p value for top p sampling -temperature=1.0 ; Use for sampling -repetition_penalty=1.00 ; Use for sampling -presence_penalty=0.0 ; 
Only one of repetition_penalty and presence_penalty are allowed. -len_penalty=0.0 -beam_search_diversity_rate=0.0 -; PJLM start/end ids -start_id=0 -end_id=1 - - -; --------------------- legacy params ------------------------- - -; LLaMA start/end ids -; start_id=1 -; end_id=2 - -[4999_llama] -head_num=80 -size_per_head=128 -vocab_size=65632 -num_layer=82 -rotary_embedding=128 -norm_eps=1e-5 -start_id=0 -end_id=1 -inter_size=27392 - -[llama_7B] -head_num=32 -size_per_head=128 -vocab_size=32000 -num_layer=32 -rotary_embedding=128 -start_id=1 -end_id=2 -inter_size=11008 - -[llama_13B] -head_num=40 -size_per_head=128 -vocab_size=32000 -num_layer=40 -rotary_embedding=128 -start_id=1 -end_id=2 -inter_size=13824 - -[llama_30B] -head_num=52 -size_per_head=128 -vocab_size=32000 -num_layer=60 -rotary_embedding=128 -start_id=1 -end_id=2 -inter_size=17920 - -[llama_65B] -head_num=64 -size_per_head=128 -vocab_size=32000 -num_layer=80 -rotary_embedding=128 -start_id=1 -end_id=2 -inter_size=22016 diff --git a/examples/cpp/llama/llama_config.yaml b/examples/cpp/llama/llama_config.yaml new file mode 100644 index 000000000..7ec404b6a --- /dev/null +++ b/examples/cpp/llama/llama_config.yaml @@ -0,0 +1,81 @@ +ft_instance_hyperparameter: + data_type: fp16 + enable_custom_all_reduce: 0 + pipeline_para_size: 1 + tensor_para_size: 1 + # update model_dir path according to the actual situation + model_dir: /workspace/models/triton_models/weights/ + + +request: + request_batch_size: 8 + max_input_len: 1 + request_output_len: 2048 + beam_width: 1 # beam width for beam search + top_k: 1 # k value for top k sampling + top_p: 0.0 # p value for top p sampling + temperature: 1.0 # Use for sampling + repetition_penalty: 1.00 # Use for sampling + presence_penalty: 0.0 # Only one of repetition_penalty and presence_penalty are allowed. 
+ len_penalty: 0.0 + beam_search_diversity_rate: 0.0 + # PJLM start/end ids + start_id: 0 + end_id: 1 + + + # --------------------- legacy params ------------------------- + + # LLaMA start/end ids + # start_id: 1 + # end_id: 2 +4999_llama: + head_num: 80 + size_per_head: 128 + vocab_size: 65632 + num_layer: 82 + rotary_embedding: 128 + norm_eps: 1e-5 + start_id: 0 + end_id: 1 + inter_size: 27392 + +llama_7B: + head_num: 32 + size_per_head: 128 + vocab_size: 32000 + num_layer: 32 + rotary_embedding: 128 + start_id: 1 + end_id: 2 + inter_size: 11008 + +llama_13B: + head_num: 40 + size_per_head: 128 + vocab_size: 32000 + num_layer: 40 + rotary_embedding: 128 + start_id: 1 + end_id: 2 + inter_size: 13824 + +llama_30B: + head_num: 52 + size_per_head: 128 + vocab_size: 32000 + num_layer: 60 + rotary_embedding: 128 + start_id: 1 + end_id: 2 + inter_size: 17920 + +llama_65B: + head_num: 64 + size_per_head: 128 + vocab_size: 32000 + num_layer: 80 + rotary_embedding: 128 + start_id: 1 + end_id: 2 + inter_size: 22016 diff --git a/examples/cpp/llama/llama_triton_example.cc b/examples/cpp/llama/llama_triton_example.cc index 11036cf1b..b0e513410 100644 --- a/examples/cpp/llama/llama_triton_example.cc +++ b/examples/cpp/llama/llama_triton_example.cc @@ -18,7 +18,7 @@ // Modified from // https://github.com/NVIDIA/FasterTransformer/blob/main/examples/cpp/multi_gpu_gpt/multi_gpu_gpt_triton_example.cc -#include "3rdparty/INIReader.h" +#include #include #include #include @@ -254,20 +254,24 @@ int read_start_ids(size_t batch_size, std::string file_name); std::vector>> -prepareRequest(std::string ini_name, const int node_id, const int gpu_count, std::vector* pointer_record, const std::string& csv_name) +prepareRequest(std::string config_file, const int node_id, const int gpu_count, std::vector* pointer_record, const std::string& csv_name) { - INIReader reader = INIReader(ini_name); - if (reader.ParseError() < 0) { - std::cout << "[ERROR] Can't load '" << ini_name << "'\n"; + YAML::Node 
reader; + try { + reader = YAML::Load(config_file); + } + catch (const YAML::Exception& e) { + std::cerr << "Error reading YAML config: " << e.what() << std::endl; ft::FT_CHECK(false); } + auto request = reader["request"]; - const size_t request_batch_size = reader.GetInteger("request", "request_batch_size"); + const size_t request_batch_size = request["request_batch_size"].as(); std::cerr << "request_batch_size=" << request_batch_size << "\n"; - const int start_id = reader.GetInteger("request", "start_id"); - const int end_id = reader.GetInteger("request", "end_id"); - const int max_input_len = reader.GetInteger("request", "max_input_len"); + const int start_id = request["start_id"].as(); + const int end_id = request["end_id"].as(); + const int max_input_len = request["max_input_len"].as(); std::vector v_start_ids; std::vector v_start_lengths; @@ -289,16 +293,16 @@ prepareRequest(std::string ini_name, const int node_id, const int gpu_count, std std::vector v_bad_words; RequestParam param; - param.beam_width = reader.GetInteger("request", "beam_width"); - param.request_output_len = reader.GetInteger("request", "request_output_len"); - param.beam_search_diversity_rate = reader.GetFloat("request", "beam_search_diversity_rate"); - param.runtime_top_k = reader.GetInteger("request", "top_k"); - param.runtime_top_p = reader.GetFloat("request", "top_p"); - param.temperature = reader.GetFloat("request", "temperature"); - param.len_penalty = reader.GetFloat("request", "len_penalty"); - param.repetition_penalty = reader.GetFloat("request", "repetition_penalty", 1.0f); - param.presence_penalty = reader.GetFloat("request", "presence_penalty", 0.0f); - param.min_length = reader.GetInteger("request", "min_length", 0); + param.beam_width = request["beam_width"].as(); + param.request_output_len = request["request_output_len"].as(); + param.beam_search_diversity_rate = request["beam_search_diversity_rate"].as(); + param.runtime_top_k = request["top_k"].as(); + param.runtime_top_p = 
request["top_p"].as(); + param.temperature = request["temperature"].as(); + param.len_penalty = request["len_penalty"].as(); + param.repetition_penalty = request["repetition_penalty"].as(1.0f); + param.presence_penalty = request["presence_penalty"].as(0.0f); + param.min_length = request["min_length"].as(0); param.random_seed = (unsigned long long int)0; param.start_id = start_id; param.end_id = end_id; @@ -361,11 +365,11 @@ int main(int argc, char* argv[]) // Note: Only supports that all nodes have same gpu count const int gpu_count = ft::getDeviceCount(); const int world_size = node_num * gpu_count; - printf("Recommend to specify the first parameter on the command line as the path to llama_config.ini\n"); - std::string ini_name = argc >= 2 ? std::string(argv[1]) : "../examples/cpp/llama/llama_config.ini"; + printf("Recommend to specify the first parameter on the command line as the path to llama_config.yaml\n"); + std::string config_file = argc >= 2 ? std::string(argv[1]) : "../examples/cpp/llama/llama_config.yaml"; // step 1: Create model - std::shared_ptr model = AbstractTransformerModel::createLlamaModel(ini_name); + std::shared_ptr model = AbstractTransformerModel::createLlamaModel(config_file); int tensor_para_size = model->getTensorParaSize(); int pipeline_para_size = model->getPipelineParaSize(); printf( @@ -406,7 +410,7 @@ int main(int argc, char* argv[]) std::vector pointer_record; // Used to prevent the pointers are // release after leaving functions std::vector>> request_list = - prepareRequest(ini_name, node_id, gpu_count, &pointer_record, csv_name); + prepareRequest(config_file, node_id, gpu_count, &pointer_record, csv_name); printf("[INFO] request is created \n"); // step 5: Forward diff --git a/src/turbomind/triton_backend/llama/LlamaTritonModel.cc b/src/turbomind/triton_backend/llama/LlamaTritonModel.cc index b49717fd0..4823b6a15 100644 --- a/src/turbomind/triton_backend/llama/LlamaTritonModel.cc +++ 
b/src/turbomind/triton_backend/llama/LlamaTritonModel.cc @@ -33,31 +33,35 @@ namespace ft = turbomind; -std::shared_ptr AbstractTransformerModel::createLlamaModel(std::string inifile) +std::shared_ptr AbstractTransformerModel::createLlamaModel(std::string config_file) { - INIReader reader = INIReader(inifile); - if (reader.ParseError() < 0) { - std::cout << "[ERROR] Can't load '" << inifile << "'\n"; - return nullptr; + YAML::Node reader; + try { + reader = YAML::Load(config_file); + } + catch (const YAML::Exception& e) { + std::cerr << "Error reading YAML config: " << e.what() << std::endl; + ft::FT_CHECK(false); } - const std::string data_type = reader.Get("ft_instance_hyperparameter", "data_type"); - int tensor_para_size = reader.GetInteger("ft_instance_hyperparameter", "tensor_para_size"); - std::string model_dir = reader.Get("ft_instance_hyperparameter", "model_dir"); + auto ft_instance_hyperparameter = reader["ft_instance_hyperparameter"]; + const std::string data_type = ft_instance_hyperparameter["data_type"].as(); + int tensor_para_size = ft_instance_hyperparameter["tensor_para_size"].as(); + std::string model_dir = ft_instance_hyperparameter["model_dir"].as(); if (data_type == "half" || data_type == "fp16") { return std::make_shared>( - reader.GetInteger("ft_instance_hyperparameter", "tensor_para_size"), - reader.GetInteger("ft_instance_hyperparameter", "pipeline_para_size"), - reader.GetInteger("ft_instance_hyperparameter", "enable_custom_all_reduce", 0), + ft_instance_hyperparameter["tensor_para_size"].as(), + ft_instance_hyperparameter["pipeline_para_size"].as(), + ft_instance_hyperparameter["enable_custom_all_reduce"].as(0), model_dir); } else if (data_type == "bf16") { #ifdef ENABLE_BF16 return std::make_shared>( - reader.GetInteger("ft_instance_hyperparameter", "tensor_para_size"), - reader.GetInteger("ft_instance_hyperparameter", "pipeline_para_size"), - reader.GetInteger("ft_instance_hyperparameter", "enable_custom_all_reduce", 0), + 
ft_instance_hyperparameter["tensor_para_size"].as(), + ft_instance_hyperparameter["pipeline_para_size"].as(), + ft_instance_hyperparameter["enable_custom_all_reduce"].as(0), model_dir); #else TM_LOG_ERROR("[ERROR] Turbomind is not built with ENABLE_BF16"); @@ -67,9 +71,9 @@ std::shared_ptr AbstractTransformerModel::createLlamaM else { #ifdef ENABLE_FP32 return std::make_shared>( - reader.GetInteger("ft_instance_hyperparameter", "tensor_para_size"), - reader.GetInteger("ft_instance_hyperparameter", "pipeline_para_size"), - reader.GetInteger("ft_instance_hyperparameter", "enable_custom_all_reduce", 0), + ft_instance_hyperparameter["tensor_para_size"].as(), + ft_instance_hyperparameter["pipeline_para_size"].as(), + ft_instance_hyperparameter["enable_custom_all_reduce"].as(0), model_dir); #else TM_LOG_ERROR("[ERROR] Turbomind is not built with ENABLE_BF32"); diff --git a/src/turbomind/utils/cuda_utils.cc b/src/turbomind/utils/cuda_utils.cc index db783c563..c13688ff3 100644 --- a/src/turbomind/utils/cuda_utils.cc +++ b/src/turbomind/utils/cuda_utils.cc @@ -366,33 +366,6 @@ cudaError_t getSetDevice(int i_device, int* o_device) return cudaSuccess; } -FtCudaDataType getModelFileType(std::string ini_file, std::string section_name) -{ - FtCudaDataType model_file_type; - INIReader reader = INIReader(ini_file); - if (reader.ParseError() < 0) { - TM_LOG_WARNING("Can't load %s. Use FP32 as default", ini_file.c_str()); - model_file_type = FtCudaDataType::FP32; - } - else { - std::string weight_data_type_str = std::string(reader.Get(section_name, "weight_data_type")); - if (weight_data_type_str.find("fp32") != std::string::npos) { - model_file_type = FtCudaDataType::FP32; - } - else if (weight_data_type_str.find("fp16") != std::string::npos) { - model_file_type = FtCudaDataType::FP16; - } - else if (weight_data_type_str.find("bf16") != std::string::npos) { - model_file_type = FtCudaDataType::BF16; - } - else { - TM_LOG_WARNING("Invalid type %s. 
Use FP32 as default", weight_data_type_str.c_str()); - model_file_type = FtCudaDataType::FP32; - } - } - return model_file_type; -} - bool is_16xx_series(const char* name) { const std::regex re(R"(GTX 16\d\d)"); diff --git a/src/turbomind/utils/cuda_utils.h b/src/turbomind/utils/cuda_utils.h index 533263604..4d14dfecd 100644 --- a/src/turbomind/utils/cuda_utils.h +++ b/src/turbomind/utils/cuda_utils.h @@ -16,7 +16,6 @@ #pragma once -#include "3rdparty/INIReader.h" #include "src/turbomind/macro.h" #include "src/turbomind/utils/cuda_bf16_wrapper.h" #include "src/turbomind/utils/logger.h" @@ -28,6 +27,7 @@ #include #include #include +#include #ifdef SPARSITY_ENABLED #include #endif @@ -384,8 +384,6 @@ struct getTypeFromCudaDataType { }; #endif -FtCudaDataType getModelFileType(std::string ini_file, std::string section_name); - // clang-format off template struct packed_type; template <> struct packed_type { using type = float; }; // we don't need to pack float by default