10
10
#include <cstdio>
#include <ctime>
#include <sstream>
#include <string>
#include <vector>
13
14
14
15
// trim whitespace from the beginning and end of a string
15
16
static std::string trim (const std::string & str) {
@@ -70,6 +71,26 @@ struct client {
70
71
std::vector<llama_token> tokens_prev;
71
72
};
72
73
74
// Print the current local date/time as a colored (magenta) banner line:
// "run parameters as at YYYY-MM-DD HH:MM:SS". Used to timestamp the run summary.
// NOTE(review): std::localtime returns a pointer to shared static storage and is
// not thread-safe — acceptable here since it is called once from the main thread.
static void print_date_time() {
    std::time_t current_time = std::time(nullptr);
    std::tm * local_time = std::localtime(&current_time);

    char buffer[80];
    // guard against localtime failure / zero-length strftime result: passing a
    // null tm* to strftime is undefined behavior, so fall back to a placeholder
    if (local_time == nullptr || std::strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", local_time) == 0) {
        snprintf(buffer, sizeof(buffer), "unknown time");
    }

    printf("\n\033[35mrun parameters as at %s\033[0m\n", buffer);
}
82
+
83
// Split `input` into substrings separated by `delimiter`.
// Semantics follow std::getline: consecutive delimiters produce empty strings,
// a trailing delimiter does NOT add a trailing empty element, and an empty
// input yields an empty vector.
static std::vector<std::string> split_string(const std::string & input, char delimiter) {
    std::vector<std::string> tokens;
    std::istringstream stream(input);
    std::string token;
    while (std::getline(stream, token, delimiter)) {
        tokens.push_back(token);
    }
    return tokens;
}
93
+
73
94
int main (int argc, char ** argv) {
74
95
srand (1234 );
75
96
@@ -104,6 +125,23 @@ int main(int argc, char ** argv) {
104
125
params.logits_all = true ;
105
126
std::tie (model, ctx) = llama_init_from_gpt_params (params);
106
127
128
+ // load the prompts from an external file if there are any
129
+ if (params.prompt .empty ()) {
130
+ printf (" \n\033 [32mNo new questions so proceed with build-in defaults.\033 [0m\n " );
131
+ } else {
132
+ // Output each line of the input params.prompts vector and copy to k_prompts
133
+ int index = 0 ;
134
+ printf (" \n\033 [32mNow printing the external prompt file %s\033 [0m\n\n " , params.prompt_file .c_str ());
135
+
136
+ std::vector<std::string> prompts = split_string (params.prompt , ' \n ' );
137
+ for (const auto & prompt : prompts) {
138
+ k_prompts.resize (index + 1 );
139
+ k_prompts[index ] = prompt;
140
+ index ++;
141
+ printf (" %3d prompt: %s\n " , index , prompt.c_str ());
142
+ }
143
+ }
144
+
107
145
fprintf (stderr, " \n\n " );
108
146
fflush (stderr);
109
147
@@ -233,7 +271,7 @@ int main(int argc, char ** argv) {
233
271
client.n_decoded = 0 ;
234
272
client.i_batch = batch.n_tokens - 1 ;
235
273
236
- LOG_TEE (" \033 [1mClient %3d, seq %4d, started decoding ...\033 [0m\n " , client.id , client.seq_id );
274
+ LOG_TEE (" \033 [31mClient %3d, seq %4d, started decoding ...\033 [0m\n " , client.id , client.seq_id );
237
275
238
276
g_seq_id += 1 ;
239
277
@@ -336,8 +374,8 @@ int main(int argc, char ** argv) {
336
374
337
375
const auto t_main_end = ggml_time_us ();
338
376
339
- LOG_TEE (" \033 [1mClient %3d, seq %4d , prompt %4d t, response %4d t, time %5.2f s, speed %5.2f t/s, cache miss %d \033 [0m \n\ n Input: %s\n Response : %s\n\n " ,
340
- client.id , client.seq_id , client.n_prompt , client.n_decoded ,
377
+ LOG_TEE (" \033 [31mClient %3d, seq %3d/%3d , prompt %4d t, response %4d t, time %5.2f s, speed %5.2f t/s, cache miss %d \033 [0m \n Input: %s\n\033 [35mResponse : %s\033 [0m \n\n " ,
378
+ client.id , client.seq_id , n_seq, client.n_prompt , client.n_decoded ,
341
379
(t_main_end - client.t_start_prompt ) / 1e6 ,
342
380
(double ) (client.n_prompt + client.n_decoded ) / (t_main_end - client.t_start_prompt ) * 1e6 ,
343
381
n_cache_miss,
@@ -357,13 +395,21 @@ int main(int argc, char ** argv) {
357
395
358
396
const auto t_main_end = ggml_time_us ();
359
397
360
- LOG_TEE (" \n\n " );
398
+ print_date_time ();
399
+
400
+ LOG_TEE (" \n %s: n_parallel = %d, n_sequences = %d, cont_batching = %d, system tokens = %d\n " , __func__, n_clients, n_seq, cont_batching, n_tokens_system);
401
+ if (params.prompt_file .empty ()) {
402
+ params.prompt_file = " used built-in defaults" ;
403
+ }
404
+ LOG_TEE (" External prompt file: \033 [32m%s\033 [0m\n " , params.prompt_file .c_str ());
405
+ LOG_TEE (" Model and path used: \033 [32m%s\033 [0m\n\n " , params.model .c_str ());
406
+
361
407
LOG_TEE (" Total prompt tokens: %6d, speed: %5.2f t/s\n " , n_total_prompt, (double ) (n_total_prompt ) / (t_main_end - t_main_start) * 1e6 );
362
408
LOG_TEE (" Total gen tokens: %6d, speed: %5.2f t/s\n " , n_total_gen, (double ) (n_total_gen ) / (t_main_end - t_main_start) * 1e6 );
363
409
LOG_TEE (" Total speed (AVG): %6s speed: %5.2f t/s\n " , " " , (double ) (n_total_prompt + n_total_gen) / (t_main_end - t_main_start) * 1e6 );
364
410
LOG_TEE (" Cache misses: %6d\n " , n_cache_miss);
365
411
366
- LOG_TEE (" \n\n " );
412
+ LOG_TEE (" \n " );
367
413
368
414
llama_print_timings (ctx);
369
415
0 commit comments