-
-
Notifications
You must be signed in to change notification settings - Fork 83
/
binding.h
63 lines (46 loc) · 2.46 KB
/
binding.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
// binding.h — C-linkage bridge between a host language binding (cgo-style) and
// a llama.cpp backend. All functions take/return opaque void* handles for the
// model/context and parameter structs; the concrete types live in the .cpp side.
#pragma once
#ifdef __cplusplus
#include <vector>
#include <string>
extern "C" {
#endif
#include <stdbool.h>
// Per-token callback implemented on the caller's side (presumably exported
// from the host binding — verify). NOTE(review): return value looks like a
// continue/stop flag — confirm against the implementation.
extern unsigned char tokenCallback(void *, char *);
// Restore a previously saved context state from `statefile`; returns 0 on
// success (assumed C convention — confirm).
int load_state(void *ctx, char *statefile, char*modes);
// Run evaluation of `text` against the context using the given params handle.
int eval(void* params_ptr, void *ctx, char*text);
// Persist the current context state to `dst`.
void save_state(void *ctx, char *dst, char*modes);
// Load a model from `fname` and return an opaque state handle
// (presumably NULL on failure — confirm). The flags mirror llama.cpp's
// context/model options (f16 KV cache, mlock, mmap, GPU layer count, batch
// size, tensor split, NUMA, RoPE scaling, LoRA adapters, etc.).
void* load_model(const char *fname,
                 int n_ctx,
                 int n_seed,
                 bool memory_f16,
                 bool mlock,
                 bool embeddings,
                 bool mmap,
                 bool low_vram,
                 int n_gpu,
                 int n_batch,
                 const char *maingpu,
                 const char *tensorsplit,
                 bool numa,
                 float rope_freq_base,
                 float rope_freq_scale,
                 bool mul_mat_q, const char *lora, const char *lora_base, bool perplexity
);
// Compute embeddings for the prompt in `params_ptr`; results are written into
// the caller-allocated `res_embeddings` buffer (caller must size it — confirm
// expected length against the embedding dimension).
int get_embeddings(void* params_ptr, void* state_pr, float * res_embeddings);
// Same as get_embeddings but for an explicit token array of length tokenSize.
int get_token_embeddings(void* params_ptr, void* state_pr, int *tokens, int tokenSize, float * res_embeddings);
// Build an opaque sampling/generation parameter object. The caller owns the
// result and must release it with llama_free_params(). The long flag list
// mirrors llama.cpp sampling options (top-k/p, temperature, repeat penalty,
// mirostat, grammar, RoPE overrides, negative prompt, draft tokens, ...).
void* llama_allocate_params(const char *prompt, int seed, int threads, int tokens,
                            int top_k, float top_p, float temp, float repeat_penalty,
                            int repeat_last_n, bool ignore_eos, bool memory_f16,
                            int n_batch, int n_keep, const char** antiprompt, int antiprompt_count,
                            float tfs_z, float typical_p, float frequency_penalty, float presence_penalty, int mirostat, float mirostat_eta, float mirostat_tau, bool penalize_nl, const char *logit_bias, const char *session_file, bool prompt_cache_all, bool mlock, bool mmap, const char *maingpu, const char *tensorsplit ,
                            bool prompt_cache_ro, const char *grammar, float rope_freq_base, float rope_freq_scale, float negative_prompt_scale, const char* negative_prompt,
                            int n_draft);
// Speculative decoding using a draft model; generated text is written into the
// caller-allocated `result` buffer.
int speculative_sampling(void* params_ptr, void* target_model, void* draft_model, char* result, bool debug);
// Release a parameter object created by llama_allocate_params().
void llama_free_params(void* params_ptr);
// Release a model/state handle created by load_model().
void llama_binding_free_model(void* state);
// Tokenize the prompt held in `params_ptr`; token ids are written into
// `result` (caller-allocated — confirm required capacity).
int llama_tokenize_string(void* params_ptr, void* state_pr, int* result);
// Run full text generation; output is written into the caller-allocated
// `result` buffer. `debug` enables verbose logging.
int llama_predict(void* params_ptr, void* state_pr, char* result, bool debug);
#ifdef __cplusplus
}
// C++-only helpers used by the .cpp implementation to marshal C string arrays.
// NOTE(review): delete_vector implies create_vector heap-allocates by value
// copy — raw owning pointer; confirm callers always pair the two.
std::vector<std::string> create_vector(const char** strings, int count);
void delete_vector(std::vector<std::string>* vec);
#endif