llama_cpp class

Dart FFI bindings to the native llama.cpp library, including the underlying ggml API.

Constructors

llama_cpp.new(DynamicLibrary dynamicLibrary)
The symbols are looked up in dynamicLibrary.
llama_cpp.fromLookup(Pointer<T> lookup<T extends NativeType>(String symbolName))
The symbols are looked up with lookup.
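For example, a minimal way to construct the bindings (a sketch: the shared-library file names below are assumptions and depend on how llama.cpp was built):

import 'dart:ffi';
import 'dart:io';

llama_cpp openLlamaCpp() {
  // Platform-specific library name; adjust to match your build output.
  final name = Platform.isWindows
      ? 'llama.dll'
      : (Platform.isMacOS ? 'libllama.dylib' : 'libllama.so');
  return llama_cpp(DynamicLibrary.open(name));
  // Equivalent: llama_cpp.fromLookup(DynamicLibrary.open(name).lookup);
}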

Properties

GGML_TENSOR_SIZE int
no setter
hashCode int
The hash code for this object.
no setter, inherited
runtimeType Type
A representation of the runtime type of the object.
no setter, inherited
sys_errlist Pointer<Pointer<Char>>
getter/setter pair
sys_nerr int
no setter

Methods

asprintf(Pointer<Pointer<Char>> arg0, Pointer<Char> arg1) int
clearerr(Pointer<FILE> arg0) → void
ctermid(Pointer<Char> arg0) Pointer<Char>
ctermid_r(Pointer<Char> arg0) Pointer<Char>
dprintf(int arg0, Pointer<Char> arg1) int
fclose(Pointer<FILE> arg0) int
fdopen(int arg0, Pointer<Char> arg1) Pointer<FILE>
feof(Pointer<FILE> arg0) int
ferror(Pointer<FILE> arg0) int
fflush(Pointer<FILE> arg0) int
fgetc(Pointer<FILE> arg0) int
fgetln(Pointer<FILE> arg0, Pointer<Size> __len) Pointer<Char>
fgetpos(Pointer<FILE> arg0, Pointer<fpos_t> arg1) int
fgets(Pointer<Char> arg0, int __size, Pointer<FILE> arg2) Pointer<Char>
fileno(Pointer<FILE> arg0) int
flockfile(Pointer<FILE> arg0) → void
fmemopen(Pointer<Void> __buf, int __size, Pointer<Char> __mode) Pointer<FILE>
fmtcheck(Pointer<Char> arg0, Pointer<Char> arg1) Pointer<Char>
fopen(Pointer<Char> __filename, Pointer<Char> __mode) Pointer<FILE>
fprintf(Pointer<FILE> arg0, Pointer<Char> arg1) int
fpurge(Pointer<FILE> arg0) int
fputc(int arg0, Pointer<FILE> arg1) int
fputs(Pointer<Char> arg0, Pointer<FILE> arg1) int
fread(Pointer<Void> __ptr, int __size, int __nitems, Pointer<FILE> __stream) int
freopen(Pointer<Char> arg0, Pointer<Char> arg1, Pointer<FILE> arg2) Pointer<FILE>
fscanf(Pointer<FILE> arg0, Pointer<Char> arg1) int
fseek(Pointer<FILE> arg0, int arg1, int arg2) int
fseeko(Pointer<FILE> __stream, int __offset, int __whence) int
fsetpos(Pointer<FILE> arg0, Pointer<fpos_t> arg1) int
ftell(Pointer<FILE> arg0) int
ftello(Pointer<FILE> __stream) int
ftrylockfile(Pointer<FILE> arg0) int
funlockfile(Pointer<FILE> arg0) → void
funopen(Pointer<Void> arg0, Pointer<NativeFunction<Int Function(Pointer<Void>, Pointer<Char>, Int)>> arg1, Pointer<NativeFunction<Int Function(Pointer<Void>, Pointer<Char>, Int)>> arg2, Pointer<NativeFunction<fpos_t Function(Pointer<Void>, fpos_t, Int)>> arg3, Pointer<NativeFunction<Int Function(Pointer<Void>)>> arg4) Pointer<FILE>
fwrite(Pointer<Void> __ptr, int __size, int __nitems, Pointer<FILE> __stream) int
getc(Pointer<FILE> arg0) int
getc_unlocked(Pointer<FILE> arg0) int
getchar() int
getchar_unlocked() int
getdelim(Pointer<Pointer<Char>> __linep, Pointer<Size> __linecapp, int __delimiter, Pointer<FILE> __stream) int
getline(Pointer<Pointer<Char>> __linep, Pointer<Size> __linecapp, Pointer<FILE> __stream) int
gets(Pointer<Char> arg0) Pointer<Char>
getw(Pointer<FILE> arg0) int
ggml_abort(Pointer<Char> file, int line, Pointer<Char> fmt) → void
ggml_abs(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_abs_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_acc(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) Pointer<ggml_tensor>
ggml_acc_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) Pointer<ggml_tensor>
ggml_add(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_add1(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_add1_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_add_cast(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_type type) Pointer<ggml_tensor>
ggml_add_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_add_rel_pos(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> pw, Pointer<ggml_tensor> ph) Pointer<ggml_tensor>
ggml_add_rel_pos_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> pw, Pointer<ggml_tensor> ph) Pointer<ggml_tensor>
ggml_arange(Pointer<ggml_context> ctx, double start, double stop, double step) Pointer<ggml_tensor>
ggml_are_same_shape(Pointer<ggml_tensor> t0, Pointer<ggml_tensor> t1) bool
ggml_are_same_stride(Pointer<ggml_tensor> t0, Pointer<ggml_tensor> t1) bool
ggml_argmax(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_argsort(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_sort_order order) Pointer<ggml_tensor>
ggml_backend_alloc_buffer(ggml_backend_t backend, int size) ggml_backend_buffer_t
ggml_backend_alloc_ctx_tensors(Pointer<ggml_context> ctx, ggml_backend_t backend) Pointer<ggml_backend_buffer>
ggml_backend_alloc_ctx_tensors_from_buft(Pointer<ggml_context> ctx, ggml_backend_buffer_type_t buft) Pointer<ggml_backend_buffer>
ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, int value) → void
ggml_backend_buffer_free(ggml_backend_buffer_t buffer) → void
ggml_backend_buffer_get_alignment(ggml_backend_buffer_t buffer) int
ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, Pointer<ggml_tensor> tensor) int
ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) Pointer<Void>
ggml_backend_buffer_get_max_size(ggml_backend_buffer_t buffer) int
ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) int
ggml_backend_buffer_get_type(ggml_backend_buffer_t buffer) ggml_backend_buffer_type_t
ggml_backend_buffer_get_usage(ggml_backend_buffer_t buffer) ggml_backend_buffer_usage
ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, Pointer<ggml_tensor> tensor) ggml_status
ggml_backend_buffer_is_host(ggml_backend_buffer_t buffer) bool
ggml_backend_buffer_name(ggml_backend_buffer_t buffer) Pointer<Char>
ggml_backend_buffer_reset(ggml_backend_buffer_t buffer) → void
ggml_backend_buffer_set_usage(ggml_backend_buffer_t buffer, ggml_backend_buffer_usage usage) → void
ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, int size) ggml_backend_buffer_t
ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) int
ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, Pointer<ggml_tensor> tensor) int
ggml_backend_buft_get_device(ggml_backend_buffer_type_t buft) ggml_backend_dev_t
ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) int
ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) bool
ggml_backend_buft_name(ggml_backend_buffer_type_t buft) Pointer<Char>
ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, Pointer<ggml_cgraph> graph, ggml_backend_eval_callback callback, Pointer<Void> user_data) bool
ggml_backend_cpu_buffer_from_ptr(Pointer<Void> ptr, int size) ggml_backend_buffer_t
ggml_backend_cpu_buffer_type() ggml_backend_buffer_type_t
ggml_backend_cpu_init() ggml_backend_t
ggml_backend_cpu_reg() ggml_backend_reg_t
ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, Pointer<Void> abort_callback_data) → void
ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) → void
ggml_backend_cpu_set_threadpool(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) → void
ggml_backend_dev_backend_reg(ggml_backend_dev_t device) ggml_backend_reg_t
ggml_backend_dev_buffer_from_host_ptr(ggml_backend_dev_t device, Pointer<Void> ptr, int size, int max_tensor_size) ggml_backend_buffer_t
ggml_backend_dev_buffer_type(ggml_backend_dev_t device) ggml_backend_buffer_type_t
ggml_backend_dev_by_name(Pointer<Char> name) ggml_backend_dev_t
ggml_backend_dev_by_type(ggml_backend_dev_type type) ggml_backend_dev_t
ggml_backend_dev_count() int
ggml_backend_dev_description(ggml_backend_dev_t device) Pointer<Char>
ggml_backend_dev_get(int index) ggml_backend_dev_t
ggml_backend_dev_get_props(ggml_backend_dev_t device, Pointer<ggml_backend_dev_props> props) → void
ggml_backend_dev_host_buffer_type(ggml_backend_dev_t device) ggml_backend_buffer_type_t
ggml_backend_dev_init(ggml_backend_dev_t device, Pointer<Char> params) ggml_backend_t
ggml_backend_dev_memory(ggml_backend_dev_t device, Pointer<Size> free, Pointer<Size> total) → void
ggml_backend_dev_name(ggml_backend_dev_t device) Pointer<Char>
ggml_backend_dev_offload_op(ggml_backend_dev_t device, Pointer<ggml_tensor> op) bool
ggml_backend_dev_supports_buft(ggml_backend_dev_t device, ggml_backend_buffer_type_t buft) bool
ggml_backend_dev_supports_op(ggml_backend_dev_t device, Pointer<ggml_tensor> op) bool
ggml_backend_dev_type1(ggml_backend_dev_t device) ggml_backend_dev_type
ggml_backend_device_register(ggml_backend_dev_t device) → void
ggml_backend_event_free(ggml_backend_event_t event) → void
ggml_backend_event_new(ggml_backend_dev_t device) ggml_backend_event_t
ggml_backend_event_record(ggml_backend_event_t event, ggml_backend_t backend) → void
ggml_backend_event_synchronize(ggml_backend_event_t event) → void
ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event) → void
ggml_backend_free(ggml_backend_t backend) → void
ggml_backend_get_alignment(ggml_backend_t backend) int
ggml_backend_get_default_buffer_type(ggml_backend_t backend) ggml_backend_buffer_type_t
ggml_backend_get_device(ggml_backend_t backend) ggml_backend_dev_t
ggml_backend_get_max_size(ggml_backend_t backend) int
ggml_backend_graph_compute(ggml_backend_t backend, Pointer<ggml_cgraph> cgraph) ggml_status
ggml_backend_graph_compute_async(ggml_backend_t backend, Pointer<ggml_cgraph> cgraph) ggml_status
ggml_backend_graph_copy1(ggml_backend_t backend, Pointer<ggml_cgraph> graph) ggml_backend_graph_copy
ggml_backend_graph_copy_free(ggml_backend_graph_copy copy) → void
ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) ggml_status
ggml_backend_graph_plan_create(ggml_backend_t backend, Pointer<ggml_cgraph> cgraph) ggml_backend_graph_plan_t
ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) → void
ggml_backend_guid(ggml_backend_t backend) ggml_guid_t
ggml_backend_init_best() ggml_backend_t
ggml_backend_init_by_name(Pointer<Char> name, Pointer<Char> params) ggml_backend_t
ggml_backend_init_by_type(ggml_backend_dev_type type, Pointer<Char> params) ggml_backend_t
ggml_backend_is_cpu(ggml_backend_t backend) bool
ggml_backend_load(Pointer<Char> path) ggml_backend_reg_t
ggml_backend_load_all() → void
ggml_backend_load_all_from_path(Pointer<Char> dir_path) → void
ggml_backend_name(ggml_backend_t backend) Pointer<Char>
ggml_backend_offload_op(ggml_backend_t backend, Pointer<ggml_tensor> op) bool
ggml_backend_reg_by_name(Pointer<Char> name) ggml_backend_reg_t
ggml_backend_reg_count() int
ggml_backend_reg_dev_count(ggml_backend_reg_t reg) int
ggml_backend_reg_dev_get(ggml_backend_reg_t reg, int index) ggml_backend_dev_t
ggml_backend_reg_get(int index) ggml_backend_reg_t
ggml_backend_reg_get_proc_address(ggml_backend_reg_t reg, Pointer<Char> name) Pointer<Void>
ggml_backend_reg_name(ggml_backend_reg_t reg) Pointer<Char>
ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, Pointer<ggml_cgraph> graph) bool
ggml_backend_sched_free(ggml_backend_sched_t sched) → void
ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i) ggml_backend_t
ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) int
ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched) int
ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) int
ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) int
ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, Pointer<ggml_tensor> node) ggml_backend_t
ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, Pointer<ggml_cgraph> graph) ggml_status
ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, Pointer<ggml_cgraph> graph) ggml_status
ggml_backend_sched_new(Pointer<ggml_backend_t> backends, Pointer<ggml_backend_buffer_type_t> bufts, int n_backends, int graph_size, bool parallel, bool op_offload) ggml_backend_sched_t
ggml_backend_sched_reserve(ggml_backend_sched_t sched, Pointer<ggml_cgraph> measure_graph) bool
ggml_backend_sched_reset(ggml_backend_sched_t sched) → void
ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, Pointer<Void> user_data) → void
ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, Pointer<ggml_tensor> node, ggml_backend_t backend) → void
ggml_backend_sched_synchronize(ggml_backend_sched_t sched) → void
ggml_backend_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) bool
ggml_backend_supports_op(ggml_backend_t backend, Pointer<ggml_tensor> op) bool
ggml_backend_synchronize(ggml_backend_t backend) → void
ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, Pointer<ggml_tensor> tensor, Pointer<Void> addr) ggml_status
ggml_backend_tensor_copy(Pointer<ggml_tensor> src, Pointer<ggml_tensor> dst) → void
ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, Pointer<ggml_tensor> src, Pointer<ggml_tensor> dst) → void
ggml_backend_tensor_get(Pointer<ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void
ggml_backend_tensor_get_async(ggml_backend_t backend, Pointer<ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void
ggml_backend_tensor_memset(Pointer<ggml_tensor> tensor, int value, int offset, int size) → void
ggml_backend_tensor_set(Pointer<ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void
ggml_backend_tensor_set_async(ggml_backend_t backend, Pointer<ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void
ggml_backend_unload(ggml_backend_reg_t reg) → void
ggml_backend_view_init(Pointer<ggml_tensor> tensor) ggml_status
ggml_bf16_to_fp32(ggml_bf16_t arg0) double
ggml_bf16_to_fp32_row(Pointer<ggml_bf16_t> arg0, Pointer<Float> arg1, int arg2) → void
ggml_blck_size(ggml_type type) int
ggml_build_backward_expand(Pointer<ggml_context> ctx, Pointer<ggml_cgraph> cgraph, Pointer<Pointer<ggml_tensor>> grad_accs) → void
ggml_build_forward_expand(Pointer<ggml_cgraph> cgraph, Pointer<ggml_tensor> tensor) → void
ggml_can_repeat(Pointer<ggml_tensor> t0, Pointer<ggml_tensor> t1) bool
ggml_cast(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_type type) Pointer<ggml_tensor>
ggml_clamp(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double min, double max) Pointer<ggml_tensor>
ggml_concat(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int dim) Pointer<ggml_tensor>
ggml_cont(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_cont_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0) Pointer<ggml_tensor>
ggml_cont_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1) Pointer<ggml_tensor>
ggml_cont_3d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2) Pointer<ggml_tensor>
ggml_cont_4d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) Pointer<ggml_tensor>
ggml_conv_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int p0, int d0) Pointer<ggml_tensor>
ggml_conv_1d_dw(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int p0, int d0) Pointer<ggml_tensor>
ggml_conv_1d_dw_ph(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int d0) Pointer<ggml_tensor>
ggml_conv_1d_ph(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s, int d) Pointer<ggml_tensor>
ggml_conv_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1) Pointer<ggml_tensor>
ggml_conv_2d_dw(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1) Pointer<ggml_tensor>
ggml_conv_2d_dw_direct(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int stride0, int stride1, int pad0, int pad1, int dilation0, int dilation1) Pointer<ggml_tensor>
ggml_conv_2d_s1_ph(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_conv_2d_sk_p0(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_conv_transpose_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int p0, int d0) Pointer<ggml_tensor>
ggml_conv_transpose_2d_p0(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int stride) Pointer<ggml_tensor>
ggml_cos(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_cos_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_count_equal(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_cpu_bf16_to_fp32(Pointer<ggml_bf16_t> arg0, Pointer<Float> arg1, int arg2) → void
ggml_cpu_fp16_to_fp32(Pointer<ggml_fp16_t> arg0, Pointer<Float> arg1, int arg2) → void
ggml_cpu_fp32_to_bf16(Pointer<Float> arg0, Pointer<ggml_bf16_t> arg1, int arg2) → void
ggml_cpu_fp32_to_fp16(Pointer<Float> arg0, Pointer<ggml_fp16_t> arg1, int arg2) → void
ggml_cpu_get_sve_cnt() int
ggml_cpu_has_amx_int8() int
ggml_cpu_has_arm_fma() int
ggml_cpu_has_avx() int
ggml_cpu_has_avx2() int
ggml_cpu_has_avx512() int
ggml_cpu_has_avx512_bf16() int
ggml_cpu_has_avx512_vbmi() int
ggml_cpu_has_avx512_vnni() int
ggml_cpu_has_avx_vnni() int
ggml_cpu_has_bmi2() int
ggml_cpu_has_dotprod() int
ggml_cpu_has_f16c() int
ggml_cpu_has_fma() int
ggml_cpu_has_fp16_va() int
ggml_cpu_has_llamafile() int
ggml_cpu_has_matmul_int8() int
ggml_cpu_has_neon() int
ggml_cpu_has_riscv_v() int
ggml_cpu_has_sme() int
ggml_cpu_has_sse3() int
ggml_cpu_has_ssse3() int
ggml_cpu_has_sve() int
ggml_cpu_has_vsx() int
ggml_cpu_has_vxe() int
ggml_cpu_has_wasm_simd() int
ggml_cpu_init() → void
ggml_cpy(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_cross_entropy_loss(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_cross_entropy_loss_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c) Pointer<ggml_tensor>
ggml_custom_4d(Pointer<ggml_context> ctx, ggml_type type, int ne0, int ne1, int ne2, int ne3, Pointer<Pointer<ggml_tensor>> args, int n_args, ggml_custom_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
ggml_custom_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<Pointer<ggml_tensor>> args, int n_args, ggml_custom_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
ggml_cycles() int
ggml_cycles_per_ms() int
ggml_diag(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_diag_mask_inf(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) Pointer<ggml_tensor>
ggml_diag_mask_inf_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) Pointer<ggml_tensor>
ggml_diag_mask_zero(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) Pointer<ggml_tensor>
ggml_diag_mask_zero_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) Pointer<ggml_tensor>
ggml_div(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_div_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_dup(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_dup_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_dup_tensor(Pointer<ggml_context> ctx, Pointer<ggml_tensor> src) Pointer<ggml_tensor>
ggml_element_size(Pointer<ggml_tensor> tensor) int
ggml_elu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_elu_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_exp(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_exp_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_flash_attn_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> q, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> d, bool masked) Pointer<ggml_tensor>
ggml_flash_attn_ext(Pointer<ggml_context> ctx, Pointer<ggml_tensor> q, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> mask, double scale, double max_bias, double logit_softcap) Pointer<ggml_tensor>
ggml_flash_attn_ext_get_prec(Pointer<ggml_tensor> a) ggml_prec
ggml_flash_attn_ext_set_prec(Pointer<ggml_tensor> a, ggml_prec prec) → void
ggml_fopen(Pointer<Char> fname, Pointer<Char> mode) Pointer<FILE>
ggml_format_name(Pointer<ggml_tensor> tensor, Pointer<Char> fmt) Pointer<ggml_tensor>
ggml_fp16_to_fp32(int arg0) double
ggml_fp16_to_fp32_row(Pointer<ggml_fp16_t> arg0, Pointer<Float> arg1, int arg2) → void
ggml_fp32_to_bf16(double arg0) ggml_bf16_t
ggml_fp32_to_bf16_row(Pointer<Float> arg0, Pointer<ggml_bf16_t> arg1, int arg2) → void
ggml_fp32_to_bf16_row_ref(Pointer<Float> arg0, Pointer<ggml_bf16_t> arg1, int arg2) → void
ggml_fp32_to_fp16(double arg0) int
ggml_fp32_to_fp16_row(Pointer<Float> arg0, Pointer<ggml_fp16_t> arg1, int arg2) → void
ggml_free(Pointer<ggml_context> ctx) → void
ggml_ftype_to_ggml_type(ggml_ftype ftype) ggml_type
ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, Pointer<ggml_cgraph> graph) bool
ggml_gallocr_free(ggml_gallocr_t galloc) → void
ggml_gallocr_get_buffer_size(ggml_gallocr_t galloc, int buffer_id) int
ggml_gallocr_new(ggml_backend_buffer_type_t buft) ggml_gallocr_t
ggml_gallocr_new_n(Pointer<ggml_backend_buffer_type_t> bufts, int n_bufs) ggml_gallocr_t
ggml_gallocr_reserve(ggml_gallocr_t galloc, Pointer<ggml_cgraph> graph) bool
ggml_gallocr_reserve_n(ggml_gallocr_t galloc, Pointer<ggml_cgraph> graph, Pointer<Int> node_buffer_ids, Pointer<Int> leaf_buffer_ids) bool
ggml_gated_linear_attn(Pointer<ggml_context> ctx, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> q, Pointer<ggml_tensor> g, Pointer<ggml_tensor> state, double scale) Pointer<ggml_tensor>
ggml_gelu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_gelu_erf(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_gelu_erf_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_gelu_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_gelu_quick(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_gelu_quick_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_get_data(Pointer<ggml_tensor> tensor) Pointer<Void>
ggml_get_data_f32(Pointer<ggml_tensor> tensor) Pointer<Float>
ggml_get_f32_1d(Pointer<ggml_tensor> tensor, int i) double
ggml_get_f32_nd(Pointer<ggml_tensor> tensor, int i0, int i1, int i2, int i3) double
ggml_get_first_tensor(Pointer<ggml_context> ctx) Pointer<ggml_tensor>
ggml_get_i32_1d(Pointer<ggml_tensor> tensor, int i) int
ggml_get_i32_nd(Pointer<ggml_tensor> tensor, int i0, int i1, int i2, int i3) int
ggml_get_max_tensor_size(Pointer<ggml_context> ctx) int
ggml_get_mem_buffer(Pointer<ggml_context> ctx) Pointer<Void>
ggml_get_mem_size(Pointer<ggml_context> ctx) int
ggml_get_name(Pointer<ggml_tensor> tensor) Pointer<Char>
ggml_get_next_tensor(Pointer<ggml_context> ctx, Pointer<ggml_tensor> tensor) Pointer<ggml_tensor>
ggml_get_no_alloc(Pointer<ggml_context> ctx) bool
ggml_get_rel_pos(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int qh, int kh) Pointer<ggml_tensor>
ggml_get_rows(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_get_rows_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c) Pointer<ggml_tensor>
ggml_get_tensor(Pointer<ggml_context> ctx, Pointer<Char> name) Pointer<ggml_tensor>
ggml_get_type_traits(ggml_type type) Pointer<ggml_type_traits>
ggml_get_type_traits_cpu(ggml_type type) Pointer<ggml_type_traits_cpu>
ggml_get_unary_op(Pointer<ggml_tensor> tensor) ggml_unary_op
ggml_graph_add_node(Pointer<ggml_cgraph> cgraph, Pointer<ggml_tensor> tensor) → void
ggml_graph_clear(Pointer<ggml_cgraph> cgraph) → void
ggml_graph_compute(Pointer<ggml_cgraph> cgraph, Pointer<ggml_cplan> cplan) ggml_status
ggml_graph_compute_with_ctx(Pointer<ggml_context> ctx, Pointer<ggml_cgraph> cgraph, int n_threads) ggml_status
ggml_graph_cpy(Pointer<ggml_cgraph> src, Pointer<ggml_cgraph> dst) → void
ggml_graph_dump_dot(Pointer<ggml_cgraph> gb, Pointer<ggml_cgraph> gf, Pointer<Char> filename) → void
ggml_graph_dup(Pointer<ggml_context> ctx, Pointer<ggml_cgraph> cgraph, bool force_grads) Pointer<ggml_cgraph>
ggml_graph_get_grad(Pointer<ggml_cgraph> cgraph, Pointer<ggml_tensor> node) Pointer<ggml_tensor>
ggml_graph_get_grad_acc(Pointer<ggml_cgraph> cgraph, Pointer<ggml_tensor> node) Pointer<ggml_tensor>
ggml_graph_get_tensor(Pointer<ggml_cgraph> cgraph, Pointer<Char> name) Pointer<ggml_tensor>
ggml_graph_n_nodes(Pointer<ggml_cgraph> cgraph) int
ggml_graph_node(Pointer<ggml_cgraph> cgraph, int i) Pointer<ggml_tensor>
ggml_graph_nodes(Pointer<ggml_cgraph> cgraph) Pointer<Pointer<ggml_tensor>>
ggml_graph_overhead() int
ggml_graph_overhead_custom(int size, bool grads) int
ggml_graph_plan(Pointer<ggml_cgraph> cgraph, int n_threads, Pointer<ggml_threadpool> threadpool) ggml_cplan
ggml_graph_print(Pointer<ggml_cgraph> cgraph) → void
ggml_graph_reset(Pointer<ggml_cgraph> cgraph) → void
ggml_graph_size(Pointer<ggml_cgraph> cgraph) int
ggml_group_norm(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_groups, double eps) Pointer<ggml_tensor>
ggml_group_norm_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_groups, double eps) Pointer<ggml_tensor>
ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) bool
ggml_hardsigmoid(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_hardswish(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_im2col(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1, bool is_2D, ggml_type dst_type) Pointer<ggml_tensor>
ggml_im2col_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<Int64> ne, int s0, int s1, int p0, int p1, int d0, int d1, bool is_2D) Pointer<ggml_tensor>
ggml_init(ggml_init_params params) Pointer<ggml_context>
ggml_is_3d(Pointer<ggml_tensor> tensor) bool
ggml_is_contiguous(Pointer<ggml_tensor> tensor) bool
ggml_is_contiguous_0(Pointer<ggml_tensor> tensor) bool
ggml_is_contiguous_1(Pointer<ggml_tensor> tensor) bool
ggml_is_contiguous_2(Pointer<ggml_tensor> tensor) bool
ggml_is_contiguous_channels(Pointer<ggml_tensor> tensor) bool
ggml_is_contiguously_allocated(Pointer<ggml_tensor> tensor) bool
ggml_is_empty(Pointer<ggml_tensor> tensor) bool
ggml_is_matrix(Pointer<ggml_tensor> tensor) bool
ggml_is_numa() bool
ggml_is_permuted(Pointer<ggml_tensor> tensor) bool
ggml_is_quantized(ggml_type type) bool
ggml_is_scalar(Pointer<ggml_tensor> tensor) bool
ggml_is_transposed(Pointer<ggml_tensor> tensor) bool
ggml_is_vector(Pointer<ggml_tensor> tensor) bool
ggml_l2_norm(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) Pointer<ggml_tensor>
ggml_l2_norm_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) Pointer<ggml_tensor>
ggml_leaky_relu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double negative_slope, bool inplace) Pointer<ggml_tensor>
ggml_log(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_log_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_log_set(ggml_log_callback log_callback, Pointer<Void> user_data) → void
ggml_map_custom1(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
ggml_map_custom1_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
ggml_map_custom2(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
ggml_map_custom2_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
ggml_map_custom3(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
ggml_map_custom3_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_t fun, int n_tasks, Pointer<Void> userdata) Pointer<ggml_tensor>
ggml_mean(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_mul(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_mul_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_mul_mat(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_mul_mat_id(Pointer<ggml_context> ctx, Pointer<ggml_tensor> as1, Pointer<ggml_tensor> b, Pointer<ggml_tensor> ids) Pointer<ggml_tensor>
ggml_mul_mat_set_prec(Pointer<ggml_tensor> a, ggml_prec prec) → void
ggml_n_dims(Pointer<ggml_tensor> tensor) int
ggml_nbytes(Pointer<ggml_tensor> tensor) int
ggml_nbytes_pad(Pointer<ggml_tensor> tensor) int
ggml_neg(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_neg_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_nelements(Pointer<ggml_tensor> tensor) int
ggml_new_buffer(Pointer<ggml_context> ctx, int nbytes) Pointer<Void>
ggml_new_f32(Pointer<ggml_context> ctx, double value) Pointer<ggml_tensor>
ggml_new_graph(Pointer<ggml_context> ctx) Pointer<ggml_cgraph>
ggml_new_graph_custom(Pointer<ggml_context> ctx, int size, bool grads) Pointer<ggml_cgraph>
ggml_new_i32(Pointer<ggml_context> ctx, int value) Pointer<ggml_tensor>
ggml_new_tensor(Pointer<ggml_context> ctx, ggml_type type, int n_dims, Pointer<Int64> ne) Pointer<ggml_tensor>
ggml_new_tensor_1d(Pointer<ggml_context> ctx, ggml_type type, int ne0) Pointer<ggml_tensor>
ggml_new_tensor_2d(Pointer<ggml_context> ctx, ggml_type type, int ne0, int ne1) Pointer<ggml_tensor>
ggml_new_tensor_3d(Pointer<ggml_context> ctx, ggml_type type, int ne0, int ne1, int ne2) Pointer<ggml_tensor>
ggml_new_tensor_4d(Pointer<ggml_context> ctx, ggml_type type, int ne0, int ne1, int ne2, int ne3) Pointer<ggml_tensor>
ggml_norm(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) Pointer<ggml_tensor>
ggml_norm_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) Pointer<ggml_tensor>
ggml_nrows(Pointer<ggml_tensor> tensor) int
ggml_numa_init(ggml_numa_strategy numa) → void
ggml_op_desc(Pointer<ggml_tensor> t) Pointer<Char>
ggml_op_name(ggml_op op) Pointer<Char>
ggml_op_symbol(ggml_op op) Pointer<Char>
ggml_opt_alloc(ggml_opt_context_t opt_ctx, bool backward) → void
ggml_opt_dataset_data(ggml_opt_dataset_t dataset) Pointer<ggml_tensor>
ggml_opt_dataset_free(ggml_opt_dataset_t dataset) → void
ggml_opt_dataset_get_batch(ggml_opt_dataset_t dataset, Pointer<ggml_tensor> data_batch, Pointer<ggml_tensor> labels_batch, int ibatch) → void
ggml_opt_dataset_get_batch_host(ggml_opt_dataset_t dataset, Pointer<Void> data_batch, int nb_data_batch, Pointer<Void> labels_batch, int ibatch) → void
ggml_opt_dataset_init(ggml_type type_data, ggml_type type_label, int ne_datapoint, int ne_label, int ndata, int ndata_shard) ggml_opt_dataset_t
ggml_opt_dataset_labels(ggml_opt_dataset_t dataset) Pointer<ggml_tensor>
ggml_opt_dataset_ndata(ggml_opt_dataset_t dataset) int
ggml_opt_dataset_shuffle(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, int idata) → void
ggml_opt_default_params(ggml_backend_sched_t backend_sched, ggml_opt_loss_type loss_type) ggml_opt_params
ggml_opt_epoch(ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, ggml_opt_result_t result_train, ggml_opt_result_t result_eval, int idata_split, ggml_opt_epoch_callback callback_train, ggml_opt_epoch_callback callback_eval) → void
ggml_opt_epoch_callback_progress_bar(bool train, ggml_opt_context_t opt_ctx, ggml_opt_dataset_t dataset, ggml_opt_result_t result, int ibatch, int ibatch_max, int t_start_us) → void
ggml_opt_eval(ggml_opt_context_t opt_ctx, ggml_opt_result_t result) → void
ggml_opt_fit(ggml_backend_sched_t backend_sched, Pointer<ggml_context> ctx_compute, Pointer<ggml_tensor> inputs, Pointer<ggml_tensor> outputs, ggml_opt_dataset_t dataset, ggml_opt_loss_type loss_type, ggml_opt_get_optimizer_params get_opt_pars, int nepoch, int nbatch_logical, double val_split, bool silent) → void
ggml_opt_free(ggml_opt_context_t opt_ctx) → void
ggml_opt_get_constant_optimizer_params(Pointer<Void> userdata) ggml_opt_optimizer_params
ggml_opt_get_default_optimizer_params(Pointer<Void> userdata) ggml_opt_optimizer_params
ggml_opt_grad_acc(ggml_opt_context_t opt_ctx, Pointer<ggml_tensor> node) Pointer<ggml_tensor>
ggml_opt_init(ggml_opt_params params) ggml_opt_context_t
ggml_opt_inputs(ggml_opt_context_t opt_ctx) Pointer<ggml_tensor>
ggml_opt_labels(ggml_opt_context_t opt_ctx) Pointer<ggml_tensor>
ggml_opt_loss(ggml_opt_context_t opt_ctx) Pointer<ggml_tensor>
ggml_opt_ncorrect(ggml_opt_context_t opt_ctx) Pointer<ggml_tensor>
ggml_opt_outputs(ggml_opt_context_t opt_ctx) Pointer<ggml_tensor>
ggml_opt_pred(ggml_opt_context_t opt_ctx) Pointer<ggml_tensor>
ggml_opt_prepare_alloc(ggml_opt_context_t opt_ctx, Pointer<ggml_context> ctx_compute, Pointer<ggml_cgraph> gf, Pointer<ggml_tensor> inputs, Pointer<ggml_tensor> outputs) → void
ggml_opt_reset(ggml_opt_context_t opt_ctx, bool optimizer) → void
ggml_opt_result_accuracy(ggml_opt_result_t result, Pointer<Double> accuracy, Pointer<Double> unc) → void
ggml_opt_result_free(ggml_opt_result_t result) → void
ggml_opt_result_init() ggml_opt_result_t
ggml_opt_result_loss(ggml_opt_result_t result, Pointer<Double> loss, Pointer<Double> unc) → void
ggml_opt_result_ndata(ggml_opt_result_t result, Pointer<Int64> ndata) → void
ggml_opt_result_pred(ggml_opt_result_t result, Pointer<Int32> pred) → void
ggml_opt_result_reset(ggml_opt_result_t result) → void
ggml_opt_static_graphs(ggml_opt_context_t opt_ctx) bool
ggml_opt_step_adamw(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> grad, Pointer<ggml_tensor> m, Pointer<ggml_tensor> v, Pointer<ggml_tensor> adamw_params) Pointer<ggml_tensor>
ggml_out_prod(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_pad(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int p0, int p1, int p2, int p3) Pointer<ggml_tensor>
ggml_pad_reflect_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int p0, int p1) Pointer<ggml_tensor>
ggml_permute(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int axis0, int axis1, int axis2, int axis3) Pointer<ggml_tensor>
ggml_pool_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_op_pool op, int k0, int s0, int p0) Pointer<ggml_tensor>
ggml_pool_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_op_pool op, int k0, int k1, int s0, int s1, double p0, double p1) Pointer<ggml_tensor>
ggml_pool_2d_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> af, ggml_op_pool op, int k0, int k1, int s0, int s1, double p0, double p1) Pointer<ggml_tensor>
ggml_print_object(Pointer<ggml_object> obj) → void
ggml_print_objects(Pointer<ggml_context> ctx) → void
ggml_quantize_chunk(ggml_type type, Pointer<Float> src, Pointer<Void> dst, int start, int nrows, int n_per_row, Pointer<Float> imatrix) int
ggml_quantize_free() → void
ggml_quantize_init(ggml_type type) → void
ggml_quantize_requires_imatrix(ggml_type type) bool
ggml_relu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_relu_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_repeat(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_repeat_4d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) Pointer<ggml_tensor>
ggml_repeat_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_reset(Pointer<ggml_context> ctx) → void
ggml_reshape(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_reshape_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0) Pointer<ggml_tensor>
ggml_reshape_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1) Pointer<ggml_tensor>
ggml_reshape_3d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2) Pointer<ggml_tensor>
ggml_reshape_4d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) Pointer<ggml_tensor>
ggml_rms_norm(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) Pointer<ggml_tensor>
ggml_rms_norm_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, double eps) Pointer<ggml_tensor>
ggml_rms_norm_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) Pointer<ggml_tensor>
ggml_roll(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int shift0, int shift1, int shift2, int shift3) Pointer<ggml_tensor>
ggml_rope(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode) Pointer<ggml_tensor>
ggml_rope_custom(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
ggml_rope_custom_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
ggml_rope_ext(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
ggml_rope_ext_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
ggml_rope_ext_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
ggml_rope_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode) Pointer<ggml_tensor>
ggml_rope_multi(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, Pointer<Int> sections, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
ggml_rope_multi_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, Pointer<Int> sections, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) Pointer<ggml_tensor>
ggml_rope_yarn_corr_dims(int n_dims, int n_ctx_orig, double freq_base, double beta_fast, double beta_slow, Pointer<Float> dims) → void
ggml_row_size(ggml_type type, int ne) int
ggml_rwkv_wkv6(Pointer<ggml_context> ctx, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> r, Pointer<ggml_tensor> tf, Pointer<ggml_tensor> td, Pointer<ggml_tensor> state) Pointer<ggml_tensor>
ggml_rwkv_wkv7(Pointer<ggml_context> ctx, Pointer<ggml_tensor> r, Pointer<ggml_tensor> w, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> state) Pointer<ggml_tensor>
ggml_scale(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double s) Pointer<ggml_tensor>
ggml_scale_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double s) Pointer<ggml_tensor>
ggml_set(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) Pointer<ggml_tensor>
ggml_set_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int offset) Pointer<ggml_tensor>
ggml_set_1d_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int offset) Pointer<ggml_tensor>
ggml_set_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int offset) Pointer<ggml_tensor>
ggml_set_2d_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int offset) Pointer<ggml_tensor>
ggml_set_f32(Pointer<ggml_tensor> tensor, double value) Pointer<ggml_tensor>
ggml_set_f32_1d(Pointer<ggml_tensor> tensor, int i, double value) → void
ggml_set_f32_nd(Pointer<ggml_tensor> tensor, int i0, int i1, int i2, int i3, double value) → void
ggml_set_i32(Pointer<ggml_tensor> tensor, int value) Pointer<ggml_tensor>
ggml_set_i32_1d(Pointer<ggml_tensor> tensor, int i, int value) → void
ggml_set_i32_nd(Pointer<ggml_tensor> tensor, int i0, int i1, int i2, int i3, int value) → void
ggml_set_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) Pointer<ggml_tensor>
ggml_set_input(Pointer<ggml_tensor> tensor) → void
ggml_set_loss(Pointer<ggml_tensor> tensor) → void
ggml_set_name(Pointer<ggml_tensor> tensor, Pointer<Char> name) Pointer<ggml_tensor>
ggml_set_no_alloc(Pointer<ggml_context> ctx, bool no_alloc) → void
ggml_set_output(Pointer<ggml_tensor> tensor) → void
ggml_set_param(Pointer<ggml_tensor> tensor) → void
ggml_set_zero(Pointer<ggml_tensor> tensor) Pointer<ggml_tensor>
ggml_sgn(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_sgn_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_sigmoid(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_sigmoid_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_silu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_silu_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_silu_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_sin(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_sin_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_soft_max(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_soft_max_ext(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> mask, double scale, double max_bias) Pointer<ggml_tensor>
ggml_soft_max_ext_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, double scale, double max_bias) Pointer<ggml_tensor>
ggml_soft_max_ext_back_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, double scale, double max_bias) Pointer<ggml_tensor>
ggml_soft_max_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_sqr(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_sqr_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_sqrt(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_sqrt_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_ssm_conv(Pointer<ggml_context> ctx, Pointer<ggml_tensor> sx, Pointer<ggml_tensor> c) Pointer<ggml_tensor>
ggml_ssm_scan(Pointer<ggml_context> ctx, Pointer<ggml_tensor> s, Pointer<ggml_tensor> x, Pointer<ggml_tensor> dt, Pointer<ggml_tensor> A, Pointer<ggml_tensor> B, Pointer<ggml_tensor> C) Pointer<ggml_tensor>
ggml_status_to_string(ggml_status status) Pointer<Char>
ggml_step(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_step_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_sub(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_sub_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) Pointer<ggml_tensor>
ggml_sum(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_sum_rows(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_tallocr_alloc(Pointer<ggml_tallocr> talloc, Pointer<ggml_tensor> tensor) ggml_status
ggml_tallocr_new(ggml_backend_buffer_t buffer) ggml_tallocr
ggml_tanh(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_tanh_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_tensor_overhead() int
ggml_threadpool_free(Pointer<ggml_threadpool> threadpool) → void
ggml_threadpool_get_n_threads(Pointer<ggml_threadpool> threadpool) int
ggml_threadpool_new(Pointer<ggml_threadpool_params> params) Pointer<ggml_threadpool>
ggml_threadpool_params_default(int n_threads) ggml_threadpool_params
ggml_threadpool_params_init(Pointer<ggml_threadpool_params> p, int n_threads) → void
ggml_threadpool_params_match(Pointer<ggml_threadpool_params> p0, Pointer<ggml_threadpool_params> p1) bool
ggml_threadpool_pause(Pointer<ggml_threadpool> threadpool) → void
ggml_threadpool_resume(Pointer<ggml_threadpool> threadpool) → void
ggml_time_init() → void
ggml_time_ms() int
ggml_time_us() int
ggml_timestep_embedding(Pointer<ggml_context> ctx, Pointer<ggml_tensor> timesteps, int dim, int max_period) Pointer<ggml_tensor>
ggml_top_k(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int k) Pointer<ggml_tensor>
ggml_transpose(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) Pointer<ggml_tensor>
ggml_type_name(ggml_type type) Pointer<Char>
ggml_type_size(ggml_type type) int
ggml_type_sizef(ggml_type type) double
ggml_unary(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_unary_op op) Pointer<ggml_tensor>
ggml_unary_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_unary_op op) Pointer<ggml_tensor>
ggml_unary_op_name(ggml_unary_op op) Pointer<Char>
ggml_unravel_index(Pointer<ggml_tensor> tensor, int i, Pointer<Int64> i0, Pointer<Int64> i1, Pointer<Int64> i2, Pointer<Int64> i3) → void
ggml_upscale(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int scale_factor, ggml_scale_mode mode) Pointer<ggml_tensor>
ggml_upscale_ext(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3, ggml_scale_mode mode) Pointer<ggml_tensor>
ggml_used_mem(Pointer<ggml_context> ctx) int
ggml_validate_row_data(ggml_type type, Pointer<Void> data, int nbytes) bool
ggml_view_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int offset) Pointer<ggml_tensor>
ggml_view_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int nb1, int offset) Pointer<ggml_tensor>
ggml_view_3d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int nb1, int nb2, int offset) Pointer<ggml_tensor>
ggml_view_4d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3, int nb1, int nb2, int nb3, int offset) Pointer<ggml_tensor>
ggml_view_tensor(Pointer<ggml_context> ctx, Pointer<ggml_tensor> src) Pointer<ggml_tensor>
ggml_win_part(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int w) Pointer<ggml_tensor>
ggml_win_unpart(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int w0, int h0, int w) Pointer<ggml_tensor>
llama_adapter_lora_free(Pointer<llama_adapter_lora> adapter) → void
llama_adapter_lora_init(Pointer<llama_model> model, Pointer<Char> path_lora) Pointer<llama_adapter_lora>
llama_add_bos_token(Pointer<llama_vocab> vocab) bool
llama_add_eos_token(Pointer<llama_vocab> vocab) bool
llama_apply_adapter_cvec(Pointer<llama_context> ctx, Pointer<Float> data, int len, int n_embd, int il_start, int il_end) int
llama_attach_threadpool(Pointer<llama_context> ctx, ggml_threadpool_t threadpool, ggml_threadpool_t threadpool_batch) → void
llama_backend_free() → void
llama_backend_init() → void
llama_batch_free(llama_batch batch) → void
llama_batch_get_one(Pointer<llama_token> tokens, int n_tokens) llama_batch
llama_batch_init(int n_tokens, int embd, int n_seq_max) llama_batch
llama_chat_apply_template(Pointer<Char> tmpl, Pointer<llama_chat_message> chat, int n_msg, bool add_ass, Pointer<Char> buf, int length) int
Apply chat template. Inspired by hf apply_chat_template() in Python. Both "model" and "custom_template" are optional, but at least one is required; "custom_template" takes precedence over "model". NOTE: This function does not use a Jinja parser. It only supports a pre-defined list of templates. See more: https://github.com/ggml-org/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template
@param tmpl A Jinja template to use for this chat. If this is nullptr, the model's default chat template is used instead.
@param chat Pointer to a list of multiple llama_chat_message
@param n_msg Number of llama_chat_message in this chat
@param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message.
@param buf A buffer to hold the formatted output prompt. The recommended allocation size is 2 * (total number of characters of all messages).
@param length The size of the allocated buffer
@return The total number of bytes of the formatted prompt. If it is larger than the size of the buffer, you may need to re-allocate the buffer and then re-apply the template.
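Usage sketch (not part of the generated API): grow the buffer when the return value reports a larger required size. Assumes dart:ffi plus package:ffi for allocation, and that lib (an instance of this class), chat, and nMsg were prepared elsewhere.

String applyChatTemplate(llama_cpp lib, Pointer<llama_chat_message> chat, int nMsg) {
  var cap = 1024; // initial guess
  var buf = calloc<Char>(cap);
  var n = lib.llama_chat_apply_template(nullptr, chat, nMsg, true, buf, cap);
  if (n > cap) {
    // The return value is the required size: re-allocate and re-apply.
    calloc.free(buf);
    cap = n;
    buf = calloc<Char>(cap);
    n = lib.llama_chat_apply_template(nullptr, chat, nMsg, true, buf, cap);
  }
  if (n < 0) {
    calloc.free(buf);
    throw StateError('failed to apply chat template');
  }
  final prompt = buf.cast<Utf8>().toDartString(length: n);
  calloc.free(buf);
  return prompt;
}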
llama_chat_builtin_templates(Pointer<Pointer<Char>> output, int len) int
llama_clear_adapter_lora(Pointer<llama_context> ctx) → void
llama_context_default_params() llama_context_params
llama_copy_state_data(Pointer<llama_context> ctx, Pointer<Uint8> dst) int
llama_decode(Pointer<llama_context> ctx, llama_batch batch) int
llama_detach_threadpool(Pointer<llama_context> ctx) → void
llama_detokenize(Pointer<llama_vocab> vocab, Pointer<llama_token> tokens, int n_tokens, Pointer<Char> text, int text_len_max, bool remove_special, bool unparse_special) int
@details Convert the provided tokens into text (the inverse of llama_tokenize()).
@param text The char pointer must be large enough to hold the resulting text.
@param remove_special Allows removing BOS and EOS tokens if the model is configured to do so.
@param unparse_special If true, special tokens are rendered in the output.
@return The number of chars/bytes on success, no more than text_len_max.
@return A negative number on failure: the number of chars/bytes that would have been returned.
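Usage sketch (assumes dart:ffi and package:ffi are imported, and that lib is an instance of this class):

String detokenize(llama_cpp lib, Pointer<llama_vocab> vocab,
    Pointer<llama_token> tokens, int nTokens) {
  var cap = 256;
  var text = calloc<Char>(cap);
  var n = lib.llama_detokenize(vocab, tokens, nTokens, text, cap, false, true);
  if (n < 0) {
    // A negative result reports how many bytes would have been needed.
    calloc.free(text);
    cap = -n;
    text = calloc<Char>(cap);
    n = lib.llama_detokenize(vocab, tokens, nTokens, text, cap, false, true);
  }
  final out = text.cast<Utf8>().toDartString(length: n);
  calloc.free(text);
  return out;
}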
llama_encode(Pointer<llama_context> ctx, llama_batch batch) int
llama_free(Pointer<llama_context> ctx) → void
llama_free_model(Pointer<llama_model> model) → void
llama_get_embeddings(Pointer<llama_context> ctx) Pointer<Float>
llama_get_embeddings_ith(Pointer<llama_context> ctx, int i) Pointer<Float>
llama_get_embeddings_seq(Pointer<llama_context> ctx, int seq_id) Pointer<Float>
llama_get_kv_self(Pointer<llama_context> ctx) Pointer<llama_kv_cache>
llama_get_logits(Pointer<llama_context> ctx) Pointer<Float>
llama_get_logits_ith(Pointer<llama_context> ctx, int i) Pointer<Float>
llama_get_memory(Pointer<llama_context> ctx) llama_memory_t
llama_get_model(Pointer<llama_context> ctx) Pointer<llama_model>
llama_get_state_size(Pointer<llama_context> ctx) int
llama_init_from_model(Pointer<llama_model> model, llama_context_params params) Pointer<llama_context>
llama_kv_self_can_shift(Pointer<llama_context> ctx) bool
llama_kv_self_clear(Pointer<llama_context> ctx) → void
llama_kv_self_defrag(Pointer<llama_context> ctx) → void
llama_kv_self_n_tokens(Pointer<llama_context> ctx) int
llama_kv_self_seq_add(Pointer<llama_context> ctx, int seq_id, int p0, int p1, int delta) → void
llama_kv_self_seq_cp(Pointer<llama_context> ctx, int seq_id_src, int seq_id_dst, int p0, int p1) → void
llama_kv_self_seq_div(Pointer<llama_context> ctx, int seq_id, int p0, int p1, int d) → void
llama_kv_self_seq_keep(Pointer<llama_context> ctx, int seq_id) → void
llama_kv_self_seq_pos_max(Pointer<llama_context> ctx, int seq_id) int
llama_kv_self_seq_pos_min(Pointer<llama_context> ctx, int seq_id) int
llama_kv_self_seq_rm(Pointer<llama_context> ctx, int seq_id, int p0, int p1) bool
llama_kv_self_update(Pointer<llama_context> ctx) → void
llama_kv_self_used_cells(Pointer<llama_context> ctx) int
llama_load_model_from_file(Pointer<Char> path_model, llama_model_params params) Pointer<llama_model>
llama_load_session_file(Pointer<llama_context> ctx, Pointer<Char> path_session, Pointer<llama_token> tokens_out, int n_token_capacity, Pointer<Size> n_token_count_out) bool
llama_log_set(ggml_log_callback log_callback, Pointer<Void> user_data) → void
llama_max_devices() int
llama_max_parallel_sequences() int
llama_memory_can_shift(llama_memory_t mem) bool
llama_memory_clear(llama_memory_t mem, bool data) → void
llama_memory_seq_add(llama_memory_t mem, int seq_id, int p0, int p1, int delta) → void
llama_memory_seq_cp(llama_memory_t mem, int seq_id_src, int seq_id_dst, int p0, int p1) → void
llama_memory_seq_div(llama_memory_t mem, int seq_id, int p0, int p1, int d) → void
llama_memory_seq_keep(llama_memory_t mem, int seq_id) → void
llama_memory_seq_pos_max(llama_memory_t mem, int seq_id) int
llama_memory_seq_pos_min(llama_memory_t mem, int seq_id) int
llama_memory_seq_rm(llama_memory_t mem, int seq_id, int p0, int p1) bool
llama_model_chat_template(Pointer<llama_model> model, Pointer<Char> name) Pointer<Char>
llama_model_cls_label(Pointer<llama_model> model, int i) Pointer<Char>
llama_model_decoder_start_token(Pointer<llama_model> model) int
llama_model_default_params() llama_model_params
llama_model_desc(Pointer<llama_model> model, Pointer<Char> buf, int buf_size) int
llama_model_free(Pointer<llama_model> model) → void
llama_model_get_vocab(Pointer<llama_model> model) Pointer<llama_vocab>
llama_model_has_decoder(Pointer<llama_model> model) bool
llama_model_has_encoder(Pointer<llama_model> model) bool
llama_model_is_recurrent(Pointer<llama_model> model) bool
llama_model_load_from_file(Pointer<Char> path_model, llama_model_params params) Pointer<llama_model>
llama_model_load_from_splits(Pointer<Pointer<Char>> paths, int n_paths, llama_model_params params) Pointer<llama_model>
llama_model_meta_count(Pointer<llama_model> model) int
llama_model_meta_key_by_index(Pointer<llama_model> model, int i, Pointer<Char> buf, int buf_size) int
llama_model_meta_val_str(Pointer<llama_model> model, Pointer<Char> key, Pointer<Char> buf, int buf_size) int
llama_model_meta_val_str_by_index(Pointer<llama_model> model, int i, Pointer<Char> buf, int buf_size) int
llama_model_n_cls_out(Pointer<llama_model> model) int
llama_model_n_ctx_train(Pointer<llama_model> model) int
llama_model_n_embd(Pointer<llama_model> model) int
llama_model_n_head(Pointer<llama_model> model) int
llama_model_n_head_kv(Pointer<llama_model> model) int
llama_model_n_layer(Pointer<llama_model> model) int
llama_model_n_params(Pointer<llama_model> model) int
llama_model_n_swa(Pointer<llama_model> model) int
llama_model_quantize(Pointer<Char> fname_inp, Pointer<Char> fname_out, Pointer<llama_model_quantize_params> params) int
llama_model_quantize_default_params() llama_model_quantize_params
llama_model_rope_freq_scale_train(Pointer<llama_model> model) double
llama_model_rope_type(Pointer<llama_model> model) llama_rope_type
llama_model_save_to_file(Pointer<llama_model> model, Pointer<Char> path_model) → void
llama_model_size(Pointer<llama_model> model) int
llama_n_batch(Pointer<llama_context> ctx) int
llama_n_ctx(Pointer<llama_context> ctx) int
llama_n_ctx_train(Pointer<llama_model> model) int
llama_n_embd(Pointer<llama_model> model) int
llama_n_head(Pointer<llama_model> model) int
llama_n_layer(Pointer<llama_model> model) int
llama_n_seq_max(Pointer<llama_context> ctx) int
llama_n_threads(Pointer<llama_context> ctx) int
llama_n_threads_batch(Pointer<llama_context> ctx) int
llama_n_ubatch(Pointer<llama_context> ctx) int
llama_n_vocab(Pointer<llama_vocab> vocab) int
llama_new_context_with_model(Pointer<llama_model> model, llama_context_params params) Pointer<llama_context>
llama_numa_init(ggml_numa_strategy numa) → void
llama_opt_epoch(Pointer<llama_context> lctx, ggml_opt_dataset_t dataset, ggml_opt_result_t result_train, ggml_opt_result_t result_eval, int idata_split, ggml_opt_epoch_callback callback_train, ggml_opt_epoch_callback callback_eval) → void
llama_opt_init(Pointer<llama_context> lctx, Pointer<llama_model> model, llama_opt_params lopt_params) → void
llama_opt_param_filter_all(Pointer<ggml_tensor> tensor, Pointer<Void> userdata) bool
llama_perf_context(Pointer<llama_context> ctx) llama_perf_context_data
llama_perf_context_print(Pointer<llama_context> ctx) → void
llama_perf_context_reset(Pointer<llama_context> ctx) → void
llama_perf_sampler(Pointer<llama_sampler> chain) llama_perf_sampler_data
llama_perf_sampler_print(Pointer<llama_sampler> chain) → void
llama_perf_sampler_reset(Pointer<llama_sampler> chain) → void
llama_pooling_type1(Pointer<llama_context> ctx) llama_pooling_type
llama_print_system_info() Pointer<Char>
llama_rm_adapter_lora(Pointer<llama_context> ctx, Pointer<llama_adapter_lora> adapter) int
llama_sampler_accept(Pointer<llama_sampler> smpl, int token) → void
llama_sampler_apply(Pointer<llama_sampler> smpl, Pointer<llama_token_data_array> cur_p) → void
llama_sampler_chain_add(Pointer<llama_sampler> chain, Pointer<llama_sampler> smpl) → void
llama_sampler_chain_default_params() llama_sampler_chain_params
llama_sampler_chain_get(Pointer<llama_sampler> chain, int i) Pointer<llama_sampler>
llama_sampler_chain_init(llama_sampler_chain_params params) Pointer<llama_sampler>
llama_sampler_chain_n(Pointer<llama_sampler> chain) int
llama_sampler_chain_remove(Pointer<llama_sampler> chain, int i) Pointer<llama_sampler>
llama_sampler_clone(Pointer<llama_sampler> smpl) Pointer<llama_sampler>
llama_sampler_free(Pointer<llama_sampler> smpl) → void
llama_sampler_get_seed(Pointer<llama_sampler> smpl) int
llama_sampler_init(Pointer<llama_sampler_i> iface, llama_sampler_context_t ctx) Pointer<llama_sampler>
llama_sampler_init_dist(int seed) Pointer<llama_sampler>
llama_sampler_init_dry(Pointer<llama_vocab> vocab, int n_ctx_train, double dry_multiplier, double dry_base, int dry_allowed_length, int dry_penalty_last_n, Pointer<Pointer<Char>> seq_breakers, int num_breakers) Pointer<llama_sampler>
@details DRY sampler, designed by p-e-w, as described in https://github.com/oobabooga/text-generation-webui/pull/5677, porting the Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
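A construction sketch, assuming lib, model, and vocab from the loading sketch; the multiplier and base (0.8 / 1.75) are commonly used defaults rather than requirements, and the breaker strings are assumed to be copied by the sampler during init:

    final breakers = malloc<Pointer<Char>>(2);
    breakers[0] = '\n'.toNativeUtf8().cast<Char>();
    breakers[1] = ':'.toNativeUtf8().cast<Char>();
    final dry = lib.llama_sampler_init_dry(vocab,
        lib.llama_model_n_ctx_train(model), 0.8, 1.75, 2, 512, breakers, 2);
    malloc.free(breakers[0]);
    malloc.free(breakers[1]);
    malloc.free(breakers);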
llama_sampler_init_grammar(Pointer<llama_vocab> vocab, Pointer<Char> grammar_str, Pointer<Char> grammar_root) Pointer<llama_sampler>
@details Initializes a GBNF grammar; see grammars/README.md for details.
@param vocab The vocabulary that this grammar will be used with.
@param grammar_str The production rules for the grammar, encoded as a string. An empty string yields an empty grammar; NULL is returned if parsing of grammar_str fails.
@param grammar_root The name of the start symbol for the grammar.
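A sketch constraining output to digits, with the GBNF string purely illustrative:

    final gbnf = 'root ::= [0-9]+'.toNativeUtf8().cast<Char>();
    final root = 'root'.toNativeUtf8().cast<Char>();
    final grammar = lib.llama_sampler_init_grammar(vocab, gbnf, root);
    if (grammar.address == 0) throw StateError('grammar_str failed to parse');
    malloc.free(gbnf);
    malloc.free(root);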
llama_sampler_init_grammar_lazy(Pointer<llama_vocab> vocab, Pointer<Char> grammar_str, Pointer<Char> grammar_root, Pointer<Pointer<Char>> trigger_words, int num_trigger_words, Pointer<llama_token> trigger_tokens, int num_trigger_tokens) Pointer<llama_sampler>
llama_sampler_init_grammar_lazy_patterns(Pointer<llama_vocab> vocab, Pointer<Char> grammar_str, Pointer<Char> grammar_root, Pointer<Pointer<Char>> trigger_patterns, int num_trigger_patterns, Pointer<llama_token> trigger_tokens, int num_trigger_tokens) Pointer<llama_sampler>
@details Lazy grammar sampler, introduced in https://github.com/ggml-org/llama.cpp/pull/9639.
@param trigger_patterns A list of patterns that will trigger the grammar sampler. Patterns are matched from the start of the generation output, and the grammar sampler is fed content starting from the first match group.
@param trigger_tokens A list of tokens that will trigger the grammar sampler. The grammar sampler is fed content starting from, and including, the trigger token.
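A sketch of the pattern-triggered variant, reusing gbnf and root from the previous sketch: the grammar stays dormant until the output matches the trigger pattern. The pattern itself and passing nullptr/0 for the token triggers are illustrative assumptions:

    final pattern = '(<tool_call>[\\s\\S]*)'.toNativeUtf8().cast<Char>();
    final patterns = malloc<Pointer<Char>>(1);
    patterns[0] = pattern;
    final lazy = lib.llama_sampler_init_grammar_lazy_patterns(
        vocab, gbnf, root, patterns, 1, nullptr, 0);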
llama_sampler_init_greedy() Pointer<llama_sampler>
llama_sampler_init_infill(Pointer<llama_vocab> vocab) Pointer<llama_sampler>
llama_sampler_init_logit_bias(int n_vocab, int n_logit_bias, Pointer<llama_logit_bias> logit_bias) Pointer<llama_sampler>
llama_sampler_init_min_p(double p, int min_keep) Pointer<llama_sampler>
@details Minimum P sampling as described in https://github.com/ggml-org/llama.cpp/pull/3841
llama_sampler_init_mirostat(int n_vocab, int seed, double tau, double eta, int m) Pointer<llama_sampler>
@details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
@param tau The target cross-entropy (or surprise) value to aim for in the generated text. A higher value corresponds to more surprising or less predictable text; a lower value corresponds to less surprising or more predictable text.
@param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled token. A larger learning rate causes mu to be updated more quickly; a smaller one results in slower updates.
@param m The number of tokens considered in the estimation of s_hat, which in turn helps to calculate the value of k. The paper uses m = 100, but other values can be tried to see how they affect performance.
The internal state mu (maximum cross-entropy) is initialized to twice the target cross-entropy (2 * tau) and is updated during sampling based on the error between the target and observed surprisal.
llama_sampler_init_mirostat_v2(int seed, double tau, double eta) Pointer<llama_sampler>
@details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
@param tau The target cross-entropy (or surprise) value to aim for in the generated text. A higher value corresponds to more surprising or less predictable text; a lower value corresponds to less surprising or more predictable text.
@param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled token. A larger learning rate causes mu to be updated more quickly; a smaller one results in slower updates.
The internal state mu (maximum cross-entropy) is initialized to twice the target cross-entropy (2 * tau) and is updated during sampling based on the error between the target and observed surprisal.
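A chain sketch for Mirostat 2.0; tau = 5.0 and eta = 0.1 are the values commonly used as defaults, and the seed is arbitrary:

    final chain =
        lib.llama_sampler_chain_init(lib.llama_sampler_chain_default_params());
    lib.llama_sampler_chain_add(chain, lib.llama_sampler_init_temp(0.8));
    lib.llama_sampler_chain_add(
        chain, lib.llama_sampler_init_mirostat_v2(1234, 5.0, 0.1));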
llama_sampler_init_penalties(int penalty_last_n, double penalty_repeat, double penalty_freq, double penalty_present) Pointer<llama_sampler>
NOTE: Avoid applying this to the full vocabulary, as searching for repeated tokens can become slow; apply top-k or top-p sampling first.
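A full-chain sketch that follows this note, narrowing the candidates with top-k before the penalty pass and ending with a seeded distribution sampler. It assumes ctx is a Pointer<llama_context> that has decoded at least one batch; idx = -1 addresses the logits of the last token, and upstream llama.cpp has llama_sampler_sample accept the sampled token into the chain itself, so no separate llama_sampler_accept call is made here:

    final chain =
        lib.llama_sampler_chain_init(lib.llama_sampler_chain_default_params());
    lib.llama_sampler_chain_add(chain, lib.llama_sampler_init_top_k(40));
    lib.llama_sampler_chain_add(
        chain, lib.llama_sampler_init_penalties(64, 1.1, 0.0, 0.0));
    lib.llama_sampler_chain_add(chain, lib.llama_sampler_init_top_p(0.95, 1));
    lib.llama_sampler_chain_add(chain, lib.llama_sampler_init_temp(0.8));
    lib.llama_sampler_chain_add(chain, lib.llama_sampler_init_dist(1234));

    final token = lib.llama_sampler_sample(chain, ctx, -1);
    // ... detokenize token, decode the next batch, repeat ...
    lib.llama_sampler_free(chain); // the chain owns and frees its samplers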
llama_sampler_init_softmax() Pointer<llama_sampler>
@details Sorts candidate tokens by their logits in descending order and calculates probabilities from the logits. NOTE: Avoid applying this to the full vocabulary, as the sorting can become slow; apply top-k or top-p sampling first.
llama_sampler_init_temp(double t) Pointer<llama_sampler>
@details Updates the logits: l_i' = l_i / t. When t <= 0.0f, the maximum logit is kept at its original value and the rest are set to -inf.
llama_sampler_init_temp_ext(double t, double delta, double exponent) Pointer<llama_sampler>
@details Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772.
llama_sampler_init_top_k(int k) Pointer<llama_sampler>
@details Top-K sampling described in the academic paper "The Curious Case of Neural Text Degeneration" (https://arxiv.org/abs/1904.09751). Setting k <= 0 makes this a no-op.
llama_sampler_init_top_n_sigma(double n) Pointer<llama_sampler>
@details Top-nσ sampling as described in the academic paper "Top-nσ: Not All Logits Are You Need" (https://arxiv.org/pdf/2411.07641).
llama_sampler_init_top_p(double p, int min_keep) Pointer<llama_sampler>
@details Nucleus sampling described in the academic paper "The Curious Case of Neural Text Degeneration" (https://arxiv.org/abs/1904.09751).
llama_sampler_init_typical(double p, int min_keep) Pointer<llama_sampler>
@details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
llama_sampler_init_xtc(double p, double t, int min_keep, int seed) Pointer<llama_sampler>
@details XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
llama_sampler_name(Pointer<llama_sampler> smpl) Pointer<Char>
llama_sampler_reset(Pointer<llama_sampler> smpl) → void
llama_sampler_sample(Pointer<llama_sampler> smpl, Pointer<llama_context> ctx, int idx) int
llama_save_session_file(Pointer<llama_context> ctx, Pointer<Char> path_session, Pointer<llama_token> tokens, int n_token_count) bool
llama_set_abort_callback(Pointer<llama_context> ctx, ggml_abort_callback abort_callback, Pointer<Void> abort_callback_data) → void
llama_set_adapter_lora(Pointer<llama_context> ctx, Pointer<llama_adapter_lora> adapter, double scale) int
llama_set_causal_attn(Pointer<llama_context> ctx, bool causal_attn) → void
llama_set_embeddings(Pointer<llama_context> ctx, bool embeddings) → void
llama_set_n_threads(Pointer<llama_context> ctx, int n_threads, int n_threads_batch) → void
llama_set_state_data(Pointer<llama_context> ctx, Pointer<Uint8> src) int
llama_set_warmup(Pointer<llama_context> ctx, bool warmup) → void
llama_split_path(Pointer<Char> split_path, int maxlen, Pointer<Char> path_prefix, int split_no, int split_count) int
@details Build the final path of one chunk of a split GGUF file. Example: llama_split_path(split_path, sizeof(split_path), "/models/ggml-model-q4_0", 2, 4) yields split_path = "/models/ggml-model-q4_0-00002-of-00004.gguf".
llama_split_prefix(Pointer<Char> split_prefix, int maxlen, Pointer<Char> split_path, int split_no, int split_count) int
@details Extract the path prefix from split_path if and only if split_no and split_count match. Example: llama_split_prefix(split_prefix, 64, "/models/ggml-model-q4_0-00002-of-00004.gguf", 2, 4) yields split_prefix = "/models/ggml-model-q4_0".
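A sketch exercising both directions; buffer sizes are arbitrary:

    final out = malloc<Char>(512);
    final prefix = '/models/ggml-model-q4_0'.toNativeUtf8().cast<Char>();
    lib.llama_split_path(out, 512, prefix, 2, 4);
    print(out.cast<Utf8>().toDartString()); // .../ggml-model-q4_0-00002-of-00004.gguf

    final back = malloc<Char>(512);
    lib.llama_split_prefix(back, 512, out, 2, 4);
    print(back.cast<Utf8>().toDartString()); // /models/ggml-model-q4_0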
llama_state_get_data(Pointer<llama_context> ctx, Pointer<Uint8> dst, int size) int
llama_state_get_size(Pointer<llama_context> ctx) int
llama_state_load_file(Pointer<llama_context> ctx, Pointer<Char> path_session, Pointer<llama_token> tokens_out, int n_token_capacity, Pointer<Size> n_token_count_out) bool
llama_state_save_file(Pointer<llama_context> ctx, Pointer<Char> path_session, Pointer<llama_token> tokens, int n_token_count) bool
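A save/restore sketch, assuming ctx as above and tokens (a Pointer<llama_token>) holding the nTokens tokens evaluated so far; the file path is a placeholder:

    final path = '/tmp/llama-session.bin'.toNativeUtf8().cast<Char>();
    lib.llama_state_save_file(ctx, path, tokens, nTokens);

    // Later, restore into a compatible context:
    const cap = 4096; // arbitrary token capacity
    final tokensOut = malloc<llama_token>(cap);
    final nOut = malloc<Size>(1);
    if (!lib.llama_state_load_file(ctx, path, tokensOut, cap, nOut)) {
      throw StateError('failed to restore session');
    }
    print('restored ${nOut.value} tokens');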
llama_state_seq_get_data(Pointer<llama_context> ctx, Pointer<Uint8> dst, int size, int seq_id) int
llama_state_seq_get_size(Pointer<llama_context> ctx, int seq_id) int
llama_state_seq_load_file(Pointer<llama_context> ctx, Pointer<Char> filepath, int dest_seq_id, Pointer<llama_token> tokens_out, int n_token_capacity, Pointer<Size> n_token_count_out) int
llama_state_seq_save_file(Pointer<llama_context> ctx, Pointer<Char> filepath, int seq_id, Pointer<llama_token> tokens, int n_token_count) int
llama_state_seq_set_data(Pointer<llama_context> ctx, Pointer<Uint8> src, int size, int dest_seq_id) int
llama_state_set_data(Pointer<llama_context> ctx, Pointer<Uint8> src, int size) int
llama_supports_gpu_offload() bool
llama_supports_mlock() bool
llama_supports_mmap() bool
llama_supports_rpc() bool
llama_synchronize(Pointer<llama_context> ctx) → void
llama_time_us() int
llama_token_bos(Pointer<llama_vocab> vocab) int
llama_token_cls(Pointer<llama_vocab> vocab) int
llama_token_eos(Pointer<llama_vocab> vocab) int
llama_token_eot(Pointer<llama_vocab> vocab) int
llama_token_fim_mid(Pointer<llama_vocab> vocab) int
llama_token_fim_pad(Pointer<llama_vocab> vocab) int
llama_token_fim_pre(Pointer<llama_vocab> vocab) int
llama_token_fim_rep(Pointer<llama_vocab> vocab) int
llama_token_fim_sep(Pointer<llama_vocab> vocab) int
llama_token_fim_suf(Pointer<llama_vocab> vocab) int
llama_token_get_attr(Pointer<llama_vocab> vocab, Dartllama_token token) llama_token_attr
llama_token_get_score(Pointer<llama_vocab> vocab, int token) double
llama_token_get_text(Pointer<llama_vocab> vocab, int token) Pointer<Char>
llama_token_is_control(Pointer<llama_vocab> vocab, int token) bool
llama_token_is_eog(Pointer<llama_vocab> vocab, int token) bool
llama_token_nl(Pointer<llama_vocab> vocab) int
llama_token_pad(Pointer<llama_vocab> vocab) int
llama_token_sep(Pointer<llama_vocab> vocab) int
llama_token_to_piece(Pointer<llama_vocab> vocab, int token, Pointer<Char> buf, int length, int lstrip, bool special) int
llama_tokenize(Pointer<llama_vocab> vocab, Pointer<Char> text, int text_len, Pointer<llama_token> tokens, int n_tokens_max, bool add_special, bool parse_special) int
@details Convert the provided text into tokens.
@param tokens The tokens pointer must be large enough to hold the resulting tokens.
@param add_special Allow adding BOS and EOS tokens if the model is configured to do so.
@param parse_special Allow tokenizing special and/or control tokens, which otherwise are not exposed and are treated as plain text. Does not insert a leading space.
@return The number of tokens on success, no more than n_tokens_max; a negative number on failure, namely the negation of the number of tokens that would have been returned; INT32_MIN on overflow (e.g., the tokenization result exceeds the int32_t limit).
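The negative-return contract gives a simple two-pass pattern in Dart: probe with a small buffer, then retry with the exact capacity. A sketch, assuming lib and vocab as above:

    final text = 'Hello, world!'.toNativeUtf8();
    var cap = 16;
    var toks = malloc<llama_token>(cap);
    var n = lib.llama_tokenize(
        vocab, text.cast<Char>(), text.length, toks, cap, true, false);
    if (n < 0) {
      // -n is the exact number of tokens required.
      malloc.free(toks);
      cap = -n;
      toks = malloc<llama_token>(cap);
      n = lib.llama_tokenize(
          vocab, text.cast<Char>(), text.length, toks, cap, true, false);
    }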
llama_vocab_bos(Pointer<llama_vocab> vocab) int
llama_vocab_cls(Pointer<llama_vocab> vocab) int
llama_vocab_eos(Pointer<llama_vocab> vocab) int
llama_vocab_eot(Pointer<llama_vocab> vocab) int
llama_vocab_fim_mid(Pointer<llama_vocab> vocab) int
llama_vocab_fim_pad(Pointer<llama_vocab> vocab) int
llama_vocab_fim_pre(Pointer<llama_vocab> vocab) int
llama_vocab_fim_rep(Pointer<llama_vocab> vocab) int
llama_vocab_fim_sep(Pointer<llama_vocab> vocab) int
llama_vocab_fim_suf(Pointer<llama_vocab> vocab) int
llama_vocab_get_add_bos(Pointer<llama_vocab> vocab) bool
llama_vocab_get_add_eos(Pointer<llama_vocab> vocab) bool
llama_vocab_get_add_sep(Pointer<llama_vocab> vocab) bool
llama_vocab_get_attr(Pointer<llama_vocab> vocab, Dartllama_token token) llama_token_attr
llama_vocab_get_score(Pointer<llama_vocab> vocab, int token) double
llama_vocab_get_text(Pointer<llama_vocab> vocab, int token) Pointer<Char>
llama_vocab_is_control(Pointer<llama_vocab> vocab, int token) bool
llama_vocab_is_eog(Pointer<llama_vocab> vocab, int token) bool
llama_vocab_n_tokens(Pointer<llama_vocab> vocab) int
llama_vocab_nl(Pointer<llama_vocab> vocab) int
llama_vocab_pad(Pointer<llama_vocab> vocab) int
llama_vocab_sep(Pointer<llama_vocab> vocab) int
llama_vocab_type1(Pointer<llama_vocab> vocab) llama_vocab_type
noSuchMethod(Invocation invocation) → dynamic
Invoked when a nonexistent method or property is accessed.
inherited
open_memstream(Pointer<Pointer<Char>> __bufp, Pointer<Size> __sizep) Pointer<FILE>
pclose(Pointer<FILE> arg0) int
perror(Pointer<Char> arg0) → void
popen(Pointer<Char> arg0, Pointer<Char> arg1) Pointer<FILE>
printf(Pointer<Char> arg0) int
putc(int arg0, Pointer<FILE> arg1) int
putc_unlocked(int arg0, Pointer<FILE> arg1) int
putchar(int arg0) int
putchar_unlocked(int arg0) int
puts(Pointer<Char> arg0) int
putw(int arg0, Pointer<FILE> arg1) int
remove(Pointer<Char> arg0) int
rename(Pointer<Char> __old, Pointer<Char> __new) int
renameat(int arg0, Pointer<Char> arg1, int arg2, Pointer<Char> arg3) int
renameatx_np(int arg0, Pointer<Char> arg1, int arg2, Pointer<Char> arg3, int arg4) int
renamex_np(Pointer<Char> arg0, Pointer<Char> arg1, int arg2) int
rewind(Pointer<FILE> arg0) → void
scanf(Pointer<Char> arg0) int
setbuf(Pointer<FILE> arg0, Pointer<Char> arg1) → void
setbuffer(Pointer<FILE> arg0, Pointer<Char> arg1, int __size) → void
setlinebuf(Pointer<FILE> arg0) int
setvbuf(Pointer<FILE> arg0, Pointer<Char> arg1, int arg2, int __size) int
snprintf(Pointer<Char> __str, int __size, Pointer<Char> __format) int
sprintf(Pointer<Char> arg0, Pointer<Char> arg1) int
sscanf(Pointer<Char> arg0, Pointer<Char> arg1) int
tempnam(Pointer<Char> __dir, Pointer<Char> __prefix) Pointer<Char>
tmpfile() Pointer<FILE>
tmpnam(Pointer<Char> arg0) Pointer<Char>
toString() String
A string representation of this object.
inherited
ungetc(int arg0, Pointer<FILE> arg1) int
vasprintf(Pointer<Pointer<Char>> arg0, Pointer<Char> arg1, va_list arg2) int
vdprintf(int arg0, Pointer<Char> arg1, va_list arg2) int
vfprintf(Pointer<FILE> arg0, Pointer<Char> arg1, va_list arg2) int
vfscanf(Pointer<FILE> __stream, Pointer<Char> __format, va_list arg2) int
vprintf(Pointer<Char> arg0, va_list arg1) int
vscanf(Pointer<Char> __format, va_list arg1) int
vsnprintf(Pointer<Char> __str, int __size, Pointer<Char> __format, va_list arg3) int
vsprintf(Pointer<Char> arg0, Pointer<Char> arg1, va_list arg2) int
vsscanf(Pointer<Char> __str, Pointer<Char> __format, va_list arg2) int

Operators

operator ==(Object other) bool
The equality operator.
inherited