llama_cpp class

llama.cpp binding

Constructors

llama_cpp(DynamicLibrary dynamicLibrary): The symbols are looked up in dynamicLibrary.
llama_cpp.fromLookup(Pointer<T> lookup<T extends NativeType>(String symbolName)): The symbols are looked up with lookup.

Properties

GGML_OBJECT_SIZE → int: no setter
GGML_TENSOR_SIZE → int: no setter
hashCode → int: The hash code for this object.
no setter, inherited
runtimeType → Type: A representation of the runtime type of the object.
no setter, inherited
sys_errlist ↔ Pointer<Pointer<Char>>: getter/setter pair
sys_nerr → int: no setter

Methods

asprintf(Pointer<Pointer<Char>> arg0, Pointer<Char> arg1) → int
clearerr(Pointer<FILE> arg0) → void
ctermid(Pointer<Char> arg0) → Pointer<Char>
ctermid_r(Pointer<Char> arg0) → Pointer<Char>
dprintf(int arg0, Pointer<Char> arg1) → int
fclose(Pointer<FILE> arg0) → int
fdopen(int arg0, Pointer<Char> arg1) → Pointer<FILE>
feof(Pointer<FILE> arg0) → int
ferror(Pointer<FILE> arg0) → int
fflush(Pointer<FILE> arg0) → int
fgetc(Pointer<FILE> arg0) → int
fgetln(Pointer<FILE> arg0, Pointer<Size> arg1) → Pointer<Char>
fgetpos(Pointer<FILE> arg0, Pointer<fpos_t> arg1) → int
fgets(Pointer<Char> arg0, int arg1, Pointer<FILE> arg2) → Pointer<Char>
fileno(Pointer<FILE> arg0) → int
flockfile(Pointer<FILE> arg0) → void
fmemopen(Pointer<Void> __buf, int __size, Pointer<Char> __mode) → Pointer<FILE>
fmtcheck(Pointer<Char> arg0, Pointer<Char> arg1) → Pointer<Char>
fopen(Pointer<Char> __filename, Pointer<Char> __mode) → Pointer<FILE>
fprintf(Pointer<FILE> arg0, Pointer<Char> arg1) → int
fpurge(Pointer<FILE> arg0) → int
fputc(int arg0, Pointer<FILE> arg1) → int
fputs(Pointer<Char> arg0, Pointer<FILE> arg1) → int
fread(Pointer<Void> __ptr, int __size, int __nitems, Pointer<FILE> __stream) → int
freopen(Pointer<Char> arg0, Pointer<Char> arg1, Pointer<FILE> arg2) → Pointer<FILE>
fscanf(Pointer<FILE> arg0, Pointer<Char> arg1) → int
fseek(Pointer<FILE> arg0, int arg1, int arg2) → int
fseeko(Pointer<FILE> __stream, int __offset, int __whence) → int
fsetpos(Pointer<FILE> arg0, Pointer<fpos_t> arg1) → int
ftell(Pointer<FILE> arg0) → int
ftello(Pointer<FILE> __stream) → int
ftrylockfile(Pointer<FILE> arg0) → int
funlockfile(Pointer<FILE> arg0) → void
funopen(Pointer<Void> arg0, Pointer<NativeFunction<Int Function(Pointer<Void>, Pointer<Char>, Int)>> arg1, Pointer<NativeFunction<Int Function(Pointer<Void>, Pointer<Char>, Int)>> arg2, Pointer<NativeFunction<fpos_t Function(Pointer<Void>, fpos_t, Int)>> arg3, Pointer<NativeFunction<Int Function(Pointer<Void>)>> arg4) → Pointer<FILE>
fwrite(Pointer<Void> __ptr, int __size, int __nitems, Pointer<FILE> __stream) → int
getc(Pointer<FILE> arg0) → int
getc_unlocked(Pointer<FILE> arg0) → int
getchar() → int
getchar_unlocked() → int
getdelim(Pointer<Pointer<Char>> __linep, Pointer<Size> __linecapp, int __delimiter, Pointer<FILE> __stream) → int
getline(Pointer<Pointer<Char>> __linep, Pointer<Size> __linecapp, Pointer<FILE> __stream) → int
gets(Pointer<Char> arg0) → Pointer<Char>
getw(Pointer<FILE> arg0) → int
ggml_abs(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_abs_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_acc(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor>
ggml_acc_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor>
ggml_add(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_add1(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_add1_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_add_cast(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int type) → Pointer<ggml_tensor>
ggml_add_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_add_rel_pos(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> pw, Pointer<ggml_tensor> ph) → Pointer<ggml_tensor>
ggml_add_rel_pos_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> pw, Pointer<ggml_tensor> ph) → Pointer<ggml_tensor>
ggml_alibi(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_past, int n_head, double bias_max) → Pointer<ggml_tensor>
ggml_allocr_alloc(ggml_allocr_t alloc, Pointer<ggml_tensor> tensor) → void
ggml_allocr_alloc_graph(ggml_allocr_t alloc, Pointer<ggml_cgraph> graph) → int
ggml_allocr_free(ggml_allocr_t alloc) → void
ggml_allocr_get_buffer(ggml_allocr_t alloc) → Pointer<ggml_backend_buffer>
ggml_allocr_is_measure(ggml_allocr_t alloc) → bool
ggml_allocr_max_size(ggml_allocr_t alloc) → int
ggml_allocr_new(Pointer<Void> data, int size, int alignment) → ggml_allocr_t
ggml_allocr_new_from_backend(Pointer<ggml_backend> backend, int size) → ggml_allocr_t
ggml_allocr_new_from_buffer(Pointer<ggml_backend_buffer> buffer) → ggml_allocr_t
ggml_allocr_new_measure(int alignment) → ggml_allocr_t
ggml_allocr_new_measure_from_backend(Pointer<ggml_backend> backend) → ggml_allocr_t
ggml_allocr_reset(ggml_allocr_t alloc) → void
ggml_allocr_set_parse_seq(ggml_allocr_t alloc, Pointer<Int> list, int n) → void
ggml_are_same_shape(Pointer<ggml_tensor> t0, Pointer<ggml_tensor> t1) → bool
ggml_argmax(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_argsort(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int order) → Pointer<ggml_tensor>
ggml_backend_alloc_buffer(ggml_backend_t backend, int size) → ggml_backend_buffer_t
ggml_backend_alloc_ctx_tensors(Pointer<ggml_context> ctx, Pointer<ggml_backend> backend) → Pointer<ggml_backend_buffer>
ggml_backend_alloc_ctx_tensors_from_buft(Pointer<ggml_context> ctx, Pointer<ggml_backend_buffer_type> buft) → Pointer<ggml_backend_buffer>
ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, int value) → void
ggml_backend_buffer_free(ggml_backend_buffer_t buffer) → void
ggml_backend_buffer_get_alignment(ggml_backend_buffer_t buffer) → int
ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, Pointer<ggml_tensor> tensor) → int
ggml_backend_buffer_get_base(ggml_backend_buffer_t buffer) → Pointer<Void>
ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) → int
ggml_backend_buffer_get_type(ggml_backend_buffer_t buffer) → ggml_backend_buffer_type_t
ggml_backend_buffer_init_tensor(ggml_backend_buffer_t buffer, Pointer<ggml_tensor> tensor) → void
ggml_backend_buffer_is_host(ggml_backend_buffer_t buffer) → bool
ggml_backend_buffer_name(ggml_backend_buffer_t buffer) → Pointer<Char>
ggml_backend_buffer_reset(ggml_backend_buffer_t buffer) → void
ggml_backend_buffer_set_usage(ggml_backend_buffer_t buffer, int usage) → void
ggml_backend_buft_alloc_buffer(ggml_backend_buffer_type_t buft, int size) → ggml_backend_buffer_t
ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) → int
ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, Pointer<ggml_tensor> tensor) → int
ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) → bool
ggml_backend_buft_name(ggml_backend_buffer_type_t buft) → Pointer<Char>
ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) → bool
ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, Pointer<ggml_cgraph> graph, ggml_backend_eval_callback callback, Pointer<Void> user_data) → bool
ggml_backend_cpu_buffer_from_ptr(Pointer<Void> ptr, int size) → ggml_backend_buffer_t
ggml_backend_cpu_buffer_type() → ggml_backend_buffer_type_t
ggml_backend_cpu_init() → ggml_backend_t
ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) → void
ggml_backend_free(ggml_backend_t backend) → void
ggml_backend_get_alignment(ggml_backend_t backend) → int
ggml_backend_get_default_buffer_type(ggml_backend_t backend) → ggml_backend_buffer_type_t
ggml_backend_graph_compute(ggml_backend_t backend, Pointer<ggml_cgraph> cgraph) → bool
ggml_backend_graph_copy1(ggml_backend_t backend, Pointer<ggml_cgraph> graph) → ggml_backend_graph_copy
ggml_backend_graph_copy_free(ggml_backend_graph_copy copy) → void
ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) → void
ggml_backend_graph_plan_create(ggml_backend_t backend, Pointer<ggml_cgraph> cgraph) → ggml_backend_graph_plan_t
ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) → void
ggml_backend_is_cpu(ggml_backend_t backend) → bool
ggml_backend_name(ggml_backend_t backend) → Pointer<Char>
ggml_backend_reg_alloc_buffer(int i, int size) → ggml_backend_buffer_t
ggml_backend_reg_find_by_name(Pointer<Char> name) → int
ggml_backend_reg_get_count() → int
ggml_backend_reg_get_default_buffer_type(int i) → ggml_backend_buffer_type_t
ggml_backend_reg_get_name(int i) → Pointer<Char>
ggml_backend_reg_init_backend(int i, Pointer<Char> params) → ggml_backend_t
ggml_backend_reg_init_backend_from_str(Pointer<Char> backend_str) → ggml_backend_t
ggml_backend_sched_free(ggml_backend_sched_t sched) → void
ggml_backend_sched_get_buffer(ggml_backend_sched_t sched, ggml_backend_t backend) → ggml_backend_buffer_t
ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) → int
ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, Pointer<ggml_tensor> node) → ggml_backend_t
ggml_backend_sched_get_tallocr(ggml_backend_sched_t sched, ggml_backend_t backend) → ggml_tallocr_t
ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, Pointer<ggml_cgraph> graph) → void
ggml_backend_sched_init_measure(ggml_backend_sched_t sched, Pointer<ggml_cgraph> measure_graph) → void
ggml_backend_sched_new(Pointer<ggml_backend_t> backends, Pointer<ggml_backend_buffer_type_t> bufts, int n_backends, int graph_size) → ggml_backend_sched_t
ggml_backend_sched_reset(ggml_backend_sched_t sched) → void
ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, Pointer<Void> user_data) → void
ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, Pointer<ggml_tensor> node, ggml_backend_t backend) → void
ggml_backend_supports_op(ggml_backend_t backend, Pointer<ggml_tensor> op) → bool
ggml_backend_synchronize(ggml_backend_t backend) → void
ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, Pointer<ggml_tensor> tensor, Pointer<Void> addr) → void
ggml_backend_tensor_copy(Pointer<ggml_tensor> src, Pointer<ggml_tensor> dst) → void
ggml_backend_tensor_copy_async(ggml_backend_t backend, Pointer<ggml_tensor> src, Pointer<ggml_tensor> dst) → void
ggml_backend_tensor_get(Pointer<ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void
ggml_backend_tensor_get_async(ggml_backend_t backend, Pointer<ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void
ggml_backend_tensor_set(Pointer<ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void
ggml_backend_tensor_set_async(ggml_backend_t backend, Pointer<ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void
ggml_backend_view_init(ggml_backend_buffer_t buffer, Pointer<ggml_tensor> tensor) → void
ggml_blck_size(int type) → int
ggml_build_backward_expand(Pointer<ggml_context> ctx, Pointer<ggml_cgraph> gf, Pointer<ggml_cgraph> gb, bool keep) → void
ggml_build_backward_gradient_checkpointing(Pointer<ggml_context> ctx, Pointer<ggml_cgraph> gf, Pointer<ggml_cgraph> gb, Pointer<ggml_cgraph> gb_tmp, Pointer<Pointer<ggml_tensor>> checkpoints, int n_checkpoints) → void
ggml_build_forward_expand(Pointer<ggml_cgraph> cgraph, Pointer<ggml_tensor> tensor) → void
ggml_cast(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int type) → Pointer<ggml_tensor>
ggml_clamp(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double min, double max) → Pointer<ggml_tensor>
ggml_concat(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_cont(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_cont_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0) → Pointer<ggml_tensor>
ggml_cont_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1) → Pointer<ggml_tensor>
ggml_cont_3d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2) → Pointer<ggml_tensor>
ggml_cont_4d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) → Pointer<ggml_tensor>
ggml_conv_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int p0, int d0) → Pointer<ggml_tensor>
ggml_conv_1d_ph(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s, int d) → Pointer<ggml_tensor>
ggml_conv_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1) → Pointer<ggml_tensor>
ggml_conv_2d_s1_ph(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_conv_2d_sk_p0(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_conv_transpose_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int p0, int d0) → Pointer<ggml_tensor>
ggml_conv_transpose_2d_p0(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int stride) → Pointer<ggml_tensor>
ggml_cpu_has_arm_fma() → int
ggml_cpu_has_avx() → int
ggml_cpu_has_avx2() → int
ggml_cpu_has_avx512() → int
ggml_cpu_has_avx512_vbmi() → int
ggml_cpu_has_avx512_vnni() → int
ggml_cpu_has_avx_vnni() → int
ggml_cpu_has_blas() → int
ggml_cpu_has_clblast() → int
ggml_cpu_has_cublas() → int
ggml_cpu_has_f16c() → int
ggml_cpu_has_fma() → int
ggml_cpu_has_fp16_va() → int
ggml_cpu_has_gpublas() → int
ggml_cpu_has_metal() → int
ggml_cpu_has_neon() → int
ggml_cpu_has_sse3() → int
ggml_cpu_has_ssse3() → int
ggml_cpu_has_vsx() → int
ggml_cpu_has_wasm_simd() → int
ggml_cpy(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_cross_entropy_loss(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_cross_entropy_loss_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c) → Pointer<ggml_tensor>
ggml_cycles() → int
ggml_cycles_per_ms() → int
ggml_diag(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_diag_mask_inf(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) → Pointer<ggml_tensor>
ggml_diag_mask_inf_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) → Pointer<ggml_tensor>
ggml_diag_mask_zero(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) → Pointer<ggml_tensor>
ggml_diag_mask_zero_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) → Pointer<ggml_tensor>
ggml_div(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_div_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_dup(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_dup_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_dup_tensor(Pointer<ggml_context> ctx, Pointer<ggml_tensor> src) → Pointer<ggml_tensor>
ggml_element_size(Pointer<ggml_tensor> tensor) → int
ggml_elu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_elu_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_flash_attn(Pointer<ggml_context> ctx, Pointer<ggml_tensor> q, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, bool masked) → Pointer<ggml_tensor>
ggml_flash_attn_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> q, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> d, bool masked) → Pointer<ggml_tensor>
ggml_flash_ff(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b0, Pointer<ggml_tensor> b1, Pointer<ggml_tensor> c0, Pointer<ggml_tensor> c1) → Pointer<ggml_tensor>
ggml_format_name(Pointer<ggml_tensor> tensor, Pointer<Char> fmt) → Pointer<ggml_tensor>
ggml_free(Pointer<ggml_context> ctx) → void
ggml_ftype_to_ggml_type(int ftype) → int
ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, ggml_tallocr_t talloc, Pointer<ggml_cgraph> graph) → int
ggml_gallocr_alloc_graph_n(ggml_gallocr_t galloc, Pointer<ggml_cgraph> graph, ggml_hash_set hash_set, Pointer<ggml_tallocr_t> hash_node_talloc) → void
ggml_gallocr_free(ggml_gallocr_t galloc) → void
ggml_gallocr_new() → ggml_gallocr_t
ggml_gallocr_set_parse_seq(ggml_gallocr_t galloc, Pointer<Int> list, int n) → void
ggml_gelu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_gelu_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_gelu_quick(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_gelu_quick_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_get_data(Pointer<ggml_tensor> tensor) → Pointer<Void>
ggml_get_data_f32(Pointer<ggml_tensor> tensor) → Pointer<Float>
ggml_get_f32_1d(Pointer<ggml_tensor> tensor, int i) → double
ggml_get_f32_nd(Pointer<ggml_tensor> tensor, int i0, int i1, int i2, int i3) → double
ggml_get_first_tensor(Pointer<ggml_context> ctx) → Pointer<ggml_tensor>
ggml_get_i32_1d(Pointer<ggml_tensor> tensor, int i) → int
ggml_get_i32_nd(Pointer<ggml_tensor> tensor, int i0, int i1, int i2, int i3) → int
ggml_get_max_tensor_size(Pointer<ggml_context> ctx) → int
ggml_get_mem_buffer(Pointer<ggml_context> ctx) → Pointer<Void>
ggml_get_mem_size(Pointer<ggml_context> ctx) → int
ggml_get_name(Pointer<ggml_tensor> tensor) → Pointer<Char>
ggml_get_next_tensor(Pointer<ggml_context> ctx, Pointer<ggml_tensor> tensor) → Pointer<ggml_tensor>
ggml_get_no_alloc(Pointer<ggml_context> ctx) → bool
ggml_get_rel_pos(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int qh, int kh) → Pointer<ggml_tensor>
ggml_get_rows(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_get_rows_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c) → Pointer<ggml_tensor>
ggml_get_tensor(Pointer<ggml_context> ctx, Pointer<Char> name) → Pointer<ggml_tensor>
ggml_get_unary_op(Pointer<ggml_tensor> tensor) → int
ggml_graph_clear(Pointer<ggml_cgraph> cgraph) → void
ggml_graph_compute(Pointer<ggml_cgraph> cgraph, Pointer<ggml_cplan> cplan) → int
ggml_graph_compute_with_ctx(Pointer<ggml_context> ctx, Pointer<ggml_cgraph> cgraph, int n_threads) → void
ggml_graph_cpy(Pointer<ggml_cgraph> src, Pointer<ggml_cgraph> dst) → void
ggml_graph_dump_dot(Pointer<ggml_cgraph> gb, Pointer<ggml_cgraph> gf, Pointer<Char> filename) → void
ggml_graph_dup(Pointer<ggml_context> ctx, Pointer<ggml_cgraph> cgraph) → Pointer<ggml_cgraph>
ggml_graph_export(Pointer<ggml_cgraph> cgraph, Pointer<Char> fname) → void
ggml_graph_get_tensor(Pointer<ggml_cgraph> cgraph, Pointer<Char> name) → Pointer<ggml_tensor>
ggml_graph_import(Pointer<Char> fname, Pointer<Pointer<ggml_context>> ctx_data, Pointer<Pointer<ggml_context>> ctx_eval) → Pointer<ggml_cgraph>
ggml_graph_overhead() → int
ggml_graph_overhead_custom(int size, bool grads) → int
ggml_graph_plan(Pointer<ggml_cgraph> cgraph, int n_threads) → ggml_cplan
ggml_graph_print(Pointer<ggml_cgraph> cgraph) → void
ggml_graph_reset(Pointer<ggml_cgraph> cgraph) → void
ggml_graph_view(Pointer<ggml_cgraph> cgraph, int i0, int i1) → ggml_cgraph
ggml_group_norm(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_groups) → Pointer<ggml_tensor>
ggml_group_norm_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int n_groups) → Pointer<ggml_tensor>
ggml_im2col(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1, bool is_2D) → Pointer<ggml_tensor>
ggml_init(ggml_init_params params) → Pointer<ggml_context>
ggml_internal_get_type_traits(int type) → ggml_type_traits_t
ggml_is_3d(Pointer<ggml_tensor> tensor) → bool
ggml_is_contiguous(Pointer<ggml_tensor> tensor) → bool
ggml_is_matrix(Pointer<ggml_tensor> tensor) → bool
ggml_is_numa() → bool
ggml_is_permuted(Pointer<ggml_tensor> tensor) → bool
ggml_is_quantized(int type) → bool
ggml_is_scalar(Pointer<ggml_tensor> tensor) → bool
ggml_is_transposed(Pointer<ggml_tensor> tensor) → bool
ggml_is_vector(Pointer<ggml_tensor> tensor) → bool
ggml_leaky_relu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double negative_slope, bool inplace) → Pointer<ggml_tensor>
ggml_log(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_log_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_map_binary_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_binary_op_f32_t fun) → Pointer<ggml_tensor>
ggml_map_binary_inplace_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_binary_op_f32_t fun) → Pointer<ggml_tensor>
ggml_map_custom1(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor>
ggml_map_custom1_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_f32_t fun) → Pointer<ggml_tensor>
ggml_map_custom1_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor>
ggml_map_custom1_inplace_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_f32_t fun) → Pointer<ggml_tensor>
ggml_map_custom2(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor>
ggml_map_custom2_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_f32_t fun) → Pointer<ggml_tensor>
ggml_map_custom2_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor>
ggml_map_custom2_inplace_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_f32_t fun) → Pointer<ggml_tensor>
ggml_map_custom3(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor>
ggml_map_custom3_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_f32_t fun) → Pointer<ggml_tensor>
ggml_map_custom3_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor>
ggml_map_custom3_inplace_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_f32_t fun) → Pointer<ggml_tensor>
ggml_map_unary_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_unary_op_f32_t fun) → Pointer<ggml_tensor>
ggml_map_unary_inplace_f32(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, ggml_unary_op_f32_t fun) → Pointer<ggml_tensor>
ggml_mean(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_mul(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_mul_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_mul_mat(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_mul_mat_id(Pointer<ggml_context> ctx, Pointer<Pointer<ggml_tensor>> as1, int n_as, Pointer<ggml_tensor> ids, int id, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_mul_mat_set_prec(Pointer<ggml_tensor> a, int prec) → void
ggml_n_dims(Pointer<ggml_tensor> tensor) → int
ggml_nbytes(Pointer<ggml_tensor> tensor) → int
ggml_nbytes_pad(Pointer<ggml_tensor> tensor) → int
ggml_neg(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_neg_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_nelements(Pointer<ggml_tensor> tensor) → int
ggml_new_f32(Pointer<ggml_context> ctx, double value) → Pointer<ggml_tensor>
ggml_new_graph(Pointer<ggml_context> ctx) → Pointer<ggml_cgraph>
ggml_new_graph_custom(Pointer<ggml_context> ctx, int size, bool grads) → Pointer<ggml_cgraph>
ggml_new_i32(Pointer<ggml_context> ctx, int value) → Pointer<ggml_tensor>
ggml_new_tensor(Pointer<ggml_context> ctx, int type, int n_dims, Pointer<Int64> ne) → Pointer<ggml_tensor>
ggml_new_tensor_1d(Pointer<ggml_context> ctx, int type, int ne0) → Pointer<ggml_tensor>
ggml_new_tensor_2d(Pointer<ggml_context> ctx, int type, int ne0, int ne1) → Pointer<ggml_tensor>
ggml_new_tensor_3d(Pointer<ggml_context> ctx, int type, int ne0, int ne1, int ne2) → Pointer<ggml_tensor>
ggml_new_tensor_4d(Pointer<ggml_context> ctx, int type, int ne0, int ne1, int ne2, int ne3) → Pointer<ggml_tensor>
ggml_norm(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) → Pointer<ggml_tensor>
ggml_norm_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) → Pointer<ggml_tensor>
ggml_nrows(Pointer<ggml_tensor> tensor) → int
ggml_numa_init() → void
ggml_op_desc(Pointer<ggml_tensor> t) → Pointer<Char>
ggml_op_name(int op) → Pointer<Char>
ggml_op_symbol(int op) → Pointer<Char>
ggml_opt(Pointer<ggml_context> ctx, ggml_opt_params params, Pointer<ggml_tensor> f) → int
ggml_opt_default_params(int type) → ggml_opt_params
ggml_opt_init(Pointer<ggml_context> ctx, Pointer<ggml_opt_context> opt, ggml_opt_params params, int nx) → void
ggml_opt_resume(Pointer<ggml_context> ctx, Pointer<ggml_opt_context> opt, Pointer<ggml_tensor> f) → int
ggml_opt_resume_g(Pointer<ggml_context> ctx, Pointer<ggml_opt_context> opt, Pointer<ggml_tensor> f, Pointer<ggml_cgraph> gf, Pointer<ggml_cgraph> gb, ggml_opt_callback callback, Pointer<Void> callback_data) → int
ggml_out_prod(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_pad(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int p0, int p1, int p2, int p3) → Pointer<ggml_tensor>
ggml_permute(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int axis0, int axis1, int axis2, int axis3) → Pointer<ggml_tensor>
ggml_pool_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int op, int k0, int s0, int p0) → Pointer<ggml_tensor>
ggml_pool_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int op, int k0, int k1, int s0, int s1, double p0, double p1) → Pointer<ggml_tensor>
ggml_print_backtrace() → void
ggml_print_object(Pointer<ggml_object> obj) → void
ggml_print_objects(Pointer<ggml_context> ctx) → void
ggml_quantize_chunk(int type, Pointer<Float> src, Pointer<Void> dst, int start, int nrows, int n_per_row, Pointer<Int64> hist, Pointer<Float> imatrix) → int
ggml_quantize_free() → void
ggml_quantize_init(int type) → void
ggml_quantize_q2_K(Pointer<Float> src, Pointer<Void> dst, int n, int k, Pointer<Int64> hist) → int
ggml_quantize_q3_K(Pointer<Float> src, Pointer<Void> dst, int n, int k, Pointer<Int64> hist) → int
ggml_quantize_q4_0(Pointer<Float> src, Pointer<Void> dst, int n, int k, Pointer<Int64> hist) → int
ggml_quantize_q4_1(Pointer<Float> src, Pointer<Void> dst, int n, int k, Pointer<Int64> hist) → int
ggml_quantize_q4_K(Pointer<Float> src, Pointer<Void> dst, int n, int k, Pointer<Int64> hist) → int
ggml_quantize_q5_0(Pointer<Float> src, Pointer<Void> dst, int n, int k, Pointer<Int64> hist) → int
ggml_quantize_q5_1(Pointer<Float> src, Pointer<Void> dst, int n, int k, Pointer<Int64> hist) → int
ggml_quantize_q5_K(Pointer<Float> src, Pointer<Void> dst, int n, int k, Pointer<Int64> hist) → int
ggml_quantize_q6_K(Pointer<Float> src, Pointer<Void> dst, int n, int k, Pointer<Int64> hist) → int
ggml_quantize_q8_0(Pointer<Float> src, Pointer<Void> dst, int n, int k, Pointer<Int64> hist) → int
ggml_quantize_requires_imatrix(int type) → bool
ggml_relu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_relu_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_repeat(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_repeat_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_reshape(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_reshape_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0) → Pointer<ggml_tensor>
ggml_reshape_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1) → Pointer<ggml_tensor>
ggml_reshape_3d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2) → Pointer<ggml_tensor>
ggml_reshape_4d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) → Pointer<ggml_tensor>
ggml_rms_norm(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) → Pointer<ggml_tensor>
ggml_rms_norm_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, double eps) → Pointer<ggml_tensor>
ggml_rms_norm_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double eps) → Pointer<ggml_tensor>
ggml_rope(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode, int n_ctx) → Pointer<ggml_tensor>
ggml_rope_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode, int n_ctx, int n_orig_ctx, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow, double xpos_base, bool xpos_down) → Pointer<ggml_tensor>
ggml_rope_custom(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode, int n_ctx, int n_orig_ctx, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor>
ggml_rope_custom_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode, int n_ctx, int n_orig_ctx, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor>
ggml_rope_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode, int n_ctx) → Pointer<ggml_tensor>
ggml_rope_xpos_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, double base, bool down) → Pointer<ggml_tensor>
ggml_rope_yarn_corr_dims(int n_dims, int n_orig_ctx, double freq_base, double beta_fast, double beta_slow, Pointer<Float> dims) → void
ggml_row_size(int type, int ne) → int
ggml_scale(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double s) → Pointer<ggml_tensor>
ggml_scale_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, double s) → Pointer<ggml_tensor>
ggml_set(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor>
ggml_set_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int offset) → Pointer<ggml_tensor>
ggml_set_1d_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int offset) → Pointer<ggml_tensor>
ggml_set_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int offset) → Pointer<ggml_tensor>
ggml_set_2d_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int offset) → Pointer<ggml_tensor>
ggml_set_f32(Pointer<ggml_tensor> tensor, double value) → Pointer<ggml_tensor>
ggml_set_f32_1d(Pointer<ggml_tensor> tensor, int i, double value) → void
ggml_set_f32_nd(Pointer<ggml_tensor> tensor, int i0, int i1, int i2, int i3, double value) → void
ggml_set_i32(Pointer<ggml_tensor> tensor, int value) → Pointer<ggml_tensor>
ggml_set_i32_1d(Pointer<ggml_tensor> tensor, int i, int value) → void
ggml_set_i32_nd(Pointer<ggml_tensor> tensor, int i0, int i1, int i2, int i3, int value) → void
ggml_set_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor>
ggml_set_name(Pointer<ggml_tensor> tensor, Pointer<Char> name) → Pointer<ggml_tensor>
ggml_set_no_alloc(Pointer<ggml_context> ctx, bool no_alloc) → void
ggml_set_param(Pointer<ggml_context> ctx, Pointer<ggml_tensor> tensor) → void
ggml_set_scratch(Pointer<ggml_context> ctx, ggml_scratch scratch) → int
ggml_set_zero(Pointer<ggml_tensor> tensor) → Pointer<ggml_tensor>
ggml_sgn(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_sgn_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_silu(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_silu_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_silu_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_soft_max(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_soft_max_back(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_soft_max_back_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_soft_max_ext(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> mask, double scale) → Pointer<ggml_tensor>
ggml_soft_max_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_sqr(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_sqr_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_sqrt(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_sqrt_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_step(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_step_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_sub(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_sub_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor>
ggml_sum(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_sum_rows(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_tallocr_alloc(ggml_tallocr_t talloc, Pointer<ggml_tensor> tensor) → void
ggml_tallocr_free(ggml_tallocr_t talloc) → void
ggml_tallocr_get_buffer(ggml_tallocr_t talloc) → Pointer<ggml_backend_buffer>
ggml_tallocr_is_measure(ggml_tallocr_t talloc) → bool
ggml_tallocr_max_size(ggml_tallocr_t talloc) → int
ggml_tallocr_new(Pointer<Void> data, int size, int alignment) → ggml_tallocr_t
ggml_tallocr_new_from_backend(Pointer<ggml_backend> backend, int size) → ggml_tallocr_t
ggml_tallocr_new_from_buffer(Pointer<ggml_backend_buffer> buffer) → ggml_tallocr_t
ggml_tallocr_new_from_buft(Pointer<ggml_backend_buffer_type> buft, int size) → ggml_tallocr_t
ggml_tallocr_new_measure(int alignment) → ggml_tallocr_t
ggml_tallocr_new_measure_from_backend(Pointer<ggml_backend> backend) → ggml_tallocr_t
ggml_tallocr_new_measure_from_buft(Pointer<ggml_backend_buffer_type> buft) → ggml_tallocr_t
ggml_tallocr_reset(ggml_tallocr_t talloc) → void
ggml_tanh(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_tanh_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_tensor_overhead() → int
ggml_time_init() → void
ggml_time_ms() → int
ggml_time_us() → int
ggml_top_k(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int k) → Pointer<ggml_tensor>
ggml_transpose(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor>
ggml_type_name(int type) → Pointer<Char>
ggml_type_size(int type) → int
ggml_type_sizef(int type) → double
ggml_unary(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int op) → Pointer<ggml_tensor>
ggml_unary_inplace(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int op) → Pointer<ggml_tensor>
ggml_unary_op_name(int op) → Pointer<Char>
ggml_unravel_index(Pointer<ggml_tensor> tensor, int i, Pointer<Int64> i0, Pointer<Int64> i1, Pointer<Int64> i2, Pointer<Int64> i3) → void
ggml_upscale(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int scale_factor) → Pointer<ggml_tensor>
ggml_used_mem(Pointer<ggml_context> ctx) → int
ggml_view_1d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int offset) → Pointer<ggml_tensor>
ggml_view_2d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int nb1, int offset) → Pointer<ggml_tensor>
ggml_view_3d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int nb1, int nb2, int offset) → Pointer<ggml_tensor>
ggml_view_4d(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor>
ggml_view_tensor(Pointer<ggml_context> ctx, Pointer<ggml_tensor> src) → Pointer<ggml_tensor>
ggml_win_part(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int w) → Pointer<ggml_tensor>
ggml_win_unpart(Pointer<ggml_context> ctx, Pointer<ggml_tensor> a, int w0, int h0, int w) → Pointer<ggml_tensor>
gguf_add_tensor(Pointer<gguf_context> ctx, Pointer<ggml_tensor> tensor) → void
gguf_find_key(Pointer<gguf_context> ctx, Pointer<Char> key) → int
gguf_find_tensor(Pointer<gguf_context> ctx, Pointer<Char> name) → int
gguf_free(Pointer<gguf_context> ctx) → void
gguf_get_alignment(Pointer<gguf_context> ctx) → int
gguf_get_arr_data(Pointer<gguf_context> ctx, int key_id) → Pointer<Void>
gguf_get_arr_n(Pointer<gguf_context> ctx, int key_id) → int
gguf_get_arr_str(Pointer<gguf_context> ctx, int key_id, int i) → Pointer<Char>
gguf_get_arr_type(Pointer<gguf_context> ctx, int key_id) → int
gguf_get_data(Pointer<gguf_context> ctx) → Pointer<Void>
gguf_get_data_offset(Pointer<gguf_context> ctx) → int
gguf_get_key(Pointer<gguf_context> ctx, int key_id) → Pointer<Char>
gguf_get_kv_type(Pointer<gguf_context> ctx, int key_id) → int
gguf_get_meta_data(Pointer<gguf_context> ctx, Pointer<Void> data) → void
gguf_get_meta_size(Pointer<gguf_context> ctx) → int
gguf_get_n_kv(Pointer<gguf_context> ctx) → int
gguf_get_n_tensors(Pointer<gguf_context> ctx) → int
gguf_get_tensor_name(Pointer<gguf_context> ctx, int i) → Pointer<Char>
gguf_get_tensor_offset(Pointer<gguf_context> ctx, int i) → int
gguf_get_tensor_type(Pointer<gguf_context> ctx, int i) → int
gguf_get_val_bool(Pointer<gguf_context> ctx, int key_id) → bool
gguf_get_val_data(Pointer<gguf_context> ctx, int key_id) → Pointer<Void>
gguf_get_val_f32(Pointer<gguf_context> ctx, int key_id) → double
gguf_get_val_f64(Pointer<gguf_context> ctx, int key_id) → double
gguf_get_val_i16(Pointer<gguf_context> ctx, int key_id) → int
gguf_get_val_i32(Pointer<gguf_context> ctx, int key_id) → int
gguf_get_val_i64(Pointer<gguf_context> ctx, int key_id) → int
gguf_get_val_i8(Pointer<gguf_context> ctx, int key_id) → int
gguf_get_val_str(Pointer<gguf_context> ctx, int key_id) → Pointer<Char>
gguf_get_val_u16(Pointer<gguf_context> ctx, int key_id) → int
gguf_get_val_u32(Pointer<gguf_context> ctx, int key_id) → int
gguf_get_val_u64(Pointer<gguf_context> ctx, int key_id) → int
gguf_get_val_u8(Pointer<gguf_context> ctx, int key_id) → int
gguf_get_version(Pointer<gguf_context> ctx) → int
gguf_init_empty() → Pointer<gguf_context>
gguf_init_from_file(Pointer<Char> fname, gguf_init_params params) → Pointer<gguf_context>
gguf_set_arr_data(Pointer<gguf_context> ctx, Pointer<Char> key, int type, Pointer<Void> data, int n) → void
gguf_set_arr_str(Pointer<gguf_context> ctx, Pointer<Char> key, Pointer<Pointer<Char>> data, int n) → void
gguf_set_kv(Pointer<gguf_context> ctx, Pointer<gguf_context> src) → void
gguf_set_tensor_data(Pointer<gguf_context> ctx, Pointer<Char> name, Pointer<Void> data, int size) → void
gguf_set_tensor_type(Pointer<gguf_context> ctx, Pointer<Char> name, int type) → void
gguf_set_val_bool(Pointer<gguf_context> ctx, Pointer<Char> key, bool val) → void
gguf_set_val_f32(Pointer<gguf_context> ctx, Pointer<Char> key, double val) → void
gguf_set_val_f64(Pointer<gguf_context> ctx, Pointer<Char> key, double val) → void
gguf_set_val_i16(Pointer<gguf_context> ctx, Pointer<Char> key, int val) → void
gguf_set_val_i32(Pointer<gguf_context> ctx, Pointer<Char> key, int val) → void
gguf_set_val_i64(Pointer<gguf_context> ctx, Pointer<Char> key, int val) → void
gguf_set_val_i8(Pointer<gguf_context> ctx, Pointer<Char> key, int val) → void
gguf_set_val_str(Pointer<gguf_context> ctx, Pointer<Char> key, Pointer<Char> val) → void
gguf_set_val_u16(Pointer<gguf_context> ctx, Pointer<Char> key, int val) → void
gguf_set_val_u32(Pointer<gguf_context> ctx, Pointer<Char> key, int val) → void
gguf_set_val_u64(Pointer<gguf_context> ctx, Pointer<Char> key, int val) → void
gguf_set_val_u8(Pointer<gguf_context> ctx, Pointer<Char> key, int val) → void
gguf_type_name(int type) → Pointer<Char>
gguf_write_to_file(Pointer<gguf_context> ctx, Pointer<Char> fname, bool only_meta) → void
llama_add_bos_token(Pointer<llama_model> model) → int
llama_add_eos_token(Pointer<llama_model> model) → int
llama_apply_lora_from_file(Pointer<llama_context> ctx, Pointer<Char> path_lora, double scale, Pointer<Char> path_base_model, int n_threads) → int
llama_backend_free() → void
llama_backend_init(bool numa) → void
llama_batch_free(llama_batch batch) → void
llama_batch_get_one(Pointer<llama_token> tokens, int n_tokens, int pos_0, int seq_id) → llama_batch
llama_batch_init(int n_tokens, int embd, int n_seq_max) → llama_batch
llama_beam_search(Pointer<llama_context> ctx, llama_beam_search_callback_fn_t callback, Pointer<Void> callback_data, int n_beams, int n_past, int n_predict) → void: @details Deterministically returns the entire sentence constructed by a beam search. @param ctx Pointer to the llama_context. @param callback Invoked for each iteration of the beam_search loop, passing in beams_state. @param callback_data A pointer that is simply passed back to callback. @param n_beams Number of beams to use. @param n_past Number of tokens already evaluated. @param n_predict Maximum number of tokens to predict. EOS may occur earlier.
llama_context_default_params() → llama_context_params
llama_copy_state_data(Pointer<llama_context> ctx, Pointer<Uint8> dst) → int
llama_decode(Pointer<llama_context> ctx, llama_batch batch) → int
llama_dump_timing_info_yaml(Pointer<FILE> stream, Pointer<llama_context> ctx) → void
llama_eval(Pointer<llama_context> ctx, Pointer<llama_token> tokens, int n_tokens, int n_past) → int
llama_eval_embd(Pointer<llama_context> ctx, Pointer<Float> embd, int n_tokens, int n_past) → int
llama_free(Pointer<llama_context> ctx) → void
llama_free_model(Pointer<llama_model> model) → void
llama_get_embeddings(Pointer<llama_context> ctx) → Pointer<Float>
llama_get_kv_cache_token_count(Pointer<llama_context> ctx) → int
llama_get_kv_cache_used_cells(Pointer<llama_context> ctx) → int
llama_get_logits(Pointer<llama_context> ctx) → Pointer<Float>
llama_get_logits_ith(Pointer<llama_context> ctx, int i) → Pointer<Float>
llama_get_model(Pointer<llama_context> ctx) → Pointer<llama_model>
llama_get_model_tensor(Pointer<llama_model> model, Pointer<Char> name) → Pointer<ggml_tensor>
llama_get_state_size(Pointer<llama_context> ctx) → int
llama_get_timings(Pointer<llama_context> ctx) → llama_timings
llama_grammar_accept_token(Pointer<llama_context> ctx, Pointer<llama_grammar> grammar, int token) → void: @details Accepts the sampled token into the grammar
llama_grammar_copy(Pointer<llama_grammar> grammar) → Pointer<llama_grammar>
llama_grammar_free(Pointer<llama_grammar> grammar) → void
llama_grammar_init(Pointer<Pointer<llama_grammar_element>> rules, int n_rules, int start_rule_index) → Pointer<llama_grammar>
llama_kv_cache_clear(Pointer<llama_context> ctx) → void
llama_kv_cache_seq_cp(Pointer<llama_context> ctx, int seq_id_src, int seq_id_dst, int p0, int p1) → void
llama_kv_cache_seq_div(Pointer<llama_context> ctx, int seq_id, int p0, int p1, int d) → void
llama_kv_cache_seq_keep(Pointer<llama_context> ctx, int seq_id) → void
llama_kv_cache_seq_rm(Pointer<llama_context> ctx, int seq_id, int p0, int p1) → void
llama_kv_cache_seq_shift(Pointer<llama_context> ctx, int seq_id, int p0, int p1, int delta) → void
llama_kv_cache_view_free(Pointer<llama_kv_cache_view> view) → void
llama_kv_cache_view_init(Pointer<llama_context> ctx, int n_max_seq) → llama_kv_cache_view
llama_kv_cache_view_update(Pointer<llama_context> ctx, Pointer<llama_kv_cache_view> view) → void
llama_load_model_from_file(Pointer<Char> path_model, llama_model_params params) → Pointer<llama_model>
llama_load_session_file(Pointer<llama_context> ctx, Pointer<Char> path_session, Pointer<llama_token> tokens_out, int n_token_capacity, Pointer<Size> n_token_count_out) → bool
llama_log_set(ggml_log_callback log_callback, Pointer<Void> user_data) → void
llama_max_devices() → int
llama_mlock_supported() → bool
llama_mmap_supported() → bool
llama_model_apply_lora_from_file(Pointer<llama_model> model, Pointer<Char> path_lora, double scale, Pointer<Char> path_base_model, int n_threads) → int
llama_model_default_params() → llama_model_params
llama_model_desc(Pointer<llama_model> model, Pointer<Char> buf, int buf_size) → int
llama_model_meta_count(Pointer<llama_model> model) → int
llama_model_meta_key_by_index(Pointer<llama_model> model, int i, Pointer<Char> buf, int buf_size) → int
llama_model_meta_val_str(Pointer<llama_model> model, Pointer<Char> key, Pointer<Char> buf, int buf_size) → int
llama_model_meta_val_str_by_index(Pointer<llama_model> model, int i, Pointer<Char> buf, int buf_size) → int
llama_model_n_params(Pointer<llama_model> model) → int
llama_model_quantize(Pointer<Char> fname_inp, Pointer<Char> fname_out, Pointer<llama_model_quantize_params> params) → int
llama_model_quantize_default_params() → llama_model_quantize_params
llama_model_size(Pointer<llama_model> model) → int
llama_n_batch(Pointer<llama_context> ctx) → int
llama_n_ctx(Pointer<llama_context> ctx) → int
llama_n_ctx_train(Pointer<llama_model> model) → int
llama_n_embd(Pointer<llama_model> model) → int
llama_n_vocab(Pointer<llama_model> model) → int
llama_new_context_with_model(Pointer<llama_model> model, llama_context_params params) → Pointer<llama_context>
llama_print_system_info() → Pointer<Char>
llama_print_timings(Pointer<llama_context> ctx) → void
llama_reset_timings(Pointer<llama_context> ctx) → void
llama_rope_freq_scale_train(Pointer<llama_model> model) → double
llama_sample_apply_guidance(Pointer<llama_context> ctx, Pointer<Float> logits, Pointer<Float> logits_guidance, double scale) → void: @details Apply classifier-free guidance to the logits as described in academic paper "Stay on topic with Classifier-Free Guidance" https://arxiv.org/abs/2306.17806 @param logits Logits extracted from the original generation context. @param logits_guidance Logits extracted from a separate context from the same model. Other than a negative prompt at the beginning, it should have all generated and user input tokens copied from the main context. @param scale Guidance strength. 1.0f means no guidance. Higher values mean stronger guidance.
llama_sample_classifier_free_guidance(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates, Pointer<llama_context> guidance_ctx, double scale) → void
llama_sample_grammar(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates, Pointer<llama_grammar> grammar) → void: @details Apply constraints from grammar
llama_sample_min_p(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates, double p, int min_keep) → void: @details Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841
llama_sample_repetition_penalties(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates, Pointer<llama_token> last_tokens, int penalty_last_n, double penalty_repeat, double penalty_freq, double penalty_present) → void: @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix. @details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details.
llama_sample_softmax(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates) → void: @details Sorts candidate tokens by their logits in descending order and calculates probabilities based on logits.
llama_sample_tail_free(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates, double z, int min_keep) → void: @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
llama_sample_temp(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates, double temp) → void
llama_sample_temperature(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates, double temp) → void
llama_sample_token(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates) → int: @details Randomly selects a token from the candidates based on their probabilities.
llama_sample_token_greedy(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates) → int: @details Selects the token with the highest probability. Does not compute the token probabilities. Use llama_sample_softmax() instead.
llama_sample_token_mirostat(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates, double tau, double eta, int m, Pointer<Float> mu) → int: @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param m The number of tokens considered in the estimation of s_hat. This is an arbitrary value that is used to calculate s_hat, which in turn helps to calculate the value of k. In the paper, they use m = 100, but you can experiment with different values to see how it affects the performance of the algorithm. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
llama_sample_token_mirostat_v2(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates, double tau, double eta, Pointer<Float> mu) → int: @details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words. @param candidates A vector of llama_token_data containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to update mu based on the error between the target and observed surprisal of the sampled word. A larger learning rate will cause mu to be updated more quickly, while a smaller learning rate will result in slower updates. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau) and is updated in the algorithm based on the error between the target and observed surprisal.
llama_sample_top_k(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates, int k, int min_keep) → void: @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
llama_sample_top_p(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates, double p, int min_keep) → void: @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
llama_sample_typical(Pointer<llama_context> ctx, Pointer<llama_token_data_array> candidates, double p, int min_keep) → void: @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
llama_save_session_file(Pointer<llama_context> ctx, Pointer<Char> path_session, Pointer<llama_token> tokens, int n_token_count) → bool
llama_set_n_threads(Pointer<llama_context> ctx, int n_threads, int n_threads_batch) → void
llama_set_rng_seed(Pointer<llama_context> ctx, int seed) → void
llama_set_state_data(Pointer<llama_context> ctx, Pointer<Uint8> src) → int
llama_time_us() → int
llama_token_bos(Pointer<llama_model> model) → int
llama_token_eos(Pointer<llama_model> model) → int
llama_token_eot(Pointer<llama_model> model) → int
llama_token_get_score(Pointer<llama_model> model, int token) → double
llama_token_get_text(Pointer<llama_model> model, int token) → Pointer<Char>
llama_token_get_type(Pointer<llama_model> model, int token) → int
llama_token_middle(Pointer<llama_model> model) → int
llama_token_nl(Pointer<llama_model> model) → int
llama_token_prefix(Pointer<llama_model> model) → int
llama_token_suffix(Pointer<llama_model> model) → int
llama_token_to_piece(Pointer<llama_model> model, int token, Pointer<Char> buf, int length) → int
llama_tokenize(Pointer<llama_model> model, Pointer<Char> text, int text_len, Pointer<llama_token> tokens, int n_max_tokens, bool add_bos, bool special) → int: @details Convert the provided text into tokens. @param tokens The tokens pointer must be large enough to hold the resulting tokens. @return Returns the number of tokens on success, no more than n_max_tokens @return Returns a negative number on failure - the number of tokens that would have been returned @param special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.
llama_vocab_type1(Pointer<llama_model> model) → int
noSuchMethod(Invocation invocation) → dynamic: Invoked when a nonexistent method or property is accessed. (inherited)
open_memstream(Pointer<Pointer<Char>> __bufp, Pointer<Size> __sizep) → Pointer<FILE>
pclose(Pointer<FILE> arg0) → int
perror(Pointer<Char> arg0) → void
popen(Pointer<Char> arg0, Pointer<Char> arg1) → Pointer<FILE>
printf(Pointer<Char> arg0) → int
putc(int arg0, Pointer<FILE> arg1) → int
putc_unlocked(int arg0, Pointer<FILE> arg1) → int
putchar(int arg0) → int
putchar_unlocked(int arg0) → int
puts(Pointer<Char> arg0) → int
putw(int arg0, Pointer<FILE> arg1) → int
remove(Pointer<Char> arg0) → int
rename(Pointer<Char> __old, Pointer<Char> __new) → int
renameat(int arg0, Pointer<Char> arg1, int arg2, Pointer<Char> arg3) → int
renameatx_np(int arg0, Pointer<Char> arg1, int arg2, Pointer<Char> arg3, int arg4) → int
renamex_np(Pointer<Char> arg0, Pointer<Char> arg1, int arg2) → int
rewind(Pointer<FILE> arg0) → void
scanf(Pointer<Char> arg0) → int
setbuf(Pointer<FILE> arg0, Pointer<Char> arg1) → void
setbuffer(Pointer<FILE> arg0, Pointer<Char> arg1, int arg2) → void
setlinebuf(Pointer<FILE> arg0) → int
setvbuf(Pointer<FILE> arg0, Pointer<Char> arg1, int arg2, int arg3) → int
snprintf(Pointer<Char> __str, int __size, Pointer<Char> __format) → int
sprintf(Pointer<Char> arg0, Pointer<Char> arg1) → int
sscanf(Pointer<Char> arg0, Pointer<Char> arg1) → int
tempnam(Pointer<Char> __dir, Pointer<Char> __prefix) → Pointer<Char>
tmpfile() → Pointer<FILE>
tmpnam(Pointer<Char> arg0) → Pointer<Char>
toString() → String: A string representation of this object. (inherited)
ungetc(int arg0, Pointer<FILE> arg1) → int
vasprintf(Pointer<Pointer<Char>> arg0, Pointer<Char> arg1, va_list arg2) → int
vdprintf(int arg0, Pointer<Char> arg1, va_list arg2) → int
vfprintf(Pointer<FILE> arg0, Pointer<Char> arg1, va_list arg2) → int
vfscanf(Pointer<FILE> __stream, Pointer<Char> __format, va_list arg2) → int
vprintf(Pointer<Char> arg0, va_list arg1) → int
vscanf(Pointer<Char> __format, va_list arg1) → int
vsnprintf(Pointer<Char> __str, int __size, Pointer<Char> __format, va_list arg3) → int
vsprintf(Pointer<Char> arg0, Pointer<Char> arg1, va_list arg2) → int
vsscanf(Pointer<Char> __str, Pointer<Char> __format, va_list arg2) → int

Operators

operator ==(Object other) → bool: The equality operator. (inherited)

llama_cpp class

Constructors

Properties

Methods

Operators

llama_cpp_dart_ffi library