llama_cpp class
llama.cpp binding
Constructors
- llama_cpp(DynamicLibrary dynamicLibrary)
-
The symbols are looked up in
dynamicLibrary
. -
llama_cpp.fromLookup(Pointer<T> lookup<T extends NativeType>(String symbolName)) -
The symbols are looked up with
lookup
.
Properties
- GGML_TENSOR_SIZE → int
-
no setter
- hashCode → int
-
The hash code for this object.
no setterinherited
- runtimeType → Type
-
A representation of the runtime type of the object.
no setterinherited
-
sys_errlist
↔ Pointer<Pointer<Char>> -
getter/setter pair
- sys_nerr → int
-
no setter
Methods
-
asprintf(
Pointer<Pointer<Char>> arg0, Pointer<Char> arg1) → int -
clearerr(
Pointer< FILE> arg0) → void -
ctermid(
Pointer< Char> arg0) → Pointer<Char> -
ctermid_r(
Pointer< Char> arg0) → Pointer<Char> -
dprintf(
int arg0, Pointer< Char> arg1) → int -
fclose(
Pointer< FILE> arg0) → int -
fdopen(
int arg0, Pointer< Char> arg1) → Pointer<FILE> -
feof(
Pointer< FILE> arg0) → int -
ferror(
Pointer< FILE> arg0) → int -
fflush(
Pointer< FILE> arg0) → int -
fgetc(
Pointer< FILE> arg0) → int -
fgetln(
Pointer< FILE> arg0, Pointer<Size> arg1) → Pointer<Char> -
fgetpos(
Pointer< FILE> arg0, Pointer<fpos_t> arg1) → int -
fgets(
Pointer< Char> arg0, int arg1, Pointer<FILE> arg2) → Pointer<Char> -
fileno(
Pointer< FILE> arg0) → int -
flockfile(
Pointer< FILE> arg0) → void -
fmemopen(
Pointer< Void> __buf, int __size, Pointer<Char> __mode) → Pointer<FILE> -
fmtcheck(
Pointer< Char> arg0, Pointer<Char> arg1) → Pointer<Char> -
fopen(
Pointer< Char> __filename, Pointer<Char> __mode) → Pointer<FILE> -
fprintf(
Pointer< FILE> arg0, Pointer<Char> arg1) → int -
fpurge(
Pointer< FILE> arg0) → int -
fputc(
int arg0, Pointer< FILE> arg1) → int -
fputs(
Pointer< Char> arg0, Pointer<FILE> arg1) → int -
fread(
Pointer< Void> __ptr, int __size, int __nitems, Pointer<FILE> __stream) → int -
freopen(
Pointer< Char> arg0, Pointer<Char> arg1, Pointer<FILE> arg2) → Pointer<FILE> -
fscanf(
Pointer< FILE> arg0, Pointer<Char> arg1) → int -
fseek(
Pointer< FILE> arg0, int arg1, int arg2) → int -
fseeko(
Pointer< FILE> __stream, int __offset, int __whence) → int -
fsetpos(
Pointer< FILE> arg0, Pointer<fpos_t> arg1) → int -
ftell(
Pointer< FILE> arg0) → int -
ftello(
Pointer< FILE> __stream) → int -
ftrylockfile(
Pointer< FILE> arg0) → int -
funlockfile(
Pointer< FILE> arg0) → void -
funopen(
Pointer<Void> arg0, Pointer<NativeFunction<Int Function(Pointer<Void>, Pointer<Char>, Int)>> arg1, Pointer<NativeFunction<Int Function(Pointer<Void>, Pointer<Char>, Int)>> arg2, Pointer<NativeFunction<fpos_t Function(Pointer<Void>, fpos_t, Int)>> arg3, Pointer<NativeFunction<Int Function(Pointer<Void>)>> arg4) → Pointer<FILE> -
fwrite(
Pointer< Void> __ptr, int __size, int __nitems, Pointer<FILE> __stream) → int -
getc(
Pointer< FILE> arg0) → int -
getc_unlocked(
Pointer< FILE> arg0) → int -
getchar(
) → int -
getchar_unlocked(
) → int -
getdelim(
Pointer<Pointer<Char>> __linep, Pointer<Size> __linecapp, int __delimiter, Pointer<FILE> __stream) → int -
getline(
Pointer<Pointer<Char>> __linep, Pointer<Size> __linecapp, Pointer<FILE> __stream) → int -
gets(
Pointer< Char> arg0) → Pointer<Char> -
getw(
Pointer< FILE> arg0) → int -
ggml_abort(
Pointer< Char> file, int line, Pointer<Char> fmt) → void -
ggml_abs(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_abs_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_acc(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor> -
ggml_acc_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor> -
ggml_add(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_add1(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_add1_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_add_cast(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_type type) → Pointer<ggml_tensor> -
ggml_add_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_add_rel_pos(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> pw, Pointer<ggml_tensor> ph) → Pointer<ggml_tensor> -
ggml_add_rel_pos_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> pw, Pointer<ggml_tensor> ph) → Pointer<ggml_tensor> -
ggml_arange(
Pointer< ggml_context> ctx, double start, double stop, double step) → Pointer<ggml_tensor> -
ggml_are_same_shape(
Pointer< ggml_tensor> t0, Pointer<ggml_tensor> t1) → bool -
ggml_are_same_stride(
Pointer< ggml_tensor> t0, Pointer<ggml_tensor> t1) → bool -
ggml_argmax(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_argsort(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_sort_order order) → Pointer<ggml_tensor> -
ggml_backend_alloc_buffer(
ggml_backend_t backend, int size) → ggml_backend_buffer_t -
ggml_backend_alloc_ctx_tensors(
Pointer< ggml_context> ctx, ggml_backend_t backend) → Pointer<ggml_backend_buffer> -
ggml_backend_alloc_ctx_tensors_from_buft(
Pointer< ggml_context> ctx, ggml_backend_buffer_type_t buft) → Pointer<ggml_backend_buffer> -
ggml_backend_buffer_clear(
ggml_backend_buffer_t buffer, int value) → void -
ggml_backend_buffer_free(
ggml_backend_buffer_t buffer) → void -
ggml_backend_buffer_get_alignment(
ggml_backend_buffer_t buffer) → int -
ggml_backend_buffer_get_alloc_size(
ggml_backend_buffer_t buffer, Pointer< ggml_tensor> tensor) → int -
ggml_backend_buffer_get_base(
ggml_backend_buffer_t buffer) → Pointer< Void> -
ggml_backend_buffer_get_max_size(
ggml_backend_buffer_t buffer) → int -
ggml_backend_buffer_get_size(
ggml_backend_buffer_t buffer) → int -
ggml_backend_buffer_get_type(
ggml_backend_buffer_t buffer) → ggml_backend_buffer_type_t -
ggml_backend_buffer_get_usage(
ggml_backend_buffer_t buffer) → ggml_backend_buffer_usage -
ggml_backend_buffer_init_tensor(
ggml_backend_buffer_t buffer, Pointer< ggml_tensor> tensor) → void -
ggml_backend_buffer_is_host(
ggml_backend_buffer_t buffer) → bool -
ggml_backend_buffer_name(
ggml_backend_buffer_t buffer) → Pointer< Char> -
ggml_backend_buffer_reset(
ggml_backend_buffer_t buffer) → void -
ggml_backend_buffer_set_usage(
ggml_backend_buffer_t buffer, ggml_backend_buffer_usage usage) → void -
ggml_backend_buft_alloc_buffer(
ggml_backend_buffer_type_t buft, int size) → ggml_backend_buffer_t -
ggml_backend_buft_get_alignment(
ggml_backend_buffer_type_t buft) → int -
ggml_backend_buft_get_alloc_size(
ggml_backend_buffer_type_t buft, Pointer< ggml_tensor> tensor) → int -
ggml_backend_buft_get_device(
ggml_backend_buffer_type_t buft) → ggml_backend_dev_t -
ggml_backend_buft_get_max_size(
ggml_backend_buffer_type_t buft) → int -
ggml_backend_buft_is_host(
ggml_backend_buffer_type_t buft) → bool -
ggml_backend_buft_name(
ggml_backend_buffer_type_t buft) → Pointer< Char> -
ggml_backend_compare_graph_backend(
ggml_backend_t backend1, ggml_backend_t backend2, Pointer< ggml_cgraph> graph, ggml_backend_eval_callback callback, Pointer<Void> user_data) → bool -
ggml_backend_cpu_aarch64_buffer_type(
) → ggml_backend_buffer_type_t -
ggml_backend_cpu_buffer_from_ptr(
Pointer< Void> ptr, int size) → ggml_backend_buffer_t -
ggml_backend_cpu_buffer_type(
) → ggml_backend_buffer_type_t -
ggml_backend_cpu_buft_is_aarch64(
ggml_backend_buffer_type_t buft) → bool -
ggml_backend_cpu_init(
) → ggml_backend_t -
ggml_backend_cpu_reg(
) → ggml_backend_reg_t -
ggml_backend_cpu_set_abort_callback(
ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, Pointer< Void> abort_callback_data) → void -
ggml_backend_cpu_set_n_threads(
ggml_backend_t backend_cpu, int n_threads) → void -
ggml_backend_cpu_set_threadpool(
ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) → void -
ggml_backend_dev_backend_reg(
ggml_backend_dev_t device) → ggml_backend_reg_t -
ggml_backend_dev_buffer_from_host_ptr(
ggml_backend_dev_t device, Pointer< Void> ptr, int size, int max_tensor_size) → ggml_backend_buffer_t -
ggml_backend_dev_buffer_type(
ggml_backend_dev_t device) → ggml_backend_buffer_type_t -
ggml_backend_dev_by_name(
Pointer< Char> name) → ggml_backend_dev_t -
ggml_backend_dev_by_type(
ggml_backend_dev_type type) → ggml_backend_dev_t -
ggml_backend_dev_count(
) → int -
ggml_backend_dev_description(
ggml_backend_dev_t device) → Pointer< Char> -
ggml_backend_dev_get(
int index) → ggml_backend_dev_t -
ggml_backend_dev_get_props(
ggml_backend_dev_t device, Pointer< ggml_backend_dev_props> props) → void -
ggml_backend_dev_host_buffer_type(
ggml_backend_dev_t device) → ggml_backend_buffer_type_t -
ggml_backend_dev_init(
ggml_backend_dev_t device, Pointer< Char> params) → ggml_backend_t -
ggml_backend_dev_memory(
ggml_backend_dev_t device, Pointer< Size> free, Pointer<Size> total) → void -
ggml_backend_dev_name(
ggml_backend_dev_t device) → Pointer< Char> -
ggml_backend_dev_offload_op(
ggml_backend_dev_t device, Pointer< ggml_tensor> op) → bool -
ggml_backend_dev_supports_buft(
ggml_backend_dev_t device, ggml_backend_buffer_type_t buft) → bool -
ggml_backend_dev_supports_op(
ggml_backend_dev_t device, Pointer< ggml_tensor> op) → bool -
ggml_backend_dev_type1(
ggml_backend_dev_t device) → ggml_backend_dev_type -
ggml_backend_event_free(
ggml_backend_event_t event) → void -
ggml_backend_event_new(
ggml_backend_dev_t device) → ggml_backend_event_t -
ggml_backend_event_record(
ggml_backend_event_t event, ggml_backend_t backend) → void -
ggml_backend_event_synchronize(
ggml_backend_event_t event) → void -
ggml_backend_event_wait(
ggml_backend_t backend, ggml_backend_event_t event) → void -
ggml_backend_free(
ggml_backend_t backend) → void -
ggml_backend_get_alignment(
ggml_backend_t backend) → int -
ggml_backend_get_default_buffer_type(
ggml_backend_t backend) → ggml_backend_buffer_type_t -
ggml_backend_get_device(
ggml_backend_t backend) → ggml_backend_dev_t -
ggml_backend_get_max_size(
ggml_backend_t backend) → int -
ggml_backend_graph_compute(
ggml_backend_t backend, Pointer< ggml_cgraph> cgraph) → ggml_status -
ggml_backend_graph_compute_async(
ggml_backend_t backend, Pointer< ggml_cgraph> cgraph) → ggml_status -
ggml_backend_graph_copy1(
ggml_backend_t backend, Pointer< ggml_cgraph> graph) → ggml_backend_graph_copy -
ggml_backend_graph_copy_free(
ggml_backend_graph_copy copy) → void -
ggml_backend_graph_plan_compute(
ggml_backend_t backend, ggml_backend_graph_plan_t plan) → ggml_status -
ggml_backend_graph_plan_create(
ggml_backend_t backend, Pointer< ggml_cgraph> cgraph) → ggml_backend_graph_plan_t -
ggml_backend_graph_plan_free(
ggml_backend_t backend, ggml_backend_graph_plan_t plan) → void -
ggml_backend_guid(
ggml_backend_t backend) → ggml_guid_t -
ggml_backend_init_best(
) → ggml_backend_t -
ggml_backend_init_by_name(
Pointer< Char> name, Pointer<Char> params) → ggml_backend_t -
ggml_backend_init_by_type(
ggml_backend_dev_type type, Pointer< Char> params) → ggml_backend_t -
ggml_backend_is_cpu(
ggml_backend_t backend) → bool -
ggml_backend_name(
ggml_backend_t backend) → Pointer< Char> -
ggml_backend_offload_op(
ggml_backend_t backend, Pointer< ggml_tensor> op) → bool -
ggml_backend_reg_by_name(
Pointer< Char> name) → ggml_backend_reg_t -
ggml_backend_reg_count(
) → int -
ggml_backend_reg_dev_count(
ggml_backend_reg_t reg) → int -
ggml_backend_reg_dev_get(
ggml_backend_reg_t reg, int index) → ggml_backend_dev_t -
ggml_backend_reg_get(
int index) → ggml_backend_reg_t -
ggml_backend_reg_get_proc_address(
ggml_backend_reg_t reg, Pointer< Char> name) → Pointer<Void> -
ggml_backend_reg_name(
ggml_backend_reg_t reg) → Pointer< Char> -
ggml_backend_sched_alloc_graph(
ggml_backend_sched_t sched, Pointer< ggml_cgraph> graph) → bool -
ggml_backend_sched_free(
ggml_backend_sched_t sched) → void -
ggml_backend_sched_get_backend(
ggml_backend_sched_t sched, int i) → ggml_backend_t -
ggml_backend_sched_get_buffer_size(
ggml_backend_sched_t sched, ggml_backend_t backend) → int -
ggml_backend_sched_get_n_backends(
ggml_backend_sched_t sched) → int -
ggml_backend_sched_get_n_copies(
ggml_backend_sched_t sched) → int -
ggml_backend_sched_get_n_splits(
ggml_backend_sched_t sched) → int -
ggml_backend_sched_get_tensor_backend(
ggml_backend_sched_t sched, Pointer< ggml_tensor> node) → ggml_backend_t -
ggml_backend_sched_graph_compute(
ggml_backend_sched_t sched, Pointer< ggml_cgraph> graph) → ggml_status -
ggml_backend_sched_graph_compute_async(
ggml_backend_sched_t sched, Pointer< ggml_cgraph> graph) → ggml_status -
ggml_backend_sched_new(
Pointer< ggml_backend_t> backends, Pointer<ggml_backend_buffer_type_t> bufts, int n_backends, int graph_size, bool parallel) → ggml_backend_sched_t -
ggml_backend_sched_reserve(
ggml_backend_sched_t sched, Pointer< ggml_cgraph> measure_graph) → bool -
ggml_backend_sched_reset(
ggml_backend_sched_t sched) → void -
ggml_backend_sched_set_eval_callback(
ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, Pointer< Void> user_data) → void -
ggml_backend_sched_set_tensor_backend(
ggml_backend_sched_t sched, Pointer< ggml_tensor> node, ggml_backend_t backend) → void -
ggml_backend_sched_synchronize(
ggml_backend_sched_t sched) → void -
ggml_backend_supports_buft(
ggml_backend_t backend, ggml_backend_buffer_type_t buft) → bool -
ggml_backend_supports_op(
ggml_backend_t backend, Pointer< ggml_tensor> op) → bool -
ggml_backend_synchronize(
ggml_backend_t backend) → void -
ggml_backend_tensor_alloc(
ggml_backend_buffer_t buffer, Pointer< ggml_tensor> tensor, Pointer<Void> addr) → void -
ggml_backend_tensor_copy(
Pointer< ggml_tensor> src, Pointer<ggml_tensor> dst) → void -
ggml_backend_tensor_copy_async(
ggml_backend_t backend_src, ggml_backend_t backend_dst, Pointer< ggml_tensor> src, Pointer<ggml_tensor> dst) → void -
ggml_backend_tensor_get(
Pointer< ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void -
ggml_backend_tensor_get_async(
ggml_backend_t backend, Pointer< ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void -
ggml_backend_tensor_memset(
Pointer< ggml_tensor> tensor, int value, int offset, int size) → void -
ggml_backend_tensor_set(
Pointer< ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void -
ggml_backend_tensor_set_async(
ggml_backend_t backend, Pointer< ggml_tensor> tensor, Pointer<Void> data, int offset, int size) → void -
ggml_backend_view_init(
Pointer< ggml_tensor> tensor) → void -
ggml_bf16_to_fp32(
ggml_bf16_t arg0) → double -
ggml_bf16_to_fp32_row(
Pointer< ggml_bf16_t> arg0, Pointer<Float> arg1, int arg2) → void -
ggml_blck_size(
ggml_type type) → int -
ggml_build_backward_expand(
Pointer< ggml_context> ctx_static, Pointer<ggml_context> ctx_compute, Pointer<ggml_cgraph> cgraph, bool accumulate) → void -
ggml_build_forward_expand(
Pointer< ggml_cgraph> cgraph, Pointer<ggml_tensor> tensor) → void -
ggml_can_repeat(
Pointer< ggml_tensor> t0, Pointer<ggml_tensor> t1) → bool -
ggml_cast(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_type type) → Pointer<ggml_tensor> -
ggml_clamp(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double min, double max) → Pointer<ggml_tensor> -
ggml_concat(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int dim) → Pointer<ggml_tensor> -
ggml_cont(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_cont_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0) → Pointer<ggml_tensor> -
ggml_cont_2d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1) → Pointer<ggml_tensor> -
ggml_cont_3d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2) → Pointer<ggml_tensor> -
ggml_cont_4d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) → Pointer<ggml_tensor> -
ggml_conv_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int p0, int d0) → Pointer<ggml_tensor> -
ggml_conv_1d_ph(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s, int d) → Pointer<ggml_tensor> -
ggml_conv_2d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1) → Pointer<ggml_tensor> -
ggml_conv_2d_s1_ph(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_conv_2d_sk_p0(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_conv_depthwise_2d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1) → Pointer<ggml_tensor> -
ggml_conv_transpose_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int p0, int d0) → Pointer<ggml_tensor> -
ggml_conv_transpose_2d_p0(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int stride) → Pointer<ggml_tensor> -
ggml_cos(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_cos_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_count_equal(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_cpu_get_sve_cnt(
) → int -
ggml_cpu_has_amx_int8(
) → int -
ggml_cpu_has_arm_fma(
) → int -
ggml_cpu_has_avx(
) → int -
ggml_cpu_has_avx2(
) → int -
ggml_cpu_has_avx512(
) → int -
ggml_cpu_has_avx512_bf16(
) → int -
ggml_cpu_has_avx512_vbmi(
) → int -
ggml_cpu_has_avx512_vnni(
) → int -
ggml_cpu_has_avx_vnni(
) → int -
ggml_cpu_has_f16c(
) → int -
ggml_cpu_has_fma(
) → int -
ggml_cpu_has_fp16_va(
) → int -
ggml_cpu_has_llamafile(
) → int -
ggml_cpu_has_matmul_int8(
) → int -
ggml_cpu_has_neon(
) → int -
ggml_cpu_has_riscv_v(
) → int -
ggml_cpu_has_sse3(
) → int -
ggml_cpu_has_ssse3(
) → int -
ggml_cpu_has_sve(
) → int -
ggml_cpu_has_vsx(
) → int -
ggml_cpu_has_wasm_simd(
) → int -
ggml_cpu_init(
) → void -
ggml_cpy(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_cross_entropy_loss(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_cross_entropy_loss_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c) → Pointer<ggml_tensor> -
ggml_cycles(
) → int -
ggml_cycles_per_ms(
) → int -
ggml_diag(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_diag_mask_inf(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) → Pointer<ggml_tensor> -
ggml_diag_mask_inf_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) → Pointer<ggml_tensor> -
ggml_diag_mask_zero(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) → Pointer<ggml_tensor> -
ggml_diag_mask_zero_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int n_past) → Pointer<ggml_tensor> -
ggml_div(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_div_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_dup(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_dup_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_dup_tensor(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> src) → Pointer<ggml_tensor> -
ggml_element_size(
Pointer< ggml_tensor> tensor) → int -
ggml_elu(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_elu_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_exp(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_exp_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_flash_attn_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> q, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> d, bool masked) → Pointer<ggml_tensor> -
ggml_flash_attn_ext(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> q, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> mask, double scale, double max_bias, double logit_softcap) → Pointer<ggml_tensor> -
ggml_flash_attn_ext_get_prec(
Pointer< ggml_tensor> a) → ggml_prec -
ggml_flash_attn_ext_set_prec(
Pointer< ggml_tensor> a, ggml_prec prec) → void -
ggml_fopen(
Pointer< Char> fname, Pointer<Char> mode) → Pointer<FILE> -
ggml_format_name(
Pointer< ggml_tensor> tensor, Pointer<Char> fmt) → Pointer<ggml_tensor> -
ggml_fp16_to_fp32(
int arg0) → double -
ggml_fp16_to_fp32_row(
Pointer< ggml_fp16_t> arg0, Pointer<Float> arg1, int arg2) → void -
ggml_fp32_to_bf16(
double arg0) → ggml_bf16_t -
ggml_fp32_to_bf16_row(
Pointer< Float> arg0, Pointer<ggml_bf16_t> arg1, int arg2) → void -
ggml_fp32_to_bf16_row_ref(
Pointer< Float> arg0, Pointer<ggml_bf16_t> arg1, int arg2) → void -
ggml_fp32_to_fp16(
double arg0) → int -
ggml_fp32_to_fp16_row(
Pointer< Float> arg0, Pointer<ggml_fp16_t> arg1, int arg2) → void -
ggml_free(
Pointer< ggml_context> ctx) → void -
ggml_ftype_to_ggml_type(
ggml_ftype ftype) → ggml_type -
ggml_gallocr_alloc_graph(
ggml_gallocr_t galloc, Pointer< ggml_cgraph> graph) → bool -
ggml_gallocr_free(
ggml_gallocr_t galloc) → void -
ggml_gallocr_get_buffer_size(
ggml_gallocr_t galloc, int buffer_id) → int -
ggml_gallocr_new(
ggml_backend_buffer_type_t buft) → ggml_gallocr_t -
ggml_gallocr_new_n(
Pointer< ggml_backend_buffer_type_t> bufts, int n_bufs) → ggml_gallocr_t -
ggml_gallocr_reserve(
ggml_gallocr_t galloc, Pointer< ggml_cgraph> graph) → bool -
ggml_gallocr_reserve_n(
ggml_gallocr_t galloc, Pointer< ggml_cgraph> graph, Pointer<Int> node_buffer_ids, Pointer<Int> leaf_buffer_ids) → bool -
ggml_gelu(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_gelu_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_gelu_quick(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_gelu_quick_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_get_data(
Pointer< ggml_tensor> tensor) → Pointer<Void> -
ggml_get_data_f32(
Pointer< ggml_tensor> tensor) → Pointer<Float> -
ggml_get_f32_1d(
Pointer< ggml_tensor> tensor, int i) → double -
ggml_get_f32_nd(
Pointer< ggml_tensor> tensor, int i0, int i1, int i2, int i3) → double -
ggml_get_first_tensor(
Pointer< ggml_context> ctx) → Pointer<ggml_tensor> -
ggml_get_i32_1d(
Pointer< ggml_tensor> tensor, int i) → int -
ggml_get_i32_nd(
Pointer< ggml_tensor> tensor, int i0, int i1, int i2, int i3) → int -
ggml_get_max_tensor_size(
Pointer< ggml_context> ctx) → int -
ggml_get_mem_buffer(
Pointer< ggml_context> ctx) → Pointer<Void> -
ggml_get_mem_size(
Pointer< ggml_context> ctx) → int -
ggml_get_name(
Pointer< ggml_tensor> tensor) → Pointer<Char> -
ggml_get_next_tensor(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> tensor) → Pointer<ggml_tensor> -
ggml_get_no_alloc(
Pointer< ggml_context> ctx) → bool -
ggml_get_rel_pos(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int qh, int kh) → Pointer<ggml_tensor> -
ggml_get_rows(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_get_rows_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c) → Pointer<ggml_tensor> -
ggml_get_tensor(
Pointer< ggml_context> ctx, Pointer<Char> name) → Pointer<ggml_tensor> -
ggml_get_type_traits(
ggml_type type) → Pointer< ggml_type_traits> -
ggml_get_type_traits_cpu(
ggml_type type) → Pointer< ggml_type_traits_cpu> -
ggml_get_unary_op(
Pointer< ggml_tensor> tensor) → ggml_unary_op -
ggml_graph_add_node(
Pointer< ggml_cgraph> cgraph, Pointer<ggml_tensor> tensor) → void -
ggml_graph_clear(
Pointer< ggml_cgraph> cgraph) → void -
ggml_graph_compute(
Pointer< ggml_cgraph> cgraph, Pointer<ggml_cplan> cplan) → ggml_status -
ggml_graph_compute_with_ctx(
Pointer< ggml_context> ctx, Pointer<ggml_cgraph> cgraph, int n_threads) → ggml_status -
ggml_graph_cpy(
Pointer< ggml_cgraph> src, Pointer<ggml_cgraph> dst) → void -
ggml_graph_dump_dot(
Pointer< ggml_cgraph> gb, Pointer<ggml_cgraph> gf, Pointer<Char> filename) → void -
ggml_graph_dup(
Pointer< ggml_context> ctx, Pointer<ggml_cgraph> cgraph) → Pointer<ggml_cgraph> -
ggml_graph_export(
Pointer< ggml_cgraph> cgraph, Pointer<Char> fname) → void -
ggml_graph_get_grad(
Pointer< ggml_cgraph> cgraph, Pointer<ggml_tensor> node) → Pointer<ggml_tensor> -
ggml_graph_get_grad_acc(
Pointer< ggml_cgraph> cgraph, Pointer<ggml_tensor> node) → Pointer<ggml_tensor> -
ggml_graph_get_tensor(
Pointer< ggml_cgraph> cgraph, Pointer<Char> name) → Pointer<ggml_tensor> -
ggml_graph_import(
Pointer<Char> fname, Pointer<Pointer<ggml_context>> ctx_data, Pointer<Pointer<ggml_context>> ctx_eval) → Pointer<ggml_cgraph> -
ggml_graph_n_nodes(
Pointer< ggml_cgraph> cgraph) → int -
ggml_graph_node(
Pointer< ggml_cgraph> cgraph, int i) → Pointer<ggml_tensor> -
ggml_graph_nodes(
Pointer< ggml_cgraph> cgraph) → Pointer<Pointer< ggml_tensor> > -
ggml_graph_overhead(
) → int -
ggml_graph_overhead_custom(
int size, bool grads) → int -
ggml_graph_plan(
Pointer< ggml_cgraph> cgraph, int n_threads, Pointer<ggml_threadpool> threadpool) → ggml_cplan -
ggml_graph_print(
Pointer< ggml_cgraph> cgraph) → void -
ggml_graph_reset(
Pointer< ggml_cgraph> cgraph) → void -
ggml_graph_size(
Pointer< ggml_cgraph> cgraph) → int -
ggml_group_norm(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int n_groups, double eps) → Pointer<ggml_tensor> -
ggml_group_norm_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int n_groups, double eps) → Pointer<ggml_tensor> -
ggml_guid_matches(
ggml_guid_t guid_a, ggml_guid_t guid_b) → bool -
ggml_hardsigmoid(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_hardswish(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_im2col(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int s0, int s1, int p0, int p1, int d0, int d1, bool is_2D, ggml_type dst_type) → Pointer<ggml_tensor> -
ggml_im2col_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<Int64> ne, int s0, int s1, int p0, int p1, int d0, int d1, bool is_2D) → Pointer<ggml_tensor> -
ggml_init(
ggml_init_params params) → Pointer< ggml_context> -
ggml_is_3d(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_contiguous(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_contiguous_0(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_contiguous_1(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_contiguous_2(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_empty(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_matrix(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_numa(
) → bool -
ggml_is_permuted(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_quantized(
ggml_type type) → bool -
ggml_is_scalar(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_transposed(
Pointer< ggml_tensor> tensor) → bool -
ggml_is_vector(
Pointer< ggml_tensor> tensor) → bool -
ggml_leaky_relu(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double negative_slope, bool inplace) → Pointer<ggml_tensor> -
ggml_log(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_log_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_log_set(
ggml_log_callback log_callback, Pointer< Void> user_data) → void -
ggml_map_binary_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_binary_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_binary_inplace_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_binary_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_custom1(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor> -
ggml_map_custom1_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_custom1_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor> -
ggml_map_custom1_inplace_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_custom1_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_custom2(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor> -
ggml_map_custom2_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_custom2_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor> -
ggml_map_custom2_inplace_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, ggml_custom2_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_custom3(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor> -
ggml_map_custom3_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_custom3_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_t fun, int n_tasks, Pointer<Void> userdata) → Pointer<ggml_tensor> -
ggml_map_custom3_inplace_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, ggml_custom3_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_unary_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_unary_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_map_unary_inplace_f32(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_unary_op_f32_t fun) → Pointer<ggml_tensor> -
ggml_mean(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_mul(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_mul_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_mul_mat(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_mul_mat_id(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> as1, Pointer<ggml_tensor> b, Pointer<ggml_tensor> ids) → Pointer<ggml_tensor> -
ggml_mul_mat_set_prec(
Pointer< ggml_tensor> a, ggml_prec prec) → void -
ggml_n_dims(
Pointer< ggml_tensor> tensor) → int -
ggml_nbytes(
Pointer< ggml_tensor> tensor) → int -
ggml_nbytes_pad(
Pointer< ggml_tensor> tensor) → int -
ggml_neg(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_neg_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_nelements(
Pointer< ggml_tensor> tensor) → int -
ggml_new_buffer(
Pointer< ggml_context> ctx, int nbytes) → Pointer<Void> -
ggml_new_f32(
Pointer< ggml_context> ctx, double value) → Pointer<ggml_tensor> -
ggml_new_graph(
Pointer< ggml_context> ctx) → Pointer<ggml_cgraph> -
ggml_new_graph_custom(
Pointer< ggml_context> ctx, int size, bool grads) → Pointer<ggml_cgraph> -
ggml_new_i32(
Pointer< ggml_context> ctx, int value) → Pointer<ggml_tensor> -
ggml_new_tensor(
Pointer< ggml_context> ctx, ggml_type type, int n_dims, Pointer<Int64> ne) → Pointer<ggml_tensor> -
ggml_new_tensor_1d(
Pointer< ggml_context> ctx, ggml_type type, int ne0) → Pointer<ggml_tensor> -
ggml_new_tensor_2d(
Pointer< ggml_context> ctx, ggml_type type, int ne0, int ne1) → Pointer<ggml_tensor> -
ggml_new_tensor_3d(
Pointer< ggml_context> ctx, ggml_type type, int ne0, int ne1, int ne2) → Pointer<ggml_tensor> -
ggml_new_tensor_4d(
Pointer< ggml_context> ctx, ggml_type type, int ne0, int ne1, int ne2, int ne3) → Pointer<ggml_tensor> -
ggml_norm(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double eps) → Pointer<ggml_tensor> -
ggml_norm_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double eps) → Pointer<ggml_tensor> -
ggml_nrows(
Pointer< ggml_tensor> tensor) → int -
ggml_numa_init(
ggml_numa_strategy numa) → void -
ggml_op_desc(
Pointer< ggml_tensor> t) → Pointer<Char> -
ggml_op_name(
ggml_op op) → Pointer< Char> -
ggml_op_symbol(
ggml_op op) → Pointer< Char> -
ggml_opt_step_adamw(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> grad, Pointer<ggml_tensor> m, Pointer<ggml_tensor> v, Pointer<ggml_tensor> adamw_params) → Pointer<ggml_tensor> -
ggml_out_prod(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_pad(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int p0, int p1, int p2, int p3) → Pointer<ggml_tensor> -
ggml_permute(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int axis0, int axis1, int axis2, int axis3) → Pointer<ggml_tensor> -
ggml_pool_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_op_pool op, int k0, int s0, int p0) → Pointer<ggml_tensor> -
ggml_pool_2d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_op_pool op, int k0, int k1, int s0, int s1, double p0, double p1) → Pointer<ggml_tensor> -
ggml_pool_2d_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> af, ggml_op_pool op, int k0, int k1, int s0, int s1, double p0, double p1) → Pointer<ggml_tensor> -
ggml_print_object(
Pointer< ggml_object> obj) → void -
ggml_print_objects(
Pointer< ggml_context> ctx) → void -
ggml_quantize_chunk(
ggml_type type, Pointer< Float> src, Pointer<Void> dst, int start, int nrows, int n_per_row, Pointer<Float> imatrix) → int -
ggml_quantize_free(
) → void -
ggml_quantize_init(
ggml_type type) → void -
ggml_quantize_requires_imatrix(
ggml_type type) → bool -
ggml_relu(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_relu_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_repeat(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_repeat_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_reset(
Pointer< ggml_context> ctx) → void -
ggml_reshape(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_reshape_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0) → Pointer<ggml_tensor> -
ggml_reshape_2d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1) → Pointer<ggml_tensor> -
ggml_reshape_3d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2) → Pointer<ggml_tensor> -
ggml_reshape_4d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) → Pointer<ggml_tensor> -
ggml_rms_norm(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double eps) → Pointer<ggml_tensor> -
ggml_rms_norm_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, double eps) → Pointer<ggml_tensor> -
ggml_rms_norm_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double eps) → Pointer<ggml_tensor> -
ggml_rope(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode) → Pointer<ggml_tensor> -
ggml_rope_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor> -
ggml_rope_custom(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor> -
ggml_rope_custom_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor> -
ggml_rope_ext(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor> -
ggml_rope_ext_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, Pointer<ggml_tensor> c, int n_dims, int mode, int n_ctx_orig, double freq_base, double freq_scale, double ext_factor, double attn_factor, double beta_fast, double beta_slow) → Pointer<ggml_tensor> -
ggml_rope_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int n_dims, int mode) → Pointer<ggml_tensor> -
ggml_rope_yarn_corr_dims(
int n_dims, int n_ctx_orig, double freq_base, double beta_fast, double beta_slow, Pointer< Float> dims) → void -
ggml_row_size(
ggml_type type, int ne) → int -
ggml_rwkv_wkv6(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> k, Pointer<ggml_tensor> v, Pointer<ggml_tensor> r, Pointer<ggml_tensor> tf, Pointer<ggml_tensor> td, Pointer<ggml_tensor> state) → Pointer<ggml_tensor> -
ggml_scale(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double s) → Pointer<ggml_tensor> -
ggml_scale_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, double s) → Pointer<ggml_tensor> -
ggml_set(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor> -
ggml_set_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int offset) → Pointer<ggml_tensor> -
ggml_set_1d_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int offset) → Pointer<ggml_tensor> -
ggml_set_2d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int offset) → Pointer<ggml_tensor> -
ggml_set_2d_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int offset) → Pointer<ggml_tensor> -
ggml_set_f32(
Pointer< ggml_tensor> tensor, double value) → Pointer<ggml_tensor> -
ggml_set_f32_1d(
Pointer< ggml_tensor> tensor, int i, double value) → void -
ggml_set_f32_nd(
Pointer< ggml_tensor> tensor, int i0, int i1, int i2, int i3, double value) → void -
ggml_set_i32(
Pointer< ggml_tensor> tensor, int value) → Pointer<ggml_tensor> -
ggml_set_i32_1d(
Pointer< ggml_tensor> tensor, int i, int value) → void -
ggml_set_i32_nd(
Pointer< ggml_tensor> tensor, int i0, int i1, int i2, int i3, int value) → void -
ggml_set_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor> -
ggml_set_input(
Pointer< ggml_tensor> tensor) → void -
ggml_set_loss(
Pointer< ggml_tensor> tensor) → void -
ggml_set_name(
Pointer< ggml_tensor> tensor, Pointer<Char> name) → Pointer<ggml_tensor> -
ggml_set_no_alloc(
Pointer< ggml_context> ctx, bool no_alloc) → void -
ggml_set_output(
Pointer< ggml_tensor> tensor) → void -
ggml_set_param(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> tensor) → void -
ggml_set_zero(
Pointer< ggml_tensor> tensor) → Pointer<ggml_tensor> -
ggml_sgn(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sgn_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sigmoid(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sigmoid_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_silu(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_silu_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_silu_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sin(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sin_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_soft_max(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_soft_max_back(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_soft_max_back_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_soft_max_ext(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> mask, double scale, double max_bias) → Pointer<ggml_tensor> -
ggml_soft_max_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sqr(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sqr_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sqrt(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sqrt_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_ssm_conv(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> sx, Pointer<ggml_tensor> c) → Pointer<ggml_tensor> -
ggml_ssm_scan(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> s, Pointer<ggml_tensor> x, Pointer<ggml_tensor> dt, Pointer<ggml_tensor> A, Pointer<ggml_tensor> B, Pointer<ggml_tensor> C) → Pointer<ggml_tensor> -
ggml_status_to_string(
ggml_status status) → Pointer< Char> -
ggml_step(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_step_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sub(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_sub_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, Pointer<ggml_tensor> b) → Pointer<ggml_tensor> -
ggml_sum(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_sum_rows(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_tallocr_alloc(
Pointer< ggml_tallocr> talloc, Pointer<ggml_tensor> tensor) → void -
ggml_tallocr_new(
ggml_backend_buffer_t buffer) → ggml_tallocr -
ggml_tanh(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_tanh_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_tensor_overhead(
) → int -
ggml_threadpool_free(
Pointer< ggml_threadpool> threadpool) → void -
ggml_threadpool_get_n_threads(
Pointer< ggml_threadpool> threadpool) → int -
ggml_threadpool_new(
Pointer< ggml_threadpool_params> params) → Pointer<ggml_threadpool> -
ggml_threadpool_params_default(
int n_threads) → ggml_threadpool_params -
ggml_threadpool_params_init(
Pointer< ggml_threadpool_params> p, int n_threads) → void -
ggml_threadpool_params_match(
Pointer< ggml_threadpool_params> p0, Pointer<ggml_threadpool_params> p1) → bool -
ggml_threadpool_pause(
Pointer< ggml_threadpool> threadpool) → void -
ggml_threadpool_resume(
Pointer< ggml_threadpool> threadpool) → void -
ggml_time_init(
) → void -
ggml_time_ms(
) → int -
ggml_time_us(
) → int -
ggml_timestep_embedding(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> timesteps, int dim, int max_period) → Pointer<ggml_tensor> -
ggml_top_k(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int k) → Pointer<ggml_tensor> -
ggml_transpose(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a) → Pointer<ggml_tensor> -
ggml_type_name(
ggml_type type) → Pointer< Char> -
ggml_type_size(
ggml_type type) → int -
ggml_type_sizef(
ggml_type type) → double -
ggml_unary(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_unary_op op) → Pointer<ggml_tensor> -
ggml_unary_inplace(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, ggml_unary_op op) → Pointer<ggml_tensor> -
ggml_unary_op_name(
ggml_unary_op op) → Pointer< Char> -
ggml_unravel_index(
Pointer< ggml_tensor> tensor, int i, Pointer<Int64> i0, Pointer<Int64> i1, Pointer<Int64> i2, Pointer<Int64> i3) → void -
ggml_upscale(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int scale_factor) → Pointer<ggml_tensor> -
ggml_upscale_ext(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3) → Pointer<ggml_tensor> -
ggml_used_mem(
Pointer< ggml_context> ctx) → int -
ggml_validate_row_data(
ggml_type type, Pointer< Void> data, int nbytes) → bool -
ggml_view_1d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int offset) → Pointer<ggml_tensor> -
ggml_view_2d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int nb1, int offset) → Pointer<ggml_tensor> -
ggml_view_3d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int nb1, int nb2, int offset) → Pointer<ggml_tensor> -
ggml_view_4d(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int ne0, int ne1, int ne2, int ne3, int nb1, int nb2, int nb3, int offset) → Pointer<ggml_tensor> -
ggml_view_tensor(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> src) → Pointer<ggml_tensor> -
ggml_win_part(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int w) → Pointer<ggml_tensor> -
ggml_win_unpart(
Pointer< ggml_context> ctx, Pointer<ggml_tensor> a, int w0, int h0, int w) → Pointer<ggml_tensor> -
gguf_add_tensor(
Pointer< gguf_context> ctx, Pointer<ggml_tensor> tensor) → void -
gguf_find_key(
Pointer< gguf_context> ctx, Pointer<Char> key) → int -
gguf_find_tensor(
Pointer< gguf_context> ctx, Pointer<Char> name) → int -
gguf_free(
Pointer< gguf_context> ctx) → void -
gguf_get_alignment(
Pointer< gguf_context> ctx) → int -
gguf_get_arr_data(
Pointer< gguf_context> ctx, int key_id) → Pointer<Void> -
gguf_get_arr_n(
Pointer< gguf_context> ctx, int key_id) → int -
gguf_get_arr_str(
Pointer< gguf_context> ctx, int key_id, int i) → Pointer<Char> -
gguf_get_arr_type(
Pointer< gguf_context> ctx, int key_id) → gguf_type -
gguf_get_data(
Pointer< gguf_context> ctx) → Pointer<Void> -
gguf_get_data_offset(
Pointer< gguf_context> ctx) → int -
gguf_get_key(
Pointer< gguf_context> ctx, int key_id) → Pointer<Char> -
gguf_get_kv_type(
Pointer< gguf_context> ctx, int key_id) → gguf_type -
gguf_get_meta_data(
Pointer< gguf_context> ctx, Pointer<Void> data) → void -
gguf_get_meta_size(
Pointer< gguf_context> ctx) → int -
gguf_get_n_kv(
Pointer< gguf_context> ctx) → int -
gguf_get_n_tensors(
Pointer< gguf_context> ctx) → int -
gguf_get_tensor_name(
Pointer< gguf_context> ctx, int i) → Pointer<Char> -
gguf_get_tensor_offset(
Pointer< gguf_context> ctx, int i) → int -
gguf_get_tensor_type(
Pointer< gguf_context> ctx, int i) → ggml_type -
gguf_get_val_bool(
Pointer< gguf_context> ctx, int key_id) → bool -
gguf_get_val_data(
Pointer< gguf_context> ctx, int key_id) → Pointer<Void> -
gguf_get_val_f32(
Pointer< gguf_context> ctx, int key_id) → double -
gguf_get_val_f64(
Pointer< gguf_context> ctx, int key_id) → double -
gguf_get_val_i16(
Pointer< gguf_context> ctx, int key_id) → int -
gguf_get_val_i32(
Pointer< gguf_context> ctx, int key_id) → int -
gguf_get_val_i64(
Pointer< gguf_context> ctx, int key_id) → int -
gguf_get_val_i8(
Pointer< gguf_context> ctx, int key_id) → int -
gguf_get_val_str(
Pointer< gguf_context> ctx, int key_id) → Pointer<Char> -
gguf_get_val_u16(
Pointer< gguf_context> ctx, int key_id) → int -
gguf_get_val_u32(
Pointer< gguf_context> ctx, int key_id) → int -
gguf_get_val_u64(
Pointer< gguf_context> ctx, int key_id) → int -
gguf_get_val_u8(
Pointer< gguf_context> ctx, int key_id) → int -
gguf_get_version(
Pointer< gguf_context> ctx) → int -
gguf_init_empty(
) → Pointer< gguf_context> -
gguf_init_from_file(
Pointer< Char> fname, gguf_init_params params) → Pointer<gguf_context> -
gguf_remove_key(
Pointer< gguf_context> ctx, Pointer<Char> key) → void -
gguf_set_arr_data(
Pointer< gguf_context> ctx, Pointer<Char> key, gguf_type type, Pointer<Void> data, int n) → void -
gguf_set_arr_str(
Pointer< gguf_context> ctx, Pointer<Char> key, Pointer<Pointer<Char>> data, int n) → void -
gguf_set_kv(
Pointer< gguf_context> ctx, Pointer<gguf_context> src) → void -
gguf_set_tensor_data(
Pointer< gguf_context> ctx, Pointer<Char> name, Pointer<Void> data, int size) → void -
gguf_set_tensor_type(
Pointer< gguf_context> ctx, Pointer<Char> name, ggml_type type) → void -
gguf_set_val_bool(
Pointer< gguf_context> ctx, Pointer<Char> key, bool val) → void -
gguf_set_val_f32(
Pointer< gguf_context> ctx, Pointer<Char> key, double val) → void -
gguf_set_val_f64(
Pointer< gguf_context> ctx, Pointer<Char> key, double val) → void -
gguf_set_val_i16(
Pointer< gguf_context> ctx, Pointer<Char> key, int val) → void -
gguf_set_val_i32(
Pointer< gguf_context> ctx, Pointer<Char> key, int val) → void -
gguf_set_val_i64(
Pointer< gguf_context> ctx, Pointer<Char> key, int val) → void -
gguf_set_val_i8(
Pointer< gguf_context> ctx, Pointer<Char> key, int val) → void -
gguf_set_val_str(
Pointer< gguf_context> ctx, Pointer<Char> key, Pointer<Char> val) → void -
gguf_set_val_u16(
Pointer< gguf_context> ctx, Pointer<Char> key, int val) → void -
gguf_set_val_u32(
Pointer< gguf_context> ctx, Pointer<Char> key, int val) → void -
gguf_set_val_u64(
Pointer< gguf_context> ctx, Pointer<Char> key, int val) → void -
gguf_set_val_u8(
Pointer< gguf_context> ctx, Pointer<Char> key, int val) → void -
gguf_type_name(
gguf_type type) → Pointer< Char> -
gguf_write_to_file(
Pointer< gguf_context> ctx, Pointer<Char> fname, bool only_meta) → void -
llama_add_bos_token(
Pointer< llama_model> model) → bool -
llama_add_eos_token(
Pointer< llama_model> model) → bool -
llama_attach_threadpool(
Pointer< llama_context> ctx, ggml_threadpool_t threadpool, ggml_threadpool_t threadpool_batch) → void -
llama_backend_free(
) → void -
llama_backend_init(
) → void -
llama_batch_free(
llama_batch batch) → void -
llama_batch_get_one(
Pointer< llama_token> tokens, int n_tokens) → llama_batch -
llama_batch_init(
int n_tokens, int embd, int n_seq_max) → llama_batch -
llama_chat_apply_template(
Pointer< llama_model> model, Pointer<Char> tmpl, Pointer<llama_chat_message> chat, int n_msg, bool add_ass, Pointer<Char> buf, int length) → int - Apply chat template. Inspired by HF apply_chat_template() in Python. Both "model" and "custom_template" are optional, but at least one is required. "custom_template" has higher precedence than "model" NOTE: This function does not use a jinja parser. It only supports a pre-defined list of templates. See more: https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template @param tmpl A Jinja template to use for this chat. If this is nullptr, the model’s default chat template will be used instead. @param chat Pointer to a list of multiple llama_chat_message @param n_msg Number of llama_chat_message in this chat @param add_ass Whether to end the prompt with the token(s) that indicate the start of an assistant message. @param buf A buffer to hold the output formatted prompt. The recommended alloc size is 2 * (total number of characters of all messages) @param length The size of the allocated buffer @return The total number of bytes of the formatted prompt. If it is larger than the size of the buffer, you may need to re-alloc it and then re-apply the template.
-
llama_context_default_params(
) → llama_context_params -
llama_control_vector_apply(
Pointer< llama_context> lctx, Pointer<Float> data, int len, int n_embd, int il_start, int il_end) → int -
llama_copy_state_data(
Pointer< llama_context> ctx, Pointer<Uint8> dst) → int -
llama_decode(
Pointer< llama_context> ctx, llama_batch batch) → int -
llama_detach_threadpool(
Pointer< llama_context> ctx) → void -
llama_detokenize(
Pointer< llama_model> model, Pointer<llama_token> tokens, int n_tokens, Pointer<Char> text, int text_len_max, bool remove_special, bool unparse_special) → int - @details Convert the provided tokens into text (inverse of llama_tokenize()). @param text The char pointer must be large enough to hold the resulting text. @return Returns the number of chars/bytes on success, no more than text_len_max. @return Returns a negative number on failure - the number of chars/bytes that would have been returned. @param remove_special Allows removing BOS and EOS tokens if the model is configured to do so. @param unparse_special If true, special tokens are rendered in the output.
-
llama_encode(
Pointer< llama_context> ctx, llama_batch batch) → int -
llama_free(
Pointer< llama_context> ctx) → void -
llama_free_model(
Pointer< llama_model> model) → void -
llama_get_embeddings(
Pointer< llama_context> ctx) → Pointer<Float> -
llama_get_embeddings_ith(
Pointer< llama_context> ctx, int i) → Pointer<Float> -
llama_get_embeddings_seq(
Pointer< llama_context> ctx, int seq_id) → Pointer<Float> -
llama_get_kv_cache_token_count(
Pointer< llama_context> ctx) → int -
llama_get_kv_cache_used_cells(
Pointer< llama_context> ctx) → int -
llama_get_logits(
Pointer< llama_context> ctx) → Pointer<Float> -
llama_get_logits_ith(
Pointer< llama_context> ctx, int i) → Pointer<Float> -
llama_get_model(
Pointer< llama_context> ctx) → Pointer<llama_model> -
llama_get_model_tensor(
Pointer< llama_model> model, Pointer<Char> name) → Pointer<ggml_tensor> -
llama_get_state_size(
Pointer< llama_context> ctx) → int -
llama_kv_cache_can_shift(
Pointer< llama_context> ctx) → bool -
llama_kv_cache_clear(
Pointer< llama_context> ctx) → void -
llama_kv_cache_defrag(
Pointer< llama_context> ctx) → void -
llama_kv_cache_seq_add(
Pointer< llama_context> ctx, int seq_id, int p0, int p1, int delta) → void -
llama_kv_cache_seq_cp(
Pointer< llama_context> ctx, int seq_id_src, int seq_id_dst, int p0, int p1) → void -
llama_kv_cache_seq_div(
Pointer< llama_context> ctx, int seq_id, int p0, int p1, int d) → void -
llama_kv_cache_seq_keep(
Pointer< llama_context> ctx, int seq_id) → void -
llama_kv_cache_seq_pos_max(
Pointer< llama_context> ctx, int seq_id) → int -
llama_kv_cache_seq_rm(
Pointer< llama_context> ctx, int seq_id, int p0, int p1) → bool -
llama_kv_cache_update(
Pointer< llama_context> ctx) → void -
llama_kv_cache_view_free(
Pointer< llama_kv_cache_view> view) → void -
llama_kv_cache_view_init(
Pointer< llama_context> ctx, int n_seq_max) → llama_kv_cache_view -
llama_kv_cache_view_update(
Pointer< llama_context> ctx, Pointer<llama_kv_cache_view> view) → void -
llama_load_model_from_file(
Pointer< Char> path_model, llama_model_params params) → Pointer<llama_model> -
llama_load_session_file(
Pointer< llama_context> ctx, Pointer<Char> path_session, Pointer<llama_token> tokens_out, int n_token_capacity, Pointer<Size> n_token_count_out) → bool -
llama_log_set(
ggml_log_callback log_callback, Pointer< Void> user_data) → void -
llama_lora_adapter_clear(
Pointer< llama_context> ctx) → void -
llama_lora_adapter_free(
Pointer< llama_lora_adapter> adapter) → void -
llama_lora_adapter_init(
Pointer< llama_model> model, Pointer<Char> path_lora) → Pointer<llama_lora_adapter> -
llama_lora_adapter_remove(
Pointer< llama_context> ctx, Pointer<llama_lora_adapter> adapter) → int -
llama_lora_adapter_set(
Pointer< llama_context> ctx, Pointer<llama_lora_adapter> adapter, double scale) → int -
llama_max_devices(
) → int -
llama_model_decoder_start_token(
Pointer< llama_model> model) → int -
llama_model_default_params(
) → llama_model_params -
llama_model_desc(
Pointer< llama_model> model, Pointer<Char> buf, int buf_size) → int -
llama_model_has_decoder(
Pointer< llama_model> model) → bool -
llama_model_has_encoder(
Pointer< llama_model> model) → bool -
llama_model_is_recurrent(
Pointer< llama_model> model) → bool -
llama_model_meta_count(
Pointer< llama_model> model) → int -
llama_model_meta_key_by_index(
Pointer< llama_model> model, int i, Pointer<Char> buf, int buf_size) → int -
llama_model_meta_val_str(
Pointer< llama_model> model, Pointer<Char> key, Pointer<Char> buf, int buf_size) → int -
llama_model_meta_val_str_by_index(
Pointer< llama_model> model, int i, Pointer<Char> buf, int buf_size) → int -
llama_model_n_params(
Pointer< llama_model> model) → int -
llama_model_quantize(
Pointer< Char> fname_inp, Pointer<Char> fname_out, Pointer<llama_model_quantize_params> params) → int -
llama_model_quantize_default_params(
) → llama_model_quantize_params -
llama_model_size(
Pointer< llama_model> model) → int -
llama_n_batch(
Pointer< llama_context> ctx) → int -
llama_n_ctx(
Pointer< llama_context> ctx) → int -
llama_n_ctx_train(
Pointer< llama_model> model) → int -
llama_n_embd(
Pointer< llama_model> model) → int -
llama_n_head(
Pointer< llama_model> model) → int -
llama_n_layer(
Pointer< llama_model> model) → int -
llama_n_seq_max(
Pointer< llama_context> ctx) → int -
llama_n_threads(
Pointer< llama_context> ctx) → int -
llama_n_threads_batch(
Pointer< llama_context> ctx) → int -
llama_n_ubatch(
Pointer< llama_context> ctx) → int -
llama_n_vocab(
Pointer< llama_model> model) → int -
llama_new_context_with_model(
Pointer< llama_model> model, llama_context_params params) → Pointer<llama_context> -
llama_numa_init(
ggml_numa_strategy numa) → void -
llama_perf_context(
Pointer< llama_context> ctx) → llama_perf_context_data -
llama_perf_context_print(
Pointer< llama_context> ctx) → void -
llama_perf_context_reset(
Pointer< llama_context> ctx) → void -
llama_perf_sampler(
Pointer< llama_sampler> chain) → llama_perf_sampler_data -
llama_perf_sampler_print(
Pointer< llama_sampler> chain) → void -
llama_perf_sampler_reset(
Pointer< llama_sampler> chain) → void -
llama_pooling_type1(
Pointer< llama_context> ctx) → llama_pooling_type -
llama_print_system_info(
) → Pointer< Char> -
llama_rope_freq_scale_train(
Pointer< llama_model> model) → double -
llama_rope_type1(
Pointer< llama_model> model) → llama_rope_type -
llama_sampler_accept(
Pointer< llama_sampler> smpl, int token) → void -
llama_sampler_apply(
Pointer< llama_sampler> smpl, Pointer<llama_token_data_array> cur_p) → void -
llama_sampler_chain_add(
Pointer< llama_sampler> chain, Pointer<llama_sampler> smpl) → void -
llama_sampler_chain_default_params(
) → llama_sampler_chain_params -
llama_sampler_chain_get(
Pointer< llama_sampler> chain, int i) → Pointer<llama_sampler> -
llama_sampler_chain_init(
llama_sampler_chain_params params) → Pointer< llama_sampler> -
llama_sampler_chain_n(
Pointer< llama_sampler> chain) → int -
llama_sampler_chain_remove(
Pointer< llama_sampler> chain, int i) → Pointer<llama_sampler> -
llama_sampler_clone(
Pointer< llama_sampler> smpl) → Pointer<llama_sampler> -
llama_sampler_free(
Pointer< llama_sampler> smpl) → void -
llama_sampler_get_seed(
Pointer< llama_sampler> smpl) → int -
llama_sampler_init_dist(
int seed) → Pointer< llama_sampler> -
llama_sampler_init_dry(
Pointer< llama_model> model, double dry_multiplier, double dry_base, int dry_allowed_length, int dry_penalty_last_n, Pointer<Pointer<Char>> seq_breakers, int num_breakers) → Pointer<llama_sampler> - @details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
-
llama_sampler_init_grammar(
Pointer< llama_model> model, Pointer<Char> grammar_str, Pointer<Char> grammar_root) → Pointer<llama_sampler> -
llama_sampler_init_greedy(
) → Pointer< llama_sampler> -
llama_sampler_init_infill(
Pointer< llama_model> model) → Pointer<llama_sampler> -
llama_sampler_init_logit_bias(
int n_vocab, int n_logit_bias, Pointer< llama_logit_bias> logit_bias) → Pointer<llama_sampler> -
llama_sampler_init_min_p(
double p, int min_keep) → Pointer< llama_sampler> - @details Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841
-
llama_sampler_init_mirostat(
int n_vocab, int seed, double tau, double eta, int m) → Pointer< llama_sampler> -
@details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
@param candidates A vector of
llama_token_data
containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to updatemu
based on the error between the target and observed surprisal of the sampled word. A larger learning rate will causemu
to be updated more quickly, while a smaller learning rate will result in slower updates. @param m The number of tokens considered in the estimation ofs_hat
. This is an arbitrary value that is used to calculates_hat
, which in turn helps to calculate the value ofk
. In the paper, they usem = 100
, but you can experiment with different values to see how it affects the performance of the algorithm. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau
) and is updated in the algorithm based on the error between the target and observed surprisal. -
llama_sampler_init_mirostat_v2(
int seed, double tau, double eta) → Pointer< llama_sampler> -
@details Mirostat 2.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
@param candidates A vector of
llama_token_data
containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text. @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text. @param eta The learning rate used to updatemu
based on the error between the target and observed surprisal of the sampled word. A larger learning rate will causemu
to be updated more quickly, while a smaller learning rate will result in slower updates. @param mu Maximum cross-entropy. This value is initialized to be twice the target cross-entropy (2 * tau
) and is updated in the algorithm based on the error between the target and observed surprisal. -
llama_sampler_init_penalties(
int n_vocab, int special_eos_id, int linefeed_id, int penalty_last_n, double penalty_repeat, double penalty_freq, double penalty_present, bool penalize_nl, bool ignore_eos) → Pointer< llama_sampler> -
llama_sampler_init_softmax(
) → Pointer< llama_sampler> - @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits. NOTE: Avoid using on the full vocabulary as the sorting can become slow. For example, apply top-k or top-p sampling first.
-
llama_sampler_init_temp(
double t) → Pointer< llama_sampler> - @details Updates the logits l_i' = l_i/t. When t <= 0.0f, the maximum logit is kept at its original value, the rest are set to -inf
-
llama_sampler_init_temp_ext(
double t, double delta, double exponent) → Pointer< llama_sampler> - @details Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772.
-
llama_sampler_init_top_k(
int k) → Pointer< llama_sampler> - @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
-
llama_sampler_init_top_p(
double p, int min_keep) → Pointer< llama_sampler> - @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
-
llama_sampler_init_typical(
double p, int min_keep) → Pointer< llama_sampler> - @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
-
llama_sampler_init_xtc(
double p, double t, int min_keep, int seed) → Pointer< llama_sampler> - @details XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
-
llama_sampler_name(
Pointer< llama_sampler> smpl) → Pointer<Char> -
llama_sampler_reset(
Pointer< llama_sampler> smpl) → void -
llama_sampler_sample(
Pointer< llama_sampler> smpl, Pointer<llama_context> ctx, int idx) → int -
llama_save_session_file(
Pointer< llama_context> ctx, Pointer<Char> path_session, Pointer<llama_token> tokens, int n_token_count) → bool -
llama_set_abort_callback(
Pointer< llama_context> ctx, ggml_abort_callback abort_callback, Pointer<Void> abort_callback_data) → void -
llama_set_causal_attn(
Pointer< llama_context> ctx, bool causal_attn) → void -
llama_set_embeddings(
Pointer< llama_context> ctx, bool embeddings) → void -
llama_set_n_threads(
Pointer< llama_context> ctx, int n_threads, int n_threads_batch) → void -
llama_set_state_data(
Pointer< llama_context> ctx, Pointer<Uint8> src) → int -
llama_split_path(
Pointer< Char> split_path, int maxlen, Pointer<Char> path_prefix, int split_no, int split_count) → int - @details Build a split GGUF final path for this chunk. llama_split_path(split_path, sizeof(split_path), "/models/ggml-model-q4_0", 2, 4) => split_path = "/models/ggml-model-q4_0-00002-of-00004.gguf"
-
llama_split_prefix(
Pointer< Char> split_prefix, int maxlen, Pointer<Char> split_path, int split_no, int split_count) → int - @details Extract the path prefix from the split_path if and only if the split_no and split_count match. llama_split_prefix(split_prefix, 64, "/models/ggml-model-q4_0-00002-of-00004.gguf", 2, 4) => split_prefix = "/models/ggml-model-q4_0"
-
llama_state_get_data(
Pointer< llama_context> ctx, Pointer<Uint8> dst, int size) → int -
llama_state_get_size(
Pointer< llama_context> ctx) → int -
llama_state_load_file(
Pointer< llama_context> ctx, Pointer<Char> path_session, Pointer<llama_token> tokens_out, int n_token_capacity, Pointer<Size> n_token_count_out) → bool -
llama_state_save_file(
Pointer< llama_context> ctx, Pointer<Char> path_session, Pointer<llama_token> tokens, int n_token_count) → bool -
llama_state_seq_get_data(
Pointer< llama_context> ctx, Pointer<Uint8> dst, int size, int seq_id) → int -
llama_state_seq_get_size(
Pointer< llama_context> ctx, int seq_id) → int -
llama_state_seq_load_file(
Pointer< llama_context> ctx, Pointer<Char> filepath, int dest_seq_id, Pointer<llama_token> tokens_out, int n_token_capacity, Pointer<Size> n_token_count_out) → int -
llama_state_seq_save_file(
Pointer< llama_context> ctx, Pointer<Char> filepath, int seq_id, Pointer<llama_token> tokens, int n_token_count) → int -
llama_state_seq_set_data(
Pointer< llama_context> ctx, Pointer<Uint8> src, int size, int dest_seq_id) → int -
llama_state_set_data(
Pointer< llama_context> ctx, Pointer<Uint8> src, int size) → int -
llama_supports_gpu_offload(
) → bool -
llama_supports_mlock(
) → bool -
llama_supports_mmap(
) → bool -
llama_supports_rpc(
) → bool -
llama_synchronize(
Pointer< llama_context> ctx) → void -
llama_time_us(
) → int -
llama_token_bos(
Pointer< llama_model> model) → int -
llama_token_cls(
Pointer< llama_model> model) → int -
llama_token_eos(
Pointer< llama_model> model) → int -
llama_token_eot(
Pointer< llama_model> model) → int -
llama_token_fim_mid(
Pointer< llama_model> model) → int -
llama_token_fim_pad(
Pointer< llama_model> model) → int -
llama_token_fim_pre(
Pointer< llama_model> model) → int -
llama_token_fim_rep(
Pointer< llama_model> model) → int -
llama_token_fim_sep(
Pointer< llama_model> model) → int -
llama_token_fim_suf(
Pointer< llama_model> model) → int -
llama_token_get_attr(
Pointer< llama_model> model, Dartllama_token token) → llama_token_attr -
llama_token_get_score(
Pointer< llama_model> model, int token) → double -
llama_token_get_text(
Pointer< llama_model> model, int token) → Pointer<Char> -
llama_token_is_control(
Pointer< llama_model> model, int token) → bool -
llama_token_is_eog(
Pointer< llama_model> model, int token) → bool -
llama_token_middle(
Pointer< llama_model> model) → int -
llama_token_nl(
Pointer< llama_model> model) → int -
llama_token_pad(
Pointer< llama_model> model) → int -
llama_token_prefix(
Pointer< llama_model> model) → int -
llama_token_sep(
Pointer< llama_model> model) → int -
llama_token_suffix(
Pointer< llama_model> model) → int -
llama_token_to_piece(
Pointer< llama_model> model, int token, Pointer<Char> buf, int length, int lstrip, bool special) → int -
llama_tokenize(
Pointer< llama_model> model, Pointer<Char> text, int text_len, Pointer<llama_token> tokens, int n_tokens_max, bool add_special, bool parse_special) → int - @details Convert the provided text into tokens. @param tokens The tokens pointer must be large enough to hold the resulting tokens. @return Returns the number of tokens on success, no more than n_tokens_max @return Returns a negative number on failure - the number of tokens that would have been returned @param add_special Allow to add BOS and EOS tokens if model is configured to do so. @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext. Does not insert a leading space.
-
llama_vocab_type1(
Pointer< llama_model> model) → llama_vocab_type -
noSuchMethod(
Invocation invocation) → dynamic -
Invoked when a nonexistent method or property is accessed.
inherited
-
open_memstream(
Pointer< Pointer< __bufp, Pointer<Char> >Size> __sizep) → Pointer<FILE> -
pclose(
Pointer< FILE> arg0) → int -
perror(
Pointer< Char> arg0) → void -
popen(
Pointer< Char> arg0, Pointer<Char> arg1) → Pointer<FILE> -
printf(
Pointer< Char> arg0) → int -
putc(
int arg0, Pointer< FILE> arg1) → int -
putc_unlocked(
int arg0, Pointer< FILE> arg1) → int -
putchar(
int arg0) → int -
putchar_unlocked(
int arg0) → int -
puts(
Pointer< Char> arg0) → int -
putw(
int arg0, Pointer< FILE> arg1) → int -
remove(
Pointer< Char> arg0) → int -
rename(
Pointer< Char> __old, Pointer<Char> __new) → int -
renameat(
int arg0, Pointer< Char> arg1, int arg2, Pointer<Char> arg3) → int -
renameatx_np(
int arg0, Pointer< Char> arg1, int arg2, Pointer<Char> arg3, int arg4) → int -
renamex_np(
Pointer< Char> arg0, Pointer<Char> arg1, int arg2) → int -
rewind(
Pointer< FILE> arg0) → void -
scanf(
Pointer< Char> arg0) → int -
setbuf(
Pointer< FILE> arg0, Pointer<Char> arg1) → void -
setbuffer(
Pointer< FILE> arg0, Pointer<Char> arg1, int arg2) → void -
setlinebuf(
Pointer< FILE> arg0) → int -
setvbuf(
Pointer< FILE> arg0, Pointer<Char> arg1, int arg2, int arg3) → int -
snprintf(
Pointer< Char> __str, int __size, Pointer<Char> __format) → int -
sprintf(
Pointer< Char> arg0, Pointer<Char> arg1) → int -
sscanf(
Pointer< Char> arg0, Pointer<Char> arg1) → int -
tempnam(
Pointer< Char> __dir, Pointer<Char> __prefix) → Pointer<Char> -
tmpfile(
) → Pointer< FILE> -
tmpnam(
Pointer< Char> arg0) → Pointer<Char> -
toString(
) → String -
A string representation of this object.
inherited
-
ungetc(
int arg0, Pointer< FILE> arg1) → int -
vasprintf(
Pointer< Pointer< arg0, Pointer<Char> >Char> arg1, va_list arg2) → int -
vdprintf(
int arg0, Pointer< Char> arg1, va_list arg2) → int -
vfprintf(
Pointer< FILE> arg0, Pointer<Char> arg1, va_list arg2) → int -
vfscanf(
Pointer< FILE> __stream, Pointer<Char> __format, va_list arg2) → int -
vprintf(
Pointer< Char> arg0, va_list arg1) → int -
vscanf(
Pointer< Char> __format, va_list arg1) → int -
vsnprintf(
Pointer< Char> __str, int __size, Pointer<Char> __format, va_list arg3) → int -
vsprintf(
Pointer< Char> arg0, Pointer<Char> arg1, va_list arg2) → int -
vsscanf(
Pointer< Char> __str, Pointer<Char> __format, va_list arg2) → int
Operators
-
operator ==(
Object other) → bool -
The equality operator.
inherited