CudaEngine constructor
CudaEngine()
Implementation
/// Opens the native `libmat_mul.so` shared library and binds every exported
/// function this engine uses to its corresponding Dart field.
///
/// The library is resolved relative to the process's current working
/// directory (`Directory.current.path`), so the `.so` must be present there
/// when the engine is constructed. `DynamicLibrary.open` throws if the file
/// cannot be loaded, and each symbol lookup throws if the symbol is missing
/// from the library.
CudaEngine() {
  _lib = ffi.DynamicLibrary.open('${Directory.current.path}/libmat_mul.so');

  // --- Tensor lifecycle and data access ---
  createTensor = _lib.lookupFunction<_C_create, _D_create>('create_tensor');
  destroyTensor = _lib.lookupFunction<_C_destroy, _D_destroy>(
    'destroy_tensor',
  );
  getTensorData = _lib.lookupFunction<_C_copy, _D_copy>('get_tensor_data');
  getTensorGrad = _lib.lookupFunction<_C_copy, _D_copy>('get_tensor_grad');
  backward = _lib.lookupFunction<_C_destroy, _D_destroy>('backward');
  tensorStep = _lib.lookupFunction<_C_step, _D_step>('tensor_step');

  // --- Binary element-wise / matrix operations ---
  addTensors = _lib.lookupFunction<_C_op2, _D_op2>('add_tensors');
  subTensors = _lib.lookupFunction<_C_op2, _D_op2>('sub_tensors');
  mulTensors = _lib.lookupFunction<_C_op2, _D_op2>('mul_tensors');
  divTensors = _lib.lookupFunction<_C_op2, _D_op2>('div_tensors');
  matmulTensors = _lib.lookupFunction<_C_op2, _D_op2>('matmul_tensors');
  powTensor = _lib.lookupFunction<_C_pow, _D_pow>('pow_tensor');

  // --- Unary operations ---
  reluTensor = _lib.lookupFunction<_C_op1, _D_op1>('relu_tensor');
  tanhTensor = _lib.lookupFunction<_C_op1, _D_op1>('tanh_tensor');
  sigmoidTensor = _lib.lookupFunction<_C_op1, _D_op1>('sigmoid_tensor');
  logTensor = _lib.lookupFunction<_C_op1, _D_op1>('log_tensor');

  // --- Higher-level forward kernels ---
  aftForward = _lib.lookupFunction<_C_aft, _D_aft>('aft_forward');
  aftCrossForward = _lib.lookupFunction<_C_aft_cross, _D_aft_cross>(
    'aft_cross_forward',
  );
  concatTensors = _lib.lookupFunction<_C_concat, _D_concat>(
    'concat_tensors_gpu',
  );
  layernormForward = _lib.lookupFunction<_C_layernorm, _D_layernorm>(
    'layernorm_forward',
  );
  geluTensor = _lib.lookupFunction<_C_op1, _D_op1>('gelu_tensor');
  embeddingForward = _lib.lookupFunction<_C_embedding, _D_embedding>(
    'embedding_forward',
  );
  crossEntropyLoss = _lib.lookupFunction<_C_loss, _D_loss>(
    'cross_entropy_loss',
  );
  tensorToHost = _lib.lookupFunction<_C_to_host, _D_to_host>(
    'tensor_to_host',
  );

  // --- Optimizer steps and gradient utilities ---
  adamStep = _lib.lookupFunction<_C_adam_step, _D_adam_step>('adam_step');
  // NOTE(review): the symbol is spelled 'sdg_step' (not 'sgd_step') and is
  // bound with the Adam step signature - confirm both against the native
  // library's exports before changing either.
  sdgStep = _lib.lookupFunction<_C_adam_step, _D_adam_step>('sdg_step');
  // Bound exactly once. The original constructor looked up 'zero_grad' twice
  // and assigned the field both times; only this explicit-signature binding
  // (the one that previously took effect) is kept.
  zeroGrad = _lib
      .lookupFunction<
        ffi.Void Function(ffi.Pointer<ffi.Void>),
        void Function(ffi.Pointer<ffi.Void>)
      >('zero_grad');
  clipGradients = _lib.lookupFunction<_C_clip, _D_clip>('clip_gradients');

  // --- Data manipulation ---
  setTensorData = _lib.lookupFunction<_C_set_data, _D_set_data>(
    'set_tensor_data',
  );
  sliceTensor = _lib.lookupFunction<_C_slice, _D_slice>('slice_tensor');
  abs_tensor = _lib.lookupFunction<UnaryOpFn, UnaryOpDart>('abs_tensor');
  softmax_forward = _lib.lookupFunction<UnaryOpFn, UnaryOpDart>(
    'softmax_forward',
  );
  // lookupFunction is shorthand for lookup<NativeFunction<T>>().asFunction<F>(),
  // used here for consistency with every other binding in this constructor.
  _computeCostMatrix = _lib
      .lookupFunction<NativeComputeCost, DartComputeCost>(
        'compute_cost_matrix',
      );

  // --- Reductions ---
  sumTensor = _lib.lookupFunction<_C_reduce, _D_reduce>('sum_tensor');
  // Mean reduction: returns a 1x1 Tensor pointer
  meanTensor = _lib.lookupFunction<_C_reduce, _D_reduce>('mean_tensor');

  // --- Parameter initialisation ---
  _tensorXavierInit = _lib.lookupFunction<_XavierInitC, _XavierInitDart>(
    'tensor_xavier_init',
  );
  _tensorZeroInit = _lib.lookupFunction<_ZeroInitC, _ZeroInitDart>(
    'tensor_zero_init',
  );

  // Binding for 'l2_normalize_tensor' is currently disabled:
  // l2Normalize = _lib.lookupFunction<_C_l2norm, _D_l2norm>(
  // 'l2_normalize_tensor',
  // );
  // NOTE(review): layerNorm reuses the l2norm typedefs - presumably the two
  // native functions share a signature; confirm.
  layerNorm = _lib.lookupFunction<_C_l2norm, _D_l2norm>('layer_norm_tensor');

  // NOTE(review): col2im is bound with the im2col typedefs - presumably the
  // native signatures match; confirm.
  im2col = _lib.lookupFunction<_C_im2col, _D_im2col>('im2col_cuda');
  col2im = _lib.lookupFunction<_C_im2col, _D_im2col>('col2im_cuda');
}