loadModuleBinary function
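Reads a binary file of 32-bit floats and pushes the weights into GPU VRAM. Returns false if the file is missing or its float count does not match the model.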
Implementation
/// Loads raw 32-bit float weights from [filePath] into the parameters of
/// [model].
///
/// The file is read as one flat sequence of float32 values in the host's
/// native byte order. Values are consumed in the order returned by
/// `model.parameters()`; each parameter receives `length` consecutive values.
///
/// Returns `true` once every parameter has been assigned. Returns `false`
/// without assigning anything when the file does not exist, when its size is
/// not a whole number of 4-byte floats, or when the number of floats differs
/// from the total parameter length.
///
/// Errors raised while reading the file are not caught here.
Future<bool> loadModuleBinary(Module model, String filePath) async {
  final file = File(filePath);
  if (!await file.exists()) return false;

  final Uint8List allBytes = await file.readAsBytes();
  final List<Tensor> params = model.parameters();

  // A float32 view silently drops trailing bytes that do not fill a whole
  // element, so a truncated or padded file must be rejected explicitly.
  final int byteCount = allBytes.lengthInBytes;
  if (byteCount % Float32List.bytesPerElement != 0) {
    print(
      '⚠️ Mismatch! File size $byteCount bytes is not a multiple of '
      '${Float32List.bytesPerElement}',
    );
    return false;
  }
  final int floatCount = byteCount ~/ Float32List.bytesPerElement;

  // Safety check: the file must hold exactly one float per parameter element.
  final int totalExpected = params.fold(0, (sum, p) => sum + p.length);
  if (floatCount != totalExpected) {
    print(
      '⚠️ Mismatch! Model needs $totalExpected floats, file has $floatCount',
    );
    return false;
  }

  // Respect the byte list's own window into its buffer. A float32 view needs
  // a 4-byte-aligned start, so copy to a fresh buffer when it is not aligned.
  final Uint8List aligned =
      allBytes.offsetInBytes % Float32List.bytesPerElement == 0
          ? allBytes
          : Uint8List.fromList(allBytes);
  final Float32List allFloats =
      aligned.buffer.asFloat32List(aligned.offsetInBytes, floatCount);

  print('🚀 Injecting weights into GPU VRAM...');
  var offset = 0;
  for (final p in params) {
    final int end = offset + p.length;
    // p.data (setter) triggers engine.setTensorData -> cudaMemcpyHostToDevice.
    // The view avoids an intermediate typed copy; toList() still hands the
    // setter its own independent list, as before.
    p.data = Float32List.sublistView(allFloats, offset, end).toList();
    offset = end;
  }
  return true;
}