main function

void main()

Implementation

/// Smoke-tests the [Conv2d] forward pass on a tiny, hand-checkable input.
///
/// Builds a 1x5x5 input of ones and a 3x3 convolution (stride 1, padding 1)
/// whose weights are all 1.0 and whose bias is 0.0, runs `forward`, fetches
/// the result and prints it as a 5x5 grid. With that setup every output pixel
/// equals the number of kernel taps that land inside the image: 4.0 in the
/// corners, 6.0 along the edges and 9.0 in the interior. The run reports both
/// whether the output is non-zero and whether it matches that pattern.
///
/// Errors raised by the forward pass or by the checks are caught and printed;
/// the input, the layer and every tracked tensor are disposed in `finally`.
void main() {
  print("🚀 Starting Final GPU Conv2d Validation...");

  const imageSize = 5;
  const pixelCount = imageSize * imageSize;
  // Absolute error allowed when comparing fetched values with expectations.
  const tolerance = 1e-4;

  // Number of 3x3 kernel taps along one axis that land inside the image at
  // [index] with padding 1: border positions lose one tap to the padding.
  int tapsInside(int index) =>
      (index == 0 || index == imageSize - 1) ? 2 : 3;

  // 1. Setup Input: 1 channel, 5x5 image, all 1s.
  // The shape is [Channels, Height, Width].
  final input = Tensor.fromList(
    [1, imageSize, imageSize],
    List.generate(pixelCount, (_) => 1.0),
  );

  // 2. Setup Conv: 3x3 kernel, 1 in-channel, 1 out-channel (keeps math simple).
  final convLayer = Conv2d(
    inChannels: 1,
    outChannels: 1,
    kernelSize: 3,
    stride: 1,
    padding: 1,
  );

  // 3. Manually set weights/bias for predictable verification.
  // Weight shape: [OutChannels, InChannels * K * K] -> [1, 9].
  // NOTE(review): the tensors the layer held before these assignments are not
  // disposed here; confirm the setters (or `dispose`) take care of them.
  convLayer.weight = Tensor.fromList([1, 9], List.generate(9, (_) => 1.0));
  convLayer.bias = Tensor.fromList([1, 1], [0.0]);

  // Passed to `forward` and disposed element-by-element during cleanup.
  final tracker = <Tensor>[];

  try {
    // 4. Run Forward Pass.
    final output = convLayer.forward(input, tracker);

    // 5. Fetch Data from GPU to CPU (flat, row-major list of values).
    final data = output.fetchData();

    print("--- Results ---");

    // Guard before indexing so a short buffer yields a clear diagnostic
    // instead of a RangeError from `data[0]` / `sublist`.
    if (data.length < pixelCount) {
      print(
        "❌ Unexpected output size: got ${data.length} values, "
        "expected at least $pixelCount.",
      );
      return;
    }

    print("Top-Left Pixel Result: ${data[0]}");

    // 6. Print the 5x5 grid from the flat data, one row per line.
    print("Sample Output Matrix (5x5):");
    for (var row = 0; row < imageSize; row++) {
      print(data.sublist(row * imageSize, (row + 1) * imageSize));
    }

    if (data[0] > 0) {
      print("✅ SUCCESS: Conv2d is producing non-zero data!");
    } else {
      print("❌ STILL ZERO: Check the C++ wrapper for pointer casting.");
    }

    // 7. Compare every pixel with the analytically expected value
    // (corner 4.0, edge 6.0, center 9.0).
    final mismatches = <String>[];
    for (var row = 0; row < imageSize; row++) {
      for (var col = 0; col < imageSize; col++) {
        final expected = (tapsInside(row) * tapsInside(col)).toDouble();
        final actual = data[row * imageSize + col];
        // Negated `<=` so that a NaN result is reported as a mismatch.
        if (!((actual - expected).abs() <= tolerance)) {
          mismatches.add("[$row,$col] expected $expected, got $actual");
        }
      }
    }

    if (mismatches.isEmpty) {
      print("✅ VERIFIED: all $pixelCount outputs match the expected pattern.");
    } else {
      print("❌ MISMATCH: ${mismatches.length} of $pixelCount outputs differ:");
      for (final mismatch in mismatches) {
        print("   $mismatch");
      }
    }
  } catch (e, stackTrace) {
    print("Caught Error: $e");
    // The stack trace pinpoints which step (forward, fetch, checks) failed.
    print(stackTrace);
  } finally {
    // 8. Cleanup: runs on success, on early return and on error.
    input.dispose();
    convLayer.dispose();
    for (final t in tracker) {
      t.dispose();
    }
  }
}