loadModel method

@override
bool loadModel({
  required String modelPath,
})
override

Implementation

/// Loads the model file at [modelPath] and prepares the native llama state.
///
/// [modelPath] is always recorded in `LlamaLibrary._modelPath`. When
/// `_isInIsolate` is `false` nothing else is done and `true` is returned.
/// Otherwise the log callbacks are installed, the backends are initialised,
/// the model is loaded, a context is created and the sampler chain is built;
/// the resulting handles are stored in the static fields of `LlamaLibrary`.
///
/// Returns `false` when [isDeviceSupport] reports `false`, when [isCrash]
/// reports `true`, or when the native library hands back a null model or a
/// null context. Returns `true` once the sampler chain has been assembled.
@override
bool loadModel({
  required String modelPath,
}) {
  LlamaLibrary._modelPath = modelPath;
  if (_isInIsolate == false) {
    return true;
  }

  if (isDeviceSupport() == false || isCrash()) {
    return false;
  }

  final bindings = LlamaLibrary._llamaLibrarySharedBindingsByGeneralDeveloper;

  // One user-data string shared by both loggers. It is deliberately not freed
  // here: presumably the native side keeps the pointer together with the
  // callback for later log calls — TODO confirm.
  final logUserData = "log".toNativeUtf8().cast<Void>();
  bindings.llama_log_set(
      Pointer.fromFunction(ggmlLogCallbackFunction), logUserData);
  bindings.ggml_log_set(
      Pointer.fromFunction(ggmlLogCallbackFunction), logUserData);

  bindings.ggml_backend_load_all();
  bindings.llama_backend_init();

  final modelParams = LLamaModelParams().get(llama: bindings);

  final modelPathPtr = LlamaLibrary._modelPath.toNativeUtf8().cast<Char>();
  try {
    LlamaLibrary._modelContext =
        bindings.llama_load_model_from_file(modelPathPtr, modelParams);
  } finally {
    malloc.free(modelPathPtr);
  }
  if (LlamaLibrary._modelContext.address == 0) {
    // Without a model every later native call would work on an invalid
    // pointer, so report the failure instead of continuing.
    return false;
  }

  LlamaLibrary._vocab =
      bindings.llama_model_get_vocab(LlamaLibrary._modelContext);

  const contextSize = 512 * 4;
  final contextParamsDart = LLamaContextParams()
    ..nThreadsBatch = 4
    ..nThreads = 4
    ..nCtx = contextSize
    ..nBatch = contextSize
    ..nUbatch = contextSize
    ..nPredit = 512;

  _nPredict = contextParamsDart.nPredit;

  final contextParams = contextParamsDart.get(llama: bindings);

  LlamaLibrary._llamaContext = bindings.llama_new_context_with_model(
      LlamaLibrary._modelContext, contextParams);
  if (LlamaLibrary._llamaContext.address == 0) {
    // NOTE(review): the loaded model stays in LlamaLibrary._modelContext;
    // confirm that the class's teardown path releases it.
    return false;
  }

  final samplerParams = LLamaSamplerParams();

  // Initialize the sampler chain with performance counters enabled.
  final sparams = bindings.llama_sampler_chain_default_params();
  sparams.no_perf = false;
  final chain = bindings.llama_sampler_chain_init(sparams);
  LlamaLibrary._llamaSampler = chain;

  // Add samplers based on params. The order below is the order in which they
  // are appended to the chain.
  if (samplerParams.greedy) {
    bindings.llama_sampler_chain_add(
        chain, bindings.llama_sampler_init_greedy());
  }

  bindings.llama_sampler_chain_add(
      chain, bindings.llama_sampler_init_dist(samplerParams.seed));

  if (samplerParams.softmax) {
    bindings.llama_sampler_chain_add(
        chain, bindings.llama_sampler_init_softmax());
  }

  bindings.llama_sampler_chain_add(
      chain, bindings.llama_sampler_init_top_k(samplerParams.topK));
  bindings.llama_sampler_chain_add(
      chain,
      bindings.llama_sampler_init_top_p(
          samplerParams.topP, samplerParams.topPKeep));
  bindings.llama_sampler_chain_add(
      chain,
      bindings.llama_sampler_init_min_p(
          samplerParams.minP, samplerParams.minPKeep));
  bindings.llama_sampler_chain_add(
      chain,
      bindings.llama_sampler_init_typical(
          samplerParams.typical, samplerParams.typicalKeep));
  bindings.llama_sampler_chain_add(
      chain, bindings.llama_sampler_init_temp(samplerParams.temp));
  bindings.llama_sampler_chain_add(
      chain,
      bindings.llama_sampler_init_xtc(
          samplerParams.xtcTemperature,
          samplerParams.xtcStartValue,
          samplerParams.xtcKeep,
          samplerParams.xtcLength));

  bindings.llama_sampler_chain_add(
      chain,
      bindings.llama_sampler_init_mirostat(
          bindings.llama_n_vocab(LlamaLibrary._vocab),
          samplerParams.seed,
          samplerParams.mirostatTau,
          samplerParams.mirostatEta,
          samplerParams.mirostatM));

  bindings.llama_sampler_chain_add(
      chain,
      bindings.llama_sampler_init_mirostat_v2(samplerParams.seed,
          samplerParams.mirostat2Tau, samplerParams.mirostat2Eta));

  // The grammar strings come from toNativeUtf8(), whose default allocator is
  // malloc, so they are released with malloc.free.
  final grammarStrPtr = samplerParams.grammarStr.toNativeUtf8().cast<Char>();
  final grammarRootPtr =
      samplerParams.grammarRoot.toNativeUtf8().cast<Char>();
  try {
    bindings.llama_sampler_chain_add(
        chain,
        bindings.llama_sampler_init_grammar(
            LlamaLibrary._vocab, grammarStrPtr, grammarRootPtr));
  } finally {
    malloc.free(grammarStrPtr);
    malloc.free(grammarRootPtr);
  }

  bindings.llama_sampler_chain_add(
    chain,
    bindings.llama_sampler_init_penalties(
      samplerParams.penaltyLastTokens,
      samplerParams.penaltyRepeat,
      samplerParams.penaltyFreq,
      samplerParams.penaltyPresent,
    ),
  );

  // NOTE(review): this section marshals the DRY sequence breakers, but the
  // array is never passed to native code and the call below appends the
  // penalties sampler a second time. It looks like a DRY sampler was meant to
  // be added here — confirm and replace; behaviour is kept as-is for now.
  final seqBreakers = samplerParams.dryBreakers;
  final numBreakers = seqBreakers.length;
  final seqBreakersPointer = calloc<Pointer<Char>>(numBreakers);

  try {
    for (var i = 0; i < numBreakers; i++) {
      seqBreakersPointer[i] = seqBreakers[i].toNativeUtf8().cast<Char>();
    }

    bindings.llama_sampler_chain_add(
      chain,
      bindings.llama_sampler_init_penalties(
        samplerParams.penaltyLastTokens,
        samplerParams.penaltyRepeat,
        samplerParams.penaltyFreq,
        samplerParams.penaltyPresent,
      ),
    );
  } finally {
    // Each entry was allocated by toNativeUtf8() (malloc); the array itself
    // was allocated with calloc.
    for (var i = 0; i < numBreakers; i++) {
      malloc.free(seqBreakersPointer[i]);
    }
    calloc.free(seqBreakersPointer);
  }

  // bindings.llama_sampler_chain_add(chain, bindings.llama_sampler_init_infill(model));

  // _tokenPtr = malloc<llama_token>();

  return true;
}