diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4f2834312..91c52a050 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -84,15 +84,12 @@ endif()
 
 set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
 
-# see https://github.com/ggerganov/ggml/pull/682
-add_definitions(-DGGML_MAX_NAME=128)
-
 # deps
 add_subdirectory(ggml)
 add_subdirectory(thirdparty)
 
-target_link_libraries(${SD_LIB} PUBLIC ggml zip)
+target_link_libraries(${SD_LIB} PUBLIC ggml_sd zip)
 
 target_include_directories(${SD_LIB} PUBLIC . thirdparty)
 target_compile_features(${SD_LIB} PUBLIC cxx_std_11)
diff --git a/model.cpp b/model.cpp
index c8cc5e32f..05626b70b 100644
--- a/model.cpp
+++ b/model.cpp
@@ -7,6 +7,8 @@
 #include
 
 #include "model.h"
+
+#include "stable-diffusion.h"
 #include "util.h"
 #include "vocab.hpp"
@@ -768,7 +770,6 @@ bool ModelLoader::init_from_gguf_file(const std::string& file_path, const std::s
         size_t offset = data_offset + gguf_get_tensor_offset(ctx_gguf_, i);
 
         // LOG_DEBUG("%s", name.c_str());
-
         TensorStorage tensor_storage(prefix + name, dummy->type, dummy->ne, ggml_n_dims(dummy), file_index, offset);
 
         GGML_ASSERT(ggml_nbytes(dummy) == tensor_storage.nbytes());
@@ -1356,7 +1357,6 @@ std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& v
 bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, ggml_backend_t backend) {
     std::vector<TensorStorage> processed_tensor_storages;
     for (auto& tensor_storage : tensor_storages) {
-        // LOG_DEBUG("%s", name.c_str());
         if (is_unused_tensor(tensor_storage.name)) {
             continue;
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index b489b499f..3c2e96986 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -103,6 +103,8 @@ class StableDiffusionGGML {
     std::string trigger_word = "img";  // should be user settable
 
+    bool model_loaded = false;
+
     StableDiffusionGGML() = default;
 
     StableDiffusionGGML(int n_threads,
@@ -1530,6 +1532,73 @@ sd_ctx_t* new_sd_ctx(const char* model_path_c_str,
     return sd_ctx;
 }
 
+sd_ctx_t* new_sd_ctx_direct(
+    const char* lora_model_dir,
+    bool vae_decode_only,
+    bool free_params_immediately,
+    int n_threads,
+    enum rng_type_t rng_type) {
+    sd_ctx_t* sd_ctx = (sd_ctx_t*)malloc(sizeof(sd_ctx_t));
+    if (sd_ctx == NULL) {
+        return NULL;
+    }
+    sd_ctx->sd = new StableDiffusionGGML(n_threads,
+                                         vae_decode_only,
+                                         free_params_immediately,
+                                         lora_model_dir,
+                                         rng_type);
+    if (sd_ctx->sd == NULL) {
+        return NULL;
+    }
+    sd_ctx->sd->lora_model_dir = std::string(lora_model_dir);
+    sd_ctx->sd->model_loaded = false;
+    return sd_ctx;
+}
+
+bool model_loaded = false;
+bool sd_model_is_loaded() {
+    return model_loaded;
+}
+
+void set_model_loaded() {
+    model_loaded = true;
+}
+
+void load_model(sd_ctx_t* sd_ctx,
+                const char* model_path_c_str,
+                const char* vae_path_c_str,
+                const char* taesd_path_c_str,
+                const char* control_net_path_c_str,
+                const char* embed_dir_c_str,
+                enum sd_type_t wtype,
+                bool vae_tiling,
+                enum schedule_t s,
+                bool keep_control_net_cpu) {
+    std::string model_path(model_path_c_str);
+    std::string vae_path(vae_path_c_str);
+    std::string taesd_path(taesd_path_c_str);
+    std::string control_net_path(control_net_path_c_str);
+    std::string embd_path(embed_dir_c_str);
+    std::string id_embd_path("");
+    if (!sd_ctx->sd->load_from_file(model_path,
+                                    vae_path,
+                                    control_net_path,
+                                    embd_path,
+                                    id_embd_path,
+                                    taesd_path,
+                                    vae_tiling,
+                                    static_cast<ggml_type>(wtype),
+                                    s,
+                                    false,
+                                    keep_control_net_cpu,
+                                    false)) {
+        delete sd_ctx->sd;
+        sd_ctx->sd = NULL;
+        free(sd_ctx);
+    }
+}
+
+
 void free_sd_ctx(sd_ctx_t* sd_ctx) {
     if (sd_ctx->sd != NULL) {
         delete sd_ctx->sd;
diff --git a/stable-diffusion.h b/stable-diffusion.h
index 369600418..0d8374740 100644
--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@@ -99,7 +99,7 @@ typedef void (*sd_progress_cb_t)(int step, int steps, float time, void* data);
 
 SD_API void sd_set_log_callback(sd_log_cb_t sd_log_cb, void* data);
 SD_API void sd_set_progress_callback(sd_progress_cb_t cb, void* data);
-SD_API int32_t get_num_physical_cores();
+
 SD_API const char* sd_get_system_info();
 
 typedef struct {
@@ -129,6 +129,29 @@ SD_API sd_ctx_t* new_sd_ctx(const char* model_path,
                             bool keep_control_net_cpu,
                             bool keep_vae_on_cpu);
 
+SD_API sd_ctx_t* new_sd_ctx_direct(
+    const char* lora_model_dir,
+    bool vae_decode_only,
+    bool free_params_immediately,
+    int n_threads,
+    enum rng_type_t rng_type);
+
+SD_API bool sd_model_is_loaded();
+SD_API void set_model_loaded();
+
+
+SD_API void load_model(sd_ctx_t* sd_ctx,
+                       const char* model_path,
+                       const char* vae_path,
+                       const char* taesd_path,
+                       const char* control_net_path_c_str,
+                       const char* embed_dir_c_str,
+                       enum sd_type_t wtype,
+                       bool vae_tiling,
+                       enum schedule_t s,
+                       bool keep_control_net_cpu);
+
 SD_API void free_sd_ctx(sd_ctx_t* sd_ctx);
 
 SD_API sd_image_t* txt2img(sd_ctx_t* sd_ctx,
diff --git a/util.cpp b/util.cpp
index 0755cc32e..fa5b009fa 100644
--- a/util.cpp
+++ b/util.cpp
@@ -195,6 +195,10 @@ std::vector<std::string> get_files_from_dir(const std::string& dir) {
 // get_num_physical_cores is copy from
 // https://github.com/ggerganov/llama.cpp/blob/master/examples/common.cpp
 // LICENSE: https://github.com/ggerganov/llama.cpp/blob/master/LICENSE
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 int32_t get_num_physical_cores() {
 #ifdef __linux__
     // enumerate the set of thread siblings, num entries is num cores
@@ -230,6 +234,10 @@ int32_t get_num_physical_cores() {
     return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
 }
 
+#ifdef __cplusplus
+}
+#endif
+
 static sd_progress_cb_t sd_progress_cb = NULL;
 void* sd_progress_cb_data = NULL;
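
For reference, below is a minimal caller-side sketch (not part of the patch) of the split create-then-load flow the diff adds to stable-diffusion.h/.cpp. The model path, thread count, and the wtype/schedule/RNG enum choices are placeholders, and the sketch assumes load_model() succeeds, since on failure it frees the context internally and gives the caller no return value to check.

// usage_sketch.cpp -- hypothetical example, not included in this diff
#include <cstdio>

#include "stable-diffusion.h"

int main() {
    // Phase 1: create a context without loading any weights yet.
    sd_ctx_t* ctx = new_sd_ctx_direct(/* lora_model_dir          */ "",
                                      /* vae_decode_only         */ true,
                                      /* free_params_immediately */ false,
                                      /* n_threads               */ 4,
                                      STD_DEFAULT_RNG);
    if (ctx == NULL) {
        fprintf(stderr, "failed to create sd context\n");
        return 1;
    }

    // Phase 2: load the weights later, e.g. once the user has picked a model.
    // "model.safetensors" is a placeholder path.
    load_model(ctx,
               "model.safetensors",  // model_path
               "",                   // vae_path
               "",                   // taesd_path
               "",                   // control_net_path
               "",                   // embed_dir
               SD_TYPE_F16,          // wtype
               false,                // vae_tiling
               DEFAULT,              // schedule
               false);               // keep_control_net_cpu

    // The patch leaves updating the loaded flag to the caller.
    set_model_loaded();

    if (sd_model_is_loaded()) {
        // ctx is now usable with txt2img()/img2img() exactly as a context
        // returned by the original new_sd_ctx() would be.
    }

    free_sd_ctx(ctx);
    return 0;
}

Two caveats about the API as written: sd_model_is_loaded()/set_model_loaded() track a single process-wide flag rather than per-context state (the StableDiffusionGGML::model_loaded member added above is never read within this patch), and load_model() deletes and frees the context on failure without signalling the caller, so a failed load leaves the caller holding a dangling pointer.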