diff --git a/CMakeLists.txt b/CMakeLists.txt
index bca4bdb..b983917 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,6 +1,7 @@
 cmake_minimum_required(VERSION 3.12)
 project("CLIP.cpp" C CXX)
 
+set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
diff --git a/build.bat b/build.bat
new file mode 100644
index 0000000..b955093
--- /dev/null
+++ b/build.bat
@@ -0,0 +1,19 @@
+@echo off
+@REM Clean, configure and build the project in Release mode.
+@REM Works from any working directory: switch to the folder holding this script first.
+cd /d "%~dp0"
+
+@REM --init registers submodules on a fresh clone; without it they are silently skipped.
+git submodule update --init --recursive || goto :done
+
+@REM Always start from an empty build tree; skip the delete when there is none yet.
+if exist build rd /s /q build
+md build
+cd build
+
+cmake .. || goto :done
+cmake --build . --config Release
+@REM cmake --build . --target CLIP.cpp --config Release
+
+:done
+pause
diff --git a/clip.cpp b/clip.cpp
index ff1f5ca..3247c89 100644
--- a/clip.cpp
+++ b/clip.cpp
@@ -5,7 +5,6 @@
 #include <fstream>
 #include <iostream>
 #include <map>
-#include <pthread.h>
 #include <regex>
 #include <stdexcept>
 #include <thread>
@@ -803,7 +804,10 @@ typedef struct {
 } ImageData;
 
-// Function to preprocess a single image in a thread
-void * preprocess_image(void * arg) {
+// Thread entry point: preprocesses one image.
+// `arg` must point to an ImageData describing the job.
+// Returns nothing because std::thread discards the callable's result, so the
+// pthread-style `void *` return and pthread_exit() call are gone.
+void preprocess_image(void * arg) {
     ImageData * imageData = static_cast<ImageData *>(arg);
     const clip_image_u8 * input = imageData->input;
     clip_image_f32 * resized = imageData->resized;
@@ -812,7 +816,5 @@ void * preprocess_image(void * arg) {
     // Call the original preprocess function on the image
     clip_image_preprocess(ctx, input, resized);
-
-    pthread_exit(NULL);
 }
 
 // Function to batch-preprocess multiple images i
@@ -834,7 +835,9 @@ void clip_image_batch_preprocess(const clip_ctx * ctx, const int n_threads, cons
     } else {
         // Multi-threaded case
 
-        std::vector<pthread_t> threads(num_threads);
+        // Worker handles; filled as each batch's thread is launched below.
+        std::vector<std::thread> threads;
+        threads.reserve(num_threads);
         std::vector<ImageData> imageData(img_inputs->size);
 
         for (t = 0; t < num_threads; t++) {
@@ -849,12 +852,12 @@ void clip_image_batch_preprocess(const clip_ctx * ctx, const int n_threads, cons
             }
 
             // Create a thread for each batch of images
-            pthread_create(&threads[t], NULL, preprocess_image, static_cast<void *>(&imageData[start_index]));
+            threads.emplace_back(preprocess_image, static_cast<void *>(&imageData[start_index]));
         }
 
         // Wait for all threads to finish
-        for (t = 0; t < num_threads; t++) {
-            pthread_join(threads[t], NULL);
+        for (std::thread & worker : threads) {
+            worker.join();
         }
     }
 }
@@ -1392,8 +1397,12 @@ bool clip_compare_text_and_image(const clip_ctx * ctx, const int n_threads, cons
 
     // prepare image and text vectors
     const int projection_dim = ctx->vision_model.hparams.projection_dim;
-    float img_vec[projection_dim];
-    float txt_vec[projection_dim];
+    // Vector-owned storage replaces the variable-length arrays (not standard C++);
+    // it is released on every return path, so no delete[] is needed at the end.
+    std::vector<float> img_vec_buf(projection_dim);
+    std::vector<float> txt_vec_buf(projection_dim);
+    float * img_vec = img_vec_buf.data();
+    float * txt_vec = txt_vec_buf.data();
 
     // tokenize and encode text
     clip_tokens tokens;
@@ -1487,14 +1496,19 @@ bool clip_zero_shot_label_image(struct clip_ctx * ctx, const int n_threads, cons
 
     clip_image_preprocess(ctx, input_img, &img_res);
 
-    float img_vec[vec_dim];
+    // Vector-owned storage replaces the variable-length arrays (not standard C++)
+    // and is released on every return path, including the early `return false` below.
+    std::vector<float> img_vec_buf(vec_dim);
+    float * img_vec = img_vec_buf.data();
     if (!clip_image_encode(ctx, n_threads, &img_res, img_vec, false)) {
         return false;
     }
 
     // encode texts and compute similarities
-    float txt_vec[vec_dim];
-    float similarities[n_labels];
+    std::vector<float> txt_vec_buf(vec_dim);
+    std::vector<float> similarities_buf(n_labels);
+    float * txt_vec = txt_vec_buf.data();
+    float * similarities = similarities_buf.data();
 
     for (int i = 0; i < n_labels; i++) {
         const auto & text = labels[i];
diff --git a/examples/extract.cpp b/examples/extract.cpp
index 22e736e..ab965ec 100644
--- a/examples/extract.cpp
+++ b/examples/extract.cpp
@@ -45,12 +45,14 @@ int main(int argc, char ** argv) {
 
         const int vec_dim = clip_get_vision_hparams(ctx)->projection_dim;
         int shape[2] = {1, vec_dim};
-        float vec[vec_dim];
+        // Heap buffer: a variable-length array is not standard C++.
+        float* vec = new float[vec_dim];
         clip_image_encode(ctx, params.n_threads, &img_res, vec, false);
 
         // Generate a unique output filename for each image
         std::string output_filename = "./img_vec_" + img_path.substr(img_path.find_last_of('/') + 1) + ".npy";
         writeNpyFile(output_filename.c_str(), vec, shape, 2);
+        delete[] vec;
 
         // Update progress
         processedInputs++;
@@ -69,7 +71,8 @@ int main(int argc, char ** argv) {
 
         const int vec_dim = clip_get_text_hparams(ctx)->projection_dim;
         int shape[2] = {1, vec_dim};
-        float vec[vec_dim];
+        // Heap buffer: a variable-length array is not standard C++.
+        float* vec = new float[vec_dim];
 
         if (!clip_text_encode(ctx, params.n_threads, &tokens, vec, false)) {
             printf("Unable to encode text\n");
@@ -85,6 +88,8 @@ int main(int argc, char ** argv) {
         // Generate a unique output filename for each text
         std::string output_filename = "./text_vec_" + std::to_string(textCounter++) + ".npy";
         writeNpyFile(output_filename.c_str(), vec, shape, 2);
+
+        delete[] vec;
     }
 
     printf("\n"); // Print a newline to clear the progress bar line
diff --git a/examples/simple.c b/examples/simple.c
index 36aa0d3..1d39aee 100644
--- a/examples/simple.c
+++ b/examples/simple.c
@@ -35,7 +35,13 @@ int main() {
     }
 
     // Encode image
-    float img_vec[vec_dim];
+    // Heap buffer instead of a variable-length array, which not every C compiler supports.
+    float * img_vec = (float *)malloc(sizeof(float) * vec_dim);
+    if (img_vec == NULL) {
+        fprintf(stderr, "%s: failed to allocate image vector\n", __func__);
+        return 1;
+    }
     if (!clip_image_encode(ctx, n_threads, img_res, img_vec, true)) {
         fprintf(stderr, "%s: failed to encode image\n", __func__);
+        free(img_vec);
         return 1;
@@ -46,7 +52,14 @@ int main() {
     clip_tokenize(ctx, text, tokens);
 
     // Encode text
-    float txt_vec[vec_dim];
+    float * txt_vec = (float *)malloc(sizeof(float) * vec_dim);
+    if (txt_vec == NULL) {
+        fprintf(stderr, "%s: failed to allocate text vector\n", __func__);
+        free(img_vec);
+        return 1;
+    }
     if (!clip_text_encode(ctx, n_threads, tokens, txt_vec, true)) {
         fprintf(stderr, "%s: failed to encode text\n", __func__);
+        free(img_vec);
+        free(txt_vec);
         return 1;
@@ -55,6 +68,9 @@ int main() {
     // Calculate image-text similarity
     float score = clip_similarity_score(img_vec, txt_vec, vec_dim);
 
+    free(img_vec);
+    free(txt_vec);
+
     // Alternatively, you can replace the above steps with:
     //  float score;
     //  if (!clip_compare_text_and_image_c(ctx, n_threads, text, img0, &score)) {
diff --git a/examples/zsl.cpp b/examples/zsl.cpp
index b13f832..2610844 100644
--- a/examples/zsl.cpp
+++ b/examples/zsl.cpp
@@ -15,7 +15,8 @@ int main(int argc, char ** argv) {
         printf("%s: You must specify at least 2 texts for zero-shot labeling\n", __func__);
     }
 
-    const char * labels[n_labels];
+    // Heap array: a variable-length array is not standard C++.
+    const char ** labels = new const char *[n_labels];
     for (size_t i = 0; i < n_labels; ++i) {
         labels[i] = params.texts[i].c_str();
     }
@@ -34,8 +35,11 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
-    float sorted_scores[n_labels];
-    int sorted_indices[n_labels];
+    // Result buffers passed to clip_zero_shot_label_image and read back below;
+    // heap-allocated for the same reason as `labels`.
+    float * sorted_scores = new float[n_labels];
+    int * sorted_indices = new int[n_labels];
+
     if (!clip_zero_shot_label_image(ctx, params.n_threads, &input_img, labels, n_labels, sorted_scores, sorted_indices)) {
         fprintf(stderr, "Unable to apply ZSL\n");
         return 1;
@@ -46,6 +50,9 @@ int main(int argc, char ** argv) {
         float score = sorted_scores[i];
         printf("%s = %1.4f\n", label, score);
     }
+    delete[] labels;
+    delete[] sorted_scores;
+    delete[] sorted_indices;
 
     clip_free(ctx);
 
diff --git a/tests/benchmark.cpp b/tests/benchmark.cpp
index 3640ae5..959a666 100644
--- a/tests/benchmark.cpp
+++ b/tests/benchmark.cpp
@@ -52,7 +52,8 @@ int main(int argc, char ** argv) {
 
     const int vec_dim = clip_get_text_hparams(ctx)->projection_dim;
 
-    float txt_vecs[n_labels * vec_dim];
+    // Heap buffer: a variable-length array is not standard C++.
+    float* txt_vecs = new float[n_labels * vec_dim];
 
     ggml_time_init();
 
@@ -79,11 +80,18 @@ int main(int argc, char ** argv) {
     int n_total_items = 0;         // total number of images processed
     float total_acc1_score = 0.0f; // total accuracy at 1 for the intire dataset
     float total_acc5_score = 0.0f; // total accuracy at 5 in intitre dataset
-    float img_vecs[vec_dim * batch_size];
-
-    float similarities[n_labels];
-    float sorted_scores[n_labels];
-    int indices[n_labels];
+    // Vector-owned storage replaces the variable-length arrays (not standard C++).
+    // The vectors free themselves when they go out of scope, so the buffers stay
+    // valid for every use below and no manual delete[] is required.
+    std::vector<float> img_vecs_buf(vec_dim * batch_size);
+    float * img_vecs = img_vecs_buf.data();
+
+    std::vector<float> similarities_buf(n_labels);
+    std::vector<float> sorted_scores_buf(n_labels);
+    std::vector<int> indices_buf(n_labels);
+    float * similarities = similarities_buf.data();
+    float * sorted_scores = sorted_scores_buf.data();
+    int * indices = indices_buf.data();
     std::vector<clip_image_u8> img_inputs(batch_size);
     std::vector<clip_image_f32> imgs_resized(batch_size);
 