From 15553084e5d1a7d6abd3aac176bee95b0247c379 Mon Sep 17 00:00:00 2001
From: sohzm
Date: Wed, 19 Jun 2024 04:08:18 +0530
Subject: [PATCH] wip

---
 CMakeLists.txt       | 7 +++++++
 ggml                 | 2 +-
 ggml_extend.hpp      | 6 +++++-
 model.cpp            | 4 ++++
 stable-diffusion.cpp | 6 +++++-
 upscaler.cpp         | 4 ++++
 6 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 28a03fb..6b5679f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,6 +27,7 @@ option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
 option(SD_CUBLAS "sd: cuda backend" OFF)
 option(SD_HIPBLAS "sd: rocm backend" OFF)
 option(SD_METAL "sd: metal backend" OFF)
+option(SD_VULKAN "sd: vulkan backend" OFF)
 option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF)
 option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
 option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
@@ -44,6 +45,12 @@ if(SD_METAL)
     add_definitions(-DSD_USE_METAL)
 endif()
 
+if (SD_VULKAN)
+    message("Use Vulkan as backend stable-diffusion")
+    set(GGML_VULKAN ON)
+    add_definitions(-DSD_USE_VULKAN)
+endif ()
+
 if (SD_HIPBLAS)
     message("Use HIPBLAS as backend stable-diffusion")
     set(GGML_HIPBLAS ON)
diff --git a/ggml b/ggml
index 9d562d7..5653a19 160000
--- a/ggml
+++ b/ggml
@@ -1 +1 @@
-Subproject commit 9d562d712513c77a4de44ad0428be62bc3f2a9cf
+Subproject commit 5653a195935ea3ac54652644c9daf154dbc1571b
diff --git a/ggml_extend.hpp b/ggml_extend.hpp
index dbe9303..1236996 100644
--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@@ -32,6 +32,10 @@
 #include "ggml-metal.h"
 #endif
 
+#ifdef SD_USE_VULKAN
+#include "ggml-vulkan.h"
+#endif
+
 #include "rng.hpp"
 #include "util.h"
 
@@ -588,7 +592,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx
                                                         struct ggml_tensor* k,
                                                         struct ggml_tensor* v,
                                                         bool mask = false) {
-#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL)
+#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN)
     struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false);  // [N * n_head, n_token, d_head]
 #else
     float d_head = (float)q->ne[0];
diff --git a/model.cpp b/model.cpp
index c4556a9..db8bae8 100644
--- a/model.cpp
+++ b/model.cpp
@@ -21,6 +21,10 @@
 #include "ggml-metal.h"
 #endif
 
+#ifdef SD_USE_VULKAN
+#include "ggml-vulkan.h"
+#endif
+
 #define ST_HEADER_SIZE_LEN 8
 
 uint64_t read_u64(uint8_t* buffer) {
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 8e439d2..3521e76 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -154,13 +154,17 @@ class StableDiffusionGGML {
         ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
         backend = ggml_backend_metal_init();
 #endif
+#ifdef SD_USE_VULKAN
+        LOG_DEBUG("Using Vulkan backend");
+        backend = ggml_backend_vk_init(0);
+#endif
 
         if (!backend) {
             LOG_DEBUG("Using CPU backend");
             backend = ggml_backend_cpu_init();
         }
 #ifdef SD_USE_FLASH_ATTENTION
-#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL)
+#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_VULKAN)
         LOG_WARN("Flash Attention not supported with GPU Backend");
 #else
         LOG_INFO("Flash Attention enabled");
diff --git a/upscaler.cpp b/upscaler.cpp
index 0e3f95d..7623f9b 100644
--- a/upscaler.cpp
+++ b/upscaler.cpp
@@ -24,6 +24,10 @@ struct UpscalerGGML {
         ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
         backend = ggml_backend_metal_init();
 #endif
+#ifdef SD_USE_VULKAN
+        LOG_DEBUG("Using Vulkan backend");
+        backend = ggml_backend_vk_init(0);
+#endif
 
         if (!backend) {
             LOG_DEBUG("Using CPU backend");
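
For reference, both the stable-diffusion.cpp and upscaler.cpp hunks apply the same compile-time backend-selection pattern: try the GPU backend that was compiled in, and fall back to CPU if no backend handle was obtained. A minimal self-contained sketch of that pattern, assuming the ggml-backend/ggml-vulkan headers and the ggml_backend_vk_init(size_t dev_num) signature from the ggml revision pinned by the submodule bump:

// Sketch of the backend-selection-with-CPU-fallback pattern used by this
// patch. SD_USE_VULKAN mirrors the CMake option added above; built against
// the pinned ggml revision (assumption, not part of the patch itself).
#include "ggml-backend.h"
#ifdef SD_USE_VULKAN
#include "ggml-vulkan.h"
#endif

static ggml_backend_t init_backend() {
    ggml_backend_t backend = nullptr;
#ifdef SD_USE_VULKAN
    // ggml_backend_vk_init takes the Vulkan device index; device 0 here,
    // matching both hunks in this patch.
    backend = ggml_backend_vk_init(0);
#endif
    if (!backend) {
        // No GPU backend compiled in (or init returned null): use the CPU.
        backend = ggml_backend_cpu_init();
    }
    return backend;
}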