1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
From 15553084e5d1a7d6abd3aac176bee95b0247c379 Mon Sep 17 00:00:00 2001
From: sohzm <sohxm7@gmail.com>
Date: Wed, 19 Jun 2024 04:08:18 +0530
Subject: [PATCH] wip: add Vulkan backend support
---
CMakeLists.txt | 7 +++++++
ggml | 2 +-
ggml_extend.hpp | 6 +++++-
model.cpp | 4 ++++
stable-diffusion.cpp | 6 +++++-
upscaler.cpp | 4 ++++
6 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 28a03fb..6b5679f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -27,6 +27,7 @@ option(SD_BUILD_EXAMPLES "sd: build examples" ${SD_STANDALONE})
option(SD_CUBLAS "sd: cuda backend" OFF)
option(SD_HIPBLAS "sd: rocm backend" OFF)
option(SD_METAL "sd: metal backend" OFF)
+option(SD_VULKAN "sd: vulkan backend" OFF)
option(SD_FLASH_ATTN "sd: use flash attention for x4 less memory usage" OFF)
option(SD_FAST_SOFTMAX "sd: x1.5 faster softmax, indeterministic (sometimes, same seed don't generate same image), cuda only" OFF)
option(SD_BUILD_SHARED_LIBS "sd: build shared libs" OFF)
@@ -44,6 +45,12 @@ if(SD_METAL)
add_definitions(-DSD_USE_METAL)
endif()
+if (SD_VULKAN)
+ message("Use Vulkan as backend stable-diffusion")
+ set(GGML_VULKAN ON)
+ add_definitions(-DSD_USE_VULKAN)
+endif ()
+
if (SD_HIPBLAS)
message("Use HIPBLAS as backend stable-diffusion")
set(GGML_HIPBLAS ON)
diff --git a/ggml b/ggml
index 9d562d7..5653a19 160000
--- a/ggml
+++ b/ggml
@@ -1 +1 @@
-Subproject commit 9d562d712513c77a4de44ad0428be62bc3f2a9cf
+Subproject commit 5653a195935ea3ac54652644c9daf154dbc1571b
diff --git a/ggml_extend.hpp b/ggml_extend.hpp
index dbe9303..1236996 100644
--- a/ggml_extend.hpp
+++ b/ggml_extend.hpp
@@ -32,6 +32,10 @@
#include "ggml-metal.h"
#endif
+#ifdef SD_USE_VULKAN
+#include "ggml-vulkan.h"
+#endif
+
#include "rng.hpp"
#include "util.h"
@@ -588,7 +592,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx
struct ggml_tensor* k,
struct ggml_tensor* v,
bool mask = false) {
-#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL)
+#if defined(SD_USE_FLASH_ATTENTION) && !defined(SD_USE_CUBLAS) && !defined(SD_USE_METAL) && !defined(SD_USE_VULKAN)
struct ggml_tensor* kqv = ggml_flash_attn(ctx, q, k, v, false); // [N * n_head, n_token, d_head]
#else
float d_head = (float)q->ne[0];
diff --git a/model.cpp b/model.cpp
index c4556a9..db8bae8 100644
--- a/model.cpp
+++ b/model.cpp
@@ -21,6 +21,10 @@
#include "ggml-metal.h"
#endif
+#ifdef SD_USE_VULKAN
+#include "ggml-vulkan.h"
+#endif
+
#define ST_HEADER_SIZE_LEN 8
uint64_t read_u64(uint8_t* buffer) {
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 8e439d2..3521e76 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -154,13 +154,17 @@ class StableDiffusionGGML {
ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
backend = ggml_backend_metal_init();
#endif
+#ifdef SD_USE_VULKAN
+ LOG_DEBUG("Using Vulkan backend");
+ backend = ggml_backend_vk_init(0);  // device index is required; use device 0, same as upscaler.cpp
+#endif
if (!backend) {
LOG_DEBUG("Using CPU backend");
backend = ggml_backend_cpu_init();
}
#ifdef SD_USE_FLASH_ATTENTION
-#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL)
+#if defined(SD_USE_CUBLAS) || defined(SD_USE_METAL) || defined(SD_USE_VULKAN)
LOG_WARN("Flash Attention not supported with GPU Backend");
#else
LOG_INFO("Flash Attention enabled");
diff --git a/upscaler.cpp b/upscaler.cpp
index 0e3f95d..7623f9b 100644
--- a/upscaler.cpp
+++ b/upscaler.cpp
@@ -24,6 +24,10 @@ struct UpscalerGGML {
ggml_backend_metal_log_set_callback(ggml_log_callback_default, nullptr);
backend = ggml_backend_metal_init();
#endif
+#ifdef SD_USE_VULKAN
+ LOG_DEBUG("Using Vulkan backend");
+ backend = ggml_backend_vk_init(0);
+#endif
if (!backend) {
LOG_DEBUG("Using CPU backend");
|