-
-
Notifications
You must be signed in to change notification settings - Fork 22k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Metal: Improve startup times by using concurrent shader compilation APIs #96052
Merged
akien-mga
merged 1 commit into
godotengine:master
from
stuartcarnie:sgc/metal_shader_compilation
Aug 25, 2024
Merged
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,9 +50,12 @@ | |
|
||
#import "metal_objects.h" | ||
|
||
#import "metal_utils.h" | ||
#import "pixel_formats.h" | ||
#import "rendering_device_driver_metal.h" | ||
|
||
#import <os/signpost.h> | ||
|
||
void MDCommandBuffer::begin() { | ||
DEV_ASSERT(commandBuffer == nil); | ||
commandBuffer = queue.commandBuffer; | ||
|
@@ -850,7 +853,7 @@ | |
type = MDCommandBufferStateType::None; | ||
} | ||
|
||
MDComputeShader::MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, id<MTLLibrary> p_kernel) : | ||
MDComputeShader::MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *p_kernel) : | ||
MDShader(p_name, p_sets), kernel(p_kernel) { | ||
} | ||
|
||
|
@@ -868,7 +871,7 @@ | |
[enc setBytes:ptr length:length atIndex:push_constants.binding]; | ||
} | ||
|
||
MDRenderShader::MDRenderShader(CharString p_name, Vector<UniformSet> p_sets, id<MTLLibrary> _Nonnull p_vert, id<MTLLibrary> _Nonnull p_frag) : | ||
MDRenderShader::MDRenderShader(CharString p_name, Vector<UniformSet> p_sets, MDLibrary *_Nonnull p_vert, MDLibrary *_Nonnull p_frag) : | ||
MDShader(p_name, p_sets), vert(p_vert), frag(p_frag) { | ||
} | ||
|
||
|
@@ -1378,3 +1381,204 @@ fragment ClearColorsOut fragClear(VaryingsPos varyings [[stage_in]], constant Cl | |
} | ||
return *val; | ||
} | ||
|
||
// Short stage labels, indexed by the RD::SHADER_STAGE_* values. They are used
// to tag the "shader_compile" signpost intervals emitted when a library is
// compiled. Both the table slots and the strings they point to are immutable.
static const char *const SHADER_STAGE_NAMES[] = {
	[RD::SHADER_STAGE_VERTEX] = "vert",
	[RD::SHADER_STAGE_FRAGMENT] = "frag",
	[RD::SHADER_STAGE_TESSELATION_CONTROL] = "tess_ctrl",
	[RD::SHADER_STAGE_TESSELATION_EVALUATION] = "tess_eval",
	[RD::SHADER_STAGE_COMPUTE] = "comp",
};
|
||
// Asks the owner to release the cache entry registered under this entry's key.
void ShaderCacheEntry::notify_free() const {
	this->owner.shader_cache_free_entry(this->key);
}
|
||
// Private MDLibrary interface used by the subclasses defined in this file.
@interface MDLibrary ()
// Returns the cache entry this library was initialized with.
- (ShaderCacheEntry *)entry;
// Stores the cache entry; the subclass initializers call this on super.
- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry;
@end
|
||
// MDLibrary subclass that keeps the device, source and compile options it was
// created with and compiles the Metal library the first time -library or
// -error is requested.
@interface MDLazyLibrary : MDLibrary {
	// Compilation inputs; released once the library has been compiled.
	id<MTLDevice> _device;
	NSString *_source;
	MTLCompileOptions *_options;

	// Guards the one-time compilation.
	std::shared_mutex _mu;
	bool _loaded;

	// Values handed back by -library and -error.
	id<MTLLibrary> _library;
	NSError *_error;
}
- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
		device:(id<MTLDevice>)device
		source:(NSString *)source
		options:(MTLCompileOptions *)options;
@end
|
||
// MDLibrary subclass that starts an asynchronous compilation in its
// initializer; -library and -error wait for that compilation to finish.
@interface MDImmediateLibrary : MDLibrary {
	// Values delivered by the compilation completion handler.
	id<MTLLibrary> _library;
	NSError *_error;

	// _ready is written under _cv_mutex and is the predicate waited on via _cv.
	std::mutex _cv_mutex;
	std::condition_variable _cv;
	bool _ready;

	// Set once the completion handler has finished; checked before locking.
	std::atomic<bool> _complete;
}
- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
		device:(id<MTLDevice>)device
		source:(NSString *)source
		options:(MTLCompileOptions *)options;
@end
|
||
@implementation MDLibrary { | ||
ShaderCacheEntry *_entry; | ||
} | ||
|
||
// Creates the concrete library for `strategy`: LAZY produces an MDLazyLibrary,
// while DEFAULT and every other value produce an MDImmediateLibrary.
+ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry
		device:(id<MTLDevice>)device
		source:(NSString *)source
		options:(MTLCompileOptions *)options
		strategy:(ShaderLoadStrategy)strategy {
	if (strategy == ShaderLoadStrategy::LAZY) {
		return [[MDLazyLibrary alloc] initWithCacheEntry:entry device:device source:source options:options];
	}
	return [[MDImmediateLibrary alloc] initWithCacheEntry:entry device:device source:source options:options];
}
|
||
// Returns the cache entry passed to -initWithCacheEntry:.
- (ShaderCacheEntry *)entry {
	return self->_entry;
}
|
||
// Base-class placeholder: crashes with "Not implemented".
// MDImmediateLibrary and MDLazyLibrary supply their own -library.
- (id<MTLLibrary>)library {
	CRASH_NOW_MSG("Not implemented");
	return nil;
}
|
||
// Base-class placeholder: crashes with "Not implemented".
// MDImmediateLibrary and MDLazyLibrary supply their own -error.
- (NSError *)error {
	CRASH_NOW_MSG("Not implemented");
	return nil;
}
Comment on lines
+1454
to
+1462
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are overridden in the subclasses, so these methods are unreachable. |
||
|
||
// No-op: the supplied label is discarded.
- (void)setLabel:(NSString *)label {
	// Nothing to do.
}
|
||
// Stores `entry` and records this library on it as the entry's `library`.
// `entry` is dereferenced here, so it must not be null.
- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry {
	self = [super init];
	// Only touch ivars when the superclass initializer succeeded.
	if (self) {
		_entry = entry;
		_entry->library = self;
	}
	return self;
}
|
||
// Tells the cache entry that this library is being deallocated.
- (void)dealloc {
	self->_entry->notify_free();
}
|
||
@end | ||
|
||
// MDLibrary subclass that begins compiling as soon as it is initialized, using
// -[MTLDevice newLibraryWithSource:options:completionHandler:]. -library and
// -error block the calling thread until the completion handler has run.
@implementation MDImmediateLibrary

// Starts the asynchronous compilation of `source` on `device` with `options`.
// The compilation is bracketed by a "shader_compile" signpost interval tagged
// with the entry's name, stage and short hash.
- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
		device:(id<MTLDevice>)device
		source:(NSString *)source
		options:(MTLCompileOptions *)options {
	self = [super initWithCacheEntry:entry];
	if (self == nil) {
		return nil;
	}
	_complete = false;
	_ready = false;

	__block os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)self;
	os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile",
			"shader_name=%{public}s stage=%{public}s hash=%{public}s",
			entry->name.get_data(), SHADER_STAGE_NAMES[entry->stage], entry->short_sha.get_data());

	// The handler references self strongly, so this object stays alive until
	// the compilation result has been delivered.
	[device newLibraryWithSource:source
			options:options
			completionHandler:^(id<MTLLibrary> library, NSError *error) {
				os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile");
				self->_library = library;
				self->_error = error;
				if (error) {
					// vformat substitutes the printf-style %s fields; String::format
					// expects an Array or Dictionary of values and leaves them as-is.
					ERR_PRINT(vformat(U"Error compiling shader %s: %s", entry->name.get_data(), error.localizedDescription.UTF8String));
				}

				{
					// Publish the result under the mutex the waiters use.
					std::lock_guard<std::mutex> lock(self->_cv_mutex);
					self->_ready = true;
				}
				self->_cv.notify_all();
				self->_complete = true;
			}];
	return self;
}

// Blocks until the completion handler has published its result. The atomic
// _complete flag lets callers skip the lock once compilation has finished.
- (void)waitUntilCompiled {
	if (!_complete) {
		std::unique_lock<std::mutex> lock(_cv_mutex);
		_cv.wait(lock, [&] { return _ready; });
	}
}

// Returns the compiled library, waiting for compilation to finish if needed.
- (id<MTLLibrary>)library {
	[self waitUntilCompiled];
	return _library;
}

// Returns the error reported by the compiler (nil if none), waiting for
// compilation to finish if needed.
- (NSError *)error {
	[self waitUntilCompiled];
	return _error;
}

@end
|
||
// MDLibrary subclass that defers compilation until -library or -error is first
// requested, then compiles synchronously on the requesting thread.
@implementation MDLazyLibrary

// Retains the inputs needed to compile later; no compilation happens here.
- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry
		device:(id<MTLDevice>)device
		source:(NSString *)source
		options:(MTLCompileOptions *)options {
	self = [super initWithCacheEntry:entry];
	if (self == nil) {
		return nil;
	}
	_device = device;
	_source = source;
	_options = options;

	return self;
}

// Compiles the library once. Later calls return immediately.
- (void)load {
	{
		// Fast path: a shared lock is enough to observe a finished compilation.
		std::shared_lock<std::shared_mutex> lock(_mu);
		if (_loaded) {
			return;
		}
	}

	std::unique_lock<std::shared_mutex> lock(_mu);
	// Re-check: another thread may have compiled between the two locks.
	if (_loaded) {
		return;
	}

	ShaderCacheEntry *entry = [self entry];

	__block os_signpost_id_t compile_id = (os_signpost_id_t)(uintptr_t)self;
	os_signpost_interval_begin(LOG_INTERVALS, compile_id, "shader_compile",
			"shader_name=%{public}s stage=%{public}s hash=%{public}s",
			entry->name.get_data(), SHADER_STAGE_NAMES[entry->stage], entry->short_sha.get_data());
	NSError *error = nil;
	_library = [_device newLibraryWithSource:_source options:_options error:&error];
	os_signpost_interval_end(LOG_INTERVALS, compile_id, "shader_compile");
	// Keep the compiler's error so -error can report it. It was previously
	// dropped, which made -error always return nil.
	_error = error;
	// The inputs are no longer needed once compilation has been attempted.
	_device = nil;
	_source = nil;
	_options = nil;
	_loaded = true;
}

// Returns the compiled library, compiling it first if necessary.
- (id<MTLLibrary>)library {
	[self load];
	return _library;
}

// Returns the error produced by compilation (nil if none), compiling first if
// necessary.
- (NSError *)error {
	[self load];
	return _error;
}

@end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`DEFAULT` uses the background compilation APIs, managed by the OS, to compile all shaders, which improves overall performance, since Godot schedules these requests serially. Only when a pipeline uses the `MTLLibrary` will Godot block to wait for the compilation to complete. Previously, Godot blocked when compiling the library, so it had to wait for over 1600 shaders to compile. With this change, only about 250 shaders block. 936 shaders are compiled, and the OS only allows a max of 3 concurrent requests, so the 250 shaders still have to wait for the others to compile, but the result is a vast improvement overall.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably worth mentioning this exact behavior is introduced by #90400 but at a general level for all APIs. The same strategy is adopted, where it'll spawn as much work as possible for the global task scheduler we have and wait on it until it is required if it needs to draw.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's great!