From 3e1870d567c77fbb11fca3f8753d09dc67fbef30 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Tue, 22 Aug 2023 19:22:27 +0200 Subject: [PATCH] early-access version 3828 --- README.md | 2 +- src/android/app/build.gradle.kts | 5 + .../yuzu_emu/adapters/HomeSettingAdapter.kt | 24 +- .../fragments/HomeSettingsFragment.kt | 12 +- .../org/yuzu/yuzu_emu/model/HomeSetting.kt | 6 +- .../org/yuzu/yuzu_emu/model/HomeViewModel.kt | 19 ++ .../org/yuzu/yuzu_emu/ui/main/MainActivity.kt | 1 + .../src/main/res/layout/card_home_option.xml | 17 ++ src/core/crypto/key_manager.cpp | 208 +++++++++++------- src/core/crypto/key_manager.h | 54 +++-- src/core/file_sys/submission_package.cpp | 35 +-- src/core/file_sys/submission_package.h | 1 - src/core/hle/service/es/es.cpp | 10 +- src/video_core/host_shaders/CMakeLists.txt | 4 +- .../host_shaders/queries_prefix_scan_sum.comp | 166 ++++++++------ .../queries_prefix_scan_sum_nosubgroups.comp | 120 ++++++++++ .../renderer_vulkan/vk_compute_pass.cpp | 31 ++- .../renderer_vulkan/vk_compute_pass.h | 4 +- 18 files changed, 506 insertions(+), 213 deletions(-) create mode 100755 src/video_core/host_shaders/queries_prefix_scan_sum_nosubgroups.comp diff --git a/README.md b/README.md index 2557afd86..0b7dcf1c4 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 3827. +This is the source code for early-access 3828. ## Legal Notice diff --git a/src/android/app/build.gradle.kts b/src/android/app/build.gradle.kts index 9a47e2bd8..a8db70511 100755 --- a/src/android/app/build.gradle.kts +++ b/src/android/app/build.gradle.kts @@ -160,6 +160,11 @@ android { } } +tasks.create("ktlintReset") { + delete(File(buildDir.path + File.separator + "intermediates/ktLint")) +} + +tasks.getByPath("loadKtlintReporters").dependsOn("ktlintReset") tasks.getByPath("preBuild").dependsOn("ktlintCheck") ktlint { diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/adapters/HomeSettingAdapter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/adapters/HomeSettingAdapter.kt index aadc445f9..9f859b442 100755 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/adapters/HomeSettingAdapter.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/adapters/HomeSettingAdapter.kt @@ -3,19 +3,25 @@ package org.yuzu.yuzu_emu.adapters +import android.text.TextUtils import android.view.LayoutInflater import android.view.View import android.view.ViewGroup import androidx.appcompat.app.AppCompatActivity import androidx.core.content.ContextCompat import androidx.core.content.res.ResourcesCompat +import androidx.lifecycle.LifecycleOwner import androidx.recyclerview.widget.RecyclerView import org.yuzu.yuzu_emu.R import org.yuzu.yuzu_emu.databinding.CardHomeOptionBinding import org.yuzu.yuzu_emu.fragments.MessageDialogFragment import org.yuzu.yuzu_emu.model.HomeSetting -class HomeSettingAdapter(private val activity: AppCompatActivity, var options: List) : +class HomeSettingAdapter( + private val activity: AppCompatActivity, + private val viewLifecycle: LifecycleOwner, + var options: List +) : RecyclerView.Adapter(), View.OnClickListener { override fun onCreateViewHolder(parent: ViewGroup, viewType: Int): HomeOptionViewHolder { @@ -79,6 +85,22 @@ class HomeSettingAdapter(private val activity: AppCompatActivity, var options: L binding.optionDescription.alpha = 0.5f binding.optionIcon.alpha = 0.5f } + + option.details.observe(viewLifecycle) { updateOptionDetails(it) } + binding.optionDetail.postDelayed( + { + binding.optionDetail.ellipsize = TextUtils.TruncateAt.MARQUEE + binding.optionDetail.isSelected = true + }, + 3000 + ) + } + + private fun updateOptionDetails(detailString: String) { + if (detailString.isNotEmpty()) { + binding.optionDetail.text = detailString + binding.optionDetail.visibility = View.VISIBLE + } } } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt index c001af892..d5e793491 100755 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/HomeSettingsFragment.kt @@ -129,7 +129,11 @@ class HomeSettingsFragment : Fragment() { mainActivity.getGamesDirectory.launch( Intent(Intent.ACTION_OPEN_DOCUMENT_TREE).data ) - } + }, + { true }, + 0, + 0, + homeViewModel.gamesDir ) ) add( @@ -201,7 +205,11 @@ class HomeSettingsFragment : Fragment() { binding.homeSettingsList.apply { layoutManager = LinearLayoutManager(requireContext()) - adapter = HomeSettingAdapter(requireActivity() as AppCompatActivity, optionsList) + adapter = HomeSettingAdapter( + requireActivity() as AppCompatActivity, + viewLifecycleOwner, + optionsList + ) } setInsets() diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/HomeSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/HomeSetting.kt index 522d07c37..498c222fa 100755 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/HomeSetting.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/HomeSetting.kt @@ -3,6 +3,9 @@ package org.yuzu.yuzu_emu.model +import androidx.lifecycle.LiveData +import androidx.lifecycle.MutableLiveData + data class HomeSetting( val titleId: Int, val descriptionId: Int, @@ -10,5 +13,6 @@ data class HomeSetting( val onClick: () -> Unit, val isEnabled: () -> Boolean = { true }, val disabledTitleId: Int = 0, - val disabledMessageId: Int = 0 + val disabledMessageId: Int = 0, + val details: LiveData = MutableLiveData("") ) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/HomeViewModel.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/HomeViewModel.kt index e13d84c9c..a48ef7a88 100755 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/HomeViewModel.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/model/HomeViewModel.kt @@ -3,9 +3,15 @@ package org.yuzu.yuzu_emu.model +import android.net.Uri +import androidx.fragment.app.FragmentActivity import androidx.lifecycle.LiveData import androidx.lifecycle.MutableLiveData import androidx.lifecycle.ViewModel +import androidx.lifecycle.ViewModelProvider +import androidx.preference.PreferenceManager +import org.yuzu.yuzu_emu.YuzuApplication +import org.yuzu.yuzu_emu.utils.GameHelper class HomeViewModel : ViewModel() { private val _navigationVisible = MutableLiveData>() @@ -17,6 +23,14 @@ class HomeViewModel : ViewModel() { private val _shouldPageForward = MutableLiveData(false) val shouldPageForward: LiveData get() = _shouldPageForward + private val _gamesDir = MutableLiveData( + Uri.parse( + PreferenceManager.getDefaultSharedPreferences(YuzuApplication.appContext) + .getString(GameHelper.KEY_GAME_PATH, "") + ).path ?: "" + ) + val gamesDir: LiveData get() = _gamesDir + var navigatedToSetup = false init { @@ -40,4 +54,9 @@ class HomeViewModel : ViewModel() { fun setShouldPageForward(pageForward: Boolean) { _shouldPageForward.value = pageForward } + + fun setGamesDir(activity: FragmentActivity, dir: String) { + ViewModelProvider(activity)[GamesViewModel::class.java].reloadGames(true) + _gamesDir.value = dir + } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt index f77d06262..aaf3a0ec1 100755 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/ui/main/MainActivity.kt @@ -290,6 +290,7 @@ class MainActivity : AppCompatActivity(), ThemeProvider { ).show() gamesViewModel.reloadGames(true) + homeViewModel.setGamesDir(this, result.path!!) } val getProdKey = diff --git a/src/android/app/src/main/res/layout/card_home_option.xml b/src/android/app/src/main/res/layout/card_home_option.xml index dc289db17..f9f1d89fb 100755 --- a/src/android/app/src/main/res/layout/card_home_option.xml +++ b/src/android/app/src/main/res/layout/card_home_option.xml @@ -53,6 +53,23 @@ android:layout_marginTop="5dp" tools:text="@string/install_prod_keys_description" /> + + diff --git a/src/core/crypto/key_manager.cpp b/src/core/crypto/key_manager.cpp index 0f6ac1e6f..dcabcd080 100755 --- a/src/core/crypto/key_manager.cpp +++ b/src/core/crypto/key_manager.cpp @@ -35,7 +35,6 @@ namespace Core::Crypto { namespace { constexpr u64 CURRENT_CRYPTO_REVISION = 0x5; -constexpr u64 FULL_TICKET_SIZE = 0x400; using Common::AsArray; @@ -156,6 +155,10 @@ u64 GetSignatureTypePaddingSize(SignatureType type) { UNREACHABLE(); } +bool Ticket::IsValid() const { + return !std::holds_alternative(data); +} + SignatureType Ticket::GetSignatureType() const { if (const auto* ticket = std::get_if(&data)) { return ticket->sig_type; @@ -210,6 +213,54 @@ Ticket Ticket::SynthesizeCommon(Key128 title_key, const std::array& righ return Ticket{out}; } +Ticket Ticket::Read(const FileSys::VirtualFile& file) { + // Attempt to read up to the largest ticket size, and make sure we read at least a signature + // type. + std::array raw_data{}; + auto read_size = file->Read(raw_data.data(), raw_data.size(), 0); + if (read_size < sizeof(SignatureType)) { + LOG_WARNING(Crypto, "Attempted to read ticket file with invalid size {}.", read_size); + return Ticket{std::monostate()}; + } + return Read(std::span{raw_data}); +} + +Ticket Ticket::Read(std::span raw_data) { + // Some tools read only 0x180 bytes of ticket data instead of 0x2C0, so + // just make sure we have at least the bare minimum of data to work with. + SignatureType sig_type; + if (raw_data.size() < sizeof(SignatureType)) { + LOG_WARNING(Crypto, "Attempted to parse ticket buffer with invalid size {}.", + raw_data.size()); + return Ticket{std::monostate()}; + } + std::memcpy(&sig_type, raw_data.data(), sizeof(sig_type)); + + switch (sig_type) { + case SignatureType::RSA_4096_SHA1: + case SignatureType::RSA_4096_SHA256: { + RSA4096Ticket ticket{}; + std::memcpy(&ticket, raw_data.data(), sizeof(ticket)); + return Ticket{ticket}; + } + case SignatureType::RSA_2048_SHA1: + case SignatureType::RSA_2048_SHA256: { + RSA2048Ticket ticket{}; + std::memcpy(&ticket, raw_data.data(), sizeof(ticket)); + return Ticket{ticket}; + } + case SignatureType::ECDSA_SHA1: + case SignatureType::ECDSA_SHA256: { + ECDSATicket ticket{}; + std::memcpy(&ticket, raw_data.data(), sizeof(ticket)); + return Ticket{ticket}; + } + default: + LOG_WARNING(Crypto, "Attempted to parse ticket buffer with invalid type {}.", sig_type); + return Ticket{std::monostate()}; + } +} + Key128 GenerateKeyEncryptionKey(Key128 source, Key128 master, Key128 kek_seed, Key128 key_seed) { Key128 out{}; @@ -290,9 +341,9 @@ void KeyManager::DeriveGeneralPurposeKeys(std::size_t crypto_revision) { } } -RSAKeyPair<2048> KeyManager::GetETicketRSAKey() const { +void KeyManager::DeriveETicketRSAKey() { if (IsAllZeroArray(eticket_extended_kek) || !HasKey(S128KeyType::ETicketRSAKek)) { - return {}; + return; } const auto eticket_final = GetKey(S128KeyType::ETicketRSAKek); @@ -304,12 +355,12 @@ RSAKeyPair<2048> KeyManager::GetETicketRSAKey() const { rsa_1.Transcode(eticket_extended_kek.data() + 0x10, eticket_extended_kek.size() - 0x10, extended_dec.data(), Op::Decrypt); - RSAKeyPair<2048> rsa_key{}; - std::memcpy(rsa_key.decryption_key.data(), extended_dec.data(), rsa_key.decryption_key.size()); - std::memcpy(rsa_key.modulus.data(), extended_dec.data() + 0x100, rsa_key.modulus.size()); - std::memcpy(rsa_key.exponent.data(), extended_dec.data() + 0x200, rsa_key.exponent.size()); - - return rsa_key; + std::memcpy(eticket_rsa_keypair.decryption_key.data(), extended_dec.data(), + eticket_rsa_keypair.decryption_key.size()); + std::memcpy(eticket_rsa_keypair.modulus.data(), extended_dec.data() + 0x100, + eticket_rsa_keypair.modulus.size()); + std::memcpy(eticket_rsa_keypair.exponent.data(), extended_dec.data() + 0x200, + eticket_rsa_keypair.exponent.size()); } Key128 DeriveKeyblobMACKey(const Key128& keyblob_key, const Key128& mac_source) { @@ -447,10 +498,12 @@ std::vector GetTicketblob(const Common::FS::IOFile& ticket_save) { for (std::size_t offset = 0; offset + 0x4 < buffer.size(); ++offset) { if (buffer[offset] == 0x4 && buffer[offset + 1] == 0x0 && buffer[offset + 2] == 0x1 && buffer[offset + 3] == 0x0) { - out.emplace_back(); - auto& next = out.back(); - std::memcpy(&next, buffer.data() + offset, sizeof(Ticket)); - offset += FULL_TICKET_SIZE; + // NOTE: Assumes ticket blob will only contain RSA-2048 tickets. + auto ticket = Ticket::Read(std::span{buffer.data() + offset, sizeof(RSA2048Ticket)}); + offset += sizeof(RSA2048Ticket); + if (ticket.IsValid()) { + out.push_back(ticket); + } } } @@ -503,25 +556,35 @@ static std::optional FindTicketOffset(const std::array& data) { return offset; } -std::optional> ParseTicket(const Ticket& ticket, - const RSAKeyPair<2048>& key) { +std::optional KeyManager::ParseTicketTitleKey(const Ticket& ticket) { + if (eticket_rsa_keypair == RSAKeyPair<2048>{}) { + LOG_WARNING(Crypto, + "Skipping ticket title key parsing due to missing ETicket RSA key-pair."); + return std::nullopt; + } + + if (!ticket.IsValid()) { + LOG_WARNING(Crypto, "Attempted to parse title key of invalid ticket."); + return std::nullopt; + } + + if (ticket.GetData().rights_id == Key128{}) { + LOG_WARNING(Crypto, "Attempted to parse title key of ticket with no rights ID."); + return std::nullopt; + } + const auto issuer = ticket.GetData().issuer; if (IsAllZeroArray(issuer)) { + LOG_WARNING(Crypto, "Attempted to parse title key of ticket with invalid issuer."); return std::nullopt; } + if (issuer[0] != 'R' || issuer[1] != 'o' || issuer[2] != 'o' || issuer[3] != 't') { - LOG_INFO(Crypto, "Attempting to parse ticket with non-standard certificate authority."); + LOG_WARNING(Crypto, "Parsing ticket with non-standard certificate authority."); } - Key128 rights_id = ticket.GetData().rights_id; - - if (rights_id == Key128{}) { - return std::nullopt; - } - - if (!std::any_of(ticket.GetData().title_key_common_pad.begin(), - ticket.GetData().title_key_common_pad.end(), [](u8 b) { return b != 0; })) { - return std::make_pair(rights_id, ticket.GetData().title_key_common); + if (ticket.GetData().type == TitleKeyType::Common) { + return ticket.GetData().title_key_common; } mbedtls_mpi D; // RSA Private Exponent @@ -534,9 +597,12 @@ std::optional> ParseTicket(const Ticket& ticket, mbedtls_mpi_init(&S); mbedtls_mpi_init(&M); - mbedtls_mpi_read_binary(&D, key.decryption_key.data(), key.decryption_key.size()); - mbedtls_mpi_read_binary(&N, key.modulus.data(), key.modulus.size()); - mbedtls_mpi_read_binary(&S, ticket.GetData().title_key_block.data(), 0x100); + const auto& title_key_block = ticket.GetData().title_key_block; + mbedtls_mpi_read_binary(&D, eticket_rsa_keypair.decryption_key.data(), + eticket_rsa_keypair.decryption_key.size()); + mbedtls_mpi_read_binary(&N, eticket_rsa_keypair.modulus.data(), + eticket_rsa_keypair.modulus.size()); + mbedtls_mpi_read_binary(&S, title_key_block.data(), title_key_block.size()); mbedtls_mpi_exp_mod(&M, &S, &D, &N, nullptr); @@ -564,8 +630,7 @@ std::optional> ParseTicket(const Ticket& ticket, Key128 key_temp{}; std::memcpy(key_temp.data(), m_2.data() + *offset, key_temp.size()); - - return std::make_pair(rights_id, key_temp); + return key_temp; } KeyManager::KeyManager() { @@ -669,6 +734,14 @@ void KeyManager::LoadFromFile(const std::filesystem::path& file_path, bool is_ti encrypted_keyblobs[index] = Common::HexStringToArray<0xB0>(out[1]); } else if (out[0].compare(0, 20, "eticket_extended_kek") == 0) { eticket_extended_kek = Common::HexStringToArray<576>(out[1]); + } else if (out[0].compare(0, 19, "eticket_rsa_keypair") == 0) { + const auto key_data = Common::HexStringToArray<528>(out[1]); + std::memcpy(eticket_rsa_keypair.decryption_key.data(), key_data.data(), + eticket_rsa_keypair.decryption_key.size()); + std::memcpy(eticket_rsa_keypair.modulus.data(), key_data.data() + 0x100, + eticket_rsa_keypair.modulus.size()); + std::memcpy(eticket_rsa_keypair.exponent.data(), key_data.data() + 0x200, + eticket_rsa_keypair.exponent.size()); } else { for (const auto& kv : KEYS_VARIABLE_LENGTH) { if (!ValidCryptoRevisionString(out[0], kv.second.size(), 2)) { @@ -1110,13 +1183,12 @@ void KeyManager::DeriveETicket(PartitionDataManager& data, eticket_extended_kek = data.GetETicketExtendedKek(); WriteKeyToFile(KeyCategory::Console, "eticket_extended_kek", eticket_extended_kek); + DeriveETicketRSAKey(); PopulateTickets(); } void KeyManager::PopulateTickets() { - const auto rsa_key = GetETicketRSAKey(); - - if (rsa_key == RSAKeyPair<2048>{}) { + if (eticket_rsa_keypair == RSAKeyPair<2048>{}) { return; } @@ -1136,30 +1208,12 @@ void KeyManager::PopulateTickets() { const Common::FS::IOFile save_e2{system_save_e2_path, Common::FS::FileAccessMode::Read, Common::FS::FileType::BinaryFile}; + auto tickets = GetTicketblob(save_e1); const auto blob2 = GetTicketblob(save_e2); - auto res = GetTicketblob(save_e1); + tickets.insert(tickets.end(), blob2.begin(), blob2.end()); - const auto idx = res.size(); - res.insert(res.end(), blob2.begin(), blob2.end()); - - for (std::size_t i = 0; i < res.size(); ++i) { - const auto common = i < idx; - const auto pair = ParseTicket(res[i], rsa_key); - if (!pair) { - continue; - } - - const auto& [rid, key] = *pair; - u128 rights_id; - std::memcpy(rights_id.data(), rid.data(), rid.size()); - - if (common) { - common_tickets[rights_id] = res[i]; - } else { - personal_tickets[rights_id] = res[i]; - } - - SetKey(S128KeyType::Titlekey, key, rights_id[1], rights_id[0]); + for (const auto& ticket : tickets) { + AddTicket(ticket); } } @@ -1291,41 +1345,33 @@ const std::map& KeyManager::GetPersonalizedTickets() const { return personal_tickets; } -bool KeyManager::AddTicketCommon(Ticket raw) { - const auto rsa_key = GetETicketRSAKey(); - if (rsa_key == RSAKeyPair<2048>{}) { +bool KeyManager::AddTicket(const Ticket& ticket) { + if (!ticket.IsValid()) { + LOG_WARNING(Crypto, "Attempted to add invalid ticket."); return false; } - const auto pair = ParseTicket(raw, rsa_key); - if (!pair) { - return false; - } - - const auto& [rid, key] = *pair; + const auto& rid = ticket.GetData().rights_id; u128 rights_id; std::memcpy(rights_id.data(), rid.data(), rid.size()); - common_tickets[rights_id] = raw; - SetKey(S128KeyType::Titlekey, key, rights_id[1], rights_id[0]); - return true; -} - -bool KeyManager::AddTicketPersonalized(Ticket raw) { - const auto rsa_key = GetETicketRSAKey(); - if (rsa_key == RSAKeyPair<2048>{}) { - return false; + if (ticket.GetData().type == Core::Crypto::TitleKeyType::Common) { + common_tickets[rights_id] = ticket; + } else { + personal_tickets[rights_id] = ticket; } - const auto pair = ParseTicket(raw, rsa_key); - if (!pair) { - return false; + if (HasKey(S128KeyType::Titlekey, rights_id[1], rights_id[0])) { + LOG_DEBUG(Crypto, + "Skipping parsing title key from ticket for known rights ID {:016X}{:016X}.", + rights_id[1], rights_id[0]); + return true; } - const auto& [rid, key] = *pair; - u128 rights_id; - std::memcpy(rights_id.data(), rid.data(), rid.size()); - common_tickets[rights_id] = raw; - SetKey(S128KeyType::Titlekey, key, rights_id[1], rights_id[0]); + const auto key = ParseTicketTitleKey(ticket); + if (!key) { + return false; + } + SetKey(S128KeyType::Titlekey, key.value(), rights_id[1], rights_id[0]); return true; } } // namespace Core::Crypto diff --git a/src/core/crypto/key_manager.h b/src/core/crypto/key_manager.h index e0c0332c8..e2c3c7ce2 100755 --- a/src/core/crypto/key_manager.h +++ b/src/core/crypto/key_manager.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -29,8 +30,6 @@ enum class ResultStatus : u16; namespace Core::Crypto { -constexpr u64 TICKET_FILE_TITLEKEY_OFFSET = 0x180; - using Key128 = std::array; using Key256 = std::array; using SHA256Hash = std::array; @@ -82,6 +81,7 @@ struct RSA4096Ticket { INSERT_PADDING_BYTES(0x3C); TicketData data; }; +static_assert(sizeof(RSA4096Ticket) == 0x500, "RSA4096Ticket has incorrect size."); struct RSA2048Ticket { SignatureType sig_type; @@ -89,6 +89,7 @@ struct RSA2048Ticket { INSERT_PADDING_BYTES(0x3C); TicketData data; }; +static_assert(sizeof(RSA2048Ticket) == 0x400, "RSA2048Ticket has incorrect size."); struct ECDSATicket { SignatureType sig_type; @@ -96,16 +97,41 @@ struct ECDSATicket { INSERT_PADDING_BYTES(0x40); TicketData data; }; +static_assert(sizeof(ECDSATicket) == 0x340, "ECDSATicket has incorrect size."); struct Ticket { - std::variant data; + std::variant data; - SignatureType GetSignatureType() const; - TicketData& GetData(); - const TicketData& GetData() const; - u64 GetSize() const; + [[nodiscard]] bool IsValid() const; + [[nodiscard]] SignatureType GetSignatureType() const; + [[nodiscard]] TicketData& GetData(); + [[nodiscard]] const TicketData& GetData() const; + [[nodiscard]] u64 GetSize() const; + /** + * Synthesizes a common ticket given a title key and rights ID. + * + * @param title_key Title key to store in the ticket. + * @param rights_id Rights ID the ticket is for. + * @return The synthesized common ticket. + */ static Ticket SynthesizeCommon(Key128 title_key, const std::array& rights_id); + + /** + * Reads a ticket from a file. + * + * @param file File to read the ticket from. + * @return The read ticket. If the ticket data is invalid, Ticket::IsValid() will be false. + */ + static Ticket Read(const FileSys::VirtualFile& file); + + /** + * Reads a ticket from a memory buffer. + * + * @param raw_data Buffer to read the ticket from. + * @return The read ticket. If the ticket data is invalid, Ticket::IsValid() will be false. + */ + static Ticket Read(std::span raw_data); }; static_assert(sizeof(Key128) == 16, "Key128 must be 128 bytes big."); @@ -264,8 +290,7 @@ public: const std::map& GetCommonTickets() const; const std::map& GetPersonalizedTickets() const; - bool AddTicketCommon(Ticket raw); - bool AddTicketPersonalized(Ticket raw); + bool AddTicket(const Ticket& ticket); void ReloadKeys(); bool AreKeysLoaded() const; @@ -283,6 +308,7 @@ private: std::array, 0x20> encrypted_keyblobs{}; std::array, 0x20> keyblobs{}; std::array eticket_extended_kek{}; + RSAKeyPair<2048> eticket_rsa_keypair{}; bool dev_mode; void LoadFromFile(const std::filesystem::path& file_path, bool is_title_keys); @@ -293,10 +319,13 @@ private: void DeriveGeneralPurposeKeys(std::size_t crypto_revision); - RSAKeyPair<2048> GetETicketRSAKey() const; + void DeriveETicketRSAKey(); void SetKeyWrapped(S128KeyType id, Key128 key, u64 field1 = 0, u64 field2 = 0); void SetKeyWrapped(S256KeyType id, Key256 key, u64 field1 = 0, u64 field2 = 0); + + /// Parses the title key section of a ticket. + std::optional ParseTicketTitleKey(const Ticket& ticket); }; Key128 GenerateKeyEncryptionKey(Key128 source, Key128 master, Key128 kek_seed, Key128 key_seed); @@ -311,9 +340,4 @@ Loader::ResultStatus DeriveSDKeys(std::array& sd_keys, KeyManager& ke std::vector GetTicketblob(const Common::FS::IOFile& ticket_save); -// Returns a pair of {rights_id, titlekey}. Fails if the ticket has no certificate authority -// (offset 0x140-0x144 is zero) -std::optional> ParseTicket(const Ticket& ticket, - const RSAKeyPair<2048>& eticket_extended_key); - } // namespace Core::Crypto diff --git a/src/core/file_sys/submission_package.cpp b/src/core/file_sys/submission_package.cpp index 831fdac53..d6d12ea03 100755 --- a/src/core/file_sys/submission_package.cpp +++ b/src/core/file_sys/submission_package.cpp @@ -164,24 +164,6 @@ VirtualFile NSP::GetNCAFile(u64 title_id, ContentRecordType type, TitleType titl return nullptr; } -std::vector NSP::GetTitlekey() const { - if (extracted) - LOG_WARNING(Service_FS, "called on an NSP that is of type extracted."); - std::vector out; - for (const auto& ticket_file : ticket_files) { - if (ticket_file == nullptr || - ticket_file->GetSize() < - Core::Crypto::TICKET_FILE_TITLEKEY_OFFSET + sizeof(Core::Crypto::Key128)) { - continue; - } - - out.emplace_back(); - ticket_file->Read(out.back().data(), out.back().size(), - Core::Crypto::TICKET_FILE_TITLEKEY_OFFSET); - } - return out; -} - std::vector NSP::GetFiles() const { return pfs->GetFiles(); } @@ -208,22 +190,11 @@ void NSP::SetTicketKeys(const std::vector& files) { continue; } - if (ticket_file->GetSize() < - Core::Crypto::TICKET_FILE_TITLEKEY_OFFSET + sizeof(Core::Crypto::Key128)) { + auto ticket = Core::Crypto::Ticket::Read(ticket_file); + if (!keys.AddTicket(ticket)) { + LOG_WARNING(Common_Filesystem, "Could not load NSP ticket {}", ticket_file->GetName()); continue; } - - Core::Crypto::Key128 key{}; - ticket_file->Read(key.data(), key.size(), Core::Crypto::TICKET_FILE_TITLEKEY_OFFSET); - - // We get the name without the extension in order to create the rights ID. - std::string name_only(ticket_file->GetName()); - name_only.erase(name_only.size() - 4); - - const auto rights_id_raw = Common::HexStringToArray<16>(name_only); - u128 rights_id; - std::memcpy(rights_id.data(), rights_id_raw.data(), sizeof(u128)); - keys.SetKey(Core::Crypto::S128KeyType::Titlekey, key, rights_id[1], rights_id[0]); } } diff --git a/src/core/file_sys/submission_package.h b/src/core/file_sys/submission_package.h index 4190edda2..e6031f924 100755 --- a/src/core/file_sys/submission_package.h +++ b/src/core/file_sys/submission_package.h @@ -53,7 +53,6 @@ public: TitleType title_type = TitleType::Application) const; VirtualFile GetNCAFile(u64 title_id, ContentRecordType type, TitleType title_type = TitleType::Application) const; - std::vector GetTitlekey() const; std::vector GetFiles() const override; diff --git a/src/core/hle/service/es/es.cpp b/src/core/hle/service/es/es.cpp index 4fcb3156c..529ce8890 100755 --- a/src/core/hle/service/es/es.cpp +++ b/src/core/hle/service/es/es.cpp @@ -122,20 +122,18 @@ private: } void ImportTicket(HLERequestContext& ctx) { - const auto ticket = ctx.ReadBuffer(); + const auto raw_ticket = ctx.ReadBuffer(); [[maybe_unused]] const auto cert = ctx.ReadBuffer(1); - if (ticket.size() < sizeof(Core::Crypto::Ticket)) { + if (raw_ticket.size() < sizeof(Core::Crypto::Ticket)) { LOG_ERROR(Service_ETicket, "The input buffer is not large enough!"); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ERROR_INVALID_ARGUMENT); return; } - Core::Crypto::Ticket raw{}; - std::memcpy(&raw, ticket.data(), sizeof(Core::Crypto::Ticket)); - - if (!keys.AddTicketPersonalized(raw)) { + Core::Crypto::Ticket ticket = Core::Crypto::Ticket::Read(raw_ticket); + if (!keys.AddTicket(ticket)) { LOG_ERROR(Service_ETicket, "The ticket could not be imported!"); IPC::ResponseBuilder rb{ctx, 2}; rb.Push(ERROR_INVALID_ARGUMENT); diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 4257f1186..a06cde47c 100755 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -42,6 +42,7 @@ set(SHADER_FILES present_bicubic.frag present_gaussian.frag queries_prefix_scan_sum.comp + queries_prefix_scan_sum_nosubgroups.comp resolve_conditional_render.comp smaa_edge_detection.vert smaa_edge_detection.frag @@ -72,6 +73,7 @@ if ("${GLSLANGVALIDATOR}" STREQUAL "GLSLANGVALIDATOR-NOTFOUND") endif() set(GLSL_FLAGS "") +set(SPIR_V_VERSION "spirv1.3") set(QUIET_FLAG "--quiet") set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) @@ -125,7 +127,7 @@ foreach(FILENAME IN ITEMS ${SHADER_FILES}) OUTPUT ${SPIRV_HEADER_FILE} COMMAND - ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} + ${GLSLANGVALIDATOR} -V ${QUIET_FLAG} -I"${FIDELITYFX_INCLUDE_DIR}" ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE} --target-env ${SPIR_V_VERSION} MAIN_DEPENDENCY ${SOURCE_FILE} ) diff --git a/src/video_core/host_shaders/queries_prefix_scan_sum.comp b/src/video_core/host_shaders/queries_prefix_scan_sum.comp index dce1279fe..8f10e248e 100755 --- a/src/video_core/host_shaders/queries_prefix_scan_sum.comp +++ b/src/video_core/host_shaders/queries_prefix_scan_sum.comp @@ -1,26 +1,24 @@ -// SPDX-FileCopyrightText: Copyright 2015 Graham Sellers, Richard Wright Jr. and Nicholas Haemel -// SPDX-License-Identifier: MIT - -// Code obtained from OpenGL SuperBible, Seventh Edition by Graham Sellers, Richard Wright Jr. and -// Nicholas Haemel. Modified to suit needs and optimize for subgroup +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later #version 460 core +#extension GL_KHR_shader_subgroup_basic : require +#extension GL_KHR_shader_subgroup_shuffle : require +#extension GL_KHR_shader_subgroup_shuffle_relative : require +#extension GL_KHR_shader_subgroup_arithmetic : require + #ifdef VULKAN -#extension GL_KHR_shader_subgroup_arithmetic : enable #define HAS_EXTENDED_TYPES 1 #define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { -#define END_PUSH_CONSTANTS \ - } \ - ; +#define END_PUSH_CONSTANTS }; #define UNIFORM(n) #define BINDING_INPUT_BUFFER 0 #define BINDING_OUTPUT_IMAGE 1 #else // ^^^ Vulkan ^^^ // vvv OpenGL vvv -#extension GL_KHR_shader_subgroup_arithmetic : enable #extension GL_NV_gpu_shader5 : enable #ifdef GL_NV_gpu_shader5 #define HAS_EXTENDED_TYPES 1 @@ -43,19 +41,20 @@ END_PUSH_CONSTANTS layout(local_size_x = 32) in; layout(std430, binding = 0) readonly buffer block1 { - uvec2 input_data[gl_WorkGroupSize.x]; + uvec2 input_data[]; }; -layout(std430, binding = 1) writeonly coherent buffer block2 { - uvec2 output_data[gl_WorkGroupSize.x]; +layout(std430, binding = 1) coherent buffer block2 { + uvec2 output_data[]; }; layout(std430, binding = 2) coherent buffer block3 { uvec2 accumulated_data; }; -shared uvec2 shared_data[gl_WorkGroupSize.x * 2]; +shared uvec2 shared_data[2]; +// Simple Uint64 add that uses 2 uint variables for GPUs that don't support uint64 uvec2 AddUint64(uvec2 value_1, uvec2 value_2) { uint carry = 0; uvec2 result; @@ -64,61 +63,102 @@ uvec2 AddUint64(uvec2 value_1, uvec2 value_2) { return result; } -void main(void) { - uint id = gl_LocalInvocationID.x; - uvec2 base_value_1 = (id * 2) < max_accumulation_base ? accumulated_data : uvec2(0); - uvec2 base_value_2 = (id * 2 + 1) < max_accumulation_base ? accumulated_data : uvec2(0); - uint work_size = gl_WorkGroupSize.x; - uint rd_id; - uint wr_id; - uint mask; - uvec2 input_1 = input_data[id * 2]; - uvec2 input_2 = input_data[id * 2 + 1]; - // The number of steps is the log base 2 of the - // work group size, which should be a power of 2 - const uint steps = uint(log2(work_size)) + 1; - uint step = 0; +// do subgroup Prefix Sum using Hillis and Steele's algorithm +uvec2 subgroupInclusiveAddUint64(uvec2 value) { + uvec2 result = value; + for (uint i = 1; i < gl_SubgroupSize; i *= 2) { + if (i <= gl_SubgroupInvocationID) { + uvec2 other = subgroupShuffleUp(result, i); // get value from subgroup_inv_id - i; + result = AddUint64(result, other); + } + } + return result; +} - // Each invocation is responsible for the content of - // two elements of the output array - shared_data[id * 2] = input_1; - shared_data[id * 2 + 1] = input_2; - // Synchronize to make sure that everyone has initialized - // their elements of shared_data[] with data loaded from - // the input arrays +// Writes down the results to the output buffer and to the accumulation buffer +void WriteResults(uvec2 result) { + uint current_global_id = gl_GlobalInvocationID.x; + uvec2 base_data = current_global_id < max_accumulation_base ? accumulated_data : uvec2(0); + output_data[current_global_id] = result + base_data; + if (max_accumulation_base >= accumulation_limit + 1) { + if (current_global_id == accumulation_limit) { + accumulated_data = result; + } + return; + } + // We have that ugly case in which the accumulation data is reset in the middle somewhere. + barrier(); + groupMemoryBarrier(); + if (current_global_id == accumulation_limit) { + uvec2 value_1 = output_data[max_accumulation_base]; + accumulated_data = AddUint64(result, -value_1); + } +} + +void main() { + uint subgroup_inv_id = gl_SubgroupInvocationID; + uint subgroup_id = gl_SubgroupID; + uint last_subgroup_id = subgroupMax(subgroup_inv_id); + uint current_global_id = gl_GlobalInvocationID.x; + uint total_work = gl_NumWorkGroups.x * gl_WorkGroupSize.x; + uvec2 data = input_data[current_global_id]; + // make sure all input data has been loaded + subgroupBarrier(); + subgroupMemoryBarrier(); + + uvec2 result = subgroupInclusiveAddUint64(data); + + // if we had less queries than our subgroup, just write down the results. + if (total_work <= gl_SubgroupSize) { // This condition is constant per dispatch. + WriteResults(result); + return; + } + + // We now have more, so lets write the last result into shared memory. + // Only pick the last subgroup. + if (subgroup_inv_id == last_subgroup_id) { + shared_data[subgroup_id] = result; + } + // wait until everyone loaded their stuffs barrier(); memoryBarrierShared(); - // For each step... - for (step = 0; step < steps; step++) { - // Calculate the read and write index in the - // shared array - mask = (1 << step) - 1; - rd_id = ((id >> step) << (step + 1)) + mask; - wr_id = rd_id + 1 + (id & mask); - // Accumulate the read data into our element - shared_data[wr_id] = AddUint64(shared_data[rd_id], shared_data[wr_id]); - // Synchronize again to make sure that everyone - // has caught up with us + // Case 1: the total work for the grouped results can be calculated in a single subgroup + // operation (about 1024 queries). + uint total_extra_work = gl_NumSubgroups * gl_NumWorkGroups.x; + if (total_extra_work <= gl_SubgroupSize) { // This condition is constant per dispatch. + if (subgroup_id != 0) { + uvec2 tmp = shared_data[subgroup_inv_id]; + subgroupBarrier(); + subgroupMemoryBarrierShared(); + tmp = subgroupInclusiveAddUint64(tmp); + result = AddUint64(result, subgroupShuffle(tmp, subgroup_id - 1)); + } + + WriteResults(result); + return; + } + + // Case 2: our work amount is huge, so lets do it in O(log n) steps. + const uint extra = (total_extra_work ^ (total_extra_work - 1)) != 0 ? 1 : 0; + const uint steps = 1 << (findMSB(total_extra_work) + extra); + uint step; + // Hillis and Steele's algorithm + for (step = 1; step < steps; step *= 2) { + if (current_global_id < steps && current_global_id >= step) { + uvec2 current = shared_data[current_global_id]; + uvec2 other = shared_data[current_global_id - step]; + shared_data[current_global_id] = AddUint64(current, other); + } + // steps is constant, so this will always execute in ever workgroup's thread. barrier(); memoryBarrierShared(); } - // Add the accumulation - shared_data[id * 2] = AddUint64(shared_data[id * 2], base_value_1); - shared_data[id * 2 + 1] = AddUint64(shared_data[id * 2 + 1], base_value_2); - barrier(); - memoryBarrierShared(); - - // Finally write our data back to the output buffer - output_data[id * 2] = shared_data[id * 2]; - output_data[id * 2 + 1] = shared_data[id * 2 + 1]; - if (id == 0) { - if (max_accumulation_base >= accumulation_limit + 1) { - accumulated_data = shared_data[accumulation_limit]; - return; - } - uvec2 value_1 = shared_data[max_accumulation_base]; - uvec2 value_2 = shared_data[accumulation_limit]; - accumulated_data = AddUint64(value_1, -value_2); + // Only add results for groups higher than 0 + if (subgroup_id != 0) { + result = AddUint64(result, shared_data[subgroup_id - 1]); } + + // Just write the final results. We are done + WriteResults(result); } \ No newline at end of file diff --git a/src/video_core/host_shaders/queries_prefix_scan_sum_nosubgroups.comp b/src/video_core/host_shaders/queries_prefix_scan_sum_nosubgroups.comp new file mode 100755 index 000000000..8021476ed --- /dev/null +++ b/src/video_core/host_shaders/queries_prefix_scan_sum_nosubgroups.comp @@ -0,0 +1,120 @@ +// SPDX-FileCopyrightText: Copyright 2015 Graham Sellers, Richard Wright Jr. and Nicholas Haemel +// SPDX-License-Identifier: MIT + +// Code obtained from OpenGL SuperBible, Seventh Edition by Graham Sellers, Richard Wright Jr. and +// Nicholas Haemel. Modified to suit needs. + +#version 460 core + +#ifdef VULKAN + +#define HAS_EXTENDED_TYPES 1 +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#define BINDING_INPUT_BUFFER 0 +#define BINDING_OUTPUT_IMAGE 1 + +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv + +#extension GL_NV_gpu_shader5 : enable +#ifdef GL_NV_gpu_shader5 +#define HAS_EXTENDED_TYPES 1 +#else +#define HAS_EXTENDED_TYPES 0 +#endif +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout(location = n) uniform +#define BINDING_INPUT_BUFFER 0 +#define BINDING_OUTPUT_IMAGE 0 + +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) uint max_accumulation_base; +UNIFORM(1) uint accumulation_limit; +END_PUSH_CONSTANTS + +layout(local_size_x = 32) in; + +layout(std430, binding = 0) readonly buffer block1 { + uvec2 input_data[gl_WorkGroupSize.x]; +}; + +layout(std430, binding = 1) writeonly coherent buffer block2 { + uvec2 output_data[gl_WorkGroupSize.x]; +}; + +layout(std430, binding = 2) coherent buffer block3 { + uvec2 accumulated_data; +}; + +shared uvec2 shared_data[gl_WorkGroupSize.x * 2]; + +uvec2 AddUint64(uvec2 value_1, uvec2 value_2) { + uint carry = 0; + uvec2 result; + result.x = uaddCarry(value_1.x, value_2.x, carry); + result.y = value_1.y + value_2.y + carry; + return result; +} + +void main(void) { + uint id = gl_LocalInvocationID.x; + uvec2 base_value_1 = (id * 2) < max_accumulation_base ? accumulated_data : uvec2(0); + uvec2 base_value_2 = (id * 2 + 1) < max_accumulation_base ? accumulated_data : uvec2(0); + uint work_size = gl_WorkGroupSize.x; + uint rd_id; + uint wr_id; + uint mask; + uvec2 input_1 = input_data[id * 2]; + uvec2 input_2 = input_data[id * 2 + 1]; + // The number of steps is the log base 2 of the + // work group size, which should be a power of 2 + const uint steps = uint(log2(work_size)) + 1; + uint step = 0; + + // Each invocation is responsible for the content of + // two elements of the output array + shared_data[id * 2] = input_1; + shared_data[id * 2 + 1] = input_2; + // Synchronize to make sure that everyone has initialized + // their elements of shared_data[] with data loaded from + // the input arrays + barrier(); + memoryBarrierShared(); + // For each step... + for (step = 0; step < steps; step++) { + // Calculate the read and write index in the + // shared array + mask = (1 << step) - 1; + rd_id = ((id >> step) << (step + 1)) + mask; + wr_id = rd_id + 1 + (id & mask); + // Accumulate the read data into our element + + shared_data[wr_id] = AddUint64(shared_data[rd_id], shared_data[wr_id]); + // Synchronize again to make sure that everyone + // has caught up with us + barrier(); + memoryBarrierShared(); + } + // Add the accumulation + shared_data[id * 2] = AddUint64(shared_data[id * 2], base_value_1); + shared_data[id * 2 + 1] = AddUint64(shared_data[id * 2 + 1], base_value_2); + barrier(); + memoryBarrierShared(); + + // Finally write our data back to the output buffer + output_data[id * 2] = shared_data[id * 2]; + output_data[id * 2 + 1] = shared_data[id * 2 + 1]; + if (id == 0) { + if (max_accumulation_base >= accumulation_limit + 1) { + accumulated_data = shared_data[accumulation_limit]; + return; + } + uvec2 value_1 = shared_data[max_accumulation_base]; + uvec2 value_2 = shared_data[accumulation_limit]; + accumulated_data = AddUint64(value_1, -value_2); + } +} \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index eeef7297d..381440c12 100755 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -13,6 +13,7 @@ #include "common/div_ceil.h" #include "video_core/host_shaders/astc_decoder_comp_spv.h" #include "video_core/host_shaders/queries_prefix_scan_sum_comp_spv.h" +#include "video_core/host_shaders/queries_prefix_scan_sum_nosubgroups_comp_spv.h" #include "video_core/host_shaders/resolve_conditional_render_comp_spv.h" #include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h" #include "video_core/host_shaders/vulkan_uint8_comp_spv.h" @@ -187,7 +188,8 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, vk::Span bindings, vk::Span templates, const DescriptorBankInfo& bank_info, - vk::Span push_constants, std::span code) + vk::Span push_constants, std::span code, + std::optional optional_subgroup_size) : device{device_} { descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, @@ -228,13 +230,19 @@ ComputePass::ComputePass(const Device& device_, DescriptorPool& descriptor_pool, .pCode = code.data(), }); device.SaveShader(code); + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = optional_subgroup_size ? *optional_subgroup_size : 32U, + }; + bool use_setup_size = device.IsExtSubgroupSizeControlSupported() && optional_subgroup_size; pipeline = device.GetLogical().CreateComputePipeline({ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .pNext = nullptr, .flags = 0, .stage{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, + .pNext = use_setup_size ? &subgroup_size_ci : nullptr, .flags = 0, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = *module, @@ -374,7 +382,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_ static constexpr VkMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .pNext = nullptr, - .srcAccessMask = VK_ACCESS_NONE, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT | VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, }; static constexpr VkMemoryBarrier write_barrier{ @@ -399,10 +407,17 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_ QueriesPrefixScanPass::QueriesPrefixScanPass( const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_) - : ComputePass(device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS, - QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO, - COMPUTE_PUSH_CONSTANT_RANGE, - QUERIES_PREFIX_SCAN_SUM_COMP_SPV), + : ComputePass( + device_, descriptor_pool_, QUERIES_SCAN_DESCRIPTOR_SET_BINDINGS, + QUERIES_SCAN_DESCRIPTOR_UPDATE_TEMPLATE, QUERIES_SCAN_BANK_INFO, + COMPUTE_PUSH_CONSTANT_RANGE, + device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_BASIC_BIT) && + device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) && + device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_BIT) && + device_.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT) + ? std::span(QUERIES_PREFIX_SCAN_SUM_COMP_SPV) + : std::span(QUERIES_PREFIX_SCAN_SUM_NOSUBGROUPS_COMP_SPV), + {32}), scheduler{scheduler_}, compute_pass_descriptor_queue{compute_pass_descriptor_queue_} {} void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffer, @@ -422,7 +437,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe static constexpr VkMemoryBarrier read_barrier{ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, .pNext = nullptr, - .srcAccessMask = VK_ACCESS_NONE, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, }; static constexpr VkMemoryBarrier write_barrier{ diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 956d73ac1..95df5f894 100755 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include @@ -31,7 +32,8 @@ public: vk::Span bindings, vk::Span templates, const DescriptorBankInfo& bank_info, - vk::Span push_constants, std::span code); + vk::Span push_constants, std::span code, + std::optional optional_subgroup_size = std::nullopt); ~ComputePass(); protected: