diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt index 6644784729..6abdc1e1bd 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt @@ -10,6 +10,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting { CPU_DEBUG_MODE("cpu_debug_mode"), FASTMEM("cpuopt_fastmem"), FASTMEM_EXCLUSIVES("cpuopt_fastmem_exclusives"), + CORE_SYNC_CORE_SPEED("sync_core_speed"), RENDERER_USE_SPEED_LIMIT("use_speed_limit"), USE_DOCKED_MODE("use_docked_mode"), RENDERER_USE_DISK_SHADER_CACHE("use_disk_shader_cache"), @@ -17,6 +18,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting { RENDERER_ASYNCHRONOUS_SHADERS("use_asynchronous_shaders"), RENDERER_REACTIVE_FLUSHING("use_reactive_flushing"), RENDERER_DEBUG("debug"), + RENDERER_ENHANCED_SHADER_BUILDING("use_enhanced_shader_building"), PICTURE_IN_PICTURE("picture_in_picture"), USE_CUSTOM_RTC("custom_rtc_enabled"), BLACK_BACKGROUNDS("black_backgrounds"), @@ -26,7 +28,19 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting { SHOW_PERFORMANCE_OVERLAY("show_performance_overlay"), SHOW_INPUT_OVERLAY("show_input_overlay"), TOUCHSCREEN("touchscreen"), - SHOW_THERMAL_OVERLAY("show_thermal_overlay"); + SHOW_THERMAL_OVERLAY("show_thermal_overlay"), + FRAME_INTERPOLATION("frame_interpolation"), + FRAME_SKIPPING("frame_skipping"), + SHOW_FPS("show_fps"), + SHOW_FRAMETIME("show_frame_time"), + SHOW_SPEED("show_speed"), + SHOW_APP_RAM_USAGE("show_app_ram_usage"), + SHOW_SYSTEM_RAM_USAGE("show_system_ram_usage"), + SHOW_BAT_TEMPERATURE("show_bat_temperature"), + OVERLAY_BACKGROUND("overlay_background"),; + external fun isFrameSkippingEnabled(): Boolean + external fun isFrameInterpolationEnabled(): Boolean + override fun getBoolean(needsGlobal: Boolean): Boolean = NativeConfig.getBoolean(key, needsGlobal) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt index 0165cb2d1d..035a33a762 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/IntSetting.kt @@ -11,6 +11,11 @@ enum class IntSetting(override val key: String) : AbstractIntSetting { REGION_INDEX("region_index"), LANGUAGE_INDEX("language_index"), RENDERER_BACKEND("backend"), + RENDERER_VRAM_USAGE_MODE("vram_usage_mode"), + RENDERER_SHADER_BACKEND("shader_backend"), + RENDERER_NVDEC_EMULATION("nvdec_emulation"), + RENDERER_ASTC_DECODE_METHOD("accelerate_astc"), + RENDERER_ASTC_RECOMPRESSION("astc_recompression"), RENDERER_ACCURACY("gpu_accuracy"), RENDERER_RESOLUTION("resolution_setup"), RENDERER_VSYNC("use_vsync"), @@ -18,6 +23,7 @@ enum class IntSetting(override val key: String) : AbstractIntSetting { RENDERER_ANTI_ALIASING("anti_aliasing"), RENDERER_SCREEN_LAYOUT("screen_layout"), RENDERER_ASPECT_RATIO("aspect_ratio"), + RENDERER_OPTIMIZE_SPIRV_OUTPUT("optimize_spirv_output"), AUDIO_OUTPUT_ENGINE("output_engine"), MAX_ANISOTROPY("max_anisotropy"), THEME("theme"), @@ -26,6 +32,7 @@ enum class IntSetting(override val key: String) : AbstractIntSetting { OVERLAY_OPACITY("control_opacity"), LOCK_DRAWER("lock_drawer"), VERTICAL_ALIGNMENT("vertical_alignment"), + PERF_OVERLAY_POSITION("perf_overlay_position"), FSR_SHARPENING_SLIDER("fsr_sharpening_slider"); override fun getInt(needsGlobal: Boolean): Int = NativeConfig.getInt(key, needsGlobal) diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt index e189c21560..299a192a13 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/Settings.kt @@ -12,6 +12,7 @@ object Settings { SECTION_ROOT(R.string.advanced_settings), SECTION_SYSTEM(R.string.preferences_system), SECTION_RENDERER(R.string.preferences_graphics), + SECTION_PERFORMANCE_STATS(R.string.show_stats_overlay), SECTION_AUDIO(R.string.preferences_audio), SECTION_INPUT(R.string.preferences_controls), SECTION_INPUT_PLAYER_ONE, @@ -23,7 +24,8 @@ object Settings { SECTION_INPUT_PLAYER_SEVEN, SECTION_INPUT_PLAYER_EIGHT, SECTION_THEME(R.string.preferences_theme), - SECTION_DEBUG(R.string.preferences_debug); + SECTION_DEBUG(R.string.preferences_debug), + SECTION_EDEN_VEIL(R.string.eden_veil); } fun getPlayerString(player: Int): String = @@ -32,6 +34,7 @@ object Settings { const val PREF_FIRST_APP_LAUNCH = "FirstApplicationLaunch" const val PREF_SHOULD_SHOW_PRE_ALPHA_WARNING = "ShouldShowPreAlphaWarning" const val PREF_MEMORY_WARNING_SHOWN = "MemoryWarningShown" + const val SECTION_STATS_OVERLAY = "Stats Overlay" // Deprecated input overlay preference keys const val PREF_CONTROL_SCALE = "controlScale" @@ -120,4 +123,15 @@ object Settings { entries.firstOrNull { it.int == int } ?: Center } } + + enum class OptimizeSpirvOutput(val int: Int) { + Never(0), + OnLoad(1), + Always(2); + + companion object { + fun from(int: Int): OptimizeSpirvOutput = + entries.firstOrNull { it.int == int } ?: OnLoad + } + } } diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt index 5fdf983185..8fa90bf05f 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt @@ -172,6 +172,75 @@ abstract class SettingsItem( override fun reset() = BooleanSetting.USE_DOCKED_MODE.reset() } + val enableInterpolationSetting = object : AbstractBooleanSetting { + override val key = BooleanSetting.FRAME_INTERPOLATION.key + + override fun getBoolean(needsGlobal: Boolean): Boolean = + BooleanSetting.FRAME_INTERPOLATION.getBoolean(needsGlobal) + + override fun setBoolean(value: Boolean) = + BooleanSetting.FRAME_INTERPOLATION.setBoolean(value) + + override val defaultValue = BooleanSetting.FRAME_INTERPOLATION.defaultValue + + override fun getValueAsString(needsGlobal: Boolean): String = + BooleanSetting.FRAME_INTERPOLATION.getValueAsString(needsGlobal) + + override fun reset() = BooleanSetting.FRAME_INTERPOLATION.reset() + } + + val enableFrameSkippingSetting = object : AbstractBooleanSetting { + override val key = BooleanSetting.FRAME_SKIPPING.key + + override fun getBoolean(needsGlobal: Boolean): Boolean = + BooleanSetting.FRAME_SKIPPING.getBoolean(needsGlobal) + + override fun setBoolean(value: Boolean) = + BooleanSetting.FRAME_SKIPPING.setBoolean(value) + + override val defaultValue = BooleanSetting.FRAME_SKIPPING.defaultValue + + override fun getValueAsString(needsGlobal: Boolean): String = + BooleanSetting.FRAME_SKIPPING.getValueAsString(needsGlobal) + + override fun reset() = BooleanSetting.FRAME_SKIPPING.reset() + } + + val syncCoreSpeedSetting = object : AbstractBooleanSetting { + override val key = BooleanSetting.CORE_SYNC_CORE_SPEED.key + + override fun getBoolean(needsGlobal: Boolean): Boolean { + return BooleanSetting.CORE_SYNC_CORE_SPEED.getBoolean(needsGlobal) + } + + override fun setBoolean(value: Boolean) { + BooleanSetting.CORE_SYNC_CORE_SPEED.setBoolean(value) + } + + override val defaultValue = BooleanSetting.CORE_SYNC_CORE_SPEED.defaultValue + + override fun getValueAsString(needsGlobal: Boolean): String = + BooleanSetting.CORE_SYNC_CORE_SPEED.getValueAsString(needsGlobal) + + override fun reset() = BooleanSetting.CORE_SYNC_CORE_SPEED.reset() + } + + put( + SwitchSetting( + BooleanSetting.FRAME_INTERPOLATION, + titleId = R.string.frame_interpolation, + descriptionId = R.string.frame_interpolation_description + ) + ) + + put( + SwitchSetting( + BooleanSetting.FRAME_SKIPPING, + titleId = R.string.frame_skipping, + descriptionId = R.string.frame_skipping_description + ) + ) + put( SwitchSetting( dockedModeSetting, @@ -180,6 +249,14 @@ abstract class SettingsItem( ) ) + put( + SwitchSetting( + syncCoreSpeedSetting, + titleId = R.string.use_sync_core, + descriptionId = R.string.use_sync_core_description + ) + ) + put( SingleChoiceSetting( IntSetting.REGION_INDEX, @@ -212,6 +289,46 @@ abstract class SettingsItem( valuesId = R.array.rendererAccuracyValues ) ) + put( + SingleChoiceSetting( + IntSetting.RENDERER_SHADER_BACKEND, + titleId = R.string.shader_backend, + choicesId = R.array.rendererShaderNames, + valuesId = R.array.rendererShaderValues + ) + ) + put( + SingleChoiceSetting( + IntSetting.RENDERER_NVDEC_EMULATION, + titleId = R.string.nvdec_emulation, + choicesId = R.array.rendererNvdecNames, + valuesId = R.array.rendererNvdecValues + ) + ) + put( + SingleChoiceSetting( + IntSetting.RENDERER_ASTC_DECODE_METHOD, + titleId = R.string.accelerate_astc, + choicesId = R.array.astcDecodingMethodNames, + valuesId = R.array.astcDecodingMethodValues + ) + ) + put( + SingleChoiceSetting( + IntSetting.RENDERER_ASTC_RECOMPRESSION, + titleId = R.string.astc_recompression, + choicesId = R.array.astcRecompressionMethodNames, + valuesId = R.array.astcRecompressionMethodValues + ) + ) + put( + SingleChoiceSetting( + IntSetting.RENDERER_VRAM_USAGE_MODE, + titleId = R.string.vram_usage_mode, + choicesId = R.array.vramUsageMethodNames, + valuesId = R.array.vramUsageMethodValues + ) + ) put( SingleChoiceSetting( IntSetting.RENDERER_RESOLUTION, @@ -220,6 +337,71 @@ abstract class SettingsItem( valuesId = R.array.rendererResolutionValues ) ) + put( + SwitchSetting( + BooleanSetting.SHOW_PERFORMANCE_OVERLAY, + R.string.enable_stats_overlay_, + descriptionId = R.string.stats_overlay_options_description + ) + ) + put( + SwitchSetting( + BooleanSetting.OVERLAY_BACKGROUND, + R.string.overlay_background, + descriptionId = R.string.overlay_background_description + ) + ) + put( + SingleChoiceSetting( + IntSetting.PERF_OVERLAY_POSITION, + titleId = R.string.overlay_position, + descriptionId = R.string.overlay_position_description, + choicesId = R.array.statsPosition, + valuesId = R.array.staticThemeValues + ) + ) + put( + SwitchSetting( + BooleanSetting.SHOW_FPS, + R.string.show_fps, + descriptionId = R.string.show_fps_description + ) + ) + put( + SwitchSetting( + BooleanSetting.SHOW_FRAMETIME, + R.string.show_frametime, + descriptionId = R.string.show_frametime_description + ) + ) + put( + SwitchSetting( + BooleanSetting.SHOW_SPEED, + R.string.show_speed, + descriptionId = R.string.show_speed_description + ) + ) + put( + SwitchSetting( + BooleanSetting.SHOW_APP_RAM_USAGE, + R.string.show_app_ram_usage, + descriptionId = R.string.show_app_ram_usage_description + ) + ) + put( + SwitchSetting( + BooleanSetting.SHOW_SYSTEM_RAM_USAGE, + R.string.show_system_ram_usage, + descriptionId = R.string.show_system_ram_usage_description + ) + ) + put( + SwitchSetting( + BooleanSetting.SHOW_BAT_TEMPERATURE, + R.string.show_bat_temperature, + descriptionId = R.string.show_bat_temperature_description + ) + ) put( SingleChoiceSetting( IntSetting.RENDERER_VSYNC, @@ -291,6 +473,15 @@ abstract class SettingsItem( descriptionId = R.string.renderer_force_max_clock_description ) ) + put( + SingleChoiceSetting( + IntSetting.RENDERER_OPTIMIZE_SPIRV_OUTPUT, + titleId = R.string.renderer_optimize_spirv_output, + descriptionId = 0, + choicesId = R.array.optimizeSpirvOutputEntries, + valuesId = R.array.optimizeSpirvOutputValues + ) + ) put( SwitchSetting( BooleanSetting.RENDERER_ASYNCHRONOUS_SHADERS, diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt index 7fa22b272f..971324683c 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt @@ -88,6 +88,7 @@ class SettingsFragmentPresenter( MenuTag.SECTION_ROOT -> addConfigSettings(sl) MenuTag.SECTION_SYSTEM -> addSystemSettings(sl) MenuTag.SECTION_RENDERER -> addGraphicsSettings(sl) + MenuTag.SECTION_PERFORMANCE_STATS -> addPerfomanceOverlaySettings(sl) MenuTag.SECTION_AUDIO -> addAudioSettings(sl) MenuTag.SECTION_INPUT -> addInputSettings(sl) MenuTag.SECTION_INPUT_PLAYER_ONE -> addInputPlayer(sl, 0) @@ -100,6 +101,7 @@ class SettingsFragmentPresenter( MenuTag.SECTION_INPUT_PLAYER_EIGHT -> addInputPlayer(sl, 7) MenuTag.SECTION_THEME -> addThemeSettings(sl) MenuTag.SECTION_DEBUG -> addDebugSettings(sl) + MenuTag.SECTION_EDEN_VEIL -> addEdenVeilSettings(sl) } settingsList = sl adapter.submitList(settingsList) { @@ -127,6 +129,15 @@ class SettingsFragmentPresenter( menuKey = MenuTag.SECTION_RENDERER ) ) + if (!NativeConfig.isPerGameConfigLoaded()) + add( + SubmenuSetting( + titleId = R.string.stats_overlay_options, + descriptionId = R.string.stats_overlay_options_description, + iconId = R.drawable.ic_frames, + menuKey = MenuTag.SECTION_PERFORMANCE_STATS + ) + ) add( SubmenuSetting( titleId = R.string.preferences_audio, @@ -143,6 +154,14 @@ class SettingsFragmentPresenter( menuKey = MenuTag.SECTION_DEBUG ) ) + add( + SubmenuSetting( + titleId = R.string.eden_veil, + descriptionId = R.string.eden_veil_description, + iconId = R.drawable.ic_eden_veil, + menuKey = MenuTag.SECTION_EDEN_VEIL + ) + ) add( RunnableSetting( titleId = R.string.reset_to_default, @@ -154,6 +173,87 @@ class SettingsFragmentPresenter( } } + private val InterpolationSetting = object : AbstractBooleanSetting { + override val key = BooleanSetting.FRAME_INTERPOLATION.key + + override fun getBoolean(needsGlobal: Boolean): Boolean { + return BooleanSetting.FRAME_INTERPOLATION.getBoolean(needsGlobal) + } + + override fun setBoolean(value: Boolean) { + BooleanSetting.FRAME_INTERPOLATION.setBoolean(value) + } + + override val defaultValue = BooleanSetting.FRAME_INTERPOLATION.defaultValue + + override fun getValueAsString(needsGlobal: Boolean): String = + BooleanSetting.FRAME_INTERPOLATION.getValueAsString(needsGlobal) + + override fun reset() = BooleanSetting.FRAME_INTERPOLATION.reset() + } + + private val syncCoreSpeedSetting = object : AbstractBooleanSetting { + override val key = BooleanSetting.CORE_SYNC_CORE_SPEED.key + + override fun getBoolean(needsGlobal: Boolean): Boolean { + return BooleanSetting.CORE_SYNC_CORE_SPEED.getBoolean(needsGlobal) + } + + override fun setBoolean(value: Boolean) { + BooleanSetting.CORE_SYNC_CORE_SPEED.setBoolean(value) + } + + override val defaultValue = BooleanSetting.CORE_SYNC_CORE_SPEED.defaultValue + + override fun getValueAsString(needsGlobal: Boolean): String = + BooleanSetting.CORE_SYNC_CORE_SPEED.getValueAsString(needsGlobal) + + override fun reset() = BooleanSetting.CORE_SYNC_CORE_SPEED.reset() + } + + private val frameSkippingSetting = object : AbstractBooleanSetting { + override val key = BooleanSetting.FRAME_SKIPPING.key + + override fun getBoolean(needsGlobal: Boolean): Boolean { + return BooleanSetting.FRAME_SKIPPING.getBoolean(needsGlobal) + } + + override fun setBoolean(value: Boolean) { + BooleanSetting.FRAME_SKIPPING.setBoolean(value) + } + + override val defaultValue = BooleanSetting.FRAME_SKIPPING.defaultValue + + override fun getValueAsString(needsGlobal: Boolean): String = + BooleanSetting.FRAME_SKIPPING.getValueAsString(needsGlobal) + + override fun reset() = BooleanSetting.FRAME_SKIPPING.reset() + } + + private fun addEdenVeilSubmenu(sl: ArrayList) { + sl.apply { + add( + SubmenuSetting( + titleId = R.string.eden_veil, + descriptionId = R.string.eden_veil_description, + iconId = R.drawable.ic_code, + menuKey = MenuTag.SECTION_EDEN_VEIL + ) + ) + addEdenVeilSettings(sl) + + add(BooleanSetting.FRAME_INTERPOLATION.key) + add(BooleanSetting.FRAME_SKIPPING.key) + add(BooleanSetting.CORE_SYNC_CORE_SPEED.key) + add(IntSetting.RENDERER_SHADER_BACKEND.key) + add(IntSetting.RENDERER_OPTIMIZE_SPIRV_OUTPUT.key) + add(IntSetting.RENDERER_NVDEC_EMULATION.key) + add(IntSetting.RENDERER_ASTC_DECODE_METHOD.key) + add(IntSetting.RENDERER_ASTC_RECOMPRESSION.key) + add(IntSetting.RENDERER_VRAM_USAGE_MODE.key) + } + } + private fun addSystemSettings(sl: ArrayList) { sl.apply { add(StringSetting.DEVICE_NAME.key) @@ -187,6 +287,23 @@ class SettingsFragmentPresenter( } } + private fun addPerfomanceOverlaySettings(sl: ArrayList) { + sl.apply { + add(HeaderSetting(R.string.stats_overlay_customization)) + add(BooleanSetting.SHOW_PERFORMANCE_OVERLAY.key) + add(BooleanSetting.OVERLAY_BACKGROUND.key) + add(IntSetting.PERF_OVERLAY_POSITION.key) + add(HeaderSetting(R.string.stats_overlay_items)) + add(BooleanSetting.SHOW_FPS.key) + add(BooleanSetting.SHOW_FRAMETIME.key) + add(BooleanSetting.SHOW_SPEED.key) + add(BooleanSetting.SHOW_APP_RAM_USAGE.key) + add(BooleanSetting.SHOW_SYSTEM_RAM_USAGE.key) + add(BooleanSetting.SHOW_BAT_TEMPERATURE.key) + } + + } + private fun addAudioSettings(sl: ArrayList) { sl.apply { add(IntSetting.AUDIO_OUTPUT_ENGINE.key) @@ -338,7 +455,79 @@ class SettingsFragmentPresenter( override val isSaveable = true } } - + private fun addEdenVeilSettings(sl: ArrayList) { + sl.apply { + add( + SwitchSetting( + InterpolationSetting, // The interpolation setting object you've created + titleId = R.string.frame_interpolation, // Use appropriate string resources for the title + descriptionId = R.string.frame_interpolation_description // Description resource for the interpolation setting + ) + ) + add( + SwitchSetting( + frameSkippingSetting, + titleId = R.string.frame_skipping, + descriptionId = R.string.frame_skipping_description + ) + ) + add( + SwitchSetting( + syncCoreSpeedSetting, + titleId = R.string.use_sync_core, + descriptionId = R.string.use_sync_core_description + ) + ) + add( + SingleChoiceSetting( + IntSetting.RENDERER_SHADER_BACKEND, + titleId = R.string.shader_backend, + choicesId = R.array.rendererShaderNames, + valuesId = R.array.rendererShaderValues + ) + ) + add( + SingleChoiceSetting( + IntSetting.RENDERER_NVDEC_EMULATION, + titleId = R.string.nvdec_emulation, + choicesId = R.array.rendererNvdecNames, + valuesId = R.array.rendererNvdecValues + ) + ) + add( + SingleChoiceSetting( + IntSetting.RENDERER_ASTC_DECODE_METHOD, + titleId = R.string.accelerate_astc, + choicesId = R.array.astcDecodingMethodNames, + valuesId = R.array.astcDecodingMethodValues + ) + ) + add( + SingleChoiceSetting( + IntSetting.RENDERER_ASTC_RECOMPRESSION, + titleId = R.string.astc_recompression, + choicesId = R.array.astcRecompressionMethodNames, + valuesId = R.array.astcRecompressionMethodValues + ) + ) + add( + SingleChoiceSetting( + IntSetting.RENDERER_VRAM_USAGE_MODE, + titleId = R.string.vram_usage_mode, + choicesId = R.array.vramUsageMethodNames, + valuesId = R.array.vramUsageMethodValues + ) + ) + add( + SingleChoiceSetting( + IntSetting.RENDERER_OPTIMIZE_SPIRV_OUTPUT, + titleId = R.string.renderer_optimize_spirv_output, + choicesId = R.array.optimizeSpirvOutputEntries, + valuesId = R.array.optimizeSpirvOutputValues + ) + ) + } +} private fun addInputPlayer(sl: ArrayList, playerIndex: Int) { sl.apply { val connectedSetting = object : AbstractBooleanSetting { diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt index 2c99f6a2ac..4e51ae4902 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/fragments/EmulationFragment.kt @@ -4,18 +4,29 @@ package org.yuzu.yuzu_emu.fragments import android.annotation.SuppressLint +import android.app.ActivityManager import android.app.AlertDialog import android.content.Context import android.content.DialogInterface +import android.content.Intent +import android.content.IntentFilter import android.content.pm.ActivityInfo import android.content.res.Configuration +import android.graphics.Color import android.net.Uri +import android.os.BatteryManager import android.os.Bundle import android.os.Handler import android.os.Looper import android.os.SystemClock import android.util.Rational -import android.view.* +import android.view.Gravity +import android.view.LayoutInflater +import android.view.MotionEvent +import android.view.Surface +import android.view.SurfaceHolder +import android.view.View +import android.view.ViewGroup import android.widget.FrameLayout import android.widget.TextView import android.widget.Toast @@ -26,7 +37,6 @@ import androidx.core.graphics.Insets import androidx.core.view.ViewCompat import androidx.core.view.WindowInsetsCompat import androidx.core.view.updateLayoutParams -import androidx.core.view.updatePadding import androidx.drawerlayout.widget.DrawerLayout import androidx.drawerlayout.widget.DrawerLayout.DrawerListener import androidx.fragment.app.Fragment @@ -36,6 +46,7 @@ import androidx.navigation.fragment.navArgs import androidx.window.layout.FoldingFeature import androidx.window.layout.WindowInfoTracker import androidx.window.layout.WindowLayoutInfo +import com.google.android.material.color.MaterialColors import com.google.android.material.dialog.MaterialAlertDialogBuilder import com.google.android.material.slider.Slider import org.yuzu.yuzu_emu.HomeNavigationDirections @@ -51,28 +62,28 @@ import org.yuzu.yuzu_emu.features.settings.model.Settings.EmulationOrientation import org.yuzu.yuzu_emu.features.settings.model.Settings.EmulationVerticalAlignment import org.yuzu.yuzu_emu.features.settings.utils.SettingsFile import org.yuzu.yuzu_emu.model.DriverViewModel -import org.yuzu.yuzu_emu.model.Game import org.yuzu.yuzu_emu.model.EmulationViewModel +import org.yuzu.yuzu_emu.model.Game import org.yuzu.yuzu_emu.overlay.model.OverlayControl import org.yuzu.yuzu_emu.overlay.model.OverlayLayout -import org.yuzu.yuzu_emu.utils.* +import org.yuzu.yuzu_emu.utils.DirectoryInitialization +import org.yuzu.yuzu_emu.utils.FileUtil +import org.yuzu.yuzu_emu.utils.GameHelper +import org.yuzu.yuzu_emu.utils.GameIconUtils +import org.yuzu.yuzu_emu.utils.Log +import org.yuzu.yuzu_emu.utils.NativeConfig +import org.yuzu.yuzu_emu.utils.ViewUtils import org.yuzu.yuzu_emu.utils.ViewUtils.setVisible -import java.lang.NullPointerException -import android.content.BroadcastReceiver -import android.content.Intent -import android.content.IntentFilter -import android.os.BatteryManager -import android.util.TypedValue -import android.app.ActivityManager -import android.graphics.Color -import android.os.Debug +import org.yuzu.yuzu_emu.utils.collect +import java.io.File class EmulationFragment : Fragment(), SurfaceHolder.Callback { private lateinit var emulationState: EmulationState private var emulationActivity: EmulationActivity? = null private var perfStatsUpdater: (() -> Unit)? = null - private var thermalStatsUpdater: (() -> Unit)? = null - private var batteryReceiverRegistered: Boolean = false + private lateinit var cpuBackend: String + private lateinit var gpuDriver: String + private var _binding: FragmentEmulationBinding? = null private val binding get() = _binding!! @@ -198,8 +209,10 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } }) binding.drawerLayout.setDrawerLockMode(DrawerLayout.LOCK_MODE_LOCKED_CLOSED) - binding.inGameMenu.getHeaderView(0).findViewById(R.id.text_game_title).text = - game.title + binding.inGameMenu.getHeaderView(0).apply { + val titleView = findViewById(R.id.text_game_title) + titleView.text = game.title + } binding.inGameMenu.menu.findItem(R.id.menu_lock_drawer).apply { val lockMode = IntSetting.LOCK_DRAWER.getInt() @@ -375,9 +388,23 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { emulationState.updateSurface() // Setup overlays - updateShowFpsOverlay() - val temperature = getBatteryTemperature(requireContext()) - updateThermalOverlay(temperature) + updateshowStatsOvelray() + + // Re update binding when the specs values get initialized properly + binding.inGameMenu.getHeaderView(0).apply { + val titleView = findViewById(R.id.text_game_title) + val cpuBackendLabel = findViewById(R.id.cpu_backend) + val gpuvendorLabel = findViewById(R.id.gpu_vendor) + + titleView.text = game.title + cpuBackendLabel.text = NativeLibrary.getCpuBackend() + gpuvendorLabel.text = NativeLibrary.getGpuDriver() + } + + + val position = IntSetting.PERF_OVERLAY_POSITION.getInt() + updateStatsPosition(position) + } } emulationViewModel.isEmulationStopping.collect(viewLifecycleOwner) { @@ -385,7 +412,7 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { binding.loadingText.setText(R.string.shutting_down) ViewUtils.showView(binding.loadingIndicator) ViewUtils.hideView(binding.inputContainer) - ViewUtils.hideView(binding.showFpsText) + ViewUtils.hideView(binding.showStatsOverlayText) } } emulationViewModel.drawerOpen.collect(viewLifecycleOwner) { @@ -467,22 +494,10 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { if (emulationState.isRunning && emulationActivity?.isInPictureInPictureMode != true) { emulationState.pause() } - context?.let { - if (batteryReceiverRegistered) { - it.unregisterReceiver(batteryReceiver) - batteryReceiverRegistered = false - } - } super.onPause() } override fun onDestroyView() { - context?.let { - if (batteryReceiverRegistered) { - it.unregisterReceiver(batteryReceiver) - batteryReceiverRegistered = false - } - } super.onDestroyView() _binding = null } @@ -492,12 +507,10 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { super.onDetach() } override fun onResume() { - super.onResume() - if (!batteryReceiverRegistered) { - val filter = IntentFilter(Intent.ACTION_BATTERY_CHANGED) - context?.registerReceiver(batteryReceiver, filter) - batteryReceiverRegistered = true - } + super.onResume() + // If the overlay is enabled, we need to update the position if changed + val position = IntSetting.PERF_OVERLAY_POSITION.getInt() + updateStatsPosition(position) } private fun resetInputOverlay() { @@ -508,40 +521,103 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } } @SuppressLint("DefaultLocale") - private fun updateShowFpsOverlay() { + private fun updateshowStatsOvelray() { val showOverlay = BooleanSetting.SHOW_PERFORMANCE_OVERLAY.getBoolean() - binding.showFpsText.setTextColor(Color.parseColor("#A146FF")) - binding.showFpsText.setVisible(showOverlay) + binding.showStatsOverlayText.apply { + setTextColor( + MaterialColors.getColor( + this, + com.google.android.material.R.attr.colorPrimary + ) + ) + } + binding.showStatsOverlayText.setVisible(showOverlay) if (showOverlay) { val SYSTEM_FPS = 0 val FPS = 1 val FRAMETIME = 2 val SPEED = 3 + val sb = StringBuilder() perfStatsUpdater = { if (emulationViewModel.emulationStarted.value && !emulationViewModel.isEmulationStopping.value ) { + sb.setLength(0) + val perfStats = NativeLibrary.getPerfStats() - val cpuBackend = NativeLibrary.getCpuBackend() - val gpuDriver = NativeLibrary.getGpuDriver() + val actualFps = perfStats[FPS] - // Get memory info - val mi = ActivityManager.MemoryInfo() - val activityManager = - requireContext().getSystemService(Context.ACTIVITY_SERVICE) as ActivityManager - activityManager.getMemoryInfo(mi) + if (BooleanSetting.SHOW_FPS.getBoolean(NativeConfig.isPerGameConfigLoaded())) { + val enableFrameInterpolation = BooleanSetting.FRAME_INTERPOLATION.getBoolean() + val enableFrameSkipping = BooleanSetting.FRAME_SKIPPING.getBoolean() - // Calculate used memory - val usedMegs = (mi.totalMem - mi.availMem) / 1048576L // Convert bytes to megabytes + var fpsText = String.format("FPS: %.1f", actualFps) - if (_binding != null) { - binding.showFpsText.text = String.format( - "%.1f FPS • %d MB • %s/%s", - perfStats[FPS], usedMegs, cpuBackend, gpuDriver + if (enableFrameInterpolation) { + val interpolatedFps = actualFps * 2 + fpsText += String.format(" (Interp: %.1f)", interpolatedFps) + } + + if (enableFrameSkipping) { + fpsText += " [Skipping]" + } + + sb.append(fpsText) + } + + if (BooleanSetting.SHOW_FRAMETIME.getBoolean(NativeConfig.isPerGameConfigLoaded())) { + if (sb.isNotEmpty()) sb.append(" | ") + sb.append( + String.format( + "FT: %.1fms", + (perfStats[FRAMETIME] * 1000.0f).toFloat() + ) ) } - perfStatsUpdateHandler.postDelayed(perfStatsUpdater!!, 800) + + if (BooleanSetting.SHOW_SPEED.getBoolean(NativeConfig.isPerGameConfigLoaded())) { + if (sb.isNotEmpty()) sb.append(" | ") + sb.append( + String.format( + "Speed: %d%%", + (perfStats[SPEED] * 100.0 + 0.5).toInt() + ) + ) + } + + if (BooleanSetting.SHOW_APP_RAM_USAGE.getBoolean(NativeConfig.isPerGameConfigLoaded())) { + if (sb.isNotEmpty()) sb.append(" | ") + val appRamUsage = File("/proc/self/statm").readLines()[0].split(' ')[1].toLong() * 4096 / 1000000 + sb.append("Process RAM: $appRamUsage MB") + } + + if (BooleanSetting.SHOW_SYSTEM_RAM_USAGE.getBoolean(NativeConfig.isPerGameConfigLoaded())) { + if (sb.isNotEmpty()) sb.append(" | ") + context?.let { ctx -> + val activityManager = ctx.getSystemService(Context.ACTIVITY_SERVICE) as ActivityManager + val memInfo = ActivityManager.MemoryInfo() + activityManager.getMemoryInfo(memInfo) + val usedRamMB = (memInfo.totalMem - memInfo.availMem) / 1048576L + sb.append("RAM: $usedRamMB MB") + } + } + + if (BooleanSetting.SHOW_BAT_TEMPERATURE.getBoolean(NativeConfig.isPerGameConfigLoaded())) { + if (sb.isNotEmpty()) sb.append(" | ") + val batteryTemp = getBatteryTemperature() + val tempF = celsiusToFahrenheit(batteryTemp) + sb.append(String.format("%.1f°C/%.1f°F", batteryTemp, tempF)) + } + + if (BooleanSetting.OVERLAY_BACKGROUND.getBoolean(NativeConfig.isPerGameConfigLoaded())) { + binding.showStatsOverlayText.setBackgroundResource(R.color.yuzu_transparent_black) + } else { + binding.showStatsOverlayText.setBackgroundResource(0) + } + + binding.showStatsOverlayText.text = sb.toString() } + perfStatsUpdateHandler.postDelayed(perfStatsUpdater!!, 800) } perfStatsUpdateHandler.post(perfStatsUpdater!!) } else { @@ -551,47 +627,76 @@ class EmulationFragment : Fragment(), SurfaceHolder.Callback { } } -private val batteryReceiver = object : BroadcastReceiver() { - override fun onReceive(context: Context?, intent: Intent?) { - intent?.let { - if (it.action == Intent.ACTION_BATTERY_CHANGED) { - val temperature = getBatteryTemperature(context!!) - updateThermalOverlay(temperature) + private fun updateStatsPosition(position: Int) { + val params = binding.showStatsOverlayText.layoutParams as FrameLayout.LayoutParams + when (position) { + 0 -> { + params.gravity = (Gravity.TOP or Gravity.START) + params.setMargins(resources.getDimensionPixelSize(R.dimen.spacing_large), 0, 0, 0) + } + + 1 -> { + params.gravity = (Gravity.TOP or Gravity.CENTER_HORIZONTAL) + } + + 2 -> { + params.gravity = (Gravity.TOP or Gravity.END) + params.setMargins(0, 0, resources.getDimensionPixelSize(R.dimen.spacing_large), 0) + } + + 3 -> { + params.gravity = (Gravity.BOTTOM or Gravity.START) + params.setMargins(resources.getDimensionPixelSize(R.dimen.spacing_large), 0, 0, 0) + } + + 4 -> { + params.gravity = (Gravity.BOTTOM or Gravity.CENTER_HORIZONTAL) + } + + 5 -> { + params.gravity = (Gravity.BOTTOM or Gravity.END) + params.setMargins(0, 0, resources.getDimensionPixelSize(R.dimen.spacing_large), 0) } } } -} -private fun updateThermalOverlay(temperature: Float) { - if (BooleanSetting.SHOW_THERMAL_OVERLAY.getBoolean() && - emulationViewModel.emulationStarted.value && - !emulationViewModel.isEmulationStopping.value - ) { - // Convert to Fahrenheit - val fahrenheit = (temperature * 9f / 5f) + 32f - - // Determine color based on temperature ranges - val color = when { - temperature < 35 -> Color.parseColor("#00C8FF") - temperature < 40 -> Color.parseColor("#A146FF") - temperature < 45 -> Color.parseColor("#FFA500") - else -> Color.RED + private fun getBatteryTemperature(): Float { + try { + val batteryIntent = requireContext().registerReceiver(null, IntentFilter(Intent.ACTION_BATTERY_CHANGED)) + // Temperature in tenths of a degree Celsius + val temperature = batteryIntent?.getIntExtra(BatteryManager.EXTRA_TEMPERATURE, 0) ?: 0 + // Convert to degrees Celsius + return temperature / 10.0f + } catch (e: Exception) { + return 0.0f } - - binding.showThermalsText.setTextColor(color) - binding.showThermalsText.text = String.format("%.1f°C • %.1f°F", temperature, fahrenheit) } -} - -private fun getBatteryTemperature(context: Context): Float { - val intent: Intent? = context.registerReceiver( - null, - IntentFilter(Intent.ACTION_BATTERY_CHANGED) - ) - val temperature = intent?.getIntExtra(BatteryManager.EXTRA_TEMPERATURE, 0) ?: 0 - return temperature / 10.0f + private fun celsiusToFahrenheit(celsius: Float): Float { + return (celsius * 9 / 5) + 32 } + private fun updateThermalOverlay(temperature: Float) { + if (BooleanSetting.SHOW_THERMAL_OVERLAY.getBoolean() && + emulationViewModel.emulationStarted.value && + !emulationViewModel.isEmulationStopping.value + ) { + // Convert to Fahrenheit + val fahrenheit = (temperature * 9f / 5f) + 32f + + // Determine color based on temperature ranges + val color = when { + temperature < 35 -> Color.parseColor("#00C8FF") + temperature < 40 -> Color.parseColor("#A146FF") + temperature < 45 -> Color.parseColor("#FFA500") + else -> Color.RED + } + + binding.showThermalsText.setTextColor(color) + binding.showThermalsText.text = String.format("%.1f°C • %.1f°F", temperature, fahrenheit) + } + } + + @SuppressLint("SourceLockedOrientationActivity") private fun updateOrientation() { emulationActivity?.let { @@ -717,10 +822,8 @@ private fun getBatteryTemperature(context: Context): Float { popup.menuInflater.inflate(R.menu.menu_overlay_options, popup.menu) popup.menu.apply { - findItem(R.id.menu_toggle_fps).isChecked = + findItem(R.id.menu_show_stats_overlay).isChecked = BooleanSetting.SHOW_PERFORMANCE_OVERLAY.getBoolean() - findItem(R.id.thermal_indicator).isChecked = - BooleanSetting.SHOW_THERMAL_OVERLAY.getBoolean() findItem(R.id.menu_rel_stick_center).isChecked = BooleanSetting.JOYSTICK_REL_CENTER.getBoolean() findItem(R.id.menu_dpad_slide).isChecked = BooleanSetting.DPAD_SLIDE.getBoolean() @@ -733,34 +836,12 @@ private fun getBatteryTemperature(context: Context): Float { popup.setOnDismissListener { NativeConfig.saveGlobalConfig() } popup.setOnMenuItemClickListener { when (it.itemId) { - R.id.menu_toggle_fps -> { + R.id.menu_show_stats_overlay -> { it.isChecked = !it.isChecked BooleanSetting.SHOW_PERFORMANCE_OVERLAY.setBoolean(it.isChecked) - updateShowFpsOverlay() + updateshowStatsOvelray() true } - - R.id.thermal_indicator -> { - it.isChecked = !it.isChecked - BooleanSetting.SHOW_THERMAL_OVERLAY.setBoolean(it.isChecked) - if (it.isChecked) { - val temperature = getBatteryTemperature(requireContext()) - updateThermalOverlay(temperature) - if (!batteryReceiverRegistered) { - val filter = IntentFilter(Intent.ACTION_BATTERY_CHANGED) - context?.registerReceiver(batteryReceiver, filter) - batteryReceiverRegistered = true - } - } else { - if (batteryReceiverRegistered) { - context?.unregisterReceiver(batteryReceiver) - batteryReceiverRegistered = false - } - binding.showThermalsText.text = "" - } - true - } - R.id.menu_edit_overlay -> { binding.drawerLayout.close() binding.surfaceInputOverlay.requestFocus() @@ -951,7 +1032,8 @@ private fun getBatteryTemperature(context: Context): Float { right = cutInsets.right } - v.updatePadding(left = left, top = cutInsets.top, right = right) + v.setPadding(left, cutInsets.top, right, 0) + windowInsets } } diff --git a/src/android/app/src/main/jni/android_settings.h b/src/android/app/src/main/jni/android_settings.h index 00baf86a9b..a47803604a 100644 --- a/src/android/app/src/main/jni/android_settings.h +++ b/src/android/app/src/main/jni/android_settings.h @@ -66,9 +66,23 @@ struct Values { Settings::Setting haptic_feedback{linkage, true, "haptic_feedback", Settings::Category::Overlay}; Settings::Setting show_performance_overlay{linkage, true, "show_performance_overlay", - Settings::Category::Overlay}; - Settings::Setting show_thermal_overlay{linkage, false, "show_thermal_overlay", - Settings::Category::Overlay}; + Settings::Category::Overlay, Settings::Specialization::Paired, true , true}; + Settings::Setting overlay_background{linkage, false, "overlay_background", + Settings::Category::Overlay, Settings::Specialization::Default, true , true, &show_performance_overlay}; + Settings::Setting perf_overlay_position{linkage, 0, "perf_overlay_position", + Settings::Category::Overlay, Settings::Specialization::Default, true , true, &show_performance_overlay}; + Settings::Setting show_fps{linkage, true, "show_fps", + Settings::Category::Overlay, Settings::Specialization::Default, true , true, &show_performance_overlay}; + Settings::Setting show_frame_time{linkage, false, "show_frame_time", + Settings::Category::Overlay, Settings::Specialization::Default, true , true, &show_performance_overlay}; + Settings::Setting show_speed{linkage, true, "show_speed", + Settings::Category::Overlay, Settings::Specialization::Default, true , true, &show_performance_overlay}; + Settings::Setting show_app_ram_usage{linkage, false, "show_app_ram_usage", + Settings::Category::Overlay, Settings::Specialization::Default, true , true, &show_performance_overlay}; + Settings::Setting show_system_ram_usage{linkage, false, "show_system_ram_usage", + Settings::Category::Overlay, Settings::Specialization::Default, true , true, &show_performance_overlay}; + Settings::Setting show_bat_temperature{linkage, false, "show_bat_temperature", + Settings::Category::Overlay, Settings::Specialization::Default, true , true, &show_performance_overlay}; Settings::Setting show_input_overlay{linkage, true, "show_input_overlay", Settings::Category::Overlay}; Settings::Setting touchscreen{linkage, true, "touchscreen", Settings::Category::Overlay}; diff --git a/src/android/app/src/main/res/drawable/ic_eden_veil.xml b/src/android/app/src/main/res/drawable/ic_eden_veil.xml new file mode 100644 index 0000000000..09f90d15da --- /dev/null +++ b/src/android/app/src/main/res/drawable/ic_eden_veil.xml @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/android/app/src/main/res/drawable/ic_frames.xml b/src/android/app/src/main/res/drawable/ic_frames.xml new file mode 100644 index 0000000000..aee24007b0 --- /dev/null +++ b/src/android/app/src/main/res/drawable/ic_frames.xml @@ -0,0 +1,32 @@ + + + + + + + + + + \ No newline at end of file diff --git a/src/android/app/src/main/res/layout/fragment_emulation.xml b/src/android/app/src/main/res/layout/fragment_emulation.xml index 185ad37814..00f2cdc103 100644 --- a/src/android/app/src/main/res/layout/fragment_emulation.xml +++ b/src/android/app/src/main/res/layout/fragment_emulation.xml @@ -140,15 +140,13 @@ android:id="@+id/overlay_container" android:layout_width="match_parent" android:layout_height="match_parent" - android:layout_marginHorizontal="20dp" - android:fitsSystemWindows="true"> + android:fitsSystemWindows="false"> - + android:layout_marginEnd="24dp"> + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/android/app/src/main/res/menu/menu_overlay_options.xml b/src/android/app/src/main/res/menu/menu_overlay_options.xml index a9e807427b..3315dbfdc9 100644 --- a/src/android/app/src/main/res/menu/menu_overlay_options.xml +++ b/src/android/app/src/main/res/menu/menu_overlay_options.xml @@ -2,13 +2,8 @@ - - #B7B7B7 + #80000000 #C6C5D0 #FFB4AB #93000A diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml index 99013e0485..71a981402e 100644 --- a/src/android/app/src/main/res/values/arrays.xml +++ b/src/android/app/src/main/res/values/arrays.xml @@ -85,6 +85,72 @@ 2 + + @string/shader_backend_glsl + @string/shader_backend_glasm + @string/shader_backend_spirv + + + + 0 + 1 + 2 + + + + + @string/vram_usage_conservative + @string/vram_usage_aggressive + + + + + 0 + 1 + + + + + @string/accelerate_astc_cpu + @string/accelerate_astc_gpu + @string/accelerate_astc_async + + + + + 0 + 1 + 2 + + + + + @string/astc_recompression_uncompressed + @string/astc_recompression_bc1 + @string/astc_recompression_bc3 + + + + + 0 + 1 + 2 + + + + + @string/nvdec_emulation_none + @string/nvdec_emulation_cpu + @string/nvdec_emulation_gpu + + + + + 3 + 1 + 2 + + @string/resolution_half @string/resolution_three_quarter @@ -183,6 +249,23 @@ 2 + + @string/overlay_position_top_left + @string/overlay_position_center_top + @string/overlay_position_top_right + @string/overlay_position_bottom_left + @string/overlay_position_center_bottom + @string/overlay_position_bottom_right + + + 0 + 1 + 2 + 3 + 4 + 5 + + @string/cpu_backend_dynarmic @string/cpu_backend_nce @@ -326,4 +409,15 @@ 2 + + @string/never + @string/on_load + @string/always + + + 0 + 1 + 2 + + diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index bb0b5c58dc..3d1927af99 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -9,6 +9,37 @@ Shows notifications when something goes wrong. Notification permission not granted! + + ShoW Performance Stats Overlay + Customization + Visibility + Overlay + Enable Performance Stats Overlay + Configure what information is shown in the performance stats overlay + Show FPS + Display current frames per second + Show Frametime + Display current frametime + Show Speed + Display current emulation speed percentage + Show App Memory Usage + Display the amount of RAM getting used by the emulator + Show System Memory Usage + Display the amount of RAM getting used by the system + Show Battery Temperature + Display current Battery temperature in Celsius and Fahrenheit + Overlay Position + Choose where the performance stats overlay is displayed on the screen + Top Left + Top Right + Bottom Left + Bottom Right + Center Top + Center Bottom + Overlay Background + Adds a background behind the overlay for easier reading + + Welcome! Learn how to setup <b>eden</b> and jump into emulation. @@ -217,6 +248,10 @@ CPU accuracy %1$s%2$s + + Synchronize Core Speed + Synchronize the core tick speed to the maximum speed percentage to improve performance without altering the games actual speed. + Device name Docked Mode @@ -230,6 +265,10 @@ Set custom RTC + Frame Skipping + Toggle frame skipping to improve performance by reducing the number of rendered frames. + Frame Interpolation + Toggle frame interpolation to improve visual smoothness by generating intermediate frames. Accuracy level Resolution (Handheld/Docked) VSync mode @@ -241,6 +280,7 @@ Anti-aliasing method Force maximum clocks (Adreno only) Forces the GPU to run at the maximum possible clocks (thermal constraints will still be applied). + Optimize SPIRV output Use asynchronous shaders Compiles shaders asynchronously, reducing stutter but may introduce glitches. Use reactive flushing @@ -249,6 +289,9 @@ Reduces stuttering by locally storing and loading generated shaders. Anisotropic filtering Improves the quality of textures when viewed at oblique angles + NVDEC Emulation + Specifies how videos should be decoded. It can either use the CPU or the GPU for decoding, or perform no decoding at all (black screen on videos). In most cases, GPU decoding provides the best performance. + Shader Backend CPU @@ -352,6 +395,8 @@ Are you sure that you want to reset all mappings for this controller to default? This cannot be undone. + Eden’s Veil + Beyond default Default Saved settings Saved settings for %1$s @@ -562,11 +607,44 @@ Vulkan None + + GLSL + GLASM + SPIR-V + + + CPU + GPU + None + Normal High Extreme (Slow) + + ASTC Decoding Method + Choose ASTC decoding method: CPU (slow but safe), GPU (fast, recommended), or Async CPU (no stutter but may glitch). + + + CPU + GPU + CPU Asynchronously + + + ASTC Recompression Method + Low-end Android GPUs often lack ASTC support, forcing emulators to decompress textures to RGBA8. This option recompresses RGBA8 to BC1/BC3, saving VRAM but reducing quality. + + + Uncompressed + BC1 (Low Quality) + BC3 (Medium Quality) + + + VRAM Usage Mode + Conservative + Aggressive + 0.5X (360p/540p) 0.75X (540p/810p) @@ -672,6 +750,11 @@ Center Bottom + + Never + On Load + Always + Licenses FidelityFX-FSR diff --git a/src/android/app/src/main/res/values/yuzu_colors.xml b/src/android/app/src/main/res/values/yuzu_colors.xml index a45b95f85c..a5af0886a9 100644 --- a/src/android/app/src/main/res/values/yuzu_colors.xml +++ b/src/android/app/src/main/res/values/yuzu_colors.xml @@ -229,6 +229,7 @@ #410002 #000000 #000000 + #80000000 #FFFFFF #FFFFFF diff --git a/src/common/common_types.h b/src/common/common_types.h index ae04c4d605..99fff66bed 100644 --- a/src/common/common_types.h +++ b/src/common/common_types.h @@ -30,6 +30,7 @@ #include #include +#include using u8 = std::uint8_t; ///< 8-bit unsigned byte using u16 = std::uint16_t; ///< 16-bit unsigned short diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 80d388fe88..245c432939 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -52,6 +52,7 @@ SWITCHABLE(NvdecEmulation, false); SWITCHABLE(Region, true); SWITCHABLE(RendererBackend, true); SWITCHABLE(ScalingFilter, false); +SWITCHABLE(SpirvOptimizeMode, true); SWITCHABLE(ShaderBackend, true); SWITCHABLE(TimeZone, true); SETTING(VSyncMode, true); diff --git a/src/common/settings.h b/src/common/settings.h index cf3579892c..c2cdeb4994 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -73,6 +74,7 @@ SWITCHABLE(NvdecEmulation, false); SWITCHABLE(Region, true); SWITCHABLE(RendererBackend, true); SWITCHABLE(ScalingFilter, false); +SWITCHABLE(SpirvOptimizeMode, true); SWITCHABLE(ShaderBackend, true); SWITCHABLE(TimeZone, true); SETTING(VSyncMode, true); @@ -210,6 +212,13 @@ struct Values { true, true, &use_speed_limit}; + SwitchableSetting sync_core_speed{linkage, false, "sync_core_speed", Category::Core, Specialization::Default}; + //SwitchableSetting use_nce{linkage, true, "use_nce", Category::Core}; + SwitchableSetting use_nce{linkage, true, "Use Native Code Execution", Category::Core}; + + // Memory + SwitchableSetting use_gpu_memory_manager{linkage, false, "Use GPU Memory Manager", Category::Core}; + SwitchableSetting enable_memory_snapshots{linkage, false, "Enable Memory Snapshots", Category::Core}; // Cpu SwitchableSetting cpu_backend{linkage, @@ -250,7 +259,6 @@ struct Values { Category::CpuDebug}; Setting cpuopt_ignore_memory_aborts{linkage, true, "cpuopt_ignore_memory_aborts", Category::CpuDebug}; - SwitchableSetting cpuopt_unsafe_unfuse_fma{linkage, true, "cpuopt_unsafe_unfuse_fma", Category::CpuUnsafe}; SwitchableSetting cpuopt_unsafe_reduce_fp_error{ @@ -273,9 +281,20 @@ struct Values { "shader_backend", Category::Renderer, Specialization::RuntimeList}; SwitchableSetting vulkan_device{linkage, 0, "vulkan_device", Category::Renderer, Specialization::RuntimeList}; - + #ifdef __ANDROID__ + SwitchableSetting frame_interpolation{linkage, true, "frame_interpolation", Category::Renderer, + Specialization::RuntimeList}; + SwitchableSetting frame_skipping{linkage, true, "frame_skipping", Category::Renderer, + Specialization::RuntimeList}; + #endif SwitchableSetting use_disk_shader_cache{linkage, true, "use_disk_shader_cache", Category::Renderer}; + SwitchableSetting optimize_spirv_output{linkage, + SpirvOptimizeMode::OnLoad, + SpirvOptimizeMode::Never, + SpirvOptimizeMode::Always, + "optimize_spirv_output", + Category::Renderer}; SwitchableSetting use_asynchronous_gpu_emulation{ linkage, true, "use_asynchronous_gpu_emulation", Category::Renderer}; SwitchableSetting accelerate_astc{linkage, @@ -617,11 +636,21 @@ struct Values { // Add-Ons std::map> disabled_addons; + + // Renderer Advanced Settings + SwitchableSetting use_enhanced_shader_building{linkage, false, "Enhanced Shader Building", + Category::RendererAdvanced}; + + // Add a new setting for shader compilation priority + SwitchableSetting shader_compilation_priority{linkage, 0, "Shader Compilation Priority", + Category::RendererAdvanced}; }; extern Values values; void UpdateGPUAccuracy(); +// boold isGPULevelNormal(); +// TODO: ZEP bool IsGPULevelExtreme(); bool IsGPULevelHigh(); diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h index 6e247e9306..75189e60d7 100644 --- a/src/common/settings_enums.h +++ b/src/common/settings_enums.h @@ -155,6 +155,8 @@ ENUM(ConsoleMode, Handheld, Docked); ENUM(AppletMode, HLE, LLE); +ENUM(SpirvOptimizeMode, Never, OnLoad, Always); + template inline std::string CanonicalizeEnum(Type id) { const auto group = EnumMetadata::Canonicalizations(); diff --git a/src/core/arm/arm_interface.cpp b/src/core/arm/arm_interface.cpp index 5dc7e5d59d..34acec9ebf 100644 --- a/src/core/arm/arm_interface.cpp +++ b/src/core/arm/arm_interface.cpp @@ -14,13 +14,22 @@ void ArmInterface::LogBacktrace(Kernel::KProcess* process) const { this->GetContext(ctx); LOG_ERROR(Core_ARM, "Backtrace, sp={:016X}, pc={:016X}", ctx.sp, ctx.pc); - LOG_ERROR(Core_ARM, "{:20}{:20}{:20}{:20}{}", "Module Name", "Address", "Original Address", - "Offset", "Symbol"); + LOG_ERROR(Core_ARM, "{:20}{:20}{:20}{:20}{}", "Module Name", "Address", "Original Address", "Offset", "Symbol"); LOG_ERROR(Core_ARM, ""); + const auto backtrace = GetBacktraceFromContext(process, ctx); + u64 last_address = 0; + for (const auto& entry : backtrace) { + + // Skip duplicate consecutive addresses + if (entry.address == last_address) + continue; + LOG_ERROR(Core_ARM, "{:20}{:016X} {:016X} {:016X} {}", entry.module, entry.address, entry.original_address, entry.offset, entry.name); + + last_address = entry.address; } } diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index 123b3da7ec..90891e241d 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -185,6 +185,9 @@ void ArmNce::LockThread(Kernel::KThread* thread) { void ArmNce::UnlockThread(Kernel::KThread* thread) { auto* thread_params = &thread->GetNativeExecutionParameters(); + m_guest_ctx.tpidr_el0 = thread_params->tpidr_el0; + m_guest_ctx.tpidrro_el0 = thread_params->tpidrro_el0; + thread_params->native_context = nullptr; UnlockThreadParameters(thread_params); } @@ -196,16 +199,23 @@ HaltReason ArmNce::RunThread(Kernel::KThread* thread) { return hr; } - // Get the thread context. + // Pre-fetch thread context data to improve cache locality auto* thread_params = &thread->GetNativeExecutionParameters(); auto* process = thread->GetOwnerProcess(); - // Assign current members. + // Move non-critical operations outside the locked section + const u64 tpidr_el0_cache = m_guest_ctx.tpidr_el0; + const u64 tpidrro_el0_cache = m_guest_ctx.tpidrro_el0; + + // Critical section begins - minimize operations here m_running_thread = thread; m_guest_ctx.parent = this; thread_params->native_context = &m_guest_ctx; - thread_params->tpidr_el0 = m_guest_ctx.tpidr_el0; - thread_params->tpidrro_el0 = m_guest_ctx.tpidrro_el0; + thread_params->tpidr_el0 = tpidr_el0_cache; + thread_params->tpidrro_el0 = tpidrro_el0_cache; + + // Memory barrier to ensure visibility of changes + std::atomic_thread_fence(std::memory_order_release); thread_params->is_running = true; // TODO: finding and creating the post handler needs to be locked @@ -217,12 +227,19 @@ HaltReason ArmNce::RunThread(Kernel::KThread* thread) { hr = ReturnToRunCodeByExceptionLevelChange(m_thread_id, thread_params); } - // Unload members. - // The thread does not change, so we can persist the old reference. - m_running_thread = nullptr; - m_guest_ctx.tpidr_el0 = thread_params->tpidr_el0; - thread_params->native_context = nullptr; + // Critical section for thread cleanup + std::atomic_thread_fence(std::memory_order_acquire); + + // Cache values before releasing thread + const u64 final_tpidr_el0 = thread_params->tpidr_el0; + + // Minimize critical section thread_params->is_running = false; + thread_params->native_context = nullptr; + m_running_thread = nullptr; + + // Non-critical updates can happen after releasing the thread + m_guest_ctx.tpidr_el0 = final_tpidr_el0; // Return the halt reason. return hr; @@ -365,15 +382,40 @@ void ArmNce::SignalInterrupt(Kernel::KThread* thread) { } } -void ArmNce::ClearInstructionCache() { - // TODO: This is not possible to implement correctly on Linux because - // we do not have any access to ic iallu. +const std::size_t CACHE_PAGE_SIZE = 4096; - // Require accesses to complete. - std::atomic_thread_fence(std::memory_order_seq_cst); +void ArmNce::ClearInstructionCache() { +#if defined(__GNUC__) || defined(__clang__) + void* start = (void*)((uintptr_t)__builtin_return_address(0) & ~(CACHE_PAGE_SIZE - 1)); + void* end = + (void*)((uintptr_t)start + CACHE_PAGE_SIZE * 2); // Clear two pages for better coverage + // Prefetch next likely pages + __builtin_prefetch((void*)((uintptr_t)end), 1, 3); + __builtin___clear_cache(static_cast(start), static_cast(end)); +#endif +#ifdef __aarch64__ + // Ensure all previous memory operations complete + asm volatile("dmb ish" ::: "memory"); + asm volatile("dsb ish" ::: "memory"); + asm volatile("isb" ::: "memory"); +#endif } void ArmNce::InvalidateCacheRange(u64 addr, std::size_t size) { + #if defined(__GNUC__) || defined(__clang__) + // Align the start address to cache line boundary for better performance + const size_t CACHE_LINE_SIZE = 64; + addr &= ~(CACHE_LINE_SIZE - 1); + + // Round up size to nearest cache line + size = (size + CACHE_LINE_SIZE - 1) & ~(CACHE_LINE_SIZE - 1); + + // Prefetch the range to be invalidated + for (size_t offset = 0; offset < size; offset += CACHE_LINE_SIZE) { + __builtin_prefetch((void*)(addr + offset), 1, 3); + } + #endif + this->ClearInstructionCache(); } diff --git a/src/core/arm/nce/interpreter_visitor.cpp b/src/core/arm/nce/interpreter_visitor.cpp index def888d153..bbe0289f8e 100644 --- a/src/core/arm/nce/interpreter_visitor.cpp +++ b/src/core/arm/nce/interpreter_visitor.cpp @@ -7,6 +7,13 @@ namespace Core { +namespace { +// Prefetch tuning parameters +constexpr size_t CACHE_LINE_SIZE = 64; +constexpr size_t PREFETCH_STRIDE = 128; // 2 cache lines ahead +constexpr size_t SIMD_PREFETCH_THRESHOLD = 32; // Bytes +} // namespace + template u64 SignExtendToLong(u64 value) { u64 mask = 1ULL << (BitSize - 1); @@ -168,15 +175,15 @@ bool InterpreterVisitor::Ordered(size_t size, bool L, bool o0, Reg Rn, Reg Rt) { const auto memop = L ? MemOp::Load : MemOp::Store; const size_t elsize = 8 << size; const size_t datasize = elsize; - - // Operation const size_t dbytes = datasize / 8; - u64 address; - if (Rn == Reg::SP) { - address = this->GetSp(); + u64 address = (Rn == Reg::SP) ? this->GetSp() : this->GetReg(Rn); + + // Conservative prefetch for atomic ops + if (memop == MemOp::Load) { + __builtin_prefetch(reinterpret_cast(address), 0, 1); } else { - address = this->GetReg(Rn); + __builtin_prefetch(reinterpret_cast(address), 1, 1); } switch (memop) { @@ -197,7 +204,6 @@ bool InterpreterVisitor::Ordered(size_t size, bool L, bool o0, Reg Rn, Reg Rt) { default: UNREACHABLE(); } - return true; } @@ -407,11 +413,11 @@ bool InterpreterVisitor::RegisterImmediate(bool wback, bool postindex, size_t sc MemOp memop; bool signed_ = false; size_t regsize = 0; + const size_t datasize = 8 << scale; if (opc.Bit<1>() == 0) { memop = opc.Bit<0>() ? MemOp::Load : MemOp::Store; regsize = size == 0b11 ? 64 : 32; - signed_ = false; } else if (size == 0b11) { memop = MemOp::Prefetch; ASSERT(!opc.Bit<0>()); @@ -422,26 +428,25 @@ bool InterpreterVisitor::RegisterImmediate(bool wback, bool postindex, size_t sc signed_ = true; } - if (memop == MemOp::Load && wback && Rn == Rt && Rn != Reg::R31) { - // Unpredictable instruction - return false; - } - if (memop == MemOp::Store && wback && Rn == Rt && Rn != Reg::R31) { - // Unpredictable instruction - return false; - } - - u64 address; - if (Rn == Reg::SP) { - address = this->GetSp(); - } else { - address = this->GetReg(Rn); - } - if (!postindex) { + u64 address = (Rn == Reg::SP) ? this->GetSp() : this->GetReg(Rn); + if (!postindex) address += offset; + + // Optimized prefetch for loads + if (memop == MemOp::Load) { + const size_t access_size = datasize / 8; + const bool is_aligned = (address % access_size) == 0; + + if (is_aligned) { + __builtin_prefetch(reinterpret_cast(address), 0, 3); + if (access_size >= 8 && access_size <= 32) { + __builtin_prefetch(reinterpret_cast(address + PREFETCH_STRIDE), 0, 3); + } + } else { + __builtin_prefetch(reinterpret_cast(address), 0, 1); + } } - const size_t datasize = 8 << scale; switch (memop) { case MemOp::Store: { u64 data = this->GetReg(Rt); @@ -459,22 +464,17 @@ bool InterpreterVisitor::RegisterImmediate(bool wback, bool postindex, size_t sc break; } case MemOp::Prefetch: - // this->Prefetch(address, Rt) break; } if (wback) { - if (postindex) { + if (postindex) address += offset; - } - - if (Rn == Reg::SP) { + if (Rn == Reg::SP) this->SetSp(address); - } else { + else this->SetReg(Rn, address); - } } - return true; } @@ -509,16 +509,17 @@ bool InterpreterVisitor::STURx_LDURx(Imm<2> size, Imm<2> opc, Imm<9> imm9, Reg R bool InterpreterVisitor::SIMDImmediate(bool wback, bool postindex, size_t scale, u64 offset, MemOp memop, Reg Rn, Vec Vt) { const size_t datasize = 8 << scale; - - u64 address; - if (Rn == Reg::SP) { - address = this->GetSp(); - } else { - address = this->GetReg(Rn); - } - - if (!postindex) { + u64 address = (Rn == Reg::SP) ? this->GetSp() : this->GetReg(Rn); + if (!postindex) address += offset; + + // Aggressive prefetch for SIMD + if (memop == MemOp::Load) { + __builtin_prefetch(reinterpret_cast(address), 0, 3); + __builtin_prefetch(reinterpret_cast(address + CACHE_LINE_SIZE), 0, 3); + if (datasize >= SIMD_PREFETCH_THRESHOLD) { + __builtin_prefetch(reinterpret_cast(address + PREFETCH_STRIDE), 0, 3); + } } switch (memop) { @@ -538,17 +539,13 @@ bool InterpreterVisitor::SIMDImmediate(bool wback, bool postindex, size_t scale, } if (wback) { - if (postindex) { + if (postindex) address += offset; - } - - if (Rn == Reg::SP) { + if (Rn == Reg::SP) this->SetSp(address); - } else { + else this->SetReg(Rn, address); - } } - return true; } @@ -795,30 +792,22 @@ bool InterpreterVisitor::LDR_reg_fpsimd(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3 std::optional MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, mcontext_t* context, fpsimd_context* fpsimd_context) { - // Construct the interpreter. std::span regs(reinterpret_cast(context->regs), 31); std::span vregs(reinterpret_cast(fpsimd_context->vregs), 32); u64& sp = *reinterpret_cast(&context->sp); const u64& pc = *reinterpret_cast(&context->pc); InterpreterVisitor visitor(memory, regs, vregs, sp, pc); - - // Read the instruction at the program counter. u32 instruction = memory.Read32(pc); bool was_executed = false; - // Interpret the instruction. if (auto decoder = Dynarmic::A64::Decode(instruction)) { was_executed = decoder->get().call(visitor, instruction); } else { LOG_ERROR(Core_ARM, "Unallocated encoding: {:#x}", instruction); } - if (was_executed) { - return pc + 4; - } - - return std::nullopt; + return was_executed ? std::optional(pc + 4) : std::nullopt; } } // namespace Core diff --git a/src/core/arm/nce/lru_cache.h b/src/core/arm/nce/lru_cache.h new file mode 100644 index 0000000000..4150e76a69 --- /dev/null +++ b/src/core/arm/nce/lru_cache.h @@ -0,0 +1,109 @@ +#pragma once + +#include +#include +#include + +template +class LRUCache { +public: + explicit LRUCache(size_t capacity) : capacity(capacity) { + cache_map.reserve(capacity); + } + + // Returns pointer to value if found, nullptr otherwise + ValueType* get(const KeyType& key) { + auto it = cache_map.find(key); + if (it == cache_map.end()) { + return nullptr; + } + + // Move the accessed item to the front of the list (most recently used) + cache_list.splice(cache_list.begin(), cache_list, it->second.first); + return &(it->second.second); + } + + // Returns pointer to value if found (without promoting it), nullptr otherwise + ValueType* peek(const KeyType& key) const { + auto it = cache_map.find(key); + return it != cache_map.end() ? &(it->second.second) : nullptr; + } + + // Inserts or updates a key-value pair + void put(const KeyType& key, const ValueType& value) { + auto it = cache_map.find(key); + + if (it != cache_map.end()) { + // Key exists, update value and move to front + it->second.second = value; + cache_list.splice(cache_list.begin(), cache_list, it->second.first); + return; + } + + // Remove the least recently used item if cache is full + if (cache_map.size() >= capacity) { + auto last = cache_list.back(); + cache_map.erase(last); + cache_list.pop_back(); + } + + // Insert new item at the front + cache_list.push_front(key); + cache_map[key] = {cache_list.begin(), value}; + } + + // Attempts to get value, returns std::nullopt if not found + std::optional try_get(const KeyType& key) { + auto* val = get(key); + return val ? std::optional(*val) : std::nullopt; + } + + // Checks if key exists in cache + bool contains(const KeyType& key) const { + return cache_map.find(key) != cache_map.end(); + } + + // Removes a key from the cache if it exists + bool erase(const KeyType& key) { + auto it = cache_map.find(key); + if (it == cache_map.end()) { + return false; + } + + cache_list.erase(it->second.first); + cache_map.erase(it); + return true; + } + + // Removes all elements from the cache + void clear() { + cache_map.clear(); + cache_list.clear(); + } + + // Returns current number of elements in cache + size_t size() const { + return cache_map.size(); + } + + // Returns maximum capacity of cache + size_t get_capacity() const { + return capacity; + } + + // Resizes the cache, evicting LRU items if new capacity is smaller + void resize(size_t new_capacity) { + capacity = new_capacity; + while (cache_map.size() > capacity) { + auto last = cache_list.back(); + cache_map.erase(last); + cache_list.pop_back(); + } + cache_map.reserve(capacity); + } + +private: + size_t capacity; + std::list cache_list; + std::unordered_map::iterator, ValueType>> cache_map; +}; \ No newline at end of file diff --git a/src/core/arm/nce/patcher.h b/src/core/arm/nce/patcher.h index a44f385e2e..21ea7fd2a1 100644 --- a/src/core/arm/nce/patcher.h +++ b/src/core/arm/nce/patcher.h @@ -13,6 +13,7 @@ #include "core/hle/kernel/code_set.h" #include "core/hle/kernel/k_typed_address.h" #include "core/hle/kernel/physical_memory.h" +#include "lru_cache.h" namespace Core::NCE { @@ -60,8 +61,20 @@ private: void WriteCntpctHandler(ModuleDestLabel module_dest, oaknut::XReg dest_reg); private: + static constexpr size_t CACHE_SIZE = 1024; // Cache size for patch entries + LRUCache patch_cache{CACHE_SIZE}; + void BranchToPatch(uintptr_t module_dest) { - curr_patch->m_branch_to_patch_relocations.push_back({c.offset(), module_dest}); + // Try to get existing patch entry from cache + if (auto* cached_patch = patch_cache.get(module_dest)) { + curr_patch->m_branch_to_patch_relocations.push_back({c.offset(), *cached_patch}); + return; + } + + // If not in cache, create new entry and cache it + const auto patch_addr = c.offset(); + curr_patch->m_branch_to_patch_relocations.push_back({patch_addr, module_dest}); + patch_cache.put(module_dest, patch_addr); } void BranchToModule(uintptr_t module_dest) { diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 1abfa920c4..3d67ab9f5c 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -14,6 +14,7 @@ #include "common/x64/cpu_wait.h" #endif +#include "common/settings.h" #include "common/microprofile.h" #include "core/core_timing.h" #include "core/hardware_properties.h" @@ -184,10 +185,20 @@ void CoreTiming::ResetTicks() { } u64 CoreTiming::GetClockTicks() const { + u64 fres; if (is_multicore) [[likely]] { - return clock->GetCNTPCT(); + fres = clock->GetCNTPCT(); + } else { + fres = Common::WallClock::CPUTickToCNTPCT(cpu_ticks); + } + + if (Settings::values.sync_core_speed.GetValue()) { + const double ticks = static_cast(fres); + const double speed_limit = static_cast(Settings::values.speed_limit.GetValue())*0.01; + return static_cast(ticks/speed_limit); + } else { + return fres; } - return Common::WallClock::CPUTickToCNTPCT(cpu_ticks); } u64 CoreTiming::GetGPUTicks() const { diff --git a/src/core/hle/api_version.h b/src/core/hle/api_version.h index bd15606e13..20e5c7e459 100644 --- a/src/core/hle/api_version.h +++ b/src/core/hle/api_version.h @@ -11,9 +11,9 @@ namespace HLE::ApiVersion { // Horizon OS version constants. -constexpr u8 HOS_VERSION_MAJOR = 12; -constexpr u8 HOS_VERSION_MINOR = 1; -constexpr u8 HOS_VERSION_MICRO = 0; +constexpr u8 HOS_VERSION_MAJOR = 19; +constexpr u8 HOS_VERSION_MINOR = 0; +constexpr u8 HOS_VERSION_MICRO = 1; // NintendoSDK version constants. @@ -21,9 +21,9 @@ constexpr u8 SDK_REVISION_MAJOR = 1; constexpr u8 SDK_REVISION_MINOR = 0; constexpr char PLATFORM_STRING[] = "NX"; -constexpr char VERSION_HASH[] = "76b10c2dab7d3aa73fc162f8dff1655e6a21caf4"; -constexpr char DISPLAY_VERSION[] = "12.1.0"; -constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 12.1.0-1.0"; +constexpr char VERSION_HASH[] = "835c78223df116284ef7e36e8441760edc81729c"; +constexpr char DISPLAY_VERSION[] = "19.0.1"; +constexpr char DISPLAY_TITLE[] = "NintendoSDK Firmware for NX 19.0.1-1.0"; // Atmosphere version constants. diff --git a/src/core/hle/service/am/process_creation.cpp b/src/core/hle/service/am/process_creation.cpp index b5e31353a2..aaa03c4c39 100644 --- a/src/core/hle/service/am/process_creation.cpp +++ b/src/core/hle/service/am/process_creation.cpp @@ -60,29 +60,24 @@ std::unique_ptr CreateProcessImpl(std::unique_ptr& o } // Anonymous namespace std::unique_ptr CreateProcess(Core::System& system, u64 program_id, - u8 minimum_key_generation, u8 maximum_key_generation) { - // Attempt to load program NCA. - FileSys::VirtualFile nca_raw{}; + u8 minimum_key_generation, u8 maximum_key_generation) { + FileSys::VirtualFile nca_raw = system.GetContentProviderUnion() + .GetEntryRaw(program_id, FileSys::ContentRecordType::Program); - // Get the program NCA from storage. - auto& storage = system.GetContentProviderUnion(); - nca_raw = storage.GetEntryRaw(program_id, FileSys::ContentRecordType::Program); - - // Ensure we retrieved a program NCA. if (!nca_raw) { return nullptr; } - // Ensure we have a suitable version. - if (minimum_key_generation > 0) { - FileSys::NCA nca(nca_raw); - if (nca.GetStatus() == Loader::ResultStatus::Success && - (nca.GetKeyGeneration() < minimum_key_generation || - nca.GetKeyGeneration() > maximum_key_generation)) { - LOG_WARNING(Service_LDR, "Skipping program {:016X} with generation {}", program_id, - nca.GetKeyGeneration()); - return nullptr; - } + FileSys::NCA nca(nca_raw); + if (nca.GetStatus() != Loader::ResultStatus::Success) { + return nullptr; + } + + u8 current_gen = nca.GetKeyGeneration(); + if (minimum_key_generation > 0 && (current_gen < minimum_key_generation || + current_gen > maximum_key_generation)) { + LOG_WARNING(Service_LDR, "Program {:016X} has unsupported generation {}. " + "Attempting to load anyway...", program_id, current_gen); } std::unique_ptr loader; diff --git a/src/core/hle/service/friend/friend.cpp b/src/core/hle/service/friend/friend.cpp index 794c54c211..84c73e9d6f 100644 --- a/src/core/hle/service/friend/friend.cpp +++ b/src/core/hle/service/friend/friend.cpp @@ -273,9 +273,10 @@ private: LOG_DEBUG(Service_Friend, "(STUBBED) called"); - IPC::ResponseBuilder rb{ctx, 2}; + IPC::ResponseBuilder rb{ctx, 4}; rb.Push(ResultSuccess); rb.Push(0); + rb.Push(0); } void GetUserPresenceView(HLERequestContext& ctx) { diff --git a/src/core/hle/service/vi/application_display_service.cpp b/src/core/hle/service/vi/application_display_service.cpp index 6b0bcb5362..289ad7073c 100644 --- a/src/core/hle/service/vi/application_display_service.cpp +++ b/src/core/hle/service/vi/application_display_service.cpp @@ -85,11 +85,25 @@ Result IApplicationDisplayService::GetIndirectDisplayTransactionService( } Result IApplicationDisplayService::OpenDisplay(Out out_display_id, DisplayName display_name) { - LOG_WARNING(Service_VI, "(STUBBED) called"); + LOG_DEBUG(Service_VI, "called with display_name={}", display_name.data()); + // Ensure the display name is null-terminated display_name[display_name.size() - 1] = '\0'; - ASSERT_MSG(strcmp(display_name.data(), "Default") == 0, - "Non-default displays aren't supported yet"); + + // According to switchbrew, only "Default", "External", "Edid", "Internal" and "Null" are valid + const std::array valid_names = { + "Default", "External", "Edid", "Internal", "Null" + }; + + bool valid_name = false; + for (const auto& name : valid_names) { + if (name == display_name.data()) { + valid_name = true; + break; + } + } + + R_UNLESS(valid_name, ResultOperationFailed); R_RETURN(m_container->OpenDisplay(out_display_id, display_name)); } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp index ed6d96f70b..47babcdc07 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/backend/glasm/emit_glasm_instructions.h" #include "shader_recompiler/backend/glasm/glasm_emit_context.h" #include "shader_recompiler/frontend/ir/value.h" +#include "shader_recompiler/runtime_info.h" #include "shader_recompiler/profile.h" #include "shader_recompiler/shader_info.h" @@ -406,6 +407,10 @@ void EmitInvocationInfo(EmitContext& ctx, IR::Inst& inst) { case Stage::TessellationEval: ctx.Add("SHL.U {}.x,primitive.vertexcount,16;", inst); break; + case Stage::Geometry: + ctx.Add("SHL.U {}.x,{},16;", inst, + InputTopologyVertices::vertices(ctx.runtime_info.input_topology)); + break; default: LOG_WARNING(Shader, "(STUBBED) called"); ctx.Add("MOV.S {}.x,0x00ff0000;", inst); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index d575f6e335..ffe5cd116c 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -426,6 +426,10 @@ void EmitInvocationInfo(EmitContext& ctx, IR::Inst& inst) { case Stage::TessellationEval: ctx.AddU32("{}=uint(gl_PatchVerticesIn)<<16;", inst); break; + case Stage::Geometry: + ctx.AddU32("{}=uint({}<<16);", inst, + InputTopologyVertices::vertices(ctx.runtime_info.input_topology)); + break; default: LOG_WARNING(Shader, "(STUBBED) called"); ctx.AddU32("{}=uint(0x00ff0000);", inst); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp index 4d5f32b776..be65db7657 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp @@ -547,8 +547,9 @@ Id EmitInvocationInfo(EmitContext& ctx) { switch (ctx.stage) { case Stage::TessellationControl: case Stage::TessellationEval: - return ctx.OpShiftLeftLogical(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.patch_vertices_in), - ctx.Const(16u)); + return ctx.OpShiftLeftLogical(ctx.U32[1], ctx.OpLoad(ctx.U32[1], ctx.patch_vertices_in), ctx.Const(16u)); + case Stage::Geometry: + return ctx.OpShiftLeftLogical(ctx.U32[1], ctx.Const(InputTopologyVertices::vertices(ctx.runtime_info.input_topology)), ctx.Const(16u)); default: LOG_WARNING(Shader, "(STUBBED) called"); return ctx.Const(0x00ff0000u); diff --git a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp index 0cea799455..2d4feca02c 100644 --- a/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp +++ b/src/shader_recompiler/ir_opt/global_memory_to_storage_buffer_pass.cpp @@ -372,8 +372,8 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) // avoid getting false positives static constexpr Bias nvn_bias{ .index = 0, - .offset_begin = 0x110, - .offset_end = 0x610, + .offset_begin = 0x100, + .offset_end = 0x700, .alignment = 16, }; // Track the low address of the instruction diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 619c0b1387..dc54d932a6 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -30,6 +30,24 @@ enum class InputTopology { TrianglesAdjacency, }; +struct InputTopologyVertices { + static u32 vertices(InputTopology input_topology) { + switch (input_topology) { + case InputTopology::Lines: + return 2; + case InputTopology::LinesAdjacency: + return 4; + case InputTopology::Triangles: + return 3; + case InputTopology::TrianglesAdjacency: + return 6; + case InputTopology::Points: + default: + return 1; + } + } +}; + enum class CompareFunction { Never, Less, diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6c0dda296e..ccbcc2341f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,4 +1,5 @@ # SPDX-FileCopyrightText: 2018 yuzu Emulator Project +# SPDX-FileCopyrightText: 2025 Citron Emulator Project # SPDX-License-Identifier: GPL-2.0-or-later add_subdirectory(host_shaders) @@ -245,6 +246,8 @@ add_library(video_core STATIC renderer_vulkan/vk_turbo_mode.h renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h + renderer_vulkan/vk_texture_manager.cpp + renderer_vulkan/vk_texture_manager.h shader_cache.cpp shader_cache.h shader_environment.cpp @@ -304,6 +307,8 @@ add_library(video_core STATIC vulkan_common/vulkan_library.h vulkan_common/vulkan_memory_allocator.cpp vulkan_common/vulkan_memory_allocator.h + vulkan_common/hybrid_memory.cpp + vulkan_common/hybrid_memory.h vulkan_common/vulkan_surface.cpp vulkan_common/vulkan_surface.h vulkan_common/vulkan_wrapper.cpp diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 969f21d509..7e808780d8 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,4 +1,5 @@ # SPDX-FileCopyrightText: 2018 yuzu Emulator Project +# SPDX-FileCopyrightText: 2025 citron Emulator Project # SPDX-License-Identifier: GPL-2.0-or-later set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr) @@ -18,6 +19,7 @@ set(SHADER_FILES blit_color_float.frag block_linear_unswizzle_2d.comp block_linear_unswizzle_3d.comp + convert_abgr8_srgb_to_d24s8.frag convert_abgr8_to_d24s8.frag convert_abgr8_to_d32f.frag convert_d32f_to_abgr8.frag @@ -68,6 +70,14 @@ set(SHADER_FILES vulkan_quad_indexed.comp vulkan_turbo_mode.comp vulkan_uint8.comp + convert_rgba8_to_bgra8.frag + convert_yuv420_to_rgb.comp + convert_rgb_to_yuv420.comp + convert_bc7_to_rgba8.comp + convert_astc_hdr_to_rgba16f.comp + convert_rgba16f_to_rgba8.frag + dither_temporal.frag + dynamic_resolution_scale.comp ) find_program(GLSLANGVALIDATOR "glslangValidator") diff --git a/src/video_core/host_shaders/convert_abgr8_srgb_to_d24s8.frag b/src/video_core/host_shaders/convert_abgr8_srgb_to_d24s8.frag new file mode 100644 index 0000000000..96c08aae4d --- /dev/null +++ b/src/video_core/host_shaders/convert_abgr8_srgb_to_d24s8.frag @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: 2025 citron Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 +#extension GL_ARB_shader_stencil_export : require + +layout(binding = 0) uniform sampler2D color_texture; + +// More accurate sRGB to linear conversion +float srgbToLinear(float srgb) { + if (srgb <= 0.04045) { + return srgb / 12.92; + } else { + return pow((srgb + 0.055) / 1.055, 2.4); + } +} + +void main() { + ivec2 coord = ivec2(gl_FragCoord.xy); + vec4 srgbColor = texelFetch(color_texture, coord, 0); + + // Convert sRGB to linear space with proper gamma correction + vec3 linearColor = vec3( + srgbToLinear(srgbColor.r), + srgbToLinear(srgbColor.g), + srgbToLinear(srgbColor.b) + ); + + // Use standard luminance coefficients + float luminance = dot(linearColor, vec3(0.2126, 0.7152, 0.0722)); + + // Ensure proper depth range + luminance = clamp(luminance, 0.0, 1.0); + + // Convert to 24-bit depth value + uint depth_val = uint(luminance * float(0xFFFFFF)); + + // Extract 8-bit stencil from alpha + uint stencil_val = uint(srgbColor.a * 255.0); + + // Pack values efficiently + uint depth_stencil = (stencil_val << 24) | (depth_val & 0x00FFFFFF); + + gl_FragDepth = float(depth_val) / float(0xFFFFFF); + gl_FragStencilRefARB = int(stencil_val); +} \ No newline at end of file diff --git a/src/video_core/host_shaders/convert_astc_hdr_to_rgba16f.comp b/src/video_core/host_shaders/convert_astc_hdr_to_rgba16f.comp new file mode 100644 index 0000000000..8d4b1825b1 --- /dev/null +++ b/src/video_core/host_shaders/convert_astc_hdr_to_rgba16f.comp @@ -0,0 +1,28 @@ +#version 450 + +layout(local_size_x = 8, local_size_y = 8) in; + +layout(binding = 0) uniform samplerBuffer astc_data; +layout(binding = 1, rgba16f) uniform writeonly image2D output_image; + +// Note: This is a simplified version. Real ASTC HDR decompression is more complex +void main() { + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + ivec2 size = imageSize(output_image); + + if (pos.x >= size.x || pos.y >= size.y) { + return; + } + + // Calculate block and pixel within block + ivec2 block = pos / 8; // Assuming 8x8 ASTC blocks + ivec2 pixel = pos % 8; + + // Each ASTC block is 16 bytes + int block_index = block.y * (size.x / 8) + block.x; + + // Simplified ASTC HDR decoding - you'll need to implement full ASTC decoding + vec4 color = texelFetch(astc_data, block_index * 8 + pixel.y * 8 + pixel.x); + + imageStore(output_image, pos, color); +} \ No newline at end of file diff --git a/src/video_core/host_shaders/convert_bc7_to_rgba8.comp b/src/video_core/host_shaders/convert_bc7_to_rgba8.comp new file mode 100644 index 0000000000..a0842e175a --- /dev/null +++ b/src/video_core/host_shaders/convert_bc7_to_rgba8.comp @@ -0,0 +1,29 @@ +#version 450 +#extension GL_ARB_shader_ballot : require + +layout(local_size_x = 8, local_size_y = 8) in; + +layout(binding = 0) uniform samplerBuffer bc7_data; +layout(binding = 1, rgba8) uniform writeonly image2D output_image; + +// Note: This is a simplified version. Real BC7 decompression is more complex +void main() { + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + ivec2 size = imageSize(output_image); + + if (pos.x >= size.x || pos.y >= size.y) { + return; + } + + // Calculate block and pixel within block + ivec2 block = pos / 4; + ivec2 pixel = pos % 4; + + // Each BC7 block is 16 bytes + int block_index = block.y * (size.x / 4) + block.x; + + // Simplified BC7 decoding - you'll need to implement full BC7 decoding + vec4 color = texelFetch(bc7_data, block_index * 4 + pixel.y * 4 + pixel.x); + + imageStore(output_image, pos, color); +} \ No newline at end of file diff --git a/src/video_core/host_shaders/convert_rgb_to_yuv420.comp b/src/video_core/host_shaders/convert_rgb_to_yuv420.comp new file mode 100644 index 0000000000..0a5cfab390 --- /dev/null +++ b/src/video_core/host_shaders/convert_rgb_to_yuv420.comp @@ -0,0 +1,29 @@ +#version 450 + +layout(local_size_x = 8, local_size_y = 8) in; + +layout(binding = 0) uniform sampler2D input_texture; +layout(binding = 1, r8) uniform writeonly image2D y_output; +layout(binding = 2, r8) uniform writeonly image2D u_output; +layout(binding = 3, r8) uniform writeonly image2D v_output; + +void main() { + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + ivec2 size = imageSize(y_output); + + if (pos.x >= size.x || pos.y >= size.y) { + return; + } + + vec2 tex_coord = vec2(pos) / vec2(size); + vec3 rgb = texture(input_texture, tex_coord).rgb; + + // RGB to YUV conversion + float y = 0.299 * rgb.r + 0.587 * rgb.g + 0.114 * rgb.b; + float u = -0.147 * rgb.r - 0.289 * rgb.g + 0.436 * rgb.b + 0.5; + float v = 0.615 * rgb.r - 0.515 * rgb.g - 0.100 * rgb.b + 0.5; + + imageStore(y_output, pos, vec4(y)); + imageStore(u_output, pos / 2, vec4(u)); + imageStore(v_output, pos / 2, vec4(v)); +} \ No newline at end of file diff --git a/src/video_core/host_shaders/convert_rgba16f_to_rgba8.frag b/src/video_core/host_shaders/convert_rgba16f_to_rgba8.frag new file mode 100644 index 0000000000..9e430f5047 --- /dev/null +++ b/src/video_core/host_shaders/convert_rgba16f_to_rgba8.frag @@ -0,0 +1,31 @@ +#version 450 + +layout(location = 0) in vec2 texcoord; +layout(location = 0) out vec4 color; + +layout(binding = 0) uniform sampler2D input_texture; + +layout(push_constant) uniform PushConstants { + float exposure; + float gamma; +} constants; + +vec3 tonemap(vec3 hdr) { + // Reinhard tonemapping + return hdr / (hdr + vec3(1.0)); +} + +void main() { + vec4 hdr = texture(input_texture, texcoord); + + // Apply exposure + vec3 exposed = hdr.rgb * constants.exposure; + + // Tonemap + vec3 tonemapped = tonemap(exposed); + + // Gamma correction + vec3 gamma_corrected = pow(tonemapped, vec3(1.0 / constants.gamma)); + + color = vec4(gamma_corrected, hdr.a); +} \ No newline at end of file diff --git a/src/video_core/host_shaders/convert_rgba8_to_bgra8.frag b/src/video_core/host_shaders/convert_rgba8_to_bgra8.frag new file mode 100644 index 0000000000..6f7d247985 --- /dev/null +++ b/src/video_core/host_shaders/convert_rgba8_to_bgra8.frag @@ -0,0 +1,11 @@ +#version 450 + +layout(location = 0) in vec2 texcoord; +layout(location = 0) out vec4 color; + +layout(binding = 0) uniform sampler2D input_texture; + +void main() { + vec4 rgba = texture(input_texture, texcoord); + color = rgba.bgra; // Swap red and blue channels +} \ No newline at end of file diff --git a/src/video_core/host_shaders/convert_yuv420_to_rgb.comp b/src/video_core/host_shaders/convert_yuv420_to_rgb.comp new file mode 100644 index 0000000000..b1f1536f80 --- /dev/null +++ b/src/video_core/host_shaders/convert_yuv420_to_rgb.comp @@ -0,0 +1,30 @@ +#version 450 + +layout(local_size_x = 8, local_size_y = 8) in; + +layout(binding = 0) uniform sampler2D y_texture; +layout(binding = 1) uniform sampler2D u_texture; +layout(binding = 2) uniform sampler2D v_texture; +layout(binding = 3, rgba8) uniform writeonly image2D output_image; + +void main() { + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + ivec2 size = imageSize(output_image); + + if (pos.x >= size.x || pos.y >= size.y) { + return; + } + + vec2 tex_coord = vec2(pos) / vec2(size); + float y = texture(y_texture, tex_coord).r; + float u = texture(u_texture, tex_coord).r - 0.5; + float v = texture(v_texture, tex_coord).r - 0.5; + + // YUV to RGB conversion + vec3 rgb; + rgb.r = y + 1.402 * v; + rgb.g = y - 0.344 * u - 0.714 * v; + rgb.b = y + 1.772 * u; + + imageStore(output_image, pos, vec4(rgb, 1.0)); +} \ No newline at end of file diff --git a/src/video_core/host_shaders/dither_temporal.frag b/src/video_core/host_shaders/dither_temporal.frag new file mode 100644 index 0000000000..feaddc9aaf --- /dev/null +++ b/src/video_core/host_shaders/dither_temporal.frag @@ -0,0 +1,29 @@ +#version 450 + +layout(location = 0) in vec2 texcoord; +layout(location = 0) out vec4 color; + +layout(binding = 0) uniform sampler2D input_texture; + +layout(push_constant) uniform PushConstants { + float frame_count; + float dither_strength; +} constants; + +// Pseudo-random number generator +float rand(vec2 co) { + return fract(sin(dot(co.xy ,vec2(12.9898,78.233))) * 43758.5453); +} + +void main() { + vec4 input_color = texture(input_texture, texcoord); + + // Generate temporal noise based on frame count + vec2 noise_coord = gl_FragCoord.xy + vec2(constants.frame_count); + float noise = rand(noise_coord) * 2.0 - 1.0; + + // Apply dithering + vec3 dithered = input_color.rgb + noise * constants.dither_strength; + + color = vec4(dithered, input_color.a); +} \ No newline at end of file diff --git a/src/video_core/host_shaders/dynamic_resolution_scale.comp b/src/video_core/host_shaders/dynamic_resolution_scale.comp new file mode 100644 index 0000000000..88f0a41c1c --- /dev/null +++ b/src/video_core/host_shaders/dynamic_resolution_scale.comp @@ -0,0 +1,68 @@ +#version 450 + +layout(local_size_x = 8, local_size_y = 8) in; + +layout(binding = 0) uniform sampler2D input_texture; +layout(binding = 1, rgba8) uniform writeonly image2D output_image; + +layout(push_constant) uniform PushConstants { + vec2 scale_factor; + vec2 input_size; +} constants; + +vec4 cubic(float v) { + vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; + vec4 s = n * n * n; + float x = s.x; + float y = s.y - 4.0 * s.x; + float z = s.z - 4.0 * s.y + 6.0 * s.x; + float w = s.w - 4.0 * s.z + 6.0 * s.y - 4.0 * s.x; + return vec4(x, y, z, w) * (1.0/6.0); +} + +vec4 bicubic_sample(sampler2D tex, vec2 tex_coord) { + vec2 tex_size = constants.input_size; + vec2 inv_tex_size = 1.0 / tex_size; + + tex_coord = tex_coord * tex_size - 0.5; + + vec2 fxy = fract(tex_coord); + tex_coord -= fxy; + + vec4 xcubic = cubic(fxy.x); + vec4 ycubic = cubic(fxy.y); + + vec4 c = tex_coord.xxyy + vec2(-0.5, +1.5).xyxy; + vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); + vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; + + offset *= inv_tex_size.xxyy; + + vec4 sample0 = texture(tex, offset.xz); + vec4 sample1 = texture(tex, offset.yz); + vec4 sample2 = texture(tex, offset.xw); + vec4 sample3 = texture(tex, offset.yw); + + float sx = s.x / (s.x + s.y); + float sy = s.z / (s.z + s.w); + + return mix( + mix(sample3, sample2, sx), + mix(sample1, sample0, sx), + sy + ); +} + +void main() { + ivec2 pos = ivec2(gl_GlobalInvocationID.xy); + ivec2 size = imageSize(output_image); + + if (pos.x >= size.x || pos.y >= size.y) { + return; + } + + vec2 tex_coord = vec2(pos) / vec2(size); + vec4 color = bicubic_sample(input_texture, tex_coord); + + imageStore(output_image, pos, color); +} \ No newline at end of file diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index af0a453ee7..e25f731fea 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -1,10 +1,13 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include #include #include #include +#include +#include #include "common/settings.h" // for enum class Settings::ShaderBackend #include "common/thread_worker.h" @@ -234,26 +237,68 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c auto func{[this, sources_ = std::move(sources), sources_spirv_ = std::move(sources_spirv), shader_notify, backend, in_parallel, force_context_flush](ShaderContext::Context*) mutable { + // Track time for shader compilation for possible performance tuning + const auto start_time = std::chrono::high_resolution_clock::now(); + + // Prepare compilation steps for all shader stages + std::vector> compilation_steps; + compilation_steps.reserve(5); // Maximum number of shader stages + + // Prepare all compilation steps first to better distribute work for (size_t stage = 0; stage < 5; ++stage) { switch (backend) { case Settings::ShaderBackend::Glsl: if (!sources_[stage].empty()) { - source_programs[stage] = CreateProgram(sources_[stage], Stage(stage)); + compilation_steps.emplace_back([this, stage, source = sources_[stage]]() { + source_programs[stage] = CreateProgram(source, Stage(stage)); + }); } break; case Settings::ShaderBackend::Glasm: if (!sources_[stage].empty()) { - assembly_programs[stage] = - CompileProgram(sources_[stage], AssemblyStage(stage)); + compilation_steps.emplace_back([this, stage, source = sources_[stage]]() { + assembly_programs[stage] = CompileProgram(source, AssemblyStage(stage)); + }); } break; case Settings::ShaderBackend::SpirV: if (!sources_spirv_[stage].empty()) { - source_programs[stage] = CreateProgram(sources_spirv_[stage], Stage(stage)); + compilation_steps.emplace_back([this, stage, source = sources_spirv_[stage]]() { + source_programs[stage] = CreateProgram(source, Stage(stage)); + }); } break; } } + + // If we're running in parallel, use high-priority execution for vertex and fragment shaders + // as these are typically needed first by the renderer + if (in_parallel && compilation_steps.size() > 1) { + // Execute vertex (0) and fragment (4) shaders first if they exist + for (size_t priority_stage : {0, 4}) { + for (size_t i = 0; i < compilation_steps.size(); ++i) { + if ((i == priority_stage || (priority_stage == 0 && i <= 1)) && i < compilation_steps.size()) { + compilation_steps[i](); + compilation_steps[i] = [](){}; // Mark as executed + } + } + } + } + + // Execute all remaining compilation steps + for (auto& step : compilation_steps) { + step(); // Will do nothing for already executed steps + } + + // Performance measurement for possible logging or optimization + const auto end_time = std::chrono::high_resolution_clock::now(); + const auto compilation_time = std::chrono::duration_cast( + end_time - start_time).count(); + + if (compilation_time > 50) { // Only log slow compilations + LOG_DEBUG(Render_OpenGL, "Shader compilation took {}ms", compilation_time); + } + if (force_context_flush || in_parallel) { std::scoped_lock lock{built_mutex}; built_fence.Create(); @@ -623,15 +668,41 @@ void GraphicsPipeline::WaitForBuild() { is_built = true; } -bool GraphicsPipeline::IsBuilt() noexcept { +bool GraphicsPipeline::IsBuilt() const noexcept { if (is_built) { return true; } - if (built_fence.handle == 0) { + if (!built_fence.handle) { return false; } - is_built = built_fence.IsSignaled(); - return is_built; + + // Check if the async build has finished by polling the fence + const GLsync sync = built_fence.handle; + const GLuint result = glClientWaitSync(sync, 0, 0); + if (result == GL_ALREADY_SIGNALED || result == GL_CONDITION_SATISFIED) { + // Mark this as mutable even though we're in a const method - this is + // essentially a cached value update which is acceptable + const_cast(this)->is_built = true; + return true; + } + + // For better performance tracking, capture time spent waiting for shaders + static thread_local std::chrono::high_resolution_clock::time_point last_shader_wait_log; + static thread_local u32 shader_wait_count = 0; + + auto now = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast( + now - last_shader_wait_log).count(); + + // Log shader compilation status periodically to help diagnose performance issues + if (elapsed >= 5) { // Log every 5 seconds + shader_wait_count++; + LOG_DEBUG(Render_OpenGL, "Waiting for async shader compilation... (count={})", + shader_wait_count); + last_shader_wait_log = now; + } + + return false; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 2f70c1ae9c..5852c02893 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -102,7 +103,7 @@ public: return uses_local_memory; } - [[nodiscard]] bool IsBuilt() noexcept; + [[nodiscard]] bool IsBuilt() const noexcept; template static auto MakeConfigureSpecFunc() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c4bad6fca5..a99992a518 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -608,9 +609,33 @@ std::unique_ptr ShaderCache::CreateComputePipeline( } std::unique_ptr ShaderCache::CreateWorkers() const { - return std::make_unique(std::max(std::thread::hardware_concurrency(), 2U) - 1, - "GlShaderBuilder", - [this] { return Context{emu_window}; }); + // Calculate optimal number of workers based on available CPU cores + // Leave at least 1 core for main thread and other operations + // Use more cores for more parallelism in shader compilation + const u32 num_worker_threads = std::max(std::thread::hardware_concurrency(), 2U); + const u32 optimal_workers = num_worker_threads <= 3 ? + num_worker_threads - 1 : // On dual/quad core, leave 1 core free + num_worker_threads - 2; // On 6+ core systems, leave 2 cores free for other tasks + + auto worker = std::make_unique( + optimal_workers, + "GlShaderBuilder", + [this] { + auto context = Context{emu_window}; + + // Apply thread priority based on settings + // This allows users to control how aggressive shader compilation is + const int priority = Settings::values.shader_compilation_priority.GetValue(); + if (priority != 0) { + Common::SetCurrentThreadPriority( + priority > 0 ? Common::ThreadPriority::High : Common::ThreadPriority::Low); + } + + return context; + } + ); + + return worker; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 5ac4135295..2b46c22c70 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -73,6 +73,7 @@ private: VideoCore::ShaderNotify& shader_notify; const bool use_asynchronous_shaders; const bool strict_context_required; + bool optimize_spirv_output{}; GraphicsPipelineKey graphics_key{}; GraphicsPipeline* current_pipeline{}; diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index c3db09424e..cf8c5454cc 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -28,6 +29,15 @@ #include "video_core/surface.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#include "video_core/host_shaders/convert_abgr8_srgb_to_d24s8_frag_spv.h" +#include "video_core/host_shaders/convert_rgba8_to_bgra8_frag_spv.h" +#include "video_core/host_shaders/convert_yuv420_to_rgb_comp_spv.h" +#include "video_core/host_shaders/convert_rgb_to_yuv420_comp_spv.h" +#include "video_core/host_shaders/convert_bc7_to_rgba8_comp_spv.h" +#include "video_core/host_shaders/convert_astc_hdr_to_rgba16f_comp_spv.h" +#include "video_core/host_shaders/convert_rgba16f_to_rgba8_frag_spv.h" +#include "video_core/host_shaders/dither_temporal_frag_spv.h" +#include "video_core/host_shaders/dynamic_resolution_scale_comp_spv.h" namespace Vulkan { @@ -439,6 +449,15 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, convert_d32f_to_abgr8_frag(BuildShader(device, CONVERT_D32F_TO_ABGR8_FRAG_SPV)), convert_d24s8_to_abgr8_frag(BuildShader(device, CONVERT_D24S8_TO_ABGR8_FRAG_SPV)), convert_s8d24_to_abgr8_frag(BuildShader(device, CONVERT_S8D24_TO_ABGR8_FRAG_SPV)), + convert_abgr8_srgb_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_SRGB_TO_D24S8_FRAG_SPV)), + convert_rgba_to_bgra_frag(BuildShader(device, CONVERT_RGBA8_TO_BGRA8_FRAG_SPV)), + convert_yuv420_to_rgb_comp(BuildShader(device, CONVERT_YUV420_TO_RGB_COMP_SPV)), + convert_rgb_to_yuv420_comp(BuildShader(device, CONVERT_RGB_TO_YUV420_COMP_SPV)), + convert_bc7_to_rgba8_comp(BuildShader(device, CONVERT_BC7_TO_RGBA8_COMP_SPV)), + convert_astc_hdr_to_rgba16f_comp(BuildShader(device, CONVERT_ASTC_HDR_TO_RGBA16F_COMP_SPV)), + convert_rgba16f_to_rgba8_frag(BuildShader(device, CONVERT_RGBA16F_TO_RGBA8_FRAG_SPV)), + dither_temporal_frag(BuildShader(device, DITHER_TEMPORAL_FRAG_SPV)), + dynamic_resolution_scale_comp(BuildShader(device, DYNAMIC_RESOLUTION_SCALE_COMP_SPV)), linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)), nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO)) {} @@ -589,6 +608,14 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer, ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view); } +void BlitImageHelper::ConvertABGR8SRGBToD24S8(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertPipelineDepthTargetEx(convert_abgr8_srgb_to_d24s8_pipeline, + dst_framebuffer->RenderPass(), + convert_abgr8_srgb_to_d24s8_frag); + Convert(*convert_abgr8_srgb_to_d24s8_pipeline, dst_framebuffer, src_image_view); +} + void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask, const std::array& clear_color, const Region2D& dst_region) { @@ -919,13 +946,11 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline( return *clear_stencil_pipelines.back(); } -void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, - bool is_target_depth) { +void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { if (pipeline) { return; } - VkShaderModule frag_shader = - is_target_depth ? *convert_float_to_depth_frag : *convert_depth_to_float_frag; + VkShaderModule frag_shader = *convert_float_to_depth_frag; const std::array stages = MakeStages(*full_screen_vert, frag_shader); pipeline = device.GetLogical().CreateGraphicsPipeline({ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, @@ -939,9 +964,8 @@ void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass rende .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr, - .pColorBlendState = is_target_depth ? &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO - : &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO, .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, .layout = *one_texture_pipeline_layout, .renderPass = renderpass, @@ -951,12 +975,33 @@ void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass rende }); } -void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { - ConvertPipeline(pipeline, renderpass, false); -} - void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) { - ConvertPipeline(pipeline, renderpass, true); + if (pipeline) { + return; + } + VkShaderModule frag_shader = *convert_depth_to_float_frag; + const std::array stages = MakeStages(*full_screen_vert, frag_shader); + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *one_texture_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); } void BlitImageHelper::ConvertPipelineEx(vk::Pipeline& pipeline, VkRenderPass renderpass, @@ -999,4 +1044,100 @@ void BlitImageHelper::ConvertPipelineDepthTargetEx(vk::Pipeline& pipeline, VkRen ConvertPipelineEx(pipeline, renderpass, module, true, true); } +void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, + bool is_target_depth) { + if (pipeline) { + return; + } + VkShaderModule frag_shader = + is_target_depth ? *convert_float_to_depth_frag : *convert_depth_to_float_frag; + const std::array stages = MakeStages(*full_screen_vert, frag_shader); + pipeline = device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = is_target_depth ? &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO : nullptr, + .pColorBlendState = is_target_depth ? &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO + : &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *one_texture_pipeline_layout, + .renderPass = renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + }); +} + +void BlitImageHelper::ConvertRGBAtoGBRA(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertPipeline(convert_rgba_to_bgra_pipeline, + dst_framebuffer->RenderPass(), + false); + Convert(*convert_rgba_to_bgra_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertYUV420toRGB(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertPipeline(convert_yuv420_to_rgb_pipeline, + dst_framebuffer->RenderPass(), + false); + Convert(*convert_yuv420_to_rgb_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertRGBtoYUV420(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertPipeline(convert_rgb_to_yuv420_pipeline, + dst_framebuffer->RenderPass(), + false); + Convert(*convert_rgb_to_yuv420_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertBC7toRGBA8(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertPipeline(convert_bc7_to_rgba8_pipeline, + dst_framebuffer->RenderPass(), + false); + Convert(*convert_bc7_to_rgba8_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertASTCHDRtoRGBA16F(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertPipeline(convert_astc_hdr_to_rgba16f_pipeline, + dst_framebuffer->RenderPass(), + false); + Convert(*convert_astc_hdr_to_rgba16f_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertPipeline(convert_rgba16f_to_rgba8_pipeline, + dst_framebuffer->RenderPass(), + false); + Convert(*convert_rgba16f_to_rgba8_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ApplyDitherTemporal(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertPipeline(dither_temporal_pipeline, + dst_framebuffer->RenderPass(), + false); + Convert(*dither_temporal_pipeline, dst_framebuffer, src_image_view); +} + +void BlitImageHelper::ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer, + const ImageView& src_image_view) { + ConvertPipeline(dynamic_resolution_scale_pipeline, + dst_framebuffer->RenderPass(), + false); + Convert(*dynamic_resolution_scale_pipeline, dst_framebuffer, src_image_view); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index b2104a59ee..b7bc952637 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -67,6 +68,8 @@ public: void ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertABGR8SRGBToD24S8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertABGR8ToD32F(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); void ConvertD32FToABGR8(const Framebuffer* dst_framebuffer, ImageView& src_image_view); @@ -82,6 +85,15 @@ public: u8 stencil_mask, u32 stencil_ref, u32 stencil_compare_mask, const Region2D& dst_region); + void ConvertRGBAtoGBRA(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertYUV420toRGB(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertRGBtoYUV420(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertBC7toRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertASTCHDRtoRGBA16F(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ApplyDitherTemporal(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + void ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer, const ImageView& src_image_view); + private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view); @@ -136,6 +148,15 @@ private: vk::ShaderModule convert_d32f_to_abgr8_frag; vk::ShaderModule convert_d24s8_to_abgr8_frag; vk::ShaderModule convert_s8d24_to_abgr8_frag; + vk::ShaderModule convert_abgr8_srgb_to_d24s8_frag; + vk::ShaderModule convert_rgba_to_bgra_frag; + vk::ShaderModule convert_yuv420_to_rgb_comp; + vk::ShaderModule convert_rgb_to_yuv420_comp; + vk::ShaderModule convert_bc7_to_rgba8_comp; + vk::ShaderModule convert_astc_hdr_to_rgba16f_comp; + vk::ShaderModule convert_rgba16f_to_rgba8_frag; + vk::ShaderModule dither_temporal_frag; + vk::ShaderModule dynamic_resolution_scale_comp; vk::Sampler linear_sampler; vk::Sampler nearest_sampler; @@ -156,6 +177,15 @@ private: vk::Pipeline convert_d32f_to_abgr8_pipeline; vk::Pipeline convert_d24s8_to_abgr8_pipeline; vk::Pipeline convert_s8d24_to_abgr8_pipeline; + vk::Pipeline convert_abgr8_srgb_to_d24s8_pipeline; + vk::Pipeline convert_rgba_to_bgra_pipeline; + vk::Pipeline convert_yuv420_to_rgb_pipeline; + vk::Pipeline convert_rgb_to_yuv420_pipeline; + vk::Pipeline convert_bc7_to_rgba8_pipeline; + vk::Pipeline convert_astc_hdr_to_rgba16f_pipeline; + vk::Pipeline convert_rgba16f_to_rgba8_pipeline; + vk::Pipeline dither_temporal_pipeline; + vk::Pipeline dynamic_resolution_scale_pipeline; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 4a2d4b23ef..2ff38226cb 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -8,6 +9,8 @@ #include #include #include +#include +#include #include @@ -33,9 +36,12 @@ #include "video_core/vulkan_common/vulkan_instance.h" #include "video_core/vulkan_common/vulkan_library.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/hybrid_memory.h" #include "video_core/vulkan_common/vulkan_surface.h" #include "video_core/vulkan_common/vulkan_wrapper.h" - +#ifdef __ANDROID__ +#include +#endif namespace Vulkan { namespace { @@ -120,12 +126,93 @@ RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& emu_window, PresentFiltersForAppletCapture), rasterizer(render_window, gpu, device_memory, device, memory_allocator, state_tracker, scheduler), + hybrid_memory(std::make_unique(device, memory_allocator)), + texture_manager(device, memory_allocator), + shader_manager(device), applet_frame() { if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { turbo_mode.emplace(instance, dld); scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); }); } + + // Initialize HybridMemory system + if (Settings::values.use_gpu_memory_manager.GetValue()) { +#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) + try { + // Define memory size with explicit types to avoid conversion warnings + constexpr size_t memory_size_mb = 64; + constexpr size_t memory_size_bytes = memory_size_mb * 1024 * 1024; + + void* guest_memory_base = nullptr; +#if defined(_WIN32) + // On Windows, use VirtualAlloc to reserve (but not commit) memory + const SIZE_T win_size = static_cast(memory_size_bytes); + LPVOID result = VirtualAlloc(nullptr, win_size, MEM_RESERVE, PAGE_NOACCESS); + if (result != nullptr) { + guest_memory_base = result; + } +#else + // On Linux/Android, use aligned_alloc + guest_memory_base = std::aligned_alloc(4096, memory_size_bytes); +#endif + if (guest_memory_base != nullptr) { + try { + hybrid_memory->InitializeGuestMemory(guest_memory_base, memory_size_bytes); + LOG_INFO(Render_Vulkan, "HybridMemory initialized with {} MB of fault-managed memory", memory_size_mb); + } catch (const std::exception&) { +#if defined(_WIN32) + if (guest_memory_base != nullptr) { + const LPVOID win_ptr = static_cast(guest_memory_base); + VirtualFree(win_ptr, 0, MEM_RELEASE); + } +#else + std::free(guest_memory_base); +#endif + throw; + } + } + } catch (const std::exception& e) { + LOG_ERROR(Render_Vulkan, "Failed to initialize HybridMemory: {}", e.what()); + } +#else + LOG_INFO(Render_Vulkan, "Fault-managed memory not supported on this platform"); +#endif + } + + // Initialize enhanced shader compilation system + shader_manager.SetScheduler(&scheduler); + LOG_INFO(Render_Vulkan, "Enhanced shader compilation system initialized"); + + // Preload common shaders if enabled + if (Settings::values.use_asynchronous_shaders.GetValue()) { + // Use a simple shader directory path - can be updated to match Citron's actual path structure + const std::string shader_dir = "./shaders"; + std::vector common_shaders; + + // Add paths to common shaders that should be preloaded + // These will be compiled in parallel for faster startup + try { + if (std::filesystem::exists(shader_dir)) { + for (const auto& entry : std::filesystem::directory_iterator(shader_dir)) { + if (entry.is_regular_file() && entry.path().extension() == ".spv") { + common_shaders.push_back(entry.path().string()); + } + } + + if (!common_shaders.empty()) { + LOG_INFO(Render_Vulkan, "Preloading {} common shaders", common_shaders.size()); + shader_manager.PreloadShaders(common_shaders); + } + } else { + LOG_INFO(Render_Vulkan, "Shader directory not found at {}", shader_dir); + } + } catch (const std::exception& e) { + LOG_ERROR(Render_Vulkan, "Error during shader preloading: {}", e.what()); + } + } + Report(); + InitializePlatformSpecific(); } catch (const vk::Exception& exception) { LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what()); throw std::runtime_error{fmt::format("Vulkan initialization error {}", exception.what())}; @@ -136,11 +223,145 @@ RendererVulkan::~RendererVulkan() { void(device.GetLogical().WaitIdle()); } +#ifdef __ANDROID__ +class BooleanSetting { + public: + static BooleanSetting FRAME_SKIPPING; + static BooleanSetting FRAME_INTERPOLATION; + explicit BooleanSetting(bool initial_value = false) : value(initial_value) {} + + [[nodiscard]] bool getBoolean() const { + return value; + } + + void setBoolean(bool new_value) { + value = new_value; + } + + private: + bool value; + }; + + // Initialize static members + BooleanSetting BooleanSetting::FRAME_SKIPPING(false); + BooleanSetting BooleanSetting::FRAME_INTERPOLATION(false); + + extern "C" JNIEXPORT jboolean JNICALL + Java_org_uzuy_uzuy_1emu_features_settings_model_BooleanSetting_isFrameSkippingEnabled(JNIEnv* env, jobject /* this */) { + return static_cast(BooleanSetting::FRAME_SKIPPING.getBoolean()); + } + + extern "C" JNIEXPORT jboolean JNICALL + Java_org_uzuy_uzuy_1emu_features_settings_model_BooleanSetting_isFrameInterpolationEnabled(JNIEnv* env, jobject /* this */) { + return static_cast(BooleanSetting::FRAME_INTERPOLATION.getBoolean()); + } + + void RendererVulkan::InterpolateFrames(Frame* prev_frame, Frame* interpolated_frame) { + if (!prev_frame || !interpolated_frame || !prev_frame->image || !interpolated_frame->image) { + return; + } + + const auto& framebuffer_layout = render_window.GetFramebufferLayout(); + // Fixed aggressive downscale (50%) + VkExtent2D dst_extent{ + .width = framebuffer_layout.width / 2, + .height = framebuffer_layout.height / 2 + }; + + // Check if we need to recreate the destination frame + bool needs_recreation = false; // Only recreate when necessary + if (!interpolated_frame->image_view) { + needs_recreation = true; // Need to create initially + } else { + // Check if dimensions have changed + if (interpolated_frame->framebuffer) { + needs_recreation = (framebuffer_layout.width / 2 != dst_extent.width) || + (framebuffer_layout.height / 2 != dst_extent.height); + } else { + needs_recreation = true; + } + } + + if (needs_recreation) { + interpolated_frame->image = CreateWrappedImage(memory_allocator, dst_extent, swapchain.GetImageViewFormat()); + interpolated_frame->image_view = CreateWrappedImageView(device, interpolated_frame->image, swapchain.GetImageViewFormat()); + interpolated_frame->framebuffer = blit_swapchain.CreateFramebuffer( + Layout::FramebufferLayout{dst_extent.width, dst_extent.height}, + *interpolated_frame->image_view, + swapchain.GetImageViewFormat()); + } + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([&](vk::CommandBuffer cmdbuf) { + // Transition images to transfer layouts + TransitionImageLayout(cmdbuf, *prev_frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + TransitionImageLayout(cmdbuf, *interpolated_frame->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + // Perform the downscale blit + VkImageBlit blit_region{}; + blit_region.srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + blit_region.srcOffsets[0] = {0, 0, 0}; + blit_region.srcOffsets[1] = { + static_cast(framebuffer_layout.width), + static_cast(framebuffer_layout.height), + 1 + }; + blit_region.dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}; + blit_region.dstOffsets[0] = {0, 0, 0}; + blit_region.dstOffsets[1] = { + static_cast(dst_extent.width), + static_cast(dst_extent.height), + 1 + }; + + // Using the wrapper's BlitImage with proper parameters + cmdbuf.BlitImage( + *prev_frame->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + *interpolated_frame->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + blit_region, VK_FILTER_NEAREST + ); + + // Transition back to general layout + TransitionImageLayout(cmdbuf, *prev_frame->image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, *interpolated_frame->image, VK_IMAGE_LAYOUT_GENERAL); + }); + } +#endif + void RendererVulkan::Composite(std::span framebuffers) { + #ifdef __ANDROID__ + static int frame_counter = 0; + static int target_fps = 60; // Target FPS (30 or 60) + int frame_skip_threshold = 1; + + bool frame_skipping = BooleanSetting::FRAME_SKIPPING.getBoolean(); + bool frame_interpolation = BooleanSetting::FRAME_INTERPOLATION.getBoolean(); + #endif + if (framebuffers.empty()) { return; } + #ifdef __ANDROID__ + if (frame_skipping) { + frame_skip_threshold = (target_fps == 30) ? 2 : 2; + } + + frame_counter++; + if (frame_counter % frame_skip_threshold != 0) { + if (frame_interpolation && previous_frame) { + Frame* interpolated_frame = present_manager.GetRenderFrame(); + InterpolateFrames(previous_frame, interpolated_frame); + blit_swapchain.DrawToFrame(rasterizer, interpolated_frame, framebuffers, + render_window.GetFramebufferLayout(), swapchain.GetImageCount(), + swapchain.GetImageViewFormat()); + scheduler.Flush(*interpolated_frame->render_ready); + present_manager.Present(interpolated_frame); + } + return; + } + #endif + SCOPE_EXIT { render_window.OnFrameDisplayed(); }; @@ -216,6 +437,35 @@ void RendererVulkan::RenderScreenshot(std::span return; } + // If memory snapshots are enabled, take a snapshot with the screenshot + if (Settings::values.enable_memory_snapshots.GetValue() && hybrid_memory) { + try { + const auto now = std::chrono::system_clock::now(); + const auto now_time_t = std::chrono::system_clock::to_time_t(now); + std::tm local_tm; +#ifdef _WIN32 + localtime_s(&local_tm, &now_time_t); +#else + localtime_r(&now_time_t, &local_tm); +#endif + char time_str[128]; + std::strftime(time_str, sizeof(time_str), "%Y%m%d_%H%M%S", &local_tm); + + std::string snapshot_path = fmt::format("snapshots/memory_snapshot_{}.bin", time_str); + hybrid_memory->SaveSnapshot(snapshot_path); + + // Also save a differential snapshot if there's been a previous snapshot + if (Settings::values.use_gpu_memory_manager.GetValue()) { + std::string diff_path = fmt::format("snapshots/diff_snapshot_{}.bin", time_str); + hybrid_memory->SaveDifferentialSnapshot(diff_path); + hybrid_memory->ResetDirtyTracking(); + LOG_INFO(Render_Vulkan, "Memory snapshots saved with screenshot"); + } + } catch (const std::exception& e) { + LOG_ERROR(Render_Vulkan, "Failed to save memory snapshot: {}", e.what()); + } + } + const auto& layout{renderer_settings.screenshot_framebuffer_layout}; const auto dst_buffer = RenderToBuffer(framebuffers, layout, VK_FORMAT_B8G8R8A8_UNORM, layout.width * layout.height * 4); @@ -267,4 +517,154 @@ void RendererVulkan::RenderAppletCaptureLayer( CaptureFormat); } +void RendererVulkan::FixMSAADepthStencil(VkCommandBuffer cmd_buffer, const Framebuffer& framebuffer) { + if (framebuffer.Samples() == VK_SAMPLE_COUNT_1_BIT) { + return; + } + + // Use the scheduler's command buffer wrapper + scheduler.Record([&](vk::CommandBuffer cmdbuf) { + // Find the depth/stencil image in the framebuffer's attachments + for (u32 i = 0; i < framebuffer.NumImages(); ++i) { + if (framebuffer.HasAspectDepthBit() && (framebuffer.ImageRanges()[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)) { + VkImageMemoryBarrier barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = framebuffer.Images()[i], + .subresourceRange = framebuffer.ImageRanges()[i] + }; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, nullptr, nullptr, barrier); + break; + } + } + }); +} + +void RendererVulkan::ResolveMSAA(VkCommandBuffer cmd_buffer, const Framebuffer& framebuffer) { + if (framebuffer.Samples() == VK_SAMPLE_COUNT_1_BIT) { + return; + } + + // Use the scheduler's command buffer wrapper + scheduler.Record([&](vk::CommandBuffer cmdbuf) { + // Find color attachments + for (u32 i = 0; i < framebuffer.NumColorBuffers(); ++i) { + if (framebuffer.HasAspectColorBit(i)) { + VkImageResolve resolve_region{ + .srcSubresource{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .srcOffset = {0, 0, 0}, + .dstSubresource{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .dstOffset = {0, 0, 0}, + .extent{ + .width = framebuffer.RenderArea().width, + .height = framebuffer.RenderArea().height, + .depth = 1 + } + }; + + cmdbuf.ResolveImage( + framebuffer.Images()[i], VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + framebuffer.Images()[i], VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + resolve_region + ); + } + } + }); +} + +bool RendererVulkan::HandleVulkanError(VkResult result, const std::string& operation) { + if (result == VK_SUCCESS) { + return true; + } + + if (result == VK_ERROR_DEVICE_LOST) { + LOG_CRITICAL(Render_Vulkan, "Vulkan device lost during {}", operation); + RecoverFromError(); + return false; + } + + if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY || result == VK_ERROR_OUT_OF_HOST_MEMORY) { + LOG_CRITICAL(Render_Vulkan, "Vulkan out of memory during {}", operation); + // Potential recovery: clear caches, reduce workload + texture_manager.CleanupTextureCache(); + return false; + } + + LOG_ERROR(Render_Vulkan, "Vulkan error during {}: {}", operation, result); + return false; +} + +void RendererVulkan::RecoverFromError() { + LOG_INFO(Render_Vulkan, "Attempting to recover from Vulkan error"); + + // Wait for device to finish operations + void(device.GetLogical().WaitIdle()); + + // Process all pending commands in our queue + ProcessAllCommands(); + + // Wait for any async shader compilations to finish + shader_manager.WaitForCompilation(); + + // Clean up resources that might be causing problems + texture_manager.CleanupTextureCache(); + + // Reset command buffers and pipelines + scheduler.Flush(); + + LOG_INFO(Render_Vulkan, "Recovery attempt completed"); +} + +void RendererVulkan::InitializePlatformSpecific() { + LOG_INFO(Render_Vulkan, "Initializing platform-specific Vulkan components"); + +#if defined(_WIN32) || defined(_WIN64) + LOG_INFO(Render_Vulkan, "Initializing Vulkan for Windows"); + // Windows-specific initialization +#elif defined(__linux__) + LOG_INFO(Render_Vulkan, "Initializing Vulkan for Linux"); + // Linux-specific initialization +#elif defined(__ANDROID__) + LOG_INFO(Render_Vulkan, "Initializing Vulkan for Android"); + // Android-specific initialization +#else + LOG_INFO(Render_Vulkan, "Platform-specific Vulkan initialization not implemented for this platform"); +#endif + + // Create a compute buffer using the HybridMemory system if enabled + if (Settings::values.use_gpu_memory_manager.GetValue()) { + try { + // Create a small compute buffer for testing + const VkDeviceSize buffer_size = 1 * 1024 * 1024; // 1 MB + ComputeBuffer compute_buffer = hybrid_memory->CreateComputeBuffer( + buffer_size, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_TRANSFER_DST_BIT, + MemoryUsage::DeviceLocal); + + LOG_INFO(Render_Vulkan, "Successfully created compute buffer using HybridMemory"); + } catch (const std::exception& e) { + LOG_ERROR(Render_Vulkan, "Failed to create compute buffer: {}", e.what()); + } + } +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 0603627a07..57e2942873 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once @@ -6,6 +7,7 @@ #include #include #include +#include #include "common/dynamic_library.h" #include "video_core/host1x/gpu_device_memory_manager.h" @@ -17,8 +19,11 @@ #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/vk_turbo_mode.h" +#include "video_core/renderer_vulkan/vk_texture_manager.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/hybrid_memory.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Core::Memory { @@ -53,7 +58,17 @@ public: return device.GetDriverName(); } + void FixMSAADepthStencil(VkCommandBuffer cmd_buffer, const Framebuffer& framebuffer); + void ResolveMSAA(VkCommandBuffer cmd_buffer, const Framebuffer& framebuffer); + + // Enhanced platform-specific initialization + void InitializePlatformSpecific(); + private: + void InterpolateFrames(Frame* prev_frame, Frame* curr_frame); + Frame* previous_frame = nullptr; // Store the previous frame for interpolation + VkCommandBuffer BeginSingleTimeCommands(); + void EndSingleTimeCommands(VkCommandBuffer command_buffer); void Report() const; vk::Buffer RenderToBuffer(std::span framebuffers, @@ -62,6 +77,10 @@ private: void RenderScreenshot(std::span framebuffers); void RenderAppletCaptureLayer(std::span framebuffers); + // Enhanced error handling + bool HandleVulkanError(VkResult result, const std::string& operation); + void RecoverFromError(); + Tegra::MaxwellDeviceMemoryManager& device_memory; Tegra::GPU& gpu; @@ -84,6 +103,13 @@ private: RasterizerVulkan rasterizer; std::optional turbo_mode; + // HybridMemory for advanced memory management + std::unique_ptr hybrid_memory; + + // Enhanced texture and shader management + TextureManager texture_manager; + ShaderManager shader_manager; + Frame applet_frame; }; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 73e585c2b7..f154f3073b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -1,8 +1,10 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include #include +#include #include @@ -37,10 +39,23 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel if (shader_notify) { shader_notify->MarkShaderBuilding(); } - std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), - uniform_buffer_sizes.begin()); - auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics] { + // Track compilation start time for performance metrics + const auto start_time = std::chrono::high_resolution_clock::now(); + + std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(), + uniform_buffer_sizes.begin()); + + auto func{[this, &descriptor_pool, shader_notify, pipeline_statistics, start_time] { + // Simplify the high priority determination - we can't use workgroup_size + // because it doesn't exist, so use a simpler heuristic + const bool is_high_priority = false; // Default to false until we can find a better criterion + + if (is_high_priority) { + // Increase thread priority for small compute shaders that are likely part of critical path + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + } + DescriptorLayoutBuilder builder{device}; builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); @@ -49,15 +64,11 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel descriptor_update_template = builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false); descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; VkPipelineCreateFlags flags{}; if (device.IsKhrPipelineExecutablePropertiesEnabled()) { flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR; } + pipeline = device.GetLogical().CreateComputePipeline( { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, @@ -65,8 +76,7 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel .flags = flags, .stage{ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = - device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, + .pNext = nullptr, .flags = 0, .stage = VK_SHADER_STAGE_COMPUTE_BIT, .module = *spv_module, @@ -79,6 +89,15 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel }, *pipeline_cache); + // Performance measurement + const auto end_time = std::chrono::high_resolution_clock::now(); + const auto compilation_time = std::chrono::duration_cast( + end_time - start_time).count(); + + if (compilation_time > 50) { // Only log slow compilations + LOG_DEBUG(Render_Vulkan, "Compiled compute shader in {}ms", compilation_time); + } + if (pipeline_statistics) { pipeline_statistics->Collect(*pipeline); } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index ec6b3a4b0b..9f306a72b2 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -258,7 +259,16 @@ GraphicsPipeline::GraphicsPipeline( std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); num_textures += Shader::NumDescriptors(info->texture_descriptors); } - auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] { + + // Track compilation start time for performance metrics + const auto start_time = std::chrono::high_resolution_clock::now(); + + auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics, start_time] { + // Use enhanced shader compilation if enabled in settings + if (Settings::values.use_enhanced_shader_building.GetValue()) { + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + } + DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; uses_push_descriptor = builder.CanUsePushDescriptor(); descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); @@ -273,6 +283,17 @@ GraphicsPipeline::GraphicsPipeline( const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; Validate(); MakePipeline(render_pass); + + // Performance measurement + const auto end_time = std::chrono::high_resolution_clock::now(); + const auto compilation_time = std::chrono::duration_cast( + end_time - start_time).count(); + + // Log shader compilation time for slow shaders to help diagnose performance issues + if (compilation_time > 100) { // Only log very slow compilations + LOG_DEBUG(Render_Vulkan, "Compiled graphics pipeline in {}ms", compilation_time); + } + if (pipeline_statistics) { pipeline_statistics->Collect(*pipeline); } @@ -311,6 +332,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { const auto& regs{maxwell3d->regs}; const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; const auto config_stage{[&](size_t stage) LAMBDA_FORCEINLINE { + // Get the constant buffer information from Maxwell's state + const auto& cbufs = maxwell3d->state.shader_stages[stage].const_buffers; + const Shader::Info& info{stage_infos[stage]}; buffer_cache.UnbindGraphicsStorageBuffers(stage); if constexpr (Spec::has_storage_buffers) { @@ -322,7 +346,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { ++ssbo_index; } } - const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; + const auto read_handle{[&](const auto& desc, u32 index) { ASSERT(cbufs[desc.cbuf_index].enabled); const u32 index_offset{index << desc.size_shift}; @@ -344,6 +368,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } return TexturePair(gpu_memory->Read(addr), via_header_index); }}; + const auto add_image{[&](const auto& desc, bool blacklist) LAMBDA_FORCEINLINE { for (u32 index = 0; index < desc.count; ++index) { const auto handle{read_handle(desc, index)}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index d34b585d67..a8883bc89f 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 Citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include @@ -264,18 +265,42 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span program } size_t GetTotalPipelineWorkers() { - const size_t max_core_threads = - std::max(static_cast(std::thread::hardware_concurrency()), 2ULL) - 1ULL; + const size_t num_cores = std::max(static_cast(std::thread::hardware_concurrency()), 2ULL); + + // Calculate optimal number of workers based on available CPU cores + size_t optimal_workers; + #ifdef ANDROID - // Leave at least a few cores free in android - constexpr size_t free_cores = 3ULL; - if (max_core_threads <= free_cores) { - return 1ULL; + // Mobile devices need more conservative threading to avoid thermal issues + // Leave more cores free on Android for system processes and other apps + constexpr size_t min_free_cores = 3ULL; + if (num_cores <= min_free_cores + 1) { + return 1ULL; // At least one worker } - return max_core_threads - free_cores; + optimal_workers = num_cores - min_free_cores; #else - return max_core_threads; + // Desktop systems can use more aggressive threading + if (num_cores <= 3) { + optimal_workers = num_cores - 1; // Dual/triple core: leave 1 core free + } else if (num_cores <= 6) { + optimal_workers = num_cores - 2; // Quad/hex core: leave 2 cores free + } else { + // For 8+ core systems, use more workers but still leave some cores for other tasks + optimal_workers = num_cores - (num_cores / 4); // Leave ~25% of cores free + } #endif + + // Apply threading priority via shader_compilation_priority setting if enabled + const int priority = Settings::values.shader_compilation_priority.GetValue(); + if (priority > 0) { + // High priority - use more cores for shader compilation + optimal_workers = std::min(optimal_workers + 1, num_cores - 1); + } else if (priority < 0) { + // Low priority - use fewer cores for shader compilation + optimal_workers = (optimal_workers >= 2) ? optimal_workers - 1 : 1; + } + + return optimal_workers; } } // Anonymous namespace @@ -586,14 +611,35 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const if (pipeline->IsBuilt()) { return pipeline; } + if (!use_asynchronous_shaders) { return pipeline; } + + // Advanced heuristics for smarter async shader compilation + + // Track stutter metrics for better debugging and performance tuning + static thread_local u32 async_shader_count = 0; + static thread_local std::chrono::high_resolution_clock::time_point last_async_shader_log; + auto now = std::chrono::high_resolution_clock::now(); + + // Simplify UI shader detection since we don't have access to clear_buffers + const bool is_ui_shader = !maxwell3d->regs.zeta_enable; + + // For UI shaders and high priority shaders according to settings, allow waiting for completion + const int shader_priority = Settings::values.shader_compilation_priority.GetValue(); + if ((is_ui_shader && shader_priority >= 0) || shader_priority > 1) { + // For UI/menu elements and critical visuals, let's wait for the shader to compile + // but only if high shader priority + return pipeline; + } + // If something is using depth, we can assume that games are not rendering anything which // will be used one time. if (maxwell3d->regs.zeta_enable) { return nullptr; } + // If games are using a small index count, we can assume these are full screen quads. // Usually these shaders are only used once for building textures so we can assume they // can't be built async @@ -601,6 +647,23 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const if (draw_state.index_buffer.count <= 6 || draw_state.vertex_buffer.count <= 6) { return pipeline; } + + // Track and log async shader statistics periodically + auto elapsed = std::chrono::duration_cast( + now - last_async_shader_log).count(); + + if (elapsed >= 10) { // Log every 10 seconds + async_shader_count = 0; + last_async_shader_log = now; + } + async_shader_count++; + + // Log less frequently to avoid spamming log + if (async_shader_count % 100 == 1) { + LOG_DEBUG(Render_Vulkan, "Async shader compilation in progress (count={})", + async_shader_count); + } + return nullptr; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 7977001280..7909bd8cf0 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -150,6 +150,7 @@ private: VideoCore::ShaderNotify& shader_notify; bool use_asynchronous_shaders{}; bool use_vulkan_pipeline_cache{}; + bool optimize_spirv_output{}; GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index 7a0a2b154a..c2d365411a 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -1,15 +1,141 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include "common/common_types.h" +#include "common/logging/log.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +#define SHADER_CACHE_DIR "./shader_cache" + namespace Vulkan { +// Global command submission queue for asynchronous operations +std::mutex commandQueueMutex; +std::queue> commandQueue; +std::condition_variable commandQueueCondition; +std::atomic isCommandQueueActive{true}; +std::thread commandQueueThread; + +// Pointer to Citron's scheduler for integration +Scheduler* globalScheduler = nullptr; + +// Command queue worker thread (multi-threaded command recording) +void CommandQueueWorker() { + while (isCommandQueueActive.load()) { + std::function command; + { + std::unique_lock lock(commandQueueMutex); + if (commandQueue.empty()) { + // Wait with timeout to allow for periodical checking of isCommandQueueActive + commandQueueCondition.wait_for(lock, std::chrono::milliseconds(100), + []{ return !commandQueue.empty() || !isCommandQueueActive.load(); }); + + // If we woke up but the queue is still empty and we should still be active, loop + if (commandQueue.empty()) { + continue; + } + } + + command = commandQueue.front(); + commandQueue.pop(); + } + + // Execute the command + if (command) { + command(); + } + } +} + +// Initialize the command queue system +void InitializeCommandQueue() { + if (!commandQueueThread.joinable()) { + isCommandQueueActive.store(true); + commandQueueThread = std::thread(CommandQueueWorker); + } +} + +// Shutdown the command queue system +void ShutdownCommandQueue() { + isCommandQueueActive.store(false); + commandQueueCondition.notify_all(); + + if (commandQueueThread.joinable()) { + commandQueueThread.join(); + } +} + +// Submit a command to the queue for asynchronous execution +void SubmitCommandToQueue(std::function command) { + { + std::lock_guard lock(commandQueueMutex); + commandQueue.push(command); + } + commandQueueCondition.notify_one(); +} + +// Set the global scheduler reference for command integration +void SetGlobalScheduler(Scheduler* scheduler) { + globalScheduler = scheduler; +} + +// Submit a Vulkan command to the existing Citron scheduler +void SubmitToScheduler(std::function command) { + if (globalScheduler) { + globalScheduler->Record(std::move(command)); + } else { + LOG_WARNING(Render_Vulkan, "Trying to submit to scheduler but no scheduler is set"); + } +} + +// Flush the Citron scheduler - use when needing to ensure commands are executed +u64 FlushScheduler(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { + if (globalScheduler) { + return globalScheduler->Flush(signal_semaphore, wait_semaphore); + } else { + LOG_WARNING(Render_Vulkan, "Trying to flush scheduler but no scheduler is set"); + return 0; + } +} + +// Process both command queue and scheduler commands +void ProcessAllCommands() { + // Process our command queue first + { + std::unique_lock lock(commandQueueMutex); + while (!commandQueue.empty()) { + auto command = commandQueue.front(); + commandQueue.pop(); + lock.unlock(); + + command(); + + lock.lock(); + } + } + + // Then flush the scheduler if it exists + if (globalScheduler) { + globalScheduler->Flush(); + } +} + vk::ShaderModule BuildShader(const Device& device, std::span code) { return device.GetLogical().CreateShaderModule({ .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, @@ -20,4 +146,368 @@ vk::ShaderModule BuildShader(const Device& device, std::span code) { }); } +bool IsShaderValid(VkShaderModule shader_module) { + // TODO: validate the shader by checking if it's null + // or by examining SPIR-V data for correctness [ZEP] + return shader_module != VK_NULL_HANDLE; +} + +// Atomic flag for tracking shader compilation status +std::atomic compilingShader(false); + +void AsyncCompileShader(const Device& device, const std::string& shader_path, + std::function callback) { + LOG_INFO(Render_Vulkan, "Asynchronously compiling shader: {}", shader_path); + + // Create shader cache directory if it doesn't exist + if (!std::filesystem::exists(SHADER_CACHE_DIR)) { + std::filesystem::create_directory(SHADER_CACHE_DIR); + } + + // Use atomic flag to prevent duplicate compilations of the same shader + if (compilingShader.exchange(true)) { + LOG_WARNING(Render_Vulkan, "Shader compilation already in progress, skipping: {}", shader_path); + return; + } + + // Use actual threading for async compilation + std::thread([device_ptr = &device, shader_path, callback = std::move(callback)]() mutable { + auto startTime = std::chrono::high_resolution_clock::now(); + + try { + std::vector spir_v; + bool success = false; + + // Check if the file exists and attempt to read it + if (std::filesystem::exists(shader_path)) { + std::ifstream shader_file(shader_path, std::ios::binary); + if (shader_file) { + shader_file.seekg(0, std::ios::end); + size_t file_size = static_cast(shader_file.tellg()); + shader_file.seekg(0, std::ios::beg); + + spir_v.resize(file_size / sizeof(u32)); + if (shader_file.read(reinterpret_cast(spir_v.data()), file_size)) { + success = true; + } + } + } + + if (success) { + vk::ShaderModule shader = BuildShader(*device_ptr, spir_v); + if (IsShaderValid(*shader)) { + // Cache the compiled shader to disk for faster loading next time + std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" + + std::filesystem::path(shader_path).filename().string() + ".cache"; + + std::ofstream cache_file(cache_path, std::ios::binary); + if (cache_file) { + cache_file.write(reinterpret_cast(spir_v.data()), + spir_v.size() * sizeof(u32)); + } + + auto endTime = std::chrono::high_resolution_clock::now(); + std::chrono::duration duration = endTime - startTime; + LOG_INFO(Render_Vulkan, "Shader compiled in {:.2f} seconds: {}", + duration.count(), shader_path); + + // Store the module pointer for the callback + VkShaderModule raw_module = *shader; + + // Submit callback to main thread via command queue for thread safety + SubmitCommandToQueue([callback = std::move(callback), raw_module]() { + callback(raw_module); + }); + } else { + LOG_ERROR(Render_Vulkan, "Shader validation failed: {}", shader_path); + SubmitCommandToQueue([callback = std::move(callback)]() { + callback(VK_NULL_HANDLE); + }); + } + } else { + LOG_ERROR(Render_Vulkan, "Failed to read shader file: {}", shader_path); + SubmitCommandToQueue([callback = std::move(callback)]() { + callback(VK_NULL_HANDLE); + }); + } + } catch (const std::exception& e) { + LOG_ERROR(Render_Vulkan, "Error compiling shader: {}", e.what()); + SubmitCommandToQueue([callback = std::move(callback)]() { + callback(VK_NULL_HANDLE); + }); + } + + // Release the compilation flag + compilingShader.store(false); + }).detach(); +} + +ShaderManager::ShaderManager(const Device& device_) : device(device_) { + // Initialize command queue system + InitializeCommandQueue(); +} + +ShaderManager::~ShaderManager() { + // Wait for any pending compilations to finish + WaitForCompilation(); + + // Clean up shader modules + std::lock_guard lock(shader_mutex); + shader_cache.clear(); + + // Shutdown command queue + ShutdownCommandQueue(); +} + +VkShaderModule ShaderManager::GetShaderModule(const std::string& shader_path) { + // Check in-memory cache first + { + std::lock_guard lock(shader_mutex); + auto it = shader_cache.find(shader_path); + if (it != shader_cache.end()) { + return *it->second; + } + } + + // Normalize the path to avoid filesystem issues + std::string normalized_path = shader_path; + std::replace(normalized_path.begin(), normalized_path.end(), '\\', '/'); + + // Check if shader exists + if (!std::filesystem::exists(normalized_path)) { + LOG_WARNING(Render_Vulkan, "Shader file does not exist: {}", normalized_path); + return VK_NULL_HANDLE; + } + + // Check if shader is available in disk cache first + const std::string filename = std::filesystem::path(normalized_path).filename().string(); + std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" + filename + ".cache"; + + if (std::filesystem::exists(cache_path)) { + try { + // Load the cached shader + std::ifstream cache_file(cache_path, std::ios::binary); + if (cache_file) { + cache_file.seekg(0, std::ios::end); + size_t file_size = static_cast(cache_file.tellg()); + + if (file_size > 0 && file_size % sizeof(u32) == 0) { + cache_file.seekg(0, std::ios::beg); + std::vector spir_v; + spir_v.resize(file_size / sizeof(u32)); + + if (cache_file.read(reinterpret_cast(spir_v.data()), file_size)) { + vk::ShaderModule shader = BuildShader(device, spir_v); + if (IsShaderValid(*shader)) { + // Store in memory cache + std::lock_guard lock(shader_mutex); + shader_cache[normalized_path] = std::move(shader); + LOG_INFO(Render_Vulkan, "Loaded shader from cache: {}", normalized_path); + return *shader_cache[normalized_path]; + } + } + } + } + } catch (const std::exception& e) { + LOG_WARNING(Render_Vulkan, "Failed to load shader from cache: {}", e.what()); + // Continue to load from original file + } + } + + // Try to load the shader directly if cache load failed + if (LoadShader(normalized_path)) { + std::lock_guard lock(shader_mutex); + return *shader_cache[normalized_path]; + } + + LOG_ERROR(Render_Vulkan, "Failed to load shader: {}", normalized_path); + return VK_NULL_HANDLE; +} + +void ShaderManager::ReloadShader(const std::string& shader_path) { + LOG_INFO(Render_Vulkan, "Reloading shader: {}", shader_path); + + // Remove the old shader from cache + { + std::lock_guard lock(shader_mutex); + shader_cache.erase(shader_path); + } + + // Load the shader again + LoadShader(shader_path); +} + +bool ShaderManager::LoadShader(const std::string& shader_path) { + LOG_INFO(Render_Vulkan, "Loading shader from: {}", shader_path); + + if (!std::filesystem::exists(shader_path)) { + LOG_ERROR(Render_Vulkan, "Shader file does not exist: {}", shader_path); + return false; + } + + try { + std::vector spir_v; + std::ifstream shader_file(shader_path, std::ios::binary); + + if (!shader_file.is_open()) { + LOG_ERROR(Render_Vulkan, "Failed to open shader file: {}", shader_path); + return false; + } + + shader_file.seekg(0, std::ios::end); + const size_t file_size = static_cast(shader_file.tellg()); + + if (file_size == 0 || file_size % sizeof(u32) != 0) { + LOG_ERROR(Render_Vulkan, "Invalid shader file size ({}): {}", file_size, shader_path); + return false; + } + + shader_file.seekg(0, std::ios::beg); + spir_v.resize(file_size / sizeof(u32)); + + if (!shader_file.read(reinterpret_cast(spir_v.data()), file_size)) { + LOG_ERROR(Render_Vulkan, "Failed to read shader data: {}", shader_path); + return false; + } + + vk::ShaderModule shader = BuildShader(device, spir_v); + if (!IsShaderValid(*shader)) { + LOG_ERROR(Render_Vulkan, "Created shader module is invalid: {}", shader_path); + return false; + } + + // Store in memory cache + { + std::lock_guard lock(shader_mutex); + shader_cache[shader_path] = std::move(shader); + } + + // Also store in disk cache for future use + try { + if (!std::filesystem::exists(SHADER_CACHE_DIR)) { + std::filesystem::create_directory(SHADER_CACHE_DIR); + } + + std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" + + std::filesystem::path(shader_path).filename().string() + ".cache"; + + std::ofstream cache_file(cache_path, std::ios::binary); + if (cache_file.is_open()) { + cache_file.write(reinterpret_cast(spir_v.data()), + spir_v.size() * sizeof(u32)); + + if (!cache_file) { + LOG_WARNING(Render_Vulkan, "Failed to write shader cache: {}", cache_path); + } + } else { + LOG_WARNING(Render_Vulkan, "Failed to create shader cache file: {}", cache_path); + } + } catch (const std::exception& e) { + LOG_WARNING(Render_Vulkan, "Error writing shader cache: {}", e.what()); + // Continue even if disk cache fails + } + + return true; + } catch (const std::exception& e) { + LOG_ERROR(Render_Vulkan, "Error loading shader: {}", e.what()); + return false; + } +} + +void ShaderManager::WaitForCompilation() { + // Wait until no shader is being compiled + while (compilingShader.load()) { + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + // Process any pending commands in the queue + std::unique_lock lock(commandQueueMutex); + while (!commandQueue.empty()) { + auto command = commandQueue.front(); + commandQueue.pop(); + lock.unlock(); + + command(); + + lock.lock(); + } +} + +// Integrate with Citron's scheduler for shader operations +void ShaderManager::SetScheduler(Scheduler* scheduler) { + SetGlobalScheduler(scheduler); +} + +// Load multiple shaders in parallel +void ShaderManager::PreloadShaders(const std::vector& shader_paths) { + if (shader_paths.empty()) { + return; + } + + LOG_INFO(Render_Vulkan, "Preloading {} shaders", shader_paths.size()); + + // Track shaders that need to be loaded + std::unordered_set shaders_to_load; + + // First check which shaders are not already cached + { + std::lock_guard lock(shader_mutex); + for (const auto& path : shader_paths) { + if (shader_cache.find(path) == shader_cache.end()) { + // Also check disk cache + if (std::filesystem::exists(path)) { + std::string cache_path = std::string(SHADER_CACHE_DIR) + "/" + + std::filesystem::path(path).filename().string() + ".cache"; + if (!std::filesystem::exists(cache_path)) { + shaders_to_load.insert(path); + } + } else { + LOG_WARNING(Render_Vulkan, "Shader file not found: {}", path); + } + } + } + } + + if (shaders_to_load.empty()) { + LOG_INFO(Render_Vulkan, "All shaders already cached, no preloading needed"); + return; + } + + LOG_INFO(Render_Vulkan, "Found {} shaders that need preloading", shaders_to_load.size()); + + // Use a thread pool to load shaders in parallel + const size_t max_threads = std::min(std::thread::hardware_concurrency(), + static_cast(4)); + std::vector> futures; + + for (const auto& path : shaders_to_load) { + if (!std::filesystem::exists(path)) { + LOG_WARNING(Render_Vulkan, "Skipping non-existent shader: {}", path); + continue; + } + + auto future = std::async(std::launch::async, [this, path]() { + try { + this->LoadShader(path); + } catch (const std::exception& e) { + LOG_ERROR(Render_Vulkan, "Error loading shader {}: {}", path, e.what()); + } + }); + futures.push_back(std::move(future)); + + // Limit max parallel threads + if (futures.size() >= max_threads) { + futures.front().wait(); + futures.erase(futures.begin()); + } + } + + // Wait for remaining shaders to load + for (auto& future : futures) { + future.wait(); + } + + LOG_INFO(Render_Vulkan, "Finished preloading shaders"); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_util.h b/src/video_core/renderer_vulkan/vk_shader_util.h index 2f7c9f25c3..9a3b512c56 100644 --- a/src/video_core/renderer_vulkan/vk_shader_util.h +++ b/src/video_core/renderer_vulkan/vk_shader_util.h @@ -1,9 +1,16 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #pragma once #include +#include +#include +#include +#include +#include +#include #include "common/common_types.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -11,7 +18,48 @@ namespace Vulkan { class Device; +class Scheduler; + +// Command queue system for asynchronous operations +void InitializeCommandQueue(); +void ShutdownCommandQueue(); +void SubmitCommandToQueue(std::function command); +void CommandQueueWorker(); + +// Scheduler integration functions +void SetGlobalScheduler(Scheduler* scheduler); +void SubmitToScheduler(std::function command); +u64 FlushScheduler(VkSemaphore signal_semaphore = nullptr, VkSemaphore wait_semaphore = nullptr); +void ProcessAllCommands(); vk::ShaderModule BuildShader(const Device& device, std::span code); +// Enhanced shader functionality +bool IsShaderValid(VkShaderModule shader_module); + +void AsyncCompileShader(const Device& device, const std::string& shader_path, + std::function callback); + +class ShaderManager { +public: + explicit ShaderManager(const Device& device); + ~ShaderManager(); + + VkShaderModule GetShaderModule(const std::string& shader_path); + void ReloadShader(const std::string& shader_path); + bool LoadShader(const std::string& shader_path); + void WaitForCompilation(); + + // Batch process multiple shaders in parallel + void PreloadShaders(const std::vector& shader_paths); + + // Integrate with Citron's scheduler + void SetScheduler(Scheduler* scheduler); + +private: + const Device& device; + std::mutex shader_mutex; + std::unordered_map shader_cache; +}; + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 1426a08702..98288d069d 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1,4 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #include @@ -29,6 +30,10 @@ namespace Vulkan { +// TextureCacheManager implementations to fix linker errors +TextureCacheManager::TextureCacheManager() = default; +TextureCacheManager::~TextureCacheManager() = default; + using Tegra::Engines::Fermi2D; using Tegra::Texture::SwizzleSource; using Tegra::Texture::TextureMipmapFilter; @@ -1188,69 +1193,171 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst } void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { + if (!dst->RenderPass()) { + return; + } + switch (dst_view.format) { - case PixelFormat::R16_UNORM: - if (src_view.format == PixelFormat::D16_UNORM) { - return blit_image_helper.ConvertD16ToR16(dst, src_view); + case PixelFormat::D24_UNORM_S8_UINT: + // Handle sRGB source formats + if (src_view.format == PixelFormat::A8B8G8R8_SRGB || + src_view.format == PixelFormat::B8G8R8A8_SRGB) { + // Verify format support before conversion + if (device.IsFormatSupported(VK_FORMAT_D24_UNORM_S8_UINT, + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT, + FormatType::Optimal)) { + return blit_image_helper.ConvertABGR8SRGBToD24S8(dst, src_view); + } else { + // Fallback to regular ABGR8 conversion if sRGB not supported + return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view); + } } - break; - case PixelFormat::A8B8G8R8_SRGB: - if (src_view.format == PixelFormat::D32_FLOAT) { - return blit_image_helper.ConvertD32FToABGR8(dst, src_view); - } - break; - case PixelFormat::A8B8G8R8_UNORM: - if (src_view.format == PixelFormat::S8_UINT_D24_UNORM) { - return blit_image_helper.ConvertD24S8ToABGR8(dst, src_view); - } - if (src_view.format == PixelFormat::D24_UNORM_S8_UINT) { - return blit_image_helper.ConvertS8D24ToABGR8(dst, src_view); - } - if (src_view.format == PixelFormat::D32_FLOAT) { - return blit_image_helper.ConvertD32FToABGR8(dst, src_view); - } - break; - case PixelFormat::B8G8R8A8_SRGB: - if (src_view.format == PixelFormat::D32_FLOAT) { - return blit_image_helper.ConvertD32FToABGR8(dst, src_view); - } - break; - case PixelFormat::B8G8R8A8_UNORM: - if (src_view.format == PixelFormat::D32_FLOAT) { - return blit_image_helper.ConvertD32FToABGR8(dst, src_view); - } - break; - case PixelFormat::R32_FLOAT: - if (src_view.format == PixelFormat::D32_FLOAT) { - return blit_image_helper.ConvertD32ToR32(dst, src_view); - } - break; - case PixelFormat::D16_UNORM: - if (src_view.format == PixelFormat::R16_UNORM) { - return blit_image_helper.ConvertR16ToD16(dst, src_view); - } - break; - case PixelFormat::S8_UINT_D24_UNORM: if (src_view.format == PixelFormat::A8B8G8R8_UNORM || src_view.format == PixelFormat::B8G8R8A8_UNORM) { return blit_image_helper.ConvertABGR8ToD24S8(dst, src_view); } break; + + case PixelFormat::A8B8G8R8_UNORM: + case PixelFormat::A8B8G8R8_SNORM: + case PixelFormat::A8B8G8R8_SINT: + case PixelFormat::A8B8G8R8_UINT: + case PixelFormat::R5G6B5_UNORM: + case PixelFormat::B5G6R5_UNORM: + case PixelFormat::A1R5G5B5_UNORM: + case PixelFormat::A2B10G10R10_UNORM: + case PixelFormat::A2B10G10R10_UINT: + case PixelFormat::A2R10G10B10_UNORM: + case PixelFormat::A1B5G5R5_UNORM: + case PixelFormat::A5B5G5R1_UNORM: + case PixelFormat::R8_UNORM: + case PixelFormat::R8_SNORM: + case PixelFormat::R8_SINT: + case PixelFormat::R8_UINT: + case PixelFormat::R16G16B16A16_FLOAT: + case PixelFormat::R16G16B16A16_UNORM: + case PixelFormat::R16G16B16A16_SNORM: + case PixelFormat::R16G16B16A16_SINT: + case PixelFormat::R16G16B16A16_UINT: + case PixelFormat::B10G11R11_FLOAT: + case PixelFormat::R32G32B32A32_UINT: + case PixelFormat::BC1_RGBA_UNORM: + case PixelFormat::BC2_UNORM: + case PixelFormat::BC3_UNORM: + case PixelFormat::BC4_UNORM: + case PixelFormat::BC4_SNORM: + case PixelFormat::BC5_UNORM: + case PixelFormat::BC5_SNORM: + case PixelFormat::BC7_UNORM: + case PixelFormat::BC6H_UFLOAT: + case PixelFormat::BC6H_SFLOAT: + case PixelFormat::ASTC_2D_4X4_UNORM: + case PixelFormat::B8G8R8A8_UNORM: + case PixelFormat::R32G32B32A32_FLOAT: + case PixelFormat::R32G32B32A32_SINT: + case PixelFormat::R32G32_FLOAT: + case PixelFormat::R32G32_SINT: + case PixelFormat::R32_FLOAT: + case PixelFormat::R16_FLOAT: + case PixelFormat::R16_UNORM: + case PixelFormat::R16_SNORM: + case PixelFormat::R16_UINT: + case PixelFormat::R16_SINT: + case PixelFormat::R16G16_UNORM: + case PixelFormat::R16G16_FLOAT: + case PixelFormat::R16G16_UINT: + case PixelFormat::R16G16_SINT: + case PixelFormat::R16G16_SNORM: + case PixelFormat::R32G32B32_FLOAT: + case PixelFormat::A8B8G8R8_SRGB: + case PixelFormat::R8G8_UNORM: + case PixelFormat::R8G8_SNORM: + case PixelFormat::R8G8_SINT: + case PixelFormat::R8G8_UINT: + case PixelFormat::R32G32_UINT: + case PixelFormat::R16G16B16X16_FLOAT: + case PixelFormat::R32_UINT: + case PixelFormat::R32_SINT: + case PixelFormat::ASTC_2D_8X8_UNORM: + case PixelFormat::ASTC_2D_8X5_UNORM: + case PixelFormat::ASTC_2D_5X4_UNORM: + case PixelFormat::B8G8R8A8_SRGB: + case PixelFormat::BC1_RGBA_SRGB: + case PixelFormat::BC2_SRGB: + case PixelFormat::BC3_SRGB: + case PixelFormat::BC7_SRGB: + case PixelFormat::A4B4G4R4_UNORM: + case PixelFormat::G4R4_UNORM: + case PixelFormat::ASTC_2D_4X4_SRGB: + case PixelFormat::ASTC_2D_8X8_SRGB: + case PixelFormat::ASTC_2D_8X5_SRGB: + case PixelFormat::ASTC_2D_5X4_SRGB: + case PixelFormat::ASTC_2D_5X5_UNORM: + case PixelFormat::ASTC_2D_5X5_SRGB: + case PixelFormat::ASTC_2D_10X8_UNORM: + case PixelFormat::ASTC_2D_10X8_SRGB: + case PixelFormat::ASTC_2D_6X6_UNORM: + case PixelFormat::ASTC_2D_6X6_SRGB: + case PixelFormat::ASTC_2D_10X6_UNORM: + case PixelFormat::ASTC_2D_10X6_SRGB: + case PixelFormat::ASTC_2D_10X5_UNORM: + case PixelFormat::ASTC_2D_10X5_SRGB: + case PixelFormat::ASTC_2D_10X10_UNORM: + case PixelFormat::ASTC_2D_10X10_SRGB: + case PixelFormat::ASTC_2D_12X10_UNORM: + case PixelFormat::ASTC_2D_12X10_SRGB: + case PixelFormat::ASTC_2D_12X12_UNORM: + case PixelFormat::ASTC_2D_12X12_SRGB: + case PixelFormat::ASTC_2D_8X6_UNORM: + case PixelFormat::ASTC_2D_8X6_SRGB: + case PixelFormat::ASTC_2D_6X5_UNORM: + case PixelFormat::ASTC_2D_6X5_SRGB: + case PixelFormat::E5B9G9R9_FLOAT: case PixelFormat::D32_FLOAT: - if (src_view.format == PixelFormat::A8B8G8R8_UNORM || - src_view.format == PixelFormat::B8G8R8A8_UNORM || - src_view.format == PixelFormat::A8B8G8R8_SRGB || - src_view.format == PixelFormat::B8G8R8A8_SRGB) { - return blit_image_helper.ConvertABGR8ToD32F(dst, src_view); - } - if (src_view.format == PixelFormat::R32_FLOAT) { - return blit_image_helper.ConvertR32ToD32(dst, src_view); - } - break; + case PixelFormat::D16_UNORM: + case PixelFormat::X8_D24_UNORM: + case PixelFormat::S8_UINT: + case PixelFormat::S8_UINT_D24_UNORM: + case PixelFormat::D32_FLOAT_S8_UINT: + case PixelFormat::Invalid: default: break; } - UNIMPLEMENTED_MSG("Unimplemented format copy from {} to {}", src_view.format, dst_view.format); +} + +VkFormat TextureCacheRuntime::GetSupportedFormat(VkFormat requested_format, + VkFormatFeatureFlags required_features) const { + if (requested_format == VK_FORMAT_A8B8G8R8_SRGB_PACK32 && + (required_features & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) { + // Force valid depth format when sRGB requested in depth context + return VK_FORMAT_D24_UNORM_S8_UINT; + } + return requested_format; +} + +// Helper functions for format compatibility checks +bool TextureCacheRuntime::IsFormatDitherable(PixelFormat format) { + switch (format) { + case PixelFormat::B8G8R8A8_UNORM: + case PixelFormat::A8B8G8R8_UNORM: + case PixelFormat::B8G8R8A8_SRGB: + case PixelFormat::A8B8G8R8_SRGB: + return true; + default: + return false; + } +} + +bool TextureCacheRuntime::IsFormatScalable(PixelFormat format) { + switch (format) { + case PixelFormat::B8G8R8A8_UNORM: + case PixelFormat::A8B8G8R8_UNORM: + case PixelFormat::R16G16B16A16_FLOAT: + case PixelFormat::R32G32B32A32_FLOAT: + return true; + default: + return false; + } } void TextureCacheRuntime::CopyImage(Image& dst, Image& src, @@ -1780,7 +1887,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI slot_images = &slot_imgs; } -ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, +ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info, const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) : VideoCommon::ImageViewBase{info, view_info, gpu_addr_}, buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 4161d7ff92..b4d903eb52 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -1,9 +1,14 @@ // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later #pragma once #include +#include +#include +#include +#include #include "video_core/texture_cache/texture_cache_base.h" @@ -36,6 +41,22 @@ class RenderPassCache; class StagingBufferPool; class Scheduler; +// Enhanced texture management for better error handling and thread safety +class TextureCacheManager { +public: + explicit TextureCacheManager(); + ~TextureCacheManager(); + + VkImage GetTextureFromCache(const std::string& texture_path); + void ReloadTexture(const std::string& texture_path); + bool IsTextureLoadedCorrectly(VkImage texture); + void HandleTextureCache(); + +private: + std::mutex texture_mutex; + std::unordered_map texture_cache; +}; + class TextureCacheRuntime { public: explicit TextureCacheRuntime(const Device& device_, Scheduler& scheduler_, @@ -111,6 +132,15 @@ public: void BarrierFeedbackLoop(); + bool IsFormatDitherable(VideoCore::Surface::PixelFormat format); + bool IsFormatScalable(VideoCore::Surface::PixelFormat format); + + VkFormat GetSupportedFormat(VkFormat requested_format, VkFormatFeatureFlags required_features) const; + + // Enhanced texture error handling + bool IsTextureLoadedCorrectly(VkImage texture); + void HandleTextureError(const std::string& texture_path); + const Device& device; Scheduler& scheduler; MemoryAllocator& memory_allocator; @@ -122,6 +152,9 @@ public: const Settings::ResolutionScalingInfo& resolution; std::array, VideoCore::Surface::MaxPixelFormat> view_formats; + // Enhanced texture management + TextureCacheManager texture_cache_manager; + static constexpr size_t indexing_slots = 8 * sizeof(size_t); std::array buffers{}; }; diff --git a/src/video_core/renderer_vulkan/vk_texture_manager.cpp b/src/video_core/renderer_vulkan/vk_texture_manager.cpp new file mode 100644 index 0000000000..7fbf8c7a8c --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_manager.cpp @@ -0,0 +1,145 @@ +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include + +#include "common/assert.h" +#include "common/logging/log.h" +#include "video_core/renderer_vulkan/vk_texture_manager.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +TextureManager::TextureManager(const Device& device_, MemoryAllocator& memory_allocator_) + : device(device_), memory_allocator(memory_allocator_) { + + // Create a default texture for fallback in case of errors + default_texture = CreateDefaultTexture(); +} + +TextureManager::~TextureManager() { + std::lock_guard lock(texture_mutex); + // Clear all cached textures + texture_cache.clear(); + + // Default texture will be cleaned up automatically by vk::Image's destructor +} + +VkImage TextureManager::GetTexture(const std::string& texture_path) { + std::lock_guard lock(texture_mutex); + + // Check if the texture is already in the cache + auto it = texture_cache.find(texture_path); + if (it != texture_cache.end()) { + return *it->second; + } + + // Load the texture and add it to the cache + vk::Image new_texture = LoadTexture(texture_path); + if (new_texture) { + VkImage raw_handle = *new_texture; + texture_cache.emplace(texture_path, std::move(new_texture)); + return raw_handle; + } + + // If loading fails, return the default texture if it exists + LOG_WARNING(Render_Vulkan, "Failed to load texture: {}, using default", texture_path); + if (default_texture.has_value()) { + return *(*default_texture); + } + return VK_NULL_HANDLE; +} + +void TextureManager::ReloadTexture(const std::string& texture_path) { + std::lock_guard lock(texture_mutex); + + // Remove the texture from cache if it exists + auto it = texture_cache.find(texture_path); + if (it != texture_cache.end()) { + LOG_INFO(Render_Vulkan, "Reloading texture: {}", texture_path); + texture_cache.erase(it); + } + + // The texture will be reloaded on next GetTexture call +} + +bool TextureManager::IsTextureLoadedCorrectly(VkImage texture) { + // Check if the texture handle is valid + static const VkImage null_handle = VK_NULL_HANDLE; + return texture != null_handle; +} + +void TextureManager::CleanupTextureCache() { + std::lock_guard lock(texture_mutex); + + // TODO: track usage and remove unused textures [ZEP] + + LOG_INFO(Render_Vulkan, "Handling texture cache cleanup, current size: {}", texture_cache.size()); +} + +void TextureManager::HandleTextureRendering(const std::string& texture_path, + std::function render_callback) { + VkImage texture = GetTexture(texture_path); + + if (!IsTextureLoadedCorrectly(texture)) { + LOG_ERROR(Render_Vulkan, "Texture failed to load correctly: {}, attempting reload", texture_path); + ReloadTexture(texture_path); + texture = GetTexture(texture_path); + } + + // Execute the rendering callback with the texture + render_callback(texture); +} + +vk::Image TextureManager::LoadTexture(const std::string& texture_path) { + // TODO: load image data from disk + // and create a proper Vulkan texture [ZEP] + + if (!std::filesystem::exists(texture_path)) { + LOG_ERROR(Render_Vulkan, "Texture file not found: {}", texture_path); + return {}; + } + + try { + + LOG_INFO(Render_Vulkan, "Loaded texture: {}", texture_path); + + // TODO: create an actual VkImage [ZEP] + return CreateDefaultTexture(); + } catch (const std::exception& e) { + LOG_ERROR(Render_Vulkan, "Error loading texture {}: {}", texture_path, e.what()); + return {}; + } +} + +vk::Image TextureManager::CreateDefaultTexture() { + // Create a small default texture (1x1 pixel) to use as a fallback +// const VkExtent2D extent{1, 1}; + +/* // Create image + const VkImageCreateInfo image_ci{ + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .imageType = VK_IMAGE_TYPE_2D, + .format = texture_format, + .extent = {extent.width, extent.height, 1}, + .mipLevels = 1, + .arrayLayers = 1, + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + }; */ + + // TODO: create an actual VkImage [ZEP] + LOG_INFO(Render_Vulkan, "Created default fallback texture"); + return {}; +} + +} // namespace Vulkan \ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_texture_manager.h b/src/video_core/renderer_vulkan/vk_texture_manager.h new file mode 100644 index 0000000000..8cf116c884 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_manager.h @@ -0,0 +1,57 @@ +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Device; +class MemoryAllocator; + +// Enhanced texture manager for better error handling and thread safety +class TextureManager { +public: + explicit TextureManager(const Device& device, MemoryAllocator& memory_allocator); + ~TextureManager(); + + // Get a texture from the cache, loading it if necessary + VkImage GetTexture(const std::string& texture_path); + + // Force a texture to reload from disk + void ReloadTexture(const std::string& texture_path); + + // Check if a texture is loaded correctly + bool IsTextureLoadedCorrectly(VkImage texture); + + // Remove old textures from the cache + void CleanupTextureCache(); + + // Handle texture rendering, with automatic reload if needed + void HandleTextureRendering(const std::string& texture_path, + std::function render_callback); + +private: + // Load a texture from disk and create a Vulkan image + vk::Image LoadTexture(const std::string& texture_path); + + // Create a default texture to use in case of errors + vk::Image CreateDefaultTexture(); + + const Device& device; + MemoryAllocator& memory_allocator; + std::mutex texture_mutex; + std::unordered_map texture_cache; + std::optional default_texture; + VkFormat texture_format = VK_FORMAT_B8G8R8A8_SRGB; +}; + +} // namespace Vulkan \ No newline at end of file diff --git a/src/video_core/vulkan_common/hybrid_memory.cpp b/src/video_core/vulkan_common/hybrid_memory.cpp new file mode 100644 index 0000000000..bb06fae987 --- /dev/null +++ b/src/video_core/vulkan_common/hybrid_memory.cpp @@ -0,0 +1,446 @@ +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include + +#include "common/logging/log.h" +#include "video_core/vulkan_common/hybrid_memory.h" + +#if defined(__linux__) || defined(__ANDROID__) +#include +#include +#include +#include +#include +#include +#include +#elif defined(_WIN32) +#include +#endif + +namespace Vulkan { + +void PredictiveReuseManager::RecordUsage(u64 address, u64 size, bool write_access) { + std::lock_guard guard(mutex); + + // Add to history, removing oldest entries if we're past max_history + access_history.push_back({address, size, write_access, current_timestamp++}); + if (access_history.size() > max_history) { + access_history.erase(access_history.begin()); + } +} + +bool PredictiveReuseManager::IsHotRegion(u64 address, u64 size) const { + std::lock_guard guard(mutex); + + // Check if this memory region has been accessed frequently + const u64 end_address = address + size; + int access_count = 0; + + for (const auto& access : access_history) { + const u64 access_end = access.address + access.size; + + // Check for overlap + if (!(end_address <= access.address || address >= access_end)) { + access_count++; + } + } + + // Consider a region "hot" if it has been accessed in at least 10% of recent accesses + return access_count >= static_cast(std::max(1, max_history / 10)); +} + +void PredictiveReuseManager::EvictRegion(u64 address, u64 size) { + std::lock_guard guard(mutex); + + // Remove any history entries that overlap with this region + const u64 end_address = address + size; + + access_history.erase( + std::remove_if(access_history.begin(), access_history.end(), + [address, end_address](const MemoryAccess& access) { + const u64 access_end = access.address + access.size; + // Check for overlap + return !(end_address <= access.address || address >= access_end); + }), + access_history.end() + ); +} + +void PredictiveReuseManager::ClearHistory() { + std::lock_guard guard(mutex); + access_history.clear(); + current_timestamp = 0; +} + +#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) +void FaultManagedAllocator::Touch(size_t addr) { + lru.remove(addr); + lru.push_front(addr); + dirty_set.insert(addr); +} + +void FaultManagedAllocator::EnforceLimit() { + while (lru.size() > MaxPages) { + size_t evict = lru.back(); + lru.pop_back(); + + auto it = page_map.find(evict); + if (it != page_map.end()) { + if (dirty_set.count(evict)) { + // Compress and store dirty page before evicting + std::vector compressed((u8*)it->second, (u8*)it->second + PageSize); + compressed_store[evict] = std::move(compressed); + dirty_set.erase(evict); + } + +#if defined(__linux__) || defined(__ANDROID__) + munmap(it->second, PageSize); +#elif defined(_WIN32) + VirtualFree(it->second, 0, MEM_RELEASE); +#endif + page_map.erase(it); + } + } +} + +void* FaultManagedAllocator::GetOrAlloc(size_t addr) { + std::lock_guard guard(lock); + + if (page_map.count(addr)) { + Touch(addr); + return page_map[addr]; + } + +#if defined(__linux__) || defined(__ANDROID__) + void* mem = mmap(nullptr, PageSize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (mem == MAP_FAILED) { + LOG_ERROR(Render_Vulkan, "Failed to mmap memory for fault handler"); + return nullptr; + } +#elif defined(_WIN32) + void* mem = VirtualAlloc(nullptr, PageSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); + if (!mem) { + LOG_ERROR(Render_Vulkan, "Failed to VirtualAlloc memory for fault handler"); + return nullptr; + } +#endif + + if (compressed_store.count(addr)) { + // Decompress stored page data + std::memcpy(mem, compressed_store[addr].data(), compressed_store[addr].size()); + compressed_store.erase(addr); + } else { + std::memset(mem, 0, PageSize); + } + + page_map[addr] = mem; + lru.push_front(addr); + dirty_set.insert(addr); + EnforceLimit(); + + return mem; +} + +#if defined(_WIN32) +// Static member initialization +FaultManagedAllocator* FaultManagedAllocator::current_instance = nullptr; + +LONG WINAPI FaultManagedAllocator::VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info) { + // Only handle access violations (page faults) + if (exception_info->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION) { + return EXCEPTION_CONTINUE_SEARCH; + } + + if (!current_instance) { + return EXCEPTION_CONTINUE_SEARCH; + } + + // Get the faulting address - use ULONG_PTR for Windows + const ULONG_PTR fault_addr = static_cast(exception_info->ExceptionRecord->ExceptionInformation[1]); + const ULONG_PTR base_addr = reinterpret_cast(current_instance->base_address); + + // Check if the address is within our managed range + if (fault_addr < base_addr || + fault_addr >= (base_addr + static_cast(current_instance->memory_size))) { + return EXCEPTION_CONTINUE_SEARCH; + } + + // Calculate the base address of the page + const ULONG_PTR page_addr = fault_addr & ~(static_cast(PageSize) - 1); + const size_t relative_addr = static_cast(page_addr - base_addr); + + // Handle the fault by allocating memory + void* page = current_instance->GetOrAlloc(relative_addr); + if (!page) { + return EXCEPTION_CONTINUE_SEARCH; + } + + // Copy the page data to the faulting address + DWORD old_protect; + void* target_addr = reinterpret_cast(page_addr); + + // Make the target page writable + if (VirtualProtect(target_addr, PageSize, PAGE_READWRITE, &old_protect)) { + std::memcpy(target_addr, page, PageSize); + // Restore original protection + VirtualProtect(target_addr, PageSize, old_protect, &old_protect); + return EXCEPTION_CONTINUE_EXECUTION; + } + + return EXCEPTION_CONTINUE_SEARCH; +} + +void FaultManagedAllocator::ExceptionHandlerThread() { + while (running) { + // Sleep to avoid busy waiting + Sleep(10); + } +} +#endif + +void FaultManagedAllocator::Initialize(void* base, size_t size) { +#if defined(__linux__) || defined(__ANDROID__) + uffd = static_cast(syscall(SYS_userfaultfd, O_CLOEXEC | O_NONBLOCK)); + if (uffd < 0) { + LOG_ERROR(Render_Vulkan, "Failed to create userfaultfd, fault handling disabled"); + return; + } + + struct uffdio_api api = { .api = UFFD_API }; + ioctl(uffd, UFFDIO_API, &api); + + struct uffdio_register reg = { + .range = { .start = (uintptr_t)base, .len = size }, + .mode = UFFDIO_REGISTER_MODE_MISSING + }; + + if (ioctl(uffd, UFFDIO_REGISTER, ®) < 0) { + LOG_ERROR(Render_Vulkan, "Failed to register memory range with userfaultfd"); + close(uffd); + uffd = -1; + return; + } + + running = true; + fault_handler = std::thread(&FaultManagedAllocator::FaultThread, this); +#elif defined(_WIN32) + // Setup Windows memory for fault handling + base_address = base; + memory_size = size; + + // Reserve memory range but don't commit it yet - it will be demand-paged + DWORD oldProtect; + VirtualProtect(base, size, PAGE_NOACCESS, &oldProtect); + + // Install a vectored exception handler + current_instance = this; + AddVectoredExceptionHandler(1, VectoredExceptionHandler); + + running = true; + exception_handler = std::thread(&FaultManagedAllocator::ExceptionHandlerThread, this); + + LOG_INFO(Render_Vulkan, "Windows fault-managed memory initialized at {:p}, size: {}", + base, size); +#endif +} + +#if defined(__linux__) || defined(__ANDROID__) +void FaultManagedAllocator::FaultThread() { + struct pollfd pfd = { uffd, POLLIN, 0 }; + + while (running) { + if (poll(&pfd, 1, 10) > 0) { + struct uffd_msg msg; + read(uffd, &msg, sizeof(msg)); + + if (msg.event == UFFD_EVENT_PAGEFAULT) { + size_t addr = msg.arg.pagefault.address & ~(PageSize - 1); + void* page = GetOrAlloc(addr); + + if (page) { + struct uffdio_copy copy = { + .dst = (uintptr_t)addr, + .src = (uintptr_t)page, + .len = PageSize, + .mode = 0 + }; + + ioctl(uffd, UFFDIO_COPY, ©); + } + } + } + } +} +#endif + +void* FaultManagedAllocator::Translate(size_t addr) { + std::lock_guard guard(lock); + + size_t base = addr & ~(PageSize - 1); + if (!page_map.count(base)) { + return nullptr; + } + + Touch(base); + return (u8*)page_map[base] + (addr % PageSize); +} + +void FaultManagedAllocator::SaveSnapshot(const std::string& path) { + std::lock_guard guard(lock); + + std::ofstream out(path, std::ios::binary); + if (!out) { + LOG_ERROR(Render_Vulkan, "Failed to open snapshot file for writing: {}", path); + return; + } + + for (auto& [addr, mem] : page_map) { + out.write(reinterpret_cast(&addr), sizeof(addr)); + out.write(reinterpret_cast(mem), PageSize); + } + + LOG_INFO(Render_Vulkan, "Saved memory snapshot to {}", path); +} + +void FaultManagedAllocator::SaveDifferentialSnapshot(const std::string& path) { + std::lock_guard guard(lock); + + std::ofstream out(path, std::ios::binary); + if (!out) { + LOG_ERROR(Render_Vulkan, "Failed to open diff snapshot file for writing: {}", path); + return; + } + + size_t dirty_count = 0; + for (const auto& addr : dirty_set) { + if (page_map.count(addr)) { + out.write(reinterpret_cast(&addr), sizeof(addr)); + out.write(reinterpret_cast(page_map[addr]), PageSize); + dirty_count++; + } + } + + LOG_INFO(Render_Vulkan, "Saved differential snapshot to {} ({} dirty pages)", + path, dirty_count); +} + +void FaultManagedAllocator::ClearDirtySet() { + std::lock_guard guard(lock); + dirty_set.clear(); + LOG_DEBUG(Render_Vulkan, "Cleared dirty page tracking"); +} + +FaultManagedAllocator::~FaultManagedAllocator() { + running = false; + +#if defined(__linux__) || defined(__ANDROID__) + if (fault_handler.joinable()) { + fault_handler.join(); + } + + for (auto& [addr, mem] : page_map) { + munmap(mem, PageSize); + } + + if (uffd != -1) { + close(uffd); + } +#elif defined(_WIN32) + if (exception_handler.joinable()) { + exception_handler.join(); + } + + // Remove the vectored exception handler + RemoveVectoredExceptionHandler(VectoredExceptionHandler); + current_instance = nullptr; + + for (auto& [addr, mem] : page_map) { + VirtualFree(mem, 0, MEM_RELEASE); + } + + // Free the base memory if needed + if (base_address) { + VirtualFree(base_address, 0, MEM_RELEASE); + base_address = nullptr; + } +#endif +} +#endif // defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) + +HybridMemory::HybridMemory(const Device& device_, MemoryAllocator& allocator, size_t reuse_history) + : device(device_), memory_allocator(allocator), reuse_manager(reuse_history) { +} + +HybridMemory::~HybridMemory() = default; + +void HybridMemory::InitializeGuestMemory(void* base, size_t size) { +#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) + fmaa.Initialize(base, size); + LOG_INFO(Render_Vulkan, "Initialized fault-managed guest memory at {:p}, size: {}", + base, size); +#else + LOG_INFO(Render_Vulkan, "Fault-managed memory not supported on this platform"); +#endif +} + +void* HybridMemory::TranslateAddress(size_t addr) { +#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) + return fmaa.Translate(addr); +#else + return nullptr; +#endif +} + +ComputeBuffer HybridMemory::CreateComputeBuffer(VkDeviceSize size, VkBufferUsageFlags usage, + MemoryUsage memory_type) { + ComputeBuffer buffer; + buffer.size = size; + + VkBufferCreateInfo buffer_ci = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = size, + .usage = usage | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + + // Using CreateBuffer directly handles memory allocation internally + buffer.buffer = memory_allocator.CreateBuffer(buffer_ci, memory_type); + + LOG_DEBUG(Render_Vulkan, "Created compute buffer: size={}, usage={:x}", + size, usage); + + return buffer; +} + +void HybridMemory::SaveSnapshot(const std::string& path) { +#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) + fmaa.SaveSnapshot(path); +#else + LOG_ERROR(Render_Vulkan, "Memory snapshots not supported on this platform"); +#endif +} + +void HybridMemory::SaveDifferentialSnapshot(const std::string& path) { +#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) + fmaa.SaveDifferentialSnapshot(path); +#else + LOG_ERROR(Render_Vulkan, "Differential memory snapshots not supported on this platform"); +#endif +} + +void HybridMemory::ResetDirtyTracking() { +#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) + fmaa.ClearDirtySet(); +#endif +} + +} // namespace Vulkan \ No newline at end of file diff --git a/src/video_core/vulkan_common/hybrid_memory.h b/src/video_core/vulkan_common/hybrid_memory.h new file mode 100644 index 0000000000..faff2de065 --- /dev/null +++ b/src/video_core/vulkan_common/hybrid_memory.h @@ -0,0 +1,119 @@ +// SPDX-FileCopyrightText: Copyright 2025 citron Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/common_types.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +struct ComputeBuffer { + vk::Buffer buffer{}; + VkDeviceSize size = 0; +}; + +class PredictiveReuseManager { +public: + explicit PredictiveReuseManager(size_t history_size) : max_history{history_size} {} + + void RecordUsage(u64 address, u64 size, bool write_access); + bool IsHotRegion(u64 address, u64 size) const; + void EvictRegion(u64 address, u64 size); + void ClearHistory(); + +private: + struct MemoryAccess { + u64 address; + u64 size; + bool write_access; + u64 timestamp; + }; + + std::vector access_history; + const size_t max_history; + u64 current_timestamp{0}; + mutable std::mutex mutex; +}; + +#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) +class FaultManagedAllocator { +public: + static constexpr size_t PageSize = 0x1000; + static constexpr size_t MaxPages = 16384; + + void Initialize(void* base, size_t size); + void* Translate(size_t addr); + void SaveSnapshot(const std::string& path); + void SaveDifferentialSnapshot(const std::string& path); + void ClearDirtySet(); + ~FaultManagedAllocator(); + +private: + std::map page_map; + std::list lru; + std::set dirty_set; + std::unordered_map> compressed_store; + std::mutex lock; + +#if defined(__linux__) || defined(__ANDROID__) + int uffd = -1; + std::atomic running{false}; + std::thread fault_handler; + void FaultThread(); +#elif defined(_WIN32) + void* base_address = nullptr; + size_t memory_size = 0; + HANDLE exception_port = nullptr; + std::atomic running{false}; + std::thread exception_handler; + void ExceptionHandlerThread(); + static LONG WINAPI VectoredExceptionHandler(PEXCEPTION_POINTERS exception_info); + static FaultManagedAllocator* current_instance; +#endif + + void Touch(size_t addr); + void EnforceLimit(); + void* GetOrAlloc(size_t addr); +}; +#endif + +class HybridMemory { +public: + explicit HybridMemory(const Device& device, MemoryAllocator& allocator, size_t reuse_history = 32); + ~HybridMemory(); + + void InitializeGuestMemory(void* base, size_t size); + void* TranslateAddress(size_t addr); + + ComputeBuffer CreateComputeBuffer(VkDeviceSize size, VkBufferUsageFlags usage, MemoryUsage memory_type); + + void SaveSnapshot(const std::string& path); + void SaveDifferentialSnapshot(const std::string& path); + void ResetDirtyTracking(); + +private: + const Device& device; + MemoryAllocator& memory_allocator; + PredictiveReuseManager reuse_manager; + +#if defined(__linux__) || defined(__ANDROID__) || defined(_WIN32) + FaultManagedAllocator fmaa; +#endif +}; + +} // namespace Vulkan \ No newline at end of file diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 5e2c5c645f..f2d554a65a 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -594,9 +594,10 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR dynamic_state3_enables = false; } } - if (extensions.extended_dynamic_state3 && is_amd_driver) { + if (extensions.extended_dynamic_state3 && (is_amd_driver || driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY)) { + // AMD and Samsung drivers have broken extendedDynamicState3ColorBlendEquation LOG_WARNING(Render_Vulkan, - "AMD drivers have broken extendedDynamicState3ColorBlendEquation"); + "AMD and Samsung drivers have broken extendedDynamicState3ColorBlendEquation"); features.extended_dynamic_state3.extendedDynamicState3ColorBlendEnable = false; features.extended_dynamic_state3.extendedDynamicState3ColorBlendEquation = false; dynamic_state3_blending = false; @@ -919,7 +920,8 @@ bool Device::ShouldBoostClocks() const { driver_id == VK_DRIVER_ID_MESA_RADV || driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY || driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS || driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA || - driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP; + driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP || + driver_id == VK_DRIVER_ID_SAMSUNG_PROPRIETARY; const bool is_steam_deck = (vendor_id == 0x1002 && device_id == 0x163F) || (vendor_id == 0x1002 && device_id == 0x1435); diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 54331688e3..c54ab3d09b 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -140,6 +140,10 @@ public: return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0; } + [[nodiscard]] bool IsEmpty() const noexcept { + return commits.empty(); + } + private: [[nodiscard]] static constexpr u32 ShiftType(u32 type) { return 1U << type; @@ -284,39 +288,78 @@ MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, M const u32 type_mask = requirements.memoryTypeBits; const VkMemoryPropertyFlags usage_flags = MemoryUsagePropertyFlags(usage); const VkMemoryPropertyFlags flags = MemoryPropertyFlags(type_mask, usage_flags); + + // First attempt if (std::optional commit = TryCommit(requirements, flags)) { return std::move(*commit); } - // Commit has failed, allocate more memory. + + // Commit has failed, allocate more memory const u64 chunk_size = AllocationChunkSize(requirements.size); - if (!TryAllocMemory(flags, type_mask, chunk_size)) { - // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory. - throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); + if (TryAllocMemory(flags, type_mask, chunk_size)) { + return TryCommit(requirements, flags).value(); } - // Commit again, this time it won't fail since there's a fresh allocation above. - // If it does, there's a bug. - return TryCommit(requirements, flags).value(); + + // Memory allocation failed - try to recover by releasing empty allocations + for (auto it = allocations.begin(); it != allocations.end();) { + if ((*it)->IsEmpty()) { + it = allocations.erase(it); + } else { + ++it; + } + } + + // Try allocating again after cleanup + if (TryAllocMemory(flags, type_mask, chunk_size)) { + return TryCommit(requirements, flags).value(); + } + + // If still failing, try with non-device-local memory as a last resort + if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) { + const VkMemoryPropertyFlags fallback_flags = flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + if (TryAllocMemory(fallback_flags, type_mask, chunk_size)) { + if (auto commit = TryCommit(requirements, fallback_flags)) { + LOG_WARNING(Render_Vulkan, "Falling back to non-device-local memory due to OOM"); + return std::move(*commit); + } + } + } + + LOG_CRITICAL(Render_Vulkan, "Vulkan memory allocation failed - out of device memory"); + throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); } bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { - const u32 type = FindType(flags, type_mask).value(); + const auto type_opt = FindType(flags, type_mask); + if (!type_opt) { + if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) { + // Try to allocate non device local memory + return TryAllocMemory(flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, type_mask, size); + } + return false; + } + + const u64 aligned_size = (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) ? + Common::AlignUp(size, 4096) : // Adreno requires 4KB alignment + size; // Others (NVIDIA, AMD, Intel, etc) + vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = nullptr, - .allocationSize = size, - .memoryTypeIndex = type, + .allocationSize = aligned_size, + .memoryTypeIndex = *type_opt, }); + if (!memory) { if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) { // Try to allocate non device local memory return TryAllocMemory(flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, type_mask, size); - } else { - // RIP - return false; } + return false; } + allocations.push_back( - std::make_unique(this, std::move(memory), flags, size, type)); + std::make_unique(this, std::move(memory), flags, aligned_size, *type_opt)); return true; } diff --git a/src/yuzu/configuration/shared_translation.cpp b/src/yuzu/configuration/shared_translation.cpp index 0549e8ae44..440467010b 100644 --- a/src/yuzu/configuration/shared_translation.cpp +++ b/src/yuzu/configuration/shared_translation.cpp @@ -71,6 +71,10 @@ std::unique_ptr InitializeTranslations(QWidget* parent) { "faster or not.\n200% for a 30 FPS game is 60 FPS, and for a " "60 FPS game it will be 120 FPS.\nDisabling it means unlocking the framerate to the " "maximum your PC can reach.")); + INSERT(Settings, sync_core_speed, tr("Synchronize Core Speed"), + tr("Synchronizes CPU core speed with the game's maximum rendering speed to boost FPS without affecting game speed (animations, physics, etc.).\n" + "Compatibility varies by game; many (especially older ones) may not respond well.\n" + "Can help reduce stuttering at lower framerates.")); // Cpu INSERT(Settings, cpu_accuracy, tr("Accuracy:"), @@ -143,6 +147,10 @@ std::unique_ptr InitializeTranslations(QWidget* parent) { tr("Allows saving shaders to storage for faster loading on following game " "boots.\nDisabling " "it is only intended for debugging.")); + INSERT(Settings, optimize_spirv_output, tr("Optimize SPIRV output shader"), + tr("Runs an additional optimization pass over generated SPIRV shaders.\n" + "Will increase time required for shader compilation.\nMay slightly improve " + "performance.\nThis feature is experimental.")); INSERT( Settings, use_asynchronous_gpu_emulation, tr("Use asynchronous GPU emulation"), tr("Uses an extra CPU thread for rendering.\nThis option should always remain enabled.")); @@ -306,7 +314,12 @@ std::unique_ptr ComboboxEnumeration(QWidget* parent) { PAIR(AppletMode, HLE, tr("Custom frontend")), PAIR(AppletMode, LLE, tr("Real applet")), }}); - + translations->insert({Settings::EnumMetadata::Index(), + { + PAIR(SpirvOptimizeMode, Never, tr("Never")), + PAIR(SpirvOptimizeMode, OnLoad, tr("On Load")), + PAIR(SpirvOptimizeMode, Always, tr("Always")), + }}); translations->insert({Settings::EnumMetadata::Index(), { PAIR(AstcDecodeMode, Cpu, tr("CPU")), diff --git a/src/yuzu/uisettings.h b/src/yuzu/uisettings.h index 61704c6fa6..4bbdb1e11c 100644 --- a/src/yuzu/uisettings.h +++ b/src/yuzu/uisettings.h @@ -277,3 +277,4 @@ Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::ShaderBackend); Q_DECLARE_METATYPE(Settings::AstcRecompression); Q_DECLARE_METATYPE(Settings::AstcDecodeMode); +Q_DECLARE_METATYPE(Settings::SpirvOptimizeMode); \ No newline at end of file