diff --git a/pcsx2/SPU2/Debug.cpp b/pcsx2/SPU2/Debug.cpp index 0b6e0d7aba87b..9bc5c7053c435 100644 --- a/pcsx2/SPU2/Debug.cpp +++ b/pcsx2/SPU2/Debug.cpp @@ -200,7 +200,6 @@ void SPU2::DoFullDump() fprintf(dump, " - Sound Start Address: %x\n", Cores[c].Voices[v].StartA); fprintf(dump, " - Next Data Address: %x\n", Cores[c].Voices[v].NextA); fprintf(dump, " - Play Status: %s\n", (Cores[c].Voices[v].ADSR.Phase > 0) ? "Playing" : "Not Playing"); - fprintf(dump, " - Block Sample: %d\n", Cores[c].Voices[v].SCurrent); } fprintf(dump, "#### END OF DUMP.\n\n"); } diff --git a/pcsx2/SPU2/Mixer.cpp b/pcsx2/SPU2/Mixer.cpp index 97ee5a9a529a6..f807c0eb79eb9 100644 --- a/pcsx2/SPU2/Mixer.cpp +++ b/pcsx2/SPU2/Mixer.cpp @@ -89,55 +89,13 @@ int g_counter_cache_ignores = 0; #define XAFLAG_LOOP (1ul << 1) #define XAFLAG_LOOP_START (1ul << 2) -static __forceinline s32 GetNextDataBuffered(V_Core& thiscore, uint voiceidx) +static __forceinline void GetNextDataBuffered(V_Core& thiscore, uint voiceidx) { V_Voice& vc(thiscore.Voices[voiceidx]); - if ((vc.SCurrent & 3) == 0) + if (vc.SBuffer == nullptr) { - IncrementNextA(thiscore, voiceidx); - - if ((vc.NextA & 7) == 0) // vc.SCurrent == 24 equivalent - { - if (vc.LoopFlags & XAFLAG_LOOP_END) - { - thiscore.Regs.ENDX |= (1 << voiceidx); - vc.NextA = vc.LoopStartA | 1; - if (!(vc.LoopFlags & XAFLAG_LOOP)) - { - vc.Stop(); - - if (IsDevBuild) - { - if (SPU2::MsgVoiceOff()) - SPU2::ConLog("* SPU2: Voice Off by EndPoint: %d \n", voiceidx); - } - } - } - else - vc.NextA++; // no, don't IncrementNextA here. We haven't read the header yet. - } - } - - if (vc.SCurrent == 28) - { - vc.SCurrent = 0; - - // We'll need the loop flags and buffer pointers regardless of cache status: - - for (int i = 0; i < 2; i++) - if (Cores[i].IRQEnable && Cores[i].IRQA == (vc.NextA & 0xFFFF8)) - SetIrqCall(i); - - s16* memptr = GetMemPtr(vc.NextA & 0xFFFF8); - vc.LoopFlags = *memptr >> 8; // grab loop flags from the upper byte. - - if ((vc.LoopFlags & XAFLAG_LOOP_START) && !vc.LoopMode) - { - vc.LoopStartA = vc.NextA & 0xFFFF8; - } - - const int cacheIdx = vc.NextA / pcm_WordsPerBlock; + const int cacheIdx = (vc.NextA & 0xFFFF8) / pcm_WordsPerBlock; PcmCacheEntry& cacheLine = pcm_cache_data[cacheIdx]; vc.SBuffer = cacheLine.Sampledata; @@ -172,46 +130,18 @@ static __forceinline s32 GetNextDataBuffered(V_Core& thiscore, uint voiceidx) g_counter_cache_misses++; } - XA_decode_block(vc.SBuffer, memptr, vc.Prev1, vc.Prev2); - } - } - - return vc.SBuffer[vc.SCurrent++]; -} - -static __forceinline void GetNextDataDummy(V_Core& thiscore, uint voiceidx) -{ - V_Voice& vc(thiscore.Voices[voiceidx]); - - IncrementNextA(thiscore, voiceidx); - if ((vc.NextA & 7) == 0) // vc.SCurrent == 24 equivalent - { - if (vc.LoopFlags & XAFLAG_LOOP_END) - { - thiscore.Regs.ENDX |= (1 << voiceidx); - vc.NextA = vc.LoopStartA | 1; + s16* memptr = GetMemPtr(vc.NextA & 0xFFFF8); + XA_decode_block(vc.SBuffer, memptr, vc.Prev1, vc.Prev2); } - else - vc.NextA++; // no, don't IncrementNextA here. We haven't read the header yet. } - if (vc.SCurrent == 28) + // Get the sample index for NextA, we have to subtract 1 to ignore the loop header + int sampleIdx = ((vc.NextA % pcm_WordsPerBlock) - 1) * 4; + for (int i = 0; i < 4; i++) { - for (int i = 0; i < 2; i++) - if (Cores[i].IRQEnable && Cores[i].IRQA == (vc.NextA & 0xFFFF8)) - SetIrqCall(i); - - vc.LoopFlags = *GetMemPtr(vc.NextA & 0xFFFF8) >> 8; // grab loop flags from the upper byte. - - if ((vc.LoopFlags & XAFLAG_LOOP_START) && !vc.LoopMode) - vc.LoopStartA = vc.NextA & 0xFFFF8; - - vc.SCurrent = 0; + vc.DecodeFifo[(vc.DecPosWrite + i) % 32] = vc.SBuffer[sampleIdx + i]; } - - vc.SP -= 0x1000 * (4 - (vc.SCurrent & 3)); - vc.SCurrent += 4 - (vc.SCurrent & 3); } ///////////////////////////////////////////////////////////////////////////////////////// @@ -237,6 +167,69 @@ static __forceinline StereoOut32 ApplyVolume(const StereoOut32& data, const V_Vo ApplyVolume(data.Right, volume.Right.Value)); } +static __forceinline void UpdateBlockHeader(V_Core& thiscore, uint voiceidx) +{ + V_Voice& vc(thiscore.Voices[voiceidx]); + + for (int i = 0; i < 2; i++) + if (Cores[i].IRQEnable && Cores[i].IRQA == (vc.NextA & 0xFFFF8)) + SetIrqCall(i); + + s16* memptr = GetMemPtr(vc.NextA & 0xFFFF8); + vc.LoopFlags = *memptr >> 8; // grab loop flags from the upper byte. + + if ((vc.LoopFlags & XAFLAG_LOOP_START) && !vc.LoopMode) + { + vc.LoopStartA = vc.NextA & 0xFFFF8; + } +} + +static __forceinline void DecodeSamples(uint coreidx, uint voiceidx) +{ + V_Core& thiscore(Cores[coreidx]); + V_Voice& vc(thiscore.Voices[voiceidx]); + + // Update the block header on every audio frame + UpdateBlockHeader(thiscore, voiceidx); + + // When a voice is started at 0 pitch, NAX quickly advances to SSA + 5 + // So that would mean the decode buffer holds around 12 samples + if (((int)(vc.DecPosWrite - vc.DecPosRead)) > 12) { + // Sufficient data buffered + return; + } + + if (vc.ADSR.Phase > V_ADSR::PHASE_STOPPED) + { + GetNextDataBuffered(thiscore, voiceidx); + } + + vc.DecPosWrite += 4; + + IncrementNextA(thiscore, voiceidx); + if ((vc.NextA & 7) == 0) + { + if (vc.LoopFlags & XAFLAG_LOOP_END) + { + thiscore.Regs.ENDX |= (1 << voiceidx); + vc.NextA = vc.LoopStartA; + if (!(vc.LoopFlags & XAFLAG_LOOP)) + { + vc.Stop(); + + if (IsDevBuild) + { + if (SPU2::MsgVoiceOff()) + SPU2::ConLog("* SPU2: Voice Off by EndPoint: %d \n", voiceidx); + } + } + } + + IncrementNextA(thiscore, voiceidx); + vc.SBuffer = nullptr; + } +} + static void __forceinline UpdatePitch(uint coreidx, uint voiceidx) { V_Voice& vc(Cores[coreidx].Voices[voiceidx]); @@ -278,33 +271,27 @@ static __forceinline void CalculateADSR(V_Core& thiscore, uint voiceidx) pxAssume(vc.ADSR.Value >= 0); // ADSR should never be negative... } -__forceinline static s32 GaussianInterpolate(s32 pv4, s32 pv3, s32 pv2, s32 pv1, s32 i) +static __forceinline void ConsumeSamples(V_Core& thiscore, uint voiceidx) { - s32 out = 0; - out = (interpTable[i][0] * pv4) >> 15; - out += (interpTable[i][1] * pv3) >> 15; - out += (interpTable[i][2] * pv2) >> 15; - out += (interpTable[i][3] * pv1) >> 15; + V_Voice& vc(thiscore.Voices[voiceidx]); - return out; + int consumed = vc.SP >> 12; + vc.SP &= 0xfff; + vc.DecPosRead += consumed; } static __forceinline s32 GetVoiceValues(V_Core& thiscore, uint voiceidx) { V_Voice& vc(thiscore.Voices[voiceidx]); - while (vc.SP >= 0) - { - vc.PV4 = vc.PV3; - vc.PV3 = vc.PV2; - vc.PV2 = vc.PV1; - vc.PV1 = GetNextDataBuffered(thiscore, voiceidx); - vc.SP -= 0x1000; - } - - const s32 mu = vc.SP + 0x1000; + int phase = (vc.SP & 0x0ff0) >> 4; + s32 out = 0; + out += (interpTable[phase][0] * vc.DecodeFifo[(vc.DecPosRead + 0) % 32]) >> 15; + out += (interpTable[phase][1] * vc.DecodeFifo[(vc.DecPosRead + 1) % 32]) >> 15; + out += (interpTable[phase][2] * vc.DecodeFifo[(vc.DecPosRead + 2) % 32]) >> 15; + out += (interpTable[phase][3] * vc.DecodeFifo[(vc.DecPosRead + 3) % 32]) >> 15; - return GaussianInterpolate(vc.PV4, vc.PV3, vc.PV2, vc.PV1, (mu & 0x0ff0) >> 4); + return out; } // This is Dr. Hell's noise algorithm as implemented in pcsxr @@ -382,21 +369,13 @@ static __forceinline StereoOut32 MixVoice(uint coreidx, uint voiceidx) V_Core& thiscore(Cores[coreidx]); V_Voice& vc(thiscore.Voices[voiceidx]); - // If this assertion fails, it mans SCurrent is being corrupted somewhere, or is not initialized - // properly. Invalid values in SCurrent will cause errant IRQs and corrupted audio. - pxAssertMsg((vc.SCurrent <= 28) && (vc.SCurrent != 0), "Current sample should always range from 1->28"); - // Most games don't use much volume slide effects. So only call the UpdateVolume // methods when needed by checking the flag outside the method here... // (Note: Ys 6 : Ark of Nephistm uses these effects) vc.Volume.Update(); - // SPU2 Note: The spu2 continues to process voices for eternity, always, so we - // have to run through all the motions of updating the voice regardless of it's - // audible status. Otherwise IRQs might not trigger and emulation might fail. - - UpdatePitch(coreidx, voiceidx); + DecodeSamples(coreidx, voiceidx); StereoOut32 voiceOut(0, 0); s32 Value = 0; @@ -419,11 +398,14 @@ static __forceinline StereoOut32 MixVoice(uint coreidx, uint voiceidx) voiceOut = ApplyVolume(StereoOut32(Value, Value), vc.Volume); } - else - { - while (vc.SP >= 0) - GetNextDataDummy(thiscore, voiceidx); // Dummy is enough - } + + // SPU2 Note: The spu2 continues to process voices for eternity, always, so we + // have to run through all the motions of updating the voice regardless of it's + // audible status. Otherwise IRQs might not trigger and emulation might fail. + + UpdatePitch(coreidx, voiceidx); + + ConsumeSamples(thiscore, voiceidx); // Write-back of raw voice data (post ADSR applied) if (voiceidx == 1) @@ -533,7 +515,8 @@ StereoOut32 V_Core::Mix(const VoiceMixSet& inVoices, const StereoOut32& Input, c return TD + ApplyVolume(RV, FxVol); } -static StereoOut32 DCFilter(StereoOut32 input) { +static StereoOut32 DCFilter(StereoOut32 input) +{ // A simple DC blocking high-pass filter // Implementation from http://peabody.sapp.org/class/dmp2/lab/dcblock/ // The magic number 0x7f5c is ceil(INT16_MAX * 0.995) @@ -634,9 +617,9 @@ __forceinline void spu2Mix() if (SPU2::MsgCache()) { SPU2::ConLog(" * SPU2 > CacheStats > Hits: %d Misses: %d Ignores: %d\n", - g_counter_cache_hits, - g_counter_cache_misses, - g_counter_cache_ignores); + g_counter_cache_hits, + g_counter_cache_misses, + g_counter_cache_ignores); } g_counter_cache_hits = diff --git a/pcsx2/SPU2/defs.h b/pcsx2/SPU2/defs.h index 80713657d8380..2fd12015c5604 100644 --- a/pcsx2/SPU2/defs.h +++ b/pcsx2/SPU2/defs.h @@ -256,29 +256,16 @@ struct V_Voice // Sample pointer (19:12 bit fixed point) s32 SP; - // Sample pointer for Cubic Interpolation - // Cubic interpolation mixes a sample behind Linear, so that it - // can have sample data to either side of the end points from which - // to extrapolate. This SP represents that late sample position. - s32 SPc; - - // Previous sample values - used for interpolation - // Inverted order of these members to match the access order in the - // code (might improve cache hits). - s32 PV4; - s32 PV3; - s32 PV2; - s32 PV1; - // Last outputted audio value, used for voice modulation. s32 OutX; - s32 NextCrest; // temp value for Crest calculation // SBuffer now points directly to an ADPCM cache entry. s16* SBuffer; - // sample position within the current decoded packet. - s32 SCurrent; + // Each voice has a buffer of decoded samples + s32 DecodeFifo[32]; + u32 DecPosWrite; + u32 DecPosRead; // it takes a few ticks for voices to start on the real SPU2? void Start(); diff --git a/pcsx2/SPU2/spu2sys.cpp b/pcsx2/SPU2/spu2sys.cpp index 18a5690a39772..54086c78ad484 100644 --- a/pcsx2/SPU2/spu2sys.cpp +++ b/pcsx2/SPU2/spu2sys.cpp @@ -181,7 +181,6 @@ void V_Core::Init(int index) VoiceGates[v].WetR = -1; Voices[v].Volume = V_VolumeSlideLR(0, 0); // V_VolumeSlideLR::Max; - Voices[v].SCurrent = 28; Voices[v].ADSR.Counter = 0; Voices[v].ADSR.Value = 0; @@ -190,6 +189,10 @@ void V_Core::Init(int index) Voices[v].NextA = 0x2801; Voices[v].StartA = 0x2800; Voices[v].LoopStartA = 0x2800; + + memset(Voices[v].DecodeFifo, 0, sizeof(Voices[v].DecodeFifo)); + Voices[v].DecPosRead = 0; + Voices[v].DecPosWrite = 0; } DMAICounter = 0; @@ -212,23 +215,18 @@ void V_Voice::Start() } ADSR.Attack(); - SCurrent = 28; LoopMode = 0; - // When SP >= 0 the next sample will be grabbed, we don't want this to happen - // instantly because in the case of pitch being 0 we want to delay getting - // the next block header. This is a hack to work around the fact that unlike - // the HW we don't update the block header on every cycle. - SP = -1; + SP = 0; LoopFlags = 0; NextA = StartA | 1; Prev1 = 0; Prev2 = 0; - PV1 = PV2 = 0; - PV3 = PV4 = 0; - NextCrest = -0x8000; + SBuffer = nullptr; + DecPosRead = 0; + DecPosWrite = 0; } void V_Voice::Stop() @@ -1015,12 +1013,10 @@ static void RegWrite_VoiceAddr(u16 value) // Wallace And Gromit: Curse Of The Were-Rabbit. thisvoice.NextA = ((u32)(value & 0x0F) << 16) | (thisvoice.NextA & 0xFFF8) | 1; - thisvoice.SCurrent = 28; break; case 5: thisvoice.NextA = (thisvoice.NextA & 0x0F0000) | (value & 0xFFF8) | 1; - thisvoice.SCurrent = 28; break; } } @@ -1238,7 +1234,6 @@ static void RegWrite_Core(u16 value) for (uint v = 0; v < 24; ++v) { Cores[1].Voices[v].Volume = V_VolumeSlideLR(0, 0); // V_VolumeSlideLR::Max; - Cores[1].Voices[v].SCurrent = 28; Cores[1].Voices[v].ADSR.Value = 0; Cores[1].Voices[v].ADSR.Phase = 0; diff --git a/pcsx2/SaveState.h b/pcsx2/SaveState.h index f0739194770c6..11bc3a5e90592 100644 --- a/pcsx2/SaveState.h +++ b/pcsx2/SaveState.h @@ -25,7 +25,7 @@ enum class FreezeAction // [SAVEVERSION+] // This informs the auto updater that the users savestates will be invalidated. -static const u32 g_SaveVersion = (0x9A55 << 16) | 0x0000; +static const u32 g_SaveVersion = (0x9A57 << 16) | 0x0000; // the freezing data between submodules and core