diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 64f50b9..c350404 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -167,7 +167,7 @@ jobs: - name: Test under emulation run: > ctest --test-dir build --output-on-failure - -E 'AsrcQuality|AsrcLock|TwoThreadStress|TransparentPrototypeMeetsSpec|MultiChannel\.' + -E 'AsrcQuality|AsrcLock|TwoThreadStress|TransparentPrototypeMeetsSpec|MultiChannel\.|Feasibility|Reset\.' # Cross-compile for Arm Cortex-M55 (bare metal, newlib + semihosting) and # run the emulation-sized test subset on QEMU's MPS3 AN547 board model. diff --git a/README.md b/README.md index 6596d53..917d14c 100644 --- a/README.md +++ b/README.md @@ -143,8 +143,18 @@ latency = targetLatencyFrames + (L·T − 1)/(2L) [input frames] `designedLatencySeconds()` reports the figure; the FIFO term breathes by a fraction of the block size as the servo tracks drift. The filter is linear phase. For lower latency use `FilterSpec::fast()` (~16-frame group delay) -and a smaller `targetLatencyFrames`; the FIFO setpoint must stay above the -peak occupancy excursion of your push/pull block jitter. +and a smaller `targetLatencyFrames`. + +**The setpoint must exceed the pull block size** — a pull synthesizes from +frames already buffered, so a setpoint at or below the callback size is +infeasible and would drain into a permanent dropout cycle. The converter +enforces this automatically: when it observes pull blocks larger than the +configured setpoint it raises the effective setpoint (block + ~half-block +margin, bounded by FIFO capacity) and reports the value in +`Status::effectiveTargetLatencyFrames`; latency follows the raised +setpoint. Callbacks above ~340 frames also need `fifoFrames` sized +explicitly. The setpoint must additionally stay above the peak occupancy +excursion of your push/pull jitter, as before. 
## Measured performance diff --git a/include/srt/asrc.hpp b/include/srt/asrc.hpp index 57206ca..a26291d 100644 --- a/include/srt/asrc.hpp +++ b/include/srt/asrc.hpp @@ -3,9 +3,12 @@ #ifndef SRT_ASRC_HPP #define SRT_ASRC_HPP +#include #include #include +#include #include +#include #include #include @@ -16,9 +19,9 @@ namespace srt { -/// Converter configuration. The defaults realize the whitepaper's worked -/// budget: ~1 ms core latency (FIFO setpoint 48 frames + ~24 frames filter -/// group delay) at 48 kHz, transparent for clocks within +/-1000 ppm. +/// Converter configuration. The defaults give ~1.5 ms designed latency at +/// 48 kHz (FIFO setpoint 48 frames + ~24 frames filter group delay; see +/// the README latency section), transparent for clocks within +/-1000 ppm. struct Config { double sampleRateHz = 48000.0; ///< nominal rate of BOTH clock domains std::size_t channels = 2; @@ -72,6 +75,11 @@ struct Status { std::uint64_t overruns = 0; ///< push() calls that could not accept every ///< offered frame (FIFO full; excess dropped) std::uint64_t resyncs = 0; ///< hard occupancy resyncs (high watermark) + /// The setpoint actually in force. Starts at Config::targetLatencyFrames + /// and is raised automatically when pull() blocks larger than the + /// setpoint are observed (see pull()); differs from the configured value + /// exactly when that adaptation has occurred. + std::uint64_t effectiveTargetLatencyFrames = 0; }; /// Near-unity asynchronous sample rate converter between two clock domains. 
@@ -93,11 +101,22 @@ class BasicAsyncSampleRateConverter { resampler_(bank_, cfg_.channels, kPopChunkFrames), ring_(ringCapacityElems(cfg_, bank_.taps())), servo_(cfg_.servo, cfg_.sampleRateHz, static_cast(cfg_.targetLatencyFrames)), + targetFrames_(cfg_.targetLatencyFrames), fillThresholdFrames_(cfg_.targetLatencyFrames + bank_.taps()), highWaterFrames_(std::max(3 * cfg_.targetLatencyFrames, fillThresholdFrames_ + cfg_.targetLatencyFrames)) { if (ring_.capacity() / cfg_.channels <= highWaterFrames_) throw std::invalid_argument("AsyncSampleRateConverter: fifoFrames too small"); + // Largest setpoint the FIFO capacity supports while keeping the + // high-watermark relation; bounds the adaptive raise in pull(). + const std::size_t capFrames = ring_.capacity() / cfg_.channels; + const std::size_t taps = bank_.taps(); + maxTargetFrames_ = std::max(cfg_.targetLatencyFrames, + std::min((capFrames - 1) / 3, capFrames > taps + 1 + ? (capFrames - taps - 1) / 2 + : cfg_.targetLatencyFrames)); + effectiveTarget_.store(static_cast(targetFrames_), + std::memory_order_relaxed); } BasicAsyncSampleRateConverter(const BasicAsyncSampleRateConverter&) = delete; @@ -117,14 +136,43 @@ class BasicAsyncSampleRateConverter { /// Consumer thread: produce exactly `frames` interleaved output frames at /// the output clock. Silence-pads while filling and on underrun, and /// fades the first kFadeFrames frames in after every (re)fill so dropout - /// recovery does not click. Returns the number of frames synthesized - /// from real input. + /// recovery does not click. (The dropout onset itself and a hard-resync + /// splice are unfaded cuts: there is nothing valid to fade to at the + /// moment they occur.) Returns the number of frames synthesized from + /// real input. 
std::size_t pull(S* interleaved, std::size_t frames) noexcept { const std::size_t ch = cfg_.channels; const auto popFn = [this](S* dst, std::size_t maxFrames) noexcept { return ring_.read(dst, maxFrames * cfg_.channels) / cfg_.channels; }; + // Feasibility: a pull must synthesize from frames already buffered, + // so the occupancy setpoint must exceed the pull block size or the + // loop drains into a permanent underrun limit cycle (dropouts every + // few hundred ms, never locking). Raise the effective setpoint to + // the largest observed block plus slew/sawtooth margin, bounded by + // FIFO capacity; the servo slews to the new setpoint glitch-free + // (integrator kept, occupancy only grows). Cost: latency follows + // the raised setpoint — see Status::effectiveTargetLatencyFrames. + if (frames > observedMaxPull_) { + observedMaxPull_ = frames; + // Margin sized to the block-beat sawtooth (~half the block) so + // the entry occupancy never grazes the pull size; configs that + // already satisfy it (e.g. the 32-frame default transfer against + // the 48-frame default setpoint) are left exactly as configured. 
+ const std::size_t needed = frames + std::max(frames / 2, kPopChunkFrames); + const std::size_t newTarget = + std::clamp(needed, cfg_.targetLatencyFrames, maxTargetFrames_); + if (newTarget > targetFrames_) { + targetFrames_ = newTarget; + fillThresholdFrames_ = newTarget + bank_.taps(); + highWaterFrames_ = std::max(3 * newTarget, fillThresholdFrames_ + newTarget); + servo_.setTarget(static_cast(newTarget)); + effectiveTarget_.store(static_cast(newTarget), + std::memory_order_relaxed); + } + } + double occ = backlogFrames(); if (filling_) { @@ -143,8 +191,15 @@ class BasicAsyncSampleRateConverter { } if (occ > static_cast(highWaterFrames_)) { // hard resync - const double target = static_cast(cfg_.targetLatencyFrames); - const auto dropFrames = static_cast(occ - target); + const double target = static_cast(targetFrames_); + // The discard can only come from the ring; frames staged in the + // resampler scratch are part of occ but not discardable. Clamp, + // or a setpoint below the staged count drains the ring entirely + // and cascades straight back into Filling. + const std::size_t ringFrames = ring_.readAvailable() / ch; + const double excess = occ - target; + const std::size_t dropFrames = + std::min(ringFrames, excess > 0.0 ? static_cast(excess) : 0); ring_.discard(dropFrames * ch); resyncs_.fetch_add(1, std::memory_order_relaxed); occ = backlogFrames(); @@ -178,6 +233,7 @@ class BasicAsyncSampleRateConverter { s.underruns = underruns_.load(std::memory_order_relaxed); s.overruns = overruns_.load(std::memory_order_relaxed); s.resyncs = resyncs_.load(std::memory_order_relaxed); + s.effectiveTargetLatencyFrames = effectiveTarget_.load(std::memory_order_relaxed); return s; } @@ -191,10 +247,12 @@ class BasicAsyncSampleRateConverter { publishStatus(); } - /// Nominal design latency: FIFO setpoint + filter group delay. The actual - /// figure breathes by a fraction of a frame as the servo tracks drift. 
+ /// Nominal design latency: FIFO setpoint + filter group delay. Uses the + /// effective (possibly adaptively raised) setpoint; the actual figure + /// breathes by a fraction of a frame as the servo tracks drift. double designedLatencySeconds() const noexcept { - return (static_cast(cfg_.targetLatencyFrames) + bank_.groupDelaySamples()) / + return (static_cast(effectiveTarget_.load(std::memory_order_relaxed)) + + bank_.groupDelaySamples()) / cfg_.sampleRateHz; } @@ -205,8 +263,12 @@ class BasicAsyncSampleRateConverter { static std::size_t ringCapacityElems(const Config& cfg, std::size_t taps) { const std::size_t fillThreshold = cfg.targetLatencyFrames + taps; + // The 1024-frame floor (21 ms at 48 kHz) leaves the adaptive + // setpoint raise enough capacity for pull blocks up to ~340 frames + // without explicit fifoFrames sizing; larger callbacks need + // fifoFrames set by the caller (the raise clamps to capacity). const std::size_t frames = - cfg.fifoFrames != 0 ? cfg.fifoFrames : std::max(256, 4 * fillThreshold); + cfg.fifoFrames != 0 ? cfg.fifoFrames : std::max(1024, 4 * fillThreshold); return std::bit_ceil(frames * cfg.channels); } @@ -254,9 +316,40 @@ class BasicAsyncSampleRateConverter { fill_.store(static_cast(servo_.smoothedOccupancy()), std::memory_order_relaxed); } + /// Rejects configurations that would otherwise construct successfully + /// and misbehave silently: NaN/Inf anywhere (a NaN sample rate designs + /// an all-NaN coefficient table), band edges whose sum exceeds the rate + /// (anti-image cutoff above input Nyquist passes images wholesale), a + /// deviation clamp large enough to overflow the Q0.64 eps conversion + /// (UB), and size products that overflow 32-bit size_t targets. 
/// Validates \p cfg and returns it unchanged, so the constructor's member
/// initializers only ever see a configuration that is safe to build from.
///
/// Rejected: zero channels / zero setpoint, non-finite or non-positive
/// sample rate, non-finite filter or servo fields, band edges whose sum
/// exceeds the sample rate, a deviation clamp outside (0, 100000] ppm, and
/// any size whose later products would wrap size_t.
///
/// \param cfg  configuration to check (taken by value, returned as-is)
/// \return     \p cfg, unmodified
/// \throws std::invalid_argument on any of the conditions above
static Config validated(Config cfg) {
    constexpr std::size_t kSizeMax = std::numeric_limits<std::size_t>::max();
    // FIFO floor that ringCapacityElems() applies when fifoFrames == 0; its
    // product with the channel count is evaluated there, so check it here.
    constexpr std::size_t kDefaultFifoFloorFrames = 1024;

    const auto finite = [](double v) { return std::isfinite(v); };
    if (cfg.channels == 0 || cfg.targetLatencyFrames == 0 || !finite(cfg.sampleRateHz) ||
        cfg.sampleRateHz <= 0.0)
        throw std::invalid_argument("AsyncSampleRateConverter: bad Config");

    const FilterSpec& f = cfg.filter;
    if (!finite(f.passbandHz) || !finite(f.stopbandHz) || !finite(f.stopbandAttenDb) ||
        f.passbandHz + f.stopbandHz > cfg.sampleRateHz)
        throw std::invalid_argument("AsyncSampleRateConverter: bad FilterSpec "
                                    "(need passbandHz + stopbandHz <= sampleRateHz)");

    const ServoConfig& sv = cfg.servo;
    if (!finite(sv.acquireBandwidthHz) || !finite(sv.trackBandwidthHz) ||
        !finite(sv.quietBandwidthHz) || !finite(sv.damping) || !finite(sv.acquireSmootherHz) ||
        !finite(sv.trackSmootherHz) || !finite(sv.quietSmootherHz) ||
        !finite(sv.lockThresholdFrames) || !finite(sv.lockHoldSeconds) ||
        !finite(sv.quietHoldSeconds) || !finite(sv.unlockThresholdFrames) ||
        !finite(sv.maxDeviationPpm) || sv.maxDeviationPpm <= 0.0 ||
        sv.maxDeviationPpm > 100000.0) // |eps| stays far from the Q0.64 int64 limit
        throw std::invalid_argument("AsyncSampleRateConverter: bad ServoConfig");

    // Size products evaluated later must not wrap on 32-bit size_t.
    const auto mulOk = [](std::size_t a, std::size_t b) {
        return b == 0 || a <= std::numeric_limits<std::size_t>::max() / b;
    };

    // The operands handed to mulOk must themselves be overflow-free, or a
    // wrapped operand makes the product check pass vacuously. Likewise
    // std::bit_ceil is undefined when the rounded-up power of two is not
    // representable, so bound numPhases before calling it.
    using PhaseCount = decltype(f.numPhases);
    constexpr PhaseCount kLargestPow2 = std::numeric_limits<PhaseCount>::max() / 2 + 1;
    if (f.numPhases > kLargestPow2 || cfg.channels > kSizeMax / 8 ||
        cfg.targetLatencyFrames > kSizeMax - f.tapsPerPhase)
        throw std::invalid_argument("AsyncSampleRateConverter: Config sizes overflow");

    // phases <= kLargestPow2, so phases + 1 cannot wrap.
    const std::size_t phases = std::bit_ceil(f.numPhases);
    const std::size_t fifoFrames =
        cfg.fifoFrames != 0 ? cfg.fifoFrames : kDefaultFifoFloorFrames;
    if (!mulOk(phases + 1, f.tapsPerPhase) ||
        !mulOk(cfg.targetLatencyFrames + f.tapsPerPhase, 8 * cfg.channels) ||
        !mulOk(fifoFrames, 2 * cfg.channels))
        throw std::invalid_argument("AsyncSampleRateConverter: Config sizes overflow");
    return cfg;
}
/// Kaiser's tap-count estimate, N = (A - 8) / (2.285 * 2*pi * df), clamped so
/// the double -> size_t conversion is always defined.
///
/// \param attenDb         desired stopband attenuation in dB
/// \param transWidthNorm  transition width normalized to the sample rate
///                        (e.g. 8 kHz transition at 48 kHz -> 8000/48000)
/// \return estimated taps per polyphase phase, never below 4. Inputs for
///         which the raw formula is negative, NaN or has a non-positive
///         width yield 4; inputs for which it exceeds the size_t range
///         (infinite attenuation, vanishing width) saturate at SIZE_MAX so
///         that downstream size validation can reject them.
inline std::size_t estimateTaps(double attenDb, double transWidthNorm) noexcept {
    constexpr std::size_t kMinTaps = 4;
    constexpr std::size_t kMaxTaps = static_cast<std::size_t>(-1);
    // On 64-bit targets this rounds up to 2^64; every double strictly below
    // it is representable as size_t, which is all the comparison needs.
    constexpr double kMaxTapsAsDouble = static_cast<double>(kMaxTaps);

    // Negated comparison so a NaN width also takes the floor.
    if (!(transWidthNorm > 0.0))
        return kMinTaps;

    const double n = (attenDb - 8.0) / (2.285 * 2.0 * std::numbers::pi * transWidthNorm);
    // Covers attenDb < 8 (negative n), NaN, and -inf.
    if (!(n > static_cast<double>(kMinTaps)))
        return kMinTaps;

    // Casting a value outside the destination range is undefined behaviour,
    // so saturate before converting.
    const double rounded = std::ceil(n);
    if (rounded >= kMaxTapsAsDouble)
        return kMaxTaps;
    return static_cast<std::size_t>(rounded);
}
The integrator (ppm estimate) is kept and + /// the smoothers are left tracking the real observable, so the loop slews + /// to the new setpoint at its clamped rate with no transient discontinuity + /// — used by the converter's adaptive pull-block setpoint raise. + void setTarget(double targetFrames) noexcept { target_ = targetFrames; } + /// One control update; call once per pull() before synthesis. /// \param occFrames raw backlog in frames (FIFO + staged frames) /// \param mu current fractional read position; occ + mu changes diff --git a/include/srt/polyphase_filter.hpp b/include/srt/polyphase_filter.hpp index aa9f5ec..345604a 100644 --- a/include/srt/polyphase_filter.hpp +++ b/include/srt/polyphase_filter.hpp @@ -331,8 +331,9 @@ inline void dotRowsFrameMajor(const typename SampleTraits::Coeff* SRT_RESTRIC /// Streaming fractional-delay engine for one converter instance. /// -/// Owns the per-channel history delay lines (planar, contiguous windows with -/// periodic compaction) and the phase accumulator mu. Input frames are pulled +/// Owns the history delay lines (planar per-channel below the +/// channel-parallel threshold, frame-major above it — see the hist_ +/// field) and the phase accumulator mu. Input frames are pulled /// through a caller-supplied PopFn in small bulk chunks and deinterleaved into /// the histories as the integer read position advances. /// @@ -404,6 +405,11 @@ class FractionalResampler { /// the number produced; fewer than maxFrames means the source ran dry /// (underrun). RT-safe: no allocation, locks or exceptions. /// + /// Preconditions (the converter upholds both; direct users must too): + /// a successful prime() before the first process() — the window math + /// underflows otherwise — and reset()+reprime after any dry return, as + /// a dry advance==2 slip leaves history and phase one frame apart. 
+ /// /// PopFn: std::size_t popFrames(S* dst, std::size_t maxFrames) — bulk-pops /// interleaved frames, returning the count actually delivered. template diff --git a/include/srt/sample_traits.hpp b/include/srt/sample_traits.hpp index 72085b3..b3eb832 100644 --- a/include/srt/sample_traits.hpp +++ b/include/srt/sample_traits.hpp @@ -128,10 +128,11 @@ struct SampleTraits { } static Coeff blend(Coeff a, Coeff b, BlendFactor fr) noexcept { - // Q14 + (Q15 * Q14) >> 15, in int64: the int32 product would fit - // today's coefficients (fr <= 32767 by construction), but only with - // ~5% margin against a worst-case adjacent-phase delta — not worth - // the silent invariant. One smull on 32-bit cores. + // Q14 + (Q15 * Q14) >> 15, in int64: the worst-case int32 product + // 32767 * 65535 = 2,147,385,345 sits 0.005% under INT32_MAX — + // real adjacent-phase deltas are tiny (|diff| <= 41 measured on the + // transparent table), but a margin that thin is not an invariant + // worth relying on silently. One smull on 32-bit cores. const std::int64_t diff = static_cast(b) - a; return static_cast(a + ((fr * diff) >> 15)); } diff --git a/include/srt/spsc_ring.hpp b/include/srt/spsc_ring.hpp index 577e062..e70b562 100644 --- a/include/srt/spsc_ring.hpp +++ b/include/srt/spsc_ring.hpp @@ -32,6 +32,8 @@ namespace srt { template class SpscRing { static_assert(std::is_trivially_copyable_v); + // The lock-free claim of the whole audio path rests on these indices. + static_assert(std::atomic::is_always_lock_free); public: /// Allocates the buffer; capacity is rounded up to a power of two. 
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 999e5f2..defff1a 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -37,6 +37,7 @@ add_executable(srt_tests test_asrc_quality.cpp test_asrc_quality_16k.cpp test_fade.cpp + test_hardening.cpp test_latency.cpp test_multichannel.cpp) target_link_libraries(srt_tests PRIVATE diff --git a/tests/bare_metal_main.cpp b/tests/bare_metal_main.cpp index fd909db..4dba52e 100644 --- a/tests/bare_metal_main.cpp +++ b/tests/bare_metal_main.cpp @@ -18,7 +18,7 @@ int main() { ::testing::GTEST_FLAG(filter) = "-AsrcQuality*:AsrcLock.*:Servo.*:Kaiser.*MeetsSpec:" "FixedPoint.AsrcQuality*:" "FixedPoint.FullScaleSineDoesNotWrapQ15:" - "MultiChannel.*"; + "MultiChannel.*:Feasibility.*:Reset.*"; ::testing::InitGoogleTest(); const int rc = RUN_ALL_TESTS(); // CTest's pass criterion: printed only if we get all the way here, so a diff --git a/tests/test_hardening.cpp b/tests/test_hardening.cpp new file mode 100644 index 0000000..de94dc0 --- /dev/null +++ b/tests/test_hardening.cpp @@ -0,0 +1,267 @@ +// Regression tests from the package audit: the pull-block feasibility +// adaptation, hardened Config validation, resync accounting, consumer +// reset, degenerate call sizes, fixed-point fade-in — plus QuickQuality, +// an emulation-sized end-to-end SNR/saturation gate that (by name) runs +// on the bare-metal and Hexagon CI legs, which exclude the long quality +// suites and previously had no end-to-end SNR coverage at all. +#include +#include +#include +#include + +#include + +#include "srt/asrc.hpp" +#include "support/sine_analysis.hpp" +#include "support/two_clock_sim.hpp" + +namespace { + +constexpr double kFs = 48000.0; + +// Audit finding F1: with defaults, any pull block larger than the 48-frame +// setpoint used to drain into a permanent underrun limit cycle (64-frame +// callbacks dropped out every ~0.24 s forever). 
The converter now raises +// its effective setpoint to the observed block; these runs must lock with +// zero underruns and report the raise. +void runFeasibility(std::size_t pullBlock) { + srt::Config cfg; + cfg.channels = 1; + // Lock-stage promotion gates compare smoothed occupancy error against + // frame thresholds; with very coarse blocks the block-quantization + // sawtooth dwarfs the 1-frame default and Acquire->Track never + // promotes. Follow the ServoConfig guidance (thresholds sized to the + // block) for the 240-frame case; the feasibility fix under test is + // independent of this tuning. + if (pullBlock >= 240) { + cfg.servo.lockThresholdFrames = static_cast(pullBlock) / 8.0; + cfg.servo.unlockThresholdFrames = static_cast(pullBlock) * 1.5; + } + srt::AsyncSampleRateConverter asrc(cfg); + srt_test::TwoClockSim sim{.asrc = asrc, + .fsIn = kFs * (1.0 + 200e-6), + .fsOut = kFs, + .channels = 1, + .chunkIn = 32, + .chunkOut = pullBlock}; + sim.gen = [](std::uint64_t i) { + return static_cast(0.5 * std::sin(0.13 * static_cast(i))); + }; + // Coarse blocks keep the servo in Track, where instantaneous ppm swings + // with the block-beat FM — average it, as the 48 kHz lock test does. 
+ double ppmSum = 0.0; + std::size_t blocks = 0; + sim.run(20.0, [&](const float*, std::size_t, double t) { + if (t > 10.0) { + ppmSum += asrc.status().ppm; + ++blocks; + } + }); + const auto st = asrc.status(); + EXPECT_EQ(st.state, srt::State::Locked) << "pull=" << pullBlock; + EXPECT_EQ(st.underruns, 0u) << "pull=" << pullBlock; + EXPECT_GT(st.effectiveTargetLatencyFrames, 48u) << "pull=" << pullBlock; + EXPECT_NEAR(ppmSum / static_cast(blocks), 200.0, 25.0) << "pull=" << pullBlock; +} + +TEST(Feasibility, Pull64LocksCleanly) { + runFeasibility(64); +} +TEST(Feasibility, Pull128LocksCleanly) { + runFeasibility(128); +} +TEST(Feasibility, Pull240LocksCleanly) { + runFeasibility(240); +} + +TEST(Feasibility, SmallPullsKeepConfiguredSetpoint) { + srt::Config cfg; + cfg.channels = 1; + srt::AsyncSampleRateConverter asrc(cfg); + srt_test::TwoClockSim sim{ + .asrc = asrc, .fsIn = kFs * (1.0 + 200e-6), .fsOut = kFs, .channels = 1}; + sim.run(5.0, [](const float*, std::size_t, double) {}); + // 32-frame pulls against the 48-frame default were always feasible; + // the adaptation must not inflate latency for them. + EXPECT_EQ(asrc.status().effectiveTargetLatencyFrames, 48u); +} + +// Audit finding F2: these all constructed successfully and misbehaved +// silently (NaN coefficient tables, image-passing filters, UB-range eps). 
+TEST(ConfigValidation, RejectsSilentMisbehavior) { + { + srt::Config c; + c.sampleRateHz = std::numeric_limits::quiet_NaN(); + EXPECT_THROW(srt::AsyncSampleRateConverter{c}, std::invalid_argument); + } + { + srt::Config c; // anti-image cutoff above input Nyquist + c.filter.passbandHz = 23000.0; + c.filter.stopbandHz = 47000.0; + EXPECT_THROW(srt::AsyncSampleRateConverter{c}, std::invalid_argument); + } + { + srt::Config c; // eps * 2^64 would overflow int64 in the phase path + c.servo.maxDeviationPpm = 400000.0; + EXPECT_THROW(srt::AsyncSampleRateConverter{c}, std::invalid_argument); + } + { + srt::Config c; + c.servo.quietBandwidthHz = std::numeric_limits::infinity(); + EXPECT_THROW(srt::AsyncSampleRateConverter{c}, std::invalid_argument); + } + { + srt::Config c; + c.fifoFrames = 64; // below the high-watermark capacity requirement + EXPECT_THROW(srt::AsyncSampleRateConverter{c}, std::invalid_argument); + } + // The rate-scaling factory sits exactly on the band-edge sum boundary + // (passband + stopband == fs up to rounding); it must keep constructing. + EXPECT_NO_THROW(srt::AsyncSampleRateConverter{srt::Config::forSampleRate(16000.0)}); + EXPECT_NO_THROW(srt::AsyncSampleRateConverter{srt::Config::forSampleRate(44100.0)}); +} + +// Audit finding F3: with a setpoint below the resampler's staged-scratch +// size (16 frames), a hard resync used to drain the ring entirely and +// cascade straight back into Filling. 
+TEST(Resync, SmallSetpointRecovers) { + srt::Config cfg; + cfg.channels = 1; + cfg.targetLatencyFrames = 4; + srt::AsyncSampleRateConverter asrc(cfg); + std::vector in(32, 0.25f); + std::vector out(64); + for (int i = 0; i < 8; ++i) // reach steady operation + asrc.push(in.data(), 32), asrc.pull(out.data(), 32); + for (int i = 0; i < 40; ++i) // consumer stall: drive occupancy over the watermark + asrc.push(in.data(), 32); + std::size_t madeAfter = 0; + for (int i = 0; i < 8; ++i) { + asrc.push(in.data(), 32); + madeAfter += asrc.pull(out.data(), 32); + } + EXPECT_GE(asrc.status().resyncs, 1u); + // The old behavior produced 0 frames here (permanent refill cascade). + EXPECT_GT(madeAfter, 6u * 32u); +} + +TEST(Reset, ConsumerResetRelocks) { + srt::Config cfg; + cfg.channels = 1; + srt::AsyncSampleRateConverter asrc(cfg); + srt_test::TwoClockSim sim{ + .asrc = asrc, .fsIn = kFs * (1.0 + 200e-6), .fsOut = kFs, .channels = 1}; + sim.run(5.0, [](const float*, std::size_t, double) {}); + ASSERT_EQ(asrc.status().state, srt::State::Locked); + asrc.resetFromConsumer(); + EXPECT_EQ(asrc.status().state, srt::State::Filling); + srt_test::TwoClockSim sim2{ + .asrc = asrc, .fsIn = kFs * (1.0 + 200e-6), .fsOut = kFs, .channels = 1}; + sim2.run(5.0, [](const float*, std::size_t, double) {}); + EXPECT_EQ(asrc.status().state, srt::State::Locked); +} + +TEST(EdgeCalls, ZeroLengthAndOversized) { + srt::Config cfg; + cfg.channels = 2; + srt::AsyncSampleRateConverter asrc(cfg); + std::vector in(2 * 4096, 0.1f); + std::vector out(2 * 8192); + EXPECT_EQ(asrc.push(in.data(), 0), 0u); + EXPECT_EQ(asrc.pull(out.data(), 0), 0u); + for (int i = 0; i < 64; ++i) + asrc.push(in.data(), 32); + // Oversized pull: bounded behavior — synthesize what the backlog allows, + // silence-pad the rest, count the underrun; every sample finite. 
+ const std::size_t made = asrc.pull(out.data(), 8192); + EXPECT_LE(made, 8192u); + for (float v : out) + ASSERT_TRUE(std::isfinite(v)); +} + +// Fixed-point fade-in: test_fade.cpp covers float only; the Q15 scaleSample +// branch (round-and-saturate) was untested. +TEST(FadeQ15, OutputRampsAfterFill) { + srt::Config cfg; + cfg.channels = 1; + srt::AsyncSampleRateConverterQ15 asrc(cfg); + std::vector in(32, 16384); + std::vector out(32); + std::vector made; + for (int it = 0; it < 400 && made.size() < 200; ++it) { + asrc.push(in.data(), in.size()); + const std::size_t n = asrc.pull(out.data(), out.size()); + for (std::size_t k = 0; k < n; ++k) + made.push_back(out[k]); + } + ASSERT_GE(made.size(), 200u); + EXPECT_LT(std::abs(made[0]), 3300) << "first frame attenuated"; + for (std::size_t k = 1; k < 64; ++k) + EXPECT_GE(made[k] + 1, made[k - 1]) << "monotonic ramp at " << k; + EXPECT_NEAR(made[80], 16384, 200) << "full level after the ramp"; +} + +// Emulation-sized end-to-end gates (these run on the M33/M55 bare-metal +// suites and the Hexagon leg, whose exclusion filters keep out every long +// quality suite — leaving those targets without any on-target SNR check). 
+TEST(QuickQuality, Q15Tone997) { + srt::Config cfg; + cfg.channels = 1; + srt::AsyncSampleRateConverterQ15 asrc(cfg); + srt_test::TwoClockSimT sim{.asrc = asrc, + .fsIn = kFs * (1.0 + 200e-6), + .fsOut = kFs, + .channels = 1, + .chunkIn = 8, + .chunkOut = 8}; + const double nu = 997.0 / kFs; + sim.gen = [&](std::uint64_t i) { + return srt::detail::roundSat( + 0.5 * 32767.0 * std::sin(2.0 * std::numbers::pi * nu * static_cast(i))); + }; + std::vector tail; + sim.run(4.0, [&](const std::int16_t* x, std::size_t frames, double t) { + if (t >= 3.5) + for (std::size_t n = 0; n < frames; ++n) + tail.push_back(static_cast(x[n]) / 32768.0f); + }); + EXPECT_EQ(asrc.status().underruns, 0u); + const auto fit = srt_test::fitSineTracked(tail, nu * (1.0 + 200e-6)); + // Track-stage run (8-frame blocks, 4 s): block-beat FM dominates the + // tracked-fit residual at ~40+ dB — far below the Quiet-stage Q15 + // figure, far above any gross datapath regression (saturation, + // wrong-phase rows land below 10 dB). Same floor as MultiChannelShort. + EXPECT_GT(srt_test::snrDb(fit), 35.0); +} + +TEST(QuickQuality, FullScaleQ15Short) { + // 1 s near-full-scale variant of FixedPoint.FullScaleSineDoesNotWrapQ15, + // sized for emulation and named so the bare-metal filter keeps it: the + // wide-MAC (SMLALD) target previously never saw near-full-scale input. 
+ srt::Config cfg; + cfg.channels = 1; + srt::AsyncSampleRateConverterQ15 asrc(cfg); + srt_test::TwoClockSimT sim{.asrc = asrc, + .fsIn = kFs * (1.0 + 500e-6), + .fsOut = kFs, + .channels = 1, + .chunkIn = 8, + .chunkOut = 8}; + const double nu = 1000.0 / kFs; + sim.gen = [&](std::uint64_t i) { + return srt::detail::roundSat( + 0.99 * 32767.0 * std::sin(2.0 * std::numbers::pi * nu * static_cast(i))); + }; + std::vector tail; + sim.run(1.0, [&](const std::int16_t* x, std::size_t frames, double t) { + if (t > 0.5) + for (std::size_t n = 0; n < frames; ++n) + tail.push_back(static_cast(x[n]) / 32768.0); + }); + const double omega = 2.0 * std::numbers::pi * nu; + const double bound = 1.5 * 0.99 * omega * omega + 4.0 / 32768.0; + for (std::size_t n = 1; n + 1 < tail.size(); ++n) + ASSERT_LT(std::abs(tail[n + 1] - 2.0 * tail[n] + tail[n - 1]), bound) << "n=" << n; +} + +} // namespace diff --git a/tests/test_multichannel.cpp b/tests/test_multichannel.cpp index 9014d39..76dcf54 100644 --- a/tests/test_multichannel.cpp +++ b/tests/test_multichannel.cpp @@ -152,6 +152,28 @@ TEST(MultiChannel, Independence16chQ15) { // MultiChannel.* runs are excluded): a Track-stage run that still catches // any channel permutation or gross crosstalk on the target's own datapath // — including the wide-MAC dotRow paths (SMLALD on M33-class). +// Channels 5 and 7 are the only counts that reach the channel-parallel +// K=2 and K=1 remainder tiles (8/4/2/1 tiling: 5 = 4+1, 7 = 4+2+1) — the +// audit found those tiles had zero coverage. Float, because float is the +// channel-parallel sample type. 
+TEST(MultiChannelShort, Independence5chFloat) { + const auto r = measureIndependence(5, 4.0, 0.25, 8); + for (const auto& ch : r) { + EXPECT_NEAR(ch.amplitude, kAmp, 0.05); + EXPECT_GT(ch.snrDb, 35.0); + EXPECT_LT(ch.worstCrosstalkDb, -50.0); + } +} + +TEST(MultiChannelShort, Independence7chFloat) { + const auto r = measureIndependence(7, 4.0, 0.25, 8); + for (const auto& ch : r) { + EXPECT_NEAR(ch.amplitude, kAmp, 0.05); + EXPECT_GT(ch.snrDb, 35.0); + EXPECT_LT(ch.worstCrosstalkDb, -50.0); + } +} + TEST(MultiChannelShort, Independence12chQ15) { const auto r = measureIndependence(12, 4.0, 0.25, 8); for (const auto& ch : r) { diff --git a/tools/capi/srt_capi.cpp b/tools/capi/srt_capi.cpp index c5db95a..0858bb9 100644 --- a/tools/capi/srt_capi.cpp +++ b/tools/capi/srt_capi.cpp @@ -1,12 +1,14 @@ /// \file srt_capi.cpp /// \brief C ABI shim over the float converter, for FFI consumers (ctypes, -/// cffi, Julia, ...). Build with SRT_BUILD_CAPI=ON; see +/// cffi, Julia, ...). Build with SRT_BUILD_CAPI=ON; srt_capi.h is the +/// contract (thread affinity, error convention); see /// notebooks/asrc_demo.ipynb for a worked client. /// /// The shim is intentionally minimal: an opaque handle, the push/pull hot /// path, telemetry, and designed latency. Errors surface as null handles or -/// zero return values; the hot-path functions keep the library's noexcept -/// guarantee. +/// zero return values, and every entry point tolerates a null handle — the +/// documented error convention ("check srt_create for NULL") otherwise +/// invites a crash on exactly the path where the caller forgot to check. 
#include #include #include @@ -14,8 +16,23 @@ #include "srt/srt.hpp" extern "C" { - struct SrtHandle; // opaque +} + +namespace { +srt::AsyncSampleRateConverter* impl(SrtHandle* h) noexcept { + return reinterpret_cast(h); +} +const srt::AsyncSampleRateConverter* impl(const SrtHandle* h) noexcept { + return reinterpret_cast(h); +} +} // namespace + +extern "C" { + +unsigned srt_version(void) noexcept { + return SRT_VERSION_MAJOR * 10000u + SRT_VERSION_MINOR * 100u + SRT_VERSION_PATCH; +} /// preset: 0 = fast, 1 = balanced, 2 = transparent. SrtHandle* srt_create(double sampleRateHz, std::size_t channels, std::size_t targetLatencyFrames, @@ -36,21 +53,26 @@ SrtHandle* srt_create(double sampleRateHz, std::size_t channels, std::size_t tar } void srt_destroy(SrtHandle* h) noexcept { - delete reinterpret_cast(h); + delete impl(h); } std::size_t srt_push(SrtHandle* h, const float* interleaved, std::size_t frames) noexcept { - return reinterpret_cast(h)->push(interleaved, frames); + return h ? impl(h)->push(interleaved, frames) : 0; } std::size_t srt_pull(SrtHandle* h, float* interleaved, std::size_t frames) noexcept { - return reinterpret_cast(h)->pull(interleaved, frames); + return h ? impl(h)->pull(interleaved, frames) : 0; } /// out[0]=state (0 Filling, 1 Acquiring, 2 Locked), out[1]=ppm, /// out[2]=fifoFillFrames, out[3]=underruns, out[4]=overruns, out[5]=resyncs. void srt_status(const SrtHandle* h, double out[6]) noexcept { - const srt::Status s = reinterpret_cast(h)->status(); + if (!h) { + for (int i = 0; i < 6; ++i) + out[i] = 0.0; + return; + } + const srt::Status s = impl(h)->status(); out[0] = static_cast(static_cast(s.state)); out[1] = s.ppm; out[2] = s.fifoFillFrames; @@ -60,11 +82,12 @@ void srt_status(const SrtHandle* h, double out[6]) noexcept { } double srt_designed_latency_seconds(const SrtHandle* h) noexcept { - return reinterpret_cast(h)->designedLatencySeconds(); + return h ? 
/* SampleRateTap C ABI — FFI surface over the float converter.
 *
 * Build the shared library with -DSRT_BUILD_CAPI=ON. This header is the
 * contract for C/cffi/Julia consumers (the ctypes notebooks re-declare the
 * same prototypes); it must stay in sync with srt_capi.cpp.
 *
 * Thread contract (identical to the C++ API): one producer thread calls
 * srt_push at the input clock, one consumer thread calls srt_pull at the
 * output clock; srt_status may be called from any thread;
 * srt_reset_from_consumer only from the consumer thread; srt_create /
 * srt_destroy from any single thread, never concurrently with push/pull.
 *
 * Errors: srt_create returns NULL on invalid configuration or allocation
 * failure. Every function tolerates a NULL handle (no-op / zero return), so
 * an unchecked failed create does not crash. Note that srt_pull on a NULL
 * handle returns 0 WITHOUT writing the output buffer (the channel count is
 * unknown without a converter): callers that rely on "silence on failure"
 * must zero the buffer themselves whenever srt_pull returns 0.
 *
 * size_t in these signatures follows the platform ABI (32-bit on 32-bit
 * targets) — declare foreign types accordingly.
 */
#ifndef SRT_CAPI_H
#define SRT_CAPI_H

#include <stddef.h>

/* The definitions in srt_capi.cpp are noexcept. Spelling that here (C++
 * only; expands to nothing in C) lets a C++ translation unit include this
 * header next to those definitions without an exception-specification
 * mismatch, so prototype drift becomes a compile error. */
#ifdef __cplusplus
#define SRT_CAPI_NOEXCEPT noexcept
#else
#define SRT_CAPI_NOEXCEPT
#endif

#ifdef __cplusplus
extern "C" {
#endif

typedef struct SrtHandle SrtHandle;

/* ABI/version probe: returns SRT_VERSION_MAJOR*10000 +
 * SRT_VERSION_MINOR*100 + SRT_VERSION_PATCH (e.g. 100 for 0.1.0). */
unsigned srt_version(void) SRT_CAPI_NOEXCEPT;

/* preset: 0 = fast, 1 = balanced, 2 = transparent.
 * targetLatencyFrames = 0 selects the library default (48).
 * Returns NULL on failure.
 * NOTE(review): left without SRT_CAPI_NOEXCEPT because the exception
 * specification of the definition could not be confirmed — add it if
 * srt_create is defined noexcept. */
SrtHandle* srt_create(double sampleRateHz, size_t channels, size_t targetLatencyFrames, int preset);

/* Destroys a handle; NULL is accepted and ignored. */
void srt_destroy(SrtHandle* h) SRT_CAPI_NOEXCEPT;

/* Producer thread. Returns frames accepted (< frames on FIFO-full);
 * 0 for a NULL handle. */
size_t srt_push(SrtHandle* h, const float* interleaved, size_t frames) SRT_CAPI_NOEXCEPT;

/* Consumer thread. With a valid handle, always fills `frames` output frames
 * (silence while filling / on underrun) and returns the number of frames
 * synthesized from real input. With a NULL handle, returns 0 and leaves
 * `interleaved` untouched. */
size_t srt_pull(SrtHandle* h, float* interleaved, size_t frames) SRT_CAPI_NOEXCEPT;

/* out[0]=state (0 Filling, 1 Acquiring, 2 Locked), out[1]=ppm,
 * out[2]=fifoFillFrames, out[3]=underruns, out[4]=overruns,
 * out[5]=resyncs.
 * `out` must point to at least 6 doubles and must not be NULL; all six are
 * written as 0.0 when `h` is NULL. */
void srt_status(const SrtHandle* h, double out[6]) SRT_CAPI_NOEXCEPT;

/* Designed latency in seconds (FIFO setpoint + filter group delay), computed
 * from the setpoint currently in force, which the converter may have raised
 * above the configured value. Returns 0.0 for a NULL handle. */
double srt_designed_latency_seconds(const SrtHandle* h) SRT_CAPI_NOEXCEPT;

/* Consumer thread: discard all buffered input, forget the ppm estimate,
 * return to Filling. NULL is accepted and ignored. */
void srt_reset_from_consumer(SrtHandle* h) SRT_CAPI_NOEXCEPT;

#ifdef __cplusplus
}
#endif

#endif /* SRT_CAPI_H */