diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 64f50b9..c350404 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -167,7 +167,7 @@ jobs:
       - name: Test under emulation
         run: >
           ctest --test-dir build --output-on-failure
-          -E 'AsrcQuality|AsrcLock|TwoThreadStress|TransparentPrototypeMeetsSpec|MultiChannel\.'
+          -E 'AsrcQuality|AsrcLock|TwoThreadStress|TransparentPrototypeMeetsSpec|MultiChannel\.|Feasibility|Reset\.'
 
   # Cross-compile for Arm Cortex-M55 (bare metal, newlib + semihosting) and
   # run the emulation-sized test subset on QEMU's MPS3 AN547 board model.
diff --git a/README.md b/README.md
index 6596d53..917d14c 100644
--- a/README.md
+++ b/README.md
@@ -143,8 +143,18 @@ latency = targetLatencyFrames + (L·T − 1)/(2L)      [input frames]
 `designedLatencySeconds()` reports the figure; the FIFO term breathes by a
 fraction of the block size as the servo tracks drift. The filter is linear
 phase. For lower latency use `FilterSpec::fast()` (~16-frame group delay)
-and a smaller `targetLatencyFrames`; the FIFO setpoint must stay above the
-peak occupancy excursion of your push/pull block jitter.
+and a smaller `targetLatencyFrames`.
+
+**The setpoint must exceed the pull block size** — a pull synthesizes from
+frames already buffered, so a setpoint at or below the callback size is
+infeasible and would drain into a permanent dropout cycle. The converter
+enforces this automatically: when it observes pull blocks larger than the
+configured setpoint it raises the effective setpoint (block + ~half-block
+margin, bounded by FIFO capacity) and reports the value in
+`Status::effectiveTargetLatencyFrames`; latency follows the raised
+setpoint. With the default FIFO, callbacks above ~227 frames get a reduced margin
+and above ~340 none at all; size `fifoFrames` explicitly for them. The setpoint
+must additionally stay above the peak occupancy excursion of your push/pull jitter, as before.
 
 ## Measured performance
 
diff --git a/include/srt/asrc.hpp b/include/srt/asrc.hpp
index 57206ca..a26291d 100644
--- a/include/srt/asrc.hpp
+++ b/include/srt/asrc.hpp
@@ -3,9 +3,12 @@
 #ifndef SRT_ASRC_HPP
 #define SRT_ASRC_HPP
 
+#include <algorithm>
 #include <atomic>
 #include <bit>
+#include <cmath>
 #include <cstdint>
+#include <limits>
 #include <stdexcept>
 #include <type_traits>
 
@@ -16,9 +19,9 @@
 
 namespace srt {
 
-/// Converter configuration. The defaults realize the whitepaper's worked
-/// budget: ~1 ms core latency (FIFO setpoint 48 frames + ~24 frames filter
-/// group delay) at 48 kHz, transparent for clocks within +/-1000 ppm.
+/// Converter configuration. The defaults give ~1.5 ms designed latency at
+/// 48 kHz (FIFO setpoint 48 frames + ~24 frames filter group delay; see
+/// the README latency section), transparent for clocks within +/-1000 ppm.
 struct Config {
     double sampleRateHz = 48000.0; ///< nominal rate of BOTH clock domains
     std::size_t channels = 2;
@@ -72,6 +75,11 @@ struct Status {
     std::uint64_t overruns = 0;  ///< push() calls that could not accept every
                                  ///< offered frame (FIFO full; excess dropped)
     std::uint64_t resyncs = 0;   ///< hard occupancy resyncs (high watermark)
+    /// The setpoint actually in force. Starts at Config::targetLatencyFrames
+    /// and is raised automatically when pull() blocks larger than the
+    /// setpoint are observed (see pull()); differs from the configured value
+    /// exactly when that adaptation has occurred.
+    std::uint64_t effectiveTargetLatencyFrames = 0;
 };
 
 /// Near-unity asynchronous sample rate converter between two clock domains.
@@ -93,11 +101,22 @@ class BasicAsyncSampleRateConverter {
           resampler_(bank_, cfg_.channels, kPopChunkFrames),
           ring_(ringCapacityElems(cfg_, bank_.taps())),
           servo_(cfg_.servo, cfg_.sampleRateHz, static_cast<double>(cfg_.targetLatencyFrames)),
+          targetFrames_(cfg_.targetLatencyFrames),
           fillThresholdFrames_(cfg_.targetLatencyFrames + bank_.taps()),
           highWaterFrames_(std::max(3 * cfg_.targetLatencyFrames,
                                     fillThresholdFrames_ + cfg_.targetLatencyFrames)) {
         if (ring_.capacity() / cfg_.channels <= highWaterFrames_)
             throw std::invalid_argument("AsyncSampleRateConverter: fifoFrames too small");
+        // Largest setpoint the FIFO capacity supports while keeping the
+        // high-watermark relation; bounds the adaptive raise in pull().
+        const std::size_t capFrames = ring_.capacity() / cfg_.channels;
+        const std::size_t taps = bank_.taps();
+        maxTargetFrames_ = std::max(cfg_.targetLatencyFrames,
+                                    std::min((capFrames - 1) / 3, capFrames > taps + 1
+                                                                      ? (capFrames - taps - 1) / 2
+                                                                      : cfg_.targetLatencyFrames));
+        effectiveTarget_.store(static_cast<std::uint32_t>(targetFrames_),
+                               std::memory_order_relaxed);
     }
 
     BasicAsyncSampleRateConverter(const BasicAsyncSampleRateConverter&) = delete;
@@ -117,14 +136,43 @@ class BasicAsyncSampleRateConverter {
     /// Consumer thread: produce exactly `frames` interleaved output frames at
     /// the output clock. Silence-pads while filling and on underrun, and
     /// fades the first kFadeFrames frames in after every (re)fill so dropout
-    /// recovery does not click. Returns the number of frames synthesized
-    /// from real input.
+    /// recovery does not click. (The dropout onset itself and a hard-resync
+    /// splice are unfaded cuts: there is nothing valid to fade to at the
+    /// moment they occur.) Returns the number of frames synthesized from
+    /// real input.
     std::size_t pull(S* interleaved, std::size_t frames) noexcept {
         const std::size_t ch = cfg_.channels;
         const auto popFn = [this](S* dst, std::size_t maxFrames) noexcept {
             return ring_.read(dst, maxFrames * cfg_.channels) / cfg_.channels;
         };
 
+        // Feasibility: a pull must synthesize from frames already buffered,
+        // so the occupancy setpoint must exceed the pull block size or the
+        // loop drains into a permanent underrun limit cycle (dropouts every
+        // few hundred ms, never locking). Raise the effective setpoint to
+        // the largest observed block plus slew/sawtooth margin, bounded by
+        // FIFO capacity; the servo slews to the new setpoint glitch-free
+        // (integrator kept, occupancy only grows). Cost: latency follows
+        // the raised setpoint — see Status::effectiveTargetLatencyFrames.
+        if (frames > observedMaxPull_) {
+            observedMaxPull_ = frames;
+            // Margin sized to the block-beat sawtooth (~half the block) so
+            // the entry occupancy never grazes the pull size; configs that
+            // already satisfy it (e.g. the 32-frame default transfer against
+            // the 48-frame default setpoint) are left exactly as configured.
+            const std::size_t needed = frames + std::max<std::size_t>(frames / 2, kPopChunkFrames);
+            const std::size_t newTarget =
+                std::clamp(needed, cfg_.targetLatencyFrames, maxTargetFrames_);
+            if (newTarget > targetFrames_) {
+                targetFrames_ = newTarget;
+                fillThresholdFrames_ = newTarget + bank_.taps();
+                highWaterFrames_ = std::max(3 * newTarget, fillThresholdFrames_ + newTarget);
+                servo_.setTarget(static_cast<double>(newTarget));
+                effectiveTarget_.store(static_cast<std::uint32_t>(newTarget),
+                                       std::memory_order_relaxed);
+            }
+        }
+
         double occ = backlogFrames();
 
         if (filling_) {
@@ -143,8 +191,15 @@ class BasicAsyncSampleRateConverter {
         }
 
         if (occ > static_cast<double>(highWaterFrames_)) { // hard resync
-            const double target = static_cast<double>(cfg_.targetLatencyFrames);
-            const auto dropFrames = static_cast<std::size_t>(occ - target);
+            const double target = static_cast<double>(targetFrames_);
+            // The discard can only come from the ring; frames staged in the
+            // resampler scratch are part of occ but not discardable. Clamp,
+            // or a setpoint below the staged count drains the ring entirely
+            // and cascades straight back into Filling.
+            const std::size_t ringFrames = ring_.readAvailable() / ch;
+            const double excess = occ - target;
+            const std::size_t dropFrames =
+                std::min(ringFrames, excess > 0.0 ? static_cast<std::size_t>(excess) : 0);
             ring_.discard(dropFrames * ch);
             resyncs_.fetch_add(1, std::memory_order_relaxed);
             occ = backlogFrames();
@@ -178,6 +233,7 @@ class BasicAsyncSampleRateConverter {
         s.underruns = underruns_.load(std::memory_order_relaxed);
         s.overruns = overruns_.load(std::memory_order_relaxed);
         s.resyncs = resyncs_.load(std::memory_order_relaxed);
+        s.effectiveTargetLatencyFrames = effectiveTarget_.load(std::memory_order_relaxed);
         return s;
     }
 
@@ -191,10 +247,12 @@ class BasicAsyncSampleRateConverter {
         publishStatus();
     }
 
-    /// Nominal design latency: FIFO setpoint + filter group delay. The actual
-    /// figure breathes by a fraction of a frame as the servo tracks drift.
+    /// Nominal design latency: FIFO setpoint + filter group delay. Uses the
+    /// effective (possibly adaptively raised) setpoint; the actual figure
+    /// breathes by a fraction of a frame as the servo tracks drift.
     double designedLatencySeconds() const noexcept {
-        return (static_cast<double>(cfg_.targetLatencyFrames) + bank_.groupDelaySamples()) /
+        return (static_cast<double>(effectiveTarget_.load(std::memory_order_relaxed)) +
+                bank_.groupDelaySamples()) /
                cfg_.sampleRateHz;
     }
 
@@ -205,8 +263,12 @@ class BasicAsyncSampleRateConverter {
 
     static std::size_t ringCapacityElems(const Config& cfg, std::size_t taps) {
         const std::size_t fillThreshold = cfg.targetLatencyFrames + taps;
+        // Ring size in elements: cfg.fifoFrames if set, else max(1024, 4 * fillThreshold) frames.
+        // The 1024-frame floor (21 ms at 48 kHz) lets the adaptive setpoint raise reach ~340
+        // frames, i.e. the full half-block margin for pull blocks up to ~227 frames; larger
+        // callbacks need fifoFrames set by the caller (the raise clamps to capacity).
         const std::size_t frames =
-            cfg.fifoFrames != 0 ? cfg.fifoFrames : std::max<std::size_t>(256, 4 * fillThreshold);
+            cfg.fifoFrames != 0 ? cfg.fifoFrames : std::max<std::size_t>(1024, 4 * fillThreshold);
         return std::bit_ceil(frames * cfg.channels);
     }
 
@@ -254,9 +316,40 @@ class BasicAsyncSampleRateConverter {
         fill_.store(static_cast<float>(servo_.smoothedOccupancy()), std::memory_order_relaxed);
     }
 
+    /// Rejects (std::invalid_argument) configurations that would otherwise construct
+    /// successfully and misbehave silently: NaN/Inf anywhere (a NaN sample rate designs an
+    /// all-NaN coefficient table), band edges whose sum exceeds the rate beyond rounding
+    /// slack (anti-image cutoff above input Nyquist passes images wholesale), a deviation
+    /// clamp large enough to overflow the Q0.64 eps conversion (UB), and size products that
+    /// overflow 32-bit size_t targets. Returns cfg unchanged when every check passes.
     static Config validated(Config cfg) {
-        if (cfg.channels == 0 || cfg.sampleRateHz <= 0.0 || cfg.targetLatencyFrames == 0)
+        const auto finite = [](double v) { return std::isfinite(v); };
+        if (cfg.channels == 0 || cfg.targetLatencyFrames == 0 || !finite(cfg.sampleRateHz) ||
+            cfg.sampleRateHz <= 0.0)
             throw std::invalid_argument("AsyncSampleRateConverter: bad Config");
+        const FilterSpec& f = cfg.filter;
+        if (!finite(f.passbandHz) || !finite(f.stopbandHz) || !finite(f.stopbandAttenDb) ||
+            f.passbandHz + f.stopbandHz > cfg.sampleRateHz * (1.0 + 1e-9)) // slack: scaled edges can round an ulp high
+            throw std::invalid_argument("AsyncSampleRateConverter: bad FilterSpec "
+                                        "(need passbandHz + stopbandHz <= sampleRateHz)");
+        const ServoConfig& sv = cfg.servo;
+        if (!finite(sv.acquireBandwidthHz) || !finite(sv.trackBandwidthHz) ||
+            !finite(sv.quietBandwidthHz) || !finite(sv.damping) || !finite(sv.acquireSmootherHz) ||
+            !finite(sv.trackSmootherHz) || !finite(sv.quietSmootherHz) ||
+            !finite(sv.lockThresholdFrames) || !finite(sv.lockHoldSeconds) ||
+            !finite(sv.quietHoldSeconds) || !finite(sv.unlockThresholdFrames) ||
+            !finite(sv.maxDeviationPpm) || sv.maxDeviationPpm <= 0.0 ||
+            sv.maxDeviationPpm > 100000.0) // |eps| stays far from the Q0.64 int64 limit
+            throw std::invalid_argument("AsyncSampleRateConverter: bad ServoConfig");
+        // Size products evaluated later must not wrap on 32-bit size_t; 8 * channels is vetted first.
+        const auto mulOk = [](std::size_t a, std::size_t b) {
+            return b == 0 || a <= std::numeric_limits<std::size_t>::max() / b;
+        };
+        const std::size_t phases = std::bit_ceil(f.numPhases);
+        if (!mulOk(8, cfg.channels) || !mulOk(phases + 1, f.tapsPerPhase) ||
+            !mulOk(cfg.targetLatencyFrames + f.tapsPerPhase, 8 * cfg.channels) ||
+            !mulOk(cfg.fifoFrames, 2 * cfg.channels))
+            throw std::invalid_argument("AsyncSampleRateConverter: Config sizes overflow");
         return cfg;
     }
 
@@ -267,8 +360,12 @@ class BasicAsyncSampleRateConverter {
     FractionalResampler<S> resampler_;
     SpscRing<S> ring_;
     PiServo servo_;
+    // Consumer-thread setpoint state (see the adaptive raise in pull()).
+    std::size_t targetFrames_;
     std::size_t fillThresholdFrames_;
     std::size_t highWaterFrames_;
+    std::size_t maxTargetFrames_ = 0;
+    std::size_t observedMaxPull_ = 0;
     bool filling_ = true;            // consumer-thread state; mirrored into state_
     std::size_t fadeFramesLeft_ = 0; // consumer-thread state
 
@@ -279,6 +376,9 @@ class BasicAsyncSampleRateConverter {
     std::atomic<int> state_{static_cast<int>(State::Filling)};
     std::atomic<float> ppm_{0.0f};
     std::atomic<float> fill_{0.0f};
+    // Effective setpoint mirror for status()/designedLatencySeconds() from
+    // any thread; written only by the consumer (32-bit: lock-free everywhere).
+    std::atomic<std::uint32_t> effectiveTarget_{0};
     std::atomic<std::uint32_t> underruns_{0};
     std::atomic<std::uint32_t> overruns_{0};
     std::atomic<std::uint32_t> resyncs_{0};
diff --git a/include/srt/detail/kaiser.hpp b/include/srt/detail/kaiser.hpp
index 5c551c2..e9ac8a3 100644
--- a/include/srt/detail/kaiser.hpp
+++ b/include/srt/detail/kaiser.hpp
@@ -52,8 +52,16 @@ inline double kaiserBeta(double attenDb) noexcept {
 ///                       (e.g. 8 kHz transition at 48 kHz -> 8000/48000)
 /// \return estimated taps per polyphase phase: N = (A - 8) / (2.285 * 2*pi * df)
 inline std::size_t estimateTaps(double attenDb, double transWidthNorm) noexcept {
+    // Clamp pathological inputs at both ends: attenDb < 8 or a non-positive/NaN
+    // width drive the raw formula negative/infinite/NaN, and a vanishing positive
+    // width drives it past the size_t range; casting any of those to size_t is UB.
+    constexpr std::size_t kSizeMax = static_cast<std::size_t>(-1);
+    if (!(transWidthNorm > 0.0))
+        return 4;
     const double n = (attenDb - 8.0) / (2.285 * 2.0 * std::numbers::pi * transWidthNorm);
-    return static_cast<std::size_t>(std::ceil(n));
+    if (!(n > 4.0)) // also taken when n is NaN
+        return 4;
+    return n < static_cast<double>(kSizeMax) ? static_cast<std::size_t>(std::ceil(n)) : kSizeMax;
 }
 
 /// sin(pi x)/(pi x) with the removable singularity handled.
diff --git a/include/srt/pi_servo.hpp b/include/srt/pi_servo.hpp
index 0c738f6..a9ce41d 100644
--- a/include/srt/pi_servo.hpp
+++ b/include/srt/pi_servo.hpp
@@ -119,6 +119,12 @@ class PiServo {
     /// step.
     void seed(double occPlusMu) noexcept { lpFast_ = q1_ = q2_ = q3_ = occPlusMu; }
 
+    /// Move the occupancy setpoint. The integrator (ppm estimate) is kept and
+    /// the smoothers are left tracking the real observable, so the loop slews
+    /// to the new setpoint at its clamped rate with no transient discontinuity
+    /// — used by the converter's adaptive pull-block setpoint raise.
+    void setTarget(double targetFrames) noexcept { target_ = targetFrames; }
+
     /// One control update; call once per pull() before synthesis.
     /// \param occFrames raw backlog in frames (FIFO + staged frames)
     /// \param mu        current fractional read position; occ + mu changes
diff --git a/include/srt/polyphase_filter.hpp b/include/srt/polyphase_filter.hpp
index aa9f5ec..345604a 100644
--- a/include/srt/polyphase_filter.hpp
+++ b/include/srt/polyphase_filter.hpp
@@ -331,8 +331,9 @@ inline void dotRowsFrameMajor(const typename SampleTraits<S>::Coeff* SRT_RESTRIC
 
 /// Streaming fractional-delay engine for one converter instance.
 ///
-/// Owns the per-channel history delay lines (planar, contiguous windows with
-/// periodic compaction) and the phase accumulator mu. Input frames are pulled
+/// Owns the history delay lines (planar per-channel below the
+/// channel-parallel threshold, frame-major above it — see the hist_
+/// field) and the phase accumulator mu. Input frames are pulled
 /// through a caller-supplied PopFn in small bulk chunks and deinterleaved into
 /// the histories as the integer read position advances.
 ///
@@ -404,6 +405,11 @@ class FractionalResampler {
     /// the number produced; fewer than maxFrames means the source ran dry
     /// (underrun). RT-safe: no allocation, locks or exceptions.
     ///
+    /// Preconditions (the converter upholds both; direct users must too):
+    /// a successful prime() before the first process() — the window math
+    /// underflows otherwise — and reset()+reprime after any dry return, as
+    /// a dry advance==2 slip leaves history and phase one frame apart.
+    ///
     /// PopFn: std::size_t popFrames(S* dst, std::size_t maxFrames) — bulk-pops
     /// interleaved frames, returning the count actually delivered.
     template <typename PopFn>
diff --git a/include/srt/sample_traits.hpp b/include/srt/sample_traits.hpp
index 72085b3..b3eb832 100644
--- a/include/srt/sample_traits.hpp
+++ b/include/srt/sample_traits.hpp
@@ -128,10 +128,11 @@ struct SampleTraits<std::int16_t> {
     }
 
     static Coeff blend(Coeff a, Coeff b, BlendFactor fr) noexcept {
-        // Q14 + (Q15 * Q14) >> 15, in int64: the int32 product would fit
-        // today's coefficients (fr <= 32767 by construction), but only with
-        // ~5% margin against a worst-case adjacent-phase delta — not worth
-        // the silent invariant. One smull on 32-bit cores.
+        // Q14 + (Q15 * Q14) >> 15, in int64: the worst-case int32 product
+        // 32767 * 65535 = 2,147,385,345 sits 0.005% under INT32_MAX —
+        // real adjacent-phase deltas are tiny (|diff| <= 41 measured on the
+        // transparent table), but a margin that thin is not an invariant
+        // worth relying on silently. One smull on 32-bit cores.
         const std::int64_t diff = static_cast<std::int64_t>(b) - a;
         return static_cast<Coeff>(a + ((fr * diff) >> 15));
     }
diff --git a/include/srt/spsc_ring.hpp b/include/srt/spsc_ring.hpp
index 577e062..e70b562 100644
--- a/include/srt/spsc_ring.hpp
+++ b/include/srt/spsc_ring.hpp
@@ -32,6 +32,8 @@ namespace srt {
 template <typename T>
 class SpscRing {
     static_assert(std::is_trivially_copyable_v<T>);
+    // The lock-free claim of the whole audio path rests on these indices.
+    static_assert(std::atomic<std::size_t>::is_always_lock_free);
 
 public:
     /// Allocates the buffer; capacity is rounded up to a power of two.
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 999e5f2..defff1a 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -37,6 +37,7 @@ add_executable(srt_tests
     test_asrc_quality.cpp
     test_asrc_quality_16k.cpp
     test_fade.cpp
+    test_hardening.cpp
     test_latency.cpp
     test_multichannel.cpp)
 target_link_libraries(srt_tests PRIVATE
diff --git a/tests/bare_metal_main.cpp b/tests/bare_metal_main.cpp
index fd909db..4dba52e 100644
--- a/tests/bare_metal_main.cpp
+++ b/tests/bare_metal_main.cpp
@@ -18,7 +18,7 @@ int main() {
     ::testing::GTEST_FLAG(filter) = "-AsrcQuality*:AsrcLock.*:Servo.*:Kaiser.*MeetsSpec:"
                                     "FixedPoint.AsrcQuality*:"
                                     "FixedPoint.FullScaleSineDoesNotWrapQ15:"
-                                    "MultiChannel.*";
+                                    "MultiChannel.*:Feasibility.*:Reset.*";
     ::testing::InitGoogleTest();
     const int rc = RUN_ALL_TESTS();
     // CTest's pass criterion: printed only if we get all the way here, so a
diff --git a/tests/test_hardening.cpp b/tests/test_hardening.cpp
new file mode 100644
index 0000000..de94dc0
--- /dev/null
+++ b/tests/test_hardening.cpp
@@ -0,0 +1,267 @@
+// Regression tests from the package audit: the pull-block feasibility
+// adaptation, hardened Config validation, resync accounting, consumer
+// reset, degenerate call sizes, fixed-point fade-in — plus QuickQuality,
+// an emulation-sized end-to-end SNR/saturation gate that (by name) runs
+// on the bare-metal and Hexagon CI legs, which exclude the long quality
+// suites and previously had no end-to-end SNR coverage at all.
+#include <cmath>
+#include <limits>
+#include <numbers>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "srt/asrc.hpp"
+#include "support/sine_analysis.hpp"
+#include "support/two_clock_sim.hpp"
+
+namespace {
+
+constexpr double kFs = 48000.0;
+
+// Audit finding F1: with defaults, any pull block larger than the 48-frame
+// setpoint used to drain into a permanent underrun limit cycle (64-frame
+// callbacks dropped out every ~0.24 s forever). The converter now raises
+// its effective setpoint to the observed block; these runs must lock with
+// zero underruns and report the raise.
+void runFeasibility(std::size_t pullBlock) {
+    srt::Config cfg;
+    cfg.channels = 1;
+    // Lock-stage promotion gates compare smoothed occupancy error against
+    // frame thresholds; with very coarse blocks the block-quantization
+    // sawtooth dwarfs the 1-frame default and Acquire->Track never
+    // promotes. Follow the ServoConfig guidance (thresholds sized to the
+    // block) for the 240-frame case; the feasibility fix under test is
+    // independent of this tuning.
+    if (pullBlock >= 240) {
+        cfg.servo.lockThresholdFrames = static_cast<double>(pullBlock) / 8.0;
+        cfg.servo.unlockThresholdFrames = static_cast<double>(pullBlock) * 1.5;
+    }
+    srt::AsyncSampleRateConverter asrc(cfg);
+    srt_test::TwoClockSim sim{.asrc = asrc,
+                              .fsIn = kFs * (1.0 + 200e-6),
+                              .fsOut = kFs,
+                              .channels = 1,
+                              .chunkIn = 32,
+                              .chunkOut = pullBlock};
+    sim.gen = [](std::uint64_t i) {
+        return static_cast<float>(0.5 * std::sin(0.13 * static_cast<double>(i)));
+    };
+    // Coarse blocks keep the servo in Track, where instantaneous ppm swings
+    // with the block-beat FM — average it, as the 48 kHz lock test does.
+    double ppmSum = 0.0;
+    std::size_t blocks = 0;
+    sim.run(20.0, [&](const float*, std::size_t, double t) {
+        if (t > 10.0) {
+            ppmSum += asrc.status().ppm;
+            ++blocks;
+        }
+    });
+    const auto st = asrc.status();
+    EXPECT_EQ(st.state, srt::State::Locked) << "pull=" << pullBlock;
+    EXPECT_EQ(st.underruns, 0u) << "pull=" << pullBlock;
+    EXPECT_GT(st.effectiveTargetLatencyFrames, 48u) << "pull=" << pullBlock;
+    EXPECT_NEAR(ppmSum / static_cast<double>(blocks), 200.0, 25.0) << "pull=" << pullBlock;
+}
+
+TEST(Feasibility, Pull64LocksCleanly) {
+    runFeasibility(64);
+}
+TEST(Feasibility, Pull128LocksCleanly) {
+    runFeasibility(128);
+}
+TEST(Feasibility, Pull240LocksCleanly) {
+    runFeasibility(240);
+}
+
+TEST(Feasibility, SmallPullsKeepConfiguredSetpoint) {
+    srt::Config cfg;
+    cfg.channels = 1;
+    srt::AsyncSampleRateConverter asrc(cfg);
+    srt_test::TwoClockSim sim{
+        .asrc = asrc, .fsIn = kFs * (1.0 + 200e-6), .fsOut = kFs, .channels = 1};
+    sim.run(5.0, [](const float*, std::size_t, double) {});
+    // 32-frame pulls against the 48-frame default were always feasible;
+    // the adaptation must not inflate latency for them.
+    EXPECT_EQ(asrc.status().effectiveTargetLatencyFrames, 48u);
+}
+
+// Audit finding F2: these all constructed successfully and misbehaved
+// silently (NaN coefficient tables, image-passing filters, UB-range eps).
+TEST(ConfigValidation, RejectsSilentMisbehavior) {
+    {
+        srt::Config c;
+        c.sampleRateHz = std::numeric_limits<double>::quiet_NaN();
+        EXPECT_THROW(srt::AsyncSampleRateConverter{c}, std::invalid_argument);
+    }
+    {
+        srt::Config c; // anti-image cutoff above input Nyquist
+        c.filter.passbandHz = 23000.0;
+        c.filter.stopbandHz = 47000.0;
+        EXPECT_THROW(srt::AsyncSampleRateConverter{c}, std::invalid_argument);
+    }
+    {
+        srt::Config c; // eps * 2^64 would overflow int64 in the phase path
+        c.servo.maxDeviationPpm = 400000.0;
+        EXPECT_THROW(srt::AsyncSampleRateConverter{c}, std::invalid_argument);
+    }
+    {
+        srt::Config c;
+        c.servo.quietBandwidthHz = std::numeric_limits<double>::infinity();
+        EXPECT_THROW(srt::AsyncSampleRateConverter{c}, std::invalid_argument);
+    }
+    {
+        srt::Config c;
+        c.fifoFrames = 64; // below the high-watermark capacity requirement
+        EXPECT_THROW(srt::AsyncSampleRateConverter{c}, std::invalid_argument);
+    }
+    // The rate-scaling factory sits exactly on the band-edge sum boundary
+    // (passband + stopband == fs up to rounding); it must keep constructing.
+    EXPECT_NO_THROW(srt::AsyncSampleRateConverter{srt::Config::forSampleRate(16000.0)});
+    EXPECT_NO_THROW(srt::AsyncSampleRateConverter{srt::Config::forSampleRate(44100.0)});
+}
+
+// Audit finding F3: with a setpoint below the resampler's staged-scratch
+// size (16 frames), a hard resync used to drain the ring entirely and
+// cascade straight back into Filling.
+TEST(Resync, SmallSetpointRecovers) {
+    srt::Config cfg;
+    cfg.channels = 1;
+    cfg.targetLatencyFrames = 4;
+    srt::AsyncSampleRateConverter asrc(cfg);
+    std::vector<float> in(32, 0.25f);
+    std::vector<float> out(64);
+    for (int i = 0; i < 8; ++i) // reach steady operation
+        asrc.push(in.data(), 32), asrc.pull(out.data(), 32);
+    for (int i = 0; i < 40; ++i) // consumer stall: drive occupancy over the watermark
+        asrc.push(in.data(), 32);
+    std::size_t madeAfter = 0;
+    for (int i = 0; i < 8; ++i) {
+        asrc.push(in.data(), 32);
+        madeAfter += asrc.pull(out.data(), 32);
+    }
+    EXPECT_GE(asrc.status().resyncs, 1u);
+    // The old behavior produced 0 frames here (permanent refill cascade).
+    EXPECT_GT(madeAfter, 6u * 32u);
+}
+
+TEST(Reset, ConsumerResetRelocks) {
+    srt::Config cfg;
+    cfg.channels = 1;
+    srt::AsyncSampleRateConverter asrc(cfg);
+    srt_test::TwoClockSim sim{
+        .asrc = asrc, .fsIn = kFs * (1.0 + 200e-6), .fsOut = kFs, .channels = 1};
+    sim.run(5.0, [](const float*, std::size_t, double) {});
+    ASSERT_EQ(asrc.status().state, srt::State::Locked);
+    asrc.resetFromConsumer();
+    EXPECT_EQ(asrc.status().state, srt::State::Filling);
+    srt_test::TwoClockSim sim2{
+        .asrc = asrc, .fsIn = kFs * (1.0 + 200e-6), .fsOut = kFs, .channels = 1};
+    sim2.run(5.0, [](const float*, std::size_t, double) {});
+    EXPECT_EQ(asrc.status().state, srt::State::Locked);
+}
+
+TEST(EdgeCalls, ZeroLengthAndOversized) {
+    srt::Config cfg;
+    cfg.channels = 2;
+    srt::AsyncSampleRateConverter asrc(cfg);
+    std::vector<float> in(2 * 4096, 0.1f);
+    std::vector<float> out(2 * 8192);
+    EXPECT_EQ(asrc.push(in.data(), 0), 0u);
+    EXPECT_EQ(asrc.pull(out.data(), 0), 0u);
+    for (int i = 0; i < 64; ++i)
+        asrc.push(in.data(), 32);
+    // Oversized pull: bounded behavior — synthesize what the backlog allows,
+    // silence-pad the rest, count the underrun; every sample finite.
+    const std::size_t made = asrc.pull(out.data(), 8192);
+    EXPECT_LE(made, 8192u);
+    for (float v : out)
+        ASSERT_TRUE(std::isfinite(v));
+}
+
+// Fixed-point fade-in: test_fade.cpp covers float only; the Q15 scaleSample
+// branch (round-and-saturate) was untested.
+TEST(FadeQ15, OutputRampsAfterFill) {
+    srt::Config cfg;
+    cfg.channels = 1;
+    srt::AsyncSampleRateConverterQ15 asrc(cfg);
+    std::vector<std::int16_t> in(32, 16384);
+    std::vector<std::int16_t> out(32);
+    std::vector<std::int16_t> made;
+    for (int it = 0; it < 400 && made.size() < 200; ++it) {
+        asrc.push(in.data(), in.size());
+        const std::size_t n = asrc.pull(out.data(), out.size());
+        for (std::size_t k = 0; k < n; ++k)
+            made.push_back(out[k]);
+    }
+    ASSERT_GE(made.size(), 200u);
+    EXPECT_LT(std::abs(made[0]), 3300) << "first frame attenuated";
+    for (std::size_t k = 1; k < 64; ++k)
+        EXPECT_GE(made[k] + 1, made[k - 1]) << "monotonic ramp at " << k;
+    EXPECT_NEAR(made[80], 16384, 200) << "full level after the ramp";
+}
+
+// Emulation-sized end-to-end gates (these run on the M33/M55 bare-metal
+// suites and the Hexagon leg, whose exclusion filters keep out every long
+// quality suite — leaving those targets without any on-target SNR check).
+TEST(QuickQuality, Q15Tone997) {
+    srt::Config cfg;
+    cfg.channels = 1;
+    srt::AsyncSampleRateConverterQ15 asrc(cfg);
+    srt_test::TwoClockSimT<std::int16_t> sim{.asrc = asrc,
+                                             .fsIn = kFs * (1.0 + 200e-6),
+                                             .fsOut = kFs,
+                                             .channels = 1,
+                                             .chunkIn = 8,
+                                             .chunkOut = 8};
+    const double nu = 997.0 / kFs;
+    sim.gen = [&](std::uint64_t i) {
+        return srt::detail::roundSat<std::int16_t>(
+            0.5 * 32767.0 * std::sin(2.0 * std::numbers::pi * nu * static_cast<double>(i)));
+    };
+    std::vector<float> tail;
+    sim.run(4.0, [&](const std::int16_t* x, std::size_t frames, double t) {
+        if (t >= 3.5)
+            for (std::size_t n = 0; n < frames; ++n)
+                tail.push_back(static_cast<float>(x[n]) / 32768.0f);
+    });
+    EXPECT_EQ(asrc.status().underruns, 0u);
+    const auto fit = srt_test::fitSineTracked(tail, nu * (1.0 + 200e-6));
+    // Track-stage run (8-frame blocks, 4 s): block-beat FM dominates the
+    // tracked-fit residual at ~40+ dB — far below the Quiet-stage Q15
+    // figure, far above any gross datapath regression (saturation,
+    // wrong-phase rows land below 10 dB). Same floor as MultiChannelShort.
+    EXPECT_GT(srt_test::snrDb(fit), 35.0);
+}
+
+TEST(QuickQuality, FullScaleQ15Short) {
+    // 1 s near-full-scale variant of FixedPoint.FullScaleSineDoesNotWrapQ15,
+    // sized for emulation and named so the bare-metal filter keeps it: the
+    // wide-MAC (SMLALD) target previously never saw near-full-scale input.
+    srt::Config cfg;
+    cfg.channels = 1;
+    srt::AsyncSampleRateConverterQ15 asrc(cfg);
+    srt_test::TwoClockSimT<std::int16_t> sim{.asrc = asrc,
+                                             .fsIn = kFs * (1.0 + 500e-6),
+                                             .fsOut = kFs,
+                                             .channels = 1,
+                                             .chunkIn = 8,
+                                             .chunkOut = 8};
+    const double nu = 1000.0 / kFs;
+    sim.gen = [&](std::uint64_t i) {
+        return srt::detail::roundSat<std::int16_t>(
+            0.99 * 32767.0 * std::sin(2.0 * std::numbers::pi * nu * static_cast<double>(i)));
+    };
+    std::vector<double> tail;
+    sim.run(1.0, [&](const std::int16_t* x, std::size_t frames, double t) {
+        if (t > 0.5)
+            for (std::size_t n = 0; n < frames; ++n)
+                tail.push_back(static_cast<double>(x[n]) / 32768.0);
+    });
+    const double omega = 2.0 * std::numbers::pi * nu;
+    const double bound = 1.5 * 0.99 * omega * omega + 4.0 / 32768.0;
+    for (std::size_t n = 1; n + 1 < tail.size(); ++n)
+        ASSERT_LT(std::abs(tail[n + 1] - 2.0 * tail[n] + tail[n - 1]), bound) << "n=" << n;
+}
+
+} // namespace
diff --git a/tests/test_multichannel.cpp b/tests/test_multichannel.cpp
index 9014d39..76dcf54 100644
--- a/tests/test_multichannel.cpp
+++ b/tests/test_multichannel.cpp
@@ -152,6 +152,28 @@ TEST(MultiChannel, Independence16chQ15) {
 // MultiChannel.* runs are excluded): a Track-stage run that still catches
 // any channel permutation or gross crosstalk on the target's own datapath
 // — including the wide-MAC dotRow paths (SMLALD on M33-class).
+// Channel counts 5 and 7 are the only tested counts that reach the
+// channel-parallel K=2 and K=1 remainder tiles (8/4/2/1 tiling: 5 = 4+1,
+// 7 = 4+2+1) — the audit found those tiles had zero coverage. Float,
+// because float is the channel-parallel sample type.
+TEST(MultiChannelShort, Independence5chFloat) {
+    const auto r = measureIndependence<float>(5, 4.0, 0.25, 8);
+    for (const auto& ch : r) {
+        EXPECT_NEAR(ch.amplitude, kAmp, 0.05);
+        EXPECT_GT(ch.snrDb, 35.0);
+        EXPECT_LT(ch.worstCrosstalkDb, -50.0);
+    }
+}
+
+TEST(MultiChannelShort, Independence7chFloat) {
+    const auto r = measureIndependence<float>(7, 4.0, 0.25, 8);
+    for (const auto& ch : r) {
+        EXPECT_NEAR(ch.amplitude, kAmp, 0.05);
+        EXPECT_GT(ch.snrDb, 35.0);
+        EXPECT_LT(ch.worstCrosstalkDb, -50.0);
+    }
+}
+
 TEST(MultiChannelShort, Independence12chQ15) {
     const auto r = measureIndependence<std::int16_t>(12, 4.0, 0.25, 8);
     for (const auto& ch : r) {
diff --git a/tools/capi/srt_capi.cpp b/tools/capi/srt_capi.cpp
index c5db95a..0858bb9 100644
--- a/tools/capi/srt_capi.cpp
+++ b/tools/capi/srt_capi.cpp
@@ -1,12 +1,14 @@
 /// \file srt_capi.cpp
 /// \brief C ABI shim over the float converter, for FFI consumers (ctypes,
-/// cffi, Julia, ...). Build with SRT_BUILD_CAPI=ON; see
+/// cffi, Julia, ...). Build with SRT_BUILD_CAPI=ON; srt_capi.h is the
+/// contract (thread affinity, error convention); see
 /// notebooks/asrc_demo.ipynb for a worked client.
 ///
 /// The shim is intentionally minimal: an opaque handle, the push/pull hot
 /// path, telemetry, and designed latency. Errors surface as null handles or
-/// zero return values; the hot-path functions keep the library's noexcept
-/// guarantee.
+/// zero return values, and every entry point tolerates a null handle — the
+/// documented error convention ("check srt_create for NULL") otherwise
+/// invites a crash on exactly the path where the caller forgot to check.
 #include <cstddef>
 #include <cstdint>
 #include <new>
@@ -14,8 +16,23 @@
 #include "srt/srt.hpp"
 
 extern "C" {
-
 struct SrtHandle; // opaque
+}
+
+namespace {
+srt::AsyncSampleRateConverter* impl(SrtHandle* h) noexcept {
+    return reinterpret_cast<srt::AsyncSampleRateConverter*>(h);
+}
+const srt::AsyncSampleRateConverter* impl(const SrtHandle* h) noexcept {
+    return reinterpret_cast<const srt::AsyncSampleRateConverter*>(h);
+}
+} // namespace
+
+extern "C" {
+
+unsigned srt_version(void) noexcept {
+    return SRT_VERSION_MAJOR * 10000u + SRT_VERSION_MINOR * 100u + SRT_VERSION_PATCH;
+}
 
 /// preset: 0 = fast, 1 = balanced, 2 = transparent.
 SrtHandle* srt_create(double sampleRateHz, std::size_t channels, std::size_t targetLatencyFrames,
@@ -36,21 +53,26 @@ SrtHandle* srt_create(double sampleRateHz, std::size_t channels, std::size_t tar
 }
 
 void srt_destroy(SrtHandle* h) noexcept {
-    delete reinterpret_cast<srt::AsyncSampleRateConverter*>(h);
+    delete impl(h);
 }
 
 std::size_t srt_push(SrtHandle* h, const float* interleaved, std::size_t frames) noexcept {
-    return reinterpret_cast<srt::AsyncSampleRateConverter*>(h)->push(interleaved, frames);
+    return h ? impl(h)->push(interleaved, frames) : 0;
 }
 
 std::size_t srt_pull(SrtHandle* h, float* interleaved, std::size_t frames) noexcept {
-    return reinterpret_cast<srt::AsyncSampleRateConverter*>(h)->pull(interleaved, frames);
+    return h ? impl(h)->pull(interleaved, frames) : 0;
 }
 
 /// out[0]=state (0 Filling, 1 Acquiring, 2 Locked), out[1]=ppm,
 /// out[2]=fifoFillFrames, out[3]=underruns, out[4]=overruns, out[5]=resyncs.
 void srt_status(const SrtHandle* h, double out[6]) noexcept {
-    const srt::Status s = reinterpret_cast<const srt::AsyncSampleRateConverter*>(h)->status();
+    if (!h) {
+        for (int i = 0; i < 6; ++i)
+            out[i] = 0.0;
+        return;
+    }
+    const srt::Status s = impl(h)->status();
     out[0] = static_cast<double>(static_cast<int>(s.state));
     out[1] = s.ppm;
     out[2] = s.fifoFillFrames;
@@ -60,11 +82,12 @@ void srt_status(const SrtHandle* h, double out[6]) noexcept {
 }
 
 double srt_designed_latency_seconds(const SrtHandle* h) noexcept {
-    return reinterpret_cast<const srt::AsyncSampleRateConverter*>(h)->designedLatencySeconds();
+    return h ? impl(h)->designedLatencySeconds() : 0.0;
 }
 
 void srt_reset_from_consumer(SrtHandle* h) noexcept {
-    reinterpret_cast<srt::AsyncSampleRateConverter*>(h)->resetFromConsumer();
+    if (h)
+        impl(h)->resetFromConsumer();
 }
 
 } // extern "C"
diff --git a/tools/capi/srt_capi.h b/tools/capi/srt_capi.h
new file mode 100644
index 0000000..b8b3195
--- /dev/null
+++ b/tools/capi/srt_capi.h
@@ -0,0 +1,63 @@
+/* SampleRateTap C ABI — FFI surface over the float converter.
+ *
+ * Build the shared library with -DSRT_BUILD_CAPI=ON. This header is the
+ * contract for C/cffi/Julia consumers (the ctypes notebooks re-declare the
+ * same prototypes); it must stay in sync with srt_capi.cpp.
+ *
+ * Thread contract (identical to the C++ API): one producer thread calls
+ * srt_push at the input clock, one consumer thread calls srt_pull at the
+ * output clock; srt_status may be called from any thread;
+ * srt_reset_from_consumer only from the consumer thread; srt_create /
+ * srt_destroy from any single thread, never concurrently with push/pull.
+ *
+ * Errors: srt_create returns NULL on invalid configuration or allocation
+ * failure. Every function tolerates a NULL handle (no-op / zero return), so
+ * an unchecked failed create cannot crash; srt_pull leaves the buffer unwritten.
+ *
+ * size_t in these signatures follows the platform ABI (32-bit on 32-bit
+ * targets) — declare foreign types accordingly.
+ */
+#ifndef SRT_CAPI_H
+#define SRT_CAPI_H
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct SrtHandle SrtHandle;
+
+/* ABI/version probe: returns SRT_VERSION_MAJOR*10000 +
+ * SRT_VERSION_MINOR*100 + SRT_VERSION_PATCH (e.g. 100 for 0.1.0). */
+unsigned srt_version(void);
+
+/* preset: 0 = fast, 1 = balanced, 2 = transparent.
+ * targetLatencyFrames = 0 selects the library default (48). */
+SrtHandle* srt_create(double sampleRateHz, size_t channels, size_t targetLatencyFrames, int preset);
+
+void srt_destroy(SrtHandle* h);
+
+/* Producer thread. Returns frames accepted (< frames on FIFO-full). */
+size_t srt_push(SrtHandle* h, const float* interleaved, size_t frames);
+
+/* Consumer thread. Given a valid handle, fills all `frames` frames (silence
+ * while filling / on underrun); returns frames synthesized from real input. */
+size_t srt_pull(SrtHandle* h, float* interleaved, size_t frames);
+
+/* out[0]=state (0 Filling, 1 Acquiring, 2 Locked), out[1]=ppm,
+ * out[2]=fifoFillFrames, out[3]=underruns, out[4]=overruns,
+ * out[5]=resyncs. */
+void srt_status(const SrtHandle* h, double out[6]);
+
+double srt_designed_latency_seconds(const SrtHandle* h);
+
+/* Consumer thread: discard all buffered input, forget the ppm estimate,
+ * return to Filling. */
+void srt_reset_from_consumer(SrtHandle* h);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* SRT_CAPI_H */