Here’s a standalone program for looking at energy decay in a room. There are just two calls to the QA40x Python analyzer code: The init() and the sendreceive() for getting the calibrated time-domain data in and out of the QA403. The rest is handled in the code below. Much of the code below doesn’t care about the absolute calibration, except for the instantaneous RMS estimate. I used an SM58 in the home office, and should have used the Earthworks M23R. But it was at work.
I used GPT a lot for this code. You can tell it to study a git repo (such as PyQa40x) and use that as a starting point. And it’s pretty good in that regard. What you get from 250 lines of Python blows my mind. I know, I’m late to the party
We are very close to being able to ask ChatGPT “I need some software to figure out the gain of this amp, and then plot THD versus power out making sure to not exceed 1% THD at any point. Use a 32K FFT with a rect window.” Anoterh 6-12 months of GPT evolution and some more samples in the repo will dramatically lower the bar for writing custom code for both testing and algo development.
And as a partner for developing audio algorithms, this exercise with gpt has really impressed me. GPT iterating on the code, pasting it into Jupyter, running, pasting the plots back to gpt and asking “what went wrong” is really, really good. I can only imagine where this goes when GPT is more integrated with an environment like Jupyter or VS.
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import fftconvolve, spectrogram
from mpl_toolkits.mplot3d import Axes3D
from PyQa40x.analyzer import Analyzer
# Test Setup: QA403 drives QA462 whcih drives speaker which
# fires into a room about 13x16x10'. The room sweep is picked
# up by an SM58, amplified 20 dB by a QA472 and then recovered
# by the QA403
# Key setup vars
fft_size = 2 ** 17 # Buffer length in samples
chirp_amplitude_dbv = -10 # Chirp amplitude in dBV. QA462 has 20 dB of gain
sample_rate = 48000
# Define mic sensitivity in dBV/Pa and external pre-amp gain
mic_sensitivity_dbv_pa = -54.5 # Sure SM58
pre_amp_gain_db = 20 # QA472 20 dB
rms_slice_interval_ms = 10 # RMS calculation interval in ms
# Convert mic sensitivity from dBV/Pa to V/Pa
mic_sensitivity_v_pa = 10 ** (mic_sensitivity_dbv_pa / 20.0)
def farina_chirp(total_buffer_length, f1, f2, fs, chirp_amplitude_dbv):
# Chirp lives in 60% of buffer. The remaining 40% will hold the room reflections.
T = (total_buffer_length * 0.6) / fs
t = np.arange(0, T, 1 / fs)
R = np.log(f2 / f1)
# Convert chirp amplitude from dBV to Vrms and then to peak value
V_rms = 10 ** (chirp_amplitude_dbv / 20.0)
peak = V_rms * np.sqrt(2)
chirp_signal = peak * np.sin((2 * np.pi * f1 * T / R) * (np.exp(t * R / T) - 1))
# Pad the signal to the total buffer length with silence at the end
padding = total_buffer_length - len(chirp_signal)
padded_chirp = np.pad(chirp_signal, (0, padding), 'constant')
padded_t = np.arange(0, len(padded_chirp)) / fs
# Generate the inverse filter
k = np.exp(padded_t * R / T)
inverse_filter = padded_chirp[::-1] / k
return padded_chirp, inverse_filter
def calculate_rt(ir, sample_rate, decay_db):
squared_ir = ir ** 2
edc = np.cumsum(squared_ir[::-1])[::-1]
edc_db = 10 * np.log10(edc / np.max(edc))
start_idx = np.where(edc_db <= -5)[0][0]
end_idx = np.where(edc_db <= -5 - decay_db)[0][0]
slope, intercept = np.polyfit(np.arange(start_idx, end_idx) / sample_rate, edc_db[start_idx:end_idx], 1)
rt = -(decay_db + 5) / slope
return rt, edc_db, start_idx, end_idx
def compute_instantaneous_dbspl(signal, sample_rate, mic_sensitivity_v_pa, pre_amp_gain_db, rms_slice_interval_ms):
segment_duration = rms_slice_interval_ms / 1000.0 # Convert to seconds
segment_samples = int(segment_duration * sample_rate)
pre_amp_gain_linear = 10 ** (pre_amp_gain_db / 20.0)
dbspl_values = []
for start in range(0, len(signal), segment_samples):
segment = signal[start:start + segment_samples]
if len(segment) == 0:
break
rms_value = np.sqrt(np.mean(segment**2))
# Convert the RMS value to the sound pressure level in Pascals
p_acquired = rms_value / pre_amp_gain_linear / mic_sensitivity_v_pa
# Convert the sound pressure level to dB SPL
db_spl = 20 * np.log10(p_acquired / 20e-6)
dbspl_values.append(db_spl)
time_values = np.linspace(0, len(signal) / sample_rate, len(dbspl_values))
return np.array(dbspl_values), time_values
def generate_window(buffer_size, window_start, window_end, ramp_up, ramp_down):
small_value = 0.000001
buffer = np.full(buffer_size, small_value)
flat_top_length = max(0, window_end - window_start - ramp_up - ramp_down)
if ramp_up > 0:
ramp_up_window = np.hanning(2 * ramp_up)[:ramp_up]
else:
ramp_up_window = np.array([])
if ramp_down > 0:
ramp_down_window = np.hanning(2 * ramp_down)[ramp_down:]
else:
ramp_down_window = np.array([])
window_function = np.concatenate([
ramp_up_window,
np.ones(flat_top_length),
ramp_down_window
])
buffer[window_start:window_start + len(window_function)] = window_function
return buffer
analyzer = Analyzer()
params = analyzer.init(sample_rate=sample_rate, max_input_level=0, max_output_level=18, fft_size=fft_size, window_type='hamming')
chirp, inv_filter = farina_chirp(fft_size, 20, 20000, params.sample_rate, chirp_amplitude_dbv)
left_dac_full_buffer = chirp
right_dac_full_buffer = left_dac_full_buffer
left_adc_full_buffer, right_adc_full_buffer = analyzer.send_receive(left_dac_full_buffer, right_dac_full_buffer)
# done with analyzer
analyzer.cleanup()
# Get the data buffer and remove DC
main_adc_buffer = left_adc_full_buffer.get_main_buffer()
main_adc_buffer = main_adc_buffer - np.mean(main_adc_buffer)
# Get the room impulse response from the ADC time domain data
ir_data = fftconvolve(main_adc_buffer, inv_filter, mode='same')
# Center IR peak in window
peak_index = np.argmax(np.abs(ir_data))
centered_ir_data = np.roll(ir_data, len(ir_data) // 2 - peak_index)
# Build a window to apply to the IR
buffer_size = len(centered_ir_data)
window_start = len(centered_ir_data) // 2 - 1000
window_width = buffer_size // 2
window_end = window_start + window_width
ramp_up = 500
ramp_down = 1000
window = generate_window(buffer_size, window_start, window_end, ramp_up, ramp_down)
windowed_ir_data = centered_ir_data * window
# Compute decay times
rt60, edc_db, start_idx, end_idx = calculate_rt(windowed_ir_data, sample_rate, decay_db=60)
rt40, _, _, _ = calculate_rt(windowed_ir_data, sample_rate, decay_db=40)
rt30, _, _, _ = calculate_rt(windowed_ir_data, sample_rate, decay_db=30)
rt20, _, _, _ = calculate_rt(windowed_ir_data, sample_rate, decay_db=20)
rt10, _, _, _ = calculate_rt(windowed_ir_data, sample_rate, decay_db=10)
rt5, _, _, _ = calculate_rt(windowed_ir_data, sample_rate, decay_db=5)
time = np.arange(len(left_dac_full_buffer)) / sample_rate
# Compute dBSPL on ADC data
dbspl_values, dbspl_time = compute_instantaneous_dbspl(main_adc_buffer, sample_rate, mic_sensitivity_v_pa, pre_amp_gain_db, rms_slice_interval_ms)
plt.figure(figsize=(15, 25))
plt.subplot(6, 1, 1)
plt.plot(time, left_dac_full_buffer)
plt.title("DAC Data")
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.subplot(6, 1, 2)
plt.plot(time[:len(main_adc_buffer)], main_adc_buffer)
plt.title("ADC Data")
plt.xlabel("Time (s)")
plt.ylabel("Amplitude")
plt.subplot(6, 1, 3)
plt.plot(dbspl_time, dbspl_values)
plt.title("ADC Instantaneous dB SPL")
plt.xlabel("Time (s)")
plt.ylabel("dB SPL")
plt.grid(True, which='both', axis='both')
plt.grid(which='major', linestyle='-', linewidth='0.5', color='black')
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black')
plt.minorticks_on()
plt.gca().xaxis.set_major_locator(plt.MultipleLocator(0.25))
plt.gca().xaxis.set_minor_locator(plt.MultipleLocator(0.05))
plt.gca().yaxis.set_major_locator(plt.MultipleLocator(10))
plt.gca().yaxis.set_minor_locator(plt.MultipleLocator(2))
plt.subplot(6, 1, 4)
ax1 = plt.gca()
ax2 = ax1.twinx()
ax1.plot(time[:len(centered_ir_data)], centered_ir_data, label='IR Data', color='b')
ax2.plot(time[:len(window)], window, label='Window', color='r')
ax1.set_xlabel("Time (s)")
ax1.set_ylabel("Amplitude (IR Data)")
ax2.set_ylabel("Amplitude (Window)")
ax1.set_title("Impulse Response (IR) with Window")
ax1.legend(loc='upper left')
ax2.legend(loc='upper right')
edc_time = np.arange(len(edc_db)) / sample_rate
plt.subplot(6, 1, 5)
plt.plot(edc_time, edc_db)
plt.title("Energy Decay Curve (EDC)")
plt.xlabel("Time (s)")
plt.ylabel("EDC (dB)")
plt.axvline(x=(start_idx + np.where(edc_db[start_idx:] <= -5)[0][0]) / sample_rate, color='r', linestyle='--', label='-5 dB')
plt.axvline(x=(start_idx + np.where(edc_db[start_idx:] <= -10)[0][0]) / sample_rate, color='g', linestyle='--', label='-10 dB')
plt.axvline(x=(start_idx + np.where(edc_db[start_idx:] <= -20)[0][0]) / sample_rate, color='b', linestyle='--', label='-20 dB')
plt.axvline(x=(start_idx + np.where(edc_db[start_idx:] <= -40)[0][0]) / sample_rate, color='m', linestyle='--', label='-40 dB')
plt.axvline(x=(start_idx + np.where(edc_db[start_idx:] <= -60)[0][0]) / sample_rate, color='y', linestyle='--', label='-60 dB')
plt.legend()
plt.ylim(-75, 5)
plt.grid(True, which='both', axis='both')
plt.grid(which='major', linestyle='-', linewidth='0.5', color='black')
plt.grid(which='minor', linestyle=':', linewidth='0.5', color='black')
plt.minorticks_on()
plt.gca().xaxis.set_major_locator(plt.MultipleLocator(0.25))
plt.gca().xaxis.set_minor_locator(plt.MultipleLocator(0.05))
plt.gca().yaxis.set_major_locator(plt.MultipleLocator(20))
plt.gca().yaxis.set_minor_locator(plt.MultipleLocator(5))
# Compute the spectrogram
frequencies, times, Sxx = spectrogram(windowed_ir_data, fs=sample_rate, nperseg=1024, noverlap=512)
# Convert power spectrogram to dB scale
Sxx_dB = 10 * np.log10(Sxx)
# Normalize the spectrogram so the maximum intensity is 0 dB
Sxx_dB -= np.max(Sxx_dB)
# Plot Spectrogram
plt.subplot(6, 1, 6)
plt.pcolormesh(times, frequencies, Sxx_dB, shading='gouraud', cmap='inferno', vmin=-100, vmax=0)
plt.title("Normalized Spectrogram of Impulse Response")
plt.xlabel("Time (s)")
plt.ylabel("Frequency (Hz)")
plt.colorbar(label='Intensity (dB)')
plt.ylim(20, 20000) # Set the y-axis range to 20 Hz to 20 kHz
plt.grid(True, which='both', axis='both') # Enable grid lines
plt.tight_layout()
plt.show()
print(f"RT5: {rt5:.2f} seconds ({5 / rt5:.2f} dB/sec)")
print(f"RT10: {rt10:.2f} seconds ({10 / rt10:.2f} dB/sec)")
print(f"RT20: {rt20:.2f} seconds ({20 / rt20:.2f} dB/sec)")
print(f"RT30: {rt30:.2f} seconds ({30 / rt30:.2f} dB/sec)")
print(f"RT40: {rt40:.2f} seconds ({40 / rt40:.2f} dB/sec)")
print(f"RT60: {rt60:.2f} seconds ({60 / rt60:.2f} dB/sec)")