// Copyright Epic Games Tools, LLC. All Rights Reserved. //////////////////////////////////////////////////////////////////////////// // // RADaudio is a new audio codec made by Epic Game Tools for use in games, // optimized for fast SIMD decoding and decent quality (roughly similar to // Vorbis). // // It is a classical MDCT-based codec with two block sizes, and it uses // the Oodle Data huffman entropy coder to store data. #include #define ENCODER_VERSION 0x00000000 // unstable debug version //#define ENCODER_VERSION 0x01000001 // use this for first released version #define HUFFMAN_ENCODE // get Huffman encode tables #include #include #include "radaudio_encoder.h" #include "radaudio_encoder_internal.h" #include "radaudio_encoder_sse.h" #include "radaudio_encoder_neon.h" #include "radaudio_common.h" #include "radaudio_mdct.h" #include "rrCore.h" #include "radaudio_common.inl" RR_COMPILER_ASSERT(sizeof(radaudio_encoder_state) <= sizeof(radaudio_encoder)); RR_COMPILER_ASSERT(sizeof(radaudio_stream_header) <= RADAUDIO_STREAM_HEADER_MAX); #ifdef RADAUDIO_DEVELOPMENT #define PROFILE_ZONES() \ PROF(encode_all) \ PROF(coefficients_sum) \ PROF(coefficients_1) \ PROF(coefficients_2) \ PROF(coefficients_3) \ PROF(coefficients_4) \ PROF(coefficients_5) \ PROF(coefficients_n8) \ PROF(coefficients_n16) \ PROF(coefficients_normalize)\ PROF(bands) \ PROF(huffman) \ PROF(analysis1) \ PROF(mdct) \ PROF(window) \ PROF(subbands) \ PROF(varbits) \ PROF(transient_analysis) \ PROF(check_mono) \ PROF(total_count) enum { #define PROF(x) PROF_##x, PROFILE_ZONES() #undef PROF PROF__end }; #define PROF_BEGIN(var) es->profile_times[PROF_##var] -= rrGetTicks() #define PROF_END(var) es->profile_times[PROF_##var] += rrGetTicks() #else #define PROF_BEGIN(var) #define PROF_END(var) #endif typedef struct { U8 *bitstream; // dynamic array U32 pending_bits; int pending_bitcount; int capacity; int length; int error; } radaudio_bit_encoder; typedef struct { radaudio_bit_encoder stream[3]; // HUFF3 
encoder size_t total_bits; } huff3_encoder; static void encode_vbstream_init(radaudio_bit_encoder *e, U8 *buffer, int capacity) { e->capacity = capacity; e->bitstream = buffer; e->pending_bits = 0; e->pending_bitcount = 0; e->length = 0; e->error = 0; } static void encode_vbstream_bits(radaudio_bit_encoder *e, U32 bits, int bitlength) { rrAssert(bitlength < 32); if (e->pending_bitcount+bitlength >= 32) { // at most 4 bytes output at once if (e->length + 4 > e->capacity) { e->error = 1; return; } while (e->pending_bitcount >= 8) { e->bitstream[e->length++] = (U8) (e->pending_bits & 255); e->pending_bits >>= 8; e->pending_bitcount -= 8; } } bits &= ((1 << bitlength) - 1); e->pending_bits |= bits << e->pending_bitcount; e->pending_bitcount += bitlength; } static void encode_vbstream_huff(radaudio_bit_encoder *e, radaudio_huffman *h, U8 symbol) { rrAssert(h->encode[symbol].length > 0); encode_vbstream_bits(e, h->encode[symbol].code, h->encode[symbol].length); } static void encode_vbstream_flush(radaudio_bit_encoder *e) { if (e->length + 4 > e->capacity) { e->error = 1; return; } while (e->pending_bitcount > 0) { e->bitstream[e->length++] = (U8) (e->pending_bits & 255); e->pending_bits >>= 8; e->pending_bitcount -= 8; } e->pending_bitcount = 0; e->pending_bits = 0; } static void encode_bits(huff3_encoder *es, U32 bits, int bitlength) { // putting extra bits in stream[0] increases the offset of stream[2] // putting extra bits in stream[1] interacts with bit reversal // so stream[2] is the best place to put non-huffman data, although we could also multi-stream it encode_vbstream_bits(&es->stream[2], bits, bitlength); es->total_bits += bitlength; } static void encode_huff(huff3_encoder *es, radaudio_huffman *h, U8 symbol, unsigned int stream_index) { rrAssert(stream_index >= 0 && stream_index < 3); encode_vbstream_huff(&es->stream[stream_index], h, symbol); } static void encode_recompute_stats(huff3_encoder *es) { es->total_bits = 8 * (es->stream[0].length + 
es->stream[1].length + es->stream[2].length ) + (es->stream[0].pending_bitcount + es->stream[1].pending_bitcount + es->stream[2].pending_bitcount); } static void encode_huff_array(huff3_encoder *es, radaudio_huffman *huff, U8 *values, int num_values, char *file, int line, int *error) { for (int i=0; i < num_values; ++i) { if (huff->encode[values[i]].length == 0) *error = 1; else encode_huff(es, huff, values[i], (unsigned int) i % 3); } encode_recompute_stats(es); } static float linear_remap(float x, float a, float b, float c, float d) { return (x-a)/(b-a)*(d-c)+c; } // original was 140. @TODO: optimize for different sample rates and bit rates; no decoder change needed #define TRANSIENT_THRESHOLD 180 static int silence_exp_start = -11; static float silence_exp_value = 0.75f; static float inverse_approx_pow_2(float x) { // solve x = 0.345*y^2 + 0.655*y + 1 float A = 0.34375f, B = 0.65625f, C = 1-x; float B2 = 0.4306640625f; // B*B, 441/1024 float discr = B2 - (4*A)*C; float d = sqrtf(discr); float y = (-B + d) / (2*A); rrAssert(x >= 1 && x <= 2); return y; } static void mdct_block(radaudio_encoder_state *es, float *coeff, int is_short_block, float *samples, int offset, int len, int prev_short, int next_short, int stride, rrbool average, radaudio_encode_info *info, int pad_channel) { PROF_BEGIN(window); int i; static int block_length[2] = { RADAUDIO_LONG_BLOCK_LEN, RADAUDIO_SHORT_BLOCK_LEN }; FFT_ALIGN(F32, data[MAX_MDCT_SAMPLES]); FFT_ALIGN(F32, workspace[MAX_MDCT_SAMPLES]); F32 *window; // the window type is the shortest of the two blocks int windowleft = RR_MAX(is_short_block, prev_short); int windowright = RR_MAX(is_short_block, next_short); int n = block_length[is_short_block]; int wlen; int raw_start = offset - n; int raw_end = offset + n; // clamp range to sample, which is defined from 0..len int clamped_start = RR_MAX(raw_start, 0); int clamped_end = RR_MIN(raw_end , len); // convert into space where 0 = start of window, i.e. 
raw_start maps to 0 int convert_offset = raw_start; raw_start -= convert_offset; raw_end -= convert_offset; clamped_start -= convert_offset; clamped_end -= convert_offset; rrAssert(raw_start == 0); rrAssert(raw_end == 2*n); raw_start = 0; raw_end = 2*n; if (clamped_start > raw_start) { if (info->padding_len > 0) { // this could be turned into two loops, one for the padded samples and one for the 0s, // but since it only happens in one block at start of file, let's just leave it clearer here for (i=raw_start ; i < clamped_start; ++i) { // map back into original sample space: // if i= -convert_offset, then sample_pos is 0 int sample_pos = i+convert_offset; // now sample_pos from -padding_len to -1 maps to 0..padlen-1 in padding int padding_pos = sample_pos + (int) info->padding_len; if (padding_pos >= 0 && padding_pos < (int) info->padding_len) data[i] = info->padding[padding_pos*stride+pad_channel]; else data[i] = 0; } } else { for (i=raw_start ; i < clamped_start; ++i) data[i] = 0; } } if (average) for (i=clamped_start; i < clamped_end ; ++i) data[i] = (samples[(offset-n+i)*stride] + samples[(offset-n+i)*stride]) * 0.5f; else for (i=clamped_start; i < clamped_end ; ++i) data[i] = samples[(offset-n+i)*stride]; #if 0 if (clamped_start > raw_start) { for (i=0; i < raw_end; ++i) { printf("%4d %d\n", i, (int) (data[i]*32767)); } printf("seamless fix for channel %d ends at %d\n", pad_channel, clamped_start); } #endif if (clamped_end < raw_end) { if (info->padding_len > 0) { for (i=clamped_end; i < raw_end; ++i) { // map back into original sample space: // if i=raw_end, then sample_pos is len rrAssert(clamped_end + convert_offset == len); int sample_pos = i+convert_offset; // now sample_pos from len..len+padlen-1 maps to 0..padlen-1 in padding int padding_pos = sample_pos - len; if (padding_pos >= 0 && padding_pos < (int) info->padding_len) data[i] = info->padding[padding_pos*stride+pad_channel]; else data[i] = 0; } #if 0 for (i=0; i < raw_end; ++i) { printf("%4d %d\n", 
i, (int) (data[i]*32767)); } printf("seamless fix for channel %d starts at %d\n", pad_channel, clamped_end); #endif } else { for (i=clamped_end ; i < raw_end ; ++i) data[i] = 0; } } // apply left window wlen = block_length[windowleft]; window = radaudio_windows[windowleft]; if (wlen < n) { // long block with short window int wstart = RADAUDIO_LONG_BLOCK_LEN/2 - RADAUDIO_SHORT_BLOCK_LEN/2; for (i=0; i < wstart; ++i) data[i] = 0; for (i=0; i < RADAUDIO_SHORT_BLOCK_LEN; ++i) data[wstart+i] *= window[i]; } else { for (i=0; i < wlen; ++i) data[i] *= window[i]; } // apply right window wlen = block_length[windowright]; window = radaudio_windows[windowright]; if (wlen < n) { // long block with short window int wstart = RADAUDIO_LONG_BLOCK_LEN/2 - RADAUDIO_SHORT_BLOCK_LEN/2; for (i=0; i < RADAUDIO_SHORT_BLOCK_LEN; ++i) data[n+wstart+i] *= window[wlen-1-i]; for (i=n-wstart; i < n; ++i) data[n+i] = 0; } else { for (i=0; i < wlen; ++i) data[n+i] *= window[wlen-1-i]; } PROF_END(window); PROF_BEGIN(mdct); radaudio_mdct_fft(es->cpu, coeff, (size_t) n, data, data+n, workspace); PROF_END(mdct); } // computing error betweeen two vectors that are normalized to have L2-norm of 1.0, // without explicitly computing normalized values // // Square error is: sum[(normalizedq[i] - desired[i])^2] // = sum[(normalizedq[i]^2 - 2*normalizedq[i]*desired[i] + desired[i]^2)] // = sum[ normalizedq[i]^2] - 2*sum[normalizedq[i]*desired[i]] + sum[desired[i]^2] // = 1 - 2*normalize_scale * sum[unnormalizedq*desired] + 1 // = 2 - 2*normalize_scale * sum[unnormalizedq*desired] // normalize_scale = 1/sqrt(sum(unnormalizedq^2)) static float compute_quantized_coefficients(S16 quantized_coeff[], F32 normalized_coeff[], int num_coeff, F32 quantizer) { float cross_sum=0; float unnorm_sum2=0; for (int k=0; k < num_coeff; ++k) { float unquantized = (normalized_coeff[k] * quantizer); int quantized = (int) floor(unquantized+0.5); quantized_coeff[k] = (S16) quantized; cross_sum += quantized * normalized_coeff[k]; 
unnorm_sum2 += (F32) quantized * quantized; } F32 normalize_scale = 1.0f / sqrtf(unnorm_sum2); return 2 - 2 * normalize_scale * cross_sum; } // iterate through a range of quantizers and pick the one with the // smallest error; all of them will in theory be smaller than the // "ideal" quantizer, so we don't bother trading off size vs error, // we just accept the best. static void compute_best_quantized_coeff8_loop(radaudio_encoder_state *es, S16 best_quantized[], F32 best_so_far, F32 ncoeff[], F32 quantizer, F32 step_quantizer, int num_quantizers) { RR_UNUSED_VARIABLE(es); int n = 8; for (int q=0; q < num_quantizers; ++q, quantizer += step_quantizer) { S16 quantized_attempt[8]; F32 dot_product=0; F32 unnorm_sum2=0; for (int z=0; z < n; ++z) { float unquantized = (ncoeff[z] * quantizer); int quantized = (int) floor(unquantized+0.5f); quantized_attempt[z] = (S16) quantized; dot_product += quantized * ncoeff[z]; unnorm_sum2 += (F32) quantized * quantized; } float err2 = (dot_product * dot_product) / unnorm_sum2; if (err2 > best_so_far) { best_so_far = err2; memcpy(best_quantized, quantized_attempt, n * sizeof(best_quantized[0])); } } } static void compute_best_quantized_coeff16_loop(radaudio_encoder_state *es, S16 best_quantized[], F32 best_so_far, F32 ncoeff[], F32 quantizer, F32 step_quantizer, int num_quantizers) { RR_UNUSED_VARIABLE(es); #ifdef DO_BUILD_SSE4 if (es->cpu.has_sse4_1) { radaudio_sse2_compute_best_quantized_coeff16_loop(best_quantized, best_so_far, ncoeff, quantizer, step_quantizer, num_quantizers); return; } #endif #ifdef DO_BUILD_NEON radaudio_neon_compute_best_quantized_coeff16_loop(best_quantized, best_so_far, ncoeff, quantizer, step_quantizer, num_quantizers); return; #endif #ifndef DO_BUILD_NEON // for unreachable code warnings int n = 16; for (int q=0; q < num_quantizers; ++q, quantizer += step_quantizer) { S16 quantized_attempt[16]; for (int z=0; z < n; ++z) { float unquantized = (ncoeff[z] * quantizer); int quantized = (int) 
floor(unquantized+0.5f); quantized_attempt[z] = (S16) quantized; } // same operation order as in SIMD case to be bitwise identical F32 dot_product_arr[4]; F32 unnorm_sum2_arr[4]; for (int z = 0; z < 4; z++) { F32 q0 = (F32) quantized_attempt[0 + z]; F32 q1 = (F32) quantized_attempt[4 + z]; F32 q2 = (F32) quantized_attempt[8 + z]; F32 q3 = (F32) quantized_attempt[12 + z]; unnorm_sum2_arr[z] = 1.0e-20f; // avoid divide by 0 unnorm_sum2_arr[z] += q0 * q0; unnorm_sum2_arr[z] += q1 * q1; unnorm_sum2_arr[z] += q2 * q2; unnorm_sum2_arr[z] += q3 * q3; dot_product_arr[z] = ncoeff[0 + z] * q0; dot_product_arr[z] += ncoeff[4 + z] * q1; dot_product_arr[z] += ncoeff[8 + z] * q2; dot_product_arr[z] += ncoeff[12 + z] * q3; } F32 unnorm_sum2 = (unnorm_sum2_arr[0] + unnorm_sum2_arr[2]) + (unnorm_sum2_arr[1] + unnorm_sum2_arr[3]); F32 dot_product = (dot_product_arr[0] + dot_product_arr[2]) + (dot_product_arr[1] + dot_product_arr[3]); float err2 = (dot_product * dot_product) / unnorm_sum2; if (err2 > best_so_far) { best_so_far = err2; memcpy(best_quantized, quantized_attempt, n * sizeof(best_quantized[0])); } } #endif // !DO_BUILD_NEON } static int get_rounded_exponent(float n) { #if 0 float recip_log2 = 1.0f / logf(2.0f); int exp2 = (int) floor(logf(n) * recip_log2 + 0.5); #else float two_to_one_half = 1.414213562373f; int exp=0; frexp(n * 0.5 * two_to_one_half, &exp); return exp; #endif } static void compute_band_energies(radaudio_rate_info *info, float coeff[], int band_exponent[], F32 band_energy[]) { // compute band energy int k = 0; for (int j=0; j < info->num_bands; ++j) { float be = 0; for (int i=0; i < info->num_coeffs_for_band[j]; ++i) { float x = coeff[k++]; be += x*x; } be = sqrtf(be) * info->band_scale_encode[j]; // average energy over whole band... 
this allows coding all bands identically if (be < 0.00002f) be = 0; band_energy[j] = be; if (be == 0) // no need to check minimum, as that gets 0'd as an exponent band_exponent[j] = BAND_EXPONENT_NONE; else { int exp = get_rounded_exponent(be); if (exp < -16) exp = BAND_EXPONENT_NONE; if (exp > 4) exp = 4; band_exponent[j] = exp; } } } // find the num_data/2'th item if data were sorted // can reorder data static int imin(int p0, int p1) { return p0 < p1 ? p0 : p1; } static int imax(int p0, int p1) { return p0 > p1 ? p0 : p1; } static int ifind_nth_slow(S16 *data, int count, int n) { for (int i=0; i <= n; ++i) { S16 smallest = data[i]; int smallest_j = i; for (int j=i+1; j < count; ++j) { if (data[j] < smallest) { smallest = data[j]; smallest_j = j; } } // smallest_j belongs in i S16 t = data[i]; data[i] = smallest; data[smallest_j] = t; } rrAssert(data[n] >= 0); return data[n]; } RR_COMPILER_ASSERT(MAX_COEFF_PER_SUBBAND == 32); static int ifind_nth_fast(radaudio_encoder_state *es, S16 *data, int count, int n) { rrAssert(n <= MAX_COEFF_PER_SUBBAND); S16 temp1[MAX_COEFF_PER_SUBBAND],temp2[MAX_COEFF_PER_SUBBAND], *buf1 = temp1, *buf2 = temp2, *(buf[2]); buf[0] = buf1; buf[1] = buf2; int next_buf=0; buf1 = data; for(;;) { if (count <= 8) { int result; #ifdef DO_BUILD_SSE4 if (es->cpu.has_sse4_1) result = radaudio_sse4_findnth_of_8_or_less(buf1, count, n); else #endif result = ifind_nth_slow(buf1, count, n); return result; } int v0 = data[0]; int v1 = data[count-1]; int v2 = data[count>>1]; int lo = imin(v0,v1); int hi = imax(v0,v1); int clamp_lo = imax(lo,v2); int mid = imin(hi,clamp_lo); // count numbers relative to mid int below=0; int above=MAX_COEFF_PER_SUBBAND; for (int i=0; i < count; ++i) rrAssert(buf1[i] >= 0); for (int i=0; i < count; ++i) if (buf1[i] < mid) buf2[below++] = buf1[i]; else buf2[--above] = buf1[i]; if (n < below) { if (below == count) return ifind_nth_slow(buf1, count, n); // is the nth item in the below section? 
buf1 = buf2; count = below; } else { if (below == 0) return ifind_nth_slow(buf1, count, n); buf1 = buf2+above; count = MAX_COEFF_PER_SUBBAND-above; n -= below; } buf2 = buf[next_buf]; next_buf ^= 1; } } // only actually ever called with: // num_data= 4 // num_data= 8 // num_data=16 // num_data=32 // // 8 & 16 are the most important to optimize, since they're the only subband sizes for long blocks; // the others come up with short blocks only static float find_median(radaudio_encoder_state *es, F32 *data, int num_data) { // approximate median finder using top 16 bits of floats, because SSE4.1 has a horizontal min we can leverage. // floats are all non-negative (they were already fabs()d), so ordering as integer is valid // unmeasurable change in PEAQ basic to use this instead of float median S32 idata[MAX_COEFF_PER_SUBBAND]; S16 idata16[MAX_COEFF_PER_SUBBAND]; memcpy(idata, data, 4*num_data); for (int i=0; i < num_data; ++i) idata16[i] = (S16) (idata[i] >> 16); // since sign bit is clear, we could use U16 and shift 1 less to keep 1 more mantissa bit // but we keep it signed in case of limitations in other SIMD implementations S32 result; #ifdef DO_BUILD_SSE4 if (num_data == 8 && es->cpu.has_sse4_1) { result = radaudio_sse4_find_median_4th_of_8(idata16); } else if (num_data == 16 && es->cpu.has_sse4_1 && es->cpu.has_popcnt) { result = radaudio_sse4popcnt_find_median_8th_of_16(idata16); } else #elif defined(DO_BUILD_NEON) if (num_data == 8) { result = radaudio_neon_find_median_4th_of_8(idata16); } else if (num_data == 16) { result = radaudio_neon_find_median_8th_of_16(idata16); } else #endif { result = ifind_nth_fast(es, idata16, num_data, num_data>>1); } // most mantissa bits will be 0, but good enough; the median is a heuristic anyway result <<= 16; F32 int_median; memcpy(&int_median, &result, 4); return int_median; } static void encode_channel(radaudio_encoder_state *es, radaudio_block_data *bd, F32 *coeff, int channel, F32 *sbb_energy, U32 midside_bands, 
radaudio_block_data *mid // only if side encoding ) // mid-side encoding { float band_pulses_raw[2][MAX_BANDS] = { 0 }; int is_short_block = es->current_block_short; radaudio_rate_info *info = es->info[is_short_block]; int i,j,k,s,c; U8 has_nonempty_subbands[24] = { 0 }; RAD_ALIGN(int, band_exponent[24], 16); RAD_ALIGN(F32, band_energy[24], 16); RAD_ALIGN(F32, subband_energy[72], 16); bd->num_bands = info->num_bands; bd->num_quantized_subbands = info->num_quantized_subbands; bd->num_quantized_coeffs = info->num_quantized_coeffs; int blocksize = is_short_block ? RADAUDIO_SHORT_BLOCK_LEN : RADAUDIO_LONG_BLOCK_LEN; // ramp off the highest frequencies (if they're even used, only 24/32khz) int ramp_to_zero_length = 8; for (i=1; i < ramp_to_zero_length; ++i) coeff[blocksize-i] *= (float) i / ramp_to_zero_length; for (j=info->num_quantized_coeffs; j < info->num_coeffs; ++j) bd->quantized_coeff_encode[j] = 0; PROF_BEGIN(bands); compute_band_energies(info, coeff, band_exponent, band_energy); for (j=0; j < info->num_bands; ++j) bd->band_exponent[j] = band_exponent[j]; compute_mantissa_bitcount(es->samprate_mode, is_short_block, es->mantissa_param, bd->band_exponent, bd->band_mantissa_bitcount); // compute fine band energy for (j=0; j < info->num_bands; ++j) { if (band_exponent[j] == BAND_EXPONENT_NONE) bd->band_mantissa[j] = 0; else { int nb = bd->band_mantissa_bitcount[j]; if (nb == 0) bd->band_mantissa[j] = 0; else { float be = band_energy[j]; int raw_fine; float x = be / powf(2.0f, (F32) band_exponent[j]); rrAssert(x >= 0.70f && x <= 1.42f); x *= sqrtf(2.0f); if (x < 1) x = 1; if (x > 1.9999f) x = 1.9999f; x = inverse_approx_pow_2(x); // output is 0..1 if (x < 0) x = 0; if (x > 0.9999f) x = 0.9999f; raw_fine = (int) (((1 << MAX_FINE_ENERGY_BITS)-1) * x + 0.5); bd->band_mantissa[j] = raw_fine >> (MAX_FINE_ENERGY_BITS-nb); // truncate to get the correct bucket index } } } PROF_END(bands); PROF_BEGIN(subbands); // subbands: if (es->current_block_short) { for (j=0; j < 
info->num_bands; ++j) { has_nonempty_subbands[j] = 1; subband_energy[j] = band_energy[j]; } } else { // compute subband energy for (j=0; j < info->num_subbands; ++j) { float sbe = 0; int start = info->first_coeff_for_subband[j]; for (i=0; i < info->num_coeffs_for_subband[j]; ++i) { float x = coeff[start+i]; sbe += x*x; } sbe = sqrtf(sbe); subband_energy[j] = sbe; } for (j=0; info->num_subbands_for_band[j] == 1; ++j) { has_nonempty_subbands[j] = (band_exponent[j] != BAND_EXPONENT_NONE); bd->quantized_subbands[j] = (U16) 1; } for (; j < info->num_bands; ++j) { int count = info->num_subbands_for_band[j]; int start = info->first_subband_for_band[j]; int num_pulses = es->subband_predicted_sum[j]; rrAssert(count != 1); if (num_pulses == 0 || band_exponent[j] == BAND_EXPONENT_NONE) { // skipping empty subbands doesn't really save bitrate on normal files, // but it helps enormously for silence and for mono-in-stereo (where the side channel is silent after mid-side encoding) // for empty bands, we don't save enough bitrate to be worth the complexity of skipping decoded subbands // instead output subbands that are optimal to decode float subband_pulses = (float) num_pulses / count; if (SUBBANDS_SKIP_EMPTY_BANDS) subband_pulses = floorf(subband_pulses); for (i=0; i < count; ++i) bd->quantized_subbands[start+i] = (U16) (subband_pulses + 0.5*(i & 1)); has_nonempty_subbands[j] = false; } else { S32 maxsub=0; F32 sum = 1.0e-20f, inv_sum; for (i=0; i < count; ++i) sum += subband_energy[start+i]; inv_sum = 1.0f / sum; for (i=0; i < count; ++i) { U16 val = (U16) ((num_pulses * (subband_energy[start+i]*inv_sum)+0.5)); bd->quantized_subbands[start+i] = val; maxsub = RR_MAX(maxsub,val); } if (maxsub > 63) { // need to scale the subbands down so the largest value isn't too large to be signalled, // so remap so that maxsub will be 1 lower than limit (to allow rounding/slop) float revised_pulses = num_pulses * 62 / (float) maxsub; maxsub = 0; for (i=0; i < count; ++i) { U16 val = (U16) 
((revised_pulses * (subband_energy[start+i]*inv_sum)+0.5)); bd->quantized_subbands[start+i] = val; maxsub = RR_MAX(maxsub,val); } rrAssert(maxsub <= 63); es->stats.block_events[E_subband_renormalize]++; } has_nonempty_subbands[j] = false; for (i=0; i < count; ++i) if (bd->quantized_subbands[start+i] != 0) { has_nonempty_subbands[j] = true; break; } int total=0; for (i=0; i < count; ++i) total += bd->quantized_subbands[start+i]; } } } for (j=0; j < info->num_subbands; ++j) sbb_energy[j] = subband_energy[j]; PROF_END(subbands); PROF_BEGIN(analysis1); // compute the number of pulses per band for two adjacent quality modes so we can interpolate between them float total_desired=0; float total_expected=0; { float weighting[MAX_BANDS] = { 0 }; float total_weighting=1.0e-20f, recip_total; float base_pulses = es->heur.pulse_quality * 100; float num_pulses = (base_pulses * base_pulses * (is_short_block ? es->heur.short_block_pulse_scale : 1)); if (!is_short_block) { if (es->prev_block_short && es->next_block_short) num_pulses *= es->heur.short_overlap_scale2; else if (es->prev_block_short || es->next_block_short) num_pulses *= es->heur.short_overlap_scale1; } for (j=0; j < info->num_bands; ++j) { float w; rrbool midside=false; int be = band_exponent[j]; if (midside_bands & (1 << j)) { be = RR_MAX(be, mid->band_exponent[j]); // weight it as if it was the mid weight, if that's larger; later we'll discard samples based on actual exponent midside = true; } if (be == BAND_EXPONENT_NONE) w = 0; else { float exp = (float) be; w = powf(es->heur.band_exponent_base[is_short_block], exp); } total_expected += 1.0; if (be < -14) { if (be == -15) total_desired += 0.75f; if (be == -16) total_desired += 0.33f; } else total_desired += 1.0; w *= powf((F32) info->num_coeffs_for_band[j], (F32) es->heur.band_count_exponent[is_short_block]); #define RADA_LERP(t,a,b) ((a)+(t)*((b)-(a))) w *= RADA_LERP((float) j/info->num_bands, es->heur.quality_weight_low[is_short_block], 1.0f); #if 0 // this 
makes 0.002 PEAQ difference even after tuning, not worth the time it would take to tune at all rates // inter-band masking if (j > 0 && j < info->num_bands-1 && !midside) { int exp = band_exponent[j]; int quietest_neighboring_band_exp = RR_MIN(band_exponent[j-1], band_exponent[j+1]); if (exp < quietest_neighboring_band_exp-5) w *= es->heur.band_mask_8[es->samprate_mode][is_short_block][quality]; else if (exp < quietest_neighboring_band_exp-3) w *= es->heur.band_mask_4[es->samprate_mode][is_short_block][quality]; else if (exp < quietest_neighboring_band_exp-2) w *= es->heur.band_mask_2[es->samprate_mode][is_short_block][quality]; else if (exp < quietest_neighboring_band_exp-1) w *= es->heur.band_mask_1[es->samprate_mode][is_short_block][quality]; } #endif if (info->num_coeffs_for_band[j] == 1) w = 0; // if only one coefficient, only needs one pulse to indicate sign weighting[j] = w; total_weighting += weighting[j]; } recip_total = 1.0f/total_weighting; num_pulses *= total_desired / total_expected; for (j=0; j < info->num_bands; ++j) { float raw_pulses = num_pulses * weighting[j] * recip_total; float expectation = (band_exponent[j] - es->heur.expectation_base) * es->heur.expectation_scale; if (expectation < 0.0f) expectation = 0.0f; if (expectation > 1.0f) expectation = 1.0f; if (midside_bands & (1 << j)) { if (band_exponent[j] < mid->band_exponent[j]) { float scale1=1, scale2=1; // we weighted this as if it was the actual mid exponent, now throw away pulses to save space if (midside_bands == 0xffffffff) { // if all bands mid-side // phase 1: if exponents are separate by N, start decaying // phase 2: if exponent is less than some threshold, start decaying if (band_exponent[j] < mid->band_exponent[j] - es->heur.side_exp_threshold_all) { if (band_exponent[j] <= mid->band_exponent[j] - es->heur.side_exp_start2_all) scale1 = 0; else scale1 = linear_remap((float) band_exponent[j], (float) mid->band_exponent[j]-es->heur.side_exp_start2, (float) 
mid->band_exponent[j]-es->heur.side_exp_threshold, 0.0f, 1.0); } if (band_exponent[j] < es->heur.side_exp_end_all) { if (band_exponent[j] <= es->heur.side_exp_start_all) scale2 = 0; else scale2 = linear_remap((float) band_exponent[j], (float) es->heur.side_exp_start, (float) es->heur.side_exp_end, 0.0f, 1.0); } } else { // if selected bands, which uses a different detector so can have different decay rules // phase 1: if exponents are separate by N, start decaying // phase 2: if exponent is less than some threshold, start decaying if (band_exponent[j] < mid->band_exponent[j] - es->heur.side_exp_threshold) { if (band_exponent[j] <= mid->band_exponent[j] - es->heur.side_exp_start2) scale1 = 0; else scale1 = linear_remap((float) band_exponent[j], (float) mid->band_exponent[j]-es->heur.side_exp_start2, (float) mid->band_exponent[j]-es->heur.side_exp_threshold, 0.0f, 1.0); } if (band_exponent[j] < es->heur.side_exp_end) { if (band_exponent[j] <= es->heur.side_exp_start) scale2 = 0; else scale2 = linear_remap((float) band_exponent[j], (float) es->heur.side_exp_start , (float) es->heur.side_exp_end, 0.0f, 1.0); } } if (scale1 > 1.00) scale1=1.00; if (scale1 < 0.00) scale1=0.00; if (scale2 > 1.00) scale2=1.00; if (scale2 < 0.00) scale2=0.00; float scale = RR_MIN(scale1,scale2); expectation *= scale; } } if (band_exponent[j] < -9) { float scale = linear_remap((float) band_exponent[j], -15.0f , (float) silence_exp_start, 0.05f, (float) silence_exp_value); if (scale > 1.00) scale=1.00; if (scale < 0.00) scale=0.00; if (band_exponent[j] == -16) scale = 0; expectation *= scale; } raw_pulses *= expectation; band_pulses_raw[0][j] = raw_pulses; } } PROF_END(analysis1); PROF_BEGIN(coefficients_sum); c=s=0; // c = coefficient index, s = subband index for (j=0; j < info->num_bands; ++j) { float recip; int num_pulses; PROF_BEGIN(coefficients_1); if (band_exponent[j] == BAND_EXPONENT_NONE) num_pulses = 0; else if (info->num_coeffs_for_band[j] == 1) num_pulses = 1; // just need the sign 
else { float base_pulses = band_pulses_raw[0][j]; num_pulses = (int) (base_pulses * 44100.0 / es->sample_rate); } { float sum = 1.0e-12f; for (i=0; i < info->num_subbands_for_band[j]; ++i) { float x = (float) subband_energy[s+i]; sum += x*x; } recip = 1.0f/sum; } PROF_END(coefficients_1); for (i=0; i < info->num_subbands_for_band[j]; ++i) { PROF_BEGIN(coefficients_2); float x = (float) subband_energy[s+i]; float data[32], median; int n = info->num_coeffs_for_subband[s+i]; // allocate the pulses per subband, based on squared energy int sub_pulses = (int) (num_pulses * x*x * recip+0.5); // check if we need to boost the pulses because lots of large coefficients if (band_exponent[j] >= -13 && n >= 8) { // find the median #ifdef DO_BUILD_SSE4x if (es->cpu.has_sse2) radaudio_sse2_fabs_coefficients(data, coeff+c, n); else #endif { for (k=0; k < n; ++k) data[k] = fabsf(coeff[c+k]); } median = find_median(es, data, n); // count how many coefficients are significantly above the median for (k=0; k < n; ++k) { if (data[k] > median * es->heur.large_boost_median_test[is_short_block]) sub_pulses += 2; else if (data[k] > median * es->heur.small_boost_median_test[is_short_block]) sub_pulses += 1; } } PROF_END(coefficients_2); /// now distribute the pulses to the coefficients { PROF_BEGIN(coefficients_3); rrbool no_pulses = false; if (sub_pulses == 0) no_pulses = true; if (band_exponent[j] == BAND_EXPONENT_NONE) no_pulses = true; if (!is_short_block) // if the subband is going to get 0 energy, then force all coefficients to 0 if (info->num_subbands_for_band[j] > 1 && bd->quantized_subbands[s+i]==0 && has_nonempty_subbands[j]) no_pulses = true; if (no_pulses) { for (k=0; k < n; ++k) bd->quantized_coeff_encode[c+k] = 0; PROF_END(coefficients_3); } else if (info->num_coeffs_for_subband[s+i] == 1) { // subband has only one coefficient, so just need the sign bd->quantized_coeff_encode[c] = coeff[c] < 0 ? 
-1 : 1; PROF_END(coefficients_3); } else { PROF_END(coefficients_3); PROF_BEGIN(coefficients_normalize); float ncoeff[MAX_COEFF_PER_BAND], sum2=0; for (k=0; k < n; ++k) { sum2 += coeff[c+k]*coeff[c+k]; } rrAssert(sum2 != 0); float sum=0; { float scale = 1.0f / (sqrtf(sum2) + 1.e-24f); for (k=0; k < n; ++k) { float v = scale * coeff[c+k]; ncoeff[k] = v; sum += fabsf(v); } } rrAssert(sum != 0); // if coefficients were all 0, then subband should have been all 0, so no_pulses should have been true S16 best_coeff[MAX_COEFF_PER_BAND]; memset(best_coeff, 0, n*sizeof(best_coeff[0])); // this rather arbitrary computation must be used as is, // attempts to alter it to non-integer values always lead // to significant quality loss float t0,t1; float pc = (float) sub_pulses; t0 = (F32) (int) (pc/1.30f + 0.5); t1 = (F32) (int) (pc*1.125f + 0.5); //if (t1 > pc+n) t1 = (F32) pc+n; if (t1 > pc+8) t1 = pc+8; if (t0 < pc-8) t0 = pc-8; if (t1 > pc+n/2) t1 = pc+n/2; if (t0 < pc-n) t0 = pc-n; // the squared error if you use 0 for all coefficients is 1, since squared error vs. 0 is same as squared sum float error2_for_zeroes = 1.0f; // but if we transmit all 0s, the decoder replaces with noise, i.e. with random coefficients. // the RMSE error due to using random coefficients will be worse, since when the signs mismatch, // the error will be even larger, up to (say) 2x larger, and squared that's 4x larger, and signs // mismatch half the time. // // BUT we know the perceptual error from randomness is LESS than using all 0s. so using any error // estimate that is LARGER than the error for all zeroes will be perceptually wrong. so we tune // this value. 
maybe should be per-band: float min_error2 = error2_for_zeroes / 2; // best value experimentlaly PROF_END(coefficients_normalize); PROF_BEGIN(coefficients_4); float best_so_far = min_error2; // optimized error calculation avoids a sqrt() by computing this derived, monotonically consistent error instead: // see compute_quantized_coefficients() for naive version best_so_far -= 2; best_so_far *= best_so_far; best_so_far /= 4; int num_steps = (int) (t1-t0) + 1; float recip_sum = 1.0f / sum; float quantizer = t0 * recip_sum; float step_quantizer = recip_sum; if (n == 8) { PROF_END(coefficients_4); PROF_BEGIN(coefficients_n8); compute_best_quantized_coeff8_loop(es, best_coeff, best_so_far, ncoeff, quantizer, step_quantizer, num_steps); PROF_END(coefficients_n8); } else if (n == 16) { PROF_END(coefficients_4); PROF_BEGIN(coefficients_n16); compute_best_quantized_coeff16_loop(es, best_coeff, best_so_far, ncoeff, quantizer, step_quantizer, num_steps); PROF_END(coefficients_n16); } else { for (int q=0; q < num_steps; ++q, quantizer += step_quantizer) { S16 quantized_attempt[MAX_COEFF_PER_SUBBAND]; F32 cross_sum=0; F32 unnorm_sum2=0; for (int z=0; z < n; ++z) { float unquantized = (ncoeff[z] * quantizer); int quantized = (int) floor(unquantized+0.5f); quantized_attempt[z] = (S16) quantized; cross_sum += quantized * ncoeff[z]; unnorm_sum2 += (F32) quantized * quantized; } // this optimized computation resembles normalizing and computing a dot product float err2 = (cross_sum*cross_sum) / unnorm_sum2; if (err2 > best_so_far) { best_so_far = err2; memcpy(best_coeff, quantized_attempt, n * sizeof(best_coeff[0])); } } PROF_END(coefficients_4); } PROF_BEGIN(coefficients_5); // check if coefficients are too large int largest = 0; for (k=0; k < n; ++k) largest = RR_MAX(largest, abs(best_coeff[k])); if (largest > 112) { ++es->stats.block_events[E_coefficients_renormalize]; float flargest = 0.00001f; for (k=0; k < n; ++k) if (fabsf(ncoeff[k]) > flargest) flargest = fabsf(ncoeff[k]); int 
target = 112; float scale = target / flargest; // scale * flargest = target for (k=0; k < n; ++k) { best_coeff[k] = (S16) floorf(ncoeff[k] * scale + 0.5f); } } for (k=0; k < n; ++k) bd->quantized_coeff_encode[c+k] = (S16) best_coeff[k]; PROF_END(coefficients_5); } } c += info->num_coeffs_for_subband[s+i]; } s += info->num_subbands_for_band[j]; } PROF_END(coefficients_sum); } static int transient_analysis(F32 *in, int N, int stride, F32 threshold); static int transient_analysis_wrapper(F32 *in, int offset, int stride, F32 threshold) { int result = transient_analysis(in+offset*stride, 1024, stride, threshold); return result; } static int stereo_count_effective_channels(float *stereo_input, size_t input_len, size_t offset, int num_samples, int mono_detection_aggressiveness) { float threshold = ((F32) mono_detection_aggressiveness+0.5f) / 32768.0f; // clamp range (out of range values are treated 0, so always match mono size_t begin = (offset < (size_t)num_samples) ? 0 : offset-num_samples; size_t end = (offset+num_samples > input_len) ? input_len : offset+num_samples; for (size_t i=begin; i < end; ++i) // if signals deviate enough, it's stereo if (fabsf(stereo_input[i*2+0] - stereo_input[i*2+1]) > threshold) return 2; // otherwise it's mono return 1; } RADDEFFUNC int radaudio_determine_preferred_next_block_length(radaudio_encoder *rae, radaudio_blocktype firsttype, F32 *input, size_t input_len, size_t offset) { int cur_short; radaudio_encoder_state *es = (radaudio_encoder_state *) rae; if (es->block_number == 0) cur_short = (firsttype == RADAUDIO_BLOCKTYPE_short); else cur_short = es->current_block_short; int num_samples = cur_short ? 
RADAUDIO_SHORT_BLOCK_LEN : RADAUDIO_LONG_BLOCK_LEN; if (offset + num_samples + RADAUDIO_LONG_BLOCK_LEN >= input_len) return RADAUDIO_BLOCKTYPE_short; else { int stride = es->num_channels; if (transient_analysis_wrapper(input, (int) offset + num_samples, stride, TRANSIENT_THRESHOLD)) return RADAUDIO_BLOCKTYPE_short; if (es->num_channels == 2) { if (transient_analysis_wrapper(input+1, (int) offset + num_samples, stride, TRANSIENT_THRESHOLD)) return RADAUDIO_BLOCKTYPE_short; } } return RADAUDIO_BLOCKTYPE_long; } RADDEFFUNC radaudio_blocktype radaudio_determine_preferred_first_block_length(radaudio_encoder *rae, F32 *input, size_t input_len) { radaudio_encoder_state *es = (radaudio_encoder_state *) rae; size_t offset=0; if (offset + RADAUDIO_LONG_BLOCK_LEN >= input_len) return RADAUDIO_BLOCKTYPE_short; else { if (transient_analysis_wrapper(input, (int) offset, es->num_channels, TRANSIENT_THRESHOLD)) return RADAUDIO_BLOCKTYPE_short; if (es->num_channels == 2) { if (transient_analysis_wrapper(input+1, (int) offset, es->num_channels, TRANSIENT_THRESHOLD)) return RADAUDIO_BLOCKTYPE_short; } } return RADAUDIO_BLOCKTYPE_long; } RADDEFFUNC int radaudio_encode_block(radaudio_encoder *rae, float *input, size_t input_len, // in samples (stereo pairs count as one) size_t *poffset , // in samples (stereo pairs count as one) U8 *encode_buffer, // recommend MAX_ENCODED_BLOCK_SIZE size_t encode_buffer_size) { radaudio_encode_info info = { 0 }; return radaudio_encode_block_ext(rae, input, input_len, poffset, encode_buffer, encode_buffer_size, &info); } static int compute_rle_length(radaudio_encoder_state *es, radaudio_block_data bd[2], int out_channels, int start, int *numsym) { radaudio_rate_info *bi = es->info[0]; int numbits=0; int k=0; int syms=0; for (int c=0; c < out_channels; ++c) { k = 0; for (int i=start; i < bi->num_quantized_coeffs; ++i) { int bits = bd[c].quantized_coeff_encode[i]; if (bits == 0) ++k; else { // new zero-run-length encoding int zr = k; while (zr >= 
MAX_RUNLEN) { numbits += rada_zero_runlength_huff.encode[MAX_RUNLEN].length; zr -= MAX_RUNLEN; ++syms; } if (zr >= COARSE_RUNLEN_THRESHOLD) { int coarse = zr & ~3; numbits += rada_zero_runlength_huff.encode[coarse].length; ++syms; numbits += 2; } else { numbits += rada_zero_runlength_huff.encode[zr].length; ++syms; } k = 0; } } if (c == 0 && out_channels == 2) { numbits += rada_zero_runlength_huff.encode[END_OF_ZERORUN].length; ++syms; } } if (numsym != NULL) *numsym = syms; return numbits; } // returns number of bytes encoded; // @TODO: reverse the below, 0 should be "done" and -1 should be "too small" // returns 0 if output buffer isn't big enough; // returns -1 if stream is done // returns -2 on internal error int radaudio_encode_block_ext(radaudio_encoder *rae, F32 *input, size_t input_len, size_t *poffset, U8 *encode_buffer, size_t encode_buffer_max, radaudio_encode_info *info) { int force_first_block = -1; int force_next_block = -1; radaudio_encoder_state *es = (radaudio_encoder_state *) rae; size_t offset = *poffset; // 'offset' is the middle of the region we're going to encode, and also // the offset of the samples we will fully encode when we finish encoding this block PROF_BEGIN(encode_all); radaudio_rate_info *bi; RAD_ALIGN(radaudio_block_data, bd[2], 16); int num_samples; if (info->force_first_blocktype == RADAUDIO_BLOCKTYPE_short) force_first_block = 1; else if (info->force_first_blocktype == RADAUDIO_BLOCKTYPE_long) force_first_block = 0; if (info->force_next_blocktype == RADAUDIO_BLOCKTYPE_short) force_next_block = 1; else if (info->force_next_blocktype == RADAUDIO_BLOCKTYPE_long) force_next_block = 0; // do transient analysis on the current block if (es->block_number == 0) { if (force_first_block >= 0) es->current_block_short = force_first_block; else { if (offset + RADAUDIO_LONG_BLOCK_LEN >= input_len) es->current_block_short = 1; else { PROF_BEGIN(transient_analysis); es->current_block_short = (U8) transient_analysis_wrapper(input, (int) offset, 
es->num_channels, TRANSIENT_THRESHOLD); if (es->num_channels == 2) { if (transient_analysis_wrapper(input+1, (int) offset, es->num_channels, TRANSIENT_THRESHOLD)) es->current_block_short = 1; } PROF_END(transient_analysis); } } } num_samples = es->current_block_short ? RADAUDIO_SHORT_BLOCK_LEN : RADAUDIO_LONG_BLOCK_LEN; if (force_next_block >= 0) es->next_block_short = force_next_block; else { // we need to lookahead to the NEXT block to know how to window the right side of our block // // so our current block is [offset-num_samples, off+num_samples) // and the next block will be centered at offset+num_samples, and // if it's LONG_BLOCK it will be [offset+num_samples-LONG_BLOCK, offset+num_samples+LONG_BLOCK) if (offset + num_samples + RADAUDIO_LONG_BLOCK_LEN >= input_len) es->next_block_short = 1; else { int stride = es->num_channels; es->next_block_short = (U8) transient_analysis_wrapper(input, (int) offset + num_samples, stride, TRANSIENT_THRESHOLD); if (es->num_channels == 2) { if (transient_analysis_wrapper(input+1, (int) offset + num_samples, stride, TRANSIENT_THRESHOLD)) es->next_block_short = 1; } } } // if the leftmost sample we would encode is off the end of the input, we're done if (offset >= input_len + RADAUDIO_SHORT_BLOCK_LEN) { PROF_END(encode_all); return RADAUDIOENC_AT_EOF; } // always end with a short block, to avoid weird overlap rules. actually, two short blocks if (offset >= input_len + RADAUDIO_SHORT_BLOCK_LEN*2 + RADAUDIO_LONG_BLOCK_LEN) es->next_block_short = 1; bi = es->info[es->current_block_short]; PROF_BEGIN(check_mono); int num_channels = es->num_channels; int out_channels = num_channels == 1 ? 
1 : stereo_count_effective_channels(input, input_len, offset, num_samples, 12); if (out_channels == 1 && es->num_channels == 2) ++es->stats.block_events[E_stereo_as_mono]; PROF_END(check_mono); F32 subband_energy[2][256]; rrbool do_mid_side = false; rrbool do_mid_side_bands = false; U32 mid_side_bands=0; // set bit (1<current_block_short, input, (int) offset, (int) input_len, es->prev_block_short, es->next_block_short, es->num_channels, es->num_channels==2, info, 0); encode_channel(es, &bd[0], coeff, 0, subband_energy[0], 0, NULL); } else { FFT_ALIGN(F32, coeff1[MAX_COEFFS]); FFT_ALIGN(F32, coeff2[MAX_COEFFS]); mdct_block(es, coeff1, es->current_block_short, input , (int) offset, (int) input_len, es->prev_block_short, es->next_block_short, es->num_channels, false, info, 0); mdct_block(es, coeff2, es->current_block_short, input+1, (int) offset, (int) input_len, es->prev_block_short, es->next_block_short, es->num_channels, false, info, 1); if (es->allow_mid_side) { // mid-side encoding intentionally encodes the side channel with less accuracy, // so we should only use it if the side is significantly quieter than the mid FFT_ALIGN(F32, coeff_mid [MAX_COEFFS]); FFT_ALIGN(F32, coeff_side[MAX_COEFFS]); for (int i=0; i < bi->num_coeffs; ++i) { coeff_mid [i] = (coeff1[i] + coeff2[i]) * 0.5f; coeff_side[i] = (coeff1[i] - coeff2[i]) * 1.0f; } RAD_ALIGN(F32, band_energy_dummy [MAX_BANDS], 16); RAD_ALIGN(int, band_exponent_mid [MAX_BANDS], 16); RAD_ALIGN(int, band_exponent_side[MAX_BANDS], 16); compute_band_energies(bi, coeff_mid , band_exponent_mid , band_energy_dummy); compute_band_energies(bi, coeff_side, band_exponent_side, band_energy_dummy); int tiny=0, smaller=0, much_smaller=0, larger=0; for (int j=0; j < bi->num_bands; ++j) { if (band_exponent_side[j] <= es->heur.mid_side_tiny) ++tiny; else if (band_exponent_side[j] <= band_exponent_mid[j]+es->heur.mid_side_offset || (band_exponent_side[j] < band_exponent_mid[j] && band_exponent_mid[j] <= es->heur.mid_side_threshold)) 
++much_smaller; else if (band_exponent_side[j] < band_exponent_mid[j]) ++smaller; else ++larger; } if (larger == 0 && smaller < es->heur.mid_side_max_bad_bands) { // most are tiny or much_smaller do_mid_side = true; encode_channel(es, &bd[0], coeff_mid , 0, subband_energy[0], 0, NULL); encode_channel(es, &bd[1], coeff_side, 1, subband_energy[1], 0xffffffff, &bd[0]); } else if (!es->current_block_short) { // consider doing it per-band int do_band[MAX_BANDS] = { 0 }; for (int j=0; j < bi->num_bands; ++j) { if (band_exponent_side[j] < es->heur.side_exp_end) // mid_side_tiny) do_band[j] = 2; else if (band_exponent_side[j] < band_exponent_mid[j]-es->heur.side_exp_threshold) // + mid_side_offset || (band_exponent_side[j] < band_exponent_mid[j] && band_exponent_mid[j] <= mid_side_threshold)) do_band[j] = 2; else if (band_exponent_side[j] <= es->heur.side_exp_end) // mid_side_tiny) do_band[j] = 1; else if (band_exponent_side[j] <= band_exponent_mid[j]-es->heur.side_exp_threshold) // + mid_side_offset || (band_exponent_side[j] < band_exponent_mid[j] && band_exponent_mid[j] <= mid_side_threshold)) do_band[j] = 1; } do_band[bi->num_bands] = do_band[bi->num_bands+1] = true; int count = 0; U32 actual_bands = 0; int k=0; // we signal in groups of 3 bands, so have to find groups of 3 bands where it's ok to mid-side for (int j=0; j < bi->num_bands; j += 3, ++k) { if (do_band[j+0]+do_band[j+1]+do_band[j+2] >= 5) { // if at least two of the three get reduced, and the third one is close ++count; mid_side_bands |= (1 << k); actual_bands |= (7 << j); } else { do_band[j+0] = do_band[j+1] = do_band[j+2] = 0; } } if (count >= 1) { do_mid_side_bands = true; // create composite coefficient arrays that have a mix of either one or the other in each band for (int j=0; j < bi->num_bands; ++j) { if (do_band[j]) { int start = bi->first_coeff_for_band[j]; int num_coeffs_for_band = bi->num_coeffs_for_band[j]; memcpy(coeff1 + start, coeff_mid + start, sizeof(coeff_mid[0]) * num_coeffs_for_band); 
memcpy(coeff2 + start, coeff_side + start, sizeof(coeff_mid[0]) * num_coeffs_for_band); } } encode_channel(es, &bd[0], coeff1, 0, subband_energy[0], 0, NULL); encode_channel(es, &bd[1], coeff2, 1, subband_energy[1], actual_bands, &bd[0]); } } } if (!do_mid_side && !do_mid_side_bands) { encode_channel(es, &bd[0], coeff1, 0, subband_energy[0], 0, NULL); encode_channel(es, &bd[1], coeff2, 1, subband_energy[1], 0, NULL); } } int c; U8 band_exponents[32*2]; int num_band_exponents=0; rrbool stereo_predict_exponent = false; // band exponents for (c=0; c < out_channels; ++c) { int lastn = PREDICT_FIRST_BAND_EXP; for (int j=0; j < bi->num_bands; ++j) { int n = bd[c].band_exponent[j]; rrAssert(n == BAND_EXPONENT_NONE || (n >= -32 && n < 32)); if (BAND_EXPONENT_NONE == -17) rrAssert(n >= -17 && n < 32); band_exponents[num_band_exponents++] = (U8) (n - lastn); lastn = n; } } if (out_channels == 2) { // try stereo predicting exponents int right_cost_nostereo=0, right_cost_stereo=0; for (int j=0; j < bi->num_bands; ++j) { right_cost_nostereo += rada_band_exponent_correct_huff.encode[band_exponents[bi->num_bands + j]].length; int predict = bd[1].band_exponent[j] - bd[0].band_exponent[j]; int cost = rada_band_exponent_stereo_correct_huff.encode[(U8) predict].length; if (cost == 0) right_cost_stereo = 256*bi->num_bands; // if invalid, can't use this path else right_cost_stereo += cost; } if (right_cost_stereo < right_cost_nostereo) { stereo_predict_exponent = true; num_band_exponents >>= 1; for (int i=0; i < bi->num_bands; ++i) band_exponents[32+i] = (U8) (bd[1].band_exponent[i] - bd[0].band_exponent[i]); } } U8 subband_value[128*2]; U8 subband_correction[32*2]; S8 big_coefficients[1024*2]; U8 nonzero_coefficients[1024*2]; U8 runlength_data[1025*2]; U8 runlength_bottom[1024*2]; U8 nonzero_flagbits[1024*2/8*16]; U8 subband_stereo_correct[128]; int num_subband_values0=0; int num_subband_corrections=0; int num_subband_stereo_correct=0; int num_big_coefficients=0; int 
num_nonzero_coefficients=0; int num_runlength_data=0; int num_runlength_bottom=0; rrbool disable_subband_predict = false; // compute subband predictions for (c=0; c < out_channels; ++c) { for (int j=0; j < bi->num_bands; ++j) { // skip subband data if subband occupies whole band if (bi->num_subbands_for_band[j] == 1) continue; int start = bi->first_subband_for_band[j]; int predicted_sum = es->subband_predicted_sum[j]; int actual_sum = 0; for (int i=0; i < bi->num_subbands_for_band[j]; ++i) { int v = bd[c].quantized_subbands[start+i]; actual_sum += v; } int predict = (actual_sum - predicted_sum); if (predict < -128 || predict > 127) { disable_subband_predict = true; } else if (rada_subband_value_last_in_band_correct_huff.encode[(U8) predict].length == 0) { disable_subband_predict = true; } } } es->stats.block_events[E_subband_nopredict] += disable_subband_predict ? 1 : 0; rrbool stereo_predict_subbands = false; if (!es->current_block_short) { if (out_channels == 2) { // try stereo predicting subbands int right_cost_nostereo=0, right_cost_stereo=0; for (int j=0; j < bi->num_bands; ++j) { if (bi->num_subbands_for_band[j] == 1) continue; if (bd[1].band_exponent[j] == BAND_EXPONENT_NONE && SUBBANDS_SKIP_EMPTY_BANDS) continue; int start = bi->first_subband_for_band[j]; int num = bi->num_subbands_for_band[j]; int bias = es->subband_bias[j]; int actual_sum=0; for (int i=0; i < num; ++i) { actual_sum += bd[1].quantized_subbands[start+i]; if (i == num-1 && !disable_subband_predict) right_cost_nostereo += rada_subband_value_last_in_band_correct_huff.encode[(U8) (actual_sum - es->subband_predicted_sum[j])].length; else right_cost_nostereo += rada_subband_value_huff.encode[(bias + bd[1].quantized_subbands[start+i])&63].length; int correct = bd[1].quantized_subbands[start+i] - bd[0].quantized_subbands[start+i]; int cost = rada_subband_value_stereo_correct_huff.encode[(U8) correct].length; if (cost == 0) right_cost_stereo = 256*bi->num_subbands; // if invalid, can't use this path 
else right_cost_stereo += cost; } } if (right_cost_stereo < right_cost_nostereo) { stereo_predict_subbands = true; } } // compute subband value encodings for (c=0; c < out_channels; ++c) { for (int j=0; j < bi->num_bands; ++j) { // skip subband data if subband occupies whole band, should be first 12 if (bi->num_subbands_for_band[j] == 1) continue; int start = bi->first_subband_for_band[j]; int num_to_write = bi->num_subbands_for_band[j]; if (bd[c].band_exponent[j] == BAND_EXPONENT_NONE && SUBBANDS_SKIP_EMPTY_BANDS) continue; if (c == 1 && stereo_predict_subbands) { for (int i = 0; i < num_to_write; ++i) { int correct = bd[1].quantized_subbands[start+i] - bd[0].quantized_subbands[start+i]; subband_stereo_correct[num_subband_stereo_correct++] = (U8) correct; } } else { int predicted_sum = es->subband_predicted_sum[j]; int bias = es->subband_bias[j]; int actual_sum = 0; if (!disable_subband_predict) --num_to_write; for (int i=0; i < num_to_write; ++i) { int v = bd[c].quantized_subbands[start+i]; actual_sum += v; int code = (v + bias)&63; if (code < 0 || code > LARGEST_BIASED_SUBBAND) fprintf(stderr, "encoded subband value %d (value %d) was outside of range 0..%d\n", code, v, LARGEST_BIASED_SUBBAND); subband_value[num_subband_values0++] = (U8) code; } if (!disable_subband_predict) { actual_sum += bd[c].quantized_subbands[start+num_to_write]; U8 correct = (U8) (actual_sum - predicted_sum); subband_correction[num_subband_corrections++] = correct; } } } } } int nz_mode = 0; // for short blocks, use 0 value to avoid sending extra header flags if (!es->current_block_short) { // encode the nonzero coefficient positions in bitarray for (c=0; c < out_channels; ++c) { int p=c*(1024/8); for (int j=0; j < 1024; j += 8) { U8 flags=0; for (int k=0; k < 8; ++k) { int bits = bd[c].quantized_coeff_encode[j+k]; if (bits != 0) { flags |= (1 << k); } } nonzero_flagbits[p++] = flags; } } int best_no_rle=0, best_num=0; int best_mode=0, best_cost=0x7fffffff; // for each mode, measure the 
cost for (int m=0; m < 4; ++m) { radaudio_nonzero_blockmode_descriptor *nz_desc = &es->nz_desc[m]; int bits= (m==0 ? 0 : 6); // anything other than mode 0 forces an extra header byte, though we might already be paying it for other reasons so approximate as 3/4ths of the header byte int nc = nz_desc->num_8byte_chunks; int num=0, numsym=0; int base; int bitc[8] = { 0 }; //bits += compute_rle_length(es, bd, out_channels, nc*64); bits = compute_rle_length(es, bd, out_channels, nc*64, &numsym); base = bits; for (c=0; c < out_channels; ++c) { int p = (1024/8)*c; for (int i=0; i < nc; ++i) { radaudio_huffman *h = rada_nonzero_bitflags_huff[nz_desc->huffman_table_for_chunk[i]]; num += 8; int pre = bits; if (nz_desc->invert_chunk[i]) for (int j=0; j < 8; ++j) bits += h->encode[255^nonzero_flagbits[p++]].length; else for (int j=0; j < 8; ++j) bits += h->encode[ nonzero_flagbits[p++]].length; if (m == 0) bitc[nz_desc->huffman_table_for_chunk[i]] += (bits - pre); } } if (bits < best_cost) { best_cost = bits; best_mode = m; } if (m == 0) { best_no_rle = bits - base; best_num = num; } } nz_mode = best_mode; } es->stats.block_events[E_nzmode0 + nz_mode] += 1; // encode non-zero coefficient locations for (c=0; c < out_channels; ++c) { int i,j,k; if (!es->current_block_short && nz_mode != 3) { int count = es->nz_desc[nz_mode].num_8byte_chunks*64; // encode the first `count` coefficients as non-zero based on 1-bit flags to be compressed later for (j=0; j < count; j += 8) { U8 flags=0; for (k=0; k < 8; ++k) { int bits = bd[c].quantized_coeff_encode[j+k]; if (bits != 0) { flags |= (1 << k); if (abs(bits) > 7) { nonzero_coefficients[num_nonzero_coefficients++] = 0; big_coefficients [num_big_coefficients++ ] = (S8) bits; } else { nonzero_coefficients[num_nonzero_coefficients++] = (U8) (bits+8); } } } } } // encode the remaining non-zero coefficient locations by run-length compressing the intervening 0s k=0; int skip = es->nz_desc[nz_mode].num_8byte_chunks * 64; for (j=0; j < 
bi->num_bands; ++j) { int start = bi->first_coeff_for_band[j]; int count = bi->num_coeffs_for_band[j]; rrAssert(bi->num_coeffs <= 1024); for (i=0; i < count; ++i) { // skip the coefficients we encoded in the previous loop above if (!es->current_block_short && nz_mode != 3 && start+i < skip) continue; int bits = bd[c].quantized_coeff_encode[start+i]; if (bits == 0) ++k; else { // new zero-run-length encoding int zr = k; while (zr >= MAX_RUNLEN) { runlength_data[num_runlength_data++] = (U8) MAX_RUNLEN; zr -= MAX_RUNLEN; } if (zr >= COARSE_RUNLEN_THRESHOLD) { int coarse = zr & ~3; runlength_data[num_runlength_data++] = (U8) coarse; zr -= coarse; runlength_bottom[num_runlength_bottom++] = (U8) zr; } else runlength_data[num_runlength_data++] = (U8) zr; if (abs(bits) > 7) { nonzero_coefficients[num_nonzero_coefficients++] = 0; big_coefficients [num_big_coefficients++ ] = (S8) bits; } else { nonzero_coefficients[num_nonzero_coefficients++] = (U8) (bits+8); } k = 0; } } } // end of channel's data if (c != out_channels-1) runlength_data[num_runlength_data++] = (U8) END_OF_ZERORUN; // end of run marker // don't bother outputting the terminating end of zerorun marker, we can infer that from length we have to use with OodleData huffman } U8 coeff_pairs[1024/2 * 2]; if ((num_nonzero_coefficients & 1) != 0) nonzero_coefficients[num_nonzero_coefficients++] = 7; // cheapest symbol to encode int num_coeff_pairs = (num_nonzero_coefficients+1)/2; for (int i=0; i < num_coeff_pairs; ++i) coeff_pairs[i] = nonzero_coefficients[i*2+0] | (nonzero_coefficients[i*2+1]<<4); int nz_selector = es->current_block_short ? 
4 : nz_mode; huff3_encoder he; encode_vbstream_init(&he.stream[0], &es->buffer[0] , 1000); encode_vbstream_init(&he.stream[1], &es->buffer[0]+1024*1, 1000); encode_vbstream_init(&he.stream[2], &es->buffer[0]+1024*2, 3000); he.total_bits = 0; size_t prev_bitcount; prev_bitcount = he.total_bits; // band exponents PROF_BEGIN(huffman); encode_huff_array(&he, &rada_band_exponent_correct_huff, band_exponents, num_band_exponents, __FILE__, __LINE__, &error); if (stereo_predict_exponent) encode_huff_array(&he, &rada_band_exponent_stereo_correct_huff, &band_exponents[32], num_band_exponents, __FILE__, __LINE__, &error); PROF_END(huffman); es->stats.bit_allocation[S_band_exponent] += (he.total_bits - prev_bitcount); // band mantissas -- pack them directly into varbits-array decoder U8 m_sizes[64]; U32 m_values[64]; int num_mantissas=0; prev_bitcount = he.total_bits; for (c=0; c < out_channels; ++c) { for (int j=0; j < bi->num_bands; ++j) { m_values[num_mantissas] = bd[c].band_mantissa[j]; m_sizes [num_mantissas] = (U8) bd[c].band_mantissa_bitcount[j]; ++num_mantissas; } } for (int j=0; j < num_mantissas; ++j) encode_vbstream_bits(&he.stream[2], m_values[j], m_sizes[j]); encode_recompute_stats(&he); es->stats.bit_allocation[S_band_mantissa] += (he.total_bits - prev_bitcount); PROF_BEGIN(huffman); if (!es->current_block_short) { // subband values prev_bitcount = he.total_bits; encode_huff_array(&he, &rada_subband_value_huff , subband_value , num_subband_values0 , __FILE__, __LINE__, &error); if (!disable_subband_predict) encode_huff_array(&he, &rada_subband_value_last_in_band_correct_huff, subband_correction , num_subband_corrections , __FILE__, __LINE__, &error); if (stereo_predict_subbands) encode_huff_array(&he, &rada_subband_value_stereo_correct_huff , subband_stereo_correct, num_subband_stereo_correct, __FILE__, __LINE__, &error); es->stats.bit_allocation[S_subband] += (he.total_bits - prev_bitcount); } PROF_END(huffman); if (!es->current_block_short && nz_mode != 3) { U8 
nonzero_flag_huff[1024*2/8*16]; radaudio_nonzero_blockmode_descriptor *nz_desc = &es->nz_desc[nz_mode]; // encode the non-zero flag bits // reorder the bits into an array where each huffman encoding is contiguous // also invert any chunks needing inversion int p=0, s=out_channels-1; for (c=0; c < out_channels; ++c) { p = (1024/8/8) * c; for (int i=0; i < nz_desc->num_8byte_chunks; ++i) { int off = nz_desc->source_pos[s+c][i]; U64 xor = (U64)0 - nz_desc->invert_chunk[i]; // ~0 if invert_chunk, else 0 (assuming invert_chunk is either 0 or 1) RR_PUT64_NATIVE(&nonzero_flag_huff[8*off], xor ^ RR_GET64_NATIVE(&nonzero_flagbits[8*p])); p++; } } // now output the huffman arrays prev_bitcount = he.total_bits; p=0; PROF_BEGIN(huffman); for (int i=0; i < NUM_NZ_HUFF; ++i) { int nc = nz_desc->num_chunks_per_huff[i]; if (nc != 0) { encode_huff_array(&he, rada_nonzero_bitflags_huff[i], &nonzero_flag_huff[p], nc*8*out_channels, __FILE__, __LINE__, &error); p += nc * 8 * out_channels; } } PROF_END(huffman); es->stats.bit_allocation[S_coeff_location] += (he.total_bits - prev_bitcount); } PROF_BEGIN(huffman); prev_bitcount = he.total_bits; encode_huff_array(&he, &rada_zero_runlength_huff, runlength_data, num_runlength_data, __FILE__, __LINE__, &error); es->stats.bit_allocation[S_coeff_location] += (he.total_bits - prev_bitcount); // coefficients -- need to have decoded the runlength data to know how many coefficients prev_bitcount = he.total_bits; int tp = es->nz_correlated_huffman_selectors[HS_COEFF_PAIR][nz_selector]; encode_huff_array(&he, rada_nonzero_coefficient_pair_huff[tp], coeff_pairs, num_coeff_pairs, __FILE__, __LINE__, &error); es->stats.bit_allocation[S_coeff_value] += (he.total_bits - prev_bitcount); PROF_END(huffman); PROF_BEGIN(varbits); // encode bottom runlength bits to vbstream 2 prev_bitcount = he.total_bits; for (int j=0; j < num_runlength_bottom; ++j) encode_vbstream_bits(&he.stream[2], runlength_bottom[j], 2); es->stats.bit_allocation[S_coeff_location] += 
(he.total_bits - prev_bitcount); PROF_END(varbits); PROF_BEGIN(huffman); // huffman encode big coefficients prev_bitcount = he.total_bits; int tb = es->nz_correlated_huffman_selectors[HS_COEFF_BIG][nz_selector]; encode_huff_array(&he, rada_nonzero_coefficient_big_huff[tb], (U8*) big_coefficients, num_big_coefficients, __FILE__, __LINE__, &error); es->stats.bit_allocation[S_coeff_value_large] += (he.total_bits - prev_bitcount); PROF_END(huffman); prev_bitcount = he.total_bits; for (int i=0; i < 3; ++i) encode_vbstream_flush(&he.stream[i]); encode_recompute_stats(&he); es->stats.bit_allocation[S_padding] += (he.total_bits - prev_bitcount); // assemble the final block radaudio_block_header_unpacked bh = { 0 }; // advance to the new center, which means advance by half of the current and half of the next int completed_samples = es->info[es->current_block_short]->num_coeffs/2 + es->info[es->next_block_short ]->num_coeffs/2; *poffset = offset + completed_samples; if (error) return RADAUDIOENC_INTERNAL_ERROR; // this is the condition for being done: // if (offset >= input_len + RADAUDIO_SHORT_BLOCK_LEN) // so the last block is the one that puts us in that state // Note this is the condition because we already update offset to point to the middle of the next block; // the next block minus RADAUDIO_SHORT_BLOCK_LEN is the number of overlapped samples that block will make if it or this block is short, so every sample before that is complete. 
// that's not quite right if the this block and the next block are long, but we never make long blocks at end of file, so // even if one long block ends right on the input, the next block will be short bh.final_block = (*poffset >= input_len + RADAUDIO_SHORT_BLOCK_LEN); int discard_samples = 0; if (bh.final_block) discard_samples = (int) (*poffset-RADAUDIO_SHORT_BLOCK_LEN - input_len); // number of fully decoded samples we didn't use bh.this_block_short = es->current_block_short; bh.next_block_short = es->next_block_short; bh.num_channels_encoded = out_channels; bh.final_samples_discard = discard_samples; bh.vbstream0_length = he.stream[0].length; bh.num_runlength_array = num_runlength_data; bh.nonzero_bitarray_mode = nz_mode; bh.disable_final_subband_predict = disable_subband_predict; bh.predict_stereo_subband = stereo_predict_subbands; bh.predict_stereo_exponent = stereo_predict_exponent; bh.mid_side_encoded = do_mid_side; bh.mid_side_bands = do_mid_side_bands; size_t byte_offset = 0; if (encode_buffer_max < 10) return RADAUDIOENC_INSUFFICIENT_BUFFER; size_t midside_len = do_mid_side_bands ? 
(24/MACRO_BAND_SIZE+7)/8 : 0; size_t non_header_length = he.stream[0].length + he.stream[1].length + he.stream[2].length + midside_len; bh.block_bytes = (U32) non_header_length; int header_size = radaudio_encode_block_header(encode_buffer, &es->biases, &bh); rrAssert(header_size >= 0); if (header_size < 0) return RADAUDIOENC_INTERNAL_ERROR; byte_offset = header_size; es->stats.bit_allocation[S_header] += byte_offset * 8; rrAssert(encode_buffer[0] != 0); #ifdef _DEBUG size_t total_size = byte_offset + non_header_length; #endif if (byte_offset + midside_len > encode_buffer_max) return RADAUDIOENC_INSUFFICIENT_BUFFER; for (int i=0; i < (int)midside_len; ++i) encode_buffer[byte_offset++] = (U8) ((mid_side_bands >> (8*i)) & 255); if (byte_offset + he.stream[0].length > encode_buffer_max) return RADAUDIOENC_INSUFFICIENT_BUFFER; for (int i=0; i < he.stream[0].length; ++i) encode_buffer[byte_offset++] = he.stream[0].bitstream[i]; if (byte_offset + he.stream[2].length > encode_buffer_max) return RADAUDIOENC_INSUFFICIENT_BUFFER; for (int i=0; i < he.stream[2].length; ++i) encode_buffer[byte_offset++] = he.stream[2].bitstream[i]; if (byte_offset + he.stream[1].length > encode_buffer_max) return RADAUDIOENC_INSUFFICIENT_BUFFER; for (int i=he.stream[1].length-1; i >= 0; --i) encode_buffer[byte_offset++] = he.stream[1].bitstream[i]; if (he.stream[0].error || he.stream[1].error | he.stream[2].error) return RADAUDIOENC_INTERNAL_ERROR; rrAssert(encode_buffer[0] != 0); es->prev_block_short = es->current_block_short; es->current_block_short = es->next_block_short; es->samples_fully_coded = (int) *poffset; ++es->block_number; es->lastblock_vbstream0_length = bh.vbstream0_length; es->lastblock_num_runlength_array = bh.num_runlength_array; es->lastblock_block_bytes = bh.block_bytes; rrAssert(byte_offset == total_size); PROF_END(encode_all); return (int) byte_offset; } ///////////////////////////////////////////////////////////////////////////// // // Transient analysis // #define 
TA_MAX(a,b) ((a) > (b) ? (a) : (b))

// Transient detector for one channel of audio.
//
//   input                 first sample to examine
//   N                     number of samples examined; must be even and in [40, 1024]
//   stride                distance in floats between consecutive samples
//   transient_threshold   sensitivity; compared (after /64) against the mean
//                         of the reciprocals accumulated in the final pass
//
// The signal is high-passed and decimated 2:1 into an energy envelope, the
// envelope is smoothed forwards and backwards with exponential moving
// averages, and a subsampled sum of reciprocals of the normalized envelope is
// compared against the threshold.
//
// Returns 1 if the threshold is exceeded, else 0.
int transient_analysis(float* input, int N, int stride, float transient_threshold)
{
   RAD_ALIGN(F32, w[512], 16);

   rrAssert(N >= 40 && N <= 1024 && N%2 == 0);

   const int half = N/2;
   const float *src = input;

   // IIR highpass, combined with 2:1 decimation:
   // each envelope entry is the summed energy of two filtered input samples
   float s1 = 0, s2 = 0;
   for (int i = 0; i < half; i++) {
      float y[2];
      for (int t = 0; t < 2; t++) {
         float s0 = *src + s1 - 0.5f * s2;
         y[t] = 0.625f * (s0 + s2) - 1.25f * s1;
         s2 = s1;
         s1 = s0;
         src += stride;
      }
      w[i] = y[0]*y[0] + y[1]*y[1];
   }

   // clear boundary samples
   for (int i = 6; i-- > 0; )
      w[i] = 0;

   // compute sum, do forward exponential moving average
   float sum_sq = 0;
   float ema = 0;
   for (int i = 6; i < half; i++) {
      sum_sq += w[i];
      ema += (1.f / 16.f) * (w[i] - ema);
      w[i] = ema;
   }

   // do backwards exponential moving average, find maximum
   float max_sq = 0;
   ema = 0;
   for (int i = half; i-- > 0; ) {
      ema += (1.f / 8.f) * (w[i] - ema);
      w[i] = ema;
      max_sq = (ema > max_sq) ? ema : max_sq;
   }

   // set up for final pass
   const float eps = 1e-15f;
   float geomean = sqrtf(sum_sq * (max_sq * half * 0.5f));
   float scale = half / (geomean + eps);

   // subsampled harmonic mean: every 4th envelope entry, each clamped from
   // below so a near-silent entry cannot dominate the sum
   const float min_threshold = 1.5f / 64.f;
   float rcp_sum = 0;
   for (int i = 12; i < half-5; i += 4) {
      float v = (w[i] + eps) * scale;
      rcp_sum += 1.f / ((v > min_threshold) ? v : min_threshold);
   }

   // number of entries the loop above visited
   const int num_summed = (half - 17 + 3) >> 2;

   return rcp_sum > (transient_threshold / 64.f) * num_summed;
}

// BAND_EXPONENT_BASE0/1
// initial guess 1.43
// This constant affects how many coefficient pulses are assigned per band by
// weighting each band with this number raised to the computed band exponent
// (e.g.
// if this number were 2.0, then it would weight by the band energy, and 1.0 weights evenly)
//
// NOTE(review): in this and the following [4][2][10] tables, the meaning of
// the first two indices is not visible here; the last index runs 0..9 and
// presumably selects a quality level (sibling tables label the columns
// 0..9) -- confirm against the code that reads these tables.
// In every table below columns 0-4 of a row repeat a single value; only
// columns 5-9 carry individually tuned values.
static float band_exponent_base[4][2][10] =
{
   // 1.1 instead of 1.2 has 1% better average ODG, but 8% higher standard deviation
   {
      { 1.2f, 1.2f, 1.2f, 1.2f, 1.2f,   1.18f, 1.14f, 1.10f, 1.08f, 0.96f },
      { 1.6f, 1.6f, 1.6f, 1.6f, 1.6f,   1.60f, 1.50f, 1.40f, 1.30f, 1.20f },
   },
   {
      { 1.2f, 1.2f, 1.2f, 1.2f, 1.2f,   1.15f, 1.13f, 1.11f, 1.08f, 1.05f },
      { 1.6f, 1.6f, 1.6f, 1.6f, 1.6f,   1.60f, 1.50f, 1.40f, 1.30f, 1.20f },
   },
   {
      { 1.2f, 1.2f, 1.2f, 1.2f, 1.2f,   1.20f, 1.16f, 1.08f, 1.00f, 0.98f },
      { 1.6f, 1.6f, 1.6f, 1.6f, 1.6f,   1.60f, 1.50f, 1.40f, 1.30f, 1.20f },
   },
   {
      { 1.2f, 1.2f, 1.2f, 1.2f, 1.2f,   1.30f, 1.20f, 1.06f, 1.04f, 1.02f },
      { 1.6f, 1.6f, 1.6f, 1.6f, 1.6f,   1.60f, 1.50f, 1.40f, 1.30f, 1.20f },
   },
};

// BAND_COUNT_EXPONENT0/1
// This constant affects how many coefficient pulses are assigned per band by
// weighting each band with the number of coefficients in the band raised to this power
// (if this number is 1.0, then they're weighted evenly by # coefficients_sum)
// initial guess ~0.85.
// std.dev appears to be minimized closer to 1.0, depending on the mode
//
// The second row of every entry is a constant 0.86 across all ten columns.
static float band_count_exponent[4][2][10] =
{
   {
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f,   0.96f, 0.98f, 0.98f, 0.96f, 0.95f },
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f,   0.86f, 0.86f, 0.86f, 0.86f, 0.86f },
   },
   {
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f,   0.88f, 0.90f, 0.94f, 0.96f, 0.98f },
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f,   0.86f, 0.86f, 0.86f, 0.86f, 0.86f },
   },
   {
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f,   0.98f, 0.98f, 0.98f, 0.98f, 0.98f },
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f,   0.86f, 0.86f, 0.86f, 0.86f, 0.86f },
   },
   {
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f,   0.96f, 0.96f, 0.96f, 0.95f, 0.95f },
      { 0.86f, 0.86f, 0.86f, 0.86f, 0.86f,   0.86f, 0.86f, 0.86f, 0.86f, 0.86f },
   },
};

// weight to assign to lowest bands, lerps towards 1.0 at highest bands
// QUALITY_WEIGHT_LOW0/1
static float quality_weight_low[4][2][10] =
{
   //   0      1      2      3      4      5      6      7      8      9
   {
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 7.00f, 7.00f, 5.50f, 1.75f, 1.50f },
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 4.00f, 4.00f, 4.00f, 4.00f, 4.00f },
   },
   {  // 6.5f
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 5.25f, 4.50f, 3.25f, 2.25f, 2.00f },
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 4.00f, 4.00f, 4.00f, 4.00f, 4.00f },
   },
   {
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 5.50f, 4.25f, 3.50f, 3.50f, 3.50f },
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 4.00f, 4.00f, 4.00f, 4.00f, 4.00f },
   },
   {
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 2.50f, 2.50f, 2.00f, 1.50f, 1.50f },
      { 3.50f, 3.50f, 3.50f, 3.50f, 3.50f, 4.00f, 4.00f, 4.00f, 4.00f, 4.00f },
   },
};

// SHORT_BLOCK_PULSES - initial guess 0.2
// NOTE(review): unlike the tables above this one is [4][10], with no middle index.
static float short_block_pulses[4][10] =
{
   //  0      1      2      3      4      5      6      7      8      9
   { 0.11f, 0.11f, 0.11f, 0.11f, 0.11f, 0.11f, 0.11f, 0.13f, 0.15f, 0.080f },
   { 0.12f, 0.12f, 0.12f, 0.12f, 0.12f, 0.12f, 0.13f, 0.14f, 0.18f, 0.075f },
   { 0.09f, 0.09f, 0.09f, 0.09f, 0.09f, 0.09f, 0.10f, 0.13f, 0.17f, 0.045f },
   { 0.08f, 0.08f, 0.08f, 0.08f, 0.08f, 0.08f, 0.09f, 0.09f, 0.09f, 0.060f },
};

// EXTRA_THRESHOLD_BIG0/1
// Every entry is 16.0 except [2][0] columns 6-9 and [3][1] column 5.
// The initializers are double literals; they are converted to float on initialization.
static float extra_threshold_big[4][2][10] =
{
   {
      { 16.0,16.0,16.0,16.0,16.0,  16.0,16.0,16.0,16.0,16.0, },
      { 16.0,16.0,16.0,16.0,16.0,  16.0,16.0,16.0,16.0,16.0, },
   },
   {
      { 16.0,16.0,16.0,16.0,16.0,  16.0,16.0,16.0,16.0,16.0, },
      { 16.0,16.0,16.0,16.0,16.0,  16.0,16.0,16.0,16.0,16.0, },
   },
   {
      { 16.0,16.0,16.0,16.0,16.0,  16.0, 8.0, 6.0, 6.0, 6.0, },
      { 16.0,16.0,16.0,16.0,16.0,  16.0,16.0,16.0,16.0,16.0, },
   },
   {
      { 16.0,16.0,16.0,16.0,16.0,  16.0,16.0,16.0,16.0,16.0, },
      { 16.0,16.0,16.0,16.0,16.0,  12.0,16.0,16.0,16.0,16.0, },
   },
};

// EXTRA_THRESHOLD_SMALL0/1
// Only entry [2] deviates from the pattern shared by entries [0], [1] and [3].
static float extra_threshold_small[4][2][10] =
{
   {
      { 8.0, 8.0, 8.0, 8.0, 8.0,  8.0, 8.0, 8.0, 8.0, 8.0, },
      { 2.0, 2.0, 2.0, 2.0, 2.0,  8.0, 8.0, 8.0, 8.0, 8.0, },
   },
   {
      { 8.0, 8.0, 8.0, 8.0, 8.0,  8.0, 8.0, 8.0, 8.0, 8.0, },
      { 2.0, 2.0, 2.0, 2.0, 2.0,  8.0, 8.0, 8.0, 8.0, 8.0, },
   },
   {
      { 8.0, 8.0, 8.0, 8.0, 8.0,  8.0, 4.0, 3.0, 1.5, 2.0, },
      { 2.0, 2.0, 2.0, 2.0, 2.0,  6.0, 8.0, 8.0, 8.0, 8.0, },
   },
   {
      { 8.0, 8.0, 8.0, 8.0, 8.0,  8.0, 8.0, 8.0, 8.0, 8.0, },
      { 2.0, 2.0, 2.0, 2.0, 2.0,  8.0, 8.0, 8.0, 8.0, 8.0, },
   },
};

#define MANTISSA_SCALE   1.00f   // used while tuning, env=MANTISSA_SCALE

static float band_mantissa_base[11][2][6] =
{
   {
      { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f },
      { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f },
   },
   {
      { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f },
      { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f },
   },
   {
      { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f },
      { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f },
   },
   {
      { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f },
      { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f },
   },
   {
      { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f },
      { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f },
   },
   {
      { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f },
      { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f },
   },
   {
      { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f },
      { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f },
   },
   {
      { 6.00f,6.00f,6.00f,6.00f,6.00f,5.50f },
      { 3.50f,3.50f,3.50f,3.50f,3.25f,3.00f },
   },
   {
      { 6.50f,6.50f,6.50f,6.50f,6.00f,5.50f },
      { 4.00f,4.00f,4.00f,4.00f,3.75f,3.25f },
   },
   {
      { 6.50f,6.50f,6.50f,6.50f,6.50f,5.50f },
      {
4.00f,4.00f,4.00f,4.00f,3.75f,3.25f }, }, { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 3.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, }, }; static float band_mantissa_decay[11][2][6] = { { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.19f,0.18f,0.17f,0.16f,0.15f,0.14f }, { 0.12f,0.11f,0.10f,0.09f,0.09f,0.08f }, }, { { 0.21f,0.20f,0.19f,0.18f,0.16f,0.15f }, { 0.13f,0.12f,0.10f,0.11f,0.10f,0.09f }, }, { { 0.21f,0.20f,0.19f,0.18f,0.16f,0.15f }, { 0.13f,0.12f,0.10f,0.11f,0.10f,0.09f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, }; static float band_mantissa_base_improved_short_dc[11][2][6] = { { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 5.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, }, { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 5.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, }, { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 5.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, }, { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 5.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, }, { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 5.25f,3.25f,3.25f,3.25f,2.95f,2.66f }, }, { { 5.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 4.50f,3.25f,3.25f,3.25f,2.95f,2.66f }, }, { { 5.75f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 5.00f,3.25f,3.25f,3.25f,2.95f,2.66f }, }, { { 6.00f,6.00f,6.00f,6.00f,6.00f,5.50f }, { 5.50f,3.50f,3.50f,3.50f,3.25f,3.00f }, }, { { 6.50f,6.50f,6.50f,6.50f,6.00f,5.50f }, { 6.00f,4.00f,4.00f,4.00f,3.75f,3.25f }, }, { { 
7.00f,6.50f,6.50f,6.50f,6.50f,5.50f }, { 6.50f,4.00f,4.00f,4.00f,3.75f,3.25f }, }, { { 6.50f,5.50f,5.50f,5.50f,5.00f,4.50f }, { 7.50f,3.25f,3.25f,3.25f,2.95f,2.66f }, }, }; static float band_mantissa_decay_improved_short_dc[11][2][6] = { { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.13f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.15f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, { { 0.19f,0.18f,0.17f,0.16f,0.15f,0.14f }, { 0.17f,0.11f,0.10f,0.09f,0.09f,0.08f }, }, { { 0.21f,0.20f,0.19f,0.18f,0.16f,0.15f }, { 0.19f,0.12f,0.10f,0.11f,0.10f,0.09f }, }, { { 0.22f,0.20f,0.19f,0.18f,0.16f,0.15f }, { 0.21f,0.12f,0.10f,0.11f,0.10f,0.09f }, }, { { 0.18f,0.17f,0.16f,0.14f,0.13f,0.12f }, { 0.11f,0.10f,0.09f,0.08f,0.08f,0.07f }, }, }; #define SUBBAND_PULSES_SCALE 1.0 // SUBBAND_SCALE static float subband_pulses_for_band[4][10][24] = { { { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f }, // 6 { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f }, // 7 
{ 0,0,0,0, 0,0,0,0, 0,0,0,0, 19.0f,18.4f,17.6f,17.0f,10.0f,9.00f,8.00f,6.00f,5.00f }, // 8 { 0,0,0,0, 0,0,0,0, 0,0,0,0, 23.8f,23.0f,22.0f,21.3f,12.5f,11.3f,10.0f,7.50f,6.25f }, // 9 }, { { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f }, // 6 { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f }, // 7 { 0,0,0,0, 0,0,0,0, 0,0,0,0, 19.0f,18.4f,17.6f,17.0f,10.0f,9.00f,8.00f,6.00f,5.00f }, // 8 { 0,0,0,0, 0,0,0,0, 0,0,0,0, 23.8f,23.0f,22.0f,21.3f,12.5f,11.3f,10.0f,7.50f,6.25f }, // 9 }, { { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f,2.50f,2.50f }, // 6 { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f,3.50f,3.50f }, // 7 { 0,0,0,0, 0,0,0,0, 0,0,0,0, 19.0f,18.4f,17.6f,17.0f,10.0f,9.00f,8.00f,6.00f,5.00f,3.50f,3.50f }, // 8 { 0,0,0,0, 0,0,0,0, 0,0,0,0, 
23.8f,23.0f,22.0f,21.3f,12.5f,11.3f,10.0f,7.50f,6.25f,4.50f,4.50f }, // 9 }, { { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 9.50f,9.20f,8.80f,8.50f,5.00f,4.50f,4.00f,3.00f,2.50f,2.50f,2.50f }, { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f,2.50f,2.50f }, // 6 { 0,0,0,0, 0,0,0,0, 0,0,0,0, 14.3f,13.8f,13.2f,12.8f,7.50f,6.75f,6.00f,4.50f,3.80f,3.50f,3.50f }, // 7 { 0,0,0,0, 0,0,0,0, 0,0,0,0, 19.0f,18.4f,17.6f,17.0f,10.0f,9.00f,8.00f,6.00f,5.00f,3.50f,3.50f }, // 8 { 0,0,0,0, 0,0,0,0, 0,0,0,0, 23.8f,23.0f,22.0f,21.3f,12.5f,11.3f,10.0f,7.50f,6.25f,4.50f,4.50f }, // 9 } }; static U16 header_size_bias[10] = { 0,0,0,0,0, 90,140,250,500,1000 }; static float pulse_quality_table[4][10] = { { 0.03f,0.05f,0.08f,0.1500f,0.1650f, 0.1908f, 0.2131f, 0.2559f, 0.3383f, 0.5489f }, { 0.03f,0.05f,0.08f,0.1500f,0.1650f, 0.1910f, 0.2124f, 0.2541f, 0.3405f, 0.5622f }, { 0.03f,0.05f,0.08f,0.1500f,0.1650f, 0.1935f, 0.2075f, 0.2415f, 0.3104f, 0.6206f }, { 0.03f,0.05f,0.08f,0.1500f,0.1650f, 0.1998f, 0.2228f, 0.2696f, 0.3841f, 0.7721f }, }; #ifdef RADAUDIO_DEVELOPMENT static void radaudio_load_heuristics_from_environment_variables(radaudio_encoder_state *es, radaudio_stream_header_unpacked *h) { char *env; int qmode = es->quality_mode; int rate_mode = es->samprate_mode; RR_UNUSED_VARIABLE(qmode); RR_UNUSED_VARIABLE(rate_mode); int side_exp_spacing = es->heur.side_exp_start2 - es->heur.side_exp_threshold; int side_exp_deadzone = es->heur.side_exp_end - es->heur.side_exp_start; env = 
getenv("SIDE_EXP_START" ); if (env) es->heur.side_exp_start = atoi(env); env = getenv("SIDE_EXP_THRESHOLD"); if (env) es->heur.side_exp_threshold = atoi(env); env = getenv("SIDE_EXP_DEADZONE" ); if (env) side_exp_deadzone = atoi(env); env = getenv("SIDE_EXP_SPACING" ); if (env) side_exp_spacing = atoi(env); es->heur.side_exp_start2 = es->heur.side_exp_threshold + side_exp_spacing; es->heur.side_exp_end = es->heur.side_exp_start + side_exp_deadzone; env = getenv("MID_SIDE_TINY" ); if (env) es->heur.mid_side_tiny = atoi(env); env = getenv("MID_SIDE_OFFSET" ); if (env) es->heur.mid_side_offset = atoi(env); env = getenv("MID_SIDE_THRESHOLD" ); if (env) es->heur.mid_side_threshold = atoi(env); env = getenv("MID_SIDE_MAX_BAD_BANDS"); if (env) es->heur.mid_side_max_bad_bands = atoi(env); env = getenv("EXPECTATION_SCALE" ); if (env) es->heur.expectation_scale = strtof(env,NULL); env = getenv("EXPECTATION_BASE" ); if (env) es->heur.expectation_base = strtof(env,NULL); env = getenv("SHORT_OVERLAP_SCALE1" ); if (env) es->heur.short_overlap_scale1 = strtof(env,NULL); env = getenv("SHORT_OVERLAP_SCALE2" ); if (env) es->heur.short_overlap_scale2 = strtof(env,NULL); env = getenv("BAND_EXPONENT_BASE0" ); if (env) es->heur.band_exponent_base [0] = strtof(env,NULL); env = getenv("BAND_EXPONENT_BASE1" ); if (env) es->heur.band_exponent_base [1] = strtof(env,NULL); env = getenv("BAND_COUNT_EXPONENT0" ); if (env) es->heur.band_count_exponent [0] = strtof(env,NULL); env = getenv("BAND_COUNT_EXPONENT1" ); if (env) es->heur.band_count_exponent [1] = strtof(env,NULL); env = getenv("QUALITY_WEIGHT_LOW0" ); if (env) es->heur.quality_weight_low [0] = strtof(env,NULL); env = getenv("QUALITY_WEIGHT_LOW1" ); if (env) es->heur.quality_weight_low [1] = strtof(env,NULL); env = getenv("EXTRA_THRESHOLD_BIG0" ); if (env) es->heur.large_boost_median_test[0] = strtof(env,NULL); env = getenv("EXTRA_THRESHOLD_BIG1" ); if (env) es->heur.large_boost_median_test[1] = strtof(env,NULL); env = 
getenv("EXTRA_THRESHOLD_SMALL0" ); if (env) es->heur.small_boost_median_test[0] = strtof(env,NULL); env = getenv("EXTRA_THRESHOLD_SMALL1" ); if (env) es->heur.small_boost_median_test[1] = strtof(env,NULL); env = getenv("SHORT_BLOCK_PULSES" ); if (env) es->heur.short_block_pulse_scale = strtof(env,NULL); #if 0 env = getenv("BAND_MASK_8" ); if (env) es->heur.band_mask_8 [0] = strtof(env,NULL); env = getenv("BAND_MASK_4" ); if (env) es->heur.band_mask_4 [0] = strtof(env,NULL); env = getenv("BAND_MASK_2" ); if (env) es->heur.band_mask_2 [0] = strtof(env,NULL); env = getenv("BAND_MASK_1" ); if (env) es->heur.band_mask_1 [0] = strtof(env,NULL); #endif float mantissa_scale[2] = { 1.00,1.00f }; env = getenv("MANTISSA_SCALE_LONG" ); if (env) mantissa_scale[0] = strtof(env,NULL); env = getenv("MANTISSA_SCALE_SHORT" ); if (env) mantissa_scale[1] = strtof(env,NULL); float band_mantissa_base0 = 0, band_mantissa_decay0 = 0; int band_mantissa_slot=-1; env = getenv("BAND_MANTISSA_BASE0_I" ); if (env) band_mantissa_base0 = strtof(env,NULL); env = getenv("BAND_MANTISSA_DECAY0_I"); if (env) band_mantissa_decay0 = strtof(env,NULL); env = getenv("BAND_MANTISSA_I" ); if (env) band_mantissa_slot = atoi(env); for (int i=0; i < radaudio_rateinfo[0][rate_mode].num_bands; ++i) { F32 base0 = band_mantissa_base [qmode][0][i/4]; F32 base1 = band_mantissa_base [qmode][1][i/4]; F32 decay0 = band_mantissa_decay[qmode][0][i/4]; F32 decay1 = band_mantissa_decay[qmode][1][i/4]; if (i/4 == band_mantissa_slot) { base0 = band_mantissa_base0; decay0 = band_mantissa_decay0; } base0 *= mantissa_scale[0]; base1 *= mantissa_scale[1]; decay0 *= mantissa_scale[0]; decay1 *= mantissa_scale[1]; h->mantissa_param[0][i][0] = (S8) ( base0 * 8 + 0.5); h->mantissa_param[1][i][0] = (S8) ( base1 * 8 + 0.5); h->mantissa_param[0][i][1] = (S8) (decay0 * 256 + 0.5); h->mantissa_param[1][i][1] = (S8) (decay1 * 256 + 0.5); } float pulse_value_lo = 5.0; float pulse_value_hi = 5.0; int pulse_range_lo = -1; int pulse_range_hi = 
-1; env = getenv("PULSE_VALUE_LO" ); if (env) pulse_value_lo = strtof(env,NULL); pulse_value_hi = pulse_value_lo; env = getenv("PULSE_VALUE_HI" ); if (env) pulse_value_hi = strtof(env,NULL); env = getenv("PULSE_RANGE_LO" ); if (env) pulse_range_lo = atoi(env); env = getenv("PULSE_RANGE_HI" ); if (env) pulse_range_hi = atoi(env); float subband_scale = 1.0f; env = getenv("SUBBAND_SCALE" ); if (env) subband_scale = strtof(env,NULL); for (int i=0; i < radaudio_rateinfo[0][rate_mode].num_bands; ++i) { float pulses_per = subband_pulses_for_band[rate_mode][qmode][i] * subband_scale; if (i >= pulse_range_lo && i <= pulse_range_hi) { pulses_per = linear_remap((float) i, (float) pulse_range_lo, (float) pulse_range_hi, (float) pulse_value_lo, (float) pulse_value_hi); } int predicted_subband_sum = (int) (pulses_per * radaudio_rateinfo[0][rate_mode].num_subbands_for_band[i] + 0.5f); h->subband_predicted_sum[i] = (U8) RR_MIN(255, predicted_subband_sum); } } #endif static radaudio_nonzero_blockmode_descriptor nz_encode[RADAUDIO_NUM_RATES][10][NUM_SELECTOR_MODES] = { // 48 Khz { { { 0 } }, // 0 { { 0 } }, // 1 { { 0 } }, // 2 { { 0 } }, // 3 { { 0 } }, // 4 // bitrate:5 { { 4, { 0,2,1,1 }, }, { 6, { 0,2,1,1,1,1 }, }, { 2, { 2,1 }, }, { 0, }, { 0, } }, // bitrate:6 { { 6, { 0,2,1,1,1,1 }, }, { 4, { 0,2,1,1 }, }, { 2, { 2,1 }, }, { 0, }, { 0, } }, // bitrate:7 { { 8, { 0,0,2,1,1,1,1,1, }, }, { 6, { 0,2,1,1,1,1 }, }, { 2, { 5,1 }, }, { 0, }, { 0, } }, // bitrate:8 { { 12, { 0,0,0,0, 0,0,2,1, 2,1,2,1 }, { 0,0,0,0, 0,0,0,1, }, }, { 11, { 0,0,1,1,1,1, 1,1,1,1,1 }, { 0,0,1,1, 1,1,1 }, }, { 4, { 5,5,1,1 }, }, { 0, }, { 0, } }, // bitrate:9 { { 12, { 4,4,4,4, 4,4,4,4, 4,1,4,0 }, { 0,0,0,0, 0,0,0,0, 0,1,0,0 } }, { 12, { 4,4,5,5, 5,5,5,5, 5,1,5,1 }, { 0,0,0,0, 0,0,0,0, 0,0,0,1 } }, { 6, { 5,5,5,5, 1,1 }, }, { 0, }, { 0, } }, }, // 44.1 Khz { { { 0 } }, // bitrate:0 { { 0 } }, // 1 { { 0 } }, // 2 { { 0 } }, // 3 { { 0 } }, // 4 // bitrate:5 { { 4, { 0,2,1,1 }, }, { 2, { 2,1 }, }, { 1, { 1 }, 
}, { 0, }, { 0, } }, // bitrate:6 { { 5, { 0,2,1,1,1 }, }, { 3, { 0,2,1, }, }, { 2, { 5,1 }, }, { 0, }, { 0, } }, // bitrate:7 { { 7, { 0,0,2,2,1,1,1 }, }, { 6, { 0,2,1,1,1,1 }, }, { 2, { 5,1 }, }, { 0, }, { 0, } }, // bitrate:8 { { 12, { 4,0,0,0, 2,2,2,3, 2,2,2,2, }, }, { 11, { 4,0,5,5, 5,1,1,1, 1,1,1 }, }, { 5, { 5,5,1,1, 1 }, }, { 0, }, { 0, } }, // bitrate:9 { { 12, { 3,3,4,4,4,4,4,0,4,0,5,4 }, }, { 12, { 3,4,5,5,5,5,5,5,5,5,5,5 }, }, { 6, { 5,5,5,5,5,1 }, }, { 0, }, { 0, } }, }, // 32 Khz { { { 0 } }, // bitrate:0 { { 0 } }, // 1 { { 0 } }, // 2 { { 0 } }, // 3 { { 0 } }, // 4 // bitrate:5 { { 6, { 0,2,2,1,1,1 }, }, { 5, { 0,2,1,1,1,1 }, }, { 2, { 5,1 }, }, { 0, }, { 0, } }, // bitrate:6 { { 8, { 0,0,2,1,1,1,1,1 }, }, { 6, { 0,2,1,1,1,1 }, }, { 2, { 5,1 }, }, { 0, }, { 0, } }, // bitrate:7 { { 12, { 0,0,2,2, 2,1,1,1, 1,1,1,1 }, }, { 10, { 0,2,1,1, 1,1,1,1, 1,1 }, { 0,0,1 } }, { 5, { 0,1,1,1, 1 }, }, { 0, }, { 0, } }, // bitrate:8 { { 12, { 4,0,0,0, 0,0,0,2, 0,2,2,5 }, }, { 12, { 0,0,1,1, 1,1,1,1, 1,1,1,1 }, { 0,0,1,1,1,1,1 } }, { 3, { 5,5,1, }, }, { 0, }, { 0, } }, // bitrate:9 { { 12, { 3,3,4,4,4,4,4,4,4,4,4,4 }, }, { 12, { 3,4,5,5,5,5,5,5,5,5,5,5 }, }, { 6, { 4,5,5,5,5,5 }, }, { 0, }, { 0, } }, }, // 24 Khz { { { 0 } }, // bitrate:0 { { 0 } }, // 1 { { 0 } }, // 2 { { 0 } }, // 3 { { 0 } }, // 4 // bitrate:5 { { 9, { 0,0,1,1, 1,1,1,1, 1 }, { 0,0,1 } }, { 6, { 0,2,1,1,1,1 }, }, { 2, { 5,1 }, }, { 0, }, { 0, } }, // bitrate:6 { { 12, { 0,0,1,1, 1,1,1,1, 1,1,1,1 }, { 0,0,1 } }, { 12, { 0,0,2,2, 2,1,1,1, 1,1,1,1 }, }, { 7, { 0,2,1,1, 1,1,1 }, }, { 0, }, { 0, } }, // bitrate:7 { { 12, { 0,0,5,5, 5,5,1,1, 1,1,1,1 }, }, { 12, { 0,0,0,0, 0,2,2,2, 2,1,2,2 }, { 0,0,0,0, 0,0,0,0, 0,1,0,0 } }, { 5, { 5,5,1,1, 1 }, }, { 0, }, { 0, } }, // bitrate:8 { { 12, { 4,4,4,4, 4,4,4,4, 0,4,4,5 }, }, { 12, { 4,4,5,5, 5,5,5,5, 5,5,5,5 }, }, { 7, { 5,5,1,1, 1 }, }, { 0, }, { 0, } }, // bitrate:9 { { 12, { 3,3,3,3,3,4,4,3,3,4,3,3 }, }, { 12, { 3,3,5,5,5,5,5,5,5,5,5,5 }, }, { 12, { 
5,5,5,5,5,5,5,5,5,5,5,5 }, }, { 0, }, { 0, } }, }, }; static U8 nz_mode_correlated_selectors_pair[4][10][NUM_SELECTOR_MODES] = { { { 0,0,2,2,0,}, { 0,0,2,2,0,}, { 0,0,2,2,0,}, { 0,0,2,2,0,}, { 0,0,2,2,0,}, { 0,0,2,2,0,}, { 0,2,1,2,0,}, { 3,2,1,1,3,}, { 1,1,1,1,1,}, { 1,1,1,3,1,}, },{ { 0,2,2,2,0,}, { 0,2,2,2,0,}, { 0,2,2,2,0,}, { 0,2,2,2,0,}, { 0,2,2,2,0,}, { 0,2,2,2,0,}, { 0,2,2,1,0,}, { 3,2,1,1,3,}, { 1,1,1,1,1,}, { 1,1,1,3,1,}, },{ { 0,2,1,1,0,}, { 0,2,1,1,0,}, { 0,2,1,1,0,}, { 0,2,1,1,0,}, { 0,2,1,1,0,}, { 0,2,1,1,0,}, { 0,2,1,1,0,}, { 3,2,1,1,3,}, { 1,1,1,1,1,}, { 1,1,1,3,1,}, },{ { 3,1,1,1,3,}, { 3,1,1,1,3,}, { 3,1,1,1,3,}, { 3,1,1,1,3,}, { 3,1,1,1,3,}, { 3,1,1,1,3,}, { 2,3,1,1,3,}, { 1,1,1,1,1,}, { 1,1,1,1,1,}, { 1,1,1,1,1,}, } }; static U8 nz_mode_correlated_selectors_big[4][10][NUM_SELECTOR_MODES] = { { { 0,0,0,1,0,}, { 0,0,0,1,0,}, { 0,0,0,1,0,}, { 0,0,0,1,0,}, { 0,0,0,1,0,}, { 0,0,0,1,0,}, { 0,0,1,1,0,}, { 0,1,1,2,0,}, { 0,1,1,2,1,}, { 1,2,2,2,1,}, },{ { 0,0,1,1,0,}, { 0,0,1,1,0,}, { 0,0,1,1,0,}, { 0,0,1,1,0,}, { 0,0,1,1,0,}, { 0,0,1,1,0,}, { 0,0,1,1,0,}, { 0,1,1,2,0,}, { 0,1,2,2,1,}, { 2,2,2,3,2,}, },{ { 0,1,2,2,0,}, { 0,1,2,2,0,}, { 0,1,2,2,0,}, { 0,1,2,2,0,}, { 0,1,2,2,0,}, { 0,1,2,2,0,}, { 0,1,2,2,0,}, { 0,1,2,2,0,}, { 1,2,2,2,1,}, { 2,3,3,3,2,}, },{ { 1,2,2,2,1,}, { 1,2,2,2,1,}, { 1,2,2,2,1,}, { 1,2,2,2,1,}, { 1,2,2,2,1,}, { 1,2,2,2,1,}, { 1,1,2,2,1,}, { 2,0,2,2,1,}, { 1,2,2,3,1,}, { 3,3,3,3,3,}, } }; static void set_nz_desc(radaudio_nonzero_blockmode_descriptor nz_desc[NUM_NZ_MODE], U8 nz_correlated_huffman_selectors[NUM_NZ_SELECTOR][NUM_SELECTOR_MODES], int ratemode, int quality_mode) { // quality under 5 is untuned, so just use 5 if (quality_mode < 5) quality_mode = 5; memcpy(nz_desc, nz_encode[ratemode][quality_mode], sizeof(nz_desc[0]) * NUM_NZ_MODE); memset(nz_correlated_huffman_selectors, 0, NUM_NZ_SELECTOR*NUM_SELECTOR_MODES); memcpy(nz_correlated_huffman_selectors[HS_COEFF_PAIR], nz_mode_correlated_selectors_pair[ratemode][quality_mode], 
NUM_SELECTOR_MODES); memcpy(nz_correlated_huffman_selectors[HS_COEFF_BIG ], nz_mode_correlated_selectors_big [ratemode][quality_mode], NUM_SELECTOR_MODES); } // returns 1 on success, or 0 if inputs are invalid or internal error size_t radaudio_encode_create_internal(radaudio_encoder *rae, U8 header[RADAUDIO_STREAM_HEADER_MAX], int num_channels, // 1..2 int sample_rate, // in HZ int qmode, // 0..9 float quality_pulse, U32 flags) // used for ratesearch during development { radaudio_encoder_state *es = (radaudio_encoder_state *) rae; int i, rate_mode; size_t pack_length, unpack_length; if (qmode > 9) qmode = 9; if (qmode < 0) qmode = 0; // need to know samprate mode before we can fill full header, so do a first conversion rate_mode = radaudio_code_sample_rate(sample_rate); if (rate_mode < 0) return 0; memset(es, 0, sizeof(*es)); es->quality_mode = (U8) qmode; es->cpu = cpu_detect(); es->num_channels = num_channels; es->sample_rate = sample_rate; es->samprate_mode = rate_mode; es->allow_mid_side = true; if (quality_pulse == 0) es->heur.pulse_quality = pulse_quality_table[es->samprate_mode][es->quality_mode]; else es->heur.pulse_quality = quality_pulse / 100.0f; for (i=0; i < 2; ++i) { es->heur.band_exponent_base [i] = band_exponent_base [rate_mode][i][qmode]; es->heur.band_count_exponent[i] = band_count_exponent[rate_mode][i][qmode]; es->heur.quality_weight_low [i] = quality_weight_low [rate_mode][i][qmode]; es->heur.large_boost_median_test[i] = extra_threshold_big [rate_mode][i][qmode]; es->heur.small_boost_median_test[i] = extra_threshold_small[rate_mode][i][qmode]; } es->heur.short_block_pulse_scale = short_block_pulses[rate_mode][qmode]; es->heur.side_exp_threshold_all = 3; es->heur.side_exp_start2_all = 6; es->heur.side_exp_threshold = 2; es->heur.side_exp_end_all = -13; es->heur.side_exp_start_all = -15; es->heur.side_exp_start = -15; es->heur.side_exp_start2 = 4; es->heur.side_exp_end = -13; es->heur.mid_side_tiny = -15; es->heur.mid_side_offset = - 4; 
es->heur.mid_side_threshold = -16; es->heur.mid_side_max_bad_bands = 6; es->heur.expectation_base = -16; es->heur.expectation_scale = 0.195f; es->heur.short_overlap_scale1 = 1.0f; es->heur.short_overlap_scale2 = 1.0f; set_nz_desc(es->nz_desc, es->nz_correlated_huffman_selectors, rate_mode, qmode); radaudio_stream_header_unpacked h; memset(&h, 0, sizeof(h)); h.num_channels = num_channels; h.sample_rate = sample_rate; h.version = ENCODER_VERSION; h.bytes_bias = header_size_bias[qmode]; compute_bias_set(&es->biases, h.bytes_bias); for (i=0; i < radaudio_rateinfo[0][rate_mode].num_bands; ++i) { F32 base0,base1,decay0,decay1; if (flags & RADAUDIO_ENC_FLAG_improve_seamless_loop) { base0 = band_mantissa_base_improved_short_dc [qmode][0][i/4]; base1 = band_mantissa_base_improved_short_dc [qmode][1][i/4]; decay0 = band_mantissa_decay_improved_short_dc[qmode][0][i/4]; decay1 = band_mantissa_decay_improved_short_dc[qmode][1][i/4]; } else { base0 = band_mantissa_base [qmode][0][i/4]; base1 = band_mantissa_base [qmode][1][i/4]; decay0 = band_mantissa_decay[qmode][0][i/4]; decay1 = band_mantissa_decay[qmode][1][i/4]; } h.mantissa_param[0][i][0] = (S8) ( base0 * 8 + 0.5); h.mantissa_param[1][i][0] = (S8) ( base1 * 8 + 0.5); h.mantissa_param[0][i][1] = (S8) (decay0 * 256 + 0.5); h.mantissa_param[1][i][1] = (S8) (decay1 * 256 + 0.5); } for (i=0; i < radaudio_rateinfo[0][rate_mode].num_bands; ++i) { float pulses_per = subband_pulses_for_band[rate_mode][qmode][i]; int predicted_subband_sum = (int) (pulses_per * radaudio_rateinfo[0][rate_mode].num_subbands_for_band[i] + 0.5f); h.subband_predicted_sum[i] = (U8) RR_MIN(255, predicted_subband_sum); } for (i=0; i < NUM_NZ_MODE; ++i) { h.nzmode_num64[i] = es->nz_desc[i].num_8byte_chunks; for (int j=0; j < MAX_NZ_BLOCKS; ++j) h.nzmode_huff[i][j] = es->nz_desc[i].huffman_table_for_chunk[j] | (es->nz_desc[i].invert_chunk[j] ? 
NZ_MODE_INVERT : 0); } for (int j=0; j < NUM_NZ_SELECTOR; ++j) for (i=0; i < NUM_SELECTOR_MODES; ++i) h.nzmode_selectors[j][i] = es->nz_correlated_huffman_selectors[j][i]; #ifdef RADAUDIO_DEVELOPMENT radaudio_load_heuristics_from_environment_variables(es, &h); #endif pack_length = radaudio_pack_stream_header(header, &h); if (pack_length == 0) return 0; unpack_length = radaudio_unpack_stream_header(header, RADAUDIO_STREAM_HEADER_MAX, &h); if (unpack_length != pack_length) return 0; memcpy(es->subband_predicted_sum, h.subband_predicted_sum, 24); memcpy(es->mantissa_param , h.mantissa_param , sizeof(es->mantissa_param )); memcpy(es->subband_bias , h.subband_bias , 24*sizeof(es->subband_bias[0])); for (i=0; i < 2; ++i) es->info[i] = &radaudio_rateinfo[i][es->samprate_mode]; radaudio_init_nz_desc(es->nz_desc); return unpack_length; } size_t radaudio_encode_create(radaudio_encoder *es, U8 header[RADAUDIO_STREAM_HEADER_MAX], int num_channels, int sample_rate, int quality, U32 flags) { return radaudio_encode_create_internal(es, header, num_channels, sample_rate, quality, 0.0f, flags); } #ifdef RADAUDIO_DEVELOPMENT // internal use int RadAudioCompressGetProfileData(radaudio_encoder *hradaud, radaudio_eprofile_value *profile, int num_profile) { radaudio_encoder_state *es = (radaudio_encoder_state *) hradaud; int n = RR_MIN(num_profile, PROF_total_count); static const char *names[] = { #define PROF(x) #x, PROFILE_ZONES() #undef PROF }; for (int i=0; i < n; ++i) { profile[i].name = names[i]; profile[i].time = rrTicksToSeconds(es->profile_times[i]); } return n; } #else int RadAudioCompressGetProfileData(radaudio_encoder *hradaud, radaudio_eprofile_value *profile, int num_profile) { RR_UNUSED_VARIABLE(hradaud); RR_UNUSED_VARIABLE(profile); RR_UNUSED_VARIABLE(num_profile); return 0; } #endif