C++程序  |  180行  |  7.02 KB

/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

// An amplitude-normalized spectrum comparison method.

#include "Fft.h"
#include "Window.h"

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/* Find the endpoints of the signal stored in data such that the rms of
   the found segment exceeds signalOnRms.  data is of length n.  The
   RMS calculations used to find the endpoints use a window of length
   step and advance step samples.  The approximate, conservative
   endpoint sample indices are returned in start and end. */
static void findEndpoints(short* data, int n, int step, float signalOnRms,
                          int* start, int* end) {
    int size = step;
    *start = *end = 0;
    int last_frame = n - size;
    for (int frame = 0; frame < last_frame; frame += step) {
        double sum = 0.0;
        for (int i=0; i < size; ++i) {
            float val = data[i + frame];
            sum += (val * val);
        }
        float rms = sqrt(sum / size);
        if (! *start) {
            if (rms >= signalOnRms) {
                *start = frame + size;
            }
            continue;
        } else {
            if (rms < signalOnRms) {
                *end = frame - size;
                // fprintf(stderr, "n:%d onset:%d offset:%d\n", n, *start, *end);
                return;
            }
        }
    }
    // Handle the case where the signal does not drop below threshold
    // after onset.
    if ((*start > 0) && (! *end)) {
        *end = n - size - 1;
    }
}

// Sum the magnitude squared spectra.
static void accumulateMagnitude(float* im, float* re, int size, double* mag) {
    for (int i = 0; i < size; ++i) {
        mag[i] += ((re[i] * re[i]) + (im[i] * im[i]));
    }
}

/* Return a pointer to 1+(fftSize/2) spectrum magnitude values
   averaged over all of the numSamples in pcm.  It is the
   responsibility of the caller to free this magnitude array.  Return
   NULL on failure.  Use 50% overlap on the spectra. An FFT of
   fftSize points is used to compute the spectra, The overall signal
   rms over all frequencies between lowestBin and highestBin is
   returned as a scalar in rms. */
double* getAverageSpectrum(short* pcm, int numSamples, int fftSize,
                           int lowestBin, int highestBin, float* rms) {
    if (numSamples < fftSize) return NULL;
    int numFrames = 1 + ((2 * (numSamples - fftSize)) / fftSize);
    int numMag = 1 + (fftSize / 2);
    float* re = new float[fftSize];
    float* im = new float[fftSize];
    double* mag = new double[numMag];
    for (int i = 0; i < numMag; ++i) {
        mag[i] = 0.0;
    }
    Window wind(fftSize);
    Fft ft;
    int pow2 = ft.fftPow2FromWindowSize(fftSize);
    ft.fftInit(pow2);
    int input_p = 0;
    for (int i = 0; i < numFrames; ++i) {
        wind.window(pcm + input_p, re, 0.0);
        for (int j = 0; j < fftSize; ++j) {
            im[j] = 0.0;
        }
        ft.fft(re,im);
        accumulateMagnitude(im, re, numMag, mag);
        input_p += fftSize / 2;
    }
    double averageEnergy = 0.0; // per frame average energy
    for (int i = 0; i < numMag; ++i) {
        double e = mag[i]/numFrames;
        if ((i >= lowestBin) && (i <= highestBin))
            averageEnergy += e;
        mag[i] = sqrt(e);
    }
    *rms = sqrt(averageEnergy / (highestBin - lowestBin + 1));
    delete [] re;
    delete [] im;
    return mag;
}

/* Compare the average magnitude spectra of the signals in pcm and
   refPcm, which are of length numSamples and numRefSamples,
   respectively; both sampled at sample_rate.  The maximum deviation
   between average spectra, expressed in dB, is returned in
   maxDeviation, and the rms of all dB variations is returned in
   rmsDeviation.  Note that a lower limit is set on the frequencies that
   are compared so as to ignore irrelevant DC and rumble components.  If
   the measurement fails for some reason, return 0; else return 1, for
   success.  Causes for failure include the amplitude of one or both of
   the signals being too low, or the duration of the signals being too
   short.

   Note that the expected signal collection scenario is that the phone
   would be stimulated with a broadband signal as in a recognition
   attempt, so that there will be some "silence" regions at the start and
   end of the pcm signals.  The preferred stimulus would be pink noise,
   but any broadband signal should work. */
int compareSpectra(short* pcm, int numSamples, short* refPcm,
                   int numRefSamples, float sample_rate,
                   float* maxDeviation, float* rmsDeviation) {
    int fftSize = 512;           // must be a power of 2
    float lowestFreq = 100.0;    // don't count DC, room rumble, etc.
    float highestFreq = 3500.0;  // ignore most effects of sloppy anti alias filters.
    int lowestBin = int(0.5 + (lowestFreq * fftSize / sample_rate));
    int highestBin = int(0.5 + (highestFreq * fftSize / sample_rate));
    float signalOnRms = 1000.0; // endpointer RMS on/off threshold
    int endpointStepSize = int(0.5 + (sample_rate * 0.02)); // 20ms setp
    float rms1 = 0.0;
    float rms2 = 0.0;
    int onset = 0;
    int offset = 0;
    findEndpoints(refPcm, numRefSamples, endpointStepSize, signalOnRms,
                   &onset, &offset);
    double* spect1 = getAverageSpectrum(refPcm + onset, offset - onset,
                                        fftSize, lowestBin, highestBin, &rms1);
    findEndpoints(pcm, numSamples, endpointStepSize, signalOnRms,
                  &onset, &offset);
    double* spect2 = getAverageSpectrum(pcm + onset, offset - onset,
                                        fftSize, lowestBin, highestBin, &rms2);
    int magSize = 1 + (fftSize/2);
    if ((rms1 <= 0.0) || (rms2 <= 0.0))
        return 0; // failure because one or both signals are too short or
                  // too low in amplitude.
    float rmsNorm = rms2 / rms1; // compensate for overall gain differences
    // fprintf(stderr, "Level normalization: %f dB\n", 20.0 * log10(rmsNorm));
    *maxDeviation = 0.0;
    float sumDevSquared = 0.0;
    for (int i = lowestBin; i <= highestBin; ++i) {
        double val = 1.0;
        if ((spect1[i] > 0.0) && (spect2[i] > 0.0)) {
            val = 20.0 * log10(rmsNorm * spect1[i] / spect2[i]);
        }
        sumDevSquared += val * val;
        if (fabs(val) > fabs(*maxDeviation)) {
            *maxDeviation = val;
        }
        // fprintf(stderr, "%d %f\n", i, val);
    }
    *rmsDeviation = sqrt(sumDevSquared / (highestBin - lowestBin + 1));
    delete [] spect1;
    delete [] spect2;
    return 1; // success
}