Merge pull request #6034 from matrix-org/travis/voicemessages/waveform-noise

Improve visible waveform for voice messages
This commit is contained in:
Travis Ralston 2021-05-14 08:12:01 -06:00 committed by GitHub
commit 8066e5402c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 14 deletions

View file

@ -75,7 +75,8 @@ export function arraySmoothingResample(input: number[], points: number): number[
for (let i = 1; i < input.length - 1; i += 2) {
const prevPoint = input[i - 1];
const nextPoint = input[i + 1];
const average = (prevPoint + nextPoint) / 2;
const currPoint = input[i];
const average = (prevPoint + nextPoint + currPoint) / 3;
samples.push(average);
}
input = samples;

View file

@ -21,6 +21,7 @@ import {SimpleObservable} from "matrix-widget-api";
import { IDestroyable } from "../utils/IDestroyable";
import { PlaybackClock } from "./PlaybackClock";
import { createAudioContext, decodeOgg } from "./compat";
import { clamp } from "../utils/numbers";
export enum PlaybackState {
Decoding = "decoding",
@ -33,9 +34,20 @@ export const PLAYBACK_WAVEFORM_SAMPLES = 39;
const DEFAULT_WAVEFORM = arraySeed(0, PLAYBACK_WAVEFORM_SAMPLES);
/**
 * Builds the waveform displayed for a voice message from its raw samples.
 *
 * The samples are converted to magnitudes, resampled with a smoothing resample
 * (requesting PLAYBACK_WAVEFORM_SAMPLES points), clipped to the 0.1-0.5 band and
 * finally rescaled so the result keeps the "values between zero and one" contract.
 *
 * @param input The raw waveform samples. Negative amplitudes are accepted; only
 * their magnitude is used.
 * @returns The rescaled waveform to render.
 */
function makePlaybackWaveform(input: number[]): number[] {
    // First, convert negative amplitudes to positive: a sample's loudness is its magnitude,
    // so a negative swing must count as volume rather than dragging the smoothed average
    // towards zero.
    const noiseWaveform = input.map(v => Math.abs(v));

    // Next, resample the waveform using a smoothing approach so we keep the same rough shape,
    // and rescale it to 0-1 so the thresholds used below are meaningful.
    const resampled = arrayRescale(arraySmoothingResample(noiseWaveform, PLAYBACK_WAVEFORM_SAMPLES), 0, 1);

    // Then, clip the amplitudes to the 0.1-0.5 band to isolate actual speaking volumes within
    // the rescaled waveform: anything quieter than 0.1 is flattened to the floor and anything
    // louder than 0.5 is flattened to the ceiling. Most speech happens below the 0.5 mark.
    const filtered = resampled.map(v => clamp(v, 0.1, 0.5));

    // Finally, rescale the clipped waveform (0.1-0.5 becomes 0-1 again) so the user sees
    // something sensible. This is what we return to keep our contract of "values between
    // zero and one".
    // NOTE(review): if every clipped value is identical (e.g. pure silence) the behaviour
    // depends on how arrayRescale handles a zero-width input range - confirm it does not
    // produce NaN.
    return arrayRescale(filtered, 0, 1);
}
export class Playback extends EventEmitter implements IDestroyable {

View file

@ -73,10 +73,10 @@ describe('arrays', () => {
// we'd be feeding a thousand values in and seeing what a curve of 250 values looks like,
// but that's not really feasible to manually verify accuracy.
[
{input: [2, 2, 0, 2, 2, 0, 2, 2, 0], output: [1, 1, 2, 1]}, // Odd -> Even
{input: [2, 2, 0, 2, 2, 0, 2, 2, 0], output: [1, 1, 2]}, // Odd -> Odd
{input: [2, 2, 0, 2, 2, 0, 2, 2], output: [1, 1, 2]}, // Even -> Odd
{input: [2, 2, 0, 2, 2, 0, 2, 2], output: [1, 2]}, // Even -> Even
{input: [4, 4, 1, 4, 4, 1, 4, 4, 1], output: [3, 3, 3, 3]}, // Odd -> Even
{input: [4, 4, 1, 4, 4, 1, 4, 4, 1], output: [3, 3, 3]}, // Odd -> Odd
{input: [4, 4, 1, 4, 4, 1, 4, 4], output: [3, 3, 3]}, // Even -> Odd
{input: [4, 4, 1, 4, 4, 1, 4, 4], output: [3, 3]}, // Even -> Even
].forEach((c, i) => expectSample(i, c.input, c.output, true));
});