879 lines
28 KiB
C++
879 lines
28 KiB
C++
// Copyright Epic Games, Inc. All Rights Reserved.
|
|
|
|
#include "VoiceCaptureWindows.h"
|
|
#include "VoicePrivate.h"
|
|
#include "VoiceModule.h"
|
|
#include "DSP/Dsp.h"
|
|
|
|
#if PLATFORM_SUPPORTS_VOICE_CAPTURE
|
|
|
|
#include "Windows/AllowWindowsPlatformTypes.h"
|
|
|
|
static int32 DisplayAmplitudeCvar = 0;
|
|
FAutoConsoleVariableRef CVarDisplayAmplitude(
|
|
TEXT("voice.debug.PrintAmplitude"),
|
|
DisplayAmplitudeCvar,
|
|
TEXT("when set to 1, the current incoming amplitude of the VOIP engine will be displayed on screen.\n")
|
|
TEXT("0: disabled, 1: enabled."),
|
|
ECVF_Default);
|
|
|
|
struct FVoiceCaptureWindowsVars
|
|
{
|
|
/** GUID of current voice capture device */
|
|
GUID VoiceCaptureDeviceGuid;
|
|
/** Voice capture device */
|
|
LPDIRECTSOUNDCAPTURE8 VoiceCaptureDev;
|
|
/** Voice capture device caps */
|
|
DSCCAPS VoiceCaptureDevCaps;
|
|
/** Voice capture buffer */
|
|
LPDIRECTSOUNDCAPTUREBUFFER8 VoiceCaptureBuffer8;
|
|
/** Wave format of buffer */
|
|
WAVEFORMATEX WavFormat;
|
|
/** Buffer description */
|
|
DSCBUFFERDESC VoiceCaptureBufferDesc;
|
|
/** Buffer caps */
|
|
DSCBCAPS VoiceCaptureBufferCaps8;
|
|
/** Notification events */
|
|
HANDLE StopEvent;
|
|
/** Current audio position of valid data in capture buffer */
|
|
DWORD NextCaptureOffset;
|
|
|
|
FVoiceCaptureWindowsVars() :
|
|
VoiceCaptureDev(nullptr),
|
|
VoiceCaptureBuffer8(nullptr),
|
|
NextCaptureOffset(0)
|
|
{
|
|
StopEvent = INVALID_HANDLE_VALUE;
|
|
Reset();
|
|
}
|
|
|
|
void Reset()
|
|
{
|
|
if (StopEvent != INVALID_HANDLE_VALUE)
|
|
{
|
|
CloseHandle(StopEvent);
|
|
StopEvent = INVALID_HANDLE_VALUE;
|
|
}
|
|
|
|
// Free up DirectSound resources
|
|
if (VoiceCaptureBuffer8)
|
|
{
|
|
VoiceCaptureBuffer8->Release();
|
|
VoiceCaptureBuffer8 = nullptr;
|
|
}
|
|
|
|
if (VoiceCaptureDev)
|
|
{
|
|
VoiceCaptureDev->Release();
|
|
VoiceCaptureDev = nullptr;
|
|
}
|
|
|
|
NextCaptureOffset = 0;
|
|
|
|
FMemory::Memzero(&VoiceCaptureDeviceGuid, sizeof(GUID));
|
|
FMemory::Memzero(&VoiceCaptureDevCaps, sizeof(VoiceCaptureDevCaps));
|
|
|
|
FMemory::Memzero(&WavFormat, sizeof(WavFormat));
|
|
FMemory::Memzero(&VoiceCaptureBufferDesc, sizeof(VoiceCaptureBufferDesc));
|
|
FMemory::Memzero(&VoiceCaptureBufferCaps8, sizeof(VoiceCaptureBufferCaps8));
|
|
}
|
|
};
|
|
|
|
/** Allocates the DirectSound state bundle and starts in the UnInitialized capture state. */
FVoiceCaptureWindows::FVoiceCaptureWindows()
	: CV(new FVoiceCaptureWindowsVars())
	, LastCaptureTime(0.0)
	, VoiceCaptureState(EVoiceCaptureState::UnInitialized)
{
}
|
|
|
|
/**
 * Stops capture and releases DirectSound resources, unregisters this object from the
 * capture device singleton (when one exists), then frees the state bundle.
 */
FVoiceCaptureWindows::~FVoiceCaptureWindows()
{
	Shutdown();

	if (FVoiceCaptureDeviceWindows* OwningDevice = FVoiceCaptureDeviceWindows::Get())
	{
		OwningDevice->FreeVoiceCaptureObject(this);
	}

	// Deleting a null pointer is a no-op, so no guard is needed.
	delete CV;
	CV = nullptr;
}
|
|
|
|
bool FVoiceCaptureWindows::Init(const FString& DeviceName, int32 SampleRate, int32 NumChannels)
|
|
{
|
|
FVoiceCaptureDeviceWindows* VoiceDev = FVoiceCaptureDeviceWindows::Get();
|
|
if (!VoiceDev)
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("No voice capture interface."));
|
|
return false;
|
|
}
|
|
|
|
// init the sample counter to 0 on init
|
|
SampleCounter = 0;
|
|
CachedSampleStart = 0;
|
|
|
|
// set up level detector
|
|
static IConsoleVariable* SilenceDetectionAttackCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.SilenceDetectionAttackTime"));
|
|
check(SilenceDetectionAttackCVar);
|
|
static IConsoleVariable* SilenceDetectionReleaseCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.SilenceDetectionReleaseTime"));
|
|
check(SilenceDetectionReleaseCVar);
|
|
|
|
Audio::FInlineEnvelopeFollowerInitParams EnvelopeFollowerInitParams;
|
|
EnvelopeFollowerInitParams.SampleRate = SampleRate;
|
|
EnvelopeFollowerInitParams.AttackTimeMsec = SilenceDetectionAttackCVar->GetFloat();
|
|
EnvelopeFollowerInitParams.ReleaseTimeMsec = SilenceDetectionReleaseCVar->GetFloat();
|
|
EnvelopeFollowerInitParams.Mode = Audio::EPeakMode::Peak;
|
|
EnvelopeFollowerInitParams.bIsAnalog = MicSilenceDetectionConfig::IsAnalog;
|
|
|
|
MicLevelDetector.Init(EnvelopeFollowerInitParams);
|
|
|
|
const int32 AttackInSamples = SampleRate * SilenceDetectionAttackCVar->GetFloat() * 0.001f;
|
|
LookaheadBuffer.Init(AttackInSamples + 1);
|
|
LookaheadBuffer.SetDelay(AttackInSamples);
|
|
|
|
NoiseGateAttenuator.Init(SampleRate);
|
|
|
|
bIsMicActive = false;
|
|
bWasMicAboveNoiseGateThreshold = false;
|
|
|
|
return CreateCaptureBuffer(DeviceName.IsEmpty() ? VoiceDev->DefaultVoiceCaptureDevice.DeviceName : DeviceName, SampleRate, NumChannels);
|
|
}
|
|
|
|
bool FVoiceCaptureWindows::CreateCaptureBuffer(const FString& DeviceName, int32 SampleRate, int32 NumChannels)
|
|
{
|
|
// Free the previous buffer
|
|
FreeCaptureBuffer();
|
|
VoiceCaptureState = EVoiceCaptureState::NotCapturing;
|
|
|
|
if (SampleRate < 8000 || SampleRate > 48000)
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Voice capture doesn't support %d hz"), SampleRate);
|
|
return false;
|
|
}
|
|
|
|
if (NumChannels < 0 || NumChannels > 2)
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Voice capture only supports 1 or 2 channels"));
|
|
return false;
|
|
}
|
|
|
|
FVoiceCaptureDeviceWindows* VoiceDev = FVoiceCaptureDeviceWindows::Get();
|
|
if (!VoiceDev)
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("No voice capture interface."));
|
|
return false;
|
|
}
|
|
|
|
FVoiceCaptureDeviceWindows::FCaptureDeviceInfo* DeviceInfo = nullptr;
|
|
if (DeviceName.IsEmpty())
|
|
{
|
|
DeviceInfo = &VoiceDev->DefaultVoiceCaptureDevice;
|
|
}
|
|
else
|
|
{
|
|
DeviceInfo = VoiceDev->Devices.Find(DeviceName);
|
|
}
|
|
|
|
if (DeviceInfo)
|
|
{
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("Creating capture %s [%d:%d]"), *DeviceInfo->DeviceName, SampleRate, NumChannels);
|
|
CV->VoiceCaptureDeviceGuid = DeviceInfo->DeviceId;
|
|
|
|
// DSDEVID_DefaultCapture WAVEINCAPS
|
|
HRESULT hr = DirectSoundCaptureCreate8(&DeviceInfo->DeviceId, &CV->VoiceCaptureDev, nullptr);
|
|
if (FAILED(hr))
|
|
{
|
|
//DSERR_ALLOCATED, DSERR_INVALIDPARAM, DSERR_NOAGGREGATION, DSERR_OUTOFMEMORY
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to create capture device 0x%08x"), hr);
|
|
return false;
|
|
}
|
|
|
|
// Device capabilities
|
|
CV->VoiceCaptureDevCaps.dwSize = sizeof(DSCCAPS);
|
|
hr = CV->VoiceCaptureDev->GetCaps(&CV->VoiceCaptureDevCaps);
|
|
if (FAILED(hr))
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to get mic device caps 0x%08x"), hr);
|
|
return false;
|
|
}
|
|
|
|
// Wave format setup
|
|
CV->WavFormat.wFormatTag = WAVE_FORMAT_PCM;
|
|
CV->WavFormat.nChannels = NumChannels;
|
|
CV->WavFormat.wBitsPerSample = 16;
|
|
CV->WavFormat.nSamplesPerSec = SampleRate;
|
|
CV->WavFormat.nBlockAlign = (CV->WavFormat.nChannels * CV->WavFormat.wBitsPerSample) / 8;
|
|
CV->WavFormat.nAvgBytesPerSec = CV->WavFormat.nBlockAlign * CV->WavFormat.nSamplesPerSec;
|
|
CV->WavFormat.cbSize = 0;
|
|
|
|
// Buffer setup
|
|
CV->VoiceCaptureBufferDesc.dwSize = sizeof(DSCBUFFERDESC);
|
|
CV->VoiceCaptureBufferDesc.dwFlags = 0;
|
|
CV->VoiceCaptureBufferDesc.dwBufferBytes = CV->WavFormat.nAvgBytesPerSec / 2; // 0.5 sec buffer
|
|
CV->VoiceCaptureBufferDesc.dwReserved = 0;
|
|
CV->VoiceCaptureBufferDesc.lpwfxFormat = &CV->WavFormat;
|
|
CV->VoiceCaptureBufferDesc.dwFXCount = 0;
|
|
CV->VoiceCaptureBufferDesc.lpDSCFXDesc = nullptr;
|
|
|
|
LPDIRECTSOUNDCAPTUREBUFFER VoiceBuffer = nullptr;
|
|
|
|
hr = CV->VoiceCaptureDev->CreateCaptureBuffer(&CV->VoiceCaptureBufferDesc, &VoiceBuffer, nullptr);
|
|
if (FAILED(hr))
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to create voice capture buffer 0x%08x"), hr);
|
|
return false;
|
|
}
|
|
|
|
hr = VoiceBuffer->QueryInterface(IID_IDirectSoundCaptureBuffer8, (LPVOID*)&CV->VoiceCaptureBuffer8);
|
|
VoiceBuffer->Release();
|
|
VoiceBuffer = nullptr;
|
|
if (FAILED(hr))
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to create voice capture buffer 0x%08x"), hr);
|
|
return false;
|
|
}
|
|
|
|
CV->VoiceCaptureBufferCaps8.dwSize = sizeof(DSCBCAPS);
|
|
hr = CV->VoiceCaptureBuffer8->GetCaps(&CV->VoiceCaptureBufferCaps8);
|
|
if (FAILED(hr))
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to get voice buffer caps 0x%08x"), hr);
|
|
return false;
|
|
}
|
|
|
|
// TEST ------------------------
|
|
if (0)
|
|
{
|
|
DWORD SizeWritten8 = 0;
|
|
CV->VoiceCaptureBuffer8->GetFormat(nullptr, sizeof(WAVEFORMATEX), &SizeWritten8);
|
|
|
|
LPWAVEFORMATEX BufferFormat8 = (WAVEFORMATEX*)FMemory::Malloc(SizeWritten8);
|
|
CV->VoiceCaptureBuffer8->GetFormat(BufferFormat8, SizeWritten8, &SizeWritten8);
|
|
FMemory::Free(BufferFormat8);
|
|
}
|
|
// TEST ------------------------
|
|
|
|
if (CreateNotifications(CV->VoiceCaptureBufferCaps8.dwBufferBytes))
|
|
{
|
|
// Reset notification related values
|
|
LastCaptureTime = FPlatformTime::Seconds();
|
|
}
|
|
else
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to create voice buffer notifications"));
|
|
return false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("No voice capture device %s found."), *DeviceName);
|
|
return false;
|
|
}
|
|
|
|
UncompressedAudioBuffer.Init(0, CV->VoiceCaptureBufferDesc.dwBufferBytes);
|
|
check(UncompressedAudioBuffer.Max() >= (int32)CV->VoiceCaptureBufferCaps8.dwBufferBytes);
|
|
|
|
NumInputChannels = CV->WavFormat.nChannels;
|
|
|
|
ReleaseBuffer.Init((int32)CV->VoiceCaptureBufferCaps8.dwBufferBytes);
|
|
ReleaseBuffer.SetDelay(1);
|
|
return true;
|
|
}
|
|
|
|
void FVoiceCaptureWindows::FreeCaptureBuffer()
|
|
{
|
|
// Stop playback
|
|
Stop();
|
|
|
|
// Release all D3D8 resources
|
|
CV->Reset();
|
|
|
|
VoiceCaptureState = EVoiceCaptureState::UnInitialized;
|
|
}
|
|
|
|
void FVoiceCaptureWindows::Shutdown()
|
|
{
|
|
FreeCaptureBuffer();
|
|
}
|
|
|
|
bool FVoiceCaptureWindows::Start()
|
|
{
|
|
check(VoiceCaptureState != EVoiceCaptureState::UnInitialized);
|
|
|
|
if (CV->VoiceCaptureBuffer8 == nullptr)
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("CV->VoiceCaptureBuffer8 == nullptr"));
|
|
return false;
|
|
}
|
|
|
|
HRESULT hr = CV->VoiceCaptureBuffer8->Start(DSCBSTART_LOOPING);
|
|
if (FAILED(hr))
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to start capture 0x%08x"), hr);
|
|
return false;
|
|
}
|
|
|
|
VoiceCaptureState = EVoiceCaptureState::NoData;
|
|
return true;
|
|
}
|
|
|
|
void FVoiceCaptureWindows::Stop()
|
|
{
|
|
if (CV->VoiceCaptureBuffer8 &&
|
|
VoiceCaptureState != EVoiceCaptureState::Stopping &&
|
|
VoiceCaptureState != EVoiceCaptureState::NotCapturing)
|
|
{
|
|
CV->VoiceCaptureBuffer8->Stop();
|
|
VoiceCaptureState = EVoiceCaptureState::Stopping;
|
|
}
|
|
}
|
|
|
|
bool FVoiceCaptureWindows::ChangeDevice(const FString& DeviceName, int32 SampleRate, int32 NumChannels)
|
|
{
|
|
if (VoiceCaptureState != EVoiceCaptureState::UnInitialized)
|
|
{
|
|
return CreateCaptureBuffer(DeviceName, SampleRate, NumChannels);
|
|
}
|
|
else
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Unable to change device, not initialized"));
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool FVoiceCaptureWindows::IsCapturing()
|
|
{
|
|
if (CV->VoiceCaptureBuffer8)
|
|
{
|
|
DWORD Status = 0;
|
|
|
|
HRESULT hr = CV->VoiceCaptureBuffer8->GetStatus(&Status);
|
|
if (FAILED(hr))
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to get voice buffer status 0x%08x"), hr);
|
|
}
|
|
|
|
// Status & DSCBSTATUS_LOOPING
|
|
return Status & DSCBSTATUS_CAPTURING ? true : false;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/**
 * Reports the current capture state and how many bytes of captured audio are buffered.
 *
 * @param OutAvailableVoiceData  Receives the byte count held in UncompressedAudioBuffer,
 *                               or 0 when the state is UnInitialized or Error.
 * @return The current capture state.
 */
EVoiceCaptureState::Type FVoiceCaptureWindows::GetCaptureState(uint32& OutAvailableVoiceData) const
{
	const bool bStateUnusable =
		VoiceCaptureState == EVoiceCaptureState::UnInitialized ||
		VoiceCaptureState == EVoiceCaptureState::Error;

	if (bStateUnusable)
	{
		OutAvailableVoiceData = 0;
	}
	else
	{
		OutAvailableVoiceData = UncompressedAudioBuffer.Num();
	}

	return VoiceCaptureState;
}
|
|
|
|
void FVoiceCaptureWindows::ProcessData()
|
|
{
|
|
DWORD CurrentCapturePos = 0;
|
|
DWORD CurrentReadPos = 0;
|
|
|
|
HRESULT hr = CV->VoiceCaptureBuffer8 ? CV->VoiceCaptureBuffer8->GetCurrentPosition(&CurrentCapturePos, &CurrentReadPos) : E_FAIL;
|
|
if (FAILED(hr))
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to get voice buffer cursor position 0x%08x"), hr);
|
|
VoiceCaptureState = EVoiceCaptureState::Error;
|
|
return;
|
|
}
|
|
|
|
DWORD LockSize = ((CurrentReadPos - CV->NextCaptureOffset) + CV->VoiceCaptureBufferCaps8.dwBufferBytes) % CV->VoiceCaptureBufferCaps8.dwBufferBytes;
|
|
if(LockSize != 0)
|
|
{
|
|
//UE_LOG( LogVoiceCapture, Log, TEXT( "LockSize: %i, CurrentCapturePos: %i, CurrentReadPos: %i, NextCaptureOffset: %i" ), LockSize, CurrentCapturePos, CurrentReadPos, CV->NextCaptureOffset );
|
|
|
|
DWORD CaptureFlags = 0;
|
|
DWORD CaptureLength = 0;
|
|
void* CaptureData = nullptr;
|
|
DWORD CaptureLength2 = 0;
|
|
void* CaptureData2 = nullptr;
|
|
hr = CV->VoiceCaptureBuffer8->Lock(CV->NextCaptureOffset, LockSize,
|
|
&CaptureData, &CaptureLength,
|
|
&CaptureData2, &CaptureLength2, CaptureFlags);
|
|
if (SUCCEEDED(hr))
|
|
{
|
|
const DWORD OriginalCaptureLength = CaptureLength;
|
|
const DWORD OriginalCaptureLength2 = CaptureLength2;
|
|
|
|
if (UncompressedAudioBuffer.Num() + CaptureLength + CaptureLength2 > (DWORD)UncompressedAudioBuffer.Max())
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Resetting UncompressedAudioBuffer."));
|
|
UncompressedAudioBuffer.Empty(UncompressedAudioBuffer.Max());
|
|
VoiceCaptureState = EVoiceCaptureState::NoData;
|
|
}
|
|
|
|
const int32 Offset = UncompressedAudioBuffer.Num();
|
|
|
|
CaptureLength = FMath::Min(CaptureLength, (DWORD)UncompressedAudioBuffer.Max());
|
|
CaptureLength2 = FMath::Min(CaptureLength2, (DWORD)UncompressedAudioBuffer.Max() - CaptureLength);
|
|
|
|
|
|
UncompressedAudioBuffer.AddUninitialized(CaptureLength + CaptureLength2 + (ReleaseBuffer.GetBufferCount() * sizeof(int16)));
|
|
|
|
|
|
int16* AudioBuffer = (int16*)(UncompressedAudioBuffer.GetData() + Offset);
|
|
int16* InputBuffer = (int16*)CaptureData;
|
|
|
|
//First, if we have any cached audio from an onset mid-buffer, copy it in:
|
|
int32 SamplesPushedToUncompressedAudioBuffer = ReleaseBuffer.PopBufferedAudio(AudioBuffer, ReleaseBuffer.GetBufferCount());
|
|
AudioBuffer += SamplesPushedToUncompressedAudioBuffer;
|
|
|
|
static IConsoleVariable* SilenceDetectionThresholdCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.SilenceDetectionThreshold"));
|
|
check(SilenceDetectionThresholdCVar);
|
|
const float MicSilenceThreshold = SilenceDetectionThresholdCVar->GetFloat();
|
|
|
|
static IConsoleVariable* NoiseGateThresholdCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.MicNoiseGateThreshold"));
|
|
check(NoiseGateThresholdCVar);
|
|
const float MicNoiseGateThreshold = NoiseGateThresholdCVar->GetFloat();
|
|
|
|
static IConsoleVariable* NoiseGateAttackTimeCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.MicNoiseAttackTime"));
|
|
check(NoiseGateAttackTimeCVar);
|
|
const float MicNoiseGateAttackTime = NoiseGateAttackTimeCVar->GetFloat();
|
|
|
|
static IConsoleVariable* NoiseGateReleaseTimeCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.MicNoiseReleaseTime"));
|
|
check(NoiseGateReleaseTimeCVar);
|
|
const float MicNoiseGateReleaseTime = NoiseGateReleaseTimeCVar->GetFloat();
|
|
|
|
static IConsoleVariable* MicInputGainCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.MicInputGain"));
|
|
check(MicInputGainCVar);
|
|
const float MicInputGain = MicInputGainCVar->GetFloat();
|
|
|
|
static IConsoleVariable* MicStereoBiasCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.MicStereoBias"));
|
|
check(MicStereoBiasCVar);
|
|
const float MicStereoBias = FMath::Clamp(MicStereoBiasCVar->GetFloat(), -1.0f, 1.0f);
|
|
|
|
float LeftGain = 1.0f;
|
|
float RightGain = 1.0f;
|
|
|
|
Audio::GetStereoPan(MicStereoBias, LeftGain, RightGain);
|
|
|
|
// Since we don't interpolate the pan here, we normalize stereo gains.
|
|
const float StereoGainMax = FMath::Max(LeftGain, RightGain);
|
|
LeftGain /= StereoGainMax;
|
|
RightGain /= StereoGainMax;
|
|
|
|
bool bMicReleased = false;
|
|
|
|
CurrentSampleStart = CachedSampleStart;
|
|
const int32 TotalNumFrames = CaptureLength / sizeof(int16);
|
|
|
|
//Begin looping through the first buffer:
|
|
for (int32 FrameIndex = 0; FrameIndex < TotalNumFrames; FrameIndex += NumInputChannels)
|
|
{
|
|
int16 Temp = 0;
|
|
|
|
for (int32 ChannelIndex = 0; ChannelIndex < NumInputChannels; ChannelIndex++)
|
|
{
|
|
Temp += InputBuffer[FrameIndex + ChannelIndex];
|
|
}
|
|
|
|
|
|
float Envelope = MicLevelDetector.ProcessSample(Temp / 32768.f);
|
|
LookaheadBuffer.ProcessSample(Temp, Temp);
|
|
|
|
bIsMicActive = Envelope > MicSilenceThreshold;
|
|
|
|
// If we have just crossed the noise gate threshold, begin interpoloating to 1.0 or 0.0
|
|
const bool bIsMicAboveNoiseGateThreshold = Envelope > MicNoiseGateThreshold;
|
|
|
|
if (bIsMicAboveNoiseGateThreshold && !bWasMicAboveNoiseGateThreshold)
|
|
{
|
|
NoiseGateAttenuator.SetValue(1.0f, MicNoiseGateAttackTime);
|
|
}
|
|
else if (!bIsMicAboveNoiseGateThreshold && bWasMicAboveNoiseGateThreshold)
|
|
{
|
|
NoiseGateAttenuator.SetValue(0.0f, MicNoiseGateReleaseTime);
|
|
}
|
|
|
|
bWasMicAboveNoiseGateThreshold = bIsMicAboveNoiseGateThreshold;
|
|
|
|
if (bIsMicActive)
|
|
{
|
|
if (bMicReleased)
|
|
{
|
|
// Apply noise gate attenuation.
|
|
const float TotalMicGain = MicInputGain * NoiseGateAttenuator.GetNextValue();
|
|
for (int32 ChannelIndex = 0; ChannelIndex < NumInputChannels; ChannelIndex++)
|
|
{
|
|
const float BiasedMicGain = (ChannelIndex % 2 == 0) ? (TotalMicGain * LeftGain) : (TotalMicGain * RightGain);
|
|
|
|
Audio::TSampleRef<int16> SampleRef(InputBuffer[FrameIndex + ChannelIndex]);
|
|
SampleRef = SampleRef * BiasedMicGain;
|
|
}
|
|
|
|
ReleaseBuffer.PushFrame(&InputBuffer[FrameIndex], NumInputChannels);
|
|
|
|
|
|
if (!bSampleStartCached)
|
|
{
|
|
CachedSampleStart = SampleCounter;
|
|
bSampleStartCached = true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Apply noise gate attenuation.
|
|
const float TotalMicGain = MicInputGain * NoiseGateAttenuator.GetNextValue();
|
|
for (int32 ChannelIndex = 0; ChannelIndex < NumInputChannels; ChannelIndex++)
|
|
{
|
|
const float BiasedMicGain = (ChannelIndex % 2 == 0) ? (TotalMicGain * LeftGain) : (TotalMicGain * RightGain);
|
|
|
|
Audio::TSampleRef<int16> SampleRef(InputBuffer[FrameIndex + ChannelIndex]);
|
|
SampleRef = SampleRef * BiasedMicGain;
|
|
}
|
|
|
|
FMemory::Memcpy(&AudioBuffer[FrameIndex], &InputBuffer[FrameIndex], sizeof(int16) * NumInputChannels);
|
|
SamplesPushedToUncompressedAudioBuffer += NumInputChannels;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
bMicReleased = true;
|
|
}
|
|
SampleCounter++;
|
|
}
|
|
|
|
//Set up second buffer and loop through that:
|
|
AudioBuffer += TotalNumFrames;
|
|
InputBuffer = (int16*)CaptureData2;
|
|
const int32 TotalNumFrames2 = CaptureLength2 / sizeof(int16);
|
|
for (int32 FrameIndex = 0; FrameIndex < TotalNumFrames2; FrameIndex += NumInputChannels)
|
|
{
|
|
int16 Temp = 0;
|
|
|
|
for (int32 ChannelIndex = 0; ChannelIndex < NumInputChannels; ChannelIndex++)
|
|
{
|
|
CA_SUPPRESS(6385);
|
|
Temp += InputBuffer[FrameIndex + ChannelIndex];
|
|
}
|
|
|
|
|
|
float Envelope = MicLevelDetector.ProcessSample(static_cast<float>(Temp) / 32768.f);
|
|
LookaheadBuffer.ProcessSample(Temp, Temp);
|
|
|
|
bIsMicActive = Envelope > MicSilenceThreshold;
|
|
|
|
// If we have just crossed the noise gate threshold, begin interpoloating to 1.0 or 0.0
|
|
const bool bIsMicAboveNoiseGateThreshold = Envelope > MicNoiseGateThreshold;
|
|
|
|
if (bIsMicAboveNoiseGateThreshold && !bWasMicAboveNoiseGateThreshold)
|
|
{
|
|
NoiseGateAttenuator.SetValue(1.0f, MicNoiseGateAttackTime);
|
|
}
|
|
else if (!bIsMicAboveNoiseGateThreshold && bWasMicAboveNoiseGateThreshold)
|
|
{
|
|
NoiseGateAttenuator.SetValue(0.0f, MicNoiseGateReleaseTime);
|
|
}
|
|
|
|
bWasMicAboveNoiseGateThreshold = bIsMicAboveNoiseGateThreshold;
|
|
|
|
if (bIsMicActive)
|
|
{
|
|
if (bMicReleased)
|
|
{
|
|
// Apply noise gate attenuation.
|
|
const float TotalMicGain = MicInputGain * NoiseGateAttenuator.GetNextValue();
|
|
for (int32 ChannelIndex = 0; ChannelIndex < NumInputChannels; ChannelIndex++)
|
|
{
|
|
const float BiasedMicGain = (ChannelIndex % 2 == 0) ? (TotalMicGain * LeftGain) : (TotalMicGain * RightGain);
|
|
|
|
Audio::TSampleRef<int16> SampleRef(InputBuffer[FrameIndex + ChannelIndex]);
|
|
SampleRef = SampleRef * BiasedMicGain;
|
|
}
|
|
|
|
ReleaseBuffer.PushFrame(&InputBuffer[FrameIndex], NumInputChannels);
|
|
|
|
if (!bSampleStartCached)
|
|
{
|
|
CachedSampleStart = SampleCounter;
|
|
bSampleStartCached = true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Apply noise gate attenuation.
|
|
const float TotalMicGain = MicInputGain * NoiseGateAttenuator.GetNextValue();
|
|
for (int32 ChannelIndex = 0; ChannelIndex < NumInputChannels; ChannelIndex++)
|
|
{
|
|
const float BiasedMicGain = (ChannelIndex % 2 == 0) ? (TotalMicGain * LeftGain) : (TotalMicGain * RightGain);
|
|
|
|
Audio::TSampleRef<int16> SampleRef(InputBuffer[FrameIndex + ChannelIndex]);
|
|
SampleRef = SampleRef * BiasedMicGain;
|
|
}
|
|
|
|
FMemory::Memcpy(&AudioBuffer[FrameIndex], &InputBuffer[FrameIndex], sizeof(int16) * NumInputChannels);
|
|
SamplesPushedToUncompressedAudioBuffer += NumInputChannels;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
bMicReleased = true;
|
|
}
|
|
SampleCounter++;
|
|
}
|
|
|
|
if (!bSampleStartCached)
|
|
{
|
|
CachedSampleStart = SampleCounter;
|
|
}
|
|
|
|
bSampleStartCached = false;
|
|
|
|
UncompressedAudioBuffer.SetNum(Offset + (SamplesPushedToUncompressedAudioBuffer * sizeof(int16)), EAllowShrinking::No);
|
|
|
|
CA_SUPPRESS(6385);
|
|
CV->VoiceCaptureBuffer8->Unlock(CaptureData, OriginalCaptureLength, CaptureData2, OriginalCaptureLength2);
|
|
|
|
// Move the capture offset forward.
|
|
CV->NextCaptureOffset = (CV->NextCaptureOffset + CaptureLength) % CV->VoiceCaptureBufferCaps8.dwBufferBytes;
|
|
CV->NextCaptureOffset = (CV->NextCaptureOffset + CaptureLength2) % CV->VoiceCaptureBufferCaps8.dwBufferBytes;
|
|
|
|
|
|
if (SamplesPushedToUncompressedAudioBuffer > 0)
|
|
{
|
|
VoiceCaptureState = EVoiceCaptureState::Ok;
|
|
}
|
|
else
|
|
{
|
|
VoiceCaptureState = EVoiceCaptureState::NoData;
|
|
}
|
|
|
|
#if !UE_BUILD_SHIPPING
|
|
// TODO: look at actually using something like this for time stamping
|
|
const double NewTime = FPlatformTime::Seconds();
|
|
UE_LOG(LogVoiceCapture, VeryVerbose, TEXT("LastCapture: %f %s"), (NewTime - LastCaptureTime) * 1000.0, EVoiceCaptureState::ToString(VoiceCaptureState));
|
|
LastCaptureTime = NewTime;
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to lock voice buffer 0x%08x"), hr);
|
|
VoiceCaptureState = EVoiceCaptureState::Error;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Copies all buffered capture audio into the caller's buffer and clears the internal buffer.
 *
 * Data is only handed out while the state is Ok or Stopping. When any microphone output
 * sends are registered, the copied audio is also converted and pushed to them (mixed down
 * to mono for multi-channel input). Optionally draws a debug amplitude meter on screen.
 *
 * @param OutVoiceBuffer         Destination for 16-bit PCM bytes.
 * @param InVoiceBufferSize      Capacity of OutVoiceBuffer in bytes.
 * @param OutBytesWritten        Receives the number of bytes copied (0 when nothing was copied).
 * @param OutSampleClockCounter  Receives CurrentSampleStart when data was copied; otherwise untouched.
 * @return The capture state as it was on entry, or BufferTooSmall when the destination
 *         cannot hold the buffered data.
 */
EVoiceCaptureState::Type FVoiceCaptureWindows::GetVoiceData(uint8* OutVoiceBuffer, const uint32 InVoiceBufferSize, uint32& OutBytesWritten, uint64& OutSampleClockCounter)
{
	EVoiceCaptureState::Type ReportedState = VoiceCaptureState;
	OutBytesWritten = 0;

	const bool bHasReadableState =
		VoiceCaptureState == EVoiceCaptureState::Ok ||
		VoiceCaptureState == EVoiceCaptureState::Stopping;

	if (bHasReadableState)
	{
		if (InVoiceBufferSize < (uint32)UncompressedAudioBuffer.Num())
		{
			ReportedState = EVoiceCaptureState::BufferTooSmall;
		}
		else
		{
			// Hand over everything we have and start accumulating afresh.
			OutBytesWritten = UncompressedAudioBuffer.Num();
			FMemory::Memcpy(OutVoiceBuffer, UncompressedAudioBuffer.GetData(), OutBytesWritten);
			VoiceCaptureState = EVoiceCaptureState::NoData;
			UncompressedAudioBuffer.Reset();

			OutSampleClockCounter = CurrentSampleStart;
		}
	}

	// If we have any sends for this microphones output, push to them here.
	if (MicrophoneOutput.Num() > 0)
	{
		// Convert our buffer from int16 to float:
		int16* PcmSamples = reinterpret_cast<int16*>(OutVoiceBuffer);
		uint32 NumPcmSamples = OutBytesWritten / sizeof(int16);
		ConversionBuffer.Reset();
		// Note: Sample rate is unused for this operation.
		ConversionBuffer.Append(PcmSamples, NumPcmSamples, NumInputChannels, 16000);

		if (NumInputChannels > 1)
		{
			// For consistency, mixdown to mono.
			ConversionBuffer.MixBufferToChannels(1);
		}

		MicrophoneOutput.PushAudio(ConversionBuffer.GetData(), ConversionBuffer.GetNumSamples());
	}

	// print debug string with current amplitude:
	if (DisplayAmplitudeCvar && GEngine)
	{
		// Shared across calls so the meter refreshes at a fixed rate.
		static double TimeLastPrinted = FPlatformTime::Seconds();

		static const double AmplitudeStringDisplayRate = 0.05;
		static const int32 TotalNumTicks = 32;

		const double SecondsSinceLastPrint = FPlatformTime::Seconds() - TimeLastPrinted;
		if (SecondsSinceLastPrint > AmplitudeStringDisplayRate)
		{
			const float MicLevel = MicLevelDetector.GetValue();
			FString MeterText = FString::Printf(TEXT("Mic Amp: %.2f"), MicLevel);

			// One bar per tick of amplitude.
			const int32 NumTicks = FMath::FloorToInt(MicLevel * TotalNumTicks);
			for (int32 TickIndex = 0; TickIndex < NumTicks; ++TickIndex)
			{
				MeterText.AppendChar(TCHAR('|'));
			}

			// Blend from green towards red as the level rises.
			const FColor MeterColor = FLinearColor::LerpUsingHSV(FLinearColor::Green, FLinearColor::Red, MicLevel).ToFColor(true);

			GEngine->AddOnScreenDebugMessage(30, AmplitudeStringDisplayRate, MeterColor, MeterText, false);
			TimeLastPrinted = FPlatformTime::Seconds();
		}
	}

	return ReportedState;
}
|
|
|
|
/**
 * Convenience overload for callers that do not need the sample clock: forwards to the
 * four-argument GetVoiceData() and discards the counter.
 *
 * @param OutVoiceBuffer         Destination for 16-bit PCM bytes.
 * @param InVoiceBufferSize      Capacity of OutVoiceBuffer in bytes.
 * @param OutAvailableVoiceData  Receives the number of bytes written.
 * @return The state returned by the four-argument overload.
 */
EVoiceCaptureState::Type FVoiceCaptureWindows::GetVoiceData(uint8* OutVoiceBuffer, uint32 InVoiceBufferSize, uint32& OutAvailableVoiceData)
{
	uint64 DiscardedSampleClock = 0;
	return GetVoiceData(OutVoiceBuffer, InVoiceBufferSize, OutAvailableVoiceData, DiscardedSampleClock);
}
|
|
|
|
/**
 * @return Size in bytes of the DirectSound capture buffer as reported by its caps,
 *         or 0 while the object is UnInitialized.
 */
int32 FVoiceCaptureWindows::GetBufferSize() const
{
	if (VoiceCaptureState == EVoiceCaptureState::UnInitialized)
	{
		return 0;
	}

	return CV->VoiceCaptureBufferCaps8.dwBufferBytes;
}
|
|
|
|
/**
 * Creates the stop event and registers it with the capture buffer so the event is
 * signalled when the buffer stops (DSBPN_OFFSETSTOP).
 *
 * On any failure CV->StopEvent is left as INVALID_HANDLE_VALUE, the only value the rest
 * of this file treats as "no event".
 *
 * @param BufferSize  Size of the capture buffer in bytes (currently unused).
 * @return true when the notification was registered successfully.
 */
bool FVoiceCaptureWindows::CreateNotifications(uint32 BufferSize)
{
	LPDIRECTSOUNDNOTIFY8 NotifyInt = nullptr;

	HRESULT hr = CV->VoiceCaptureBuffer8->QueryInterface(IID_IDirectSoundNotify, (LPVOID*)&NotifyInt);
	if (FAILED(hr))
	{
		UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to create voice notification interface 0x%08x"), hr);
		return false;
	}

	bool bSuccess = false;

	// Create stop event (manual reset, initially non-signalled, unnamed)
	CV->StopEvent = CreateEvent(nullptr, true, false, nullptr);
	if (CV->StopEvent == NULL || CV->StopEvent == INVALID_HANDLE_VALUE)
	{
		UE_LOG(LogVoiceCapture, Warning, TEXT("Error creating stop event"));

		// CreateEvent reports failure with NULL. Normalise it so that Reset() and Tick(),
		// which only compare against INVALID_HANDLE_VALUE, never use a NULL handle.
		CV->StopEvent = INVALID_HANDLE_VALUE;
	}
	else
	{
		DSBPOSITIONNOTIFY StopNotify;

		// when buffer stops
		StopNotify.dwOffset = DSBPN_OFFSETSTOP;
		StopNotify.hEventNotify = CV->StopEvent;

		hr = NotifyInt->SetNotificationPositions(1, &StopNotify);
		if (SUCCEEDED(hr))
		{
			bSuccess = true;
		}
		else
		{
			UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to set stop notifications 0x%08x"), hr);

			// Registration failed, so the event is of no use; close it again.
			CloseHandle(CV->StopEvent);
			CV->StopEvent = INVALID_HANDLE_VALUE;
		}
	}

	// The notify interface is only needed for registration.
	NotifyInt->Release();

	return bSuccess;
}
|
|
|
|
bool FVoiceCaptureWindows::Tick(float DeltaTime)
|
|
{
|
|
QUICK_SCOPE_CYCLE_COUNTER(STAT_FVoiceCaptureWindows_Tick);
|
|
|
|
if (VoiceCaptureState != EVoiceCaptureState::UnInitialized &&
|
|
VoiceCaptureState != EVoiceCaptureState::NotCapturing)
|
|
{
|
|
ProcessData();
|
|
|
|
if (CV->StopEvent != INVALID_HANDLE_VALUE && WaitForSingleObject(CV->StopEvent, 0) == WAIT_OBJECT_0)
|
|
{
|
|
UE_LOG(LogVoiceCapture, Verbose, TEXT("Voice capture stopped"));
|
|
ResetEvent(CV->StopEvent);
|
|
VoiceCaptureState = EVoiceCaptureState::NotCapturing;
|
|
UncompressedAudioBuffer.Empty(UncompressedAudioBuffer.Max());
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
float FVoiceCaptureWindows::GetCurrentAmplitude() const
|
|
{
|
|
return MicLevelDetector.GetValue();
|
|
}
|
|
|
|
void FVoiceCaptureWindows::DumpState() const
|
|
{
|
|
#if !NO_LOGGING
|
|
if (CV)
|
|
{
|
|
extern FString PrintMSGUID(LPGUID Guid);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("Device %s"), *PrintMSGUID(&CV->VoiceCaptureDeviceGuid));
|
|
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("CaptureDev: 0x%08" UPTRINT_x_FMT), CV->VoiceCaptureDev);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("CaptureBuffer: 0x%08" UPTRINT_x_FMT), CV->VoiceCaptureBuffer8);
|
|
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("Capture Format"));
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- Tag: %d"), CV->WavFormat.wFormatTag);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- Channels: %d"), CV->WavFormat.nChannels);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- BitsPerSample: %d"), CV->WavFormat.wBitsPerSample);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- SamplesPerSec: %d"), CV->WavFormat.nSamplesPerSec);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- BlockAlign: %d"), CV->WavFormat.nBlockAlign);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- AvgBytesPerSec: %d"), CV->WavFormat.nAvgBytesPerSec);
|
|
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("Capture Buffer"));
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- Flags: 0x%08x"), CV->VoiceCaptureBufferDesc.dwFlags);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- BufferBytes: %d"), CV->VoiceCaptureBufferDesc.dwBufferBytes);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- Format: 0x%08" UPTRINT_x_FMT), CV->VoiceCaptureBufferDesc.lpwfxFormat);
|
|
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("Device Caps"));
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- Size: %d"), CV->VoiceCaptureDevCaps.dwSize);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- Flags: 0x%08x"), CV->VoiceCaptureDevCaps.dwFlags);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- Formats: %d"), CV->VoiceCaptureDevCaps.dwFormats);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- Channels: %d"), CV->VoiceCaptureDevCaps.dwChannels);
|
|
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("D3D8 Caps"));
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- Size: %d"), CV->VoiceCaptureBufferCaps8.dwSize);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- Flags: 0x%08x"), CV->VoiceCaptureBufferCaps8.dwFlags);
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("- BufferBytes: %d"), CV->VoiceCaptureBufferCaps8.dwBufferBytes);
|
|
}
|
|
else
|
|
{
|
|
UE_LOG(LogVoiceCapture, Display, TEXT("No capture device to dump state"));
|
|
}
|
|
#endif // !NO_LOGGING
|
|
}
|
|
|
|
#include "Windows/HideWindowsPlatformTypes.h"
|
|
|
|
#endif // PLATFORM_SUPPORTS_VOICE_CAPTURE
|