Files
Brandyn / Techy fcc1b09210 init
2026-04-04 15:40:51 -05:00

879 lines
28 KiB
C++

// Copyright Epic Games, Inc. All Rights Reserved.
#include "VoiceCaptureWindows.h"
#include "VoicePrivate.h"
#include "VoiceModule.h"
#include "DSP/Dsp.h"
#if PLATFORM_SUPPORTS_VOICE_CAPTURE
#include "Windows/AllowWindowsPlatformTypes.h"
// Debug switch: when non-zero, GetVoiceData() draws a live amplitude meter on screen.
static int32 DisplayAmplitudeCvar = 0;
FAutoConsoleVariableRef CVarDisplayAmplitude(
	TEXT("voice.debug.PrintAmplitude"),
	DisplayAmplitudeCvar,
	TEXT("when set to 1, the current incoming amplitude of the VOIP engine will be displayed on screen.\n")
	TEXT("0: disabled, 1: enabled."),
	ECVF_Default);
/**
 * Windows/DirectSound specific capture state, kept behind a pimpl pointer (see
 * FVoiceCaptureWindows::CV) so Windows types stay out of the public header.
 * Reset() releases all owned resources and zeroes the structs, and is safe to
 * call repeatedly.
 */
struct FVoiceCaptureWindowsVars
{
	/** GUID of current voice capture device */
	GUID VoiceCaptureDeviceGuid;
	/** Voice capture device */
	LPDIRECTSOUNDCAPTURE8 VoiceCaptureDev;
	/** Voice capture device caps */
	DSCCAPS VoiceCaptureDevCaps;
	/** Voice capture buffer */
	LPDIRECTSOUNDCAPTUREBUFFER8 VoiceCaptureBuffer8;
	/** Wave format of buffer */
	WAVEFORMATEX WavFormat;
	/** Buffer description */
	DSCBUFFERDESC VoiceCaptureBufferDesc;
	/** Buffer caps */
	DSCBCAPS VoiceCaptureBufferCaps8;
	/** Notification events */
	HANDLE StopEvent;
	/** Current audio position of valid data in capture buffer */
	DWORD NextCaptureOffset;

	FVoiceCaptureWindowsVars() :
		VoiceCaptureDev(nullptr),
		VoiceCaptureBuffer8(nullptr),
		NextCaptureOffset(0)
	{
		// StopEvent must be in a known state before Reset() tries to close it.
		StopEvent = INVALID_HANDLE_VALUE;
		Reset();
	}

	/** Release all held OS resources and zero out every descriptor struct. */
	void Reset()
	{
		if (StopEvent != INVALID_HANDLE_VALUE)
		{
			CloseHandle(StopEvent);
			StopEvent = INVALID_HANDLE_VALUE;
		}

		// Free up DirectSound resources: buffer before the device that created it.
		if (VoiceCaptureBuffer8)
		{
			VoiceCaptureBuffer8->Release();
			VoiceCaptureBuffer8 = nullptr;
		}

		if (VoiceCaptureDev)
		{
			VoiceCaptureDev->Release();
			VoiceCaptureDev = nullptr;
		}

		NextCaptureOffset = 0;
		FMemory::Memzero(&VoiceCaptureDeviceGuid, sizeof(GUID));
		FMemory::Memzero(&VoiceCaptureDevCaps, sizeof(VoiceCaptureDevCaps));
		FMemory::Memzero(&WavFormat, sizeof(WavFormat));
		FMemory::Memzero(&VoiceCaptureBufferDesc, sizeof(VoiceCaptureBufferDesc));
		FMemory::Memzero(&VoiceCaptureBufferCaps8, sizeof(VoiceCaptureBufferCaps8));
	}
};
/** Allocates the platform-private state; real setup happens in Init(). */
FVoiceCaptureWindows::FVoiceCaptureWindows() :
	CV(nullptr),
	LastCaptureTime(0.0),
	VoiceCaptureState(EVoiceCaptureState::UnInitialized)
{
	CV = new FVoiceCaptureWindowsVars();
}
/** Tears down the capture buffer, unregisters from the device manager, and frees the pimpl. */
FVoiceCaptureWindows::~FVoiceCaptureWindows()
{
	// Release DirectSound resources first.
	Shutdown();

	// Detach from the shared capture-device manager, if it is still alive.
	if (FVoiceCaptureDeviceWindows* DeviceManager = FVoiceCaptureDeviceWindows::Get())
	{
		DeviceManager->FreeVoiceCaptureObject(this);
	}

	// delete on nullptr is a no-op, so no guard is needed.
	delete CV;
	CV = nullptr;
}
/**
 * One-time setup of the capture pipeline: silence-detection envelope follower,
 * lookahead delay, noise-gate attenuator, then the DirectSound capture buffer.
 *
 * @param DeviceName   Capture device to open; empty selects the default device.
 * @param SampleRate   Requested sample rate in Hz (validated in CreateCaptureBuffer).
 * @param NumChannels  Requested channel count (validated in CreateCaptureBuffer).
 * @return true if the capture buffer was created successfully.
 */
bool FVoiceCaptureWindows::Init(const FString& DeviceName, int32 SampleRate, int32 NumChannels)
{
	FVoiceCaptureDeviceWindows* VoiceDev = FVoiceCaptureDeviceWindows::Get();
	if (!VoiceDev)
	{
		UE_LOG(LogVoiceCapture, Warning, TEXT("No voice capture interface."));
		return false;
	}

	// init the sample counter to 0 on init
	SampleCounter = 0;
	CachedSampleStart = 0;

	// set up level detector — attack/release times come from global voice cvars,
	// which are expected to be registered elsewhere before Init runs.
	static IConsoleVariable* SilenceDetectionAttackCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.SilenceDetectionAttackTime"));
	check(SilenceDetectionAttackCVar);
	static IConsoleVariable* SilenceDetectionReleaseCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.SilenceDetectionReleaseTime"));
	check(SilenceDetectionReleaseCVar);

	Audio::FInlineEnvelopeFollowerInitParams EnvelopeFollowerInitParams;
	EnvelopeFollowerInitParams.SampleRate = SampleRate;
	EnvelopeFollowerInitParams.AttackTimeMsec = SilenceDetectionAttackCVar->GetFloat();
	EnvelopeFollowerInitParams.ReleaseTimeMsec = SilenceDetectionReleaseCVar->GetFloat();
	EnvelopeFollowerInitParams.Mode = Audio::EPeakMode::Peak;
	EnvelopeFollowerInitParams.bIsAnalog = MicSilenceDetectionConfig::IsAnalog;
	MicLevelDetector.Init(EnvelopeFollowerInitParams);

	// Size the lookahead delay to cover the detector's attack window so onsets
	// are not clipped while the envelope ramps up.
	const int32 AttackInSamples = SampleRate * SilenceDetectionAttackCVar->GetFloat() * 0.001f;
	LookaheadBuffer.Init(AttackInSamples + 1);
	LookaheadBuffer.SetDelay(AttackInSamples);

	NoiseGateAttenuator.Init(SampleRate);

	bIsMicActive = false;
	bWasMicAboveNoiseGateThreshold = false;

	// Empty device name means "use the system default capture device".
	return CreateCaptureBuffer(DeviceName.IsEmpty() ? VoiceDev->DefaultVoiceCaptureDevice.DeviceName : DeviceName, SampleRate, NumChannels);
}
bool FVoiceCaptureWindows::CreateCaptureBuffer(const FString& DeviceName, int32 SampleRate, int32 NumChannels)
{
// Free the previous buffer
FreeCaptureBuffer();
VoiceCaptureState = EVoiceCaptureState::NotCapturing;
if (SampleRate < 8000 || SampleRate > 48000)
{
UE_LOG(LogVoiceCapture, Warning, TEXT("Voice capture doesn't support %d hz"), SampleRate);
return false;
}
if (NumChannels < 0 || NumChannels > 2)
{
UE_LOG(LogVoiceCapture, Warning, TEXT("Voice capture only supports 1 or 2 channels"));
return false;
}
FVoiceCaptureDeviceWindows* VoiceDev = FVoiceCaptureDeviceWindows::Get();
if (!VoiceDev)
{
UE_LOG(LogVoiceCapture, Warning, TEXT("No voice capture interface."));
return false;
}
FVoiceCaptureDeviceWindows::FCaptureDeviceInfo* DeviceInfo = nullptr;
if (DeviceName.IsEmpty())
{
DeviceInfo = &VoiceDev->DefaultVoiceCaptureDevice;
}
else
{
DeviceInfo = VoiceDev->Devices.Find(DeviceName);
}
if (DeviceInfo)
{
UE_LOG(LogVoiceCapture, Display, TEXT("Creating capture %s [%d:%d]"), *DeviceInfo->DeviceName, SampleRate, NumChannels);
CV->VoiceCaptureDeviceGuid = DeviceInfo->DeviceId;
// DSDEVID_DefaultCapture WAVEINCAPS
HRESULT hr = DirectSoundCaptureCreate8(&DeviceInfo->DeviceId, &CV->VoiceCaptureDev, nullptr);
if (FAILED(hr))
{
//DSERR_ALLOCATED, DSERR_INVALIDPARAM, DSERR_NOAGGREGATION, DSERR_OUTOFMEMORY
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to create capture device 0x%08x"), hr);
return false;
}
// Device capabilities
CV->VoiceCaptureDevCaps.dwSize = sizeof(DSCCAPS);
hr = CV->VoiceCaptureDev->GetCaps(&CV->VoiceCaptureDevCaps);
if (FAILED(hr))
{
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to get mic device caps 0x%08x"), hr);
return false;
}
// Wave format setup
CV->WavFormat.wFormatTag = WAVE_FORMAT_PCM;
CV->WavFormat.nChannels = NumChannels;
CV->WavFormat.wBitsPerSample = 16;
CV->WavFormat.nSamplesPerSec = SampleRate;
CV->WavFormat.nBlockAlign = (CV->WavFormat.nChannels * CV->WavFormat.wBitsPerSample) / 8;
CV->WavFormat.nAvgBytesPerSec = CV->WavFormat.nBlockAlign * CV->WavFormat.nSamplesPerSec;
CV->WavFormat.cbSize = 0;
// Buffer setup
CV->VoiceCaptureBufferDesc.dwSize = sizeof(DSCBUFFERDESC);
CV->VoiceCaptureBufferDesc.dwFlags = 0;
CV->VoiceCaptureBufferDesc.dwBufferBytes = CV->WavFormat.nAvgBytesPerSec / 2; // 0.5 sec buffer
CV->VoiceCaptureBufferDesc.dwReserved = 0;
CV->VoiceCaptureBufferDesc.lpwfxFormat = &CV->WavFormat;
CV->VoiceCaptureBufferDesc.dwFXCount = 0;
CV->VoiceCaptureBufferDesc.lpDSCFXDesc = nullptr;
LPDIRECTSOUNDCAPTUREBUFFER VoiceBuffer = nullptr;
hr = CV->VoiceCaptureDev->CreateCaptureBuffer(&CV->VoiceCaptureBufferDesc, &VoiceBuffer, nullptr);
if (FAILED(hr))
{
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to create voice capture buffer 0x%08x"), hr);
return false;
}
hr = VoiceBuffer->QueryInterface(IID_IDirectSoundCaptureBuffer8, (LPVOID*)&CV->VoiceCaptureBuffer8);
VoiceBuffer->Release();
VoiceBuffer = nullptr;
if (FAILED(hr))
{
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to create voice capture buffer 0x%08x"), hr);
return false;
}
CV->VoiceCaptureBufferCaps8.dwSize = sizeof(DSCBCAPS);
hr = CV->VoiceCaptureBuffer8->GetCaps(&CV->VoiceCaptureBufferCaps8);
if (FAILED(hr))
{
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to get voice buffer caps 0x%08x"), hr);
return false;
}
// TEST ------------------------
if (0)
{
DWORD SizeWritten8 = 0;
CV->VoiceCaptureBuffer8->GetFormat(nullptr, sizeof(WAVEFORMATEX), &SizeWritten8);
LPWAVEFORMATEX BufferFormat8 = (WAVEFORMATEX*)FMemory::Malloc(SizeWritten8);
CV->VoiceCaptureBuffer8->GetFormat(BufferFormat8, SizeWritten8, &SizeWritten8);
FMemory::Free(BufferFormat8);
}
// TEST ------------------------
if (CreateNotifications(CV->VoiceCaptureBufferCaps8.dwBufferBytes))
{
// Reset notification related values
LastCaptureTime = FPlatformTime::Seconds();
}
else
{
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to create voice buffer notifications"));
return false;
}
}
else
{
UE_LOG(LogVoiceCapture, Warning, TEXT("No voice capture device %s found."), *DeviceName);
return false;
}
UncompressedAudioBuffer.Init(0, CV->VoiceCaptureBufferDesc.dwBufferBytes);
check(UncompressedAudioBuffer.Max() >= (int32)CV->VoiceCaptureBufferCaps8.dwBufferBytes);
NumInputChannels = CV->WavFormat.nChannels;
ReleaseBuffer.Init((int32)CV->VoiceCaptureBufferCaps8.dwBufferBytes);
ReleaseBuffer.SetDelay(1);
return true;
}
/** Stops capture and releases all DirectSound resources; state returns to UnInitialized. */
void FVoiceCaptureWindows::FreeCaptureBuffer()
{
	// Stop capture before releasing the buffer it runs on.
	Stop();

	// Release all DirectSound resources (buffer, device, stop event).
	CV->Reset();

	VoiceCaptureState = EVoiceCaptureState::UnInitialized;
}
/** Full teardown; identical to FreeCaptureBuffer and safe to call multiple times. */
void FVoiceCaptureWindows::Shutdown()
{
	FreeCaptureBuffer();
}
bool FVoiceCaptureWindows::Start()
{
check(VoiceCaptureState != EVoiceCaptureState::UnInitialized);
if (CV->VoiceCaptureBuffer8 == nullptr)
{
UE_LOG(LogVoiceCapture, Warning, TEXT("CV->VoiceCaptureBuffer8 == nullptr"));
return false;
}
HRESULT hr = CV->VoiceCaptureBuffer8->Start(DSCBSTART_LOOPING);
if (FAILED(hr))
{
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to start capture 0x%08x"), hr);
return false;
}
VoiceCaptureState = EVoiceCaptureState::NoData;
return true;
}
/** Requests capture stop; the state machine moves to Stopping until Tick observes the stop event. */
void FVoiceCaptureWindows::Stop()
{
	// Nothing to do without a live capture buffer.
	if (!CV->VoiceCaptureBuffer8)
	{
		return;
	}

	// Already stopped or stop already requested.
	if (VoiceCaptureState == EVoiceCaptureState::Stopping ||
		VoiceCaptureState == EVoiceCaptureState::NotCapturing)
	{
		return;
	}

	CV->VoiceCaptureBuffer8->Stop();
	VoiceCaptureState = EVoiceCaptureState::Stopping;
}
/**
 * Switches capture to a different device/format by rebuilding the capture buffer.
 * Only valid after a successful Init.
 * @return true if the new capture buffer was created.
 */
bool FVoiceCaptureWindows::ChangeDevice(const FString& DeviceName, int32 SampleRate, int32 NumChannels)
{
	if (VoiceCaptureState == EVoiceCaptureState::UnInitialized)
	{
		UE_LOG(LogVoiceCapture, Warning, TEXT("Unable to change device, not initialized"));
		return false;
	}

	// Tears down the old buffer and builds a new one against the requested device.
	return CreateCaptureBuffer(DeviceName, SampleRate, NumChannels);
}
bool FVoiceCaptureWindows::IsCapturing()
{
if (CV->VoiceCaptureBuffer8)
{
DWORD Status = 0;
HRESULT hr = CV->VoiceCaptureBuffer8->GetStatus(&Status);
if (FAILED(hr))
{
UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to get voice buffer status 0x%08x"), hr);
}
// Status & DSCBSTATUS_LOOPING
return Status & DSCBSTATUS_CAPTURING ? true : false;
}
return false;
}
/**
 * Reports the current state machine value and how many bytes of processed audio
 * are waiting in UncompressedAudioBuffer.
 * @param OutAvailableVoiceData receives the pending byte count (0 when uninitialized/errored).
 */
EVoiceCaptureState::Type FVoiceCaptureWindows::GetCaptureState(uint32& OutAvailableVoiceData) const
{
	const bool bHasUsableData =
		VoiceCaptureState != EVoiceCaptureState::UnInitialized &&
		VoiceCaptureState != EVoiceCaptureState::Error;

	OutAvailableVoiceData = bHasUsableData ? UncompressedAudioBuffer.Num() : 0;
	return VoiceCaptureState;
}
/**
 * Drains newly captured audio from the circular DirectSound buffer, runs silence
 * detection and noise gating per frame, and appends surviving samples to
 * UncompressedAudioBuffer for retrieval by GetVoiceData().
 * Updates VoiceCaptureState to Ok / NoData / Error accordingly.
 */
void FVoiceCaptureWindows::ProcessData()
{
	DWORD CurrentCapturePos = 0;
	DWORD CurrentReadPos = 0;
	HRESULT hr = CV->VoiceCaptureBuffer8 ? CV->VoiceCaptureBuffer8->GetCurrentPosition(&CurrentCapturePos, &CurrentReadPos) : E_FAIL;
	if (FAILED(hr))
	{
		UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to get voice buffer cursor position 0x%08x"), hr);
		VoiceCaptureState = EVoiceCaptureState::Error;
		return;
	}

	// Bytes of safe-to-read data between our last consumed offset and the device
	// read cursor; the modulo arithmetic handles circular-buffer wraparound.
	DWORD LockSize = ((CurrentReadPos - CV->NextCaptureOffset) + CV->VoiceCaptureBufferCaps8.dwBufferBytes) % CV->VoiceCaptureBufferCaps8.dwBufferBytes;
	if (LockSize != 0)
	{
		//UE_LOG( LogVoiceCapture, Log, TEXT( "LockSize: %i, CurrentCapturePos: %i, CurrentReadPos: %i, NextCaptureOffset: %i" ), LockSize, CurrentCapturePos, CurrentReadPos, CV->NextCaptureOffset );

		// Lock may return the region in two pieces when it wraps the end of the
		// circular buffer: (CaptureData, CaptureLength) and (CaptureData2, CaptureLength2).
		DWORD CaptureFlags = 0;
		DWORD CaptureLength = 0;
		void* CaptureData = nullptr;
		DWORD CaptureLength2 = 0;
		void* CaptureData2 = nullptr;
		hr = CV->VoiceCaptureBuffer8->Lock(CV->NextCaptureOffset, LockSize,
			&CaptureData, &CaptureLength,
			&CaptureData2, &CaptureLength2, CaptureFlags);
		if (SUCCEEDED(hr))
		{
			// Unlock must be given the originally locked lengths, so stash them
			// before CaptureLength/CaptureLength2 are clamped below.
			const DWORD OriginalCaptureLength = CaptureLength;
			const DWORD OriginalCaptureLength2 = CaptureLength2;

			// If appending would overflow the scratch buffer, drop the backlog.
			if (UncompressedAudioBuffer.Num() + CaptureLength + CaptureLength2 > (DWORD)UncompressedAudioBuffer.Max())
			{
				UE_LOG(LogVoiceCapture, Warning, TEXT("Resetting UncompressedAudioBuffer."));
				UncompressedAudioBuffer.Empty(UncompressedAudioBuffer.Max());
				VoiceCaptureState = EVoiceCaptureState::NoData;
			}

			const int32 Offset = UncompressedAudioBuffer.Num();
			CaptureLength = FMath::Min(CaptureLength, (DWORD)UncompressedAudioBuffer.Max());
			CaptureLength2 = FMath::Min(CaptureLength2, (DWORD)UncompressedAudioBuffer.Max() - CaptureLength);

			// Reserve room for both lock regions plus any audio held back in the
			// release buffer from a previous onset.
			UncompressedAudioBuffer.AddUninitialized(CaptureLength + CaptureLength2 + (ReleaseBuffer.GetBufferCount() * sizeof(int16)));

			int16* AudioBuffer = (int16*)(UncompressedAudioBuffer.GetData() + Offset);
			int16* InputBuffer = (int16*)CaptureData;

			//First, if we have any cached audio from an onset mid-buffer, copy it in:
			int32 SamplesPushedToUncompressedAudioBuffer = ReleaseBuffer.PopBufferedAudio(AudioBuffer, ReleaseBuffer.GetBufferCount());
			AudioBuffer += SamplesPushedToUncompressedAudioBuffer;

			// Pull per-frame tuning values from the voice cvars (cached lookups).
			static IConsoleVariable* SilenceDetectionThresholdCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.SilenceDetectionThreshold"));
			check(SilenceDetectionThresholdCVar);
			const float MicSilenceThreshold = SilenceDetectionThresholdCVar->GetFloat();

			static IConsoleVariable* NoiseGateThresholdCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.MicNoiseGateThreshold"));
			check(NoiseGateThresholdCVar);
			const float MicNoiseGateThreshold = NoiseGateThresholdCVar->GetFloat();

			static IConsoleVariable* NoiseGateAttackTimeCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.MicNoiseAttackTime"));
			check(NoiseGateAttackTimeCVar);
			const float MicNoiseGateAttackTime = NoiseGateAttackTimeCVar->GetFloat();

			static IConsoleVariable* NoiseGateReleaseTimeCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.MicNoiseReleaseTime"));
			check(NoiseGateReleaseTimeCVar);
			const float MicNoiseGateReleaseTime = NoiseGateReleaseTimeCVar->GetFloat();

			static IConsoleVariable* MicInputGainCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.MicInputGain"));
			check(MicInputGainCVar);
			const float MicInputGain = MicInputGainCVar->GetFloat();

			static IConsoleVariable* MicStereoBiasCVar = IConsoleManager::Get().FindConsoleVariable(TEXT("voice.MicStereoBias"));
			check(MicStereoBiasCVar);
			const float MicStereoBias = FMath::Clamp(MicStereoBiasCVar->GetFloat(), -1.0f, 1.0f);

			float LeftGain = 1.0f;
			float RightGain = 1.0f;
			Audio::GetStereoPan(MicStereoBias, LeftGain, RightGain);

			// Since we don't interpolate the pan here, we normalize stereo gains.
			const float StereoGainMax = FMath::Max(LeftGain, RightGain);
			LeftGain /= StereoGainMax;
			RightGain /= StereoGainMax;

			bool bMicReleased = false;
			CurrentSampleStart = CachedSampleStart;

			// NOTE(review): "frames" below actually counts int16 samples and the
			// loop steps by NumInputChannels — one iteration per audio frame.
			const int32 TotalNumFrames = CaptureLength / sizeof(int16);

			//Begin looping through the first buffer:
			for (int32 FrameIndex = 0; FrameIndex < TotalNumFrames; FrameIndex += NumInputChannels)
			{
				// Sum channels to a mono value for envelope detection.
				int16 Temp = 0;
				for (int32 ChannelIndex = 0; ChannelIndex < NumInputChannels; ChannelIndex++)
				{
					Temp += InputBuffer[FrameIndex + ChannelIndex];
				}

				float Envelope = MicLevelDetector.ProcessSample(Temp / 32768.f);
				LookaheadBuffer.ProcessSample(Temp, Temp);

				bIsMicActive = Envelope > MicSilenceThreshold;

				// If we have just crossed the noise gate threshold, begin interpolating to 1.0 or 0.0
				const bool bIsMicAboveNoiseGateThreshold = Envelope > MicNoiseGateThreshold;
				if (bIsMicAboveNoiseGateThreshold && !bWasMicAboveNoiseGateThreshold)
				{
					NoiseGateAttenuator.SetValue(1.0f, MicNoiseGateAttackTime);
				}
				else if (!bIsMicAboveNoiseGateThreshold && bWasMicAboveNoiseGateThreshold)
				{
					NoiseGateAttenuator.SetValue(0.0f, MicNoiseGateReleaseTime);
				}
				bWasMicAboveNoiseGateThreshold = bIsMicAboveNoiseGateThreshold;

				if (bIsMicActive)
				{
					if (bMicReleased)
					{
						// Mic re-activated after a silent stretch: gate the frame
						// and hold it in the release buffer until the next drain.
						// Apply noise gate attenuation.
						const float TotalMicGain = MicInputGain * NoiseGateAttenuator.GetNextValue();
						for (int32 ChannelIndex = 0; ChannelIndex < NumInputChannels; ChannelIndex++)
						{
							const float BiasedMicGain = (ChannelIndex % 2 == 0) ? (TotalMicGain * LeftGain) : (TotalMicGain * RightGain);
							Audio::TSampleRef<int16> SampleRef(InputBuffer[FrameIndex + ChannelIndex]);
							SampleRef = SampleRef * BiasedMicGain;
						}

						ReleaseBuffer.PushFrame(&InputBuffer[FrameIndex], NumInputChannels);

						// Remember where this onset began in the sample clock.
						if (!bSampleStartCached)
						{
							CachedSampleStart = SampleCounter;
							bSampleStartCached = true;
						}
					}
					else
					{
						// Apply noise gate attenuation.
						const float TotalMicGain = MicInputGain * NoiseGateAttenuator.GetNextValue();
						for (int32 ChannelIndex = 0; ChannelIndex < NumInputChannels; ChannelIndex++)
						{
							const float BiasedMicGain = (ChannelIndex % 2 == 0) ? (TotalMicGain * LeftGain) : (TotalMicGain * RightGain);
							Audio::TSampleRef<int16> SampleRef(InputBuffer[FrameIndex + ChannelIndex]);
							SampleRef = SampleRef * BiasedMicGain;
						}

						FMemory::Memcpy(&AudioBuffer[FrameIndex], &InputBuffer[FrameIndex], sizeof(int16) * NumInputChannels);
						SamplesPushedToUncompressedAudioBuffer += NumInputChannels;
					}
				}
				else
				{
					// Silent frame: drop it and flag that the mic went quiet.
					bMicReleased = true;
				}

				SampleCounter++;
			}

			//Set up second buffer and loop through that:
			AudioBuffer += TotalNumFrames;
			InputBuffer = (int16*)CaptureData2;
			const int32 TotalNumFrames2 = CaptureLength2 / sizeof(int16);

			// Same per-frame pipeline as above, applied to the wrapped region.
			for (int32 FrameIndex = 0; FrameIndex < TotalNumFrames2; FrameIndex += NumInputChannels)
			{
				int16 Temp = 0;
				for (int32 ChannelIndex = 0; ChannelIndex < NumInputChannels; ChannelIndex++)
				{
					CA_SUPPRESS(6385);
					Temp += InputBuffer[FrameIndex + ChannelIndex];
				}

				float Envelope = MicLevelDetector.ProcessSample(static_cast<float>(Temp) / 32768.f);
				LookaheadBuffer.ProcessSample(Temp, Temp);

				bIsMicActive = Envelope > MicSilenceThreshold;

				// If we have just crossed the noise gate threshold, begin interpolating to 1.0 or 0.0
				const bool bIsMicAboveNoiseGateThreshold = Envelope > MicNoiseGateThreshold;
				if (bIsMicAboveNoiseGateThreshold && !bWasMicAboveNoiseGateThreshold)
				{
					NoiseGateAttenuator.SetValue(1.0f, MicNoiseGateAttackTime);
				}
				else if (!bIsMicAboveNoiseGateThreshold && bWasMicAboveNoiseGateThreshold)
				{
					NoiseGateAttenuator.SetValue(0.0f, MicNoiseGateReleaseTime);
				}
				bWasMicAboveNoiseGateThreshold = bIsMicAboveNoiseGateThreshold;

				if (bIsMicActive)
				{
					if (bMicReleased)
					{
						// Apply noise gate attenuation.
						const float TotalMicGain = MicInputGain * NoiseGateAttenuator.GetNextValue();
						for (int32 ChannelIndex = 0; ChannelIndex < NumInputChannels; ChannelIndex++)
						{
							const float BiasedMicGain = (ChannelIndex % 2 == 0) ? (TotalMicGain * LeftGain) : (TotalMicGain * RightGain);
							Audio::TSampleRef<int16> SampleRef(InputBuffer[FrameIndex + ChannelIndex]);
							SampleRef = SampleRef * BiasedMicGain;
						}

						ReleaseBuffer.PushFrame(&InputBuffer[FrameIndex], NumInputChannels);

						if (!bSampleStartCached)
						{
							CachedSampleStart = SampleCounter;
							bSampleStartCached = true;
						}
					}
					else
					{
						// Apply noise gate attenuation.
						const float TotalMicGain = MicInputGain * NoiseGateAttenuator.GetNextValue();
						for (int32 ChannelIndex = 0; ChannelIndex < NumInputChannels; ChannelIndex++)
						{
							const float BiasedMicGain = (ChannelIndex % 2 == 0) ? (TotalMicGain * LeftGain) : (TotalMicGain * RightGain);
							Audio::TSampleRef<int16> SampleRef(InputBuffer[FrameIndex + ChannelIndex]);
							SampleRef = SampleRef * BiasedMicGain;
						}

						FMemory::Memcpy(&AudioBuffer[FrameIndex], &InputBuffer[FrameIndex], sizeof(int16) * NumInputChannels);
						SamplesPushedToUncompressedAudioBuffer += NumInputChannels;
					}
				}
				else
				{
					bMicReleased = true;
				}

				SampleCounter++;
			}

			// If no onset was cached this pass, the next batch starts "now".
			if (!bSampleStartCached)
			{
				CachedSampleStart = SampleCounter;
			}
			bSampleStartCached = false;

			// Shrink back to the bytes actually written (gated frames were skipped).
			UncompressedAudioBuffer.SetNum(Offset + (SamplesPushedToUncompressedAudioBuffer * sizeof(int16)), EAllowShrinking::No);

			CA_SUPPRESS(6385);
			CV->VoiceCaptureBuffer8->Unlock(CaptureData, OriginalCaptureLength, CaptureData2, OriginalCaptureLength2);

			// Move the capture offset forward.
			CV->NextCaptureOffset = (CV->NextCaptureOffset + CaptureLength) % CV->VoiceCaptureBufferCaps8.dwBufferBytes;
			CV->NextCaptureOffset = (CV->NextCaptureOffset + CaptureLength2) % CV->VoiceCaptureBufferCaps8.dwBufferBytes;

			if (SamplesPushedToUncompressedAudioBuffer > 0)
			{
				VoiceCaptureState = EVoiceCaptureState::Ok;
			}
			else
			{
				VoiceCaptureState = EVoiceCaptureState::NoData;
			}

#if !UE_BUILD_SHIPPING
			// TODO: look at actually using something like this for time stamping
			const double NewTime = FPlatformTime::Seconds();
			UE_LOG(LogVoiceCapture, VeryVerbose, TEXT("LastCapture: %f %s"), (NewTime - LastCaptureTime) * 1000.0, EVoiceCaptureState::ToString(VoiceCaptureState));
			LastCaptureTime = NewTime;
#endif
		}
		else
		{
			UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to lock voice buffer 0x%08x"), hr);
			VoiceCaptureState = EVoiceCaptureState::Error;
		}
	}
}
/**
 * Copies all pending processed audio into the caller's buffer and resets the
 * internal accumulation buffer.
 *
 * @param OutVoiceBuffer        Destination for 16-bit PCM data.
 * @param InVoiceBufferSize     Capacity of OutVoiceBuffer in bytes.
 * @param OutBytesWritten       Receives the number of bytes copied (0 on no data).
 * @param OutSampleClockCounter Receives the sample-clock value at which this batch started.
 * @return the capture state; BufferTooSmall if the destination cannot hold the pending data.
 */
EVoiceCaptureState::Type FVoiceCaptureWindows::GetVoiceData(uint8* OutVoiceBuffer, const uint32 InVoiceBufferSize, uint32& OutBytesWritten, uint64& OutSampleClockCounter)
{
	EVoiceCaptureState::Type NewMicState = VoiceCaptureState;
	OutBytesWritten = 0;

	if (VoiceCaptureState == EVoiceCaptureState::Ok ||
		VoiceCaptureState == EVoiceCaptureState::Stopping)
	{
		if (InVoiceBufferSize >= (uint32) UncompressedAudioBuffer.Num())
		{
			OutBytesWritten = UncompressedAudioBuffer.Num();
			FMemory::Memcpy(OutVoiceBuffer, UncompressedAudioBuffer.GetData(), OutBytesWritten);

			// Data consumed: go back to waiting for the next ProcessData pass.
			VoiceCaptureState = EVoiceCaptureState::NoData;
			UncompressedAudioBuffer.Reset();

			OutSampleClockCounter = CurrentSampleStart;
		}
		else
		{
			// Leave internal state untouched so the caller can retry with a bigger buffer.
			NewMicState = EVoiceCaptureState::BufferTooSmall;
		}
	}

	// If we have any sends for this microphones output, push to them here.
	if (MicrophoneOutput.Num() > 0)
	{
		// Convert our buffer from int16 to float:
		int16* OutputData = reinterpret_cast<int16*>(OutVoiceBuffer);
		uint32 NumSamples = OutBytesWritten / sizeof(int16);

		ConversionBuffer.Reset();

		// Note: Sample rate is unused for this operation.
		ConversionBuffer.Append(OutputData, NumSamples, NumInputChannels, 16000);

		if (NumInputChannels > 1)
		{
			// For consistency, mixdown to mono.
			ConversionBuffer.MixBufferToChannels(1);
		}

		MicrophoneOutput.PushAudio(ConversionBuffer.GetData(), ConversionBuffer.GetNumSamples());
	}

	// print debug string with current amplitude (see voice.debug.PrintAmplitude):
	if (DisplayAmplitudeCvar && GEngine)
	{
		static double TimeLastPrinted = FPlatformTime::Seconds();
		static const double AmplitudeStringDisplayRate = 0.05;
		static const int32 TotalNumTicks = 32;

		// Rate-limit the on-screen meter so it doesn't spam every frame.
		if (FPlatformTime::Seconds() - TimeLastPrinted > AmplitudeStringDisplayRate)
		{
			const float MicLevel = MicLevelDetector.GetValue();
			FString PrintString = FString::Printf(TEXT("Mic Amp: %.2f"), MicLevel);

			// Bar of '|' characters proportional to the current level.
			int32 NumTicks = FMath::FloorToInt(MicLevel * TotalNumTicks);
			for (int32 Iteration = 0; Iteration < NumTicks; Iteration++)
			{
				PrintString.AppendChar(TCHAR('|'));
			}

			// Green (quiet) through red (loud).
			FColor TextColor = FLinearColor::LerpUsingHSV(FLinearColor::Green, FLinearColor::Red, MicLevel).ToFColor(true);
			GEngine->AddOnScreenDebugMessage(30, AmplitudeStringDisplayRate, TextColor, PrintString, false);

			TimeLastPrinted = FPlatformTime::Seconds();
		}
	}

	return NewMicState;
}
/** Convenience overload for callers that don't need the sample-clock counter. */
EVoiceCaptureState::Type FVoiceCaptureWindows::GetVoiceData(uint8* OutVoiceBuffer, uint32 InVoiceBufferSize, uint32& OutAvailableVoiceData)
{
	// The full overload requires a counter out-param; discard it here.
	uint64 IgnoredSampleClock = 0;
	return GetVoiceData(OutVoiceBuffer, InVoiceBufferSize, OutAvailableVoiceData, IgnoredSampleClock);
}
/** @return the DirectSound capture buffer size in bytes, or 0 before initialization. */
int32 FVoiceCaptureWindows::GetBufferSize() const
{
	// Buffer caps are only populated once CreateCaptureBuffer has succeeded.
	return (VoiceCaptureState != EVoiceCaptureState::UnInitialized)
		? CV->VoiceCaptureBufferCaps8.dwBufferBytes
		: 0;
}
/**
 * Registers a DirectSound position notification so a stop of the capture buffer
 * signals CV->StopEvent (polled from Tick()).
 * @param BufferSize capture buffer size in bytes (currently unused).
 * @return true if the stop notification was registered.
 */
bool FVoiceCaptureWindows::CreateNotifications(uint32 BufferSize)
{
	LPDIRECTSOUNDNOTIFY8 NotifyInterface = nullptr;
	HRESULT hr = CV->VoiceCaptureBuffer8->QueryInterface(IID_IDirectSoundNotify, (LPVOID*)&NotifyInterface);
	if (FAILED(hr))
	{
		UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to create voice notification interface 0x%08x"), hr);
		return false;
	}

	bool bNotificationsReady = false;

	// Manual-reset event that DirectSound signals when the buffer stops.
	CV->StopEvent = CreateEvent(nullptr, true, false, nullptr);
	if (CV->StopEvent == NULL || CV->StopEvent == INVALID_HANDLE_VALUE)
	{
		UE_LOG(LogVoiceCapture, Warning, TEXT("Error creating stop event"));
	}
	else
	{
		DSBPOSITIONNOTIFY StopNotify;
		// when buffer stops
		StopNotify.dwOffset = DSBPN_OFFSETSTOP;
		StopNotify.hEventNotify = CV->StopEvent;

		hr = NotifyInterface->SetNotificationPositions(1, &StopNotify);
		if (SUCCEEDED(hr))
		{
			bNotificationsReady = true;
		}
		else
		{
			UE_LOG(LogVoiceCapture, Warning, TEXT("Failed to set stop notifications 0x%08x"), hr);
			CloseHandle(CV->StopEvent);
			CV->StopEvent = INVALID_HANDLE_VALUE;
		}
	}

	NotifyInterface->Release();
	return bNotificationsReady;
}
/**
 * Per-frame pump: drains the capture buffer and watches for the DirectSound
 * stop notification. Always returns true so the ticker keeps us registered.
 */
bool FVoiceCaptureWindows::Tick(float DeltaTime)
{
	QUICK_SCOPE_CYCLE_COUNTER(STAT_FVoiceCaptureWindows_Tick);

	const bool bShouldProcess =
		VoiceCaptureState != EVoiceCaptureState::UnInitialized &&
		VoiceCaptureState != EVoiceCaptureState::NotCapturing;

	if (bShouldProcess)
	{
		ProcessData();

		// A signalled stop event means DirectSound halted the capture buffer.
		if (CV->StopEvent != INVALID_HANDLE_VALUE && WaitForSingleObject(CV->StopEvent, 0) == WAIT_OBJECT_0)
		{
			UE_LOG(LogVoiceCapture, Verbose, TEXT("Voice capture stopped"));
			ResetEvent(CV->StopEvent);
			VoiceCaptureState = EVoiceCaptureState::NotCapturing;

			// Discard anything buffered but keep the capacity for reuse.
			UncompressedAudioBuffer.Empty(UncompressedAudioBuffer.Max());
		}
	}

	return true;
}
/**
 * Current envelope-follower output for the mic input.
 * NOTE(review): presumably normalized to [0, 1] given the 1/32768 scaling in
 * ProcessData — confirm against FInlineEnvelopeFollower before relying on range.
 */
float FVoiceCaptureWindows::GetCurrentAmplitude() const
{
	return MicLevelDetector.GetValue();
}
/** Logs the full DirectSound capture configuration for debugging (no-op when logging is compiled out). */
void FVoiceCaptureWindows::DumpState() const
{
#if !NO_LOGGING
	if (CV)
	{
		// Helper defined elsewhere in the module for formatting Windows GUIDs.
		extern FString PrintMSGUID(LPGUID Guid);
		UE_LOG(LogVoiceCapture, Display, TEXT("Device %s"), *PrintMSGUID(&CV->VoiceCaptureDeviceGuid));
		UE_LOG(LogVoiceCapture, Display, TEXT("CaptureDev: 0x%08" UPTRINT_x_FMT), CV->VoiceCaptureDev);
		UE_LOG(LogVoiceCapture, Display, TEXT("CaptureBuffer: 0x%08" UPTRINT_x_FMT), CV->VoiceCaptureBuffer8);
		UE_LOG(LogVoiceCapture, Display, TEXT("Capture Format"));
		UE_LOG(LogVoiceCapture, Display, TEXT("- Tag: %d"), CV->WavFormat.wFormatTag);
		UE_LOG(LogVoiceCapture, Display, TEXT("- Channels: %d"), CV->WavFormat.nChannels);
		UE_LOG(LogVoiceCapture, Display, TEXT("- BitsPerSample: %d"), CV->WavFormat.wBitsPerSample);
		UE_LOG(LogVoiceCapture, Display, TEXT("- SamplesPerSec: %d"), CV->WavFormat.nSamplesPerSec);
		UE_LOG(LogVoiceCapture, Display, TEXT("- BlockAlign: %d"), CV->WavFormat.nBlockAlign);
		UE_LOG(LogVoiceCapture, Display, TEXT("- AvgBytesPerSec: %d"), CV->WavFormat.nAvgBytesPerSec);
		UE_LOG(LogVoiceCapture, Display, TEXT("Capture Buffer"));
		UE_LOG(LogVoiceCapture, Display, TEXT("- Flags: 0x%08x"), CV->VoiceCaptureBufferDesc.dwFlags);
		UE_LOG(LogVoiceCapture, Display, TEXT("- BufferBytes: %d"), CV->VoiceCaptureBufferDesc.dwBufferBytes);
		UE_LOG(LogVoiceCapture, Display, TEXT("- Format: 0x%08" UPTRINT_x_FMT), CV->VoiceCaptureBufferDesc.lpwfxFormat);
		UE_LOG(LogVoiceCapture, Display, TEXT("Device Caps"));
		UE_LOG(LogVoiceCapture, Display, TEXT("- Size: %d"), CV->VoiceCaptureDevCaps.dwSize);
		UE_LOG(LogVoiceCapture, Display, TEXT("- Flags: 0x%08x"), CV->VoiceCaptureDevCaps.dwFlags);
		UE_LOG(LogVoiceCapture, Display, TEXT("- Formats: %d"), CV->VoiceCaptureDevCaps.dwFormats);
		UE_LOG(LogVoiceCapture, Display, TEXT("- Channels: %d"), CV->VoiceCaptureDevCaps.dwChannels);
		UE_LOG(LogVoiceCapture, Display, TEXT("D3D8 Caps"));
		UE_LOG(LogVoiceCapture, Display, TEXT("- Size: %d"), CV->VoiceCaptureBufferCaps8.dwSize);
		UE_LOG(LogVoiceCapture, Display, TEXT("- Flags: 0x%08x"), CV->VoiceCaptureBufferCaps8.dwFlags);
		UE_LOG(LogVoiceCapture, Display, TEXT("- BufferBytes: %d"), CV->VoiceCaptureBufferCaps8.dwBufferBytes);
	}
	else
	{
		UE_LOG(LogVoiceCapture, Display, TEXT("No capture device to dump state"));
	}
#endif // !NO_LOGGING
}
#include "Windows/HideWindowsPlatformTypes.h"
#endif // PLATFORM_SUPPORTS_VOICE_CAPTURE