From f14f77724d6a06b759280c0885ce1102388faa38 Mon Sep 17 00:00:00 2001
From: Kae <80987908+Novaenia@users.noreply.github.com>
Date: Thu, 13 Jul 2023 20:47:53 +1000
Subject: [PATCH] more voice work!!!

---
 .../application/StarMainApplication_sdl.cpp |  22 +-
 source/client/StarClientApplication.cpp     |   5 +
 source/core/StarCurve25519.cpp              |   2 -
 source/frontend/StarVoice.cpp               | 197 +++++++++++++++++-
 source/frontend/StarVoice.hpp               |  36 +++-
 5 files changed, 241 insertions(+), 21 deletions(-)

diff --git a/source/application/StarMainApplication_sdl.cpp b/source/application/StarMainApplication_sdl.cpp
index 8f47eeb..86b9a10 100644
--- a/source/application/StarMainApplication_sdl.cpp
+++ b/source/application/StarMainApplication_sdl.cpp
@@ -248,9 +248,15 @@ public:
     if (SDL_InitSubSystem(SDL_INIT_GAMECONTROLLER))
       throw ApplicationException(strf("Couldn't initialize SDL Controller: {}", SDL_GetError()));
 
-    Logger::info("Application: Initializing SDL Sound");
+#ifdef STAR_SYSTEM_WINDOWS // Newer SDL is defaulting to xaudio2, which does not support audio capture
+    SDL_setenv("SDL_AUDIODRIVER", "directsound", 1);
+#endif
+
+    Logger::info("Application: Initializing SDL Audio");
     if (SDL_InitSubSystem(SDL_INIT_AUDIO))
-      throw ApplicationException(strf("Couldn't initialize SDL Sound: {}", SDL_GetError()));
+      throw ApplicationException(strf("Couldn't initialize SDL Audio: {}", SDL_GetError()));
+
+    Logger::info("Application: using Audio Driver '{}'", SDL_GetCurrentAudioDriver());
 
     SDL_JoystickEventState(SDL_ENABLE);
 
@@ -336,17 +342,23 @@ public:
 
     closeAudioInputDevice();
 
     SDL_AudioSpec obtained = {};
-    return (m_sdlAudioInputDevice = SDL_OpenAudioDevice(name, 1, &desired, &obtained, 0)) != 0;
+    m_sdlAudioInputDevice = SDL_OpenAudioDevice(name, 1, &desired, &obtained, 0);
+
+    if (m_sdlAudioInputDevice)
+      Logger::info("Opened audio input device '{}'", SDL_GetAudioDeviceName(m_sdlAudioInputDevice, 1));
+    else
+      Logger::info("Failed to open audio input device: {}", SDL_GetError());
+
+    return m_sdlAudioInputDevice != 0;
   }
 
   bool closeAudioInputDevice() {
     if (m_sdlAudioInputDevice) {
+      Logger::info("Closing audio input device '{}'", SDL_GetAudioDeviceName(m_sdlAudioInputDevice, 1));
       SDL_CloseAudioDevice(m_sdlAudioInputDevice);
       m_sdlAudioInputDevice = 0;
-      return true;
     }
-
     return false;
   }
 
diff --git a/source/client/StarClientApplication.cpp b/source/client/StarClientApplication.cpp
index c58b6b5..f1c7595 100644
--- a/source/client/StarClientApplication.cpp
+++ b/source/client/StarClientApplication.cpp
@@ -208,6 +208,11 @@ void ClientApplication::applicationInit(ApplicationControllerPtr appController)
 
   appController->setMaxFrameSkip(assets->json("/client.config:maxFrameSkip").toUInt());
   appController->setUpdateTrackWindow(assets->json("/client.config:updateTrackWindow").toFloat());
+
+  if (auto jVoice = configuration->get("voice"))
+    m_voice->loadJson(jVoice.toObject());
+
+  m_voice->init();
 }
 
 void ClientApplication::renderInit(RendererPtr renderer) {
diff --git a/source/core/StarCurve25519.cpp b/source/core/StarCurve25519.cpp
index 3fdddea..f00c193 100644
--- a/source/core/StarCurve25519.cpp
+++ b/source/core/StarCurve25519.cpp
@@ -20,8 +20,6 @@ struct KeySet {
     secret[31] |= 64;
 
     ed25519_CreateKeyPair(publicKey.data(), privateKey.data(), nullptr, secret.data());
-
-    Logger::info("Generated Curve25519 key-pair");
   }
 
 };
diff --git a/source/frontend/StarVoice.cpp b/source/frontend/StarVoice.cpp
index 436461b..e5cb299 100644
--- a/source/frontend/StarVoice.cpp
+++ b/source/frontend/StarVoice.cpp
@@ -1,8 +1,11 @@
"StarVoice.hpp" #include "StarFormat.hpp" #include "StarApplicationController.hpp" +#include "StarTime.hpp" +#include "StarRoot.hpp" #include "opus/include/opus.h" +#include #include "SDL.h" constexpr int VOICE_SAMPLE_RATE = 48000; @@ -25,10 +28,129 @@ EnumMap const VoiceChannelModeNames{ {VoiceChannelMode::Stereo, "Stereo"} }; +float getAudioChunkLoudness(int16_t* data, size_t samples) { + if (!samples) + return 0.f; + + double rms = 0.; + for (size_t i = 0; i != samples; ++i) { + float sample = (float)data[i] / 32767.f; + rms += (double)(sample * sample); + } + + float fRms = sqrtf((float)(rms / samples)); + + if (fRms > 0) + return std::clamp(20.f * log10f(fRms), -127.f, 0.f); + else + return -127.f; +} + +float getAudioLoudness(int16_t* data, size_t samples) { + constexpr size_t CHUNK_SIZE = 50; + + float highest = -127.f; + for (size_t i = 0; i < samples; i += CHUNK_SIZE) { + float level = getAudioChunkLoudness(data + i, std::min(i + CHUNK_SIZE, samples) - i); + if (level > highest) + highest = level; + } + + return highest; +} + +struct VoiceAudioChunk { + std::unique_ptr data; + size_t remaining; + size_t offset = 0; + + VoiceAudioChunk(int16_t* ptr, size_t size) { + data.reset(ptr); + remaining = size; + offset = 0; + } + + inline size_t takeSamples(std::vector& out, size_t count) { + size_t toRead = std::min(count, remaining); + int16_t* start = data.get() + offset; + out.insert(out.end(), start, start + toRead); + offset += toRead; + remaining -= toRead; + return toRead; + } + + //this one's unsafe + inline int16_t takeSample() { + --remaining; + return *(data.get() + offset++); + } + + inline bool exhausted() { + return remaining == 0; + } +}; + +struct VoiceAudioStream { + // TODO: This should really be a ring buffer instead. + std::queue chunks{}; + size_t samples = 0; + atomic muted = false; + atomic playing = false; + atomic decibelLevel = 0.0f; + atomic> channelVolumes = Array::filled(1.0f); + + Mutex mutex; + + inline int16_t getSample() { + int16_t sample = 0; + while (!chunks.empty()) { + auto& front = chunks.front(); + if (front.exhausted()) { + chunks.pop(); + continue; + } + --samples; + return front.takeSample(); + } + return 0; + } + + void nukeSamples(size_t count) { + while (!chunks.empty() && count > 0) { + auto& front = chunks.front(); + if (count >= front.remaining) { + count -= front.remaining; + samples -= front.remaining; + chunks.pop(); + } + else { + for (size_t i = 0; i != count; ++i) { + --samples; + front.takeSample(); + } + break; + } + } + } + + inline bool empty() { return chunks.empty(); } + + void take(int16_t* ptr, size_t size) { + MutexLocker lock(mutex); + while (samples > 22050 && !chunks.empty()) { + samples -= chunks.front().remaining; + chunks.pop(); + } + chunks.emplace(ptr, size); + samples += size; + } +}; + Voice::Speaker::Speaker(SpeakerId id) : decoderMono (createDecoder(1), opus_decoder_destroy) , decoderStereo(createDecoder(2), opus_decoder_destroy) { speakerId = id; + audioStream = make_shared(); } Voice* Voice::s_singleton; @@ -53,19 +175,58 @@ Voice::Voice(ApplicationControllerPtr appController) : m_encoder(nullptr, opus_e m_channelMode = VoiceChannelMode::Mono; m_applicationController = appController; - resetEncoder(); s_singleton = this; } Voice::~Voice() { + save(); + s_singleton = nullptr; } -void Voice::load(Json const& config) { - // do stuff +void Voice::init() { + resetEncoder(); + if (m_inputEnabled) + openDevice(); } -Json Voice::save() const { - return JsonObject{}; + +void Voice::loadJson(Json const& config) { + 
+  m_enabled = config.getBool("enabled", m_enabled);
+  m_inputEnabled = config.getBool("inputEnabled", m_inputEnabled);
+  m_deviceName = config.optQueryString("inputDevice");
+  m_threshold = config.getFloat("threshold", m_threshold);
+  m_inputVolume = config.getFloat("inputVolume", m_inputVolume);
+  m_outputVolume = config.getFloat("outputVolume", m_outputVolume);
+  m_inputMode = VoiceInputModeNames.getLeft(config.getString("inputMode", "pushToTalk"));
+  m_channelMode = VoiceChannelModeNames.getLeft(config.getString("channelMode", "mono"));
+}
+
+
+
+Json Voice::saveJson() const {
+  return JsonObject{
+    {"enabled", m_enabled},
+    {"inputEnabled", m_inputEnabled},
+    {"inputDevice", m_deviceName ? *m_deviceName : Json()},
+    {"threshold", m_threshold},
+    {"inputVolume", m_inputVolume},
+    {"outputVolume", m_outputVolume},
+    {"inputMode", VoiceInputModeNames.getRight(m_inputMode)},
+    {"channelMode", VoiceChannelModeNames.getRight(m_channelMode)},
+    {"version", 1}
+  };
+}
+
+void Voice::save() const {
+  if (Root* root = Root::singletonPtr()) {
+    if (auto config = root->configuration())
+      config->set("voice", saveJson());
+  }
+}
+
+void Voice::scheduleSave() {
+  if (nextSaveTime == 0.0)
+    nextSaveTime = Time::monotonicTime() + 2.0;
 }
 
 Voice::SpeakerPtr Voice::setLocalSpeaker(SpeakerId speakerId) {
@@ -87,6 +248,10 @@ Voice::SpeakerPtr Voice::speaker(SpeakerId speakerId) {
   }
 }
 
+void Voice::getAudioData(uint8_t* stream, int len) {
+
+}
+
 void Voice::mix(int16_t* buffer, size_t frames, unsigned channels) {
 
 }
@@ -95,13 +260,29 @@ void Voice::update(PositionalAttenuationFunction positionalAttenuationFunction)
   if (positionalAttenuationFunction) {
     for (auto& entry : m_speakers) {
      if (SpeakerPtr& speaker = entry.second) {
-        speaker->channelVolumes = {
+        speaker->audioStream->channelVolumes = {
          positionalAttenuationFunction(0, speaker->position, 1.0f),
          positionalAttenuationFunction(1, speaker->position, 1.0f)
        };
      }
    }
   }
+
+  auto now = Time::monotonicTime();
+  if (now > nextSaveTime) {
+    nextSaveTime = 0.0;
+    save();
+  }
+}
+
+
+void Voice::setDeviceName(Maybe<String> deviceName) {
+  if (m_deviceName == deviceName)
+    return;
+
+  m_deviceName = deviceName;
+  if (m_deviceOpen)
+    openDevice();
 }
 
 OpusDecoder* Voice::createDecoder(int channels) {
@@ -131,6 +312,10 @@ void Voice::resetEncoder() {
 void Voice::openDevice() {
   closeDevice();
 
+  m_applicationController->openAudioInputDevice(m_deviceName ? m_deviceName->utf8Ptr() : nullptr, VOICE_SAMPLE_RATE, encoderChannels(), this, [](void* userdata, uint8_t* stream, int len) {
+    ((Voice*)(userdata))->getAudioData(stream, len);
+  });
+
   m_deviceOpen = true;
 }
 
diff --git a/source/frontend/StarVoice.hpp b/source/frontend/StarVoice.hpp
index 0d485db..269adb4 100644
--- a/source/frontend/StarVoice.hpp
+++ b/source/frontend/StarVoice.hpp
@@ -5,6 +5,7 @@
 #include "StarException.hpp"
 #include "StarGameTypes.hpp"
 #include "StarMaybe.hpp"
+#include "StarThread.hpp"
 #include "StarApplicationController.hpp"
 
 struct OpusDecoder;
@@ -23,6 +24,7 @@ enum class VoiceChannelMode: uint8_t { Mono = 1, Stereo = 2 };
 extern EnumMap<VoiceChannelMode> const VoiceChannelModeNames;
 
 STAR_CLASS(Voice);
+STAR_CLASS(VoiceAudioStream);
 STAR_CLASS(ApplicationController);
 
 class Voice {
@@ -30,7 +32,8 @@ public:
   // Individual speakers are represented by their connection ID.
   typedef ConnectionId SpeakerId;
 
-  struct Speaker {
+  class Speaker {
+  public:
     SpeakerId speakerId = 0;
     EntityId entityId = 0;
 
@@ -39,10 +42,8 @@ public:
     OpusDecoderPtr decoderMono;
     OpusDecoderPtr decoderStereo;
-
-    atomic<bool> active = false;
-    atomic<float> currentLoudness = 0.0f;
-    atomic<Array<float, 2>> channelVolumes = Array<float, 2>::filled(1.0f);
+    VoiceAudioStreamPtr audioStream;
+    Mutex mutex;
 
     Speaker(SpeakerId speakerId);
   };
 
@@ -63,19 +64,29 @@ public:
   Voice(Voice const&) = delete;
   Voice& operator=(Voice const&) = delete;
 
-  void load(Json const& config);
-  Json save() const;
-
+  void init();
+
+  void loadJson(Json const& config);
+  Json saveJson() const;
+
+  void save() const;
+  void scheduleSave();
+
   // Sets the local speaker ID and returns the local speaker. Must be called upon loading into a world.
   SpeakerPtr setLocalSpeaker(SpeakerId speakerId);
 
   SpeakerPtr speaker(SpeakerId speakerId);
 
+  // Called when receiving input audio data from SDL, on its own thread.
+  void getAudioData(uint8_t* stream, int len);
+
   // Called to mix voice audio with the game.
   void mix(int16_t* buffer, size_t frames, unsigned channels);
 
   typedef function<float(unsigned, Vec2F, float)> PositionalAttenuationFunction;
   void update(PositionalAttenuationFunction positionalAttenuationFunction = {});
+  void setDeviceName(Maybe<String> device);
+
   inline int encoderChannels() const {
     return m_channelMode == VoiceChannelMode::Mono ? 1 : 2;
   }
@@ -96,12 +107,21 @@ private:
   OpusEncoderPtr m_encoder;
 
+  float m_outputVolume = 1.0f;
+  float m_inputVolume = 1.0f;
+  float m_threshold = -50.0f;
+
+  bool m_enabled = true;
+  bool m_inputEnabled = true;
+
   bool m_deviceOpen = false;
   Maybe<String> m_deviceName;
 
   VoiceInputMode m_inputMode;
   VoiceChannelMode m_channelMode;
 
   ApplicationControllerPtr m_applicationController;
+
+  double nextSaveTime = 0.0f;
 };
 
 }
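
The loadJson/saveJson pair in this patch round-trips a "voice" object in the client configuration: ClientApplication reads configuration->get("voice") at startup, and Voice::save() writes it back via config->set("voice", saveJson()). A sketch of roughly what that object looks like, using the default member values from StarVoice.hpp; the exact "inputMode" and "channelMode" spellings are whatever VoiceInputModeNames and VoiceChannelModeNames map to, and "inputDevice" stays null until a device name is set:

  "voice" : {
    "enabled" : true,
    "inputEnabled" : true,
    "inputDevice" : null,
    "threshold" : -50.0,
    "inputVolume" : 1.0,
    "outputVolume" : 1.0,
    "inputMode" : "pushToTalk",
    "channelMode" : "mono",
    "version" : 1
  }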