more voice work!!!

2023-07-13 20:47:53 +10:00 · 2023-07-13 20:47:53 +10:00 · f14f77724d
commit f14f77724d
parent 212de6b876
5 changed files with 241 additions and 21 deletions
--- a/source/application/StarMainApplication_sdl.cpp
+++ b/source/application/StarMainApplication_sdl.cpp
@ -248,9 +248,15 @@ public:
    if (SDL_InitSubSystem(SDL_INIT_GAMECONTROLLER))
      throw ApplicationException(strf("Couldn't initialize SDL Controller: {}", SDL_GetError()));
-    Logger::info("Application: Initializing SDL Sound");
+#ifdef STAR_SYSTEM_WINDOWS // Newer SDL is defaulting to xaudio2, which does not support audio capture
    SDL_setenv("SDL_AUDIODRIVER", "directsound", 1);
 #endif
    Logger::info("Application: Initializing SDL Audio");
    if (SDL_InitSubSystem(SDL_INIT_AUDIO))
-      throw ApplicationException(strf("Couldn't initialize SDL Sound: {}", SDL_GetError()));
+      throw ApplicationException(strf("Couldn't initialize SDL Audio: {}", SDL_GetError()));
    Logger::info("Application: using Audio Driver '{}'", SDL_GetCurrentAudioDriver());
    SDL_JoystickEventState(SDL_ENABLE);
@ -336,17 +342,23 @@ public:
    closeAudioInputDevice();
    SDL_AudioSpec obtained = {};
-    return (m_sdlAudioInputDevice = SDL_OpenAudioDevice(name, 1, &desired, &obtained, 0)) != 0;
+    m_sdlAudioInputDevice = SDL_OpenAudioDevice(name, 1, &desired, &obtained, 0);
    if (m_sdlAudioInputDevice)
      Logger::info("Opened audio input device '{}'", SDL_GetAudioDeviceName(m_sdlAudioInputDevice, 1));
    else
      Logger::info("Failed to open audio input device: {}", SDL_GetError());
    return m_sdlAudioInputDevice != 0;
  }
  // Closes the currently open audio input (capture) device, if there is one.
  // Returns true when a device was actually closed, false when none was open.
  bool closeAudioInputDevice() {
    if (!m_sdlAudioInputDevice)
      return false;

    // NOTE(review): SDL_GetAudioDeviceName expects an enumeration index, but
    // m_sdlAudioInputDevice is the device ID returned by SDL_OpenAudioDevice.
    // The lookup can therefore return null (index out of range) or the name of
    // a different device. Guard the null so it never reaches the formatter;
    // remembering the name at open time would be the complete fix.
    char const* deviceName = SDL_GetAudioDeviceName(m_sdlAudioInputDevice, 1);
    Logger::info("Closing audio input device '{}'", deviceName ? deviceName : "(unknown)");

    SDL_CloseAudioDevice(m_sdlAudioInputDevice);
    m_sdlAudioInputDevice = 0;
    return true;
  }
--- a/source/client/StarClientApplication.cpp
+++ b/source/client/StarClientApplication.cpp
@ -208,6 +208,11 @@ void ClientApplication::applicationInit(ApplicationControllerPtr appController)
  appController->setMaxFrameSkip(assets->json("/client.config:maxFrameSkip").toUInt());
  appController->setUpdateTrackWindow(assets->json("/client.config:updateTrackWindow").toFloat());
  if (auto jVoice = configuration->get("voice"))
    m_voice->loadJson(jVoice.toObject());
  m_voice->init();
 }
 void ClientApplication::renderInit(RendererPtr renderer) {
--- a/source/core/StarCurve25519.cpp
+++ b/source/core/StarCurve25519.cpp
@ -20,8 +20,6 @@ struct KeySet {
    secret[31] |= 64;
    ed25519_CreateKeyPair(publicKey.data(), privateKey.data(), nullptr, secret.data());
    Logger::info("Generated Curve25519 key-pair");
  }
 };
--- a/source/frontend/StarVoice.cpp
+++ b/source/frontend/StarVoice.cpp
@ -1,8 +1,11 @@
 #include "StarVoice.hpp"
 #include "StarFormat.hpp"
 #include "StarApplicationController.hpp"
 #include "StarTime.hpp"
 #include "StarRoot.hpp"
 #include "opus/include/opus.h"
 #include <queue>
 #include "SDL.h"
 constexpr int VOICE_SAMPLE_RATE = 48000;
@ -25,10 +28,129 @@ EnumMap<VoiceChannelMode> const VoiceChannelModeNames{
  {VoiceChannelMode::Stereo, "Stereo"}
 };
 // Computes the RMS loudness of `samples` 16-bit PCM values starting at `data`,
 // expressed in decibels relative to full scale and clamped to [-127, 0].
 // Returns 0 when `samples` is zero and -127 when the chunk is pure silence.
 float getAudioChunkLoudness(int16_t* data, size_t samples) {
   if (samples == 0)
     return 0.f;

   // Accumulate the squared, normalized samples in double precision.
   double sumOfSquares = 0.;
   int16_t const* const end = data + samples;
   for (int16_t const* cursor = data; cursor != end; ++cursor) {
     float const normalized = static_cast<float>(*cursor) / 32767.f;
     sumOfSquares += static_cast<double>(normalized * normalized);
   }

   float const rootMeanSquare = sqrtf(static_cast<float>(sumOfSquares / samples));
   // log10 of zero is undefined, so silence maps straight to the floor value.
   if (!(rootMeanSquare > 0))
     return -127.f;
   return std::clamp<float>(20.f * log10f(rootMeanSquare), -127.f, 0.f);
 }
 // Splits the buffer into consecutive chunks of up to 50 samples and returns
 // the loudest chunk level reported by getAudioChunkLoudness. The result is
 // -127 when the buffer is empty.
 float getAudioLoudness(int16_t* data, size_t samples) {
   constexpr size_t CHUNK_SIZE = 50;

   float peak = -127.f;
   int16_t* cursor = data;
   size_t left = samples;
   while (left > 0) {
     // The final chunk may be shorter than CHUNK_SIZE.
     size_t const span = left < CHUNK_SIZE ? left : CHUNK_SIZE;
     peak = std::max(peak, getAudioChunkLoudness(cursor, span));
     cursor += span;
     left -= span;
   }
   return peak;
 }
 // One buffer of 16-bit samples together with a read cursor into it.
 // The chunk owns the buffer and releases it with delete[] on destruction.
 struct VoiceAudioChunk {
   std::unique_ptr<int16_t[]> data;
   size_t remaining; // samples not yet consumed
   size_t offset = 0; // index of the next sample to hand out

   // Takes ownership of `ptr`, which holds `size` samples.
   VoiceAudioChunk(int16_t* ptr, size_t size)
     : data(ptr), remaining(size), offset(0) {}

   // Appends up to `count` samples to `out` and advances the cursor.
   // Returns how many samples were actually appended.
   inline size_t takeSamples(std::vector<int16_t>& out, size_t count) {
     size_t const taken = count < remaining ? count : remaining;
     int16_t const* const first = data.get() + offset;
     out.insert(out.end(), first, first + taken);
     remaining -= taken;
     offset += taken;
     return taken;
   }

   // Unsafe: performs no bounds check, so callers must make sure the chunk is
   // not exhausted before calling.
   inline int16_t takeSample() {
     int16_t const value = data[offset];
     ++offset;
     --remaining;
     return value;
   }

   // True once every sample has been consumed.
   inline bool exhausted() {
     return !remaining;
   }
 };
 struct VoiceAudioStream {
  // TODO: This should really be a ring buffer instead.
  std::queue<VoiceAudioChunk> chunks{};
  size_t samples = 0;
  atomic<bool> muted = false;
  atomic<bool> playing = false;
  atomic<float> decibelLevel = 0.0f;
  atomic<Array<float, 2>> channelVolumes = Array<float, 2>::filled(1.0f);
  Mutex mutex;
  inline int16_t getSample() {
 		int16_t sample = 0;
 		while (!chunks.empty()) {
 			auto& front = chunks.front();
 			if (front.exhausted()) {
        chunks.pop();
 				continue;
 			}
 			--samples;
 			return front.takeSample();
 		}
 		return 0;
 	}
 	void nukeSamples(size_t count) {
 		while (!chunks.empty() && count > 0) {
 			auto& front = chunks.front();
 			if (count >= front.remaining) {
 				count -= front.remaining;
 				samples -= front.remaining;
        chunks.pop();
 			}
 			else {
 				for (size_t i = 0; i != count; ++i) {
 					--samples;
 					front.takeSample();
 				}
 				break;
 			}
 		}
 	}
 	inline bool empty() { return chunks.empty(); }
 	void take(int16_t* ptr, size_t size) {
    MutexLocker lock(mutex);
 	  while (samples > 22050 && !chunks.empty()) {
 		  samples -= chunks.front().remaining;
 		  chunks.pop();
    }
    chunks.emplace(ptr, size);
 		samples += size;
 	}
 };
 // Builds a speaker for the given id with one mono and one stereo Opus decoder
 // (each released through opus_decoder_destroy) and a fresh, empty audio stream.
 Voice::Speaker::Speaker(SpeakerId id)
   : speakerId(id)
   , decoderMono(createDecoder(1), opus_decoder_destroy)
   , decoderStereo(createDecoder(2), opus_decoder_destroy)
   , audioStream(make_shared<VoiceAudioStream>()) {}
 // Pointer to the live Voice instance: assigned in the Voice constructor and
 // reset to nullptr in the destructor.
 Voice* Voice::s_singleton;
@ -53,19 +175,58 @@ Voice::Voice(ApplicationControllerPtr appController) : m_encoder(nullptr, opus_e
  m_channelMode = VoiceChannelMode::Mono;
  m_applicationController = appController;
  resetEncoder();
  s_singleton = this;
 }
 // Persists the current voice settings one last time, then clears the
 // singleton pointer so it no longer refers to this instance.
 Voice::~Voice() {
  save();
  s_singleton = nullptr;
 }
-void Voice::load(Json const& config) {
+void Voice::init() {
-  // do stuff
+  resetEncoder();
  if (m_inputEnabled)
    openDevice();
 }
-Json Voice::save() const {
+
-  return JsonObject{};
+void Voice::loadJson(Json const& config) {
  m_enabled      = config.getBool("enabled",         m_enabled);
  m_inputEnabled = config.getBool("inputEnabled",    m_inputEnabled);
  m_deviceName   = config.optQueryString("inputDevice");
  m_threshold    = config.getFloat("threshold", m_threshold);
  m_inputVolume  = config.getFloat("inputVolume", m_inputVolume);
  m_outputVolume = config.getFloat("outputVolume",   m_outputVolume);
  m_inputMode    = VoiceInputModeNames.getLeft(config.getString("inputMode", "pushToTalk"));
  m_channelMode  = VoiceChannelModeNames.getLeft(config.getString("channelMode", "mono"));
 }
 // Serializes the current voice settings into a JSON object, using the same
 // keys that loadJson reads plus a "version" field (currently 1).
 Json Voice::saveJson() const {
  return JsonObject{
    {"enabled",      m_enabled},
    {"inputEnabled", m_inputEnabled},
    // An unset device name is stored as a default-constructed Json value.
    {"inputDevice",  m_deviceName ? *m_deviceName : Json()},
    {"threshold",    m_threshold},
    {"inputVolume",  m_inputVolume},
    {"outputVolume", m_outputVolume},
    // Enum settings are stored by their registered names.
    {"inputMode",    VoiceInputModeNames.getRight(m_inputMode)},
    {"channelMode",  VoiceChannelModeNames.getRight(m_channelMode)},
    {"version",      1}
  };
 }
 // Stores the serialized settings under the "voice" key of the root
 // configuration. Does nothing when Root or its configuration is unavailable.
 void Voice::save() const {
   Root* root = Root::singletonPtr();
   if (!root)
     return;

   auto config = root->configuration();
   if (!config)
     return;

   config->set("voice", saveJson());
 }
 // Requests a save two seconds from now. A nextSaveTime of 0.0 means no save
 // is pending; an already pending save keeps its original deadline.
 void Voice::scheduleSave() {
   if (nextSaveTime != 0.0)
     return;
   nextSaveTime = Time::monotonicTime() + 2.0;
 }
 Voice::SpeakerPtr Voice::setLocalSpeaker(SpeakerId speakerId) {
@ -87,6 +248,10 @@ Voice::SpeakerPtr Voice::speaker(SpeakerId speakerId) {
  }
 }
 // Receives captured input audio (`len` bytes at `stream`); openDevice
 // registers this as the capture callback. The body is empty in this
 // revision, so incoming data is currently discarded.
 void Voice::getAudioData(uint8_t* stream, int len) {
 }
 // Intended to mix voice audio into the game's output buffer. The body is
 // empty in this revision, so `buffer` is left untouched.
 void Voice::mix(int16_t* buffer, size_t frames, unsigned channels) {
 }
@ -95,13 +260,29 @@ void Voice::update(PositionalAttenuationFunction positionalAttenuationFunction)
  if (positionalAttenuationFunction) {
    for (auto& entry : m_speakers) {
      if (SpeakerPtr& speaker = entry.second) {
-        speaker->channelVolumes = {
+        speaker->audioStream->channelVolumes = {
          positionalAttenuationFunction(0, speaker->position, 1.0f),
          positionalAttenuationFunction(1, speaker->position, 1.0f)
        };
      }
    }
  }
  auto now = Time::monotonicTime();
  if (now > nextSaveTime) {
    nextSaveTime = 0.0;
    save();
  }
 }
 // Switches the input device name. If the name actually changes while a
 // device is marked open, the device is reopened with the new name.
 void Voice::setDeviceName(Maybe<String> deviceName) {
   bool const changed = !(m_deviceName == deviceName);
   if (changed) {
     m_deviceName = deviceName;
     if (m_deviceOpen)
       openDevice();
   }
 }
 OpusDecoder* Voice::createDecoder(int channels) {
@ -131,6 +312,10 @@ void Voice::resetEncoder() {
 void Voice::openDevice() {
  closeDevice();
  m_applicationController->openAudioInputDevice(m_deviceName ? m_deviceName->utf8Ptr() : nullptr, VOICE_SAMPLE_RATE, encoderChannels(), this, [](void* userdata, uint8_t* stream, int len) {
    ((Voice*)(userdata))->getAudioData(stream, len);
  });
  m_deviceOpen = true;
 }
--- a/source/frontend/StarVoice.hpp
+++ b/source/frontend/StarVoice.hpp
@ -5,6 +5,7 @@
 #include "StarException.hpp"
 #include "StarGameTypes.hpp"
 #include "StarMaybe.hpp"
 #include "StarThread.hpp"
 #include "StarApplicationController.hpp"
 struct OpusDecoder;
@ -23,6 +24,7 @@ enum class VoiceChannelMode: uint8_t { Mono = 1, Stereo = 2 };
 extern EnumMap<VoiceChannelMode> const VoiceChannelModeNames;
 STAR_CLASS(Voice);
 STAR_CLASS(VoiceAudioStream);
 STAR_CLASS(ApplicationController);
 class Voice {
@ -30,7 +32,8 @@ public:
  // Individual speakers are represented by their connection ID.
  typedef ConnectionId SpeakerId;
-  struct Speaker {
+  class Speaker {
  public:
    SpeakerId speakerId = 0;
    EntityId entityId = 0;
@ -39,10 +42,8 @@ public:
    OpusDecoderPtr decoderMono;
    OpusDecoderPtr decoderStereo;
-
+    VoiceAudioStreamPtr audioStream;
-    atomic<bool> active = false;
+    Mutex mutex;
    atomic<float> currentLoudness = 0.0f;
    atomic<Array<float, 2>> channelVolumes = Array<float, 2>::filled(1.0f);
    Speaker(SpeakerId speakerId);
  };
@ -63,19 +64,29 @@ public:
  Voice(Voice const&) = delete;
  Voice& operator=(Voice const&) = delete;
-  void load(Json const& config);
+  void init();
-  Json save() const;
+
-  
+  void loadJson(Json const& config);
  Json saveJson() const;
  void save() const;
  void scheduleSave();
  // Sets the local speaker ID and returns the local speaker. Must be called upon loading into a world.
  SpeakerPtr setLocalSpeaker(SpeakerId speakerId);
  SpeakerPtr speaker(SpeakerId speakerId);
  // Called when receiving input audio data from SDL, on its own thread.
  void getAudioData(uint8_t* stream, int len);
  // Called to mix voice audio with the game.
  void mix(int16_t* buffer, size_t frames, unsigned channels);
  typedef function<float(unsigned, Vec2F, float)> PositionalAttenuationFunction;
  void update(PositionalAttenuationFunction positionalAttenuationFunction = {});
  void setDeviceName(Maybe<String> device);
  inline int encoderChannels() const {
    return m_channelMode == VoiceChannelMode::Mono ? 1 : 2;
  }
@ -96,12 +107,21 @@ private:
  OpusEncoderPtr m_encoder;
  float m_outputVolume = 1.0f;
  float m_inputVolume = 1.0f;
  float m_threshold = -50.0f;
  bool m_enabled = true;
  bool m_inputEnabled = true;
  bool m_deviceOpen = false;
  Maybe<String> m_deviceName;
  VoiceInputMode m_inputMode;
  VoiceChannelMode m_channelMode;
  ApplicationControllerPtr m_applicationController;
  double nextSaveTime = 0.0f;
 };
 }