diff --git a/source/frontend/StarVoice.cpp b/source/frontend/StarVoice.cpp index 978bc2c..8f1b329 100644 --- a/source/frontend/StarVoice.cpp +++ b/source/frontend/StarVoice.cpp @@ -31,71 +31,71 @@ EnumMap const VoiceChannelModeNames{ }; inline float getAudioChunkLoudness(int16_t* data, size_t samples, float volume) { - if (!samples) - return 0.f; + if (!samples) + return 0.f; - double rms = 0.; - for (size_t i = 0; i != samples; ++i) { - float sample = ((float)data[i] / 32767.f) * volume; - rms += (double)(sample * sample); - } + double rms = 0.; + for (size_t i = 0; i != samples; ++i) { + float sample = ((float)data[i] / 32767.f) * volume; + rms += (double)(sample * sample); + } float fRms = sqrtf((float)(rms / samples)); - if (fRms > 0) - return std::clamp(20.f * log10f(fRms), -127.f, 0.f); - else - return -127.f; + if (fRms > 0) + return std::clamp(20.f * log10f(fRms), -127.f, 0.f); + else + return -127.f; } float getAudioLoudness(int16_t* data, size_t samples, float volume = 1.0f) { - constexpr size_t CHUNK_SIZE = 50; + constexpr size_t CHUNK_SIZE = 50; - float highest = -127.f; - for (size_t i = 0; i < samples; i += CHUNK_SIZE) { - float level = getAudioChunkLoudness(data + i, std::min(i + CHUNK_SIZE, samples) - i, volume); - if (level > highest) + float highest = -127.f; + for (size_t i = 0; i < samples; i += CHUNK_SIZE) { + float level = getAudioChunkLoudness(data + i, std::min(i + CHUNK_SIZE, samples) - i, volume); + if (level > highest) highest = level; - } + } - return highest; + return highest; } class VoiceAudioStream { public: // TODO: This should really be a ring buffer instead. std::queue samples; - SDL_AudioStream* sdlAudioStreamMono; - SDL_AudioStream* sdlAudioStreamStereo; + SDL_AudioStream* sdlAudioStreamMono; + SDL_AudioStream* sdlAudioStreamStereo; Mutex mutex; - VoiceAudioStream() - : sdlAudioStreamMono (SDL_NewAudioStream(AUDIO_S16, 1, 48000, AUDIO_S16SYS, 1, 44100)) - , sdlAudioStreamStereo(SDL_NewAudioStream(AUDIO_S16, 2, 48000, AUDIO_S16SYS, 2, 44100)) {}; - ~VoiceAudioStream() { - SDL_FreeAudioStream(sdlAudioStreamMono); - SDL_FreeAudioStream(sdlAudioStreamStereo); - } + VoiceAudioStream() + : sdlAudioStreamMono (SDL_NewAudioStream(AUDIO_S16, 1, 48000, AUDIO_S16SYS, 1, 44100)) + , sdlAudioStreamStereo(SDL_NewAudioStream(AUDIO_S16, 2, 48000, AUDIO_S16SYS, 2, 44100)) {}; + ~VoiceAudioStream() { + SDL_FreeAudioStream(sdlAudioStreamMono); + SDL_FreeAudioStream(sdlAudioStreamStereo); + } - inline int16_t take() { - int16_t sample = 0; - if (!samples.empty()) { - sample = samples.front(); - samples.pop(); - } - return sample; - } + inline int16_t take() { + int16_t sample = 0; + if (!samples.empty()) { + sample = samples.front(); + samples.pop(); + } + return sample; + } - size_t resample(int16_t* in, size_t inSamples, std::vector& out, bool mono) { - SDL_AudioStream* stream = mono ? sdlAudioStreamMono : sdlAudioStreamStereo; - SDL_AudioStreamPut(stream, in, inSamples * sizeof(int16_t)); - if (int available = SDL_AudioStreamAvailable(stream)) { - out.resize(available / 2); - SDL_AudioStreamGet(stream, out.data(), available); - return available; - } - return 0; - } + size_t resample(int16_t* in, size_t inSamples, std::vector& out, bool mono) { + SDL_AudioStream* stream = mono ? sdlAudioStreamMono : sdlAudioStreamStereo; + SDL_AudioStreamPut(stream, in, inSamples * sizeof(int16_t)); + if (int available = SDL_AudioStreamAvailable(stream)) { + out.resize(available / 2); + SDL_AudioStreamGet(stream, out.data(), available); + return available; + } + return 0; + } }; Voice::Speaker::Speaker(SpeakerId id) @@ -106,16 +106,16 @@ Voice::Speaker::Speaker(SpeakerId id) } Json Voice::Speaker::toJson() const { - return JsonObject{ - {"speakerId", speakerId}, - {"entityId", entityId }, - {"name", name }, - {"playing", (bool)playing}, - {"muted", (bool)muted }, - {"decibels", (float)decibelLevel}, - {"smoothDecibels", (float)smoothDb }, - {"position", jsonFromVec2F(position)} - }; + return JsonObject{ + {"speakerId", speakerId}, + {"entityId", entityId }, + {"name", name }, + {"playing", (bool)playing}, + {"muted", (bool)muted }, + {"decibels", (float)decibelLevel}, + {"smoothDecibels", (float)smoothDb }, + {"position", jsonFromVec2F(position)} + }; } Voice* Voice::s_singleton; @@ -140,99 +140,99 @@ Voice::Voice(ApplicationControllerPtr appController) : m_encoder(nullptr, opus_e m_channelMode = VoiceChannelMode::Mono; m_applicationController = appController; - m_stopThread = false; - m_thread = Thread::invoke("Voice::thread", mem_fn(&Voice::thread), this); + m_stopThread = false; + m_thread = Thread::invoke("Voice::thread", mem_fn(&Voice::thread), this); s_singleton = this; } Voice::~Voice() { - m_stopThread = true; + m_stopThread = true; - { - MutexLocker locker(m_threadMutex); - m_threadCond.broadcast(); - } + { + MutexLocker locker(m_threadMutex); + m_threadCond.broadcast(); + } - m_thread.finish(); + m_thread.finish(); - if (m_nextSaveTime) - save(); + if (m_nextSaveTime) + save(); - closeDevice(); + closeDevice(); s_singleton = nullptr; } void Voice::init() { - resetEncoder(); - resetDevice(); + resetEncoder(); + resetDevice(); } template inline bool change(T& value, T newValue, bool& out) { - bool changed = value != newValue; - out |= changed; - value = std::move(newValue); - return changed; + bool changed = value != newValue; + out |= changed; + value = std::move(newValue); + return changed; } void Voice::loadJson(Json const& config, bool skipSave) { // Not all keys are required - bool changed = false; + bool changed = false; - { - bool enabled = shouldEnableInput(); - m_enabled = config.getBool("enabled", m_enabled); - m_inputEnabled = config.getBool("inputEnabled", m_inputEnabled); - if (shouldEnableInput() != enabled) { - changed = true; - resetDevice(); - } - } + { + bool enabled = shouldEnableInput(); + m_enabled = config.getBool("enabled", m_enabled); + m_inputEnabled = config.getBool("inputEnabled", m_inputEnabled); + if (shouldEnableInput() != enabled) { + changed = true; + resetDevice(); + } + } - if (config.contains("deviceName") // Make sure null-type key exists - && change(m_deviceName, config.optString("deviceName"), changed)) - resetDevice(); + if (config.contains("deviceName") // Make sure null-type key exists + && change(m_deviceName, config.optString("deviceName"), changed)) + resetDevice(); m_threshold = config.getFloat("threshold", m_threshold); m_inputAmplitude = perceptualToAmplitude( - m_inputVolume = config.getFloat("inputVolume", m_inputVolume)); + m_inputVolume = config.getFloat("inputVolume", m_inputVolume)); m_outputAmplitude = perceptualToAmplitude( - m_outputVolume = config.getFloat("outputVolume", m_outputVolume)); - - if (change(m_loopback, config.getBool("loopback", m_loopback), changed)) - m_clientSpeaker->playing = false; + m_outputVolume = config.getFloat("outputVolume", m_outputVolume)); + + if (change(m_loopback, config.getBool("loopback", m_loopback), changed)) + m_clientSpeaker->playing = false; - if (auto inputMode = config.optString("inputMode")) { - if (change(m_inputMode, VoiceInputModeNames.getLeft(*inputMode), changed)) - m_lastInputTime = 0; - } + if (auto inputMode = config.optString("inputMode")) { + if (change(m_inputMode, VoiceInputModeNames.getLeft(*inputMode), changed)) + m_lastInputTime = 0; + } - bool shouldResetEncoder = false; - if (auto channelMode = config.optString("channelMode")) { - if (change(m_channelMode, VoiceChannelModeNames.getLeft(*channelMode), changed)) { - closeDevice(); - shouldResetEncoder = true; - resetDevice(); - } - } + bool shouldResetEncoder = false; + if (auto channelMode = config.optString("channelMode")) { + if (change(m_channelMode, VoiceChannelModeNames.getLeft(*channelMode), changed)) { + closeDevice(); + shouldResetEncoder = true; + resetDevice(); + } + } - // not saving this setting to disk, as it's just for audiophiles - // don't want someone fudging their bitrate from the intended defaults and forgetting - if (auto bitrate = config.opt("bitrate")) { + // not saving this setting to disk, as it's just for audiophiles + // don't want someone fudging their bitrate from the intended defaults and forgetting + if (auto bitrate = config.opt("bitrate")) { unsigned newBitrate = bitrate->canConvert(Json::Type::Int) - ? clamp((unsigned)bitrate->toUInt(), 6000u, 510000u) : 0; + ? clamp((unsigned)bitrate->toUInt(), 6000u, 510000u) : 0; shouldResetEncoder |= change(m_bitrate, newBitrate, changed); } if (shouldResetEncoder) resetEncoder(); - if (changed && !skipSave) - scheduleSave(); + if (changed && !skipSave) + scheduleSave(); } @@ -240,14 +240,14 @@ void Voice::loadJson(Json const& config, bool skipSave) { Json Voice::saveJson() const { return JsonObject{ {"enabled", m_enabled}, - {"deviceName", m_deviceName ? *m_deviceName : Json()}, + {"deviceName", m_deviceName ? *m_deviceName : Json()}, {"inputEnabled", m_inputEnabled}, {"threshold", m_threshold}, {"inputVolume", m_inputVolume}, {"outputVolume", m_outputVolume}, {"inputMode", VoiceInputModeNames.getRight(m_inputMode)}, {"channelMode", VoiceChannelModeNames.getRight(m_channelMode)}, - {"loopback", m_loopback}, + {"loopback", m_loopback}, {"version", 1} }; } @@ -261,7 +261,7 @@ void Voice::save() const { void Voice::scheduleSave() { if (!m_nextSaveTime) - m_nextSaveTime = Time::monotonicMilliseconds() + 2000; + m_nextSaveTime = Time::monotonicMilliseconds() + 2000; } Voice::SpeakerPtr Voice::setLocalSpeaker(SpeakerId speakerId) { @@ -273,7 +273,7 @@ Voice::SpeakerPtr Voice::setLocalSpeaker(SpeakerId speakerId) { } Voice::SpeakerPtr Voice::localSpeaker() { - return m_clientSpeaker; + return m_clientSpeaker; } Voice::SpeakerPtr Voice::speaker(SpeakerId speakerId) { @@ -288,176 +288,181 @@ Voice::SpeakerPtr Voice::speaker(SpeakerId speakerId) { } HashMap& Voice::speakers() { - return m_speakers; + return m_speakers; } List Voice::sortedSpeakers(bool onlyPlaying) { - List result; + List result; - auto sorter = [](SpeakerPtr const& a, SpeakerPtr const& b) -> bool { - if (a->lastPlayTime != b->lastPlayTime) - return a->lastPlayTime < b->lastPlayTime; - else - return a->speakerId < b->speakerId; - }; + auto sorter = [](SpeakerPtr const& a, SpeakerPtr const& b) -> bool { + if (a->lastPlayTime != b->lastPlayTime) + return a->lastPlayTime < b->lastPlayTime; + else + return a->speakerId < b->speakerId; + }; - for (auto& p : m_speakers) { - if (!onlyPlaying || p.second->playing) - result.insertSorted(p.second, sorter); - } + for (auto& p : m_speakers) { + if (!onlyPlaying || p.second->playing) + result.insertSorted(p.second, sorter); + } - return result; + return result; } void Voice::clearSpeakers() { - auto it = m_speakers.begin(); - while (it != m_speakers.end()) { - if (it->second == m_clientSpeaker) - it = ++it; - else - it = m_speakers.erase(it); - } + auto it = m_speakers.begin(); + while (it != m_speakers.end()) { + if (it->second == m_clientSpeaker) + it = ++it; + else + it = m_speakers.erase(it); + } } void Voice::readAudioData(uint8_t* stream, int len) { - auto now = Time::monotonicMilliseconds(); - bool active = m_encoder && m_encodedChunksLength < 2048 + auto now = Time::monotonicMilliseconds(); + bool active = m_encoder && m_encodedChunksLength < 2048 && (m_inputMode == VoiceInputMode::VoiceActivity || now < m_lastInputTime); - size_t sampleCount = len / 2; + size_t sampleCount = len / 2; - if (active) { - float decibels = getAudioLoudness((int16_t*)stream, sampleCount); + if (active) { + float decibels = getAudioLoudness((int16_t*)stream, sampleCount); - if (m_inputMode == VoiceInputMode::VoiceActivity) { - if (decibels > m_threshold) - m_lastThresholdTime = now; - active = now - m_lastThresholdTime < 50; - } - } + if (m_inputMode == VoiceInputMode::VoiceActivity) { + if (decibels > m_threshold) + m_lastThresholdTime = now; + active = now - m_lastThresholdTime < 50; + } + } - m_clientSpeaker->decibelLevel = getAudioLoudness((int16_t*)stream, sampleCount, m_inputAmplitude); + m_clientSpeaker->decibelLevel = getAudioLoudness((int16_t*)stream, sampleCount, m_inputAmplitude); - if (!m_loopback) { - if (active && !m_clientSpeaker->playing) - m_clientSpeaker->lastPlayTime = now; + if (!m_loopback) { + if (active && !m_clientSpeaker->playing) + m_clientSpeaker->lastPlayTime = now; - m_clientSpeaker->playing = active; - } + m_clientSpeaker->playing = active; + } - MutexLocker captureLock(m_captureMutex); - if (active) { - m_capturedChunksFrames += sampleCount / m_deviceChannels; - auto data = (opus_int16*)malloc(len); - memcpy(data, stream, len); - m_capturedChunks.emplace(data, sampleCount); // takes ownership - m_threadCond.signal(); - } - else { // Clear out any residual data so they don't manifest at the start of the next encode, whenever that is - while (!m_capturedChunks.empty()) - m_capturedChunks.pop(); + MutexLocker captureLock(m_captureMutex); + if (active) { + m_capturedChunksFrames += sampleCount / m_deviceChannels; + auto data = (opus_int16*)malloc(len); + memcpy(data, stream, len); + m_capturedChunks.emplace(data, sampleCount); // takes ownership + m_threadCond.signal(); + } + else { // Clear out any residual data so they don't manifest at the start of the next encode, whenever that is + while (!m_capturedChunks.empty()) + m_capturedChunks.pop(); - m_capturedChunksFrames = 0; - } + m_capturedChunksFrames = 0; + } } void Voice::mix(int16_t* buffer, size_t frameCount, unsigned channels) { - size_t samples = frameCount * channels; - static std::vector finalBuffer, speakerBuffer; - static std::vector sharedBuffer; //int32 to reduce clipping - speakerBuffer.resize(samples); - sharedBuffer.resize(samples); + size_t samples = frameCount * channels; + static std::vector finalBuffer, speakerBuffer; + static std::vector sharedBuffer; //int32 to reduce clipping + speakerBuffer.resize(samples); + sharedBuffer.resize(samples); - bool mix = false; - { - MutexLocker lock(m_activeSpeakersMutex); - auto it = m_activeSpeakers.begin(); - while (it != m_activeSpeakers.end()) { - SpeakerPtr const& speaker = *it; - VoiceAudioStream* audio = speaker->audioStream.get(); - MutexLocker audioLock(audio->mutex); - if (speaker->playing && !audio->samples.empty()) { - for (size_t i = 0; i != samples; ++i) - speakerBuffer[i] = audio->take(); + bool mix = false; + { + MutexLocker lock(m_activeSpeakersMutex); + auto it = m_activeSpeakers.begin(); + while (it != m_activeSpeakers.end()) { + SpeakerPtr const& speaker = *it; + VoiceAudioStream* audio = speaker->audioStream.get(); + MutexLocker audioLock(audio->mutex); + if (speaker->playing && !audio->samples.empty()) { + for (size_t i = 0; i != samples; ++i) + speakerBuffer[i] = audio->take(); - if (speaker != m_clientSpeaker) - speaker->decibelLevel = getAudioLoudness(speakerBuffer.data(), samples); + if (speaker != m_clientSpeaker) + speaker->decibelLevel = getAudioLoudness(speakerBuffer.data(), samples); - if (!speaker->muted) { - mix = true; + if (!speaker->muted) { + mix = true; - float volume = speaker->volume; - std::array levels = speaker->channelVolumes; - for (size_t i = 0; i != samples; ++i) - sharedBuffer[i] += (int32_t)(speakerBuffer[i]) * levels[i % 2] * volume; - //Blends the weaker channel into the stronger one, - /* unused, is a bit too strong on stereo music. - float maxLevel = max(levels[0], levels[1]); - float leftToRight = maxLevel != 0.0f ? 1.0f - (levels[0] / maxLevel) : 0.0f; - float rightToLeft = maxLevel != 0.0f ? 1.0f - (levels[1] / maxLevel) : 0.0f; + float volume = speaker->volume; + std::array levels; + { + MutexLocker locker(speaker->mutex); + levels = speaker->channelVolumes; + } + for (size_t i = 0; i != samples; ++i) + sharedBuffer[i] += (int32_t)(speakerBuffer[i]) * levels[i % 2] * volume; + //Blends the weaker channel into the stronger one, + /* unused, is a bit too strong on stereo music. + float maxLevel = max(levels[0], levels[1]); + float leftToRight = maxLevel != 0.0f ? 1.0f - (levels[0] / maxLevel) : 0.0f; + float rightToLeft = maxLevel != 0.0f ? 1.0f - (levels[1] / maxLevel) : 0.0f; - int16_t* speakerData = speakerBuffer.data(); - int32_t* sharedData = sharedBuffer.data(); - for (size_t i = 0; i != frameCount; ++i) { - auto leftSample = (float)*speakerData++; - auto rightSample = (float)*speakerData++; - - if (rightToLeft != 0.0f) - leftSample = ( leftSample + rightSample * rightToLeft) / (1.0f + rightToLeft); - if (leftToRight != 0.0f) - rightSample = (rightSample + leftSample * leftToRight) / (1.0f + leftToRight); + int16_t* speakerData = speakerBuffer.data(); + int32_t* sharedData = sharedBuffer.data(); + for (size_t i = 0; i != frameCount; ++i) { + auto leftSample = (float)*speakerData++; + auto rightSample = (float)*speakerData++; + + if (rightToLeft != 0.0f) + leftSample = ( leftSample + rightSample * rightToLeft) / (1.0f + rightToLeft); + if (leftToRight != 0.0f) + rightSample = (rightSample + leftSample * leftToRight) / (1.0f + leftToRight); - *sharedData++ += (int32_t)leftSample * levels[0]; - *sharedData++ += (int32_t)rightSample * levels[1]; - } - //*/ - } - ++it; - } - else { - speaker->playing = false; - if (speaker != m_clientSpeaker) - speaker->decibelLevel = -96.0f; - it = m_activeSpeakers.erase(it); - } - } - } + *sharedData++ += (int32_t)leftSample * levels[0]; + *sharedData++ += (int32_t)rightSample * levels[1]; + } + //*/ + } + ++it; + } + else { + speaker->playing = false; + if (speaker != m_clientSpeaker) + speaker->decibelLevel = -96.0f; + it = m_activeSpeakers.erase(it); + } + } + } - if (mix) { - finalBuffer.resize(sharedBuffer.size(), 0); + if (mix) { + finalBuffer.resize(sharedBuffer.size(), 0); - float vol = m_outputAmplitude; - for (size_t i = 0; i != sharedBuffer.size(); ++i) - finalBuffer[i] = (int16_t)clamp(sharedBuffer[i] * vol, INT16_MIN, INT16_MAX); + float vol = m_outputAmplitude; + for (size_t i = 0; i != sharedBuffer.size(); ++i) + finalBuffer[i] = (int16_t)clamp(sharedBuffer[i] * vol, INT16_MIN, INT16_MAX); - SDL_MixAudioFormat((Uint8*)buffer, (Uint8*)finalBuffer.data(), AUDIO_S16, finalBuffer.size() * sizeof(int16_t), SDL_MIX_MAXVOLUME); - memset(sharedBuffer.data(), 0, sharedBuffer.size() * sizeof(int32_t)); - } + SDL_MixAudioFormat((Uint8*)buffer, (Uint8*)finalBuffer.data(), AUDIO_S16, finalBuffer.size() * sizeof(int16_t), SDL_MIX_MAXVOLUME); + memset(sharedBuffer.data(), 0, sharedBuffer.size() * sizeof(int32_t)); + } } void Voice::update(float, PositionalAttenuationFunction positionalAttenuationFunction) { for (auto& entry : m_speakers) { if (SpeakerPtr& speaker = entry.second) { - if (positionalAttenuationFunction) { - speaker->channelVolumes = { + Vec2F newChannelVolumes = positionalAttenuationFunction ? Vec2F{ 1.0f - positionalAttenuationFunction(0, speaker->position, 1.0f), - 1.0f - positionalAttenuationFunction(1, speaker->position, 1.0f)}; - } else - speaker->channelVolumes = {1.0f, 1.0f}; - - auto& dbHistory = speaker->dbHistory; - memmove(&dbHistory[1], &dbHistory[0], (dbHistory.size() - 1) * sizeof(float)); - dbHistory[0] = speaker->decibelLevel; - float smoothDb = 0.0f; - for (float dB : dbHistory) - smoothDb += dB; + 1.0f - positionalAttenuationFunction(1, speaker->position, 1.0f) + } : Vec2F{1.0f, 1.0f}; + { + MutexLocker locker(speaker->mutex); + speaker->channelVolumes = newChannelVolumes; + } + auto& dbHistory = speaker->dbHistory; + memmove(&dbHistory[1], &dbHistory[0], (dbHistory.size() - 1) * sizeof(float)); + dbHistory[0] = speaker->decibelLevel; + float smoothDb = 0.0f; + for (float dB : dbHistory) + smoothDb += dB; - speaker->smoothDb = smoothDb / dbHistory.size(); + speaker->smoothDb = smoothDb / dbHistory.size(); } } if (m_nextSaveTime && Time::monotonicMilliseconds() > m_nextSaveTime) { - m_nextSaveTime = 0; + m_nextSaveTime = 0; save(); } } @@ -473,122 +478,122 @@ void Voice::setDeviceName(Maybe deviceName) { } StringList Voice::availableDevices() { - int devices = SDL_GetNumAudioDevices(1); - StringList deviceList; - if (devices > 0) { - deviceList.reserve(devices); - for (int i = 0; i != devices; ++i) - deviceList.emplace_back(SDL_GetAudioDeviceName(i, 1)); - } - deviceList.sort(); - return deviceList; + int devices = SDL_GetNumAudioDevices(1); + StringList deviceList; + if (devices > 0) { + deviceList.reserve(devices); + for (int i = 0; i != devices; ++i) + deviceList.emplace_back(SDL_GetAudioDeviceName(i, 1)); + } + deviceList.sort(); + return deviceList; } int Voice::send(DataStreamBuffer& out, size_t budget) { - out.setByteOrder(ByteOrder::LittleEndian); - out.write(VOICE_VERSION); + out.setByteOrder(ByteOrder::LittleEndian); + out.write(VOICE_VERSION); - MutexLocker encodeLock(m_encodeMutex); + MutexLocker encodeLock(m_encodeMutex); - if (m_encodedChunks.empty()) - return 0; + if (m_encodedChunks.empty()) + return 0; - std::vector encodedChunks = std::move(m_encodedChunks); - m_encodedChunksLength = 0; + std::vector encodedChunks = std::move(m_encodedChunks); + m_encodedChunksLength = 0; - encodeLock.unlock(); + encodeLock.unlock(); - for (auto& chunk : encodedChunks) { - out.write(chunk.size()); - out.writeBytes(chunk); - if (budget && (budget -= min(budget, chunk.size())) == 0) - break; - } + for (auto& chunk : encodedChunks) { + out.write(chunk.size()); + out.writeBytes(chunk); + if (budget && (budget -= min(budget, chunk.size())) == 0) + break; + } - m_lastSentTime = Time::monotonicMilliseconds(); - if (m_loopback) - receive(m_clientSpeaker, { out.ptr(), out.size() }); - return 1; + m_lastSentTime = Time::monotonicMilliseconds(); + if (m_loopback) + receive(m_clientSpeaker, { out.ptr(), out.size() }); + return 1; } bool Voice::receive(SpeakerPtr speaker, std::string_view view) { - if (!m_enabled || !speaker || view.empty()) - return false; + if (!m_enabled || !speaker || view.empty()) + return false; - try { - DataStreamExternalBuffer reader(view.data(), view.size()); - reader.setByteOrder(ByteOrder::LittleEndian); + try { + DataStreamExternalBuffer reader(view.data(), view.size()); + reader.setByteOrder(ByteOrder::LittleEndian); - if (reader.read() > VOICE_VERSION) - return false; + if (reader.read() > VOICE_VERSION) + return false; - uint32_t opusLength = 0; - while (!reader.atEnd()) { - reader >> opusLength; - if (reader.pos() + opusLength > reader.size()) - throw VoiceException("Opus packet length goes past end of buffer"s, false); - auto opusData = (unsigned char*)reader.ptr() + reader.pos(); - reader.seek(opusLength, IOSeek::Relative); + uint32_t opusLength = 0; + while (!reader.atEnd()) { + reader >> opusLength; + if (reader.pos() + opusLength > reader.size()) + throw VoiceException("Opus packet length goes past end of buffer"s, false); + auto opusData = (unsigned char*)reader.ptr() + reader.pos(); + reader.seek(opusLength, IOSeek::Relative); - int channels = opus_packet_get_nb_channels(opusData); - if (channels == OPUS_INVALID_PACKET) - continue; + int channels = opus_packet_get_nb_channels(opusData); + if (channels == OPUS_INVALID_PACKET) + continue; - bool mono = channels == 1; - OpusDecoder* decoder = mono ? speaker->decoderMono.get() : speaker->decoderStereo.get(); - int samples = opus_decoder_get_nb_samples(decoder, opusData, opusLength); - if (samples < 0) - throw VoiceException(strf("Decoder error: {}", opus_strerror(samples)), false); + bool mono = channels == 1; + OpusDecoder* decoder = mono ? speaker->decoderMono.get() : speaker->decoderStereo.get(); + int samples = opus_decoder_get_nb_samples(decoder, opusData, opusLength); + if (samples < 0) + throw VoiceException(strf("Decoder error: {}", opus_strerror(samples)), false); - m_decodeBuffer.resize(samples * (size_t)channels); + m_decodeBuffer.resize(samples * (size_t)channels); - int decodedSamples = opus_decode(decoder, opusData, opusLength, m_decodeBuffer.data(), m_decodeBuffer.size() * sizeof(int16_t), 0); - if (decodedSamples <= 0) { - if (decodedSamples < 0) - throw VoiceException(strf("Decoder error: {}", opus_strerror(samples)), false); - return true; - } + int decodedSamples = opus_decode(decoder, opusData, opusLength, m_decodeBuffer.data(), m_decodeBuffer.size() * sizeof(int16_t), 0); + if (decodedSamples <= 0) { + if (decodedSamples < 0) + throw VoiceException(strf("Decoder error: {}", opus_strerror(samples)), false); + return true; + } - //Logger::info("Voice: decoded Opus chunk {} bytes -> {} samples", opusLength, decodedSamples * channels); + //Logger::info("Voice: decoded Opus chunk {} bytes -> {} samples", opusLength, decodedSamples * channels); - speaker->audioStream->resample(m_decodeBuffer.data(), (size_t)decodedSamples * channels, m_resampleBuffer, mono); + speaker->audioStream->resample(m_decodeBuffer.data(), (size_t)decodedSamples * channels, m_resampleBuffer, mono); - { - MutexLocker lock(speaker->audioStream->mutex); - auto& samples = speaker->audioStream->samples; + { + MutexLocker lock(speaker->audioStream->mutex); + auto& samples = speaker->audioStream->samples; - auto now = Time::monotonicMilliseconds(); - if (now - speaker->lastReceiveTime < 1000) { - auto limit = (size_t)speaker->minimumPlaySamples + 22050; - if (samples.size() > limit) { // skip ahead if we're getting too far - for (size_t i = samples.size(); i >= limit; --i) - samples.pop(); - } - } - else - samples = std::queue(); + auto now = Time::monotonicMilliseconds(); + if (now - speaker->lastReceiveTime < 1000) { + auto limit = (size_t)speaker->minimumPlaySamples + 22050; + if (samples.size() > limit) { // skip ahead if we're getting too far + for (size_t i = samples.size(); i >= limit; --i) + samples.pop(); + } + } + else + samples = std::queue(); - speaker->lastReceiveTime = now; + speaker->lastReceiveTime = now; - if (mono) { - for (int16_t sample : m_resampleBuffer) { - samples.push(sample); - samples.push(sample); - } - } - else { - for (int16_t sample : m_resampleBuffer) - samples.push(sample); - } - } - playSpeaker(speaker, channels); - } - return true; - } - catch (StarException const& e) { - Logger::error("Voice: Error receiving voice data for speaker #{} ('{}'): {}", speaker->speakerId, speaker->name, e.what()); - return false; - } + if (mono) { + for (int16_t sample : m_resampleBuffer) { + samples.push(sample); + samples.push(sample); + } + } + else { + for (int16_t sample : m_resampleBuffer) + samples.push(sample); + } + } + playSpeaker(speaker, channels); + } + return true; + } + catch (StarException const& e) { + Logger::error("Voice: Error receiving voice data for speaker #{} ('{}'): {}", speaker->speakerId, speaker->name, e.what()); + return false; + } } void Voice::setInput(bool input) { @@ -615,31 +620,31 @@ OpusEncoder* Voice::createEncoder(int channels) { void Voice::resetEncoder() { int channels = encoderChannels(); - MutexLocker locker(m_threadMutex); + MutexLocker locker(m_threadMutex); m_encoder.reset(createEncoder(channels)); - int bitrate = m_bitrate > 0 ? (int)m_bitrate : (channels == 2 ? 50000 : 24000); + int bitrate = m_bitrate > 0 ? (int)m_bitrate : (channels == 2 ? 50000 : 24000); opus_encoder_ctl(m_encoder.get(), OPUS_SET_BITRATE(bitrate)); } void Voice::resetDevice() { - if (shouldEnableInput()) - openDevice(); - else - closeDevice(); + if (shouldEnableInput()) + openDevice(); + else + closeDevice(); } void Voice::openDevice() { closeDevice(); m_applicationController->openAudioInputDevice( - m_deviceName ? m_deviceName->utf8Ptr() : nullptr, - VOICE_SAMPLE_RATE, - m_deviceChannels = encoderChannels(), - this, - [](void* userdata, uint8_t* stream, int len) { + m_deviceName ? m_deviceName->utf8Ptr() : nullptr, + VOICE_SAMPLE_RATE, + m_deviceChannels = encoderChannels(), + this, + [](void* userdata, uint8_t* stream, int len) { ((Voice*)(userdata))->readAudioData(stream, len); } - ); + ); m_deviceOpen = true; } @@ -649,81 +654,81 @@ void Voice::closeDevice() { return; m_applicationController->closeAudioInputDevice(); - m_clientSpeaker->playing = false; - m_clientSpeaker->decibelLevel = -96.0f; + m_clientSpeaker->playing = false; + m_clientSpeaker->decibelLevel = -96.0f; m_deviceOpen = false; } bool Voice::playSpeaker(SpeakerPtr const& speaker, int) { - if (speaker->playing || speaker->audioStream->samples.size() < speaker->minimumPlaySamples) - return false; + if (speaker->playing || speaker->audioStream->samples.size() < speaker->minimumPlaySamples) + return false; - if (!speaker->playing) { - speaker->lastPlayTime = Time::monotonicMilliseconds(); - speaker->playing = true; - MutexLocker lock(m_activeSpeakersMutex); - m_activeSpeakers.insert(speaker); - } - return true; + if (!speaker->playing) { + speaker->lastPlayTime = Time::monotonicMilliseconds(); + speaker->playing = true; + MutexLocker lock(m_activeSpeakersMutex); + m_activeSpeakers.insert(speaker); + } + return true; } void Voice::thread() { - while (true) { - MutexLocker locker(m_threadMutex); + while (true) { + MutexLocker locker(m_threadMutex); - m_threadCond.wait(m_threadMutex); - if (m_stopThread) - return; + m_threadCond.wait(m_threadMutex); + if (m_stopThread) + return; - { - MutexLocker locker(m_captureMutex); - ByteArray encoded(VOICE_MAX_PACKET_SIZE, 0); - size_t frameSamples = VOICE_FRAME_SIZE * (size_t)m_deviceChannels; - while (m_capturedChunksFrames >= VOICE_FRAME_SIZE) { - std::vector samples; - samples.reserve(frameSamples); - size_t samplesLeft = frameSamples; - while (samplesLeft && !m_capturedChunks.empty()) { - auto& front = m_capturedChunks.front(); - if (front.exhausted()) - m_capturedChunks.pop(); - else - samplesLeft -= front.takeSamples(samples, samplesLeft); - } - m_capturedChunksFrames -= VOICE_FRAME_SIZE; + { + MutexLocker locker(m_captureMutex); + ByteArray encoded(VOICE_MAX_PACKET_SIZE, 0); + size_t frameSamples = VOICE_FRAME_SIZE * (size_t)m_deviceChannels; + while (m_capturedChunksFrames >= VOICE_FRAME_SIZE) { + std::vector samples; + samples.reserve(frameSamples); + size_t samplesLeft = frameSamples; + while (samplesLeft && !m_capturedChunks.empty()) { + auto& front = m_capturedChunks.front(); + if (front.exhausted()) + m_capturedChunks.pop(); + else + samplesLeft -= front.takeSamples(samples, samplesLeft); + } + m_capturedChunksFrames -= VOICE_FRAME_SIZE; - if (m_inputAmplitude != 1.0f) { - for (size_t i = 0; i != samples.size(); ++i) - samples[i] *= m_inputAmplitude; - } + if (m_inputAmplitude != 1.0f) { + for (size_t i = 0; i != samples.size(); ++i) + samples[i] *= m_inputAmplitude; + } - if (int encodedSize = opus_encode(m_encoder.get(), samples.data(), VOICE_FRAME_SIZE, (unsigned char*)encoded.ptr(), encoded.size())) { - if (encodedSize == 1) - continue; + if (int encodedSize = opus_encode(m_encoder.get(), samples.data(), VOICE_FRAME_SIZE, (unsigned char*)encoded.ptr(), encoded.size())) { + if (encodedSize == 1) + continue; - encoded.resize(encodedSize); + encoded.resize(encodedSize); - { - MutexLocker lock(m_encodeMutex); - m_encodedChunks.emplace_back(std::move(encoded)); - m_encodedChunksLength += encodedSize; + { + MutexLocker locker(m_encodeMutex); + m_encodedChunks.emplace_back(std::move(encoded)); + m_encodedChunksLength += encodedSize; - encoded = ByteArray(VOICE_MAX_PACKET_SIZE, 0); - } + encoded = ByteArray(VOICE_MAX_PACKET_SIZE, 0); + } - //Logger::info("Voice: encoded Opus chunk {} samples -> {} bytes", frameSamples, encodedSize); - } - else if (encodedSize < 0) - Logger::error("Voice: Opus encode error {}", opus_strerror(encodedSize)); - } - } + //Logger::info("Voice: encoded Opus chunk {} samples -> {} bytes", frameSamples, encodedSize); + } + else if (encodedSize < 0) + Logger::error("Voice: Opus encode error {}", opus_strerror(encodedSize)); + } + } - continue; + continue; - locker.unlock(); - Thread::yield(); - } - return; + locker.unlock(); + Thread::yield(); + } + return; } } \ No newline at end of file diff --git a/source/frontend/StarVoice.hpp b/source/frontend/StarVoice.hpp index 08d01d1..59daeaa 100644 --- a/source/frontend/StarVoice.hpp +++ b/source/frontend/StarVoice.hpp @@ -89,7 +89,7 @@ public: atomic playing = 0; atomic decibelLevel = -96.0f; atomic volume = 1.0f; - atomic> channelVolumes = std::array{1.0f, 1.0f}; + Vec2F channelVolumes = Vec2F::filled(1.f); unsigned int minimumPlaySamples = 4096;