diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index abf297e..a7c5179 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -104,14 +104,12 @@ target_sources(tomato PUBLIC
 	./chat_gui4.cpp
 
 	./content/content.hpp
-	#./content/stream_reader.hpp
-	#./content/stream_reader_sdl_audio.hpp
-	#./content/stream_reader_sdl_audio.cpp
-	#./content/stream_reader_sdl_video.hpp
-	#./content/stream_reader_sdl_video.cpp
 	./content/frame_stream2.hpp
 	./content/sdl_video_frame_stream2.hpp
 	./content/sdl_video_frame_stream2.cpp
+	./content/audio_stream.hpp
+	./content/sdl_audio_frame_stream2.hpp
+	./content/sdl_audio_frame_stream2.cpp
 )
 
 if (TOMATO_TOX_AV)
diff --git a/src/content/audio_stream.hpp b/src/content/audio_stream.hpp
new file mode 100644
index 0000000..9098a3c
--- /dev/null
+++ b/src/content/audio_stream.hpp
@@ -0,0 +1,77 @@
+#pragma once
+
+#include "./frame_stream2.hpp"
+
+#include <solanaceae/util/span.hpp>
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+#include <variant>
+#include <vector>
+
+// raw audio frame
+// samples of all channels are interleaved,
+// planar channels are not supported
+struct AudioFrame {
+	// sequence number, to detect gaps
+	uint32_t seq {0};
+	// TODO: maybe use ts instead to discard old?
+	// since buffer size is variable, some timestamp would be needed to estimate the lost time
+
+	// samples per second
+	uint32_t sample_rate {48'000};
+
+	// number of interleaved channels
+	size_t channels {0};
+
+	// sample storage, the active alternative determines the sample format
+	std::variant<
+		std::vector<int16_t>, // S16, platform endianness
+		Span<int16_t>, // non owning variant, for direct consumption
+
+		std::vector<float>, // f32
+		Span<float> // non owning variant, for direct consumption
+	> buffer;
+
+	// helpers
+
+	// true if the buffer holds S16 samples (owning or non owning)
+	bool isS16(void) const {
+		return
+			std::holds_alternative<std::vector<int16_t>>(buffer) ||
+			std::holds_alternative<Span<int16_t>>(buffer)
+		;
+	}
+
+	// true if the buffer holds f32 samples (owning or non owning)
+	bool isF32(void) const {
+		return
+			std::holds_alternative<std::vector<float>>(buffer) ||
+			std::holds_alternative<Span<float>>(buffer)
+		;
+	}
+
+	// get a non owning view of the samples, works for both the owning and
+	// the non owning alternative
+	// T has to be the stored sample type (int16_t for S16, float for f32),
+	// a mismatch asserts, or throws std::bad_variant_access if asserts are disabled
+	template<typename T>
+	Span<T> getSpan(void) const {
+		static_assert(std::is_same_v<int16_t, T> || std::is_same_v<float, T>);
+		if constexpr (std::is_same_v<int16_t, T>) {
+			assert(isS16());
+		} else {
+			assert(isF32());
+		}
+
+		if (const auto* owned = std::get_if<std::vector<T>>(&buffer)) {
+			return Span<T>{*owned};
+		}
+		return std::get<Span<T>>(buffer);
+	}
+};
+
+using AudioFrameStream2I = FrameStream2I<AudioFrame>;
+
diff --git a/src/content/frame_stream2.hpp b/src/content/frame_stream2.hpp
index 85c5523..c6fa3b2 100644
--- a/src/content/frame_stream2.hpp
+++ b/src/content/frame_stream2.hpp
@@ -18,7 +18,7 @@
 //};
 
 template<typename FrameType>
-struct FrameStream2 {
+struct FrameStream2I {
 	// get number of available frames
 	[[nodiscard]] virtual int32_t size(void) = 0;
 
@@ -34,7 +34,7 @@ struct FrameStream2 {
 // needs count frames queue size
 // having ~1-2sec buffer size is often sufficent
 template<typename FrameType>
-struct QueuedFrameStream2 : public FrameStream2<FrameType> {
+struct QueuedFrameStream2 : public FrameStream2I<FrameType> {
 	using frame_type = FrameType;
 
 	rigtorp::SPSCQueue<FrameType> _queue;
@@ -74,9 +74,9 @@ struct QueuedFrameStream2 : public FrameStream2<FrameType> {
 	}
 };
 
-template<typename FrameType>
-struct QueuedFrameStream2Multiplexer : public FrameStream2<FrameType> {
-	using ReaderType = QueuedFrameStream2<FrameType>;
+template<typename FrameType, typename ReaderType = QueuedFrameStream2<FrameType>>
+struct QueuedFrameStream2Multiplexer : public FrameStream2I<FrameType> {
+	using reader_type_t = ReaderType;
 
 	// pointer stability
 	std::vector<std::unique_ptr<ReaderType>> _readers;
diff --git a/src/content/sdl_audio_frame_stream2.cpp b/src/content/sdl_audio_frame_stream2.cpp
new file mode 100644
index 0000000..d79b003
--- /dev/null
+++ b/src/content/sdl_audio_frame_stream2.cpp
@@ -0,0 +1,95 @@
+#include "./sdl_audio_frame_stream2.hpp"
+
+#include <SDL3/SDL_audio.h>
+
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <iostream>
+#include <variant>
+#include <vector>
+
+// opens the default capture device and starts the thread, that pulls the
+// captured samples from sdl and pushes them into the multiplexer
+// if the device can not be opened, no thread is started and no frames are ever pushed
+SDLAudioInputDevice::SDLAudioInputDevice(void) : _stream{nullptr, &SDL_DestroyAudioStream} {
+	constexpr SDL_AudioSpec spec = { SDL_AUDIO_S16, 1, 48000 };
+
+	_stream = {
+		SDL_OpenAudioDeviceStream(SDL_AUDIO_DEVICE_DEFAULT_CAPTURE, &spec, nullptr, nullptr),
+		&SDL_DestroyAudioStream
+	};
+
+	if (!static_cast<bool>(_stream)) {
+		std::cerr << "SDL open audio device failed! (" << SDL_GetError() << ")\n";
+		// without a stream there is nothing to read, so dont resume or start the thread
+		return;
+	}
+
+	const auto audio_device_id = SDL_GetAudioStreamDevice(_stream.get());
+	SDL_ResumeAudioDevice(audio_device_id);
+
+	static constexpr size_t buffer_size {512}; // in samples
+	// time the device needs to fill one buffer, ~10ms with the hardcoded mono settings
+	// multiply before dividing, otherwise the integer division truncates to 0
+	const std::chrono::milliseconds interval {static_cast<int64_t>(buffer_size) * 1000 / spec.freq};
+
+	const auto sample_rate = static_cast<uint32_t>(spec.freq);
+	const auto channels = static_cast<size_t>(spec.channels);
+
+	_thread = std::thread([this, interval, sample_rate, channels](void) {
+		// owned by this thread and reused for every read, to keep the allocation
+		// (not static, or multiple devices would share one frame across threads)
+		AudioFrame tmp_frame {
+			0, // TODO: seq
+			sample_rate, channels,
+			std::vector<int16_t>(buffer_size)
+		};
+
+		while (!_thread_should_quit) {
+			auto& buffer = std::get<std::vector<int16_t>>(tmp_frame.buffer);
+			buffer.resize(buffer_size);
+
+			const auto read_bytes = SDL_GetAudioStreamData(
+				_stream.get(),
+				buffer.data(),
+				buffer.size()*sizeof(int16_t)
+			);
+
+			// no new frame yet, or error
+			if (read_bytes <= 0) {
+				// only sleep 1/5, we expected a frame
+				std::this_thread::sleep_for(interval/5);
+				continue;
+			}
+
+			buffer.resize(read_bytes/sizeof(int16_t)); // shrinking keeps the capacity
+
+			const bool someone_listening = push(tmp_frame);
+
+			if (someone_listening) {
+				// poll at twice the buffer rate after a successful read
+				std::this_thread::sleep_for(interval/2);
+			} else {
+				std::cerr << "i guess no one is listening\n";
+				// we just sleep 32x as long, bc no one is listening
+				// with the hardcoded settings, this is ~320ms
+				// TODO: just hardcode something like 500ms?
+				// TODO: suspend
+				std::this_thread::sleep_for(interval*32);
+			}
+		}
+	});
+}
+
+// signals the thread to quit and waits for it, the stream itself is destroyed by the _stream member afterwards
+SDLAudioInputDevice::~SDLAudioInputDevice(void) {
+	// TODO: pause audio device?
+	_thread_should_quit = true;
+	// the thread only exists if opening the device succeeded
+	if (_thread.joinable()) {
+		_thread.join();
+	}
+	// TODO: what to do if readers are still present?
+}
+
diff --git a/src/content/sdl_audio_frame_stream2.hpp b/src/content/sdl_audio_frame_stream2.hpp
new file mode 100644
index 0000000..d27d829
--- /dev/null
+++ b/src/content/sdl_audio_frame_stream2.hpp
@@ -0,0 +1,46 @@
+#pragma once
+
+#include "./frame_stream2.hpp"
+#include "./audio_stream.hpp"
+
+#include <SDL3/SDL.h>
+
+#include <atomic>
+#include <cstdint>
+#include <memory>
+#include <thread>
+#include <variant>
+#include <vector>
+
+// we dont have to multiplex ourself, because sdl streams and virtual devices already do this, but we do it anyway
+using SDLAudioInputFrameStream2Multiplexer = QueuedFrameStream2Multiplexer<AudioFrame>;
+using SDLAudioInputFrameStream2 = SDLAudioInputFrameStream2Multiplexer::reader_type_t; // just use the default for now
+
+// audio capture source
+// pulls samples from an sdl audio stream in its own thread and pushes them as AudioFrames into the multiplexer
+// object components?
+struct SDLAudioInputDevice : protected SDLAudioInputFrameStream2Multiplexer {
+	// sdl stream of the capture device, null if opening failed
+	std::unique_ptr<SDL_AudioStream, decltype(&SDL_DestroyAudioStream)> _stream;
+
+	// set by the destructor, polled by the thread
+	std::atomic<bool> _thread_should_quit {false};
+	// the thread function captures this
+	std::thread _thread;
+
+	// construct source and start thread
+	// TODO: optimize so the thread is not always running
+	SDLAudioInputDevice(void);
+
+	// stops the thread and closes the device?
+	~SDLAudioInputDevice(void);
+
+	// only acquiring and releasing readers is exposed from the multiplexer
+	using SDLAudioInputFrameStream2Multiplexer::aquireReader;
+	using SDLAudioInputFrameStream2Multiplexer::releaseReader;
+};
+
+// TODO: not implemented yet
+struct SDLAudioOutputDevice {
+};
+
diff --git a/src/content/sdl_video_frame_stream2.hpp b/src/content/sdl_video_frame_stream2.hpp
index 56a62e5..6448036 100644
--- a/src/content/sdl_video_frame_stream2.hpp
+++ b/src/content/sdl_video_frame_stream2.hpp
@@ -43,7 +43,7 @@ struct SDLVideoFrame {
 };
 
 using SDLVideoFrameStream2Multiplexer = QueuedFrameStream2Multiplexer<SDLVideoFrame>;
-using SDLVideoFrameStream2 = SDLVideoFrameStream2Multiplexer::ReaderType;
+using SDLVideoFrameStream2 = SDLVideoFrameStream2Multiplexer::reader_type_t; // just use the default for now
 
 struct SDLVideoCameraContent : protected SDLVideoFrameStream2Multiplexer {
 	// meh, empty default
diff --git a/src/main.cpp b/src/main.cpp
index 38e0acf..d57b54b 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -77,7 +77,23 @@ int main(int argc, char** argv) {
 	// optionally init audio and camera
 	if (SDL_Init(SDL_INIT_AUDIO) < 0) {
 		std::cerr << "SDL_Init AUDIO failed (" << SDL_GetError() << ")\n";
+	} else {
+		SDLAudioInputDevice aid;
+		auto* reader = aid.aquireReader();
+
+		for (size_t i = 0; i < 20; i++) {
+			std::this_thread::sleep_for(std::chrono::milliseconds(10));
+			auto new_frame_opt = reader->pop();
+			if (new_frame_opt.has_value()) {
+				std::cout << "audio frame was seq:" << new_frame_opt.value().seq << " sr:" << new_frame_opt.value().sample_rate << " " << (new_frame_opt.value().isS16()?"S16":"F32") << " l:" << (new_frame_opt.value().isS16()?new_frame_opt.value().getSpan<int16_t>().size:new_frame_opt.value().getSpan<float>().size) << "\n";
+			} else {
+				std::cout << "no audio frame\n";
+			}
+		}
+
+		aid.releaseReader(reader);
 	}
+
 	if (SDL_Init(SDL_INIT_CAMERA) < 0) {
 		std::cerr << "SDL_Init CAMERA failed (" << SDL_GetError() << ")\n";
 	} else { // HACK