Skip to content

Commit a51daff

Browse files
Reduce concurrent thread usage in media (#17567)
Follow on from #17558 Basically, we want to reduce the number of threads we want to use at a time, i.e. reduce the number of threads that are paused/blocked. We do this by returning from the thread when the consumer pauses the producer, rather than pausing in the thread. --------- Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
1 parent b05b2e1 commit a51daff

File tree

3 files changed

+90
-39
lines changed

3 files changed

+90
-39
lines changed

changelog.d/17567.misc

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Speed up responding to media requests.

synapse/media/_base.py

+46-39
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222

2323
import logging
2424
import os
25-
import threading
2625
import urllib
2726
from abc import ABC, abstractmethod
2827
from types import TracebackType
@@ -56,6 +55,7 @@
5655
run_in_background,
5756
)
5857
from synapse.util import Clock
58+
from synapse.util.async_helpers import DeferredEvent
5959
from synapse.util.stringutils import is_ascii
6060

6161
if TYPE_CHECKING:
@@ -620,10 +620,13 @@ class ThreadedFileSender:
620620
A producer that sends the contents of a file to a consumer, reading from the
621621
file on a thread.
622622
623-
This works by spawning a loop in a threadpool that repeatedly reads from the
624-
file and sends it to the consumer. The main thread communicates with the
625-
loop via two `threading.Event`, which controls when to start/pause reading
626-
and when to terminate.
623+
This works by having a loop in a threadpool repeatedly reading from the
624+
file, until the consumer pauses the producer. There is then a loop in the
625+
main thread that waits until the consumer resumes the producer and then
626+
starts reading in the threadpool again.
627+
628+
This is done to ensure that we're never waiting in the threadpool, as
629+
otherwise its easy to starve it of threads.
627630
"""
628631

629632
# How much data to read in one go.
@@ -643,12 +646,11 @@ def __init__(self, hs: "HomeServer") -> None:
643646

644647
# Signals if the thread should keep reading/sending data. Set means
645648
# continue, clear means pause.
646-
self.wakeup_event = threading.Event()
649+
self.wakeup_event = DeferredEvent(self.reactor)
647650

648651
# Signals if the thread should terminate, e.g. because the consumer has
649-
# gone away. Both this and `wakeup_event` should be set to terminate the
650-
# loop (otherwise the thread will block on `wakeup_event`).
651-
self.stop_event = threading.Event()
652+
# gone away.
653+
self.stop_writing = False
652654

653655
def beginFileTransfer(
654656
self, file: BinaryIO, consumer: interfaces.IConsumer
@@ -663,12 +665,7 @@ def beginFileTransfer(
663665

664666
# We set the wakeup signal as we should start producing immediately.
665667
self.wakeup_event.set()
666-
run_in_background(
667-
defer_to_threadpool,
668-
self.reactor,
669-
self.thread_pool,
670-
self._on_thread_read_loop,
671-
)
668+
run_in_background(self.start_read_loop)
672669

673670
return make_deferred_yieldable(self.deferred)
674671

@@ -686,50 +683,60 @@ def stopProducing(self) -> None:
686683
# Unregister the consumer so we don't try and interact with it again.
687684
self.consumer = None
688685

689-
# Terminate the thread loop.
686+
# Terminate the loop.
687+
self.stop_writing = True
690688
self.wakeup_event.set()
691-
self.stop_event.set()
692689

693690
if not self.deferred.called:
694691
self.deferred.errback(Exception("Consumer asked us to stop producing"))
695692

696-
def _on_thread_read_loop(self) -> None:
697-
"""This is the loop that happens on a thread."""
698-
693+
async def start_read_loop(self) -> None:
694+
"""This is the loop that drives reading/writing"""
699695
try:
700-
while not self.stop_event.is_set():
701-
# We wait for the producer to signal that the consumer wants
702-
# more data (or we should abort)
696+
while not self.stop_writing:
697+
# Start the loop in the threadpool to read data.
698+
more_data = await defer_to_threadpool(
699+
self.reactor, self.thread_pool, self._on_thread_read_loop
700+
)
701+
if not more_data:
702+
# Reached EOF, we can just return.
703+
return
704+
703705
if not self.wakeup_event.is_set():
704-
ret = self.wakeup_event.wait(self.TIMEOUT_SECONDS)
706+
ret = await self.wakeup_event.wait(self.TIMEOUT_SECONDS)
705707
if not ret:
706708
raise Exception("Timed out waiting to resume")
709+
except Exception:
710+
self._error(Failure())
711+
finally:
712+
self._finish()
707713

708-
# Check if we were woken up so that we abort the download
709-
if self.stop_event.is_set():
710-
return
714+
def _on_thread_read_loop(self) -> bool:
715+
"""This is the loop that happens on a thread.
711716
712-
# The file should always have been set before we get here.
713-
assert self.file is not None
717+
Returns:
718+
Whether there is more data to send.
719+
"""
714720

715-
chunk = self.file.read(self.CHUNK_SIZE)
716-
if not chunk:
717-
return
721+
while not self.stop_writing and self.wakeup_event.is_set():
722+
# The file should always have been set before we get here.
723+
assert self.file is not None
718724

719-
self.reactor.callFromThread(self._write, chunk)
725+
chunk = self.file.read(self.CHUNK_SIZE)
726+
if not chunk:
727+
return False
720728

721-
except Exception:
722-
self.reactor.callFromThread(self._error, Failure())
723-
finally:
724-
self.reactor.callFromThread(self._finish)
729+
self.reactor.callFromThread(self._write, chunk)
730+
731+
return True
725732

726733
def _write(self, chunk: bytes) -> None:
727734
"""Called from the thread to write a chunk of data"""
728735
if self.consumer:
729736
self.consumer.write(chunk)
730737

731738
def _error(self, failure: Failure) -> None:
732-
"""Called from the thread when there was a fatal error"""
739+
"""Called when there was a fatal error"""
733740
if self.consumer:
734741
self.consumer.unregisterProducer()
735742
self.consumer = None
@@ -738,7 +745,7 @@ def _error(self, failure: Failure) -> None:
738745
self.deferred.errback(failure)
739746

740747
def _finish(self) -> None:
741-
"""Called from the thread when it finishes (either on success or
748+
"""Called when we have finished writing (either on success or
742749
failure)."""
743750
if self.file:
744751
self.file.close()

synapse/util/async_helpers.py

+43
Original file line numberDiff line numberDiff line change
@@ -885,3 +885,46 @@ async def sleep(self, name: str, delay_ms: int) -> None:
885885
# Cancel the sleep if we were woken up
886886
if call.active():
887887
call.cancel()
888+
889+
890+
class DeferredEvent:
891+
"""Like threading.Event but for async code"""
892+
893+
def __init__(self, reactor: IReactorTime) -> None:
894+
self._reactor = reactor
895+
self._deferred: "defer.Deferred[None]" = defer.Deferred()
896+
897+
def set(self) -> None:
898+
if not self._deferred.called:
899+
self._deferred.callback(None)
900+
901+
def clear(self) -> None:
902+
if self._deferred.called:
903+
self._deferred = defer.Deferred()
904+
905+
def is_set(self) -> bool:
906+
return self._deferred.called
907+
908+
async def wait(self, timeout_seconds: float) -> bool:
909+
if self.is_set():
910+
return True
911+
912+
# Create a deferred that gets called in N seconds
913+
sleep_deferred: "defer.Deferred[None]" = defer.Deferred()
914+
call = self._reactor.callLater(timeout_seconds, sleep_deferred.callback, None)
915+
916+
try:
917+
await make_deferred_yieldable(
918+
defer.DeferredList(
919+
[sleep_deferred, self._deferred],
920+
fireOnOneCallback=True,
921+
fireOnOneErrback=True,
922+
consumeErrors=True,
923+
)
924+
)
925+
finally:
926+
# Cancel the sleep if we were woken up
927+
if call.active():
928+
call.cancel()
929+
930+
return self.is_set()

0 commit comments

Comments
 (0)