orchid/src/libs/scheduler/thread_pool.rs

//! A thread pool for executing tasks in parallel, spawning threads as workload
//! increases and terminating them as tasks finish. This is not terribly
//! efficient; its main design goal is to parallelize blocking I/O calls.
//!
//! This is the abstract implementation of the scheduler.
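//!
//! As a usage sketch, blocking reads might be fanned out like this (the file
//! names and closures here are illustrative, not part of the library):
//!
//! ```no_run
//! use orchidlang::libs::scheduler::thread_pool::{Query, ThreadPool};
//! use std::sync::mpsc::channel;
//!
//! let pool = ThreadPool::new();
//! let (tx, rx) = channel();
//! for name in ["a.txt", "b.txt", "c.txt"] {
//!   let tx = tx.clone();
//!   // Each blocking read runs on its own worker thread
//!   pool.submit((move || std::fs::read_to_string(name)).then(move |r| tx.send(r).unwrap()));
//! }
//! drop(tx);
//! // Collect the results as the workers finish
//! let results: Vec<_> = rx.iter().collect();
//! ```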
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::{sync_channel, SyncSender};
use std::sync::{Arc, Mutex};
use std::thread::spawn;
/// A trait for a task dispatched on a [ThreadPool]. The task owns all relevant
/// data, is safe to pass between threads and is executed only once.
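///
/// Any `FnOnce() + Send + 'static` closure is a task via the blanket impl
/// below, so a minimal sketch can submit a plain closure:
///
/// ```
/// use orchidlang::libs::scheduler::thread_pool::ThreadPool;
///
/// let pool = ThreadPool::new();
/// pool.submit(|| println!("ran on a worker thread"));
/// ```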
pub trait Task: Send + 'static {
  /// Execute the task. At a minimum, this involves signaling some other
  /// thread; otherwise the task has no effect.
  fn run(self);
}
impl<F: FnOnce() + Send + 'static> Task for F {
  fn run(self) { self() }
}
/// An async unit of work that produces some result, see [Task]. This can be
/// wrapped in a generic reporter to create a task.
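///
/// A hand-written query might look like this sketch (`Sleep` is illustrative,
/// not part of the library):
///
/// ```
/// use orchidlang::libs::scheduler::thread_pool::Query;
/// use std::time::Duration;
///
/// struct Sleep(Duration);
/// impl Query for Sleep {
///   type Result = Duration;
///   // Block this worker thread, then report how long we slept
///   fn run(self) -> Duration {
///     std::thread::sleep(self.0);
///     self.0
///   }
/// }
/// ```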
pub trait Query: Send + 'static {
  /// The value produced by the query
  type Result: Send + 'static;
  /// Execute the query, producing some value which can then be sent to
  /// another thread
  fn run(self) -> Self::Result;
  /// Associate the query with a reporter expressed as a plain function.
  /// Note that because every lambda has a distinct type and every thread pool
  /// runs exactly one type of task, this can appear only once in the code for
  /// a given thread pool. It is practical in a narrow set of cases; most of
  /// the time, however, you are better off defining an explicit reporter.
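  ///
  /// A minimal sketch of wiring a closure query to a channel reporter (the
  /// closures below are illustrative):
  ///
  /// ```
  /// use orchidlang::libs::scheduler::thread_pool::{Query, ThreadPool};
  /// use std::sync::mpsc::channel;
  ///
  /// let pool = ThreadPool::new();
  /// let (tx, rx) = channel();
  /// // The closure is a `Query` via the blanket impl; `then` attaches a
  /// // reporter that ships the result back to the calling thread.
  /// pool.submit((|| 2 + 2).then(move |n| tx.send(n).unwrap()));
  /// assert_eq!(rx.recv().unwrap(), 4);
  /// ```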
  fn then<F: FnOnce(Self::Result) + Send + 'static>(self, callback: F) -> QueryTask<Self, F>
  where Self: Sized {
    QueryTask { query: self, callback }
  }
}
impl<F: FnOnce() -> R + Send + 'static, R: Send + 'static> Query for F {
  type Result = R;
  fn run(self) -> Self::Result { self() }
}
/// A reporter that calls a statically known function with the result of a
/// query. Constructed with [Query::then]
pub struct QueryTask<Q: Query, F: FnOnce(Q::Result) + Send + 'static> {
  query: Q,
  callback: F,
}
impl<Q: Query, F: FnOnce(Q::Result) + Send + 'static> Task for QueryTask<Q, F> {
  fn run(self) { (self.callback)(self.query.run()) }
}
enum Message<T: Task> {
  Stop,
  Task(T),
}
struct ThreadPoolData<T: Task> {
  rdv_point: Mutex<Option<SyncSender<Message<T>>>>,
  stopping: AtomicBool,
}
/// A thread pool to execute blocking I/O operations in parallel.
/// This thread pool is pretty inefficient for CPU-bound operations because it
/// spawns an unbounded number of concurrent threads and destroys them eagerly.
/// It is assumed that the tasks at hand are substantially, but not
/// incomparably, more expensive than spawning a new thread.
///
/// If multiple threads finish their tasks, one waiting thread is kept, the
/// rest exit. If all threads are busy, new threads are spawned when tasks
/// arrive. To get rid of the last waiting thread, drop the thread pool.
///
/// ```
/// use orchidlang::libs::scheduler::thread_pool::{Task, ThreadPool};
///
/// struct MyTask(&'static str);
/// impl Task for MyTask {
///   fn run(self) { println!("{}", self.0) }
/// }
///
/// let pool = ThreadPool::new();
///
/// // spawns first thread
/// pool.submit(MyTask("foo"));
/// // probably spawns second thread
/// pool.submit(MyTask("bar"));
/// // either spawns third thread or reuses first
/// pool.submit(MyTask("baz"));
/// ```
pub struct ThreadPool<T: Task> {
  data: Arc<ThreadPoolData<T>>,
}
impl<T: Task> ThreadPool<T> {
  /// Create a new thread pool. This only initializes the thread-safe data
  /// structures used to synchronize tasks and doesn't spawn any threads.
  /// The first submission spawns the first thread.
  pub fn new() -> Self {
    Self {
      data: Arc::new(ThreadPoolData {
        rdv_point: Mutex::new(None),
        stopping: AtomicBool::new(false),
      }),
    }
  }
  /// Submit a task to the thread pool. This tries to send the task to the
  /// waiting thread, or spawns a new one. If a thread is done with its task
  /// and finds that another thread is already waiting, it exits.
  pub fn submit(&self, task: T) {
    let mut standby = self.data.rdv_point.lock().unwrap();
    if let Some(port) = standby.take() {
      (port.try_send(Message::Task(task))).expect(
        "This channel cannot be disconnected unless the receiver crashes
        between registering the sender and blocking for receive, and it cannot
        be full because it's taken before insertion",
      );
    } else {
      drop(standby);
      let data = self.data.clone();
      // worker thread created if all current ones are busy
      spawn(move || {
        let mut cur_task = task;
        loop {
          // Handle the task
          cur_task.run();
          // Apply for a new task if no other thread is doing so already
          let mut standby_spot = data.rdv_point.lock().unwrap();
          if standby_spot.is_some() {
            return; // exit if we would be the second in line
          }
          let (sender, receiver) = sync_channel(1);
          *standby_spot = Some(sender);
          drop(standby_spot);
          if data.stopping.load(Ordering::SeqCst) {
            return; // exit if the pool was dropped before we applied
          }
          // Wait for the next event on the pool
          let msg = (receiver.recv()).expect("We are holding a reference");
          match msg {
            // repeat with next task
            Message::Task(task) => cur_task = task,
            // exit if the pool is dropped
            Message::Stop => return,
          }
        }
      });
    }
  }
}
impl<T: Task> Default for ThreadPool<T> {
  fn default() -> Self { Self::new() }
}
impl<T: Task> Drop for ThreadPool<T> {
  // Ensure all threads exit properly
  fn drop(&mut self) {
    // Busy workers check this flag when they next apply for a task and exit
    self.data.stopping.store(true, Ordering::SeqCst);
    let mut rdv_point = self.data.rdv_point.lock().unwrap();
    if let Some(pending) = rdv_point.take() {
      // The waiting worker may have already read `stopping` and exited,
      // dropping its receiver, so a failed send can be safely ignored.
      let _ = pending.send(Message::Stop);
    }
  }
}