Commit cd15f08

ChafaBatch: Improve thread allocation
This new version is much simpler and does away with the locking.
1 parent 9d344ce commit cd15f08

1 file changed: +26 -40 lines


chafa/internal/chafa-batch.c

Lines changed: 26 additions & 40 deletions
@@ -25,66 +25,52 @@
 #include "chafa.h"
 #include "internal/chafa-batch.h"
 
-#define DEBUG(x)
-
-#define N_THREADS_OVERSHOOT 2
-
-static GMutex chafa_batch_mutex;
-static GCond chafa_batch_cond;
-static gint chafa_batch_n_threads_waiting;
-static gint chafa_batch_n_threads_active;
+static gint chafa_batch_n_threads_global;
 
 static gint
 allocate_threads (gint max_threads, gint n_batches)
 {
-    gint n_threads_inactive;
+    gint prev_n_threads;
     gint n_threads;
 
     /* GThreadPool supports sharing threads between pools, but there is no
      * mechanism to manage the global thread count. If the batch API is being
      * called from multiple threads, we risk creating N * M workers, which
      * can result in hundreds of threads.
      *
-     * Therefore, we use a mutex to gatekeep the global thread count. We
-     * allocate threads greedily to maximize intra-batch parallelism (good for
-     * animations and few/intermittent stills), with a small overshoot factor
-     * to permute the allocation over time if there are lots of incoming
-     * batches. In the latter case, most batches will eventually be assigned a
-     * single thread each, which is ideal for inter-batch parallelism. */
-
-    g_atomic_int_inc (&chafa_batch_n_threads_waiting);
-    g_mutex_lock (&chafa_batch_mutex);
+     * Therefore, we maintain a global count of active threads and allocate
+     * each caller's allotment from that. The minimum allocation is 1 thread,
+     * in which case the operation is performed in the calling thread. Single-
+     * threaded tasks are allowed to overshoot the maximum, so maximum
+     * concurrency will be N + M - 1, where N is the number of calling threads
+     * and M is the requisition from chafa_get_n_actual_threads(). For typical
+     * workloads, average concurrency will likely be close to M. */
 
-    while (chafa_batch_n_threads_active >= max_threads + N_THREADS_OVERSHOOT)
-        g_cond_wait (&chafa_batch_cond, &chafa_batch_mutex);
+    prev_n_threads = 0;
+    n_threads = MIN (max_threads, n_batches);
 
-    n_threads_inactive = max_threads - chafa_batch_n_threads_active
-        - chafa_batch_n_threads_waiting + 1;
-    n_threads_inactive = MAX (n_threads_inactive, 1);
-    n_threads = MIN (n_threads_inactive, n_batches);
+    /* Geometric backoff: The first iteration adds to the global, subsequent
+     * iterations subtract until we're happy. */
 
-    chafa_batch_n_threads_active += n_threads;
-    DEBUG (g_printerr ("ChafaBatch active threads: %d (+%d)\n", chafa_batch_n_threads_active, n_threads));
+    for (;;)
+    {
+        gint next_n_global = n_threads
+            + g_atomic_int_add (&chafa_batch_n_threads_global,
+                                n_threads - prev_n_threads);
+        if (next_n_global <= max_threads || n_threads == 1)
+            break;
 
-    g_atomic_int_dec_and_test (&chafa_batch_n_threads_waiting);
-    g_mutex_unlock (&chafa_batch_mutex);
+        prev_n_threads = n_threads;
+        n_threads /= 2;
+    }
 
     return n_threads;
 }
 
 static void
-deallocate_threads (gint max_threads, gint n_threads)
+deallocate_threads (gint n_threads)
 {
-    g_mutex_lock (&chafa_batch_mutex);
-    chafa_batch_n_threads_active -= n_threads;
-
-    if (chafa_batch_n_threads_active + n_threads >= max_threads
-        && chafa_batch_n_threads_active < max_threads)
-    {
-        g_cond_broadcast (&chafa_batch_cond);
-    }
-
-    g_mutex_unlock (&chafa_batch_mutex);
+    g_atomic_int_add (&chafa_batch_n_threads_global, -n_threads);
 }
 
 void
@@ -191,5 +177,5 @@ chafa_process_batches (gpointer ctx, GFunc batch_func, GFunc post_func, gint n_r
     }
 
     g_free (batches);
-    deallocate_threads (max_threads, n_threads);
+    deallocate_threads (n_threads);
 }
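
The new allotment logic can be read in isolation. Below is a minimal standalone sketch of the same lock-free scheme, assuming only GLib (g_atomic_int_add, MIN, g_print are real API); the *_demo names and the main() driver are hypothetical and exist only for illustration, they are not part of Chafa.

/* Minimal sketch of the lock-free thread-budget scheme.
 * Assumed build command:
 *   cc alloc-demo.c $(pkg-config --cflags --libs glib-2.0) */

#include <glib.h>

static gint n_threads_global_demo;  /* shared count of threads handed out */

static gint
allocate_threads_demo (gint max_threads, gint n_batches)
{
    gint prev_n_threads = 0;
    gint n_threads = MIN (max_threads, n_batches);

    for (;;)
    {
        /* g_atomic_int_add() returns the value *before* the add, so
         * next_n_global is the new global total including our claim. */
        gint next_n_global = n_threads
            + g_atomic_int_add (&n_threads_global_demo,
                                n_threads - prev_n_threads);

        /* Accept if we fit under the budget, or we're already down to 1. */
        if (next_n_global <= max_threads || n_threads == 1)
            break;

        /* Geometric backoff: halve the claim; the next add subtracts the
         * difference from the global count again. */
        prev_n_threads = n_threads;
        n_threads /= 2;
    }

    return n_threads;
}

static void
deallocate_threads_demo (gint n_threads)
{
    g_atomic_int_add (&n_threads_global_demo, -n_threads);
}

int
main (void)
{
    gint n = allocate_threads_demo (8, 100);

    g_print ("allocated %d thread(s)\n", n);
    deallocate_threads_demo (n);
    return 0;
}

The point of the loop is that each iteration adjusts the global count and reads the resulting total in one atomic step, so concurrent callers can over-claim only transiently, and a claim of one thread is always accepted, meaning no caller ever blocks.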
