|
25 | 25 | #include "chafa.h"
|
26 | 26 | #include "internal/chafa-batch.h"
|
27 | 27 |
|
28 |
| -#define DEBUG(x) |
29 |
| - |
30 |
| -#define N_THREADS_OVERSHOOT 2 |
31 |
| - |
32 |
| -static GMutex chafa_batch_mutex; |
33 |
| -static GCond chafa_batch_cond; |
34 |
| -static gint chafa_batch_n_threads_waiting; |
35 |
| -static gint chafa_batch_n_threads_active; |
| 28 | +static gint chafa_batch_n_threads_global; |
36 | 29 |
|
37 | 30 | static gint
|
38 | 31 | allocate_threads (gint max_threads, gint n_batches)
|
39 | 32 | {
|
40 |
| - gint n_threads_inactive; |
| 33 | + gint prev_n_threads; |
41 | 34 | gint n_threads;
|
42 | 35 |
|
43 | 36 | /* GThreadPool supports sharing threads between pools, but there is no
|
44 | 37 | * mechanism to manage the global thread count. If the batch API is being
|
45 | 38 | * called from multiple threads, we risk creating N * M workers, which
|
46 | 39 | * can result in hundreds of threads.
|
47 | 40 | *
|
48 |
| - * Therefore, we use a mutex to gatekeep the global thread count. We |
49 |
| - * allocate threads greedily to maximize intra-batch parallelism (good for |
50 |
| - * animations and few/intermittent stills), with a small overshoot factor |
51 |
| - * to permute the allocation over time if there are lots of incoming |
52 |
| - * batches. In the latter case, most batches will eventually be assigned a |
53 |
| - * single thread each, which is ideal for inter-batch parallelism. */ |
54 |
| - |
55 |
| - g_atomic_int_inc (&chafa_batch_n_threads_waiting); |
56 |
| - g_mutex_lock (&chafa_batch_mutex); |
| 41 | + * Therefore, we maintain a global count of active threads and allocate |
| 42 | + * each caller's allotment from that. The minimum allocation is 1 thread, |
| 43 | + * in which case the operation is performed in the calling thread. Single- |
| 44 | + * threaded tasks are allowed to overshoot the maximum, so maximum |
| 45 | + * concurrency will be N + M - 1, where N is the number of calling threads |
| 46 | + * and M is the requisition from chafa_get_n_actual_threads(). For typical |
| 47 | + * workloads, average concurrency will likely be close to M. */ |
57 | 48 |
|
58 |
| - while (chafa_batch_n_threads_active >= max_threads + N_THREADS_OVERSHOOT) |
59 |
| - g_cond_wait (&chafa_batch_cond, &chafa_batch_mutex); |
| 49 | + prev_n_threads = 0; |
| 50 | + n_threads = MIN (max_threads, n_batches); |
60 | 51 |
|
61 |
| - n_threads_inactive = max_threads - chafa_batch_n_threads_active |
62 |
| - - chafa_batch_n_threads_waiting + 1; |
63 |
| - n_threads_inactive = MAX (n_threads_inactive, 1); |
64 |
| - n_threads = MIN (n_threads_inactive, n_batches); |
| 52 | + /* Geometric backoff: The first iteration adds to the global, subsequent |
| 53 | + * iterations subtract until we're happy. */ |
65 | 54 |
|
66 |
| - chafa_batch_n_threads_active += n_threads; |
67 |
| - DEBUG (g_printerr ("ChafaBatch active threads: %d (+%d)\n", chafa_batch_n_threads_active, n_threads)); |
| 55 | + for (;;) |
| 56 | + { |
| 57 | + gint next_n_global = n_threads |
| 58 | + + g_atomic_int_add (&chafa_batch_n_threads_global, |
| 59 | + n_threads - prev_n_threads); |
| 60 | + if (next_n_global <= max_threads || n_threads == 1) |
| 61 | + break; |
68 | 62 |
|
69 |
| - g_atomic_int_dec_and_test (&chafa_batch_n_threads_waiting); |
70 |
| - g_mutex_unlock (&chafa_batch_mutex); |
| 63 | + prev_n_threads = n_threads; |
| 64 | + n_threads /= 2; |
| 65 | + } |
71 | 66 |
|
72 | 67 | return n_threads;
|
73 | 68 | }
|
74 | 69 |
|
75 | 70 | static void
|
76 |
| -deallocate_threads (gint max_threads, gint n_threads) |
| 71 | +deallocate_threads (gint n_threads) |
77 | 72 | {
|
78 |
| - g_mutex_lock (&chafa_batch_mutex); |
79 |
| - chafa_batch_n_threads_active -= n_threads; |
80 |
| - |
81 |
| - if (chafa_batch_n_threads_active + n_threads >= max_threads |
82 |
| - && chafa_batch_n_threads_active < max_threads) |
83 |
| - { |
84 |
| - g_cond_broadcast (&chafa_batch_cond); |
85 |
| - } |
86 |
| - |
87 |
| - g_mutex_unlock (&chafa_batch_mutex); |
| 73 | + g_atomic_int_add (&chafa_batch_n_threads_global, -n_threads); |
88 | 74 | }
|
89 | 75 |
|
90 | 76 | void
|
@@ -191,5 +177,5 @@ chafa_process_batches (gpointer ctx, GFunc batch_func, GFunc post_func, gint n_r
|
191 | 177 | }
|
192 | 178 |
|
193 | 179 | g_free (batches);
|
194 |
| - deallocate_threads (max_threads, n_threads); |
| 180 | + deallocate_threads (n_threads); |
195 | 181 | }
|
0 commit comments