On Mon, Jun 20, 2005 at 09:31:26PM +0530, Suparna Bhattacharya wrote: > > (1) Updating AIO to use wait-bit based filtered wakeups (me/wli) > > Status: Updated to 2.6.12-rc6, needs review > Enable wait-bit-based filtered wakeups to work for AIO. Replaces the wait queue entry in the kiocb with a wait bit structure, to allow enough space for the wait bit key. This adds an extra level of indirection in references to the wait queue entry in the iocb. Also adds an extra check in aio_wake_function to allow for other kinds of waiters that do not require wait-bit filtering, based on the assumption that the key passed in is NULL in such cases. Signed-off-by: Suparna Bhattacharya Signed-off-by: Benjamin LaHaise diff -purN --exclude=description 74_default-io_wait/fs/aio.c 75_aio-wait_bit/fs/aio.c --- 74_default-io_wait/fs/aio.c 2005-08-08 17:15:47.000000000 -0400 +++ 75_aio-wait_bit/fs/aio.c 2005-08-08 17:15:58.000000000 -0400 @@ -546,7 +546,7 @@ struct kioctx *lookup_ioctx(unsigned lon return ioctx; } -static int lock_kiocb_action(void *param) +static int lock_kiocb_action(void *param, wait_queue_t *wait) { schedule(); return 0; @@ -730,14 +730,14 @@ static ssize_t aio_run_iocb(struct kiocb * cause the iocb to be kicked for continuation (through * the aio_wake_function callback). */ - BUG_ON(current->io_wait != NULL); - current->io_wait = &iocb->ki_wait; + BUG_ON(!is_sync_wait(current->io_wait)); + current->io_wait = &iocb->ki_wait.wait; ret = retry(iocb); current->io_wait = NULL; if (-EIOCBRETRY != ret) { if (-EIOCBQUEUED != ret) { - BUG_ON(!list_empty(&iocb->ki_wait.task_list)); + BUG_ON(!list_empty(&iocb->ki_wait.wait.task_list)); aio_complete(iocb, ret, 0); /* must not access the iocb after this */ } @@ -746,7 +746,7 @@ static ssize_t aio_run_iocb(struct kiocb * Issue an additional retry to avoid waiting forever if * no waits were queued (e.g. in case of a short read). 
*/ - if (list_empty(&iocb->ki_wait.task_list)) + if (list_empty(&iocb->ki_wait.wait.task_list)) kiocbSetKicked(iocb); } out: @@ -899,7 +899,7 @@ static void queue_kicked_iocb(struct kio unsigned long flags; int run = 0; - WARN_ON((!list_empty(&iocb->ki_wait.task_list))); + WARN_ON((!list_empty(&iocb->ki_wait.wait.task_list))); spin_lock_irqsave(&ctx->ctx_lock, flags); run = __queue_kicked_iocb(iocb); @@ -1477,7 +1477,13 @@ static ssize_t aio_setup_iocb(struct kio */ int aio_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) { - struct kiocb *iocb = container_of(wait, struct kiocb, ki_wait); + struct wait_bit_queue *wait_bit + = container_of(wait, struct wait_bit_queue, wait); + struct kiocb *iocb = container_of(wait_bit, struct kiocb, ki_wait); + + /* Assumes that a non-NULL key implies wait bit filtering */ + if (key && !test_wait_bit_key(wait, key)) + return 0; list_del_init(&wait->task_list); kick_iocb(iocb); @@ -1532,8 +1538,9 @@ int fastcall io_submit_one(struct kioctx req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf; req->ki_left = req->ki_nbytes = iocb->aio_nbytes; req->ki_opcode = iocb->aio_lio_opcode; - init_waitqueue_func_entry(&req->ki_wait, aio_wake_function); - INIT_LIST_HEAD(&req->ki_wait.task_list); + init_waitqueue_func_entry(&req->ki_wait.wait, aio_wake_function); + INIT_LIST_HEAD(&req->ki_wait.wait.task_list); + req->ki_run_list.next = req->ki_run_list.prev = NULL; req->ki_retried = 0; ret = aio_setup_iocb(req); diff -purN --exclude=description 74_default-io_wait/fs/read_write.c 75_aio-wait_bit/fs/read_write.c --- 74_default-io_wait/fs/read_write.c 2005-08-08 17:15:47.000000000 -0400 +++ 75_aio-wait_bit/fs/read_write.c 2005-08-08 17:15:58.000000000 -0400 @@ -251,7 +251,7 @@ ssize_t do_sync_read(struct file *filp, while (ret == -EIOCBRETRY) ret = wait_on_retry_sync_kiocb(&kiocb); - BUG_ON(!list_empty(&kiocb.ki_wait.task_list)); + BUG_ON(!list_empty(&kiocb.ki_wait.wait.task_list)); *ppos = kiocb.ki_pos; return ret; } @@ 
-305,7 +305,7 @@ ssize_t do_sync_write(struct file *filp, while (ret == -EIOCBRETRY) ret = wait_on_retry_sync_kiocb(&kiocb); - BUG_ON(!list_empty(&kiocb.ki_wait.task_list)); + BUG_ON(!list_empty(&kiocb.ki_wait.wait.task_list)); *ppos = kiocb.ki_pos; return ret; } diff -purN --exclude=description 74_default-io_wait/include/linux/aio.h 75_aio-wait_bit/include/linux/aio.h --- 74_default-io_wait/include/linux/aio.h 2005-08-08 17:15:47.000000000 -0400 +++ 75_aio-wait_bit/include/linux/aio.h 2005-08-08 17:15:58.000000000 -0400 @@ -73,7 +73,7 @@ struct kiocb { size_t ki_nbytes; /* copy of iocb->aio_nbytes */ char __user *ki_buf; /* remaining iocb->aio_buf */ size_t ki_left; /* remaining bytes */ - wait_queue_t ki_wait; + struct wait_bit_queue ki_wait; long ki_retried; /* just for testing */ long ki_kicked; /* just for testing */ long ki_queued; /* just for testing */ @@ -94,7 +94,7 @@ struct kiocb { (x)->ki_dtor = NULL; \ (x)->ki_obj.tsk = tsk; \ (x)->ki_user_data = 0; \ - init_wait((&(x)->ki_wait)); \ + init_wait_bit_task((&(x)->ki_wait), current);\ } while (0) #define AIO_RING_MAGIC 0xa10a10a1 diff -purN --exclude=description 74_default-io_wait/kernel/wait.c 75_aio-wait_bit/kernel/wait.c --- 74_default-io_wait/kernel/wait.c 2005-08-08 17:15:55.000000000 -0400 +++ 75_aio-wait_bit/kernel/wait.c 2005-08-08 17:15:58.000000000 -0400 @@ -132,7 +132,8 @@ EXPORT_SYMBOL(autoremove_wake_function); int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *arg) { - if (!test_wait_bit_key(wait, arg)) + /* Assumes that a non-NULL key implies wait bit filtering */ + if (arg && !test_wait_bit_key(wait, arg)) return 0; return autoremove_wake_function(wait, mode, sync, arg); } @@ -154,7 +155,12 @@ __wait_on_bit(wait_queue_head_t *wq, str if (test_bit(q->key.bit_nr, q->key.flags)) ret = (*action)(q->key.flags, &q->wait); } while (test_bit(q->key.bit_nr, q->key.flags) && !ret); - finish_wait(wq, &q->wait); + /* + * AIO retries require the wait queue entry to remain queued + 
* for async notification + */ + if (ret != -EIOCBRETRY) + finish_wait(wq, &q->wait); return ret; } EXPORT_SYMBOL(__wait_on_bit); @@ -183,7 +189,12 @@ __wait_on_bit_lock(wait_queue_head_t *wq break; } } while (test_and_set_bit(q->key.bit_nr, q->key.flags)); - finish_wait(wq, &q->wait); + /* + * AIO retries require the wait queue entry to remain queued + * for async notification + */ + if (ret != -EIOCBRETRY) + finish_wait(wq, &q->wait); return ret; } EXPORT_SYMBOL(__wait_on_bit_lock); diff -purN --exclude=description 74_default-io_wait/lib/semaphore-sleepers.c 75_aio-wait_bit/lib/semaphore-sleepers.c --- 74_default-io_wait/lib/semaphore-sleepers.c 2005-08-16 15:43:24.000000000 -0400 +++ 75_aio-wait_bit/lib/semaphore-sleepers.c 2005-08-08 17:15:58.000000000 -0400 @@ -104,7 +104,7 @@ static int aio_down_wait(wait_queue_t *w * the wait_queue_head. */ if (!atomic_add_negative(sleepers - 1, &sem->count)) { - iocb->ki_wait.func = aio_wake_function; + iocb->ki_wait.wait.func = aio_wake_function; iocb->ki_cancel = NULL; sem->sleepers = 0; sem->aio_owner = iocb; @@ -124,15 +124,14 @@ static int cancel_aio_down(struct kiocb /* At this point, the kiocb is locked and even if we have kicked * it, the pointer to the semaphore is still valid. */ - struct semaphore *sem = iocb->ki_wait.private; + struct semaphore *sem = iocb->ki_wait.wait.private; unsigned long flags; int ret = 0; spin_lock_irqsave(&sem->wait.lock, flags); - if (!list_empty(&iocb->ki_wait.task_list)) { + if (!list_empty(&iocb->ki_wait.wait.task_list)) { /* Ensure aio_down_wait() can no longer be called. */ - list_del_init(&iocb->ki_wait.task_list); - iocb->ki_cancel = NULL; + list_del_init(&iocb->ki_wait.wait.task_list); fixup_down_trylock_locked(sem); event->res = is_sync_kiocb(iocb) ? 
-ERESTARTSYS : -EINTR; } else @@ -151,16 +150,16 @@ fastcall long __sched __aio_down(struct return 0; } - iocb->ki_wait.private = sem; - iocb->ki_wait.func = aio_down_wait; + iocb->ki_wait.wait.private = sem; + iocb->ki_wait.wait.func = aio_down_wait; spin_lock_irqsave(&sem->wait.lock, flags); - add_wait_queue_exclusive_locked(&sem->wait, &iocb->ki_wait); + add_wait_queue_exclusive_locked(&sem->wait, &iocb->ki_wait.wait); sem->sleepers++; iocb->ki_cancel = cancel_aio_down; - aio_down_wait(&iocb->ki_wait, 0, 0, NULL); + aio_down_wait(&iocb->ki_wait.wait, 0, 0, NULL); spin_unlock_irqrestore(&sem->wait.lock, flags); return -EIOCBRETRY; }