diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h
index 03a63077f..7880091b1 100644
--- a/include/sys/dmu_impl.h
+++ b/include/sys/dmu_impl.h
@@ -276,6 +276,25 @@ typedef struct dmu_sendarg {
 	boolean_t dsa_sent_end;
 } dmu_sendarg_t;
 
+/*
+ * Completion callback for an asynchronous dmu_buf_hold_array_by_dnode().
+ * With error == 0, numbufs/dbp describe the held buffers.  With a non-zero
+ * error the holds have already been released and numbufs/dbp are 0/NULL.
+ */
+typedef void dmu_done_func_t(void *private, int error, int numbufs,
+    dmu_buf_t **dbp);
+
+/* Per-request state handed from the hold path to the root zio callback. */
+typedef struct dmu_op {
+	dmu_done_func_t *dop_done_func;	/* NULL for a synchronous hold */
+	void *dop_private;		/* opaque argument for dop_done_func */
+	dmu_buf_t **dop_dbp;		/* array of held buffers */
+	uint64_t dop_nblks;		/* number of entries in dop_dbp */
+	boolean_t dop_read;		/* wait for in-flight reads/fills */
+	void *dop_tag;			/* tag the buffers were held with */
+	int dop_err;			/* result of a synchronous hold */
+} dmu_op_t;
+
 void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
 void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
 int dmu_buf_hold_noread(objset_t *, uint64_t, uint64_t,
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 8779eb358..0e5d162cc 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -478,6 +478,71 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, void *tag,
 	return (err);
 }
 
+/*
+ * Done callback for the root zio created by dmu_buf_hold_array_by_dnode().
+ *
+ * zio->io_private is the dmu_op_t describing the request.  If the i/o
+ * succeeded and the request is a read, wait for every buffer still in
+ * DB_READ or DB_FILL to settle, failing with EIO if one ends up
+ * DB_UNCACHED.  On any error the holds are released here.
+ *
+ * Asynchronous request (dop_done_func != NULL): the dmu_op_t is freed and
+ * the caller's callback is invoked with the result.
+ * Synchronous request (dop_done_func == NULL): the result is stored in
+ * dop_err and the dmu_op_t is left for the waiting thread to free.
+ *
+ * NOTE(review): this blocks in cv_wait() from inside a zio done callback;
+ * confirm that is acceptable in the context the callback runs in.
+ */
+static void
+dmu_done_func(zio_t *zio)
+{
+	dmu_op_t *dop = zio->io_private;
+	dmu_buf_t **dbp = dop->dop_dbp;
+	uint64_t nblks = dop->dop_nblks, i;
+	dmu_done_func_t *done_func = dop->dop_done_func;
+	void *tag = dop->dop_tag;
+	void *private = dop->dop_private;
+	boolean_t read = dop->dop_read;
+	int err = zio->io_error;
+
+	/* wait for other io to complete */
+	if (err == 0 && read) {
+		for (i = 0; i < nblks; i++) {
+			dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i];
+			mutex_enter(&db->db_mtx);
+			while (db->db_state == DB_READ ||
+			    db->db_state == DB_FILL)
+				cv_wait(&db->db_changed, &db->db_mtx);
+			if (db->db_state == DB_UNCACHED)
+				err = SET_ERROR(EIO);
+			mutex_exit(&db->db_mtx);
+			if (err)
+				break;
+		}
+	}
+
+	if (err) {
+		/*
+		 * The array must not be used once the holds are released
+		 * (the synchronous path never returns it on error either),
+		 * so report "no buffers" to the callback.
+		 */
+		dmu_buf_rele_array(dbp, nblks, tag);
+		dbp = NULL;
+		nblks = 0;
+	}
+
+	if (done_func != NULL) {
+		/* async: dop is ours to free; then notify the caller */
+		kmem_free(dop, sizeof (dmu_op_t));
+		done_func(private, err, (int)nblks, dbp);
+	} else {
+		/* sync: the waiting thread reads dop_err and frees dop */
+		dop->dop_err = err;
+	}
+}
+
 /*
  * Note: longer-term, we should modify all of the dmu_buf_*() interfaces
  * to take a held dnode rather than <object, offset> -- the lookup is wasteful,
@@ -485,8 +550,10 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, void *tag,
  * whose dnodes are in the same block.
  */
 static int
-dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
-    boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
+dmu_buf_hold_array_by_dnode(dnode_t *dn, dmu_done_func_t *done_func,
+    void *private, uint64_t offset, uint64_t length, boolean_t read, void *tag,
+    int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
 {
 	dmu_buf_t **dbp;
+	dmu_op_t *dop;
 	uint64_t blkid, nblks, i;
@@ -523,8 +590,25 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
 		nblks = 1;
 	}
 	dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);
+	dop = kmem_zalloc(sizeof (dmu_op_t), KM_SLEEP);
+
+	/*
+	 * Request state consumed by dmu_done_func() when the root zio
+	 * completes.  done_func == NULL marks a synchronous hold.
+	 *
+	 * NOTE(review): any early-return path below that fires the root zio
+	 * (e.g. via zio_nowait()) now runs dmu_done_func() as well; confirm
+	 * such paths neither release dbp a second time nor leak dop.
+	 */
+	dop->dop_done_func = done_func;
+	dop->dop_private = private;
+	dop->dop_dbp = dbp;
+	dop->dop_nblks = nblks;
+	dop->dop_read = read;
+	dop->dop_tag = tag;
 
-	zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL);
+	zio = zio_root(dn->dn_objset->os_spa, dmu_done_func, dop,
+	    ZIO_FLAG_CANFAIL);
 	blkid = dbuf_whichblock(dn, 0, offset);
 	for (i = 0; i < nblks; i++) {
 		dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
@@ -548,34 +632,29 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
 	}
 	rw_exit(&dn->dn_struct_rwlock);
 
-	/* wait for async i/o */
-	err = zio_wait(zio);
-	if (err) {
-		dmu_buf_rele_array(dbp, nblks, tag);
-		return (err);
-	}
-
-	/* wait for other io to complete */
-	if (read) {
-		for (i = 0; i < nblks; i++) {
-			dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i];
-			mutex_enter(&db->db_mtx);
-			while (db->db_state == DB_READ ||
-			    db->db_state == DB_FILL)
-				cv_wait(&db->db_changed, &db->db_mtx);
-			if (db->db_state == DB_UNCACHED)
-				err = SET_ERROR(EIO);
-			mutex_exit(&db->db_mtx);
-			if (err) {
-				dmu_buf_rele_array(dbp, nblks, tag);
-				return (err);
-			}
-		}
-	}
-
-	*numbufsp = nblks;
-	*dbpp = dbp;
-	return (0);
+	if (done_func != NULL) {
+		/*
+		 * Asynchronous hold: start the root zio and return.  From
+		 * here on dop belongs to dmu_done_func(), which delivers the
+		 * buffers (or the error) to done_func; numbufsp and dbpp
+		 * are not written.
+		 */
+		zio_nowait(zio);
+		return (0);
+	}
+
+	/* wait for async i/o; dmu_done_func() leaves the result in dop_err */
+	(void) zio_wait(zio);
+
+	err = dop->dop_err;
+	kmem_free(dop, sizeof (dmu_op_t));
+
+	if (err == 0) {
+		*numbufsp = nblks;
+		*dbpp = dbp;
+	}
+
+	return (err);
 }
 
 static int
@@ -589,8 +668,8 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
 	if (err)
 		return (err);
 
-	err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
-	    numbufsp, dbpp, DMU_READ_PREFETCH);
+	err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL, offset,
+	    length, read, tag, numbufsp, dbpp, DMU_READ_PREFETCH);
 
 	dnode_rele(dn, FTAG);
 
@@ -608,8 +687,8 @@ dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset,
 
 	DB_DNODE_ENTER(db);
 	dn = DB_DNODE(db);
-	err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
-	    numbufsp, dbpp, DMU_READ_PREFETCH);
+	err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL, offset,
+	    length, read, tag, numbufsp, dbpp, DMU_READ_PREFETCH);
 	DB_DNODE_EXIT(db);
 
 	return (err);
@@ -955,8 +1034,9 @@ dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size,
 		 * NB: we could do this block-at-a-time, but it's nice
 		 * to be reading in parallel.
 		 */
-		err = dmu_buf_hold_array_by_dnode(dn, offset, mylen,
-		    TRUE, FTAG, &numbufs, &dbp, flags);
+		err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL,
+		    offset, mylen, TRUE, FTAG, &numbufs, &dbp,
+		    flags);
 		if (err)
 			break;
 
@@ -1064,8 +1144,9 @@ dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
 	if (size == 0)
 		return;
 
-	VERIFY0(dmu_buf_hold_array_by_dnode(dn, offset, size,
-	    FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH));
+	VERIFY0(dmu_buf_hold_array_by_dnode(dn, NULL, NULL,
+	    offset, size, FALSE, FTAG, &numbufs, &dbp,
+	    DMU_READ_PREFETCH));
 	dmu_write_impl(dbp, numbufs, offset, size, buf, tx);
 	dmu_buf_rele_array(dbp, numbufs, FTAG);
 }
@@ -1391,8 +1472,8 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
 	 * NB: we could do this block-at-a-time, but it's nice
 	 * to be reading in parallel.
 	 */
-	err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
-	    TRUE, FTAG, &numbufs, &dbp, 0);
+	err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL,
+	    uio->uio_loffset, size, TRUE, FTAG, &numbufs, &dbp, 0);
 	if (err)
 		return (err);
 
@@ -1495,8 +1576,9 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
 	int err = 0;
 	int i;
 
-	err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
-	    FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
+	err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL,
+	    uio->uio_loffset, size, FALSE, FTAG, &numbufs, &dbp,
+	    DMU_READ_PREFETCH);
 	if (err)
 		return (err);
 