diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h
index 03a63077f..7880091b1 100644
--- a/include/sys/dmu_impl.h
+++ b/include/sys/dmu_impl.h
@@ -276,6 +276,18 @@ typedef struct dmu_sendarg {
 	boolean_t dsa_sent_end;
 } dmu_sendarg_t;
 
+typedef void dmu_done_func_t(void *private, int error, int numbufs,
+    dmu_buf_t **dbp);
+typedef struct dmu_op {		/* state of one hold-array request */
+	dmu_done_func_t	*dop_done_func;	/* caller's callback; NULL if sync */
+	void		*dop_private;	/* first argument of dop_done_func */
+	dmu_buf_t	**dop_dbp;	/* array of held buffers */
+	uint64_t	dop_nblks;	/* number of entries in dop_dbp */
+	boolean_t	dop_read;	/* wait for pending reads/fills */
+	void		*dop_tag;	/* tag the holds were taken with */
+	int		dop_err;	/* result for a synchronous caller */
+} dmu_op_t;
+
 void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
 void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
 int dmu_buf_hold_noread(objset_t *, uint64_t, uint64_t,
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 8779eb358..0e5d162cc 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -478,6 +478,51 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, void *tag,
 	return (err);
 }
 
+/*
+ * Done callback of the root zio built by dmu_buf_hold_array_by_dnode().
+ * zio->io_private is the dmu_op_t describing the request.  On any error
+ * the holds in dop_dbp are released.  The result is handed to the caller's
+ * dop_done_func if one was supplied; otherwise it is stored in dop_err for
+ * the synchronous caller, which also owns and frees the dmu_op_t.
+ *
+ * NOTE(review): cv_wait() below blocks inside a zio done callback; confirm
+ * that is acceptable in the context this callback runs in.
+ */
+void
+dmu_done_func(zio_t *zio)
+{
+	dmu_op_t *dop = zio->io_private;
+	dmu_done_func_t *done_func = dop->dop_done_func;
+	void *private = dop->dop_private;
+	dmu_buf_t **dbp = dop->dop_dbp;
+	uint64_t nblks = dop->dop_nblks, i;
+	int err = zio->io_error;
+
+	/* wait for other io to complete */
+	for (i = 0; err == 0 && dop->dop_read && i < nblks; i++) {
+		dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i];
+		mutex_enter(&db->db_mtx);
+		while (db->db_state == DB_READ || db->db_state == DB_FILL)
+			cv_wait(&db->db_changed, &db->db_mtx);
+		if (db->db_state == DB_UNCACHED)
+			err = SET_ERROR(EIO);
+		mutex_exit(&db->db_mtx);
+	}
+	if (err != 0) {
+		dmu_buf_rele_array(dbp, nblks, dop->dop_tag);
+		dbp = NULL;	/* holds dropped; don't expose the array */
+	}
+
+	if (done_func == NULL) {
+		dop->dop_err = err;
+		return;
+	}
+
+	/* Async: everything needed was copied out above, so free dop first. */
+	kmem_free(dop, sizeof (dmu_op_t));
+	done_func(private, err, (int)nblks, dbp);
+}
+
 /*
  * Note: longer-term, we should modify all of the dmu_buf_*() interfaces
  * to take a held dnode rather than <os, object> -- the lookup is wasteful,
@@ -485,8 +530,9 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, void *tag,
  * whose dnodes are in the same block.
  */
 static int
-dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
-    boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
+dmu_buf_hold_array_by_dnode(dnode_t *dn, dmu_done_func_t *done_func,
+    void *private, uint64_t offset, uint64_t length, boolean_t read,
+    void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
 {
 	dmu_buf_t **dbp;
 	uint64_t blkid, nblks, i;
@@ -523,8 +569,18 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
 		nblks = 1;
 	}
 	dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);
+	dmu_op_t *dop = kmem_zalloc(sizeof (dmu_op_t), KM_SLEEP);
+
+	/* State handed to dmu_done_func() through zio->io_private. */
+	dop->dop_done_func = done_func;
+	dop->dop_private = private;
+	dop->dop_dbp = dbp;
+	dop->dop_nblks = nblks;
+	dop->dop_read = read;
+	dop->dop_tag = tag;
+	/* NOTE(review): verify early returns below do not leak dop. */
+	zio = zio_root(dn->dn_objset->os_spa, &dmu_done_func, dop,
+	    ZIO_FLAG_CANFAIL);
 
-	zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL);
 	blkid = dbuf_whichblock(dn, 0, offset);
 	for (i = 0; i < nblks; i++) {
 		dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
@@ -548,34 +604,21 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
 	}
 	rw_exit(&dn->dn_struct_rwlock);
 
-	/* wait for async i/o */
-	err = zio_wait(zio);
-	if (err) {
-		dmu_buf_rele_array(dbp, nblks, tag);
-		return (err);
+	if (done_func != NULL) {
+		zio_nowait(zio);	/* kick off the root zio */
+		return (0);
 	}
 
-	/* wait for other io to complete */
-	if (read) {
-		for (i = 0; i < nblks; i++) {
-			dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i];
-			mutex_enter(&db->db_mtx);
-			while (db->db_state == DB_READ ||
-			    db->db_state == DB_FILL)
-				cv_wait(&db->db_changed, &db->db_mtx);
-			if (db->db_state == DB_UNCACHED)
-				err = SET_ERROR(EIO);
-			mutex_exit(&db->db_mtx);
-			if (err) {
-				dmu_buf_rele_array(dbp, nblks, tag);
-				return (err);
-			}
-		}
+	/* Synchronous caller: dmu_done_func() stores the result in dop_err. */
+	(void) zio_wait(zio);
+	err = dop->dop_err;
+	kmem_free(dop, sizeof (dmu_op_t));
+	if (err == 0) {
+		*numbufsp = nblks;
+		*dbpp = dbp;
 	}
 
-	*numbufsp = nblks;
-	*dbpp = dbp;
-	return (0);
+	return (err);
 }
 
 static int
@@ -589,8 +632,8 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
 	if (err)
 		return (err);
 
-	err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
-	    numbufsp, dbpp, DMU_READ_PREFETCH);
+	err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL, offset,
+	    length, read, tag, numbufsp, dbpp, DMU_READ_PREFETCH);
 
 	dnode_rele(dn, FTAG);
 
@@ -608,8 +651,8 @@ dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset,
 
 	DB_DNODE_ENTER(db);
 	dn = DB_DNODE(db);
-	err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
-	    numbufsp, dbpp, DMU_READ_PREFETCH);
+	err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL, offset,
+	    length, read, tag, numbufsp, dbpp, DMU_READ_PREFETCH);
 	DB_DNODE_EXIT(db);
 
 	return (err);
@@ -955,8 +998,9 @@ dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size,
 		 * NB: we could do this block-at-a-time, but it's nice
 		 * to be reading in parallel.
 		 */
-		err = dmu_buf_hold_array_by_dnode(dn, offset, mylen,
-		    TRUE, FTAG, &numbufs, &dbp, flags);
+		err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL,
+		    offset, mylen, TRUE, FTAG, &numbufs, &dbp,
+		    flags);
 		if (err)
 			break;
 
@@ -1064,8 +1108,9 @@ dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
 	if (size == 0)
 		return;
 
-	VERIFY0(dmu_buf_hold_array_by_dnode(dn, offset, size,
-	    FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH));
+	VERIFY0(dmu_buf_hold_array_by_dnode(dn, NULL, NULL,
+	    offset, size, FALSE, FTAG, &numbufs, &dbp,
+	    DMU_READ_PREFETCH));
 	dmu_write_impl(dbp, numbufs, offset, size, buf, tx);
 	dmu_buf_rele_array(dbp, numbufs, FTAG);
 }
@@ -1391,8 +1436,8 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
 	 * NB: we could do this block-at-a-time, but it's nice
 	 * to be reading in parallel.
 	 */
-	err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
-	    TRUE, FTAG, &numbufs, &dbp, 0);
+	err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL,
+	    uio->uio_loffset, size, TRUE, FTAG, &numbufs, &dbp, 0);
 	if (err)
 		return (err);
 
@@ -1495,8 +1540,9 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
 	int err = 0;
 	int i;
 
-	err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
-	    FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
+	err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL,
+	    uio->uio_loffset, size, FALSE, FTAG, &numbufs, &dbp,
+	    DMU_READ_PREFETCH);
 	if (err)
 		return (err);