diff --git a/include/sys/dmu_impl.h b/include/sys/dmu_impl.h
index 03a63077f..7880091b1 100644
--- a/include/sys/dmu_impl.h
+++ b/include/sys/dmu_impl.h
@@ -276,6 +276,18 @@ typedef struct dmu_sendarg {
 	boolean_t dsa_sent_end;
 } dmu_sendarg_t;
+typedef void dmu_done_func_t(void *private, int error, int numbufs, dmu_buf_t **dbpp);
+
+typedef struct dmu_op {
+	dmu_done_func_t *dop_done_func;
+	void *dop_private;
+	dmu_buf_t **dop_dbp;
+	uint64_t dop_nblks;
+	boolean_t dop_read;
+	void *dop_tag;
+	int dop_err;
+} dmu_op_t;
+
 void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
 void dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
 int dmu_buf_hold_noread(objset_t *, uint64_t, uint64_t,
diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c
index 8779eb358..0e5d162cc 100644
--- a/module/zfs/dmu.c
+++ b/module/zfs/dmu.c
@@ -478,6 +478,51 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, void *tag,
 	return (err);
 }
+void dmu_done_func(zio_t *zio)
+{
+	dmu_op_t *dop = zio->io_private;
+	dmu_buf_t **dbp = dop->dop_dbp;
+	uint64_t nblks = dop->dop_nblks, i;
+	dmu_done_func_t *done_func = dop->dop_done_func;
+	void *tag = dop->dop_tag;
+	void *private = dop->dop_private;
+	boolean_t read = dop->dop_read;
+	int err = zio->io_error;
+
+	if (done_func != NULL)
+		kmem_free(dop, sizeof (dmu_op_t));
+
+	if (err) {
+		dmu_buf_rele_array(dbp, nblks, tag);
+		goto out;
+	}
+
+	/* wait for other io to complete */
+	if (read) {
+		for (i = 0; i < nblks; i++) {
+			dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i];
+			mutex_enter(&db->db_mtx);
+			while (db->db_state == DB_READ ||
+			    db->db_state == DB_FILL)
+				cv_wait(&db->db_changed, &db->db_mtx);
+			if (db->db_state == DB_UNCACHED)
+				err = SET_ERROR(EIO);
+			mutex_exit(&db->db_mtx);
+			if (err) {
+				dmu_buf_rele_array(dbp, nblks, tag);
+				goto out;
+			}
+		}
+	}
+
+	err = 0;
+out:
+	if (done_func != NULL)
+		done_func(private, err, nblks, dbp);
+	else
+		dop->dop_err = err;
+}
+
 /*
  * Note: longer-term, we should modify all of the dmu_buf_*() interfaces
  * to take a held dnode rather than <os, object> -- the lookup is wasteful,
@@ -485,8 +530,9 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, uint32_t flags, void *tag,
  * whose dnodes are in the same block.
  */
 static int
-dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
-    boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
+dmu_buf_hold_array_by_dnode(dnode_t *dn, dmu_done_func_t *done_func,
+    void *private, uint64_t offset, uint64_t length, boolean_t read, void *tag,
+    int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
 {
 	dmu_buf_t **dbp;
 	uint64_t blkid, nblks, i;
@@ -523,8 +569,18 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
 		nblks = 1;
 	}
 	dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);
+	dmu_op_t *dop = kmem_zalloc(sizeof (dmu_op_t), KM_SLEEP);
+
+	dop->dop_done_func = done_func;
+	dop->dop_private = private;
+	dop->dop_dbp = dbp;
+	dop->dop_nblks = nblks;
+	dop->dop_read = read;
+	dop->dop_tag = tag;
+
-	zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL);
+	zio = zio_root(dn->dn_objset->os_spa, dmu_done_func, dop,
+	    ZIO_FLAG_CANFAIL);
 	blkid = dbuf_whichblock(dn, 0, offset);
 	for (i = 0; i < nblks; i++) {
 		dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
@@ -548,34 +604,21 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
 	}
 	rw_exit(&dn->dn_struct_rwlock);
-	/* wait for async i/o */
-	err = zio_wait(zio);
-	if (err) {
-		dmu_buf_rele_array(dbp, nblks, tag);
-		return (err);
+	if (done_func != NULL) {
+		return (0);
 	}
-	/* wait for other io to complete */
-	if (read) {
-		for (i = 0; i < nblks; i++) {
-			dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbp[i];
-			mutex_enter(&db->db_mtx);
-			while (db->db_state == DB_READ ||
-			    db->db_state == DB_FILL)
-				cv_wait(&db->db_changed, &db->db_mtx);
-			if (db->db_state == DB_UNCACHED)
-				err = SET_ERROR(EIO);
-			mutex_exit(&db->db_mtx);
-			if (err) {
-				dmu_buf_rele_array(dbp, nblks, tag);
-				return (err);
-			}
-		}
+	(void) zio_wait(zio);
+
+	err = dop->dop_err;
+	kmem_free(dop, sizeof (dmu_op_t));
+
+	if (err == 0) {
+		*numbufsp = nblks;
+		*dbpp = dbp;
 	}
-	*numbufsp = nblks;
-	*dbpp = dbp;
-	return (0);
+	return (err);
 }
 static int
@@ -589,8 +632,8 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
 	if (err)
 		return (err);
-	err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
-	    numbufsp, dbpp, DMU_READ_PREFETCH);
+	err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL, offset,
+	    length, read, tag, numbufsp, dbpp, DMU_READ_PREFETCH);
 	dnode_rele(dn, FTAG);
@@ -608,8 +651,8 @@ dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset,
 	DB_DNODE_ENTER(db);
 	dn = DB_DNODE(db);
-	err = dmu_buf_hold_array_by_dnode(dn, offset, length, read, tag,
-	    numbufsp, dbpp, DMU_READ_PREFETCH);
+	err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL, offset,
+	    length, read, tag, numbufsp, dbpp, DMU_READ_PREFETCH);
 	DB_DNODE_EXIT(db);
 	return (err);
@@ -955,8 +998,9 @@ dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size,
 		 * NB: we could do this block-at-a-time, but it's nice
 		 * to be reading in parallel.
 		 */
-		err = dmu_buf_hold_array_by_dnode(dn, offset, mylen,
-		    TRUE, FTAG, &numbufs, &dbp, flags);
+		err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL,
+		    offset, mylen, TRUE, FTAG, &numbufs, &dbp,
+		    flags);
 		if (err)
 			break;
@@ -1064,8 +1108,9 @@ dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
 	if (size == 0)
 		return;
-	VERIFY0(dmu_buf_hold_array_by_dnode(dn, offset, size,
-	    FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH));
+	VERIFY0(dmu_buf_hold_array_by_dnode(dn, NULL, NULL,
+	    offset, size, FALSE, FTAG, &numbufs, &dbp,
+	    DMU_READ_PREFETCH));
 	dmu_write_impl(dbp, numbufs, offset, size, buf, tx);
 	dmu_buf_rele_array(dbp, numbufs, FTAG);
 }
@@ -1391,8 +1436,8 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
 	 * NB: we could do this block-at-a-time, but it's nice
 	 * to be reading in parallel.
 	 */
-	err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
-	    TRUE, FTAG, &numbufs, &dbp, 0);
+	err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL,
+	    uio->uio_loffset, size, TRUE, FTAG, &numbufs, &dbp, 0);
 	if (err)
 		return (err);
@@ -1495,8 +1540,9 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
 	int err = 0;
 	int i;
-	err = dmu_buf_hold_array_by_dnode(dn, uio->uio_loffset, size,
-	    FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH);
+	err = dmu_buf_hold_array_by_dnode(dn, NULL, NULL,
+	    uio->uio_loffset, size, FALSE, FTAG, &numbufs, &dbp,
+	    DMU_READ_PREFETCH);
 	if (err)
 		return (err);
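
Below is a minimal caller sketch (not part of the patch) for the new asynchronous path, assuming only what the diff shows: a non-NULL done_func makes dmu_buf_hold_array_by_dnode() return right after issuing the holds, the callback is invoked as done_func(private, error, numbufs, dbp) from dmu_done_func(), and on error the buffer array has already been released before the callback runs. The names my_read_state_t, my_read_done() and my_async_read() are hypothetical and exist only for illustration; since dmu_buf_hold_array_by_dnode() is static, such a caller would have to live in dmu.c or the entry point would need to be exported.

/* Hypothetical caller, for illustration only -- none of this is in the diff. */
typedef struct my_read_state {
	uint64_t mrs_offset;
	uint64_t mrs_size;
} my_read_state_t;

static void
my_read_done(void *private, int error, int numbufs, dmu_buf_t **dbp)
{
	my_read_state_t *mrs = private;

	if (error != 0) {
		/* dmu_done_func() has already released the buffer array. */
		kmem_free(mrs, sizeof (my_read_state_t));
		return;
	}

	/* The buffers are cached at this point; consume dbp[0..numbufs-1]. */

	/* Drop the holds, using the same tag passed to the hold call (mrs). */
	dmu_buf_rele_array(dbp, numbufs, mrs);
	kmem_free(mrs, sizeof (my_read_state_t));
}

static int
my_async_read(dnode_t *dn, uint64_t offset, uint64_t size)
{
	my_read_state_t *mrs = kmem_zalloc(sizeof (my_read_state_t), KM_SLEEP);
	dmu_buf_t **dbp;
	int numbufs;
	int err;

	mrs->mrs_offset = offset;
	mrs->mrs_size = size;

	/*
	 * With a non-NULL done_func the call returns once the holds are
	 * issued; numbufs/dbp are not filled in on this path and ownership
	 * of the buffer array passes to my_read_done() on success.
	 */
	err = dmu_buf_hold_array_by_dnode(dn, my_read_done, mrs,
	    offset, size, TRUE, mrs, &numbufs, &dbp, DMU_READ_PREFETCH);
	if (err != 0) {
		/* The hold failed synchronously; the callback will not run. */
		kmem_free(mrs, sizeof (my_read_state_t));
	}
	return (err);
}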