|
| 1 | +/* |
| 2 | + * Copyright (C) 2020 Fotis Xenakis |
| 3 | + * |
| 4 | + * This work is open source software, licensed under the terms of the |
| 5 | + * BSD license as described in the LICENSE file in the top-level directory. |
| 6 | + */ |
| 7 | + |
| 8 | +#include <algorithm> |
| 9 | +#include <mutex> |
| 10 | + |
| 11 | +#include <osv/debug.h> |
| 12 | +#include <osv/uio.h> |
| 13 | + |
| 14 | +#include "fuse_kernel.h" |
| 15 | +#include "virtiofs.hh" |
| 16 | +#include "virtiofs_dax.hh" |
| 17 | +#include "virtiofs_i.hh" |
| 18 | + |
| 19 | +namespace virtiofs { |
| 20 | + |
| 21 | +int dax_manager::read(virtiofs_inode& inode, uint64_t file_handle, u64 read_amt, |
| 22 | + struct uio& uio, bool aggressive) |
| 23 | +{ |
| 24 | + std::lock_guard<mutex> guard {_lock}; |
| 25 | + |
| 26 | + // Necessary pre-declarations due to goto below |
| 27 | + size_t to_map; |
| 28 | + chunk nchunks; |
| 29 | + int error; |
| 30 | + mapping_part mp; |
| 31 | + chunk fstart = uio.uio_offset / _chunk_size; |
| 32 | + off_t coffset = uio.uio_offset % _chunk_size; // offset within chunk |
| 33 | + if (find(inode.nodeid, fstart, mp)) { |
| 34 | + // Requested data (at least some initial) is already mapped |
| 35 | + auto read_amt_act = std::min<size_t>(read_amt, |
| 36 | + (mp.nchunks * _chunk_size) - coffset); |
| 37 | + virtiofs_debug("inode %lld, found in DAX (foffset=%lld, len=%lld, " |
| 38 | + "moffset=%lld)\n", inode.nodeid, uio.uio_offset, read_amt_act, |
| 39 | + (mp.mstart * _chunk_size) + coffset); |
| 40 | + goto out; |
| 41 | + } |
| 42 | + |
| 43 | + // Map file |
| 44 | + to_map = coffset; // bytes to map |
| 45 | + if (aggressive) { |
| 46 | + // Map the rest of the file |
| 47 | + to_map += inode.attr.size - uio.uio_offset; |
| 48 | + } else { |
| 49 | + // Map just enough chunks to satisfy read_amt |
| 50 | + to_map += read_amt; |
| 51 | + } |
| 52 | + nchunks = to_map / _chunk_size; |
| 53 | + if (to_map % _chunk_size > 0) { |
| 54 | + nchunks++; |
| 55 | + } |
| 56 | + // NOTE: This relies on the fact that requesting a mapping longer than the |
| 57 | + // remaining file works (see mmap() on the host). If that didn't work, we |
| 58 | + // would have to request exact mappings (byte-granularity, rather than |
| 59 | + // chunk-granularity). |
| 60 | + error = map(inode.nodeid, file_handle, nchunks, fstart, mp, true); |
| 61 | + if (error) { |
| 62 | + return error; |
| 63 | + } |
| 64 | + |
| 65 | +out: |
| 66 | + auto req_data = _window->addr + (mp.mstart * _chunk_size) + coffset; |
| 67 | + auto read_amt_act = std::min<size_t>(read_amt, |
| 68 | + (mp.nchunks * _chunk_size) - coffset); |
| 69 | + // NOTE: It shouldn't be necessary to use the mmio* interface (i.e. volatile |
| 70 | + // accesses). From the spec: "Drivers map this shared memory region with |
| 71 | + // writeback caching as if it were regular RAM." |
| 72 | + error = uiomove(const_cast<void*>(req_data), read_amt_act, &uio); |
| 73 | + if (error) { |
| 74 | + kprintf("[virtiofs] inode %lld, uiomove failed\n", inode.nodeid); |
| 75 | + } |
| 76 | + return error; |
| 77 | +} |
| 78 | + |
| 79 | +int dax_manager::map(uint64_t nodeid, uint64_t file_handle, chunk nchunks, |
| 80 | + chunk fstart, mapping_part& mapped, bool evict) |
| 81 | +{ |
| 82 | + // If necessary, unmap just enough chunks |
| 83 | + auto empty = _window_chunks - first_empty(); |
| 84 | + if (evict && empty < nchunks) { |
| 85 | + mapping_part mp; |
| 86 | + auto error = unmap(nchunks - empty, mp, false); |
| 87 | + if (error) { |
| 88 | + return error; |
| 89 | + } |
| 90 | + empty += mp.nchunks; |
| 91 | + } |
| 92 | + auto to_map = std::min<chunk>(nchunks, empty); |
| 93 | + if (to_map == 0) { |
| 94 | + // The window is full and evict is false, or nchunks is 0 |
| 95 | + mapped.mstart = _window_chunks - empty; |
| 96 | + mapped.nchunks = 0; |
| 97 | + return (nchunks == 0) ? 0 : ENOBUFS; |
| 98 | + } |
| 99 | + |
| 100 | + // Map new chunks |
| 101 | + auto mstart = _window_chunks - empty; |
| 102 | + auto error = map_ll(nodeid, file_handle, to_map, fstart, mstart); |
| 103 | + if (error) { |
| 104 | + return error; |
| 105 | + } |
| 106 | + if (!_mappings.empty()) { |
| 107 | + auto& m {_mappings.back()}; |
| 108 | + if (m.nodeid == nodeid && m.fstart + m.nchunks == fstart) { |
| 109 | + // Extend previous mapping |
| 110 | + m.nchunks += to_map; |
| 111 | + mapped.mstart = mstart; |
| 112 | + mapped.nchunks = to_map; |
| 113 | + return 0; |
| 114 | + } |
| 115 | + } |
| 116 | + _mappings.emplace_back(nodeid, to_map, fstart, mstart); |
| 117 | + mapped.mstart = mstart; |
| 118 | + mapped.nchunks = to_map; |
| 119 | + return 0; |
| 120 | +} |
| 121 | + |
| 122 | +int dax_manager::unmap(chunk nchunks, mapping_part& unmapped, bool deep) |
| 123 | +{ |
| 124 | + // Determine necessary changes |
| 125 | + chunk to_unmap = 0; |
| 126 | + auto erase_first {_mappings.cend()}; |
| 127 | + chunk to_unmap_from_last = 0; |
| 128 | + for (auto it {_mappings.crbegin()}; |
| 129 | + to_unmap < nchunks && it != _mappings.crend(); it++) { |
| 130 | + |
| 131 | + if (it->nchunks <= nchunks - to_unmap) { |
| 132 | + // Remove *it |
| 133 | + erase_first = it.base() - 1; |
| 134 | + to_unmap += it->nchunks; |
| 135 | + } else { |
| 136 | + // Modify *it |
| 137 | + to_unmap_from_last = nchunks - to_unmap; |
| 138 | + to_unmap = nchunks; |
| 139 | + } |
| 140 | + } |
| 141 | + if (to_unmap == 0) { |
| 142 | + // The window is empty, or nchunks is 0 |
| 143 | + unmapped.mstart = first_empty(); |
| 144 | + unmapped.nchunks = 0; |
| 145 | + return (nchunks == 0) ? 0 : ENODATA; |
| 146 | + } |
| 147 | + |
| 148 | + // Apply changes |
| 149 | + if (deep) { |
| 150 | + auto mstart = first_empty() - to_unmap; |
| 151 | + auto error = unmap_ll(to_unmap, mstart); |
| 152 | + if (error) { |
| 153 | + return error; |
| 154 | + } |
| 155 | + } |
| 156 | + _mappings.erase(erase_first, _mappings.cend()); |
| 157 | + if (to_unmap_from_last > 0) { |
| 158 | + _mappings.back().nchunks -= to_unmap_from_last; |
| 159 | + } |
| 160 | + |
| 161 | + unmapped.mstart = first_empty(); |
| 162 | + unmapped.nchunks = to_unmap; |
| 163 | + return 0; |
| 164 | +} |
| 165 | + |
| 166 | +int dax_manager::map_ll(uint64_t nodeid, uint64_t file_handle, chunk nchunks, |
| 167 | + chunk fstart, chunk mstart) |
| 168 | +{ |
| 169 | + assert(mstart + nchunks <= _window_chunks); |
| 170 | + |
| 171 | + // NOTE: There are restrictions on the arguments to FUSE_SETUPMAPPING, from |
| 172 | + // the spec: "Alignment constraints for FUSE_SETUPMAPPING and |
| 173 | + // FUSE_REMOVEMAPPING requests are communicated during FUSE_INIT |
| 174 | + // negotiation"): |
| 175 | + // - foffset: multiple of map_alignment from FUSE_INIT |
| 176 | + // - len: not larger than remaining file? |
| 177 | + // - moffset: multiple of map_alignment from FUSE_INIT |
| 178 | + // In practice, map_alignment is the host's page size, because foffset and |
| 179 | + // moffset are passed to mmap() on the host. These are satisfied by |
| 180 | + // _chunk_size being a multiple of map_alignment. |
| 181 | + |
| 182 | + std::unique_ptr<fuse_setupmapping_in> in_args { |
| 183 | + new (std::nothrow) fuse_setupmapping_in()}; |
| 184 | + if (!in_args) { |
| 185 | + return ENOMEM; |
| 186 | + } |
| 187 | + in_args->fh = file_handle; |
| 188 | + in_args->foffset = fstart * _chunk_size; |
| 189 | + in_args->len = nchunks * _chunk_size; |
| 190 | + in_args->flags = 0; // Read-only |
| 191 | + in_args->moffset = mstart * _chunk_size; |
| 192 | + |
| 193 | + virtiofs_debug("inode %lld, setting up mapping (foffset=%lld, len=%lld, " |
| 194 | + "moffset=%lld)\n", nodeid, in_args->foffset, in_args->len, |
| 195 | + in_args->moffset); |
| 196 | + auto error = fuse_req_send_and_receive_reply(&_drv, FUSE_SETUPMAPPING, |
| 197 | + nodeid, in_args.get(), sizeof(*in_args), nullptr, 0); |
| 198 | + if (error) { |
| 199 | + kprintf("[virtiofs] inode %lld, mapping setup failed\n", nodeid); |
| 200 | + return error; |
| 201 | + } |
| 202 | + |
| 203 | + return 0; |
| 204 | +} |
| 205 | + |
| 206 | +int dax_manager::unmap_ll(chunk nchunks, chunk mstart) |
| 207 | +{ |
| 208 | + assert(mstart + nchunks <= _window_chunks); |
| 209 | + |
| 210 | + // NOTE: FUSE_REMOVEMAPPING accepts a fuse_removemapping_in followed by |
| 211 | + // fuse_removemapping_in.count fuse_removemapping_one arguments in general. |
| 212 | + auto in_args_size = sizeof(fuse_removemapping_in) + |
| 213 | + sizeof(fuse_removemapping_one); |
| 214 | + std::unique_ptr<u8> in_args {new (std::nothrow) u8[in_args_size]}; |
| 215 | + if (!in_args) { |
| 216 | + return ENOMEM; |
| 217 | + } |
| 218 | + auto r_in = new (in_args.get()) fuse_removemapping_in(); |
| 219 | + auto r_one = new (in_args.get() + sizeof(fuse_removemapping_in)) |
| 220 | + fuse_removemapping_one(); |
| 221 | + r_in->count = 1; |
| 222 | + r_one->moffset = mstart * _chunk_size; |
| 223 | + r_one->len = nchunks * _chunk_size; |
| 224 | + |
| 225 | + // The nodeid is irrelevant for the current implementation of |
| 226 | + // FUSE_REMOVEMAPPING. If it needed to be set, would we need to make a |
| 227 | + // request per inode? |
| 228 | + uint64_t nodeid = 0; |
| 229 | + |
| 230 | + virtiofs_debug("inode %lld, removing mapping (moffset=%lld, len=%lld)\n", |
| 231 | + nodeid, r_one->moffset, r_one->len); |
| 232 | + auto error = fuse_req_send_and_receive_reply(&_drv, FUSE_REMOVEMAPPING, |
| 233 | + nodeid, in_args.get(), in_args_size, nullptr, 0); |
| 234 | + if (error) { |
| 235 | + kprintf("[virtiofs] inode %lld, mapping removal failed\n", nodeid); |
| 236 | + return error; |
| 237 | + } |
| 238 | + |
| 239 | + return 0; |
| 240 | +} |
| 241 | + |
| 242 | +bool dax_manager::find(uint64_t nodeid, chunk fstart, mapping_part& found) const |
| 243 | +{ |
| 244 | + for (auto& m : _mappings) { |
| 245 | + if (m.nodeid == nodeid && |
| 246 | + m.fstart <= fstart && |
| 247 | + m.fstart + m.nchunks > fstart) { |
| 248 | + |
| 249 | + // m contains fstart |
| 250 | + auto excess = fstart - m.fstart; // excess contained in m |
| 251 | + found.nchunks = m.nchunks - excess; |
| 252 | + found.mstart = m.mstart + excess; |
| 253 | + return true; |
| 254 | + } |
| 255 | + } |
| 256 | + return false; |
| 257 | +} |
| 258 | + |
| 259 | +dax_manager::chunk dax_manager::first_empty() const |
| 260 | +{ |
| 261 | + if (_mappings.empty()) { |
| 262 | + return 0; |
| 263 | + } |
| 264 | + auto& m {_mappings.back()}; |
| 265 | + return m.mstart + m.nchunks; |
| 266 | +} |
| 267 | + |
| 268 | +} |
0 commit comments