summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-27 12:40:40 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-27 12:40:40 -0700
commitb2e7b0ffa56185d04871c6fe317b36d30ce2861d (patch)
tree1db231c66cc6ec88b4929d2a3ca109ceb2402841
parentef479de65a700437159d59c00ee5cad6cfc2a89d (diff)
parent0f24e3c05afeac905a9df557264cc48f3363ab47 (diff)
Merge tag 'erofs-for-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang: "In this cycle, EROFS 48-bit block addressing is available to support massive datasets for model training and other large data archive use cases. In addition, byte-oriented encoded extents have been supported to reduce metadata sizes when using large configurations as well as to improve Zstd compression speed. There are some bugfixes and cleanups as usual. Summary: - Support 48-bit block addressing for large images - Introduce encoded extents to reduce metadata on larger pclusters - Enable unaligned compressed data to improve Zstd compression speed - Allow 16-byte volume names again - Minor cleanups" * tag 'erofs-for-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs: erofs: enable 48-bit layout support erofs: support unaligned encoded data erofs: implement encoded extent metadata erofs: add encoded extent on-disk definition erofs: initialize decompression early erofs: support dot-omitted directories erofs: implement 48-bit block addressing for unencoded inodes erofs: add 48-bit block addressing on-disk support erofs: simplify erofs_{read,fill}_inode() erofs: get rid of erofs_map_blocks_flatmode() erofs: move {in,out}pages into struct z_erofs_decompress_req erofs: clean up header parsing for ztailpacking and fragments erofs: simplify tail inline pcluster handling erofs: allow 16-byte volume name again erofs: get rid of erofs_kmap_type erofs: use Z_EROFS_LCLUSTER_TYPE_MAX to simplify switches
-rw-r--r--fs/erofs/Kconfig14
-rw-r--r--fs/erofs/compress.h2
-rw-r--r--fs/erofs/data.c148
-rw-r--r--fs/erofs/decompressor.c95
-rw-r--r--fs/erofs/decompressor_deflate.c8
-rw-r--r--fs/erofs/decompressor_lzma.c8
-rw-r--r--fs/erofs/decompressor_zstd.c8
-rw-r--r--fs/erofs/dir.c9
-rw-r--r--fs/erofs/erofs_fs.h191
-rw-r--r--fs/erofs/fileio.c2
-rw-r--r--fs/erofs/fscache.c2
-rw-r--r--fs/erofs/inode.c125
-rw-r--r--fs/erofs/internal.h47
-rw-r--r--fs/erofs/namei.c2
-rw-r--r--fs/erofs/super.c85
-rw-r--r--fs/erofs/sysfs.c2
-rw-r--r--fs/erofs/xattr.c12
-rw-r--r--fs/erofs/zdata.c102
-rw-r--r--fs/erofs/zmap.c286
-rw-r--r--include/trace/events/erofs.h2
20 files changed, 579 insertions, 571 deletions
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 6ea60661fa55..331e49cd1b8d 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -13,12 +13,12 @@ config EROFS_FS
smartphones with Android OS, LiveCDs and high-density hosts with
numerous containers;
- It also provides fixed-sized output compression support in order to
- improve storage density as well as keep relatively higher compression
- ratios and implements in-place decompression to reuse the file page
- for compressed data temporarily with proper strategies, which is
- quite useful to ensure guaranteed end-to-end runtime decompression
- performance under extremely memory pressure without extra cost.
+ It also provides transparent compression and deduplication support to
+ improve storage density and maintain relatively high compression
+ ratios, and it implements in-place decompression to temporarily reuse
+ page cache for compressed data using proper strategies, which is
+ quite useful for ensuring guaranteed end-to-end runtime decompression
+ performance under extreme memory pressure without extra cost.
See the documentation at <file:Documentation/filesystems/erofs.rst>
and the web pages at <https://erofs.docs.kernel.org> for more details.
@@ -97,7 +97,7 @@ config EROFS_FS_ZIP
select LZ4_DECOMPRESS
default y
help
- Enable fixed-sized output compression for EROFS.
+ Enable transparent compression support for EROFS file systems.
If you don't want to enable compression feature, say N.
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 65ff39401020..2704d7a592a5 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -11,6 +11,7 @@
struct z_erofs_decompress_req {
struct super_block *sb;
struct page **in, **out;
+ unsigned int inpages, outpages;
unsigned short pageofs_in, pageofs_out;
unsigned int inputsize, outputsize;
@@ -59,7 +60,6 @@ extern const struct z_erofs_decompressor *z_erofs_decomp[];
struct z_erofs_stream_dctx {
struct z_erofs_decompress_req *rq;
- unsigned int inpages, outpages; /* # of {en,de}coded pages */
int no, ni; /* the current {en,de}coded page # */
unsigned int avail_out; /* remaining bytes in the decoded buffer */
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 0cd6b5c4df98..2409d2ab0c28 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -25,8 +25,7 @@ void erofs_put_metabuf(struct erofs_buf *buf)
buf->page = NULL;
}
-void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset,
- enum erofs_kmap_type type)
+void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap)
{
pgoff_t index = offset >> PAGE_SHIFT;
struct folio *folio = NULL;
@@ -43,10 +42,10 @@ void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset,
return folio;
}
buf->page = folio_file_page(folio, index);
- if (!buf->base && type == EROFS_KMAP)
- buf->base = kmap_local_page(buf->page);
- if (type == EROFS_NO_KMAP)
+ if (!need_kmap)
return NULL;
+ if (!buf->base)
+ buf->base = kmap_local_page(buf->page);
return buf->base + (offset & ~PAGE_MASK);
}
@@ -65,64 +64,47 @@ void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb)
}
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
- erofs_off_t offset, enum erofs_kmap_type type)
+ erofs_off_t offset, bool need_kmap)
{
erofs_init_metabuf(buf, sb);
- return erofs_bread(buf, offset, type);
-}
-
-static int erofs_map_blocks_flatmode(struct inode *inode,
- struct erofs_map_blocks *map)
-{
- struct erofs_inode *vi = EROFS_I(inode);
- struct super_block *sb = inode->i_sb;
- bool tailendpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
- erofs_blk_t lastblk = erofs_iblks(inode) - tailendpacking;
-
- map->m_flags = EROFS_MAP_MAPPED; /* no hole in flat inodes */
- if (map->m_la < erofs_pos(sb, lastblk)) {
- map->m_pa = erofs_pos(sb, vi->raw_blkaddr) + map->m_la;
- map->m_plen = erofs_pos(sb, lastblk) - map->m_la;
- } else {
- DBG_BUGON(!tailendpacking);
- map->m_pa = erofs_iloc(inode) + vi->inode_isize +
- vi->xattr_isize + erofs_blkoff(sb, map->m_la);
- map->m_plen = inode->i_size - map->m_la;
-
- /* inline data should be located in the same meta block */
- if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) {
- erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid);
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
- map->m_flags |= EROFS_MAP_META;
- }
- return 0;
+ return erofs_bread(buf, offset, need_kmap);
}
int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
{
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct super_block *sb = inode->i_sb;
+ unsigned int unit, blksz = sb->s_blocksize;
struct erofs_inode *vi = EROFS_I(inode);
struct erofs_inode_chunk_index *idx;
- struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
- u64 chunknr;
- unsigned int unit;
+ erofs_blk_t startblk, addrmask;
+ bool tailpacking;
erofs_off_t pos;
- void *kaddr;
+ u64 chunknr;
int err = 0;
trace_erofs_map_blocks_enter(inode, map, 0);
map->m_deviceid = 0;
- if (map->m_la >= inode->i_size) {
- /* leave out-of-bound access unmapped */
- map->m_flags = 0;
- map->m_plen = map->m_llen;
+ map->m_flags = 0;
+ if (map->m_la >= inode->i_size)
goto out;
- }
if (vi->datalayout != EROFS_INODE_CHUNK_BASED) {
- err = erofs_map_blocks_flatmode(inode, map);
+ tailpacking = (vi->datalayout == EROFS_INODE_FLAT_INLINE);
+ if (!tailpacking && vi->startblk == EROFS_NULL_ADDR)
+ goto out;
+ pos = erofs_pos(sb, erofs_iblks(inode) - tailpacking);
+
+ map->m_flags = EROFS_MAP_MAPPED;
+ if (map->m_la < pos) {
+ map->m_pa = erofs_pos(sb, vi->startblk) + map->m_la;
+ map->m_llen = pos - map->m_la;
+ } else {
+ map->m_pa = erofs_iloc(inode) + vi->inode_isize +
+ vi->xattr_isize + erofs_blkoff(sb, map->m_la);
+ map->m_llen = inode->i_size - map->m_la;
+ map->m_flags |= EROFS_MAP_META;
+ }
goto out;
}
@@ -135,45 +117,44 @@ int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map)
pos = ALIGN(erofs_iloc(inode) + vi->inode_isize +
vi->xattr_isize, unit) + unit * chunknr;
- kaddr = erofs_read_metabuf(&buf, sb, pos, EROFS_KMAP);
- if (IS_ERR(kaddr)) {
- err = PTR_ERR(kaddr);
+ idx = erofs_read_metabuf(&buf, sb, pos, true);
+ if (IS_ERR(idx)) {
+ err = PTR_ERR(idx);
goto out;
}
map->m_la = chunknr << vi->chunkbits;
- map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits,
- round_up(inode->i_size - map->m_la, sb->s_blocksize));
-
- /* handle block map */
- if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) {
- __le32 *blkaddr = kaddr;
-
- if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) {
- map->m_flags = 0;
- } else {
- map->m_pa = erofs_pos(sb, le32_to_cpu(*blkaddr));
+ map->m_llen = min_t(erofs_off_t, 1UL << vi->chunkbits,
+ round_up(inode->i_size - map->m_la, blksz));
+ if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES) {
+ addrmask = (vi->chunkformat & EROFS_CHUNK_FORMAT_48BIT) ?
+ BIT_ULL(48) - 1 : BIT_ULL(32) - 1;
+ startblk = (((u64)le16_to_cpu(idx->startblk_hi) << 32) |
+ le32_to_cpu(idx->startblk_lo)) & addrmask;
+ if ((startblk ^ EROFS_NULL_ADDR) & addrmask) {
+ map->m_deviceid = le16_to_cpu(idx->device_id) &
+ EROFS_SB(sb)->device_id_mask;
+ map->m_pa = erofs_pos(sb, startblk);
+ map->m_flags = EROFS_MAP_MAPPED;
+ }
+ } else {
+ startblk = le32_to_cpu(*(__le32 *)idx);
+ if (startblk != (u32)EROFS_NULL_ADDR) {
+ map->m_pa = erofs_pos(sb, startblk);
map->m_flags = EROFS_MAP_MAPPED;
}
- goto out_unlock;
- }
- /* parse chunk indexes */
- idx = kaddr;
- switch (le32_to_cpu(idx->blkaddr)) {
- case EROFS_NULL_ADDR:
- map->m_flags = 0;
- break;
- default:
- map->m_deviceid = le16_to_cpu(idx->device_id) &
- EROFS_SB(sb)->device_id_mask;
- map->m_pa = erofs_pos(sb, le32_to_cpu(idx->blkaddr));
- map->m_flags = EROFS_MAP_MAPPED;
- break;
}
-out_unlock:
erofs_put_metabuf(&buf);
out:
- if (!err)
- map->m_llen = map->m_plen;
+ if (!err) {
+ map->m_plen = map->m_llen;
+ /* inline data should be located in the same meta block */
+ if ((map->m_flags & EROFS_MAP_META) &&
+ erofs_blkoff(sb, map->m_pa) + map->m_plen > blksz) {
+ erofs_err(sb, "inline data across blocks @ nid %llu", vi->nid);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+ }
trace_erofs_map_blocks_exit(inode, map, 0, err);
return err;
}
@@ -192,7 +173,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
{
struct erofs_dev_context *devs = EROFS_SB(sb)->devs;
struct erofs_device_info *dif;
- erofs_off_t startoff, length;
+ erofs_off_t startoff;
int id;
erofs_fill_from_devinfo(map, sb, &EROFS_SB(sb)->dif0);
@@ -205,7 +186,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
return -ENODEV;
}
if (devs->flatdev) {
- map->m_pa += erofs_pos(sb, dif->mapped_blkaddr);
+ map->m_pa += erofs_pos(sb, dif->uniaddr);
up_read(&devs->rwsem);
return 0;
}
@@ -214,13 +195,12 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map)
} else if (devs->extra_devices && !devs->flatdev) {
down_read(&devs->rwsem);
idr_for_each_entry(&devs->tree, dif, id) {
- if (!dif->mapped_blkaddr)
+ if (!dif->uniaddr)
continue;
- startoff = erofs_pos(sb, dif->mapped_blkaddr);
- length = erofs_pos(sb, dif->blocks);
+ startoff = erofs_pos(sb, dif->uniaddr);
if (map->m_pa >= startoff &&
- map->m_pa < startoff + length) {
+ map->m_pa < startoff + erofs_pos(sb, dif->blocks)) {
map->m_pa -= startoff;
erofs_fill_from_devinfo(map, sb, dif);
break;
@@ -312,7 +292,7 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
iomap->type = IOMAP_INLINE;
- ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, EROFS_KMAP);
+ ptr = erofs_read_metabuf(&buf, sb, mdev.m_pa, true);
if (IS_ERR(ptr))
return PTR_ERR(ptr);
iomap->inline_data = ptr;
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 2b123b070a42..bf62e2836b60 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -9,14 +9,6 @@
#define LZ4_MAX_DISTANCE_PAGES (DIV_ROUND_UP(LZ4_DISTANCE_MAX, PAGE_SIZE) + 1)
-struct z_erofs_lz4_decompress_ctx {
- struct z_erofs_decompress_req *rq;
- /* # of encoded, decoded pages */
- unsigned int inpages, outpages;
- /* decoded block total length (used for in-place decompression) */
- unsigned int oend;
-};
-
static int z_erofs_load_lz4_config(struct super_block *sb,
struct erofs_super_block *dsb, void *data, int size)
{
@@ -55,10 +47,9 @@ static int z_erofs_load_lz4_config(struct super_block *sb,
* Fill all gaps with bounce pages if it's a sparse page list. Also check if
* all physical pages are consecutive, which can be seen for moderate CR.
*/
-static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
+static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
- struct z_erofs_decompress_req *rq = ctx->rq;
struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
BITS_PER_LONG)] = { 0 };
@@ -68,7 +59,7 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
unsigned int i, j, top;
top = 0;
- for (i = j = 0; i < ctx->outpages; ++i, ++j) {
+ for (i = j = 0; i < rq->outpages; ++i, ++j) {
struct page *const page = rq->out[i];
struct page *victim;
@@ -114,36 +105,36 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
return kaddr ? 1 : 0;
}
-static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
+static void *z_erofs_lz4_handle_overlap(struct z_erofs_decompress_req *rq,
void *inpage, void *out, unsigned int *inputmargin,
int *maptype, bool may_inplace)
{
- struct z_erofs_decompress_req *rq = ctx->rq;
- unsigned int omargin, total, i;
+ unsigned int oend, omargin, total, i;
struct page **in;
void *src, *tmp;
if (rq->inplace_io) {
- omargin = PAGE_ALIGN(ctx->oend) - ctx->oend;
+ oend = rq->pageofs_out + rq->outputsize;
+ omargin = PAGE_ALIGN(oend) - oend;
if (rq->partial_decoding || !may_inplace ||
omargin < LZ4_DECOMPRESS_INPLACE_MARGIN(rq->inputsize))
goto docopy;
- for (i = 0; i < ctx->inpages; ++i)
- if (rq->out[ctx->outpages - ctx->inpages + i] !=
+ for (i = 0; i < rq->inpages; ++i)
+ if (rq->out[rq->outpages - rq->inpages + i] !=
rq->in[i])
goto docopy;
kunmap_local(inpage);
*maptype = 3;
- return out + ((ctx->outpages - ctx->inpages) << PAGE_SHIFT);
+ return out + ((rq->outpages - rq->inpages) << PAGE_SHIFT);
}
- if (ctx->inpages <= 1) {
+ if (rq->inpages <= 1) {
*maptype = 0;
return inpage;
}
kunmap_local(inpage);
- src = erofs_vm_map_ram(rq->in, ctx->inpages);
+ src = erofs_vm_map_ram(rq->in, rq->inpages);
if (!src)
return ERR_PTR(-ENOMEM);
*maptype = 1;
@@ -152,7 +143,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
docopy:
/* Or copy compressed data which can be overlapped to per-CPU buffer */
in = rq->in;
- src = z_erofs_get_gbuf(ctx->inpages);
+ src = z_erofs_get_gbuf(rq->inpages);
if (!src) {
DBG_BUGON(1);
kunmap_local(inpage);
@@ -197,10 +188,8 @@ int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf,
return 0;
}
-static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
- u8 *dst)
+static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq, u8 *dst)
{
- struct z_erofs_decompress_req *rq = ctx->rq;
bool support_0padding = false, may_inplace = false;
unsigned int inputmargin;
u8 *out, *headpage, *src;
@@ -224,7 +213,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
}
inputmargin = rq->pageofs_in;
- src = z_erofs_lz4_handle_overlap(ctx, headpage, dst, &inputmargin,
+ src = z_erofs_lz4_handle_overlap(rq, headpage, dst, &inputmargin,
&maptype, may_inplace);
if (IS_ERR(src))
return PTR_ERR(src);
@@ -251,7 +240,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
if (maptype == 0) {
kunmap_local(headpage);
} else if (maptype == 1) {
- vm_unmap_ram(src, ctx->inpages);
+ vm_unmap_ram(src, rq->inpages);
} else if (maptype == 2) {
z_erofs_put_gbuf(src);
} else if (maptype != 3) {
@@ -264,54 +253,42 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
- struct z_erofs_lz4_decompress_ctx ctx;
unsigned int dst_maptype;
void *dst;
int ret;
- ctx.rq = rq;
- ctx.oend = rq->pageofs_out + rq->outputsize;
- ctx.outpages = PAGE_ALIGN(ctx.oend) >> PAGE_SHIFT;
- ctx.inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
-
/* one optimized fast path only for non bigpcluster cases yet */
- if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) {
+ if (rq->inpages == 1 && rq->outpages == 1 && !rq->inplace_io) {
DBG_BUGON(!*rq->out);
dst = kmap_local_page(*rq->out);
dst_maptype = 0;
- goto dstmap_out;
- }
-
- /* general decoding path which can be used for all cases */
- ret = z_erofs_lz4_prepare_dstpages(&ctx, pagepool);
- if (ret < 0) {
- return ret;
- } else if (ret > 0) {
- dst = page_address(*rq->out);
- dst_maptype = 1;
} else {
- dst = erofs_vm_map_ram(rq->out, ctx.outpages);
- if (!dst)
- return -ENOMEM;
- dst_maptype = 2;
+ /* general decoding path which can be used for all cases */
+ ret = z_erofs_lz4_prepare_dstpages(rq, pagepool);
+ if (ret < 0)
+ return ret;
+ if (ret > 0) {
+ dst = page_address(*rq->out);
+ dst_maptype = 1;
+ } else {
+ dst = erofs_vm_map_ram(rq->out, rq->outpages);
+ if (!dst)
+ return -ENOMEM;
+ dst_maptype = 2;
+ }
}
-
-dstmap_out:
- ret = z_erofs_lz4_decompress_mem(&ctx, dst);
+ ret = z_erofs_lz4_decompress_mem(rq, dst);
if (!dst_maptype)
kunmap_local(dst);
else if (dst_maptype == 2)
- vm_unmap_ram(dst, ctx.outpages);
+ vm_unmap_ram(dst, rq->outpages);
return ret;
}
static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
- const unsigned int nrpages_in =
- PAGE_ALIGN(rq->pageofs_in + rq->inputsize) >> PAGE_SHIFT;
- const unsigned int nrpages_out =
- PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+ const unsigned int nrpages_in = rq->inpages, nrpages_out = rq->outpages;
const unsigned int bs = rq->sb->s_blocksize;
unsigned int cur = 0, ni = 0, no, pi, po, insz, cnt;
u8 *kin;
@@ -336,7 +313,7 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
rq->outputsize -= cur;
}
- for (; rq->outputsize; rq->pageofs_in = 0, cur += PAGE_SIZE, ni++) {
+ for (; rq->outputsize; rq->pageofs_in = 0, cur += insz, ni++) {
insz = min(PAGE_SIZE - rq->pageofs_in, rq->outputsize);
rq->outputsize -= insz;
if (!rq->in[ni])
@@ -373,7 +350,7 @@ int z_erofs_stream_switch_bufs(struct z_erofs_stream_dctx *dctx, void **dst,
unsigned int j;
if (!dctx->avail_out) {
- if (++dctx->no >= dctx->outpages || !rq->outputsize) {
+ if (++dctx->no >= rq->outpages || !rq->outputsize) {
erofs_err(sb, "insufficient space for decompressed data");
return -EFSCORRUPTED;
}
@@ -401,7 +378,7 @@ int z_erofs_stream_switch_bufs(struct z_erofs_stream_dctx *dctx, void **dst,
}
if (dctx->inbuf_pos == dctx->inbuf_sz && rq->inputsize) {
- if (++dctx->ni >= dctx->inpages) {
+ if (++dctx->ni >= rq->inpages) {
erofs_err(sb, "invalid compressed data");
return -EFSCORRUPTED;
}
@@ -434,7 +411,7 @@ int z_erofs_stream_switch_bufs(struct z_erofs_stream_dctx *dctx, void **dst,
dctx->bounced = true;
}
- for (j = dctx->ni + 1; j < dctx->inpages; ++j) {
+ for (j = dctx->ni + 1; j < rq->inpages; ++j) {
if (rq->out[dctx->no] != rq->in[j])
continue;
tmppage = erofs_allocpage(pgpl, rq->gfp);
diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
index 5070d2fcc737..c6908a487054 100644
--- a/fs/erofs/decompressor_deflate.c
+++ b/fs/erofs/decompressor_deflate.c
@@ -101,13 +101,7 @@ static int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
struct page **pgpl)
{
struct super_block *sb = rq->sb;
- struct z_erofs_stream_dctx dctx = {
- .rq = rq,
- .inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT,
- .outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize)
- >> PAGE_SHIFT,
- .no = -1, .ni = 0,
- };
+ struct z_erofs_stream_dctx dctx = { .rq = rq, .no = -1, .ni = 0 };
struct z_erofs_deflate *strm;
int zerr, err;
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 40666815046f..832cffb83a66 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -150,13 +150,7 @@ static int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
struct page **pgpl)
{
struct super_block *sb = rq->sb;
- struct z_erofs_stream_dctx dctx = {
- .rq = rq,
- .inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT,
- .outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize)
- >> PAGE_SHIFT,
- .no = -1, .ni = 0,
- };
+ struct z_erofs_stream_dctx dctx = { .rq = rq, .no = -1, .ni = 0 };
struct xz_buf buf = {};
struct z_erofs_lzma *strm;
enum xz_ret xz_err;
diff --git a/fs/erofs/decompressor_zstd.c b/fs/erofs/decompressor_zstd.c
index 7e177304967e..b4bfe14229f9 100644
--- a/fs/erofs/decompressor_zstd.c
+++ b/fs/erofs/decompressor_zstd.c
@@ -139,13 +139,7 @@ static int z_erofs_zstd_decompress(struct z_erofs_decompress_req *rq,
struct page **pgpl)
{
struct super_block *sb = rq->sb;
- struct z_erofs_stream_dctx dctx = {
- .rq = rq,
- .inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT,
- .outpages = PAGE_ALIGN(rq->pageofs_out + rq->outputsize)
- >> PAGE_SHIFT,
- .no = -1, .ni = 0,
- };
+ struct z_erofs_stream_dctx dctx = { .rq = rq, .no = -1, .ni = 0 };
zstd_in_buffer in_buf = { NULL, 0, 0 };
zstd_out_buffer out_buf = { NULL, 0, 0 };
struct z_erofs_zstd *strm;
diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c
index c3b90abdee37..2fae209d0274 100644
--- a/fs/erofs/dir.c
+++ b/fs/erofs/dir.c
@@ -58,9 +58,9 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
struct erofs_dirent *de;
unsigned int nameoff, maxsize;
- de = erofs_bread(&buf, dbstart, EROFS_KMAP);
+ de = erofs_bread(&buf, dbstart, true);
if (IS_ERR(de)) {
- erofs_err(sb, "fail to readdir of logical block %u of nid %llu",
+ erofs_err(sb, "failed to readdir of logical block %llu of nid %llu",
erofs_blknr(sb, dbstart), EROFS_I(dir)->nid);
err = PTR_ERR(de);
break;
@@ -90,6 +90,11 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx)
ofs = 0;
}
erofs_put_metabuf(&buf);
+ if (EROFS_I(dir)->dot_omitted && ctx->pos == dir->i_size) {
+ if (!dir_emit_dot(f, ctx))
+ return 0;
+ ++ctx->pos;
+ }
return err < 0 ? err : 0;
}
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 199395ed1c1f..9581e9bf8192 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -30,25 +30,19 @@
#define EROFS_FEATURE_INCOMPAT_FRAGMENTS 0x00000020
#define EROFS_FEATURE_INCOMPAT_DEDUPE 0x00000020
#define EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES 0x00000040
+#define EROFS_FEATURE_INCOMPAT_48BIT 0x00000080
#define EROFS_ALL_FEATURE_INCOMPAT \
- (EROFS_FEATURE_INCOMPAT_ZERO_PADDING | \
- EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \
- EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \
- EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \
- EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \
- EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 | \
- EROFS_FEATURE_INCOMPAT_ZTAILPACKING | \
- EROFS_FEATURE_INCOMPAT_FRAGMENTS | \
- EROFS_FEATURE_INCOMPAT_DEDUPE | \
- EROFS_FEATURE_INCOMPAT_XATTR_PREFIXES)
+ ((EROFS_FEATURE_INCOMPAT_48BIT << 1) - 1)
#define EROFS_SB_EXTSLOT_SIZE 16
struct erofs_deviceslot {
u8 tag[64]; /* digest(sha256), etc. */
- __le32 blocks; /* total fs blocks of this device */
- __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */
- u8 reserved[56];
+ __le32 blocks_lo; /* total blocks count of this device */
+ __le32 uniaddr_lo; /* unified starting block of this device */
+ __le32 blocks_hi; /* total blocks count MSB */
+ __le16 uniaddr_hi; /* unified starting block MSB */
+ u8 reserved[50];
};
#define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot)
@@ -59,13 +53,14 @@ struct erofs_super_block {
__le32 feature_compat;
__u8 blkszbits; /* filesystem block size in bit shift */
__u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */
-
- __le16 root_nid; /* nid of root directory */
+ union {
+ __le16 rootnid_2b; /* nid of root directory */
+ __le16 blocks_hi; /* (48BIT on) blocks count MSB */
+ } rb;
__le64 inos; /* total valid ino # (== f_files - f_favail) */
-
- __le64 build_time; /* compact inode time derivation */
- __le32 build_time_nsec; /* compact inode time derivation in ns scale */
- __le32 blocks; /* used for statfs */
+ __le64 epoch; /* base seconds used for compact inodes */
+ __le32 fixed_nsec; /* fixed nanoseconds for compact inodes */
+ __le32 blocks_lo; /* blocks count LSB */
__le32 meta_blkaddr; /* start block address of metadata area */
__le32 xattr_blkaddr; /* start block address of shared xattr area */
__u8 uuid[16]; /* 128-bit uuid for volume */
@@ -84,7 +79,10 @@ struct erofs_super_block {
__le32 xattr_prefix_start; /* start of long xattr prefixes */
__le64 packed_nid; /* nid of the special packed inode */
__u8 xattr_filter_reserved; /* reserved for xattr name filter */
- __u8 reserved2[23];
+ __u8 reserved[3];
+ __le32 build_time; /* seconds added to epoch for mkfs time */
+ __le64 rootnid_8b; /* (48BIT on) nid of root directory */
+ __u8 reserved2[8];
};
/*
@@ -115,19 +113,19 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode)
#define EROFS_I_VERSION_MASK 0x01
#define EROFS_I_DATALAYOUT_MASK 0x07
-#define EROFS_I_VERSION_BIT 0
-#define EROFS_I_DATALAYOUT_BIT 1
-#define EROFS_I_ALL_BIT 4
-
-#define EROFS_I_ALL ((1 << EROFS_I_ALL_BIT) - 1)
+#define EROFS_I_VERSION_BIT 0
+#define EROFS_I_DATALAYOUT_BIT 1
+#define EROFS_I_NLINK_1_BIT 4 /* non-directory compact inodes only */
+#define EROFS_I_DOT_OMITTED_BIT 4 /* (directories) omit the `.` dirent */
+#define EROFS_I_ALL ((1 << (EROFS_I_NLINK_1_BIT + 1)) - 1)
/* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */
#define EROFS_CHUNK_FORMAT_BLKBITS_MASK 0x001F
-/* with chunk indexes or just a 4-byte blkaddr array */
+/* with chunk indexes or just a 4-byte block array */
#define EROFS_CHUNK_FORMAT_INDEXES 0x0020
+#define EROFS_CHUNK_FORMAT_48BIT 0x0040
-#define EROFS_CHUNK_FORMAT_ALL \
- (EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES)
+#define EROFS_CHUNK_FORMAT_ALL ((EROFS_CHUNK_FORMAT_48BIT << 1) - 1)
/* 32-byte on-disk inode */
#define EROFS_INODE_LAYOUT_COMPACT 0
@@ -140,45 +138,40 @@ struct erofs_inode_chunk_info {
};
union erofs_inode_i_u {
- /* total compressed blocks for compressed inodes */
- __le32 compressed_blocks;
-
- /* block address for uncompressed flat inodes */
- __le32 raw_blkaddr;
-
- /* for device files, used to indicate old/new device # */
- __le32 rdev;
-
- /* for chunk-based files, it contains the summary info */
+ __le32 blocks_lo; /* total blocks count (if compressed inodes) */
+ __le32 startblk_lo; /* starting block number (if flat inodes) */
+ __le32 rdev; /* device ID (if special inodes) */
struct erofs_inode_chunk_info c;
};
+union erofs_inode_i_nb {
+ __le16 nlink; /* if EROFS_I_NLINK_1_BIT is unset */
+ __le16 blocks_hi; /* total blocks count MSB */
+ __le16 startblk_hi; /* starting block number MSB */
+};
+
/* 32-byte reduced form of an ondisk inode */
struct erofs_inode_compact {
__le16 i_format; /* inode format hints */
-
-/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
__le16 i_xattr_icount;
__le16 i_mode;
- __le16 i_nlink;
+ union erofs_inode_i_nb i_nb;
__le32 i_size;
- __le32 i_reserved;
+ __le32 i_mtime;
union erofs_inode_i_u i_u;
__le32 i_ino; /* only used for 32-bit stat compatibility */
__le16 i_uid;
__le16 i_gid;
- __le32 i_reserved2;
+ __le32 i_reserved;
};
/* 64-byte complete form of an ondisk inode */
struct erofs_inode_extended {
__le16 i_format; /* inode format hints */
-
-/* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
__le16 i_xattr_icount;
__le16 i_mode;
- __le16 i_reserved;
+ union erofs_inode_i_nb i_nb;
__le64 i_size;
union erofs_inode_i_u i_u;
@@ -248,6 +241,7 @@ static inline unsigned int erofs_xattr_ibody_size(__le16 i_xattr_icount)
if (!i_xattr_icount)
return 0;
+ /* 1 header + n-1 * 4 bytes inline xattr to keep continuity */
return sizeof(struct erofs_xattr_ibody_header) +
sizeof(__u32) * (le16_to_cpu(i_xattr_icount) - 1);
}
@@ -266,11 +260,11 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e)
/* 4-byte block address array */
#define EROFS_BLOCK_MAP_ENTRY_SIZE sizeof(__le32)
-/* 8-byte inode chunk indexes */
+/* 8-byte inode chunk index */
struct erofs_inode_chunk_index {
- __le16 advise; /* always 0, don't care for now */
+ __le16 startblk_hi; /* starting block number MSB */
__le16 device_id; /* back-end storage id (with bits masked) */
- __le32 blkaddr; /* start block address of this inode chunk */
+ __le32 startblk_lo; /* starting block number of this chunk */
};
/* dirent sorts in alphabet order, thus we can do binary search */
@@ -337,21 +331,20 @@ struct z_erofs_zstd_cfgs {
#define Z_EROFS_ZSTD_MAX_DICT_SIZE Z_EROFS_PCLUSTER_MAX_SIZE
/*
- * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
- * e.g. for 4k logical cluster size, 4B if compacted 2B is off;
- * (4B) + 2B + (4B) if compacted 2B is on.
- * bit 1 : HEAD1 big pcluster (0 - off; 1 - on)
- * bit 2 : HEAD2 big pcluster (0 - off; 1 - on)
- * bit 3 : tailpacking inline pcluster (0 - off; 1 - on)
- * bit 4 : interlaced plain pcluster (0 - off; 1 - on)
- * bit 5 : fragment pcluster (0 - off; 1 - on)
+ * Enable COMPACTED_2B for EROFS_INODE_COMPRESSED_COMPACT inodes:
+ * 4B (disabled) vs 4B+2B+4B (enabled)
*/
#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001
+/* Enable extent metadata for EROFS_INODE_COMPRESSED_FULL inodes */
+#define Z_EROFS_ADVISE_EXTENTS 0x0001
#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002
#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004
#define Z_EROFS_ADVISE_INLINE_PCLUSTER 0x0008
#define Z_EROFS_ADVISE_INTERLACED_PCLUSTER 0x0010
#define Z_EROFS_ADVISE_FRAGMENT_PCLUSTER 0x0020
+/* Indicate the record size for each extent if extent metadata is used */
+#define Z_EROFS_ADVISE_EXTRECSZ_BIT 1
+#define Z_EROFS_ADVISE_EXTRECSZ_MASK 0x3
#define Z_EROFS_FRAGMENT_INODE_BIT 7
struct z_erofs_map_header {
@@ -363,45 +356,24 @@ struct z_erofs_map_header {
/* indicates the encoded size of tailpacking data */
__le16 h_idata_size;
};
+ __le32 h_extents_lo; /* extent count LSB */
};
__le16 h_advise;
- /*
- * bit 0-3 : algorithm type of head 1 (logical cluster type 01);
- * bit 4-7 : algorithm type of head 2 (logical cluster type 11).
- */
- __u8 h_algorithmtype;
- /*
- * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096;
- * bit 3-6 : reserved;
- * bit 7 : move the whole file into packed inode or not.
- */
- __u8 h_clusterbits;
+ union {
+ struct {
+ /* algorithm type (bit 0-3: HEAD1; bit 4-7: HEAD2) */
+ __u8 h_algorithmtype;
+ /*
+ * bit 0-3 : logical cluster bits - blkszbits
+ * bit 4-6 : reserved
+ * bit 7 : pack the whole file into packed inode
+ */
+ __u8 h_clusterbits;
+ };
+ __le16 h_extents_hi; /* extent count MSB */
+ };
};
-/*
- * On-disk logical cluster type:
- * 0 - literal (uncompressed) lcluster
- * 1,3 - compressed lcluster (for HEAD lclusters)
- * 2 - compressed lcluster (for NONHEAD lclusters)
- *
- * In detail,
- * 0 - literal (uncompressed) lcluster,
- * di_advise = 0
- * di_clusterofs = the literal data offset of the lcluster
- * di_blkaddr = the blkaddr of the literal pcluster
- *
- * 1,3 - compressed lcluster (for HEAD lclusters)
- * di_advise = 1 or 3
- * di_clusterofs = the decompressed data offset of the lcluster
- * di_blkaddr = the blkaddr of the compressed pcluster
- *
- * 2 - compressed lcluster (for NONHEAD lclusters)
- * di_advise = 2
- * di_clusterofs =
- * the decompressed data offset in its own HEAD lcluster
- * di_u.delta[0] = distance to this HEAD lcluster
- * di_u.delta[1] = distance to the next HEAD lcluster
- */
enum {
Z_EROFS_LCLUSTER_TYPE_PLAIN = 0,
Z_EROFS_LCLUSTER_TYPE_HEAD1 = 1,
@@ -415,11 +387,7 @@ enum {
/* (noncompact only, HEAD) This pcluster refers to partial decompressed data */
#define Z_EROFS_LI_PARTIAL_REF (1 << 15)
-/*
- * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the
- * compressed block count of a compressed extent (in logical clusters, aka.
- * block count of a pcluster).
- */
+/* Set on 1st non-head lcluster to store compressed block counti (in blocks) */
#define Z_EROFS_LI_D0_CBLKCNT (1 << 11)
struct z_erofs_lcluster_index {
@@ -428,19 +396,36 @@ struct z_erofs_lcluster_index {
__le16 di_clusterofs;
union {
- /* for the HEAD lclusters */
- __le32 blkaddr;
+ __le32 blkaddr; /* for the HEAD lclusters */
/*
- * for the NONHEAD lclusters
* [0] - distance to its HEAD lcluster
* [1] - distance to the next HEAD lcluster
*/
- __le16 delta[2];
+ __le16 delta[2]; /* for the NONHEAD lclusters */
} di_u;
};
-#define Z_EROFS_FULL_INDEX_ALIGN(end) \
- (ALIGN(end, 8) + sizeof(struct z_erofs_map_header) + 8)
+#define Z_EROFS_MAP_HEADER_END(end) \
+ (ALIGN(end, 8) + sizeof(struct z_erofs_map_header))
+#define Z_EROFS_FULL_INDEX_START(end) (Z_EROFS_MAP_HEADER_END(end) + 8)
+
+#define Z_EROFS_EXTENT_PLEN_PARTIAL BIT(27)
+#define Z_EROFS_EXTENT_PLEN_FMT_BIT 28
+#define Z_EROFS_EXTENT_PLEN_MASK ((Z_EROFS_PCLUSTER_MAX_SIZE << 1) - 1)
+struct z_erofs_extent {
+ __le32 plen; /* encoded length */
+ __le32 pstart_lo; /* physical offset */
+ __le32 pstart_hi; /* physical offset MSB */
+ __le32 lstart_lo; /* logical offset */
+ __le32 lstart_hi; /* logical offset MSB (>= 4GiB inodes) */
+ __u8 reserved[12]; /* for future use */
+};
+
+static inline int z_erofs_extent_recsize(unsigned int advise)
+{
+ return 4 << ((advise >> Z_EROFS_ADVISE_EXTRECSZ_BIT) &
+ Z_EROFS_ADVISE_EXTRECSZ_MASK);
+}
/* check the EROFS on-disk layout strictly at compile time */
static inline void erofs_check_ondisk_layout_definitions(void)
diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c
index 0ffd1c63beeb..bec4b56b3826 100644
--- a/fs/erofs/fileio.c
+++ b/fs/erofs/fileio.c
@@ -112,7 +112,7 @@ static int erofs_fileio_scan_folio(struct erofs_fileio *io, struct folio *folio)
void *src;
src = erofs_read_metabuf(&buf, inode->i_sb,
- map->m_pa + ofs, EROFS_KMAP);
+ map->m_pa + ofs, true);
if (IS_ERR(src)) {
err = PTR_ERR(src);
break;
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index ce3d8737df85..9c9129bca346 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -276,7 +276,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
size_t size = map.m_llen;
void *src;
- src = erofs_read_metabuf(&buf, sb, map.m_pa, EROFS_KMAP);
+ src = erofs_read_metabuf(&buf, sb, map.m_pa, true);
if (IS_ERR(src))
return PTR_ERR(src);
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index d4b89407822a..a0ae0b4f7b01 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -27,29 +27,27 @@ static int erofs_fill_symlink(struct inode *inode, void *kaddr,
static int erofs_read_inode(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
+ erofs_blk_t blkaddr = erofs_blknr(sb, erofs_iloc(inode));
+ unsigned int ofs = erofs_blkoff(sb, erofs_iloc(inode));
+ struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
struct erofs_sb_info *sbi = EROFS_SB(sb);
+ erofs_blk_t addrmask = BIT_ULL(48) - 1;
struct erofs_inode *vi = EROFS_I(inode);
- const erofs_off_t inode_loc = erofs_iloc(inode);
- erofs_blk_t blkaddr, nblks = 0;
- void *kaddr;
+ struct erofs_inode_extended *die, copied;
struct erofs_inode_compact *dic;
- struct erofs_inode_extended *die, *copied = NULL;
- union erofs_inode_i_u iu;
- struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
- unsigned int ifmt, ofs;
+ unsigned int ifmt;
+ void *ptr;
int err = 0;
- blkaddr = erofs_blknr(sb, inode_loc);
- ofs = erofs_blkoff(sb, inode_loc);
-
- kaddr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), EROFS_KMAP);
- if (IS_ERR(kaddr)) {
- erofs_err(sb, "failed to get inode (nid: %llu) page, err %ld",
- vi->nid, PTR_ERR(kaddr));
- return PTR_ERR(kaddr);
+ ptr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr), true);
+ if (IS_ERR(ptr)) {
+ err = PTR_ERR(ptr);
+ erofs_err(sb, "failed to get inode (nid: %llu) page, err %d",
+ vi->nid, err);
+ goto err_out;
}
- dic = kaddr + ofs;
+ dic = ptr + ofs;
ifmt = le16_to_cpu(dic->i_format);
if (ifmt & ~EROFS_I_ALL) {
erofs_err(sb, "unsupported i_format %u of nid %llu",
@@ -73,40 +71,34 @@ static int erofs_read_inode(struct inode *inode)
if (ofs + vi->inode_isize <= sb->s_blocksize) {
ofs += vi->inode_isize;
die = (struct erofs_inode_extended *)dic;
+ copied.i_u = die->i_u;
+ copied.i_nb = die->i_nb;
} else {
const unsigned int gotten = sb->s_blocksize - ofs;
- copied = kmalloc(vi->inode_isize, GFP_KERNEL);
- if (!copied) {
- err = -ENOMEM;
+ memcpy(&copied, dic, gotten);
+ ptr = erofs_read_metabuf(&buf, sb,
+ erofs_pos(sb, blkaddr + 1), true);
+ if (IS_ERR(ptr)) {
+ err = PTR_ERR(ptr);
+ erofs_err(sb, "failed to get inode payload block (nid: %llu), err %d",
+ vi->nid, err);
goto err_out;
}
- memcpy(copied, dic, gotten);
- kaddr = erofs_read_metabuf(&buf, sb, erofs_pos(sb, blkaddr + 1),
- EROFS_KMAP);
- if (IS_ERR(kaddr)) {
- erofs_err(sb, "failed to get inode payload block (nid: %llu), err %ld",
- vi->nid, PTR_ERR(kaddr));
- kfree(copied);
- return PTR_ERR(kaddr);
- }
ofs = vi->inode_isize - gotten;
- memcpy((u8 *)copied + gotten, kaddr, ofs);
- die = copied;
+ memcpy((u8 *)&copied + gotten, ptr, ofs);
+ die = &copied;
}
vi->xattr_isize = erofs_xattr_ibody_size(die->i_xattr_icount);
inode->i_mode = le16_to_cpu(die->i_mode);
- iu = die->i_u;
i_uid_write(inode, le32_to_cpu(die->i_uid));
i_gid_write(inode, le32_to_cpu(die->i_gid));
set_nlink(inode, le32_to_cpu(die->i_nlink));
- /* each extended inode has its own timestamp */
- inode_set_ctime(inode, le64_to_cpu(die->i_mtime),
+ inode_set_mtime(inode, le64_to_cpu(die->i_mtime),
le32_to_cpu(die->i_mtime_nsec));
inode->i_size = le64_to_cpu(die->i_size);
- kfree(copied);
break;
case EROFS_INODE_LAYOUT_COMPACT:
vi->inode_isize = sizeof(struct erofs_inode_compact);
@@ -114,12 +106,20 @@ static int erofs_read_inode(struct inode *inode)
vi->xattr_isize = erofs_xattr_ibody_size(dic->i_xattr_icount);
inode->i_mode = le16_to_cpu(dic->i_mode);
- iu = dic->i_u;
+ copied.i_u = dic->i_u;
i_uid_write(inode, le16_to_cpu(dic->i_uid));
i_gid_write(inode, le16_to_cpu(dic->i_gid));
- set_nlink(inode, le16_to_cpu(dic->i_nlink));
- /* use build time for compact inodes */
- inode_set_ctime(inode, sbi->build_time, sbi->build_time_nsec);
+ if (!S_ISDIR(inode->i_mode) &&
+ ((ifmt >> EROFS_I_NLINK_1_BIT) & 1)) {
+ set_nlink(inode, 1);
+ copied.i_nb = dic->i_nb;
+ } else {
+ set_nlink(inode, le16_to_cpu(dic->i_nb.nlink));
+ copied.i_nb.startblk_hi = 0;
+ addrmask = BIT_ULL(32) - 1;
+ }
+ inode_set_mtime(inode, sbi->epoch + le32_to_cpu(dic->i_mtime),
+ sbi->fixed_nsec);
inode->i_size = le32_to_cpu(dic->i_size);
break;
@@ -136,19 +136,26 @@ static int erofs_read_inode(struct inode *inode)
goto err_out;
}
switch (inode->i_mode & S_IFMT) {
- case S_IFREG:
case S_IFDIR:
+ vi->dot_omitted = (ifmt >> EROFS_I_DOT_OMITTED_BIT) & 1;
+ fallthrough;
+ case S_IFREG:
case S_IFLNK:
- vi->raw_blkaddr = le32_to_cpu(iu.raw_blkaddr);
+ vi->startblk = le32_to_cpu(copied.i_u.startblk_lo) |
+ ((u64)le16_to_cpu(copied.i_nb.startblk_hi) << 32);
+ if (vi->datalayout == EROFS_INODE_FLAT_PLAIN &&
+ !((vi->startblk ^ EROFS_NULL_ADDR) & addrmask))
+ vi->startblk = EROFS_NULL_ADDR;
+
if(S_ISLNK(inode->i_mode)) {
- err = erofs_fill_symlink(inode, kaddr, ofs);
+ err = erofs_fill_symlink(inode, ptr, ofs);
if (err)
goto err_out;
}
break;
case S_IFCHR:
case S_IFBLK:
- inode->i_rdev = new_decode_dev(le32_to_cpu(iu.rdev));
+ inode->i_rdev = new_decode_dev(le32_to_cpu(copied.i_u.rdev));
break;
case S_IFIFO:
case S_IFSOCK:
@@ -161,12 +168,15 @@ static int erofs_read_inode(struct inode *inode)
goto err_out;
}
- /* total blocks for compressed files */
- if (erofs_inode_is_data_compressed(vi->datalayout)) {
- nblks = le32_to_cpu(iu.compressed_blocks);
- } else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
+ if (erofs_inode_is_data_compressed(vi->datalayout))
+ inode->i_blocks = le32_to_cpu(copied.i_u.blocks_lo) <<
+ (sb->s_blocksize_bits - 9);
+ else
+ inode->i_blocks = round_up(inode->i_size, sb->s_blocksize) >> 9;
+
+ if (vi->datalayout == EROFS_INODE_CHUNK_BASED) {
/* fill chunked inode summary info */
- vi->chunkformat = le16_to_cpu(iu.c.format);
+ vi->chunkformat = le16_to_cpu(copied.i_u.c.format);
if (vi->chunkformat & ~EROFS_CHUNK_FORMAT_ALL) {
erofs_err(sb, "unsupported chunk format %x of nid %llu",
vi->chunkformat, vi->nid);
@@ -176,22 +186,15 @@ static int erofs_read_inode(struct inode *inode)
vi->chunkbits = sb->s_blocksize_bits +
(vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
}
- inode_set_mtime_to_ts(inode,
- inode_set_atime_to_ts(inode, inode_get_ctime(inode)));
+ inode_set_atime_to_ts(inode,
+ inode_set_ctime_to_ts(inode, inode_get_mtime(inode)));
inode->i_flags &= ~S_DAX;
if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
(vi->datalayout == EROFS_INODE_FLAT_PLAIN ||
vi->datalayout == EROFS_INODE_CHUNK_BASED))
inode->i_flags |= S_DAX;
-
- if (!nblks)
- /* measure inode.i_blocks as generic filesystems */
- inode->i_blocks = round_up(inode->i_size, sb->s_blocksize) >> 9;
- else
- inode->i_blocks = nblks << (sb->s_blocksize_bits - 9);
err_out:
- DBG_BUGON(err);
erofs_put_metabuf(&buf);
return err;
}
@@ -202,13 +205,10 @@ static int erofs_fill_inode(struct inode *inode)
int err;
trace_erofs_fill_inode(inode);
-
- /* read inode base data from disk */
err = erofs_read_inode(inode);
if (err)
return err;
- /* setup the new inode */
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_op = &erofs_generic_iops;
@@ -229,15 +229,10 @@ static int erofs_fill_inode(struct inode *inode)
inode->i_op = &erofs_symlink_iops;
inode_nohighmem(inode);
break;
- case S_IFCHR:
- case S_IFBLK:
- case S_IFIFO:
- case S_IFSOCK:
+ default:
inode->i_op = &erofs_generic_iops;
init_special_inode(inode, inode->i_mode, inode->i_rdev);
return 0;
- default:
- return -EFSCORRUPTED;
}
mapping_set_large_folios(inode->i_mapping);
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 686d835eb533..4ac188d5d894 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -37,8 +37,7 @@ __printf(2, 3) void _erofs_printk(struct super_block *sb, const char *fmt, ...);
typedef u64 erofs_nid_t;
typedef u64 erofs_off_t;
-/* data type for filesystem-wide blocks number */
-typedef u32 erofs_blk_t;
+typedef u64 erofs_blk_t;
struct erofs_device_info {
char *path;
@@ -47,8 +46,8 @@ struct erofs_device_info {
struct dax_device *dax_dev;
u64 dax_part_off;
- u32 blocks;
- u32 mapped_blkaddr;
+ erofs_blk_t blocks;
+ erofs_blk_t uniaddr;
};
enum {
@@ -143,8 +142,8 @@ struct erofs_sb_info {
unsigned char blkszbits; /* filesystem block size in bit shift */
u32 sb_size; /* total superblock size */
- u32 build_time_nsec;
- u64 build_time;
+ u32 fixed_nsec;
+ s64 epoch;
/* what we really care is nid, rather than ino.. */
erofs_nid_t root_nid;
@@ -152,8 +151,6 @@ struct erofs_sb_info {
/* used for statfs, f_files - f_favail */
u64 inos;
- u8 uuid[16]; /* 128-bit uuid for volume */
- u8 volume_name[16]; /* volume name */
u32 feature_compat;
u32 feature_incompat;
@@ -199,11 +196,6 @@ enum {
EROFS_ZIP_CACHE_READAROUND
};
-enum erofs_kmap_type {
- EROFS_NO_KMAP, /* don't map the buffer */
- EROFS_KMAP, /* use kmap_local_page() to map the buffer */
-};
-
struct erofs_buf {
struct address_space *mapping;
struct file *file;
@@ -212,8 +204,8 @@ struct erofs_buf {
};
#define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL })
-#define erofs_blknr(sb, addr) ((erofs_blk_t)((addr) >> (sb)->s_blocksize_bits))
-#define erofs_blkoff(sb, addr) ((addr) & ((sb)->s_blocksize - 1))
+#define erofs_blknr(sb, pos) ((erofs_blk_t)((pos) >> (sb)->s_blocksize_bits))
+#define erofs_blkoff(sb, pos) ((pos) & ((sb)->s_blocksize - 1))
#define erofs_pos(sb, blk) ((erofs_off_t)(blk) << (sb)->s_blocksize_bits)
#define erofs_iblks(i) (round_up((i)->i_size, i_blocksize(i)) >> (i)->i_blkbits)
@@ -233,6 +225,7 @@ EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING)
EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE)
EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES)
+EROFS_FEATURE_FUNCS(48bit, incompat, INCOMPAT_48BIT)
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER)
@@ -252,6 +245,7 @@ struct erofs_inode {
unsigned char datalayout;
unsigned char inode_isize;
+ bool dot_omitted;
unsigned int xattr_isize;
unsigned int xattr_name_filter;
@@ -259,7 +253,7 @@ struct erofs_inode {
unsigned int *xattr_shared_xattrs;
union {
- erofs_blk_t raw_blkaddr;
+ erofs_blk_t startblk;
struct {
unsigned short chunkformat;
unsigned char chunkbits;
@@ -268,15 +262,13 @@ struct erofs_inode {
struct {
unsigned short z_advise;
unsigned char z_algorithmtype[2];
- unsigned char z_logical_clusterbits;
- unsigned long z_tailextent_headlcn;
+ unsigned char z_lclusterbits;
union {
- struct {
- erofs_off_t z_idataoff;
- unsigned short z_idata_size;
- };
- erofs_off_t z_fragmentoff;
+ u64 z_tailextent_headlcn;
+ u64 z_extents;
};
+ erofs_off_t z_fragmentoff;
+ unsigned short z_idata_size;
};
#endif /* CONFIG_EROFS_FS_ZIP */
};
@@ -387,11 +379,10 @@ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
erofs_off_t *offset, int *lengthp);
void erofs_unmap_metabuf(struct erofs_buf *buf);
void erofs_put_metabuf(struct erofs_buf *buf);
-void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset,
- enum erofs_kmap_type type);
+void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap);
void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb);
void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb,
- erofs_off_t offset, enum erofs_kmap_type type);
+ erofs_off_t offset, bool need_kmap);
int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev);
int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
@@ -448,6 +439,7 @@ int __init erofs_init_shrinker(void);
void erofs_exit_shrinker(void);
int __init z_erofs_init_subsystem(void);
void z_erofs_exit_subsystem(void);
+int z_erofs_init_super(struct super_block *sb);
unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi,
unsigned long nr_shrink);
int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
@@ -457,7 +449,6 @@ void z_erofs_put_gbuf(void *ptr);
int z_erofs_gbuf_growsize(unsigned int nrpages);
int __init z_erofs_gbuf_init(void);
void z_erofs_gbuf_exit(void);
-int erofs_init_managed_cache(struct super_block *sb);
int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb);
#else
static inline void erofs_shrinker_register(struct super_block *sb) {}
@@ -466,7 +457,7 @@ static inline int erofs_init_shrinker(void) { return 0; }
static inline void erofs_exit_shrinker(void) {}
static inline int z_erofs_init_subsystem(void) { return 0; }
static inline void z_erofs_exit_subsystem(void) {}
-static inline int erofs_init_managed_cache(struct super_block *sb) { return 0; }
+static inline int z_erofs_init_super(struct super_block *sb) { return 0; }
#endif /* !CONFIG_EROFS_FS_ZIP */
#ifdef CONFIG_EROFS_FS_BACKED_BY_FILE
diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c
index c94d0c1608a8..f7cf4f41af28 100644
--- a/fs/erofs/namei.c
+++ b/fs/erofs/namei.c
@@ -100,7 +100,7 @@ static void *erofs_find_target_block(struct erofs_buf *target,
struct erofs_dirent *de;
buf.mapping = dir->i_mapping;
- de = erofs_bread(&buf, erofs_pos(dir->i_sb, mid), EROFS_KMAP);
+ de = erofs_bread(&buf, erofs_pos(dir->i_sb, mid), true);
if (!IS_ERR(de)) {
const int nameoff = nameoff_from_disk(de->nameoff, bsz);
const int ndirents = nameoff / sizeof(*de);
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 827b62665649..cadec6b1b554 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -94,7 +94,7 @@ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
int len, i, cnt;
*offset = round_up(*offset, 4);
- ptr = erofs_bread(buf, *offset, EROFS_KMAP);
+ ptr = erofs_bread(buf, *offset, true);
if (IS_ERR(ptr))
return ptr;
@@ -110,7 +110,7 @@ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
for (i = 0; i < len; i += cnt) {
cnt = min_t(int, sb->s_blocksize - erofs_blkoff(sb, *offset),
len - i);
- ptr = erofs_bread(buf, *offset, EROFS_KMAP);
+ ptr = erofs_bread(buf, *offset, true);
if (IS_ERR(ptr)) {
kfree(buffer);
return ptr;
@@ -141,7 +141,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
struct erofs_deviceslot *dis;
struct file *file;
- dis = erofs_read_metabuf(buf, sb, *pos, EROFS_KMAP);
+ dis = erofs_read_metabuf(buf, sb, *pos, true);
if (IS_ERR(dis))
return PTR_ERR(dis);
@@ -178,8 +178,8 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
dif->file = file;
}
- dif->blocks = le32_to_cpu(dis->blocks);
- dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
+ dif->blocks = le32_to_cpu(dis->blocks_lo);
+ dif->uniaddr = le32_to_cpu(dis->uniaddr_lo);
sbi->total_blocks += dif->blocks;
*pos += EROFS_DEVT_SLOT_SIZE;
return 0;
@@ -255,7 +255,7 @@ static int erofs_read_superblock(struct super_block *sb)
void *data;
int ret;
- data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP);
+ data = erofs_read_metabuf(&buf, sb, 0, true);
if (IS_ERR(data)) {
erofs_err(sb, "cannot read erofs superblock");
return PTR_ERR(data);
@@ -268,7 +268,7 @@ static int erofs_read_superblock(struct super_block *sb)
goto out;
}
- sbi->blkszbits = dsb->blkszbits;
+ sbi->blkszbits = dsb->blkszbits;
if (sbi->blkszbits < 9 || sbi->blkszbits > PAGE_SHIFT) {
erofs_err(sb, "blkszbits %u isn't supported", sbi->blkszbits);
goto out;
@@ -299,7 +299,7 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->sb_size);
goto out;
}
- sbi->dif0.blocks = le32_to_cpu(dsb->blocks);
+ sbi->dif0.blocks = le32_to_cpu(dsb->blocks_lo);
sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
#ifdef CONFIG_EROFS_FS_XATTR
sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
@@ -308,23 +308,20 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->xattr_filter_reserved = dsb->xattr_filter_reserved;
#endif
sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
- sbi->root_nid = le16_to_cpu(dsb->root_nid);
+ if (erofs_sb_has_48bit(sbi) && dsb->rootnid_8b) {
+ sbi->root_nid = le64_to_cpu(dsb->rootnid_8b);
+ sbi->dif0.blocks = (sbi->dif0.blocks << 32) |
+ le16_to_cpu(dsb->rb.blocks_hi);
+ } else {
+ sbi->root_nid = le16_to_cpu(dsb->rb.rootnid_2b);
+ }
sbi->packed_nid = le64_to_cpu(dsb->packed_nid);
sbi->inos = le64_to_cpu(dsb->inos);
- sbi->build_time = le64_to_cpu(dsb->build_time);
- sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
-
+ sbi->epoch = (s64)le64_to_cpu(dsb->epoch);
+ sbi->fixed_nsec = le32_to_cpu(dsb->fixed_nsec);
super_set_uuid(sb, (void *)dsb->uuid, sizeof(dsb->uuid));
- ret = strscpy(sbi->volume_name, dsb->volume_name,
- sizeof(dsb->volume_name));
- if (ret < 0) { /* -E2BIG */
- erofs_err(sb, "bad volume name without NIL terminator");
- ret = -EFSCORRUPTED;
- goto out;
- }
-
/* parse on-disk compression configurations */
ret = z_erofs_parse_cfgs(sb, dsb);
if (ret < 0)
@@ -333,6 +330,8 @@ static int erofs_read_superblock(struct super_block *sb)
/* handle multiple devices */
ret = erofs_scan_devices(sb, dsb);
+ if (erofs_sb_has_48bit(sbi))
+ erofs_info(sb, "EXPERIMENTAL 48-bit layout support in use. Use at your own risk!");
if (erofs_is_fscache_mode(sb))
erofs_info(sb, "[deprecated] fscache-based on-demand read feature in use. Use at your own risk!");
out:
@@ -639,9 +638,16 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
else
sb->s_flags &= ~SB_POSIXACL;
-#ifdef CONFIG_EROFS_FS_ZIP
- xa_init(&sbi->managed_pslots);
-#endif
+ err = z_erofs_init_super(sb);
+ if (err)
+ return err;
+
+ if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) {
+ inode = erofs_iget(sb, sbi->packed_nid);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+ sbi->packed_inode = inode;
+ }
inode = erofs_iget(sb, sbi->root_nid);
if (IS_ERR(inode))
@@ -653,24 +659,11 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
iput(inode);
return -EINVAL;
}
-
sb->s_root = d_make_root(inode);
if (!sb->s_root)
return -ENOMEM;
erofs_shrinker_register(sb);
- if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) {
- sbi->packed_inode = erofs_iget(sb, sbi->packed_nid);
- if (IS_ERR(sbi->packed_inode)) {
- err = PTR_ERR(sbi->packed_inode);
- sbi->packed_inode = NULL;
- return err;
- }
- }
- err = erofs_init_managed_cache(sb);
- if (err)
- return err;
-
err = erofs_xattr_prefixes_init(sb);
if (err)
return err;
@@ -806,6 +799,16 @@ static int erofs_init_fs_context(struct fs_context *fc)
return 0;
}
+static void erofs_drop_internal_inodes(struct erofs_sb_info *sbi)
+{
+ iput(sbi->packed_inode);
+ sbi->packed_inode = NULL;
+#ifdef CONFIG_EROFS_FS_ZIP
+ iput(sbi->managed_cache);
+ sbi->managed_cache = NULL;
+#endif
+}
+
static void erofs_kill_sb(struct super_block *sb)
{
struct erofs_sb_info *sbi = EROFS_SB(sb);
@@ -815,6 +818,7 @@ static void erofs_kill_sb(struct super_block *sb)
kill_anon_super(sb);
else
kill_block_super(sb);
+ erofs_drop_internal_inodes(sbi);
fs_put_dax(sbi->dif0.dax_dev, NULL);
erofs_fscache_unregister_fs(sb);
erofs_sb_free(sbi);
@@ -825,17 +829,10 @@ static void erofs_put_super(struct super_block *sb)
{
struct erofs_sb_info *const sbi = EROFS_SB(sb);
- DBG_BUGON(!sbi);
-
erofs_unregister_sysfs(sb);
erofs_shrinker_unregister(sb);
erofs_xattr_prefixes_cleanup(sb);
-#ifdef CONFIG_EROFS_FS_ZIP
- iput(sbi->managed_cache);
- sbi->managed_cache = NULL;
-#endif
- iput(sbi->packed_inode);
- sbi->packed_inode = NULL;
+ erofs_drop_internal_inodes(sbi);
erofs_free_dev_context(sbi->devs);
sbi->devs = NULL;
erofs_fscache_unregister_fs(sb);
diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c
index 19d586273b70..dad4e6c6c155 100644
--- a/fs/erofs/sysfs.c
+++ b/fs/erofs/sysfs.c
@@ -81,6 +81,7 @@ EROFS_ATTR_FEATURE(sb_chksum);
EROFS_ATTR_FEATURE(ztailpacking);
EROFS_ATTR_FEATURE(fragments);
EROFS_ATTR_FEATURE(dedupe);
+EROFS_ATTR_FEATURE(48bit);
static struct attribute *erofs_feat_attrs[] = {
ATTR_LIST(zero_padding),
@@ -93,6 +94,7 @@ static struct attribute *erofs_feat_attrs[] = {
ATTR_LIST(ztailpacking),
ATTR_LIST(fragments),
ATTR_LIST(dedupe),
+ ATTR_LIST(48bit),
NULL,
};
ATTRIBUTE_GROUPS(erofs_feat);
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index df2777e05661..9cf84717a92e 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -81,7 +81,7 @@ static int erofs_init_inode_xattrs(struct inode *inode)
it.pos = erofs_iloc(inode) + vi->inode_isize;
/* read in shared xattr array (non-atomic, see kmalloc below) */
- it.kaddr = erofs_bread(&it.buf, it.pos, EROFS_KMAP);
+ it.kaddr = erofs_bread(&it.buf, it.pos, true);
if (IS_ERR(it.kaddr)) {
ret = PTR_ERR(it.kaddr);
goto out_unlock;
@@ -102,7 +102,7 @@ static int erofs_init_inode_xattrs(struct inode *inode)
it.pos += sizeof(struct erofs_xattr_ibody_header);
for (i = 0; i < vi->xattr_shared_count; ++i) {
- it.kaddr = erofs_bread(&it.buf, it.pos, EROFS_KMAP);
+ it.kaddr = erofs_bread(&it.buf, it.pos, true);
if (IS_ERR(it.kaddr)) {
kfree(vi->xattr_shared_xattrs);
vi->xattr_shared_xattrs = NULL;
@@ -183,7 +183,7 @@ static int erofs_xattr_copy_to_buffer(struct erofs_xattr_iter *it,
void *src;
for (processed = 0; processed < len; processed += slice) {
- it->kaddr = erofs_bread(&it->buf, it->pos, EROFS_KMAP);
+ it->kaddr = erofs_bread(&it->buf, it->pos, true);
if (IS_ERR(it->kaddr))
return PTR_ERR(it->kaddr);
@@ -286,7 +286,7 @@ static int erofs_getxattr_foreach(struct erofs_xattr_iter *it)
/* 2. handle xattr name */
for (processed = 0; processed < entry.e_name_len; processed += slice) {
- it->kaddr = erofs_bread(&it->buf, it->pos, EROFS_KMAP);
+ it->kaddr = erofs_bread(&it->buf, it->pos, true);
if (IS_ERR(it->kaddr))
return PTR_ERR(it->kaddr);
@@ -330,7 +330,7 @@ static int erofs_xattr_iter_inline(struct erofs_xattr_iter *it,
it->pos = erofs_iloc(inode) + vi->inode_isize + xattr_header_sz;
while (remaining) {
- it->kaddr = erofs_bread(&it->buf, it->pos, EROFS_KMAP);
+ it->kaddr = erofs_bread(&it->buf, it->pos, true);
if (IS_ERR(it->kaddr))
return PTR_ERR(it->kaddr);
@@ -367,7 +367,7 @@ static int erofs_xattr_iter_shared(struct erofs_xattr_iter *it,
for (i = 0; i < vi->xattr_shared_count; ++i) {
it->pos = erofs_pos(sb, sbi->xattr_blkaddr) +
vi->xattr_shared_xattrs[i] * sizeof(__le32);
- it->kaddr = erofs_bread(&it->buf, it->pos, EROFS_KMAP);
+ it->kaddr = erofs_bread(&it->buf, it->pos, true);
if (IS_ERR(it->kaddr))
return PTR_ERR(it->kaddr);
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index d771e06db738..0671184d9cf1 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -44,8 +44,8 @@ struct z_erofs_pcluster {
/* A: point to next chained pcluster or TAILs */
struct z_erofs_pcluster *next;
- /* I: start block address of this pcluster */
- erofs_off_t index;
+ /* I: start physical position of this pcluster */
+ erofs_off_t pos;
/* L: the maximum decompression size of this round */
unsigned int length;
@@ -73,6 +73,9 @@ struct z_erofs_pcluster {
/* I: compression algorithm format */
unsigned char algorithmformat;
+ /* I: whether compressed data is in-lined or not */
+ bool from_meta;
+
/* L: whether partial decompression or not */
bool partial;
@@ -102,14 +105,9 @@ struct z_erofs_decompressqueue {
bool eio, sync;
};
-static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
-{
- return !pcl->index;
-}
-
static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
{
- return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT;
+ return PAGE_ALIGN(pcl->pageofs_in + pcl->pclustersize) >> PAGE_SHIFT;
}
static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo)
@@ -133,7 +131,7 @@ struct z_erofs_pcluster_slab {
static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = {
_PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128),
- _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES)
+ _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES + 1)
};
struct z_erofs_bvec_iter {
@@ -267,7 +265,6 @@ static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size)
pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL);
if (!pcl)
return ERR_PTR(-ENOMEM);
- pcl->pclustersize = size;
return pcl;
}
return ERR_PTR(-EINVAL);
@@ -516,6 +513,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
struct z_erofs_pcluster *pcl = fe->pcl;
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
bool shouldalloc = z_erofs_should_alloc_cache(fe);
+ pgoff_t poff = pcl->pos >> PAGE_SHIFT;
bool may_bypass = true;
/* Optimistic allocation, as in-place I/O can be used as a fallback */
gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) |
@@ -532,7 +530,7 @@ static void z_erofs_bind_cache(struct z_erofs_frontend *fe)
if (READ_ONCE(pcl->compressed_bvecs[i].page))
continue;
- folio = filemap_get_folio(mc, pcl->index + i);
+ folio = filemap_get_folio(mc, poff + i);
if (IS_ERR(folio)) {
may_bypass = false;
if (!shouldalloc)
@@ -575,7 +573,7 @@ static int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
struct folio *folio;
int i;
- DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
+ DBG_BUGON(pcl->from_meta);
/* Each cached folio contains one page unless bs > ps is supported */
for (i = 0; i < pclusterpages; ++i) {
if (pcl->compressed_bvecs[i].page) {
@@ -607,7 +605,7 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
ret = false;
spin_lock(&pcl->lockref.lock);
if (pcl->lockref.count <= 0) {
- DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
+ DBG_BUGON(pcl->from_meta);
for (; bvec < end; ++bvec) {
if (bvec->page && page_folio(bvec->page) == folio) {
bvec->page = NULL;
@@ -644,18 +642,18 @@ static const struct address_space_operations z_erofs_cache_aops = {
.invalidate_folio = z_erofs_cache_invalidate_folio,
};
-int erofs_init_managed_cache(struct super_block *sb)
+int z_erofs_init_super(struct super_block *sb)
{
struct inode *const inode = new_inode(sb);
if (!inode)
return -ENOMEM;
-
set_nlink(inode, 1);
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &z_erofs_cache_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
EROFS_SB(sb)->managed_cache = inode;
+ xa_init(&EROFS_SB(sb)->managed_pslots);
return 0;
}
@@ -667,16 +665,20 @@ static int z_erofs_attach_page(struct z_erofs_frontend *fe,
int ret;
if (exclusive) {
- /* give priority for inplaceio to use file pages first */
- spin_lock(&pcl->lockref.lock);
- while (fe->icur > 0) {
- if (pcl->compressed_bvecs[--fe->icur].page)
- continue;
- pcl->compressed_bvecs[fe->icur] = *bvec;
+ /* Inplace I/O is limited to one page for uncompressed data */
+ if (pcl->algorithmformat < Z_EROFS_COMPRESSION_MAX ||
+ fe->icur <= 1) {
+ /* Try to prioritize inplace I/O here */
+ spin_lock(&pcl->lockref.lock);
+ while (fe->icur > 0) {
+ if (pcl->compressed_bvecs[--fe->icur].page)
+ continue;
+ pcl->compressed_bvecs[fe->icur] = *bvec;
+ spin_unlock(&pcl->lockref.lock);
+ return 0;
+ }
spin_unlock(&pcl->lockref.lock);
- return 0;
}
- spin_unlock(&pcl->lockref.lock);
/* otherwise, check if it can be used as a bvpage */
if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
@@ -711,27 +713,26 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
struct erofs_map_blocks *map = &fe->map;
struct super_block *sb = fe->inode->i_sb;
struct erofs_sb_info *sbi = EROFS_SB(sb);
- bool ztailpacking = map->m_flags & EROFS_MAP_META;
struct z_erofs_pcluster *pcl, *pre;
+ unsigned int pageofs_in;
int err;
- if (!(map->m_flags & EROFS_MAP_ENCODED) ||
- (!ztailpacking && !erofs_blknr(sb, map->m_pa))) {
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
-
- /* no available pcluster, let's allocate one */
- pcl = z_erofs_alloc_pcluster(map->m_plen);
+ pageofs_in = erofs_blkoff(sb, map->m_pa);
+ pcl = z_erofs_alloc_pcluster(pageofs_in + map->m_plen);
if (IS_ERR(pcl))
return PTR_ERR(pcl);
lockref_init(&pcl->lockref); /* one ref for this request */
pcl->algorithmformat = map->m_algorithmformat;
+ pcl->pclustersize = map->m_plen;
+ pcl->pageofs_in = pageofs_in;
pcl->length = 0;
pcl->partial = true;
pcl->next = fe->head;
+ pcl->pos = map->m_pa;
+ pcl->pageofs_in = pageofs_in;
pcl->pageofs_out = map->m_la & ~PAGE_MASK;
+ pcl->from_meta = map->m_flags & EROFS_MAP_META;
fe->mode = Z_EROFS_PCLUSTER_FOLLOWED;
/*
@@ -741,13 +742,10 @@ static int z_erofs_register_pcluster(struct z_erofs_frontend *fe)
mutex_init(&pcl->lock);
DBG_BUGON(!mutex_trylock(&pcl->lock));
- if (ztailpacking) {
- pcl->index = 0; /* which indicates ztailpacking */
- } else {
- pcl->index = erofs_blknr(sb, map->m_pa);
+ if (!pcl->from_meta) {
while (1) {
xa_lock(&sbi->managed_pslots);
- pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->index,
+ pre = __xa_cmpxchg(&sbi->managed_pslots, pcl->pos,
NULL, pcl, GFP_KERNEL);
if (!pre || xa_is_err(pre) || z_erofs_get_pcluster(pre)) {
xa_unlock(&sbi->managed_pslots);
@@ -779,7 +777,6 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
{
struct erofs_map_blocks *map = &fe->map;
struct super_block *sb = fe->inode->i_sb;
- erofs_blk_t blknr = erofs_blknr(sb, map->m_pa);
struct z_erofs_pcluster *pcl = NULL;
int ret;
@@ -790,9 +787,9 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
if (!(map->m_flags & EROFS_MAP_META)) {
while (1) {
rcu_read_lock();
- pcl = xa_load(&EROFS_SB(sb)->managed_pslots, blknr);
+ pcl = xa_load(&EROFS_SB(sb)->managed_pslots, map->m_pa);
if (!pcl || z_erofs_get_pcluster(pcl)) {
- DBG_BUGON(pcl && blknr != pcl->index);
+ DBG_BUGON(pcl && map->m_pa != pcl->pos);
rcu_read_unlock();
break;
}
@@ -826,13 +823,13 @@ static int z_erofs_pcluster_begin(struct z_erofs_frontend *fe)
z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
- if (!z_erofs_is_inline_pcluster(fe->pcl)) {
+ if (!fe->pcl->from_meta) {
/* bind cache first when cached decompression is preferred */
z_erofs_bind_cache(fe);
} else {
void *mptr;
- mptr = erofs_read_metabuf(&map->buf, sb, map->m_pa, EROFS_NO_KMAP);
+ mptr = erofs_read_metabuf(&map->buf, sb, map->m_pa, false);
if (IS_ERR(mptr)) {
ret = PTR_ERR(mptr);
erofs_err(sb, "failed to get inline data %d", ret);
@@ -871,7 +868,7 @@ static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
* It's impossible to fail after the pcluster is freezed, but in order
* to avoid some race conditions, add a DBG_BUGON to observe this.
*/
- DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->index) != pcl);
+ DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->pos) != pcl);
lockref_mark_dead(&pcl->lockref);
return true;
@@ -967,7 +964,7 @@ static int z_erofs_read_fragment(struct super_block *sb, struct folio *folio,
buf.mapping = packed_inode->i_mapping;
for (; cur < end; cur += cnt, pos += cnt) {
cnt = min(end - cur, sb->s_blocksize - erofs_blkoff(sb, pos));
- src = erofs_bread(&buf, pos, EROFS_KMAP);
+ src = erofs_bread(&buf, pos, true);
if (IS_ERR(src)) {
erofs_put_metabuf(&buf);
return PTR_ERR(src);
@@ -1221,7 +1218,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_backend *be, bool *overlapped)
}
be->compressed_pages[i] = page;
- if (z_erofs_is_inline_pcluster(pcl) ||
+ if (pcl->from_meta ||
erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) {
if (!PageUptodate(page))
err = -EIO;
@@ -1284,6 +1281,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
.sb = be->sb,
.in = be->compressed_pages,
.out = be->decompressed_pages,
+ .inpages = pclusterpages,
+ .outpages = be->nr_pages,
.pageofs_in = pcl->pageofs_in,
.pageofs_out = pcl->pageofs_out,
.inputsize = pcl->pclustersize,
@@ -1297,7 +1296,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
}, be->pagepool);
/* must handle all compressed pages before actual file pages */
- if (z_erofs_is_inline_pcluster(pcl)) {
+ if (pcl->from_meta) {
page = pcl->compressed_bvecs[0].page;
WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
put_page(page);
@@ -1357,7 +1356,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_backend *be, int err)
WRITE_ONCE(pcl->next, NULL);
mutex_unlock(&pcl->lock);
- if (z_erofs_is_inline_pcluster(pcl))
+ if (pcl->from_meta)
z_erofs_free_pcluster(pcl);
else
z_erofs_put_pcluster(sbi, pcl, try_free);
@@ -1538,7 +1537,7 @@ out_allocfolio:
folio = page_folio(page);
out_tocache:
if (!tocache || bs != PAGE_SIZE ||
- filemap_add_folio(mc, folio, pcl->index + nr, gfp)) {
+ filemap_add_folio(mc, folio, (pcl->pos >> PAGE_SHIFT) + nr, gfp)) {
/* turn into a temporary shortlived folio (1 ref) */
folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
return;
@@ -1655,19 +1654,20 @@ static void z_erofs_submit_queue(struct z_erofs_frontend *f,
pcl = next;
next = READ_ONCE(pcl->next);
- if (z_erofs_is_inline_pcluster(pcl)) {
+ if (pcl->from_meta) {
z_erofs_move_to_bypass_queue(pcl, next, qtail);
continue;
}
/* no device id here, thus it will always succeed */
mdev = (struct erofs_map_dev) {
- .m_pa = erofs_pos(sb, pcl->index),
+ .m_pa = round_down(pcl->pos, sb->s_blocksize),
};
(void)erofs_map_dev(sb, &mdev);
cur = mdev.m_pa;
- end = cur + pcl->pclustersize;
+ end = round_up(cur + pcl->pageofs_in + pcl->pclustersize,
+ sb->s_blocksize);
do {
bvec.bv_page = NULL;
if (bio && (cur != last_pa ||
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 689437e99a5a..8de50df05dfe 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -25,13 +25,13 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m,
{
struct inode *const inode = m->inode;
struct erofs_inode *const vi = EROFS_I(inode);
- const erofs_off_t pos = Z_EROFS_FULL_INDEX_ALIGN(erofs_iloc(inode) +
+ const erofs_off_t pos = Z_EROFS_FULL_INDEX_START(erofs_iloc(inode) +
vi->inode_isize + vi->xattr_isize) +
lcn * sizeof(struct z_erofs_lcluster_index);
struct z_erofs_lcluster_index *di;
unsigned int advise;
- di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, EROFS_KMAP);
+ di = erofs_read_metabuf(&m->map->buf, inode->i_sb, pos, true);
if (IS_ERR(di))
return PTR_ERR(di);
m->lcn = lcn;
@@ -40,7 +40,7 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m,
advise = le16_to_cpu(di->di_advise);
m->type = advise & Z_EROFS_LI_LCLUSTER_TYPE_MASK;
if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
- m->clusterofs = 1 << vi->z_logical_clusterbits;
+ m->clusterofs = 1 << vi->z_lclusterbits;
m->delta[0] = le16_to_cpu(di->di_u.delta[0]);
if (m->delta[0] & Z_EROFS_LI_D0_CBLKCNT) {
if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
@@ -55,7 +55,7 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m,
} else {
m->partialref = !!(advise & Z_EROFS_LI_PARTIAL_REF);
m->clusterofs = le16_to_cpu(di->di_clusterofs);
- if (m->clusterofs >= 1 << vi->z_logical_clusterbits) {
+ if (m->clusterofs >= 1 << vi->z_lclusterbits) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
@@ -102,9 +102,9 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
{
struct inode *const inode = m->inode;
struct erofs_inode *const vi = EROFS_I(inode);
- const erofs_off_t ebase = sizeof(struct z_erofs_map_header) +
- ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
- const unsigned int lclusterbits = vi->z_logical_clusterbits;
+ const erofs_off_t ebase = Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) +
+ vi->inode_isize + vi->xattr_isize);
+ const unsigned int lclusterbits = vi->z_lclusterbits;
const unsigned int totalidx = erofs_iblks(inode);
unsigned int compacted_4b_initial, compacted_2b, amortizedshift;
unsigned int vcnt, lo, lobits, encodebits, nblk, bytes;
@@ -146,7 +146,7 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
else
return -EOPNOTSUPP;
- in = erofs_read_metabuf(&m->map->buf, m->inode->i_sb, pos, EROFS_KMAP);
+ in = erofs_read_metabuf(&m->map->buf, m->inode->i_sb, pos, true);
if (IS_ERR(in))
return PTR_ERR(in);
@@ -255,7 +255,7 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
{
struct super_block *sb = m->inode->i_sb;
struct erofs_inode *const vi = EROFS_I(m->inode);
- const unsigned int lclusterbits = vi->z_logical_clusterbits;
+ const unsigned int lclusterbits = vi->z_lclusterbits;
while (m->lcn >= lookback_distance) {
unsigned long lcn = m->lcn - lookback_distance;
@@ -265,26 +265,22 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
if (err)
return err;
- switch (m->type) {
- case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
+ if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) {
+ erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu",
+ m->type, lcn, vi->nid);
+ DBG_BUGON(1);
+ return -EOPNOTSUPP;
+ } else if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
lookback_distance = m->delta[0];
if (!lookback_distance)
- goto err_bogus;
+ break;
continue;
- case Z_EROFS_LCLUSTER_TYPE_PLAIN:
- case Z_EROFS_LCLUSTER_TYPE_HEAD1:
- case Z_EROFS_LCLUSTER_TYPE_HEAD2:
+ } else {
m->headtype = m->type;
m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
return 0;
- default:
- erofs_err(sb, "unknown type %u @ lcn %lu of nid %llu",
- m->type, lcn, vi->nid);
- DBG_BUGON(1);
- return -EOPNOTSUPP;
}
}
-err_bogus:
erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu",
lookback_distance, m->lcn, vi->nid);
DBG_BUGON(1);
@@ -308,7 +304,7 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
if ((m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD1 && !bigpcl1) ||
((m->headtype == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
m->headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2) && !bigpcl2) ||
- (lcn << vi->z_logical_clusterbits) >= inode->i_size)
+ (lcn << vi->z_lclusterbits) >= inode->i_size)
m->compressedblks = 1;
if (m->compressedblks)
@@ -329,35 +325,28 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
DBG_BUGON(lcn == initial_lcn &&
m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD);
- switch (m->type) {
- case Z_EROFS_LCLUSTER_TYPE_PLAIN:
- case Z_EROFS_LCLUSTER_TYPE_HEAD1:
- case Z_EROFS_LCLUSTER_TYPE_HEAD2:
+ if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
+ if (m->delta[0] != 1) {
+ erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+ if (m->compressedblks)
+ goto out;
+ } else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) {
/*
* if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
* rather than CBLKCNT, it's a 1 block-sized pcluster.
*/
m->compressedblks = 1;
- break;
- case Z_EROFS_LCLUSTER_TYPE_NONHEAD:
- if (m->delta[0] != 1)
- goto err_bonus_cblkcnt;
- if (m->compressedblks)
- break;
- fallthrough;
- default:
- erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn,
- vi->nid);
- DBG_BUGON(1);
- return -EFSCORRUPTED;
+ goto out;
}
+ erofs_err(sb, "cannot found CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
out:
m->map->m_plen = erofs_pos(sb, m->compressedblks);
return 0;
-err_bonus_cblkcnt:
- erofs_err(sb, "bogus CBLKCNT @ lcn %lu of nid %llu", lcn, vi->nid);
- DBG_BUGON(1);
- return -EFSCORRUPTED;
}
static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
@@ -365,7 +354,7 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
struct inode *inode = m->inode;
struct erofs_inode *vi = EROFS_I(inode);
struct erofs_map_blocks *map = m->map;
- unsigned int lclusterbits = vi->z_logical_clusterbits;
+ unsigned int lclusterbits = vi->z_lclusterbits;
u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits;
int err;
@@ -386,9 +375,7 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
m->delta[1] = 1;
DBG_BUGON(1);
}
- } else if (m->type == Z_EROFS_LCLUSTER_TYPE_PLAIN ||
- m->type == Z_EROFS_LCLUSTER_TYPE_HEAD1 ||
- m->type == Z_EROFS_LCLUSTER_TYPE_HEAD2) {
+ } else if (m->type < Z_EROFS_LCLUSTER_TYPE_MAX) {
if (lcn != headlcn)
break; /* ends at the next HEAD lcluster */
m->delta[1] = 1;
@@ -404,23 +391,32 @@ static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m)
return 0;
}
-static int z_erofs_do_map_blocks(struct inode *inode,
+static int z_erofs_map_blocks_fo(struct inode *inode,
struct erofs_map_blocks *map, int flags)
{
- struct erofs_inode *const vi = EROFS_I(inode);
- bool ztailpacking = vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER;
+ struct erofs_inode *vi = EROFS_I(inode);
+ struct super_block *sb = inode->i_sb;
bool fragment = vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER;
+ bool ztailpacking = vi->z_idata_size;
+ unsigned int lclusterbits = vi->z_lclusterbits;
struct z_erofs_maprecorder m = {
.inode = inode,
.map = map,
};
int err = 0;
- unsigned int lclusterbits, endoff, afmt;
+ unsigned int endoff, afmt;
unsigned long initial_lcn;
unsigned long long ofs, end;
- lclusterbits = vi->z_logical_clusterbits;
ofs = flags & EROFS_GET_BLOCKS_FINDTAIL ? inode->i_size - 1 : map->m_la;
+ if (fragment && !(flags & EROFS_GET_BLOCKS_FINDTAIL) &&
+ !vi->z_tailextent_headlcn) {
+ map->m_la = 0;
+ map->m_llen = inode->i_size;
+ map->m_flags = EROFS_MAP_MAPPED |
+ EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT;
+ return 0;
+ }
initial_lcn = ofs >> lclusterbits;
endoff = ofs & ((1 << lclusterbits) - 1);
@@ -428,9 +424,8 @@ static int z_erofs_do_map_blocks(struct inode *inode,
if (err)
goto unmap_out;
- if (ztailpacking && (flags & EROFS_GET_BLOCKS_FINDTAIL))
- vi->z_idataoff = m.nextpackoff;
-
+ if ((flags & EROFS_GET_BLOCKS_FINDTAIL) && ztailpacking)
+ vi->z_fragmentoff = m.nextpackoff;
map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED;
end = (m.lcn + 1ULL) << lclusterbits;
@@ -452,8 +447,7 @@ static int z_erofs_do_map_blocks(struct inode *inode,
}
/* m.lcn should be >= 1 if endoff < m.clusterofs */
if (!m.lcn) {
- erofs_err(inode->i_sb,
- "invalid logical cluster 0 at nid %llu",
+ erofs_err(sb, "invalid logical cluster 0 at nid %llu",
vi->nid);
err = -EFSCORRUPTED;
goto unmap_out;
@@ -469,8 +463,7 @@ static int z_erofs_do_map_blocks(struct inode *inode,
goto unmap_out;
break;
default:
- erofs_err(inode->i_sb,
- "unknown type %u @ offset %llu of nid %llu",
+ erofs_err(sb, "unknown type %u @ offset %llu of nid %llu",
m.type, ofs, vi->nid);
err = -EOPNOTSUPP;
goto unmap_out;
@@ -487,12 +480,18 @@ static int z_erofs_do_map_blocks(struct inode *inode,
}
if (ztailpacking && m.lcn == vi->z_tailextent_headlcn) {
map->m_flags |= EROFS_MAP_META;
- map->m_pa = vi->z_idataoff;
+ map->m_pa = vi->z_fragmentoff;
map->m_plen = vi->z_idata_size;
+ if (erofs_blkoff(sb, map->m_pa) + map->m_plen > sb->s_blocksize) {
+ erofs_err(sb, "invalid tail-packing pclustersize %llu",
+ map->m_plen);
+ err = -EFSCORRUPTED;
+ goto unmap_out;
+ }
} else if (fragment && m.lcn == vi->z_tailextent_headlcn) {
map->m_flags |= EROFS_MAP_FRAGMENT;
} else {
- map->m_pa = erofs_pos(inode->i_sb, m.pblk);
+ map->m_pa = erofs_pos(sb, m.pblk);
err = z_erofs_get_extent_compressedlen(&m, initial_lcn);
if (err)
goto unmap_out;
@@ -511,7 +510,7 @@ static int z_erofs_do_map_blocks(struct inode *inode,
afmt = m.headtype == Z_EROFS_LCLUSTER_TYPE_HEAD2 ?
vi->z_algorithmtype[1] : vi->z_algorithmtype[0];
if (!(EROFS_I_SB(inode)->available_compr_algs & (1 << afmt))) {
- erofs_err(inode->i_sb, "inconsistent algorithmtype %u for nid %llu",
+ erofs_err(sb, "inconsistent algorithmtype %u for nid %llu",
afmt, vi->nid);
err = -EFSCORRUPTED;
goto unmap_out;
@@ -535,6 +534,115 @@ unmap_out:
return err;
}
+static int z_erofs_map_blocks_ext(struct inode *inode,
+ struct erofs_map_blocks *map, int flags)
+{
+ struct erofs_inode *vi = EROFS_I(inode);
+ struct super_block *sb = inode->i_sb;
+ bool interlaced = vi->z_advise & Z_EROFS_ADVISE_INTERLACED_PCLUSTER;
+ unsigned int recsz = z_erofs_extent_recsize(vi->z_advise);
+ erofs_off_t pos = round_up(Z_EROFS_MAP_HEADER_END(erofs_iloc(inode) +
+ vi->inode_isize + vi->xattr_isize), recsz);
+ erofs_off_t lend = inode->i_size;
+ erofs_off_t l, r, mid, pa, la, lstart;
+ struct z_erofs_extent *ext;
+ unsigned int fmt;
+ bool last;
+
+ map->m_flags = 0;
+ if (recsz <= offsetof(struct z_erofs_extent, pstart_hi)) {
+ if (recsz <= offsetof(struct z_erofs_extent, pstart_lo)) {
+ ext = erofs_read_metabuf(&map->buf, sb, pos, true);
+ if (IS_ERR(ext))
+ return PTR_ERR(ext);
+ pa = le64_to_cpu(*(__le64 *)ext);
+ pos += sizeof(__le64);
+ lstart = 0;
+ } else {
+ lstart = map->m_la >> vi->z_lclusterbits;
+ pa = EROFS_NULL_ADDR;
+ }
+
+ for (; lstart <= map->m_la; lstart += 1 << vi->z_lclusterbits) {
+ ext = erofs_read_metabuf(&map->buf, sb, pos, true);
+ if (IS_ERR(ext))
+ return PTR_ERR(ext);
+ map->m_plen = le32_to_cpu(ext->plen);
+ if (pa != EROFS_NULL_ADDR) {
+ map->m_pa = pa;
+ pa += map->m_plen & Z_EROFS_EXTENT_PLEN_MASK;
+ } else {
+ map->m_pa = le32_to_cpu(ext->pstart_lo);
+ }
+ pos += recsz;
+ }
+ last = (lstart >= round_up(lend, 1 << vi->z_lclusterbits));
+ lend = min(lstart, lend);
+ lstart -= 1 << vi->z_lclusterbits;
+ } else {
+ lstart = lend;
+ for (l = 0, r = vi->z_extents; l < r; ) {
+ mid = l + (r - l) / 2;
+ ext = erofs_read_metabuf(&map->buf, sb,
+ pos + mid * recsz, true);
+ if (IS_ERR(ext))
+ return PTR_ERR(ext);
+
+ la = le32_to_cpu(ext->lstart_lo);
+ pa = le32_to_cpu(ext->pstart_lo) |
+ (u64)le32_to_cpu(ext->pstart_hi) << 32;
+ if (recsz > offsetof(struct z_erofs_extent, lstart_hi))
+ la |= (u64)le32_to_cpu(ext->lstart_hi) << 32;
+
+ if (la > map->m_la) {
+ r = mid;
+ lend = la;
+ } else {
+ l = mid + 1;
+ if (map->m_la == la)
+ r = min(l + 1, r);
+ lstart = la;
+ map->m_plen = le32_to_cpu(ext->plen);
+ map->m_pa = pa;
+ }
+ }
+ last = (l >= vi->z_extents);
+ }
+
+ if (lstart < lend) {
+ map->m_la = lstart;
+ if (last && (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
+ map->m_flags |= EROFS_MAP_MAPPED | EROFS_MAP_FRAGMENT;
+ vi->z_fragmentoff = map->m_plen;
+ if (recsz >= offsetof(struct z_erofs_extent, pstart_lo))
+ vi->z_fragmentoff |= map->m_pa << 32;
+ } else if (map->m_plen) {
+ map->m_flags |= EROFS_MAP_MAPPED |
+ EROFS_MAP_FULL_MAPPED | EROFS_MAP_ENCODED;
+ fmt = map->m_plen >> Z_EROFS_EXTENT_PLEN_FMT_BIT;
+ if (fmt)
+ map->m_algorithmformat = fmt - 1;
+ else if (interlaced && !erofs_blkoff(sb, map->m_pa))
+ map->m_algorithmformat =
+ Z_EROFS_COMPRESSION_INTERLACED;
+ else
+ map->m_algorithmformat =
+ Z_EROFS_COMPRESSION_SHIFTED;
+ if (map->m_plen & Z_EROFS_EXTENT_PLEN_PARTIAL)
+ map->m_flags |= EROFS_MAP_PARTIAL_REF;
+ map->m_plen &= Z_EROFS_EXTENT_PLEN_MASK;
+ }
+ }
+ map->m_llen = lend - map->m_la;
+ if (!last && map->m_llen < sb->s_blocksize) {
+ erofs_err(sb, "extent too small %llu @ offset %llu of nid %llu",
+ map->m_llen, map->m_la, vi->nid);
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
+ }
+ return 0;
+}
+
static int z_erofs_fill_inode_lazy(struct inode *inode)
{
struct erofs_inode *const vi = EROFS_I(inode);
@@ -561,7 +669,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
goto out_unlock;
pos = ALIGN(erofs_iloc(inode) + vi->inode_isize + vi->xattr_isize, 8);
- h = erofs_read_metabuf(&buf, sb, pos, EROFS_KMAP);
+ h = erofs_read_metabuf(&buf, sb, pos, true);
if (IS_ERR(h)) {
err = PTR_ERR(h);
goto out_unlock;
@@ -578,8 +686,20 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
goto done;
}
vi->z_advise = le16_to_cpu(h->h_advise);
+ vi->z_lclusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 15);
+ if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL &&
+ (vi->z_advise & Z_EROFS_ADVISE_EXTENTS)) {
+ vi->z_extents = le32_to_cpu(h->h_extents_lo) |
+ ((u64)le16_to_cpu(h->h_extents_hi) << 32);
+ goto done;
+ }
+
vi->z_algorithmtype[0] = h->h_algorithmtype & 15;
vi->z_algorithmtype[1] = h->h_algorithmtype >> 4;
+ if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)
+ vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
+ else if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER)
+ vi->z_idata_size = le16_to_cpu(h->h_idata_size);
headnr = 0;
if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX ||
@@ -590,7 +710,6 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
goto out_put_metabuf;
}
- vi->z_logical_clusterbits = sb->s_blocksize_bits + (h->h_clusterbits & 7);
if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) &&
vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 |
Z_EROFS_ADVISE_BIG_PCLUSTER_2)) {
@@ -608,34 +727,13 @@ static int z_erofs_fill_inode_lazy(struct inode *inode)
goto out_put_metabuf;
}
- if (vi->z_advise & Z_EROFS_ADVISE_INLINE_PCLUSTER) {
+ if (vi->z_idata_size ||
+ (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER)) {
struct erofs_map_blocks map = {
.buf = __EROFS_BUF_INITIALIZER
};
- vi->z_idata_size = le16_to_cpu(h->h_idata_size);
- err = z_erofs_do_map_blocks(inode, &map,
- EROFS_GET_BLOCKS_FINDTAIL);
- erofs_put_metabuf(&map.buf);
-
- if (!map.m_plen ||
- erofs_blkoff(sb, map.m_pa) + map.m_plen > sb->s_blocksize) {
- erofs_err(sb, "invalid tail-packing pclustersize %llu",
- map.m_plen);
- err = -EFSCORRUPTED;
- }
- if (err < 0)
- goto out_put_metabuf;
- }
-
- if (vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER &&
- !(h->h_clusterbits >> Z_EROFS_FRAGMENT_INODE_BIT)) {
- struct erofs_map_blocks map = {
- .buf = __EROFS_BUF_INITIALIZER
- };
-
- vi->z_fragmentoff = le32_to_cpu(h->h_fragmentoff);
- err = z_erofs_do_map_blocks(inode, &map,
+ err = z_erofs_map_blocks_fo(inode, &map,
EROFS_GET_BLOCKS_FINDTAIL);
erofs_put_metabuf(&map.buf);
if (err < 0)
@@ -666,15 +764,11 @@ int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map,
} else {
err = z_erofs_fill_inode_lazy(inode);
if (!err) {
- if ((vi->z_advise & Z_EROFS_ADVISE_FRAGMENT_PCLUSTER) &&
- !vi->z_tailextent_headlcn) {
- map->m_la = 0;
- map->m_llen = inode->i_size;
- map->m_flags = EROFS_MAP_MAPPED |
- EROFS_MAP_FULL_MAPPED | EROFS_MAP_FRAGMENT;
- } else {
- err = z_erofs_do_map_blocks(inode, map, flags);
- }
+ if (vi->datalayout == EROFS_INODE_COMPRESSED_FULL &&
+ (vi->z_advise & Z_EROFS_ADVISE_EXTENTS))
+ err = z_erofs_map_blocks_ext(inode, map, flags);
+ else
+ err = z_erofs_map_blocks_fo(inode, map, flags);
}
if (!err && (map->m_flags & EROFS_MAP_ENCODED) &&
unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE ||
diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index 57df3843e650..c69c7b1e41d1 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -75,7 +75,7 @@ TRACE_EVENT(erofs_fill_inode,
__entry->ofs = erofs_blkoff(inode->i_sb, erofs_iloc(inode));
),
- TP_printk("dev = (%d,%d), nid = %llu, blkaddr %u ofs %u",
+ TP_printk("dev = (%d,%d), nid = %llu, blkaddr %llu ofs %u",
show_dev_nid(__entry),
__entry->blkaddr, __entry->ofs)
);