diff options
Diffstat (limited to 'recipes/linux/linux-vusolo2-2.6.37/ubifs_packport.patch')
-rw-r--r-- | recipes/linux/linux-vusolo2-2.6.37/ubifs_packport.patch | 9138 |
1 files changed, 9138 insertions, 0 deletions
diff --git a/recipes/linux/linux-vusolo2-2.6.37/ubifs_packport.patch b/recipes/linux/linux-vusolo2-2.6.37/ubifs_packport.patch new file mode 100644 index 0000000..193c5e6 --- /dev/null +++ b/recipes/linux/linux-vusolo2-2.6.37/ubifs_packport.patch @@ -0,0 +1,9138 @@ +diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig +index 830e3f7..f8b0160 100644 +--- a/fs/ubifs/Kconfig ++++ b/fs/ubifs/Kconfig +@@ -44,29 +44,17 @@ config UBIFS_FS_ZLIB + + # Debugging-related stuff + config UBIFS_FS_DEBUG +- bool "Enable debugging" ++ bool "Enable debugging support" + depends on UBIFS_FS + select DEBUG_FS +- select KALLSYMS_ALL +- help +- This option enables UBIFS debugging. +- +-config UBIFS_FS_DEBUG_MSG_LVL +- int "Default message level (0 = no extra messages, 3 = lots)" +- depends on UBIFS_FS_DEBUG +- default "0" +- help +- This controls the amount of debugging messages produced by UBIFS. +- If reporting bugs, please try to have available a full dump of the +- messages at level 1 while the misbehaviour was occurring. Level 2 +- may become necessary if level 1 messages were not enough to find the +- bug. Generally Level 3 should be avoided. +- +-config UBIFS_FS_DEBUG_CHKS +- bool "Enable extra checks" +- depends on UBIFS_FS_DEBUG +- help +- If extra checks are enabled UBIFS will check the consistency of its +- internal data structures during operation. However, UBIFS performance +- is dramatically slower when this option is selected especially if the +- file system is large. ++ select KALLSYMS ++ help ++ This option enables UBIFS debugging support. It makes sure various ++ assertions, self-checks, debugging messages and test modes are compiled ++ in (this all is compiled out otherwise). Assertions are light-weight ++ and this option also enables them. Self-checks, debugging messages and ++ test modes are switched off by default. Thus, it is safe and actually ++ recommended to have debugging support enabled, and it should not slow ++ down UBIFS. 
You can then further enable / disable individual debugging ++ features using UBIFS module parameters and the corresponding sysfs ++ interfaces. +diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c +index c8ff0d1..02c73e7 100644 +--- a/fs/ubifs/budget.c ++++ b/fs/ubifs/budget.c +@@ -106,7 +106,7 @@ static long long get_liability(struct ubifs_info *c) + long long liab; + + spin_lock(&c->space_lock); +- liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; ++ liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth; + spin_unlock(&c->space_lock); + return liab; + } +@@ -180,7 +180,7 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) + int idx_lebs; + long long idx_size; + +- idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; ++ idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx; + /* And make sure we have thrice the index size of space reserved */ + idx_size += idx_size << 1; + /* +@@ -292,13 +292,13 @@ static int can_use_rp(struct ubifs_info *c) + * budgeted index space to the size of the current index, multiplies this by 3, + * and makes sure this does not exceed the amount of free LEBs. + * +- * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: ++ * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables: + * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might + * be large, because UBIFS does not do any index consolidation as long as + * there is free space. IOW, the index may take a lot of LEBs, but the LEBs + * will contain a lot of dirt. +- * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, +- * the index may be consolidated to take up to @c->min_idx_lebs LEBs. ++ * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW, ++ * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs. + * + * This function returns zero in case of success, and %-ENOSPC in case of + * failure. 
+@@ -343,13 +343,13 @@ static int do_budget_space(struct ubifs_info *c) + c->lst.taken_empty_lebs; + if (unlikely(rsvd_idx_lebs > lebs)) { + dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " +- "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, ++ "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs, + rsvd_idx_lebs); + return -ENOSPC; + } + + available = ubifs_calc_available(c, min_idx_lebs); +- outstanding = c->budg_data_growth + c->budg_dd_growth; ++ outstanding = c->bi.data_growth + c->bi.dd_growth; + + if (unlikely(available < outstanding)) { + dbg_budg("out of data space: available %lld, outstanding %lld", +@@ -360,7 +360,7 @@ static int do_budget_space(struct ubifs_info *c) + if (available - outstanding <= c->rp_size && !can_use_rp(c)) + return -ENOSPC; + +- c->min_idx_lebs = min_idx_lebs; ++ c->bi.min_idx_lebs = min_idx_lebs; + return 0; + } + +@@ -393,11 +393,11 @@ static int calc_data_growth(const struct ubifs_info *c, + { + int data_growth; + +- data_growth = req->new_ino ? c->inode_budget : 0; ++ data_growth = req->new_ino ? c->bi.inode_budget : 0; + if (req->new_page) +- data_growth += c->page_budget; ++ data_growth += c->bi.page_budget; + if (req->new_dent) +- data_growth += c->dent_budget; ++ data_growth += c->bi.dent_budget; + data_growth += req->new_ino_d; + return data_growth; + } +@@ -413,12 +413,12 @@ static int calc_dd_growth(const struct ubifs_info *c, + { + int dd_growth; + +- dd_growth = req->dirtied_page ? c->page_budget : 0; ++ dd_growth = req->dirtied_page ? 
c->bi.page_budget : 0; + + if (req->dirtied_ino) +- dd_growth += c->inode_budget << (req->dirtied_ino - 1); ++ dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1); + if (req->mod_dent) +- dd_growth += c->dent_budget; ++ dd_growth += c->bi.dent_budget; + dd_growth += req->dirtied_ino_d; + return dd_growth; + } +@@ -460,19 +460,19 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) + + again: + spin_lock(&c->space_lock); +- ubifs_assert(c->budg_idx_growth >= 0); +- ubifs_assert(c->budg_data_growth >= 0); +- ubifs_assert(c->budg_dd_growth >= 0); ++ ubifs_assert(c->bi.idx_growth >= 0); ++ ubifs_assert(c->bi.data_growth >= 0); ++ ubifs_assert(c->bi.dd_growth >= 0); + +- if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { ++ if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) { + dbg_budg("no space"); + spin_unlock(&c->space_lock); + return -ENOSPC; + } + +- c->budg_idx_growth += idx_growth; +- c->budg_data_growth += data_growth; +- c->budg_dd_growth += dd_growth; ++ c->bi.idx_growth += idx_growth; ++ c->bi.data_growth += data_growth; ++ c->bi.dd_growth += dd_growth; + + err = do_budget_space(c); + if (likely(!err)) { +@@ -484,9 +484,9 @@ again: + } + + /* Restore the old values */ +- c->budg_idx_growth -= idx_growth; +- c->budg_data_growth -= data_growth; +- c->budg_dd_growth -= dd_growth; ++ c->bi.idx_growth -= idx_growth; ++ c->bi.data_growth -= data_growth; ++ c->bi.dd_growth -= dd_growth; + spin_unlock(&c->space_lock); + + if (req->fast) { +@@ -506,9 +506,9 @@ again: + goto again; + } + dbg_budg("FS is full, -ENOSPC"); +- c->nospace = 1; ++ c->bi.nospace = 1; + if (can_use_rp(c) || c->rp_size == 0) +- c->nospace_rp = 1; ++ c->bi.nospace_rp = 1; + smp_wmb(); + } else + ubifs_err("cannot budget space, error %d", err); +@@ -523,8 +523,8 @@ again: + * This function releases the space budgeted by 'ubifs_budget_space()'. 
Note, + * since the index changes (which were budgeted for in @req->idx_growth) will + * only be written to the media on commit, this function moves the index budget +- * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be +- * zeroed by the commit operation. ++ * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed ++ * by the commit operation. + */ + void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) + { +@@ -553,23 +553,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) + if (!req->data_growth && !req->dd_growth) + return; + +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + + spin_lock(&c->space_lock); +- c->budg_idx_growth -= req->idx_growth; +- c->budg_uncommitted_idx += req->idx_growth; +- c->budg_data_growth -= req->data_growth; +- c->budg_dd_growth -= req->dd_growth; +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); +- +- ubifs_assert(c->budg_idx_growth >= 0); +- ubifs_assert(c->budg_data_growth >= 0); +- ubifs_assert(c->budg_dd_growth >= 0); +- ubifs_assert(c->min_idx_lebs < c->main_lebs); +- ubifs_assert(!(c->budg_idx_growth & 7)); +- ubifs_assert(!(c->budg_data_growth & 7)); +- ubifs_assert(!(c->budg_dd_growth & 7)); ++ c->bi.idx_growth -= req->idx_growth; ++ c->bi.uncommitted_idx += req->idx_growth; ++ c->bi.data_growth -= req->data_growth; ++ c->bi.dd_growth -= req->dd_growth; ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ ++ ubifs_assert(c->bi.idx_growth >= 0); ++ ubifs_assert(c->bi.data_growth >= 0); ++ ubifs_assert(c->bi.dd_growth >= 0); ++ ubifs_assert(c->bi.min_idx_lebs < c->main_lebs); ++ ubifs_assert(!(c->bi.idx_growth & 7)); ++ ubifs_assert(!(c->bi.data_growth & 7)); ++ ubifs_assert(!(c->bi.dd_growth & 7)); + spin_unlock(&c->space_lock); + } + +@@ -586,13 +586,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c) + { + spin_lock(&c->space_lock); + /* Release the index growth reservation */ 
+- c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; ++ c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; + /* Release the data growth reservation */ +- c->budg_data_growth -= c->page_budget; ++ c->bi.data_growth -= c->bi.page_budget; + /* Increase the dirty data growth reservation instead */ +- c->budg_dd_growth += c->page_budget; ++ c->bi.dd_growth += c->bi.page_budget; + /* And re-calculate the indexing space reservation */ +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + spin_unlock(&c->space_lock); + } + +@@ -612,7 +612,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, + + memset(&req, 0, sizeof(struct ubifs_budget_req)); + /* The "no space" flags will be cleared because dd_growth is > 0 */ +- req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); ++ req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8); + ubifs_release_budget(c, &req); + } + +@@ -682,9 +682,9 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) + int rsvd_idx_lebs, lebs; + long long available, outstanding, free; + +- ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); +- outstanding = c->budg_data_growth + c->budg_dd_growth; +- available = ubifs_calc_available(c, c->min_idx_lebs); ++ ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); ++ outstanding = c->bi.data_growth + c->bi.dd_growth; ++ available = ubifs_calc_available(c, c->bi.min_idx_lebs); + + /* + * When reporting free space to user-space, UBIFS guarantees that it is +@@ -697,8 +697,8 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) + * Note, the calculations below are similar to what we have in + * 'do_budget_space()', so refer there for comments. 
+ */ +- if (c->min_idx_lebs > c->lst.idx_lebs) +- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; ++ if (c->bi.min_idx_lebs > c->lst.idx_lebs) ++ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - +diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c +index 02429d8..fb3b5c8 100644 +--- a/fs/ubifs/commit.c ++++ b/fs/ubifs/commit.c +@@ -48,6 +48,56 @@ + #include <linux/slab.h> + #include "ubifs.h" + ++/* ++ * nothing_to_commit - check if there is nothing to commit. ++ * @c: UBIFS file-system description object ++ * ++ * This is a helper function which checks if there is anything to commit. It is ++ * used as an optimization to avoid starting the commit if it is not really ++ * necessary. Indeed, the commit operation always assumes flash I/O (e.g., ++ * writing the commit start node to the log), and it is better to avoid doing ++ * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is ++ * nothing to commit, it is more optimal to avoid any flash I/O. ++ * ++ * This function has to be called with @c->commit_sem locked for writing - ++ * this function does not take LPT/TNC locks because the @c->commit_sem ++ * guarantees that we have exclusive access to the TNC and LPT data structures. ++ * ++ * This function returns %1 if there is nothing to commit and %0 otherwise. ++ */ ++static int nothing_to_commit(struct ubifs_info *c) ++{ ++ /* ++ * During mounting or remounting from R/O mode to R/W mode we may ++ * commit for various recovery-related reasons. ++ */ ++ if (c->mounting || c->remounting_rw) ++ return 0; ++ ++ /* ++ * If the root TNC node is dirty, we definitely have something to ++ * commit. ++ */ ++ if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode)) ++ return 0; ++ ++ /* ++ * Even though the TNC is clean, the LPT tree may have dirty nodes. 
For ++ * example, this may happen if the budgeting subsystem invoked GC to ++ * make some free space, and the GC found an LEB with only dirty and ++ * free space. In this case GC would just change the lprops of this ++ * LEB (by turning all space into free space) and unmap it. ++ */ ++ if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags)) ++ return 0; ++ ++ ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0); ++ ubifs_assert(c->dirty_pn_cnt == 0); ++ ubifs_assert(c->dirty_nn_cnt == 0); ++ ++ return 1; ++} ++ + /** + * do_commit - commit the journal. + * @c: UBIFS file-system description object +@@ -70,6 +120,12 @@ static int do_commit(struct ubifs_info *c) + goto out_up; + } + ++ if (nothing_to_commit(c)) { ++ up_write(&c->commit_sem); ++ err = 0; ++ goto out_cancel; ++ } ++ + /* Sync all write buffers (necessary for recovery) */ + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); +@@ -126,7 +182,7 @@ static int do_commit(struct ubifs_info *c) + c->mst_node->root_len = cpu_to_le32(zroot.len); + c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); + c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); +- c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); ++ c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz); + c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); + c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); + c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); +@@ -162,12 +218,12 @@ static int do_commit(struct ubifs_info *c) + if (err) + goto out; + ++out_cancel: + spin_lock(&c->cs_lock); + c->cmt_state = COMMIT_RESTING; + wake_up(&c->cmt_wq); + dbg_cmt("commit end"); + spin_unlock(&c->cs_lock); +- + return 0; + + out_up: +@@ -362,7 +418,7 @@ int ubifs_run_commit(struct ubifs_info *c) + + spin_lock(&c->cs_lock); + if (c->cmt_state == COMMIT_BROKEN) { +- err = -EINVAL; ++ err = -EROFS; + goto out; + } + +@@ -388,7 +444,7 @@ int ubifs_run_commit(struct ubifs_info *c) + * re-check it. 
+ */ + if (c->cmt_state == COMMIT_BROKEN) { +- err = -EINVAL; ++ err = -EROFS; + goto out_cmt_unlock; + } + +@@ -520,8 +576,8 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) + struct idx_node *i; + size_t sz; + +- if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) +- goto out; ++ if (!dbg_is_chk_index(c)) ++ return 0; + + INIT_LIST_HEAD(&list); + +diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c +index 0bee4db..1934084 100644 +--- a/fs/ubifs/debug.c ++++ b/fs/ubifs/debug.c +@@ -27,33 +27,16 @@ + * various local functions of those subsystems. + */ + +-#define UBIFS_DBG_PRESERVE_UBI +- +-#include "ubifs.h" + #include <linux/module.h> +-#include <linux/moduleparam.h> + #include <linux/debugfs.h> + #include <linux/math64.h> +-#include <linux/slab.h> ++#include <linux/uaccess.h> ++#include <linux/random.h> ++#include "ubifs.h" + + #ifdef CONFIG_UBIFS_FS_DEBUG + +-DEFINE_SPINLOCK(dbg_lock); +- +-static char dbg_key_buf0[128]; +-static char dbg_key_buf1[128]; +- +-unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; +-unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; +-unsigned int ubifs_tst_flags; +- +-module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); +-module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); +-module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); +- +-MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); +-MODULE_PARM_DESC(debug_chks, "Debug check flags"); +-MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); ++static DEFINE_SPINLOCK(dbg_lock); + + static const char *get_key_fmt(int fmt) + { +@@ -95,8 +78,30 @@ static const char *get_key_type(int type) + } + } + +-static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, +- char *buffer) ++static const char *get_dent_type(int type) ++{ ++ switch (type) { ++ case UBIFS_ITYPE_REG: ++ return "file"; ++ case UBIFS_ITYPE_DIR: ++ return "dir"; ++ case UBIFS_ITYPE_LNK: ++ return 
"symlink"; ++ case UBIFS_ITYPE_BLK: ++ return "blkdev"; ++ case UBIFS_ITYPE_CHR: ++ return "char dev"; ++ case UBIFS_ITYPE_FIFO: ++ return "fifo"; ++ case UBIFS_ITYPE_SOCK: ++ return "socket"; ++ default: ++ return "unknown/invalid type"; ++ } ++} ++ ++const char *dbg_snprintf_key(const struct ubifs_info *c, ++ const union ubifs_key *key, char *buffer, int len) + { + char *p = buffer; + int type = key_type(c, key); +@@ -104,45 +109,34 @@ static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, + if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { + switch (type) { + case UBIFS_INO_KEY: +- sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key), +- get_key_type(type)); ++ len -= snprintf(p, len, "(%lu, %s)", ++ (unsigned long)key_inum(c, key), ++ get_key_type(type)); + break; + case UBIFS_DENT_KEY: + case UBIFS_XENT_KEY: +- sprintf(p, "(%lu, %s, %#08x)", +- (unsigned long)key_inum(c, key), +- get_key_type(type), key_hash(c, key)); ++ len -= snprintf(p, len, "(%lu, %s, %#08x)", ++ (unsigned long)key_inum(c, key), ++ get_key_type(type), key_hash(c, key)); + break; + case UBIFS_DATA_KEY: +- sprintf(p, "(%lu, %s, %u)", +- (unsigned long)key_inum(c, key), +- get_key_type(type), key_block(c, key)); ++ len -= snprintf(p, len, "(%lu, %s, %u)", ++ (unsigned long)key_inum(c, key), ++ get_key_type(type), key_block(c, key)); + break; + case UBIFS_TRUN_KEY: +- sprintf(p, "(%lu, %s)", +- (unsigned long)key_inum(c, key), +- get_key_type(type)); ++ len -= snprintf(p, len, "(%lu, %s)", ++ (unsigned long)key_inum(c, key), ++ get_key_type(type)); + break; + default: +- sprintf(p, "(bad key type: %#08x, %#08x)", +- key->u32[0], key->u32[1]); ++ len -= snprintf(p, len, "(bad key type: %#08x, %#08x)", ++ key->u32[0], key->u32[1]); + } + } else +- sprintf(p, "bad key format %d", c->key_fmt); +-} +- +-const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key) +-{ +- /* dbg_lock must be held */ +- sprintf_key(c, key, dbg_key_buf0); +- return dbg_key_buf0; 
+-} +- +-const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key) +-{ +- /* dbg_lock must be held */ +- sprintf_key(c, key, dbg_key_buf1); +- return dbg_key_buf1; ++ len -= snprintf(p, len, "bad key format %d", c->key_fmt); ++ ubifs_assert(len > 0); ++ return p; + } + + const char *dbg_ntype(int type) +@@ -227,53 +221,83 @@ const char *dbg_jhead(int jhead) + + static void dump_ch(const struct ubifs_ch *ch) + { +- printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); +- printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc)); +- printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type, ++ printk(KERN_ERR "\tmagic %#x\n", le32_to_cpu(ch->magic)); ++ printk(KERN_ERR "\tcrc %#x\n", le32_to_cpu(ch->crc)); ++ printk(KERN_ERR "\tnode_type %d (%s)\n", ch->node_type, + dbg_ntype(ch->node_type)); +- printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type, ++ printk(KERN_ERR "\tgroup_type %d (%s)\n", ch->group_type, + dbg_gtype(ch->group_type)); +- printk(KERN_DEBUG "\tsqnum %llu\n", ++ printk(KERN_ERR "\tsqnum %llu\n", + (unsigned long long)le64_to_cpu(ch->sqnum)); +- printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); ++ printk(KERN_ERR "\tlen %u\n", le32_to_cpu(ch->len)); + } + +-void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) ++void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) + { + const struct ubifs_inode *ui = ubifs_inode(inode); ++ struct qstr nm = { .name = NULL }; ++ union ubifs_key key; ++ struct ubifs_dent_node *dent, *pdent = NULL; ++ int count = 2; + +- printk(KERN_DEBUG "Dump in-memory inode:"); +- printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); +- printk(KERN_DEBUG "\tsize %llu\n", ++ printk(KERN_ERR "Dump in-memory inode:"); ++ printk(KERN_ERR "\tinode %lu\n", inode->i_ino); ++ printk(KERN_ERR "\tsize %llu\n", + (unsigned long long)i_size_read(inode)); +- printk(KERN_DEBUG "\tnlink %u\n", inode->i_nlink); +- printk(KERN_DEBUG "\tuid %u\n", (unsigned int)inode->i_uid); +- 
printk(KERN_DEBUG "\tgid %u\n", (unsigned int)inode->i_gid); +- printk(KERN_DEBUG "\tatime %u.%u\n", ++ printk(KERN_ERR "\tnlink %u\n", inode->i_nlink); ++ printk(KERN_ERR "\tuid %u\n", (unsigned int)inode->i_uid); ++ printk(KERN_ERR "\tgid %u\n", (unsigned int)inode->i_gid); ++ printk(KERN_ERR "\tatime %u.%u\n", + (unsigned int)inode->i_atime.tv_sec, + (unsigned int)inode->i_atime.tv_nsec); +- printk(KERN_DEBUG "\tmtime %u.%u\n", ++ printk(KERN_ERR "\tmtime %u.%u\n", + (unsigned int)inode->i_mtime.tv_sec, + (unsigned int)inode->i_mtime.tv_nsec); +- printk(KERN_DEBUG "\tctime %u.%u\n", ++ printk(KERN_ERR "\tctime %u.%u\n", + (unsigned int)inode->i_ctime.tv_sec, + (unsigned int)inode->i_ctime.tv_nsec); +- printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ui->creat_sqnum); +- printk(KERN_DEBUG "\txattr_size %u\n", ui->xattr_size); +- printk(KERN_DEBUG "\txattr_cnt %u\n", ui->xattr_cnt); +- printk(KERN_DEBUG "\txattr_names %u\n", ui->xattr_names); +- printk(KERN_DEBUG "\tdirty %u\n", ui->dirty); +- printk(KERN_DEBUG "\txattr %u\n", ui->xattr); +- printk(KERN_DEBUG "\tbulk_read %u\n", ui->xattr); +- printk(KERN_DEBUG "\tsynced_i_size %llu\n", ++ printk(KERN_ERR "\tcreat_sqnum %llu\n", ui->creat_sqnum); ++ printk(KERN_ERR "\txattr_size %u\n", ui->xattr_size); ++ printk(KERN_ERR "\txattr_cnt %u\n", ui->xattr_cnt); ++ printk(KERN_ERR "\txattr_names %u\n", ui->xattr_names); ++ printk(KERN_ERR "\tdirty %u\n", ui->dirty); ++ printk(KERN_ERR "\txattr %u\n", ui->xattr); ++ printk(KERN_ERR "\tbulk_read %u\n", ui->xattr); ++ printk(KERN_ERR "\tsynced_i_size %llu\n", + (unsigned long long)ui->synced_i_size); +- printk(KERN_DEBUG "\tui_size %llu\n", ++ printk(KERN_ERR "\tui_size %llu\n", + (unsigned long long)ui->ui_size); +- printk(KERN_DEBUG "\tflags %d\n", ui->flags); +- printk(KERN_DEBUG "\tcompr_type %d\n", ui->compr_type); +- printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); +- printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); +- printk(KERN_DEBUG 
"\tdata_len %d\n", ui->data_len); ++ printk(KERN_ERR "\tflags %d\n", ui->flags); ++ printk(KERN_ERR "\tcompr_type %d\n", ui->compr_type); ++ printk(KERN_ERR "\tlast_page_read %lu\n", ui->last_page_read); ++ printk(KERN_ERR "\tread_in_a_row %lu\n", ui->read_in_a_row); ++ printk(KERN_ERR "\tdata_len %d\n", ui->data_len); ++ ++ if (!S_ISDIR(inode->i_mode)) ++ return; ++ ++ printk(KERN_ERR "List of directory entries:\n"); ++ ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); ++ ++ lowest_dent_key(c, &key, inode->i_ino); ++ while (1) { ++ dent = ubifs_tnc_next_ent(c, &key, &nm); ++ if (IS_ERR(dent)) { ++ if (PTR_ERR(dent) != -ENOENT) ++ printk(KERN_ERR "error %ld\n", PTR_ERR(dent)); ++ break; ++ } ++ ++ printk(KERN_ERR "\t%d: %s (%s)\n", ++ count++, dent->name, get_dent_type(dent->type)); ++ ++ nm.name = dent->name; ++ nm.len = le16_to_cpu(dent->nlen); ++ kfree(pdent); ++ pdent = dent; ++ key_read(c, &dent->key, &key); ++ } ++ kfree(pdent); + } + + void dbg_dump_node(const struct ubifs_info *c, const void *node) +@@ -281,14 +305,15 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + int i, n; + union ubifs_key key; + const struct ubifs_ch *ch = node; ++ char key_buf[DBG_KEY_BUF_LEN]; + +- if (dbg_failure_mode) ++ if (dbg_is_tst_rcvry(c)) + return; + + /* If the magic is incorrect, just hexdump the first bytes */ + if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { +- printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ); +- print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ++ printk(KERN_ERR "Not a node, first %zu bytes:", UBIFS_CH_SZ); ++ print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1, + (void *)node, UBIFS_CH_SZ, 1); + return; + } +@@ -301,7 +326,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + { + const struct ubifs_pad_node *pad = node; + +- printk(KERN_DEBUG "\tpad_len %u\n", ++ printk(KERN_ERR "\tpad_len %u\n", + le32_to_cpu(pad->pad_len)); + break; + } +@@ -310,48 +335,50 @@ void dbg_dump_node(const 
struct ubifs_info *c, const void *node) + const struct ubifs_sb_node *sup = node; + unsigned int sup_flags = le32_to_cpu(sup->flags); + +- printk(KERN_DEBUG "\tkey_hash %d (%s)\n", ++ printk(KERN_ERR "\tkey_hash %d (%s)\n", + (int)sup->key_hash, get_key_hash(sup->key_hash)); +- printk(KERN_DEBUG "\tkey_fmt %d (%s)\n", ++ printk(KERN_ERR "\tkey_fmt %d (%s)\n", + (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); +- printk(KERN_DEBUG "\tflags %#x\n", sup_flags); +- printk(KERN_DEBUG "\t big_lpt %u\n", ++ printk(KERN_ERR "\tflags %#x\n", sup_flags); ++ printk(KERN_ERR "\t big_lpt %u\n", + !!(sup_flags & UBIFS_FLG_BIGLPT)); +- printk(KERN_DEBUG "\tmin_io_size %u\n", ++ printk(KERN_ERR "\t space_fixup %u\n", ++ !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); ++ printk(KERN_ERR "\tmin_io_size %u\n", + le32_to_cpu(sup->min_io_size)); +- printk(KERN_DEBUG "\tleb_size %u\n", ++ printk(KERN_ERR "\tleb_size %u\n", + le32_to_cpu(sup->leb_size)); +- printk(KERN_DEBUG "\tleb_cnt %u\n", ++ printk(KERN_ERR "\tleb_cnt %u\n", + le32_to_cpu(sup->leb_cnt)); +- printk(KERN_DEBUG "\tmax_leb_cnt %u\n", ++ printk(KERN_ERR "\tmax_leb_cnt %u\n", + le32_to_cpu(sup->max_leb_cnt)); +- printk(KERN_DEBUG "\tmax_bud_bytes %llu\n", ++ printk(KERN_ERR "\tmax_bud_bytes %llu\n", + (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); +- printk(KERN_DEBUG "\tlog_lebs %u\n", ++ printk(KERN_ERR "\tlog_lebs %u\n", + le32_to_cpu(sup->log_lebs)); +- printk(KERN_DEBUG "\tlpt_lebs %u\n", ++ printk(KERN_ERR "\tlpt_lebs %u\n", + le32_to_cpu(sup->lpt_lebs)); +- printk(KERN_DEBUG "\torph_lebs %u\n", ++ printk(KERN_ERR "\torph_lebs %u\n", + le32_to_cpu(sup->orph_lebs)); +- printk(KERN_DEBUG "\tjhead_cnt %u\n", ++ printk(KERN_ERR "\tjhead_cnt %u\n", + le32_to_cpu(sup->jhead_cnt)); +- printk(KERN_DEBUG "\tfanout %u\n", ++ printk(KERN_ERR "\tfanout %u\n", + le32_to_cpu(sup->fanout)); +- printk(KERN_DEBUG "\tlsave_cnt %u\n", ++ printk(KERN_ERR "\tlsave_cnt %u\n", + le32_to_cpu(sup->lsave_cnt)); +- printk(KERN_DEBUG 
"\tdefault_compr %u\n", ++ printk(KERN_ERR "\tdefault_compr %u\n", + (int)le16_to_cpu(sup->default_compr)); +- printk(KERN_DEBUG "\trp_size %llu\n", ++ printk(KERN_ERR "\trp_size %llu\n", + (unsigned long long)le64_to_cpu(sup->rp_size)); +- printk(KERN_DEBUG "\trp_uid %u\n", ++ printk(KERN_ERR "\trp_uid %u\n", + le32_to_cpu(sup->rp_uid)); +- printk(KERN_DEBUG "\trp_gid %u\n", ++ printk(KERN_ERR "\trp_gid %u\n", + le32_to_cpu(sup->rp_gid)); +- printk(KERN_DEBUG "\tfmt_version %u\n", ++ printk(KERN_ERR "\tfmt_version %u\n", + le32_to_cpu(sup->fmt_version)); +- printk(KERN_DEBUG "\ttime_gran %u\n", ++ printk(KERN_ERR "\ttime_gran %u\n", + le32_to_cpu(sup->time_gran)); +- printk(KERN_DEBUG "\tUUID %pUB\n", ++ printk(KERN_ERR "\tUUID %pUB\n", + sup->uuid); + break; + } +@@ -359,61 +386,61 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + { + const struct ubifs_mst_node *mst = node; + +- printk(KERN_DEBUG "\thighest_inum %llu\n", ++ printk(KERN_ERR "\thighest_inum %llu\n", + (unsigned long long)le64_to_cpu(mst->highest_inum)); +- printk(KERN_DEBUG "\tcommit number %llu\n", ++ printk(KERN_ERR "\tcommit number %llu\n", + (unsigned long long)le64_to_cpu(mst->cmt_no)); +- printk(KERN_DEBUG "\tflags %#x\n", ++ printk(KERN_ERR "\tflags %#x\n", + le32_to_cpu(mst->flags)); +- printk(KERN_DEBUG "\tlog_lnum %u\n", ++ printk(KERN_ERR "\tlog_lnum %u\n", + le32_to_cpu(mst->log_lnum)); +- printk(KERN_DEBUG "\troot_lnum %u\n", ++ printk(KERN_ERR "\troot_lnum %u\n", + le32_to_cpu(mst->root_lnum)); +- printk(KERN_DEBUG "\troot_offs %u\n", ++ printk(KERN_ERR "\troot_offs %u\n", + le32_to_cpu(mst->root_offs)); +- printk(KERN_DEBUG "\troot_len %u\n", ++ printk(KERN_ERR "\troot_len %u\n", + le32_to_cpu(mst->root_len)); +- printk(KERN_DEBUG "\tgc_lnum %u\n", ++ printk(KERN_ERR "\tgc_lnum %u\n", + le32_to_cpu(mst->gc_lnum)); +- printk(KERN_DEBUG "\tihead_lnum %u\n", ++ printk(KERN_ERR "\tihead_lnum %u\n", + le32_to_cpu(mst->ihead_lnum)); +- printk(KERN_DEBUG "\tihead_offs 
%u\n", ++ printk(KERN_ERR "\tihead_offs %u\n", + le32_to_cpu(mst->ihead_offs)); +- printk(KERN_DEBUG "\tindex_size %llu\n", ++ printk(KERN_ERR "\tindex_size %llu\n", + (unsigned long long)le64_to_cpu(mst->index_size)); +- printk(KERN_DEBUG "\tlpt_lnum %u\n", ++ printk(KERN_ERR "\tlpt_lnum %u\n", + le32_to_cpu(mst->lpt_lnum)); +- printk(KERN_DEBUG "\tlpt_offs %u\n", ++ printk(KERN_ERR "\tlpt_offs %u\n", + le32_to_cpu(mst->lpt_offs)); +- printk(KERN_DEBUG "\tnhead_lnum %u\n", ++ printk(KERN_ERR "\tnhead_lnum %u\n", + le32_to_cpu(mst->nhead_lnum)); +- printk(KERN_DEBUG "\tnhead_offs %u\n", ++ printk(KERN_ERR "\tnhead_offs %u\n", + le32_to_cpu(mst->nhead_offs)); +- printk(KERN_DEBUG "\tltab_lnum %u\n", ++ printk(KERN_ERR "\tltab_lnum %u\n", + le32_to_cpu(mst->ltab_lnum)); +- printk(KERN_DEBUG "\tltab_offs %u\n", ++ printk(KERN_ERR "\tltab_offs %u\n", + le32_to_cpu(mst->ltab_offs)); +- printk(KERN_DEBUG "\tlsave_lnum %u\n", ++ printk(KERN_ERR "\tlsave_lnum %u\n", + le32_to_cpu(mst->lsave_lnum)); +- printk(KERN_DEBUG "\tlsave_offs %u\n", ++ printk(KERN_ERR "\tlsave_offs %u\n", + le32_to_cpu(mst->lsave_offs)); +- printk(KERN_DEBUG "\tlscan_lnum %u\n", ++ printk(KERN_ERR "\tlscan_lnum %u\n", + le32_to_cpu(mst->lscan_lnum)); +- printk(KERN_DEBUG "\tleb_cnt %u\n", ++ printk(KERN_ERR "\tleb_cnt %u\n", + le32_to_cpu(mst->leb_cnt)); +- printk(KERN_DEBUG "\tempty_lebs %u\n", ++ printk(KERN_ERR "\tempty_lebs %u\n", + le32_to_cpu(mst->empty_lebs)); +- printk(KERN_DEBUG "\tidx_lebs %u\n", ++ printk(KERN_ERR "\tidx_lebs %u\n", + le32_to_cpu(mst->idx_lebs)); +- printk(KERN_DEBUG "\ttotal_free %llu\n", ++ printk(KERN_ERR "\ttotal_free %llu\n", + (unsigned long long)le64_to_cpu(mst->total_free)); +- printk(KERN_DEBUG "\ttotal_dirty %llu\n", ++ printk(KERN_ERR "\ttotal_dirty %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dirty)); +- printk(KERN_DEBUG "\ttotal_used %llu\n", ++ printk(KERN_ERR "\ttotal_used %llu\n", + (unsigned long long)le64_to_cpu(mst->total_used)); +- 
printk(KERN_DEBUG "\ttotal_dead %llu\n", ++ printk(KERN_ERR "\ttotal_dead %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dead)); +- printk(KERN_DEBUG "\ttotal_dark %llu\n", ++ printk(KERN_ERR "\ttotal_dark %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dark)); + break; + } +@@ -421,11 +448,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + { + const struct ubifs_ref_node *ref = node; + +- printk(KERN_DEBUG "\tlnum %u\n", ++ printk(KERN_ERR "\tlnum %u\n", + le32_to_cpu(ref->lnum)); +- printk(KERN_DEBUG "\toffs %u\n", ++ printk(KERN_ERR "\toffs %u\n", + le32_to_cpu(ref->offs)); +- printk(KERN_DEBUG "\tjhead %u\n", ++ printk(KERN_ERR "\tjhead %u\n", + le32_to_cpu(ref->jhead)); + break; + } +@@ -434,39 +461,40 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + const struct ubifs_ino_node *ino = node; + + key_read(c, &ino->key, &key); +- printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); +- printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ++ printk(KERN_ERR "\tkey %s\n", ++ dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); ++ printk(KERN_ERR "\tcreat_sqnum %llu\n", + (unsigned long long)le64_to_cpu(ino->creat_sqnum)); +- printk(KERN_DEBUG "\tsize %llu\n", ++ printk(KERN_ERR "\tsize %llu\n", + (unsigned long long)le64_to_cpu(ino->size)); +- printk(KERN_DEBUG "\tnlink %u\n", ++ printk(KERN_ERR "\tnlink %u\n", + le32_to_cpu(ino->nlink)); +- printk(KERN_DEBUG "\tatime %lld.%u\n", ++ printk(KERN_ERR "\tatime %lld.%u\n", + (long long)le64_to_cpu(ino->atime_sec), + le32_to_cpu(ino->atime_nsec)); +- printk(KERN_DEBUG "\tmtime %lld.%u\n", ++ printk(KERN_ERR "\tmtime %lld.%u\n", + (long long)le64_to_cpu(ino->mtime_sec), + le32_to_cpu(ino->mtime_nsec)); +- printk(KERN_DEBUG "\tctime %lld.%u\n", ++ printk(KERN_ERR "\tctime %lld.%u\n", + (long long)le64_to_cpu(ino->ctime_sec), + le32_to_cpu(ino->ctime_nsec)); +- printk(KERN_DEBUG "\tuid %u\n", ++ printk(KERN_ERR "\tuid %u\n", + le32_to_cpu(ino->uid)); +- printk(KERN_DEBUG "\tgid %u\n", 
++ printk(KERN_ERR "\tgid %u\n", + le32_to_cpu(ino->gid)); +- printk(KERN_DEBUG "\tmode %u\n", ++ printk(KERN_ERR "\tmode %u\n", + le32_to_cpu(ino->mode)); +- printk(KERN_DEBUG "\tflags %#x\n", ++ printk(KERN_ERR "\tflags %#x\n", + le32_to_cpu(ino->flags)); +- printk(KERN_DEBUG "\txattr_cnt %u\n", ++ printk(KERN_ERR "\txattr_cnt %u\n", + le32_to_cpu(ino->xattr_cnt)); +- printk(KERN_DEBUG "\txattr_size %u\n", ++ printk(KERN_ERR "\txattr_size %u\n", + le32_to_cpu(ino->xattr_size)); +- printk(KERN_DEBUG "\txattr_names %u\n", ++ printk(KERN_ERR "\txattr_names %u\n", + le32_to_cpu(ino->xattr_names)); +- printk(KERN_DEBUG "\tcompr_type %#x\n", ++ printk(KERN_ERR "\tcompr_type %#x\n", + (int)le16_to_cpu(ino->compr_type)); +- printk(KERN_DEBUG "\tdata len %u\n", ++ printk(KERN_ERR "\tdata len %u\n", + le32_to_cpu(ino->data_len)); + break; + } +@@ -477,15 +505,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + int nlen = le16_to_cpu(dent->nlen); + + key_read(c, &dent->key, &key); +- printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); +- printk(KERN_DEBUG "\tinum %llu\n", ++ printk(KERN_ERR "\tkey %s\n", ++ dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); ++ printk(KERN_ERR "\tinum %llu\n", + (unsigned long long)le64_to_cpu(dent->inum)); +- printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); +- printk(KERN_DEBUG "\tnlen %d\n", nlen); +- printk(KERN_DEBUG "\tname "); ++ printk(KERN_ERR "\ttype %d\n", (int)dent->type); ++ printk(KERN_ERR "\tnlen %d\n", nlen); ++ printk(KERN_ERR "\tname "); + + if (nlen > UBIFS_MAX_NLEN) +- printk(KERN_DEBUG "(bad name length, not printing, " ++ printk(KERN_ERR "(bad name length, not printing, " + "bad or corrupted node)"); + else { + for (i = 0; i < nlen && dent->name[i]; i++) +@@ -501,15 +530,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; + + key_read(c, &dn->key, &key); +- printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); +- printk(KERN_DEBUG 
"\tsize %u\n", ++ printk(KERN_ERR "\tkey %s\n", ++ dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); ++ printk(KERN_ERR "\tsize %u\n", + le32_to_cpu(dn->size)); +- printk(KERN_DEBUG "\tcompr_typ %d\n", ++ printk(KERN_ERR "\tcompr_typ %d\n", + (int)le16_to_cpu(dn->compr_type)); +- printk(KERN_DEBUG "\tdata size %d\n", ++ printk(KERN_ERR "\tdata size %d\n", + dlen); +- printk(KERN_DEBUG "\tdata:\n"); +- print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1, ++ printk(KERN_ERR "\tdata:\n"); ++ print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, + (void *)&dn->data, dlen, 0); + break; + } +@@ -517,11 +547,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + { + const struct ubifs_trun_node *trun = node; + +- printk(KERN_DEBUG "\tinum %u\n", ++ printk(KERN_ERR "\tinum %u\n", + le32_to_cpu(trun->inum)); +- printk(KERN_DEBUG "\told_size %llu\n", ++ printk(KERN_ERR "\told_size %llu\n", + (unsigned long long)le64_to_cpu(trun->old_size)); +- printk(KERN_DEBUG "\tnew_size %llu\n", ++ printk(KERN_ERR "\tnew_size %llu\n", + (unsigned long long)le64_to_cpu(trun->new_size)); + break; + } +@@ -530,19 +560,21 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + const struct ubifs_idx_node *idx = node; + + n = le16_to_cpu(idx->child_cnt); +- printk(KERN_DEBUG "\tchild_cnt %d\n", n); +- printk(KERN_DEBUG "\tlevel %d\n", ++ printk(KERN_ERR "\tchild_cnt %d\n", n); ++ printk(KERN_ERR "\tlevel %d\n", + (int)le16_to_cpu(idx->level)); +- printk(KERN_DEBUG "\tBranches:\n"); ++ printk(KERN_ERR "\tBranches:\n"); + + for (i = 0; i < n && i < c->fanout - 1; i++) { + const struct ubifs_branch *br; + + br = ubifs_idx_branch(c, idx, i); + key_read(c, &br->key, &key); +- printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", ++ printk(KERN_ERR "\t%d: LEB %d:%d len %d key %s\n", + i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), +- le32_to_cpu(br->len), DBGKEY(&key)); ++ le32_to_cpu(br->len), ++ dbg_snprintf_key(c, &key, key_buf, ++ 
DBG_KEY_BUF_LEN)); + } + break; + } +@@ -552,20 +584,20 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + { + const struct ubifs_orph_node *orph = node; + +- printk(KERN_DEBUG "\tcommit number %llu\n", ++ printk(KERN_ERR "\tcommit number %llu\n", + (unsigned long long) + le64_to_cpu(orph->cmt_no) & LLONG_MAX); +- printk(KERN_DEBUG "\tlast node flag %llu\n", ++ printk(KERN_ERR "\tlast node flag %llu\n", + (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); + n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; +- printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); ++ printk(KERN_ERR "\t%d orphan inode numbers:\n", n); + for (i = 0; i < n; i++) +- printk(KERN_DEBUG "\t ino %llu\n", ++ printk(KERN_ERR "\t ino %llu\n", + (unsigned long long)le64_to_cpu(orph->inos[i])); + break; + } + default: +- printk(KERN_DEBUG "node type %d was not recognized\n", ++ printk(KERN_ERR "node type %d was not recognized\n", + (int)ch->node_type); + } + spin_unlock(&dbg_lock); +@@ -574,16 +606,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + void dbg_dump_budget_req(const struct ubifs_budget_req *req) + { + spin_lock(&dbg_lock); +- printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n", ++ printk(KERN_ERR "Budgeting request: new_ino %d, dirtied_ino %d\n", + req->new_ino, req->dirtied_ino); +- printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n", ++ printk(KERN_ERR "\tnew_ino_d %d, dirtied_ino_d %d\n", + req->new_ino_d, req->dirtied_ino_d); +- printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n", ++ printk(KERN_ERR "\tnew_page %d, dirtied_page %d\n", + req->new_page, req->dirtied_page); +- printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n", ++ printk(KERN_ERR "\tnew_dent %d, mod_dent %d\n", + req->new_dent, req->mod_dent); +- printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth); +- printk(KERN_DEBUG "\tdata_growth %d dd_growth %d\n", ++ printk(KERN_ERR "\tidx_growth %d\n", req->idx_growth); ++ printk(KERN_ERR "\tdata_growth 
%d dd_growth %d\n", + req->data_growth, req->dd_growth); + spin_unlock(&dbg_lock); + } +@@ -591,18 +623,18 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req) + void dbg_dump_lstats(const struct ubifs_lp_stats *lst) + { + spin_lock(&dbg_lock); +- printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, " ++ printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, " + "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); +- printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " ++ printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, " + "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, + lst->total_dirty); +- printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, " ++ printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, " + "total_dead %lld\n", lst->total_used, lst->total_dark, + lst->total_dead); + spin_unlock(&dbg_lock); + } + +-void dbg_dump_budg(struct ubifs_info *c) ++void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) + { + int i; + struct rb_node *rb; +@@ -610,51 +642,69 @@ void dbg_dump_budg(struct ubifs_info *c) + struct ubifs_gced_idx_leb *idx_gc; + long long available, outstanding, free; + +- ubifs_assert(spin_is_locked(&c->space_lock)); ++ spin_lock(&c->space_lock); + spin_lock(&dbg_lock); +- printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " +- "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, +- c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); +- printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " +- "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, +- c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, +- c->freeable_cnt); +- printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " +- "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, +- c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); +- printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " ++ printk(KERN_ERR "(pid %d) Budgeting info: data 
budget sum %lld, " ++ "total budget sum %lld\n", current->pid, ++ bi->data_growth + bi->dd_growth, ++ bi->data_growth + bi->dd_growth + bi->idx_growth); ++ printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, " ++ "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, ++ bi->idx_growth); ++ printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, " ++ "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, ++ bi->uncommitted_idx); ++ printk(KERN_ERR "\tpage_budget %d, inode_budget %d, dent_budget %d\n", ++ bi->page_budget, bi->inode_budget, bi->dent_budget); ++ printk(KERN_ERR "\tnospace %u, nospace_rp %u\n", ++ bi->nospace, bi->nospace_rp); ++ printk(KERN_ERR "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", ++ c->dark_wm, c->dead_wm, c->max_idx_node_sz); ++ ++ if (bi != &c->bi) ++ /* ++ * If we are dumping saved budgeting data, do not print ++ * additional information which is about the current state, not ++ * the old one which corresponded to the saved budgeting data. ++ */ ++ goto out_unlock; ++ ++ printk(KERN_ERR "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", ++ c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); ++ printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " + "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), + atomic_long_read(&c->dirty_zn_cnt), + atomic_long_read(&c->clean_zn_cnt)); +- printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", +- c->dark_wm, c->dead_wm, c->max_idx_node_sz); +- printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", ++ printk(KERN_ERR "\tgc_lnum %d, ihead_lnum %d\n", + c->gc_lnum, c->ihead_lnum); ++ + /* If we are in R/O mode, journal heads do not exist */ + if (c->jheads) + for (i = 0; i < c->jhead_cnt; i++) +- printk(KERN_DEBUG "\tjhead %s\t LEB %d\n", ++ printk(KERN_ERR "\tjhead %s\t LEB %d\n", + dbg_jhead(c->jheads[i].wbuf.jhead), + c->jheads[i].wbuf.lnum); + for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { + bud = rb_entry(rb, struct ubifs_bud, rb); +- printk(KERN_DEBUG 
"\tbud LEB %d\n", bud->lnum); ++ printk(KERN_ERR "\tbud LEB %d\n", bud->lnum); + } + list_for_each_entry(bud, &c->old_buds, list) +- printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum); ++ printk(KERN_ERR "\told bud LEB %d\n", bud->lnum); + list_for_each_entry(idx_gc, &c->idx_gc, list) +- printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", ++ printk(KERN_ERR "\tGC'ed idx LEB %d unmap %d\n", + idx_gc->lnum, idx_gc->unmap); +- printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); ++ printk(KERN_ERR "\tcommit state %d\n", c->cmt_state); + + /* Print budgeting predictions */ +- available = ubifs_calc_available(c, c->min_idx_lebs); +- outstanding = c->budg_data_growth + c->budg_dd_growth; ++ available = ubifs_calc_available(c, c->bi.min_idx_lebs); ++ outstanding = c->bi.data_growth + c->bi.dd_growth; + free = ubifs_get_free_space_nolock(c); +- printk(KERN_DEBUG "Budgeting predictions:\n"); +- printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", ++ printk(KERN_ERR "Budgeting predictions:\n"); ++ printk(KERN_ERR "\tavailable: %lld, outstanding %lld, free %lld\n", + available, outstanding, free); ++out_unlock: + spin_unlock(&dbg_lock); ++ spin_unlock(&c->space_lock); + } + + void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) +@@ -670,11 +720,11 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) + dark = ubifs_calc_dark(c, spc); + + if (lp->flags & LPROPS_INDEX) +- printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " ++ printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " + "free + dirty %-8d flags %#x (", lp->lnum, lp->free, + lp->dirty, c->leb_size - spc, spc, lp->flags); + else +- printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " ++ printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " + "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " + "flags %#-4x (", lp->lnum, lp->free, lp->dirty, + c->leb_size - spc, spc, dark, dead, +@@ -729,7 +779,13 @@ void 
dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) + if (bud->lnum == lp->lnum) { + int head = 0; + for (i = 0; i < c->jhead_cnt; i++) { +- if (lp->lnum == c->jheads[i].wbuf.lnum) { ++ /* ++ * Note, if we are in R/O mode or in the middle ++ * of mounting/re-mounting, the write-buffers do ++ * not exist. ++ */ ++ if (c->jheads && ++ lp->lnum == c->jheads[i].wbuf.lnum) { + printk(KERN_CONT ", jhead %s", + dbg_jhead(i)); + head = 1; +@@ -751,7 +807,7 @@ void dbg_dump_lprops(struct ubifs_info *c) + struct ubifs_lprops lp; + struct ubifs_lp_stats lst; + +- printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n", ++ printk(KERN_ERR "(pid %d) start dumping LEB properties\n", + current->pid); + ubifs_get_lp_stats(c, &lst); + dbg_dump_lstats(&lst); +@@ -763,7 +819,7 @@ void dbg_dump_lprops(struct ubifs_info *c) + + dbg_dump_lprop(c, &lp); + } +- printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n", ++ printk(KERN_ERR "(pid %d) finish dumping LEB properties\n", + current->pid); + } + +@@ -772,69 +828,96 @@ void dbg_dump_lpt_info(struct ubifs_info *c) + int i; + + spin_lock(&dbg_lock); +- printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid); +- printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); +- printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); +- printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); +- printk(KERN_DEBUG "\tltab_sz: %d\n", c->ltab_sz); +- printk(KERN_DEBUG "\tlsave_sz: %d\n", c->lsave_sz); +- printk(KERN_DEBUG "\tbig_lpt: %d\n", c->big_lpt); +- printk(KERN_DEBUG "\tlpt_hght: %d\n", c->lpt_hght); +- printk(KERN_DEBUG "\tpnode_cnt: %d\n", c->pnode_cnt); +- printk(KERN_DEBUG "\tnnode_cnt: %d\n", c->nnode_cnt); +- printk(KERN_DEBUG "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); +- printk(KERN_DEBUG "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); +- printk(KERN_DEBUG "\tlsave_cnt: %d\n", c->lsave_cnt); +- printk(KERN_DEBUG "\tspace_bits: %d\n", c->space_bits); +- printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", 
c->lpt_lnum_bits); +- printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); +- printk(KERN_DEBUG "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); +- printk(KERN_DEBUG "\tpcnt_bits: %d\n", c->pcnt_bits); +- printk(KERN_DEBUG "\tlnum_bits: %d\n", c->lnum_bits); +- printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); +- printk(KERN_DEBUG "\tLPT head is at %d:%d\n", ++ printk(KERN_ERR "(pid %d) dumping LPT information\n", current->pid); ++ printk(KERN_ERR "\tlpt_sz: %lld\n", c->lpt_sz); ++ printk(KERN_ERR "\tpnode_sz: %d\n", c->pnode_sz); ++ printk(KERN_ERR "\tnnode_sz: %d\n", c->nnode_sz); ++ printk(KERN_ERR "\tltab_sz: %d\n", c->ltab_sz); ++ printk(KERN_ERR "\tlsave_sz: %d\n", c->lsave_sz); ++ printk(KERN_ERR "\tbig_lpt: %d\n", c->big_lpt); ++ printk(KERN_ERR "\tlpt_hght: %d\n", c->lpt_hght); ++ printk(KERN_ERR "\tpnode_cnt: %d\n", c->pnode_cnt); ++ printk(KERN_ERR "\tnnode_cnt: %d\n", c->nnode_cnt); ++ printk(KERN_ERR "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); ++ printk(KERN_ERR "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); ++ printk(KERN_ERR "\tlsave_cnt: %d\n", c->lsave_cnt); ++ printk(KERN_ERR "\tspace_bits: %d\n", c->space_bits); ++ printk(KERN_ERR "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); ++ printk(KERN_ERR "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); ++ printk(KERN_ERR "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); ++ printk(KERN_ERR "\tpcnt_bits: %d\n", c->pcnt_bits); ++ printk(KERN_ERR "\tlnum_bits: %d\n", c->lnum_bits); ++ printk(KERN_ERR "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); ++ printk(KERN_ERR "\tLPT head is at %d:%d\n", + c->nhead_lnum, c->nhead_offs); +- printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", ++ printk(KERN_ERR "\tLPT ltab is at %d:%d\n", + c->ltab_lnum, c->ltab_offs); + if (c->big_lpt) +- printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", ++ printk(KERN_ERR "\tLPT lsave is at %d:%d\n", + c->lsave_lnum, c->lsave_offs); + for (i = 0; i < c->lpt_lebs; i++) +- printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d " ++ 
printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d " + "cmt %d\n", i + c->lpt_first, c->ltab[i].free, + c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); + spin_unlock(&dbg_lock); + } + ++void dbg_dump_sleb(const struct ubifs_info *c, ++ const struct ubifs_scan_leb *sleb, int offs) ++{ ++ struct ubifs_scan_node *snod; ++ ++ printk(KERN_ERR "(pid %d) start dumping scanned data from LEB %d:%d\n", ++ current->pid, sleb->lnum, offs); ++ ++ list_for_each_entry(snod, &sleb->nodes, list) { ++ cond_resched(); ++ printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", sleb->lnum, ++ snod->offs, snod->len); ++ dbg_dump_node(c, snod->node); ++ } ++} ++ + void dbg_dump_leb(const struct ubifs_info *c, int lnum) + { + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; ++ void *buf; + +- if (dbg_failure_mode) ++ if (dbg_is_tst_rcvry(c)) + return; + +- printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", ++ printk(KERN_ERR "(pid %d) start dumping LEB %d\n", + current->pid, lnum); +- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); ++ ++ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubifs_err("cannot allocate memory for dumping LEB %d", lnum); ++ return; ++ } ++ ++ sleb = ubifs_scan(c, lnum, 0, buf, 0); + if (IS_ERR(sleb)) { + ubifs_err("scan error %d", (int)PTR_ERR(sleb)); +- return; ++ goto out; + } + +- printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, ++ printk(KERN_ERR "LEB %d has %d nodes ending at %d\n", lnum, + sleb->nodes_cnt, sleb->endpt); + + list_for_each_entry(snod, &sleb->nodes, list) { + cond_resched(); +- printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum, ++ printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", lnum, + snod->offs, snod->len); + dbg_dump_node(c, snod->node); + } + +- printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", ++ printk(KERN_ERR "(pid %d) finish dumping LEB %d\n", + current->pid, lnum); + ubifs_scan_destroy(sleb); ++ ++out: ++ vfree(buf); + return; + } + +@@ -843,6 +926,7 @@ void 
dbg_dump_znode(const struct ubifs_info *c, + { + int n; + const struct ubifs_zbranch *zbr; ++ char key_buf[DBG_KEY_BUF_LEN]; + + spin_lock(&dbg_lock); + if (znode->parent) +@@ -850,7 +934,7 @@ void dbg_dump_znode(const struct ubifs_info *c, + else + zbr = &c->zroot; + +- printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d" ++ printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d" + " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, + zbr->len, znode->parent, znode->iip, znode->level, + znode->child_cnt, znode->flags); +@@ -860,19 +944,23 @@ void dbg_dump_znode(const struct ubifs_info *c, + return; + } + +- printk(KERN_DEBUG "zbranches:\n"); ++ printk(KERN_ERR "zbranches:\n"); + for (n = 0; n < znode->child_cnt; n++) { + zbr = &znode->zbranch[n]; + if (znode->level > 0) +- printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " ++ printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key " + "%s\n", n, zbr->znode, zbr->lnum, + zbr->offs, zbr->len, +- DBGKEY(&zbr->key)); ++ dbg_snprintf_key(c, &zbr->key, ++ key_buf, ++ DBG_KEY_BUF_LEN)); + else +- printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " ++ printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key " + "%s\n", n, zbr->znode, zbr->lnum, + zbr->offs, zbr->len, +- DBGKEY(&zbr->key)); ++ dbg_snprintf_key(c, &zbr->key, ++ key_buf, ++ DBG_KEY_BUF_LEN)); + } + spin_unlock(&dbg_lock); + } +@@ -881,16 +969,16 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) + { + int i; + +- printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n", ++ printk(KERN_ERR "(pid %d) start dumping heap cat %d (%d elements)\n", + current->pid, cat, heap->cnt); + for (i = 0; i < heap->cnt; i++) { + struct ubifs_lprops *lprops = heap->arr[i]; + +- printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d " ++ printk(KERN_ERR "\t%d. 
LEB %d hpos %d free %d dirty %d " + "flags %d\n", i, lprops->lnum, lprops->hpos, + lprops->free, lprops->dirty, lprops->flags); + } +- printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid); ++ printk(KERN_ERR "(pid %d) finish dumping heap\n", current->pid); + } + + void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, +@@ -898,15 +986,15 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + { + int i; + +- printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid); +- printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", ++ printk(KERN_ERR "(pid %d) dumping pnode:\n", current->pid); ++ printk(KERN_ERR "\taddress %zx parent %zx cnext %zx\n", + (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); +- printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", ++ printk(KERN_ERR "\tflags %lu iip %d level %d num %d\n", + pnode->flags, iip, pnode->level, pnode->num); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops *lp = &pnode->lprops[i]; + +- printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n", ++ printk(KERN_ERR "\t%d: free %d dirty %d flags %d lnum %d\n", + i, lp->free, lp->dirty, lp->flags, lp->lnum); + } + } +@@ -916,20 +1004,20 @@ void dbg_dump_tnc(struct ubifs_info *c) + struct ubifs_znode *znode; + int level; + +- printk(KERN_DEBUG "\n"); +- printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid); ++ printk(KERN_ERR "\n"); ++ printk(KERN_ERR "(pid %d) start dumping TNC tree\n", current->pid); + znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); + level = znode->level; +- printk(KERN_DEBUG "== Level %d ==\n", level); ++ printk(KERN_ERR "== Level %d ==\n", level); + while (znode) { + if (level != znode->level) { + level = znode->level; +- printk(KERN_DEBUG "== Level %d ==\n", level); ++ printk(KERN_ERR "== Level %d ==\n", level); + } + dbg_dump_znode(c, znode); + znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); + } +- printk(KERN_DEBUG "(pid %d) finish dumping 
TNC tree\n", current->pid); ++ printk(KERN_ERR "(pid %d) finish dumping TNC tree\n", current->pid); + } + + static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, +@@ -961,11 +1049,41 @@ void dbg_dump_index(struct ubifs_info *c) + void dbg_save_space_info(struct ubifs_info *c) + { + struct ubifs_debug_info *d = c->dbg; +- +- ubifs_get_lp_stats(c, &d->saved_lst); ++ int freeable_cnt; + + spin_lock(&c->space_lock); ++ memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); ++ memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info)); ++ d->saved_idx_gc_cnt = c->idx_gc_cnt; ++ ++ /* ++ * We use a dirty hack here and zero out @c->freeable_cnt, because it ++ * affects the free space calculations, and UBIFS might not know about ++ * all freeable eraseblocks. Indeed, we know about freeable eraseblocks ++ * only when we read their lprops, and we do this only lazily, upon the ++ * need. So at any given point of time @c->freeable_cnt might be not ++ * exactly accurate. ++ * ++ * Just one example about the issue we hit when we did not zero ++ * @c->freeable_cnt. ++ * 1. The file-system is mounted R/O, c->freeable_cnt is %0. We save the ++ * amount of free space in @d->saved_free ++ * 2. We re-mount R/W, which makes UBIFS to read the "lsave" ++ * information from flash, where we cache LEBs from various ++ * categories ('ubifs_remount_fs()' -> 'ubifs_lpt_init()' ++ * -> 'lpt_init_wr()' -> 'read_lsave()' -> 'ubifs_lpt_lookup()' ++ * -> 'ubifs_get_pnode()' -> 'update_cats()' ++ * -> 'ubifs_add_to_cat()'). ++ * 3. Lsave contains a freeable eraseblock, and @c->freeable_cnt ++ * becomes %1. ++ * 4. We calculate the amount of free space when the re-mount is ++ * finished in 'dbg_check_space_info()' and it does not match ++ * @d->saved_free. 
++ */ ++ freeable_cnt = c->freeable_cnt; ++ c->freeable_cnt = 0; + d->saved_free = ubifs_get_free_space_nolock(c); ++ c->freeable_cnt = freeable_cnt; + spin_unlock(&c->space_lock); + } + +@@ -982,12 +1100,15 @@ int dbg_check_space_info(struct ubifs_info *c) + { + struct ubifs_debug_info *d = c->dbg; + struct ubifs_lp_stats lst; +- long long avail, free; ++ long long free; ++ int freeable_cnt; + + spin_lock(&c->space_lock); +- avail = ubifs_calc_available(c, c->min_idx_lebs); ++ freeable_cnt = c->freeable_cnt; ++ c->freeable_cnt = 0; ++ free = ubifs_get_free_space_nolock(c); ++ c->freeable_cnt = freeable_cnt; + spin_unlock(&c->space_lock); +- free = ubifs_get_free_space(c); + + if (free != d->saved_free) { + ubifs_err("free space changed from %lld to %lld", +@@ -1000,20 +1121,21 @@ int dbg_check_space_info(struct ubifs_info *c) + out: + ubifs_msg("saved lprops statistics dump"); + dbg_dump_lstats(&d->saved_lst); +- ubifs_get_lp_stats(c, &lst); +- ++ ubifs_msg("saved budgeting info dump"); ++ dbg_dump_budg(c, &d->saved_bi); ++ ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt); + ubifs_msg("current lprops statistics dump"); ++ ubifs_get_lp_stats(c, &lst); + dbg_dump_lstats(&lst); +- +- spin_lock(&c->space_lock); +- dbg_dump_budg(c); +- spin_unlock(&c->space_lock); ++ ubifs_msg("current budgeting info dump"); ++ dbg_dump_budg(c, &c->bi); + dump_stack(); + return -EINVAL; + } + + /** + * dbg_check_synced_i_size - check synchronized inode size. ++ * @c: UBIFS file-system description object + * @inode: inode to check + * + * If inode is clean, synchronized inode size has to be equivalent to current +@@ -1021,12 +1143,12 @@ out: + * has to be locked). Returns %0 if synchronized inode size if correct, and + * %-EINVAL if not. 
+ */ +-int dbg_check_synced_i_size(struct inode *inode) ++int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode) + { + int err = 0; + struct ubifs_inode *ui = ubifs_inode(inode); + +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + if (!S_ISREG(inode->i_mode)) + return 0; +@@ -1059,7 +1181,7 @@ int dbg_check_synced_i_size(struct inode *inode) + * Note, it is good idea to make sure the @dir->i_mutex is locked before + * calling this function. + */ +-int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) ++int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) + { + unsigned int nlink = 2; + union ubifs_key key; +@@ -1067,7 +1189,7 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) + struct qstr nm = { .name = NULL }; + loff_t size = UBIFS_INO_NODE_SZ; + +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + + if (!S_ISDIR(dir->i_mode)) +@@ -1101,12 +1223,14 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) + "but calculated size is %llu", dir->i_ino, + (unsigned long long)i_size_read(dir), + (unsigned long long)size); ++ dbg_dump_inode(c, dir); + dump_stack(); + return -EINVAL; + } + if (dir->i_nlink != nlink) { + ubifs_err("directory inode %lu has nlink %u, but calculated " + "nlink is %u", dir->i_ino, dir->i_nlink, nlink); ++ dbg_dump_inode(c, dir); + dump_stack(); + return -EINVAL; + } +@@ -1133,6 +1257,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, + int err, nlen1, nlen2, cmp; + struct ubifs_dent_node *dent1, *dent2; + union ubifs_key key; ++ char key_buf[DBG_KEY_BUF_LEN]; + + ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key)); + dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); +@@ -1163,9 +1288,11 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, + key_read(c, &dent1->key, &key); + if (keys_cmp(c, &zbr1->key, &key)) { + dbg_err("1st 
entry at %d:%d has key %s", zbr1->lnum, +- zbr1->offs, DBGKEY(&key)); ++ zbr1->offs, dbg_snprintf_key(c, &key, key_buf, ++ DBG_KEY_BUF_LEN)); + dbg_err("but it should have key %s according to tnc", +- DBGKEY(&zbr1->key)); ++ dbg_snprintf_key(c, &zbr1->key, key_buf, ++ DBG_KEY_BUF_LEN)); + dbg_dump_node(c, dent1); + goto out_free; + } +@@ -1173,9 +1300,11 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, + key_read(c, &dent2->key, &key); + if (keys_cmp(c, &zbr2->key, &key)) { + dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum, +- zbr1->offs, DBGKEY(&key)); ++ zbr1->offs, dbg_snprintf_key(c, &key, key_buf, ++ DBG_KEY_BUF_LEN)); + dbg_err("but it should have key %s according to tnc", +- DBGKEY(&zbr2->key)); ++ dbg_snprintf_key(c, &zbr2->key, key_buf, ++ DBG_KEY_BUF_LEN)); + dbg_dump_node(c, dent2); + goto out_free; + } +@@ -1192,7 +1321,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, + dbg_err("2 xent/dent nodes with the same name"); + else + dbg_err("bad order of colliding key %s", +- DBGKEY(&key)); ++ dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); + + ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); + dbg_dump_node(c, dent1); +@@ -1423,7 +1552,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra) + long clean_cnt = 0, dirty_cnt = 0; + int err, last; + +- if (!(ubifs_chk_flags & UBIFS_CHK_TNC)) ++ if (!dbg_is_chk_index(c)) + return 0; + + ubifs_assert(mutex_is_locked(&c->tnc_mutex)); +@@ -1670,7 +1799,7 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) + int err; + long long calc = 0; + +- if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) ++ if (!dbg_is_chk_index(c)) + return 0; + + err = dbg_walk_index(c, NULL, add_size, &calc); +@@ -1751,6 +1880,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, + struct rb_node **p, *parent = NULL; + struct fsck_inode *fscki; + ino_t inum = key_inum_flash(c, &ino->key); ++ struct inode *inode; ++ struct ubifs_inode 
*ui; + + p = &fsckd->inodes.rb_node; + while (*p) { +@@ -1774,19 +1905,46 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, + if (!fscki) + return ERR_PTR(-ENOMEM); + ++ inode = ilookup(c->vfs_sb, inum); ++ + fscki->inum = inum; +- fscki->nlink = le32_to_cpu(ino->nlink); +- fscki->size = le64_to_cpu(ino->size); +- fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); +- fscki->xattr_sz = le32_to_cpu(ino->xattr_size); +- fscki->xattr_nms = le32_to_cpu(ino->xattr_names); +- fscki->mode = le32_to_cpu(ino->mode); ++ /* ++ * If the inode is present in the VFS inode cache, use it instead of ++ * the on-flash inode which might be out-of-date. E.g., the size might ++ * be out-of-date. If we do not do this, the following may happen, for ++ * example: ++ * 1. A power cut happens ++ * 2. We mount the file-system R/O, the replay process fixes up the ++ * inode size in the VFS cache, but on on-flash. ++ * 3. 'check_leaf()' fails because it hits a data node beyond inode ++ * size. ++ */ ++ if (!inode) { ++ fscki->nlink = le32_to_cpu(ino->nlink); ++ fscki->size = le64_to_cpu(ino->size); ++ fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); ++ fscki->xattr_sz = le32_to_cpu(ino->xattr_size); ++ fscki->xattr_nms = le32_to_cpu(ino->xattr_names); ++ fscki->mode = le32_to_cpu(ino->mode); ++ } else { ++ ui = ubifs_inode(inode); ++ fscki->nlink = inode->i_nlink; ++ fscki->size = inode->i_size; ++ fscki->xattr_cnt = ui->xattr_cnt; ++ fscki->xattr_sz = ui->xattr_size; ++ fscki->xattr_nms = ui->xattr_names; ++ fscki->mode = inode->i_mode; ++ iput(inode); ++ } ++ + if (S_ISDIR(fscki->mode)) { + fscki->calc_sz = UBIFS_INO_NODE_SZ; + fscki->calc_cnt = 2; + } ++ + rb_link_node(&fscki->rb, parent, p); + rb_insert_color(&fscki->rb, &fsckd->inodes); ++ + return fscki; + } + +@@ -2217,7 +2375,7 @@ int dbg_check_filesystem(struct ubifs_info *c) + int err; + struct fsck_data fsckd; + +- if (!(ubifs_chk_flags & UBIFS_CHK_FS)) ++ if (!dbg_is_chk_fs(c)) + return 0; + + fsckd.inodes = RB_ROOT; +@@ 
-2252,7 +2410,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) + struct list_head *cur; + struct ubifs_scan_node *sa, *sb; + +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + + for (cur = head->next; cur->next != head; cur = cur->next) { +@@ -2319,7 +2477,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) + struct list_head *cur; + struct ubifs_scan_node *sa, *sb; + +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + + for (cur = head->next; cur->next != head; cur = cur->next) { +@@ -2379,7 +2537,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) + hashb = key_block(c, &sb->key); + + if (hasha > hashb) { +- ubifs_err("larger hash %u goes before %u", hasha, hashb); ++ ubifs_err("larger hash %u goes before %u", ++ hasha, hashb); + goto error_dump; + } + } +@@ -2395,393 +2554,351 @@ error_dump: + return 0; + } + +-static int invocation_cnt; +- +-int dbg_force_in_the_gaps(void) +-{ +- if (!dbg_force_in_the_gaps_enabled) +- return 0; +- /* Force in-the-gaps every 8th commit */ +- return !((invocation_cnt++) & 0x7); +-} +- +-/* Failure mode for recovery testing */ +- +-#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d)) +- +-struct failure_mode_info { +- struct list_head list; +- struct ubifs_info *c; +-}; +- +-static LIST_HEAD(fmi_list); +-static DEFINE_SPINLOCK(fmi_lock); +- +-static unsigned int next; +- +-static int simple_rand(void) +-{ +- if (next == 0) +- next = current->pid; +- next = next * 1103515245 + 12345; +- return (next >> 16) & 32767; +-} +- +-static void failure_mode_init(struct ubifs_info *c) +-{ +- struct failure_mode_info *fmi; +- +- fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); +- if (!fmi) { +- ubifs_err("Failed to register failure mode - no memory"); +- return; +- } +- fmi->c = c; +- spin_lock(&fmi_lock); +- list_add_tail(&fmi->list, &fmi_list); +- 
spin_unlock(&fmi_lock); +-} +- +-static void failure_mode_exit(struct ubifs_info *c) +-{ +- struct failure_mode_info *fmi, *tmp; +- +- spin_lock(&fmi_lock); +- list_for_each_entry_safe(fmi, tmp, &fmi_list, list) +- if (fmi->c == c) { +- list_del(&fmi->list); +- kfree(fmi); +- } +- spin_unlock(&fmi_lock); +-} +- +-static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc) ++static inline int chance(unsigned int n, unsigned int out_of) + { +- struct failure_mode_info *fmi; +- +- spin_lock(&fmi_lock); +- list_for_each_entry(fmi, &fmi_list, list) +- if (fmi->c->ubi == desc) { +- struct ubifs_info *c = fmi->c; ++ return !!((random32() % out_of) + 1 <= n); + +- spin_unlock(&fmi_lock); +- return c; +- } +- spin_unlock(&fmi_lock); +- return NULL; + } + +-static int in_failure_mode(struct ubi_volume_desc *desc) ++static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) + { +- struct ubifs_info *c = dbg_find_info(desc); +- +- if (c && dbg_failure_mode) +- return c->dbg->failure_mode; +- return 0; +-} ++ struct ubifs_debug_info *d = c->dbg; + +-static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) +-{ +- struct ubifs_info *c = dbg_find_info(desc); +- struct ubifs_debug_info *d; ++ ubifs_assert(dbg_is_tst_rcvry(c)); + +- if (!c || !dbg_failure_mode) +- return 0; +- d = c->dbg; +- if (d->failure_mode) +- return 1; +- if (!d->fail_cnt) { +- /* First call - decide delay to failure */ ++ if (!d->pc_cnt) { ++ /* First call - decide delay to the power cut */ + if (chance(1, 2)) { +- unsigned int delay = 1 << (simple_rand() >> 11); ++ unsigned long delay; + + if (chance(1, 2)) { +- d->fail_delay = 1; +- d->fail_timeout = jiffies + +- msecs_to_jiffies(delay); +- dbg_rcvry("failing after %ums", delay); ++ d->pc_delay = 1; ++ /* Fail withing 1 minute */ ++ delay = random32() % 60000; ++ d->pc_timeout = jiffies; ++ d->pc_timeout += msecs_to_jiffies(delay); ++ ubifs_warn("failing after %lums", delay); + } else { +- d->fail_delay = 2; +- 
d->fail_cnt_max = delay; +- dbg_rcvry("failing after %u calls", delay); ++ d->pc_delay = 2; ++ delay = random32() % 10000; ++ /* Fail within 10000 operations */ ++ d->pc_cnt_max = delay; ++ ubifs_warn("failing after %lu calls", delay); + } + } +- d->fail_cnt += 1; ++ ++ d->pc_cnt += 1; + } ++ + /* Determine if failure delay has expired */ +- if (d->fail_delay == 1) { +- if (time_before(jiffies, d->fail_timeout)) ++ if (d->pc_delay == 1 && time_before(jiffies, d->pc_timeout)) + return 0; +- } else if (d->fail_delay == 2) +- if (d->fail_cnt++ < d->fail_cnt_max) ++ if (d->pc_delay == 2 && d->pc_cnt++ < d->pc_cnt_max) + return 0; ++ + if (lnum == UBIFS_SB_LNUM) { +- if (write) { +- if (chance(1, 2)) +- return 0; +- } else if (chance(19, 20)) ++ if (write && chance(1, 2)) + return 0; +- dbg_rcvry("failing in super block LEB %d", lnum); ++ if (chance(19, 20)) ++ return 0; ++ ubifs_warn("failing in super block LEB %d", lnum); + } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { + if (chance(19, 20)) + return 0; +- dbg_rcvry("failing in master LEB %d", lnum); ++ ubifs_warn("failing in master LEB %d", lnum); + } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { +- if (write) { +- if (chance(99, 100)) +- return 0; +- } else if (chance(399, 400)) ++ if (write && chance(99, 100)) ++ return 0; ++ if (chance(399, 400)) + return 0; +- dbg_rcvry("failing in log LEB %d", lnum); ++ ubifs_warn("failing in log LEB %d", lnum); + } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { +- if (write) { +- if (chance(7, 8)) +- return 0; +- } else if (chance(19, 20)) ++ if (write && chance(7, 8)) + return 0; +- dbg_rcvry("failing in LPT LEB %d", lnum); ++ if (chance(19, 20)) ++ return 0; ++ ubifs_warn("failing in LPT LEB %d", lnum); + } else if (lnum >= c->orph_first && lnum <= c->orph_last) { +- if (write) { +- if (chance(1, 2)) +- return 0; +- } else if (chance(9, 10)) ++ if (write && chance(1, 2)) + return 0; +- dbg_rcvry("failing in orphan LEB %d", lnum); ++ if 
(chance(9, 10)) ++ return 0; ++ ubifs_warn("failing in orphan LEB %d", lnum); + } else if (lnum == c->ihead_lnum) { + if (chance(99, 100)) + return 0; +- dbg_rcvry("failing in index head LEB %d", lnum); ++ ubifs_warn("failing in index head LEB %d", lnum); + } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { + if (chance(9, 10)) + return 0; +- dbg_rcvry("failing in GC head LEB %d", lnum); ++ ubifs_warn("failing in GC head LEB %d", lnum); + } else if (write && !RB_EMPTY_ROOT(&c->buds) && + !ubifs_search_bud(c, lnum)) { + if (chance(19, 20)) + return 0; +- dbg_rcvry("failing in non-bud LEB %d", lnum); ++ ubifs_warn("failing in non-bud LEB %d", lnum); + } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || + c->cmt_state == COMMIT_RUNNING_REQUIRED) { + if (chance(999, 1000)) + return 0; +- dbg_rcvry("failing in bud LEB %d commit running", lnum); ++ ubifs_warn("failing in bud LEB %d commit running", lnum); + } else { + if (chance(9999, 10000)) + return 0; +- dbg_rcvry("failing in bud LEB %d commit not running", lnum); ++ ubifs_warn("failing in bud LEB %d commit not running", lnum); + } +- ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); +- d->failure_mode = 1; ++ ++ d->pc_happened = 1; ++ ubifs_warn("========== Power cut emulated =========="); + dump_stack(); + return 1; + } + +-static void cut_data(const void *buf, int len) ++static void cut_data(const void *buf, unsigned int len) + { +- int flen, i; ++ unsigned int from, to, i, ffs = chance(1, 2); + unsigned char *p = (void *)buf; + +- flen = (len * (long long)simple_rand()) >> 15; +- for (i = flen; i < len; i++) +- p[i] = 0xff; +-} ++ from = random32() % (len + 1); ++ if (chance(1, 2)) ++ to = random32() % (len - from + 1); ++ else ++ to = len; + +-int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, +- int len, int check) +-{ +- if (in_failure_mode(desc)) +- return -EIO; +- return ubi_leb_read(desc, lnum, buf, offset, len, check); ++ if (from < to) ++ 
ubifs_warn("filled bytes %u-%u with %s", from, to - 1, ++ ffs ? "0xFFs" : "random data"); ++ ++ if (ffs) ++ for (i = from; i < to; i++) ++ p[i] = 0xFF; ++ else ++ for (i = from; i < to; i++) ++ p[i] = random32() % 0x100; + } + +-int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, +- int offset, int len, int dtype) ++int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, ++ int offs, int len, int dtype) + { + int err, failing; + +- if (in_failure_mode(desc)) +- return -EIO; +- failing = do_fail(desc, lnum, 1); ++ if (c->dbg->pc_happened) ++ return -EROFS; ++ ++ failing = power_cut_emulated(c, lnum, 1); + if (failing) + cut_data(buf, len); +- err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); ++ err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); + if (err) + return err; + if (failing) +- return -EIO; ++ return -EROFS; + return 0; + } + +-int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, ++int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, + int len, int dtype) + { + int err; + +- if (do_fail(desc, lnum, 1)) +- return -EIO; +- err = ubi_leb_change(desc, lnum, buf, len, dtype); ++ if (c->dbg->pc_happened) ++ return -EROFS; ++ if (power_cut_emulated(c, lnum, 1)) ++ return -EROFS; ++ err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); + if (err) + return err; +- if (do_fail(desc, lnum, 1)) +- return -EIO; ++ if (power_cut_emulated(c, lnum, 1)) ++ return -EROFS; + return 0; + } + +-int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) ++int dbg_leb_unmap(struct ubifs_info *c, int lnum) + { + int err; + +- if (do_fail(desc, lnum, 0)) +- return -EIO; +- err = ubi_leb_erase(desc, lnum); ++ if (c->dbg->pc_happened) ++ return -EROFS; ++ if (power_cut_emulated(c, lnum, 0)) ++ return -EROFS; ++ err = ubi_leb_unmap(c->ubi, lnum); + if (err) + return err; +- if (do_fail(desc, lnum, 0)) +- return -EIO; ++ if (power_cut_emulated(c, lnum, 0)) ++ return -EROFS; + return 0; + } + +-int 
dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) ++int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype) + { + int err; + +- if (do_fail(desc, lnum, 0)) +- return -EIO; +- err = ubi_leb_unmap(desc, lnum); ++ if (c->dbg->pc_happened) ++ return -EROFS; ++ if (power_cut_emulated(c, lnum, 0)) ++ return -EROFS; ++ err = ubi_leb_map(c->ubi, lnum, dtype); + if (err) + return err; +- if (do_fail(desc, lnum, 0)) +- return -EIO; ++ if (power_cut_emulated(c, lnum, 0)) ++ return -EROFS; + return 0; + } + +-int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) +-{ +- if (in_failure_mode(desc)) +- return -EIO; +- return ubi_is_mapped(desc, lnum); +-} ++/* ++ * Root directory for UBIFS stuff in debugfs. Contains sub-directories which ++ * contain the stuff specific to particular file-system mounts. ++ */ ++static struct dentry *dfs_rootdir; + +-int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) ++static int dfs_file_open(struct inode *inode, struct file *file) + { +- int err; +- +- if (do_fail(desc, lnum, 0)) +- return -EIO; +- err = ubi_leb_map(desc, lnum, dtype); +- if (err) +- return err; +- if (do_fail(desc, lnum, 0)) +- return -EIO; +- return 0; ++ file->private_data = inode->i_private; ++ return nonseekable_open(inode, file); + } + + /** +- * ubifs_debugging_init - initialize UBIFS debugging. +- * @c: UBIFS file-system description object ++ * provide_user_output - provide output to the user reading a debugfs file. ++ * @val: boolean value for the answer ++ * @u: the buffer to store the answer at ++ * @count: size of the buffer ++ * @ppos: position in the @u output buffer + * +- * This function initializes debugging-related data for the file system. +- * Returns zero in case of success and a negative error code in case of ++ * This is a simple helper function which stores @val boolean value in the user ++ * buffer when the user reads one of UBIFS debugfs files. 
Returns amount of ++ * bytes written to @u in case of success and a negative error code in case of + * failure. + */ +-int ubifs_debugging_init(struct ubifs_info *c) ++static int provide_user_output(int val, char __user *u, size_t count, ++ loff_t *ppos) + { +- c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); +- if (!c->dbg) +- return -ENOMEM; +- +- c->dbg->buf = vmalloc(c->leb_size); +- if (!c->dbg->buf) +- goto out; ++ char buf[3]; + +- failure_mode_init(c); +- return 0; ++ if (val) ++ buf[0] = '1'; ++ else ++ buf[0] = '0'; ++ buf[1] = '\n'; ++ buf[2] = 0x00; + +-out: +- kfree(c->dbg); +- return -ENOMEM; ++ return simple_read_from_buffer(u, count, ppos, buf, 2); + } + +-/** +- * ubifs_debugging_exit - free debugging data. +- * @c: UBIFS file-system description object +- */ +-void ubifs_debugging_exit(struct ubifs_info *c) ++static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count, ++ loff_t *ppos) + { +- failure_mode_exit(c); +- vfree(c->dbg->buf); +- kfree(c->dbg); +-} ++ struct dentry *dent = file->f_path.dentry; ++ struct ubifs_info *c = file->private_data; ++ struct ubifs_debug_info *d = c->dbg; ++ int val; ++ ++ if (dent == d->dfs_chk_gen) ++ val = d->chk_gen; ++ else if (dent == d->dfs_chk_index) ++ val = d->chk_index; ++ else if (dent == d->dfs_chk_orph) ++ val = d->chk_orph; ++ else if (dent == d->dfs_chk_lprops) ++ val = d->chk_lprops; ++ else if (dent == d->dfs_chk_fs) ++ val = d->chk_fs; ++ else if (dent == d->dfs_tst_rcvry) ++ val = d->tst_rcvry; ++ else ++ return -EINVAL; + +-/* +- * Root directory for UBIFS stuff in debugfs. Contains sub-directories which +- * contain the stuff specific to particular file-system mounts. +- */ +-static struct dentry *dfs_rootdir; ++ return provide_user_output(val, u, count, ppos); ++} + + /** +- * dbg_debugfs_init - initialize debugfs file-system. ++ * interpret_user_input - interpret user debugfs file input. 
++ * @u: user-provided buffer with the input ++ * @count: buffer size + * +- * UBIFS uses debugfs file-system to expose various debugging knobs to +- * user-space. This function creates "ubifs" directory in the debugfs +- * file-system. Returns zero in case of success and a negative error code in +- * case of failure. ++ * This is a helper function which interpret user input to a boolean UBIFS ++ * debugfs file. Returns %0 or %1 in case of success and a negative error code ++ * in case of failure. + */ +-int dbg_debugfs_init(void) ++static int interpret_user_input(const char __user *u, size_t count) + { +- dfs_rootdir = debugfs_create_dir("ubifs", NULL); +- if (IS_ERR(dfs_rootdir)) { +- int err = PTR_ERR(dfs_rootdir); +- ubifs_err("cannot create \"ubifs\" debugfs directory, " +- "error %d\n", err); +- return err; +- } ++ size_t buf_size; ++ char buf[8]; + +- return 0; +-} ++ buf_size = min_t(size_t, count, (sizeof(buf) - 1)); ++ if (copy_from_user(buf, u, buf_size)) ++ return -EFAULT; + +-/** +- * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. +- */ +-void dbg_debugfs_exit(void) +-{ +- debugfs_remove(dfs_rootdir); +-} ++ if (buf[0] == '1') ++ return 1; ++ else if (buf[0] == '0') ++ return 0; + +-static int open_debugfs_file(struct inode *inode, struct file *file) +-{ +- file->private_data = inode->i_private; +- return 0; ++ return -EINVAL; + } + +-static ssize_t write_debugfs_file(struct file *file, const char __user *buf, +- size_t count, loff_t *ppos) ++static ssize_t dfs_file_write(struct file *file, const char __user *u, ++ size_t count, loff_t *ppos) + { + struct ubifs_info *c = file->private_data; + struct ubifs_debug_info *d = c->dbg; ++ struct dentry *dent = file->f_path.dentry; ++ int val; + +- if (file->f_path.dentry == d->dfs_dump_lprops) ++ /* ++ * TODO: this is racy - the file-system might have already been ++ * unmounted and we'd oops in this case. 
The plan is to fix it with ++ * help of 'iterate_supers_type()' which we should have in v3.0: when ++ * a debugfs opened, we rember FS's UUID in file->private_data. Then ++ * whenever we access the FS via a debugfs file, we iterate all UBIFS ++ * superblocks and fine the one with the same UUID, and take the ++ * locking right. ++ * ++ * The other way to go suggested by Al Viro is to create a separate ++ * 'ubifs-debug' file-system instead. ++ */ ++ if (file->f_path.dentry == d->dfs_dump_lprops) { + dbg_dump_lprops(c); +- else if (file->f_path.dentry == d->dfs_dump_budg) { +- spin_lock(&c->space_lock); +- dbg_dump_budg(c); +- spin_unlock(&c->space_lock); +- } else if (file->f_path.dentry == d->dfs_dump_tnc) { ++ return count; ++ } ++ if (file->f_path.dentry == d->dfs_dump_budg) { ++ dbg_dump_budg(c, &c->bi); ++ return count; ++ } ++ if (file->f_path.dentry == d->dfs_dump_tnc) { + mutex_lock(&c->tnc_mutex); + dbg_dump_tnc(c); + mutex_unlock(&c->tnc_mutex); +- } else ++ return count; ++ } ++ ++ val = interpret_user_input(u, count); ++ if (val < 0) ++ return val; ++ ++ if (dent == d->dfs_chk_gen) ++ d->chk_gen = val; ++ else if (dent == d->dfs_chk_index) ++ d->chk_index = val; ++ else if (dent == d->dfs_chk_orph) ++ d->chk_orph = val; ++ else if (dent == d->dfs_chk_lprops) ++ d->chk_lprops = val; ++ else if (dent == d->dfs_chk_fs) ++ d->chk_fs = val; ++ else if (dent == d->dfs_tst_rcvry) ++ d->tst_rcvry = val; ++ else + return -EINVAL; + +- *ppos += count; + return count; + } + + static const struct file_operations dfs_fops = { +- .open = open_debugfs_file, +- .write = write_debugfs_file, ++ .open = dfs_file_open, ++ .read = dfs_file_read, ++ .write = dfs_file_write, + .owner = THIS_MODULE, +- .llseek = default_llseek, ++ .llseek = no_llseek, + }; + + /** +@@ -2798,46 +2915,94 @@ static const struct file_operations dfs_fops = { + */ + int dbg_debugfs_init_fs(struct ubifs_info *c) + { +- int err; ++ int err, n; + const char *fname; + struct dentry *dent; + struct 
ubifs_debug_info *d = c->dbg; + +- sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); +- d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); +- if (IS_ERR(d->dfs_dir)) { +- err = PTR_ERR(d->dfs_dir); +- ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", +- d->dfs_dir_name, err); ++ n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, ++ c->vi.ubi_num, c->vi.vol_id); ++ if (n == UBIFS_DFS_DIR_LEN) { ++ /* The array size is too small */ ++ fname = UBIFS_DFS_DIR_NAME; ++ dent = ERR_PTR(-EINVAL); + goto out; + } + ++ fname = d->dfs_dir_name; ++ dent = debugfs_create_dir(fname, dfs_rootdir); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out; ++ d->dfs_dir = dent; ++ + fname = "dump_lprops"; +- dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); +- if (IS_ERR(dent)) ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_dump_lprops = dent; + + fname = "dump_budg"; +- dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); +- if (IS_ERR(dent)) ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_dump_budg = dent; + + fname = "dump_tnc"; +- dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); +- if (IS_ERR(dent)) ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_dump_tnc = dent; + ++ fname = "chk_general"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_chk_gen = dent; ++ ++ fname = "chk_index"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_chk_index = dent; ++ ++ fname = "chk_orphans"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, 
c, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_chk_orph = dent; ++ ++ fname = "chk_lprops"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_chk_lprops = dent; ++ ++ fname = "chk_fs"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_chk_fs = dent; ++ ++ fname = "tst_recovery"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_tst_rcvry = dent; ++ + return 0; + + out_remove: +- err = PTR_ERR(dent); +- ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", +- fname, err); + debugfs_remove_recursive(d->dfs_dir); + out: ++ err = dent ? PTR_ERR(dent) : -ENODEV; ++ ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", ++ fname, err); + return err; + } + +@@ -2850,4 +3015,179 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c) + debugfs_remove_recursive(c->dbg->dfs_dir); + } + ++struct ubifs_global_debug_info ubifs_dbg; ++ ++static struct dentry *dfs_chk_gen; ++static struct dentry *dfs_chk_index; ++static struct dentry *dfs_chk_orph; ++static struct dentry *dfs_chk_lprops; ++static struct dentry *dfs_chk_fs; ++static struct dentry *dfs_tst_rcvry; ++ ++static ssize_t dfs_global_file_read(struct file *file, char __user *u, ++ size_t count, loff_t *ppos) ++{ ++ struct dentry *dent = file->f_path.dentry; ++ int val; ++ ++ if (dent == dfs_chk_gen) ++ val = ubifs_dbg.chk_gen; ++ else if (dent == dfs_chk_index) ++ val = ubifs_dbg.chk_index; ++ else if (dent == dfs_chk_orph) ++ val = ubifs_dbg.chk_orph; ++ else if (dent == dfs_chk_lprops) ++ val = ubifs_dbg.chk_lprops; ++ else if (dent == dfs_chk_fs) ++ val = ubifs_dbg.chk_fs; ++ else if (dent == dfs_tst_rcvry) ++ val = ubifs_dbg.tst_rcvry; ++ else ++ return -EINVAL; ++ ++ return 
provide_user_output(val, u, count, ppos); ++} ++ ++static ssize_t dfs_global_file_write(struct file *file, const char __user *u, ++ size_t count, loff_t *ppos) ++{ ++ struct dentry *dent = file->f_path.dentry; ++ int val; ++ ++ val = interpret_user_input(u, count); ++ if (val < 0) ++ return val; ++ ++ if (dent == dfs_chk_gen) ++ ubifs_dbg.chk_gen = val; ++ else if (dent == dfs_chk_index) ++ ubifs_dbg.chk_index = val; ++ else if (dent == dfs_chk_orph) ++ ubifs_dbg.chk_orph = val; ++ else if (dent == dfs_chk_lprops) ++ ubifs_dbg.chk_lprops = val; ++ else if (dent == dfs_chk_fs) ++ ubifs_dbg.chk_fs = val; ++ else if (dent == dfs_tst_rcvry) ++ ubifs_dbg.tst_rcvry = val; ++ else ++ return -EINVAL; ++ ++ return count; ++} ++ ++static const struct file_operations dfs_global_fops = { ++ .read = dfs_global_file_read, ++ .write = dfs_global_file_write, ++ .owner = THIS_MODULE, ++ .llseek = no_llseek, ++}; ++ ++/** ++ * dbg_debugfs_init - initialize debugfs file-system. ++ * ++ * UBIFS uses debugfs file-system to expose various debugging knobs to ++ * user-space. This function creates "ubifs" directory in the debugfs ++ * file-system. Returns zero in case of success and a negative error code in ++ * case of failure. 
++ */ ++int dbg_debugfs_init(void) ++{ ++ int err; ++ const char *fname; ++ struct dentry *dent; ++ ++ fname = "ubifs"; ++ dent = debugfs_create_dir(fname, NULL); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out; ++ dfs_rootdir = dent; ++ ++ fname = "chk_general"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, ++ &dfs_global_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ dfs_chk_gen = dent; ++ ++ fname = "chk_index"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, ++ &dfs_global_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ dfs_chk_index = dent; ++ ++ fname = "chk_orphans"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, ++ &dfs_global_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ dfs_chk_orph = dent; ++ ++ fname = "chk_lprops"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, ++ &dfs_global_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ dfs_chk_lprops = dent; ++ ++ fname = "chk_fs"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, ++ &dfs_global_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ dfs_chk_fs = dent; ++ ++ fname = "tst_recovery"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, ++ &dfs_global_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ dfs_tst_rcvry = dent; ++ ++ return 0; ++ ++out_remove: ++ debugfs_remove_recursive(dfs_rootdir); ++out: ++ err = dent ? PTR_ERR(dent) : -ENODEV; ++ ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", ++ fname, err); ++ return err; ++} ++ ++/** ++ * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. ++ */ ++void dbg_debugfs_exit(void) ++{ ++ debugfs_remove_recursive(dfs_rootdir); ++} ++ ++/** ++ * ubifs_debugging_init - initialize UBIFS debugging. 
++ * @c: UBIFS file-system description object ++ * ++ * This function initializes debugging-related data for the file system. ++ * Returns zero in case of success and a negative error code in case of ++ * failure. ++ */ ++int ubifs_debugging_init(struct ubifs_info *c) ++{ ++ c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); ++ if (!c->dbg) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++/** ++ * ubifs_debugging_exit - free debugging data. ++ * @c: UBIFS file-system description object ++ */ ++void ubifs_debugging_exit(struct ubifs_info *c) ++{ ++ kfree(c->dbg); ++} ++ + #endif /* CONFIG_UBIFS_FS_DEBUG */ +diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h +index 555ba13..13917ce 100644 +--- a/fs/ubifs/debug.h ++++ b/fs/ubifs/debug.h +@@ -31,17 +31,25 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, + + #ifdef CONFIG_UBIFS_FS_DEBUG + ++/* ++ * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi" ++ * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte. ++ */ ++#define UBIFS_DFS_DIR_NAME "ubi%d_%d" ++#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) ++ + /** + * ubifs_debug_info - per-FS debugging information. 
+- * @buf: a buffer of LEB size, used for various purposes + * @old_zroot: old index root - used by 'dbg_check_old_index()' + * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' + * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' +- * @failure_mode: failure mode for recovery testing +- * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls +- * @fail_timeout: time in jiffies when delay of failure mode expires +- * @fail_cnt: current number of calls to failure mode I/O functions +- * @fail_cnt_max: number of calls by which to delay failure mode ++ * ++ * @pc_happened: non-zero if an emulated power cut happened ++ * @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls ++ * @pc_timeout: time in jiffies when delay of failure mode expires ++ * @pc_cnt: current number of calls to failure mode I/O functions ++ * @pc_cnt_max: number of calls by which to delay failure mode ++ * + * @chk_lpt_sz: used by LPT tree size checker + * @chk_lpt_sz2: used by LPT tree size checker + * @chk_lpt_wastage: used by LPT tree size checker +@@ -51,24 +59,40 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, + * @new_ihead_offs: used by debugging to check @c->ihead_offs + * + * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') +- * @saved_free: saved free space (used by 'dbg_save_space_info()') ++ * @saved_bi: saved budgeting information ++ * @saved_free: saved amount of free space ++ * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt ++ * ++ * @chk_gen: if general extra checks are enabled ++ * @chk_index: if index extra checks are enabled ++ * @chk_orph: if orphans extra checks are enabled ++ * @chk_lprops: if lprops extra checks are enabled ++ * @chk_fs: if UBIFS contents extra checks are enabled ++ * @tst_rcvry: if UBIFS recovery testing mode is enabled + * +- * dfs_dir_name: name of debugfs directory containing this file-system's files +- * dfs_dir: direntry object of the
file-system debugfs directory +- * dfs_dump_lprops: "dump lprops" debugfs knob +- * dfs_dump_budg: "dump budgeting information" debugfs knob +- * dfs_dump_tnc: "dump TNC" debugfs knob ++ * @dfs_dir_name: name of debugfs directory containing this file-system's files ++ * @dfs_dir: direntry object of the file-system debugfs directory ++ * @dfs_dump_lprops: "dump lprops" debugfs knob ++ * @dfs_dump_budg: "dump budgeting information" debugfs knob ++ * @dfs_dump_tnc: "dump TNC" debugfs knob ++ * @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks ++ * @dfs_chk_index: debugfs knob to enable UBIFS index extra checks ++ * @dfs_chk_orph: debugfs knob to enable UBIFS orphans extra checks ++ * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks ++ * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks ++ * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing + */ + struct ubifs_debug_info { +- void *buf; + struct ubifs_zbranch old_zroot; + int old_zroot_level; + unsigned long long old_zroot_sqnum; +- int failure_mode; +- int fail_delay; +- unsigned long fail_timeout; +- unsigned int fail_cnt; +- unsigned int fail_cnt_max; ++ ++ int pc_happened; ++ int pc_delay; ++ unsigned long pc_timeout; ++ unsigned int pc_cnt; ++ unsigned int pc_cnt_max; ++ + long long chk_lpt_sz; + long long chk_lpt_sz2; + long long chk_lpt_wastage; +@@ -78,13 +102,47 @@ struct ubifs_debug_info { + int new_ihead_offs; + + struct ubifs_lp_stats saved_lst; ++ struct ubifs_budg_info saved_bi; + long long saved_free; ++ int saved_idx_gc_cnt; ++ ++ unsigned int chk_gen:1; ++ unsigned int chk_index:1; ++ unsigned int chk_orph:1; ++ unsigned int chk_lprops:1; ++ unsigned int chk_fs:1; ++ unsigned int tst_rcvry:1; + +- char dfs_dir_name[100]; ++ char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1]; + struct dentry *dfs_dir; + struct dentry *dfs_dump_lprops; + struct dentry *dfs_dump_budg; + struct dentry *dfs_dump_tnc; ++ struct dentry *dfs_chk_gen; ++ struct dentry 
*dfs_chk_index; ++ struct dentry *dfs_chk_orph; ++ struct dentry *dfs_chk_lprops; ++ struct dentry *dfs_chk_fs; ++ struct dentry *dfs_tst_rcvry; ++}; ++ ++/** ++ * ubifs_global_debug_info - global (not per-FS) UBIFS debugging information. ++ * ++ * @chk_gen: if general extra checks are enabled ++ * @chk_index: if index xtra checks are enabled ++ * @chk_orph: if orphans extra checks are enabled ++ * @chk_lprops: if lprops extra checks are enabled ++ * @chk_fs: if UBIFS contents extra checks are enabled ++ * @tst_rcvry: if UBIFS recovery testing mode enabled ++ */ ++struct ubifs_global_debug_info { ++ unsigned int chk_gen:1; ++ unsigned int chk_index:1; ++ unsigned int chk_orph:1; ++ unsigned int chk_lprops:1; ++ unsigned int chk_fs:1; ++ unsigned int tst_rcvry:1; + }; + + #define ubifs_assert(expr) do { \ +@@ -103,173 +161,90 @@ struct ubifs_debug_info { + } \ + } while (0) + +-#define dbg_dump_stack() do { \ +- if (!dbg_failure_mode) \ +- dump_stack(); \ +-} while (0) +- +-/* Generic debugging messages */ +-#define dbg_msg(fmt, ...) do { \ +- spin_lock(&dbg_lock); \ +- printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ +- __func__, ##__VA_ARGS__); \ +- spin_unlock(&dbg_lock); \ +-} while (0) +- +-#define dbg_do_msg(typ, fmt, ...) do { \ +- if (ubifs_msg_flags & typ) \ +- dbg_msg(fmt, ##__VA_ARGS__); \ +-} while (0) ++#define dbg_dump_stack() dump_stack() + + #define dbg_err(fmt, ...) do { \ +- spin_lock(&dbg_lock); \ + ubifs_err(fmt, ##__VA_ARGS__); \ +- spin_unlock(&dbg_lock); \ + } while (0) + +-const char *dbg_key_str0(const struct ubifs_info *c, +- const union ubifs_key *key); +-const char *dbg_key_str1(const struct ubifs_info *c, +- const union ubifs_key *key); ++#define ubifs_dbg_msg(type, fmt, ...) \ ++ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) + +-/* +- * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message +- * macros. 
+- */ +-#define DBGKEY(key) dbg_key_str0(c, (key)) +-#define DBGKEY1(key) dbg_key_str1(c, (key)) ++#define DBG_KEY_BUF_LEN 32 ++#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ ++ char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ ++ pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \ ++ dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \ ++} while (0) + +-/* General messages */ +-#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) ++/* Just a debugging messages not related to any specific UBIFS subsystem */ ++#define dbg_msg(fmt, ...) \ ++ printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ ++ __func__, ##__VA_ARGS__) + ++/* General messages */ ++#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) + /* Additional journal messages */ +-#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) +- ++#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__) ++#define dbg_jnlk(key, fmt, ...) \ ++ ubifs_dbg_msg_key("jnl", key, fmt, ##__VA_ARGS__) + /* Additional TNC messages */ +-#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) +- ++#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__) ++#define dbg_tnck(key, fmt, ...) \ ++ ubifs_dbg_msg_key("tnc", key, fmt, ##__VA_ARGS__) + /* Additional lprops messages */ +-#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) +- ++#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__) + /* Additional LEB find messages */ +-#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) +- ++#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__) + /* Additional mount messages */ +-#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) +- ++#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__) ++#define dbg_mntk(key, fmt, ...) 
\ ++ ubifs_dbg_msg_key("mnt", key, fmt, ##__VA_ARGS__) + /* Additional I/O messages */ +-#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) +- ++#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__) + /* Additional commit messages */ +-#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) +- ++#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__) + /* Additional budgeting messages */ +-#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) +- ++#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__) + /* Additional log messages */ +-#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) +- ++#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__) + /* Additional gc messages */ +-#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) +- ++#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__) + /* Additional scan messages */ +-#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) +- ++#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__) + /* Additional recovery messages */ +-#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) ++#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) + +-/* +- * Debugging message type flags (must match msg_type_names in debug.c). 
+- * +- * UBIFS_MSG_GEN: general messages +- * UBIFS_MSG_JNL: journal messages +- * UBIFS_MSG_MNT: mount messages +- * UBIFS_MSG_CMT: commit messages +- * UBIFS_MSG_FIND: LEB find messages +- * UBIFS_MSG_BUDG: budgeting messages +- * UBIFS_MSG_GC: garbage collection messages +- * UBIFS_MSG_TNC: TNC messages +- * UBIFS_MSG_LP: lprops messages +- * UBIFS_MSG_IO: I/O messages +- * UBIFS_MSG_LOG: log messages +- * UBIFS_MSG_SCAN: scan messages +- * UBIFS_MSG_RCVRY: recovery messages +- */ +-enum { +- UBIFS_MSG_GEN = 0x1, +- UBIFS_MSG_JNL = 0x2, +- UBIFS_MSG_MNT = 0x4, +- UBIFS_MSG_CMT = 0x8, +- UBIFS_MSG_FIND = 0x10, +- UBIFS_MSG_BUDG = 0x20, +- UBIFS_MSG_GC = 0x40, +- UBIFS_MSG_TNC = 0x80, +- UBIFS_MSG_LP = 0x100, +- UBIFS_MSG_IO = 0x200, +- UBIFS_MSG_LOG = 0x400, +- UBIFS_MSG_SCAN = 0x800, +- UBIFS_MSG_RCVRY = 0x1000, +-}; +- +-/* Debugging message type flags for each default debug message level */ +-#define UBIFS_MSG_LVL_0 0 +-#define UBIFS_MSG_LVL_1 0x1 +-#define UBIFS_MSG_LVL_2 0x7f +-#define UBIFS_MSG_LVL_3 0xffff +- +-/* +- * Debugging check flags (must match chk_names in debug.c). +- * +- * UBIFS_CHK_GEN: general checks +- * UBIFS_CHK_TNC: check TNC +- * UBIFS_CHK_IDX_SZ: check index size +- * UBIFS_CHK_ORPH: check orphans +- * UBIFS_CHK_OLD_IDX: check the old index +- * UBIFS_CHK_LPROPS: check lprops +- * UBIFS_CHK_FS: check the file-system +- */ +-enum { +- UBIFS_CHK_GEN = 0x1, +- UBIFS_CHK_TNC = 0x2, +- UBIFS_CHK_IDX_SZ = 0x4, +- UBIFS_CHK_ORPH = 0x8, +- UBIFS_CHK_OLD_IDX = 0x10, +- UBIFS_CHK_LPROPS = 0x20, +- UBIFS_CHK_FS = 0x40, +-}; ++extern struct ubifs_global_debug_info ubifs_dbg; + +-/* +- * Special testing flags (must match tst_names in debug.c). 
+- * +- * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method +- * UBIFS_TST_RCVRY: failure mode for recovery testing +- */ +-enum { +- UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, +- UBIFS_TST_RCVRY = 0x4, +-}; +- +-#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1 +-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1 +-#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2 +-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2 +-#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3 +-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3 +-#else +-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0 +-#endif +- +-#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS +-#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff +-#else +-#define UBIFS_CHK_FLAGS_DEFAULT 0 +-#endif +- +-extern spinlock_t dbg_lock; +- +-extern unsigned int ubifs_msg_flags; +-extern unsigned int ubifs_chk_flags; +-extern unsigned int ubifs_tst_flags; ++static inline int dbg_is_chk_gen(const struct ubifs_info *c) ++{ ++ return !!(ubifs_dbg.chk_gen || c->dbg->chk_gen); ++} ++static inline int dbg_is_chk_index(const struct ubifs_info *c) ++{ ++ return !!(ubifs_dbg.chk_index || c->dbg->chk_index); ++} ++static inline int dbg_is_chk_orph(const struct ubifs_info *c) ++{ ++ return !!(ubifs_dbg.chk_orph || c->dbg->chk_orph); ++} ++static inline int dbg_is_chk_lprops(const struct ubifs_info *c) ++{ ++ return !!(ubifs_dbg.chk_lprops || c->dbg->chk_lprops); ++} ++static inline int dbg_is_chk_fs(const struct ubifs_info *c) ++{ ++ return !!(ubifs_dbg.chk_fs || c->dbg->chk_fs); ++} ++static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) ++{ ++ return !!(ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry); ++} ++static inline int dbg_is_power_cut(const struct ubifs_info *c) ++{ ++ return !!c->dbg->pc_happened; ++} + + int ubifs_debugging_init(struct ubifs_info *c); + void ubifs_debugging_exit(struct ubifs_info *c); +@@ -280,17 +255,21 @@ const char *dbg_cstate(int cmt_state); + const char *dbg_jhead(int jhead); + const char *dbg_get_key_dump(const struct ubifs_info *c, + const union 
ubifs_key *key); +-void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); ++const char *dbg_snprintf_key(const struct ubifs_info *c, ++ const union ubifs_key *key, char *buffer, int len); ++void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode); + void dbg_dump_node(const struct ubifs_info *c, const void *node); + void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, + int offs); + void dbg_dump_budget_req(const struct ubifs_budget_req *req); + void dbg_dump_lstats(const struct ubifs_lp_stats *lst); +-void dbg_dump_budg(struct ubifs_info *c); ++void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi); + void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); + void dbg_dump_lprops(struct ubifs_info *c); + void dbg_dump_lpt_info(struct ubifs_info *c); + void dbg_dump_leb(const struct ubifs_info *c, int lnum); ++void dbg_dump_sleb(const struct ubifs_info *c, ++ const struct ubifs_scan_leb *sleb, int offs); + void dbg_dump_znode(const struct ubifs_info *c, + const struct ubifs_znode *znode); + void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat); +@@ -313,14 +292,13 @@ int dbg_check_cats(struct ubifs_info *c); + int dbg_check_ltab(struct ubifs_info *c); + int dbg_chk_lpt_free_spc(struct ubifs_info *c); + int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); +-int dbg_check_synced_i_size(struct inode *inode); +-int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); ++int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode); ++int dbg_check_dir(struct ubifs_info *c, const struct inode *dir); + int dbg_check_tnc(struct ubifs_info *c, int extra); + int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); + int dbg_check_filesystem(struct ubifs_info *c); + void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, + int add_pos); +-int dbg_check_lprops(struct ubifs_info *c); + 
int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, + int row, int col); + int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, +@@ -328,57 +306,12 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, + int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); + int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); + +-/* Force the use of in-the-gaps method for testing */ +- +-#define dbg_force_in_the_gaps_enabled \ +- (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) +- +-int dbg_force_in_the_gaps(void); +- +-/* Failure mode for recovery testing */ +- +-#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) +- +-#ifndef UBIFS_DBG_PRESERVE_UBI +- +-#define ubi_leb_read dbg_leb_read +-#define ubi_leb_write dbg_leb_write +-#define ubi_leb_change dbg_leb_change +-#define ubi_leb_erase dbg_leb_erase +-#define ubi_leb_unmap dbg_leb_unmap +-#define ubi_is_mapped dbg_is_mapped +-#define ubi_leb_map dbg_leb_map +- +-#endif +- +-int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, +- int len, int check); +-int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, +- int offset, int len, int dtype); +-int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, +- int len, int dtype); +-int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum); +-int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum); +-int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum); +-int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); +- +-static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf, +- int offset, int len) +-{ +- return dbg_leb_read(desc, lnum, buf, offset, len, 0); +-} +- +-static inline int dbg_write(struct ubi_volume_desc *desc, int lnum, +- const void *buf, int offset, int len) +-{ +- return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN); +-} +- +-static 
inline int dbg_change(struct ubi_volume_desc *desc, int lnum, +- const void *buf, int len) +-{ +- return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); +-} ++int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, ++ int len, int dtype); ++int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, ++ int dtype); ++int dbg_leb_unmap(struct ubifs_info *c, int lnum); ++int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype); + + /* Debugfs-related stuff */ + int dbg_debugfs_init(void); +@@ -390,116 +323,158 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); + + /* Use "if (0)" to make compiler check arguments even if debugging is off */ + #define ubifs_assert(expr) do { \ +- if (0 && (expr)) \ ++ if (0) \ + printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ + __func__, __LINE__, current->pid); \ + } while (0) + +-#define dbg_err(fmt, ...) do { \ +- if (0) \ +- ubifs_err(fmt, ##__VA_ARGS__); \ ++#define dbg_err(fmt, ...) do { \ ++ if (0) \ ++ ubifs_err(fmt, ##__VA_ARGS__); \ + } while (0) + +-#define dbg_msg(fmt, ...) do { \ +- if (0) \ +- printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ +- current->pid, __func__, ##__VA_ARGS__); \ ++#define DBGKEY(key) ((char *)(key)) ++#define DBGKEY1(key) ((char *)(key)) ++ ++#define ubifs_dbg_msg(fmt, ...) do { \ ++ if (0) \ ++ pr_debug(fmt "\n", ##__VA_ARGS__); \ + } while (0) + + #define dbg_dump_stack() + #define ubifs_assert_cmt_locked(c) + +-#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_budg(fmt, ...) 
dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +- +-#define DBGKEY(key) ((char *)(key)) +-#define DBGKEY1(key) ((char *)(key)) +- +-static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } +-static inline void ubifs_debugging_exit(struct ubifs_info *c) {} +-static inline const char *dbg_ntype(int type) { return ""; } +-static inline const char *dbg_cstate(int cmt_state) { return ""; } +-static inline const char *dbg_jhead(int jhead) { return ""; } +-static inline const char *dbg_get_key_dump(const struct ubifs_info *c, +- const union ubifs_key *key) { return ""; } +-static inline void dbg_dump_inode(const struct ubifs_info *c, +- const struct inode *inode) {} ++#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_jnlk(key, fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_tnck(key, fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_mntk(key, fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_rcvry(fmt, ...) 
ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++ ++static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } ++static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; } ++static inline const char *dbg_ntype(int type) { return ""; } ++static inline const char *dbg_cstate(int cmt_state) { return ""; } ++static inline const char *dbg_jhead(int jhead) { return ""; } ++static inline const char * ++dbg_get_key_dump(const struct ubifs_info *c, ++ const union ubifs_key *key) { return ""; } ++static inline const char * ++dbg_snprintf_key(const struct ubifs_info *c, ++ const union ubifs_key *key, char *buffer, ++ int len) { return ""; } ++static inline void dbg_dump_inode(struct ubifs_info *c, ++ const struct inode *inode) { return; } + static inline void dbg_dump_node(const struct ubifs_info *c, +- const void *node) {} ++ const void *node) { return; } + static inline void dbg_dump_lpt_node(const struct ubifs_info *c, +- void *node, int lnum, int offs) {} +-static inline void dbg_dump_budget_req(const struct ubifs_budget_req *req) {} +-static inline void dbg_dump_lstats(const struct ubifs_lp_stats *lst) {} +-static inline void dbg_dump_budg(struct ubifs_info *c) {} ++ void *node, int lnum, ++ int offs) { return; } ++static inline void ++dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; } ++static inline void ++dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; } ++static inline void ++dbg_dump_budg(struct ubifs_info *c, ++ const struct ubifs_budg_info *bi) { return; } + static inline void dbg_dump_lprop(const struct ubifs_info *c, +- const struct ubifs_lprops *lp) {} +-static inline void dbg_dump_lprops(struct ubifs_info *c) {} +-static inline void dbg_dump_lpt_info(struct ubifs_info *c) {} +-static inline void dbg_dump_leb(const struct ubifs_info *c, int lnum) {} +-static inline void dbg_dump_znode(const struct ubifs_info *c, +- const struct ubifs_znode *znode) {} ++ const struct ubifs_lprops *lp) { return; } ++static inline 
void dbg_dump_lprops(struct ubifs_info *c) { return; } ++static inline void dbg_dump_lpt_info(struct ubifs_info *c) { return; } ++static inline void dbg_dump_leb(const struct ubifs_info *c, ++ int lnum) { return; } ++static inline void ++dbg_dump_sleb(const struct ubifs_info *c, ++ const struct ubifs_scan_leb *sleb, int offs) { return; } ++static inline void ++dbg_dump_znode(const struct ubifs_info *c, ++ const struct ubifs_znode *znode) { return; } + static inline void dbg_dump_heap(struct ubifs_info *c, +- struct ubifs_lpt_heap *heap, int cat) {} ++ struct ubifs_lpt_heap *heap, ++ int cat) { return; } + static inline void dbg_dump_pnode(struct ubifs_info *c, +- struct ubifs_pnode *pnode, struct ubifs_nnode *parent, int iip) {} +-static inline void dbg_dump_tnc(struct ubifs_info *c) {} +-static inline void dbg_dump_index(struct ubifs_info *c) {} +-static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c) {} ++ struct ubifs_pnode *pnode, ++ struct ubifs_nnode *parent, ++ int iip) { return; } ++static inline void dbg_dump_tnc(struct ubifs_info *c) { return; } ++static inline void dbg_dump_index(struct ubifs_info *c) { return; } ++static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c) { return; } + + static inline int dbg_walk_index(struct ubifs_info *c, +- dbg_leaf_callback leaf_cb, dbg_znode_callback znode_cb, void *priv) +- { return 0; } +- +-/* Checking functions */ +-static inline void dbg_save_space_info(struct ubifs_info *c) {} +-static inline int dbg_check_space_info(struct ubifs_info *c) { return 0; } +-static inline int dbg_check_lprops(struct ubifs_info *c) { return 0; } +-static inline int dbg_old_index_check_init(struct ubifs_info *c, +- struct ubifs_zbranch *zroot) { return 0; } +-static inline int dbg_check_old_index(struct ubifs_info *c, +- struct ubifs_zbranch *zroot) { return 0; } +-static inline int dbg_check_cats(struct ubifs_info *c) { return 0; } +-static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; } +-static 
inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; } ++ dbg_leaf_callback leaf_cb, ++ dbg_znode_callback znode_cb, ++ void *priv) { return 0; } ++static inline void dbg_save_space_info(struct ubifs_info *c) { return; } ++static inline int dbg_check_space_info(struct ubifs_info *c) { return 0; } ++static inline int dbg_check_lprops(struct ubifs_info *c) { return 0; } ++static inline int ++dbg_old_index_check_init(struct ubifs_info *c, ++ struct ubifs_zbranch *zroot) { return 0; } ++static inline int ++dbg_check_old_index(struct ubifs_info *c, ++ struct ubifs_zbranch *zroot) { return 0; } ++static inline int dbg_check_cats(struct ubifs_info *c) { return 0; } ++static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; } ++static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; } + static inline int dbg_chk_lpt_sz(struct ubifs_info *c, +- int action, int len) { return 0; } +-static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; } +-static inline int dbg_check_dir_size(struct ubifs_info *c, +- const struct inode *dir) { return 0; } +-static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; } ++ int action, int len) { return 0; } ++static inline int ++dbg_check_synced_i_size(const struct ubifs_info *c, ++ struct inode *inode) { return 0; } ++static inline int dbg_check_dir(struct ubifs_info *c, ++ const struct inode *dir) { return 0; } ++static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; } + static inline int dbg_check_idx_size(struct ubifs_info *c, +- long long idx_size) { return 0; } +-static inline int dbg_check_filesystem(struct ubifs_info *c) { return 0; } ++ long long idx_size) { return 0; } ++static inline int dbg_check_filesystem(struct ubifs_info *c) { return 0; } + static inline void dbg_check_heap(struct ubifs_info *c, +- struct ubifs_lpt_heap *heap, int cat, int add_pos) {} ++ struct ubifs_lpt_heap *heap, ++ int cat, int add_pos) { return; } + static 
inline int dbg_check_lpt_nodes(struct ubifs_info *c, +- struct ubifs_cnode *cnode, int row, int col) { return 0; } ++ struct ubifs_cnode *cnode, int row, int col) { return 0; } + static inline int dbg_check_inode_size(struct ubifs_info *c, +- const struct inode *inode, loff_t size) { return 0; } +-static inline int dbg_check_data_nodes_order(struct ubifs_info *c, +- struct list_head *head) { return 0; } +-static inline int dbg_check_nondata_nodes_order(struct ubifs_info *c, +- struct list_head *head) { return 0; } +- +-#define dbg_force_in_the_gaps_enabled 0 +-static inline int dbg_force_in_the_gaps(void) { return 0; } +-#define dbg_failure_mode 0 +- +-static inline int dbg_debugfs_init(void) { return 0; } +-static inline void dbg_debugfs_exit(void) {} +-static inline int dbg_debugfs_init_fs(struct ubifs_info *c) { return 0; } +-static inline int dbg_debugfs_exit_fs(struct ubifs_info *c) { return 0; } ++ const struct inode *inode, ++ loff_t size) { return 0; } ++static inline int ++dbg_check_data_nodes_order(struct ubifs_info *c, ++ struct list_head *head) { return 0; } ++static inline int ++dbg_check_nondata_nodes_order(struct ubifs_info *c, ++ struct list_head *head) { return 0; } ++ ++static inline int dbg_leb_write(struct ubifs_info *c, int lnum, ++ const void *buf, int offset, ++ int len, int dtype) { return 0; } ++static inline int dbg_leb_change(struct ubifs_info *c, int lnum, ++ const void *buf, int len, ++ int dtype) { return 0; } ++static inline int dbg_leb_unmap(struct ubifs_info *c, int lnum) { return 0; } ++static inline int dbg_leb_map(struct ubifs_info *c, int lnum, ++ int dtype) { return 0; } ++ ++static inline int dbg_is_chk_gen(const struct ubifs_info *c) { return 0; } ++static inline int dbg_is_chk_index(const struct ubifs_info *c) { return 0; } ++static inline int dbg_is_chk_orph(const struct ubifs_info *c) { return 0; } ++static inline int dbg_is_chk_lprops(const struct ubifs_info *c) { return 0; } ++static inline int dbg_is_chk_fs(const struct 
ubifs_info *c) { return 0; } ++static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) { return 0; } ++static inline int dbg_is_power_cut(const struct ubifs_info *c) { return 0; } ++ ++static inline int dbg_debugfs_init(void) { return 0; } ++static inline void dbg_debugfs_exit(void) { return; } ++static inline int dbg_debugfs_init_fs(struct ubifs_info *c) { return 0; } ++static inline int dbg_debugfs_exit_fs(struct ubifs_info *c) { return 0; } + + #endif /* !CONFIG_UBIFS_FS_DEBUG */ + #endif /* !__UBIFS_DEBUG_H__ */ +diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c +index 14f64b6..9c5e3c5 100644 +--- a/fs/ubifs/dir.c ++++ b/fs/ubifs/dir.c +@@ -102,7 +102,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, + * UBIFS has to fully control "clean <-> dirty" transitions of inodes + * to make budgeting work. + */ +- inode->i_flags |= (S_NOCMTIME); ++ inode->i_flags |= S_NOCMTIME; + + inode_init_owner(inode, dir, mode); + inode->i_mtime = inode->i_atime = inode->i_ctime = +@@ -172,9 +172,11 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, + + #ifdef CONFIG_UBIFS_FS_DEBUG + +-static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) ++static int dbg_check_name(const struct ubifs_info *c, ++ const struct ubifs_dent_node *dent, ++ const struct qstr *nm) + { +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + if (le16_to_cpu(dent->nlen) != nm->len) + return -EINVAL; +@@ -185,7 +187,7 @@ static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) + + #else + +-#define dbg_check_name(dent, nm) 0 ++#define dbg_check_name(c, dent, nm) 0 + + #endif + +@@ -219,7 +221,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, + goto out; + } + +- if (dbg_check_name(dent, &dentry->d_name)) { ++ if (dbg_check_name(c, dent, &dentry->d_name)) { + err = -EINVAL; + goto out; + } +@@ -540,7 +542,7 @@ static int ubifs_link(struct dentry 
*old_dentry, struct inode *dir, + if (inode->i_nlink == 0) + return -ENOENT; + +- err = dbg_check_synced_i_size(inode); ++ err = dbg_check_synced_i_size(c, inode); + if (err) + return err; + +@@ -595,7 +597,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) + inode->i_nlink, dir->i_ino); + ubifs_assert(mutex_is_locked(&dir->i_mutex)); + ubifs_assert(mutex_is_locked(&inode->i_mutex)); +- err = dbg_check_synced_i_size(inode); ++ err = dbg_check_synced_i_size(c, inode); + if (err) + return err; + +@@ -621,7 +623,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) + ubifs_release_budget(c, &req); + else { + /* We've deleted something - clean the "no space" flags */ +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + } + return 0; +@@ -711,7 +713,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) + ubifs_release_budget(c, &req); + else { + /* We've deleted something - clean the "no space" flags */ +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + } + return 0; +diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c +index d77db7e..7cf738a 100644 +--- a/fs/ubifs/file.c ++++ b/fs/ubifs/file.c +@@ -212,7 +212,7 @@ static void release_new_page_budget(struct ubifs_info *c) + */ + static void release_existing_page_budget(struct ubifs_info *c) + { +- struct ubifs_budget_req req = { .dd_growth = c->page_budget}; ++ struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget}; + + ubifs_release_budget(c, &req); + } +@@ -448,10 +448,12 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, + if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { + /* + * We change whole page so no need to load it. But we +- * have to set the @PG_checked flag to make the further +- * code know that the page is new. This might be not +- * true, but it is better to budget more than to read +- * the page from the media. 
++ * do not know whether this page exists on the media or ++ * not, so we assume the latter because it requires ++ * larger budget. The assumption is that it is better ++ * to budget a bit more than to read the page from the ++ * media. Thus, we are setting the @PG_checked flag ++ * here. + */ + SetPageChecked(page); + skipped_read = 1; +@@ -559,6 +561,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, + dbg_gen("copied %d instead of %d, read page and repeat", + copied, len); + cancel_budget(c, page, ui, appending); ++ ClearPageChecked(page); + + /* + * Return 0 to force VFS to repeat the whole operation, or the +@@ -968,11 +971,11 @@ static int do_writepage(struct page *page, int len) + * the page locked, and it locks @ui_mutex. However, write-back does take inode + * @i_mutex, which means other VFS operations may be run on this inode at the + * same time. And the problematic one is truncation to smaller size, from where +- * we have to call 'truncate_setsize()', which first changes @inode->i_size, then +- * drops the truncated pages. And while dropping the pages, it takes the page +- * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with +- * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This +- * means that @inode->i_size is changed while @ui_mutex is unlocked. ++ * we have to call 'truncate_setsize()', which first changes @inode->i_size, ++ * then drops the truncated pages. And while dropping the pages, it takes the ++ * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' ++ * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. ++ * This means that @inode->i_size is changed while @ui_mutex is unlocked. + * + * XXX(truncate): with the new truncate sequence this is not true anymore, + * and the calls to truncate_setsize can be move around freely. 
They should +@@ -1186,7 +1189,7 @@ out_budg: + if (budgeted) + ubifs_release_budget(c, &req); + else { +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + } + return err; +@@ -1260,7 +1263,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) + if (err) + return err; + +- err = dbg_check_synced_i_size(inode); ++ err = dbg_check_synced_i_size(c, inode); + if (err) + return err; + +@@ -1309,6 +1312,13 @@ int ubifs_fsync(struct file *file, int datasync) + + dbg_gen("syncing inode %lu", inode->i_ino); + ++ if (c->ro_mount) ++ /* ++ * For some really strange reasons VFS does not filter out ++ * 'fsync()' for R/O mounted file-systems as per 2.6.39. ++ */ ++ return 0; ++ + /* + * VFS has already synchronized dirty pages for this inode. Synchronize + * the inode unless this is a 'datasync()' call. +@@ -1426,10 +1436,11 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) + } + + /* +- * mmap()d file has taken write protection fault and is being made +- * writable. UBIFS must ensure page is budgeted for. ++ * mmap()d file has taken write protection fault and is being made writable. ++ * UBIFS must ensure page is budgeted for. 
+ */ +-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ++static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, ++ struct vm_fault *vmf) + { + struct page *page = vmf->page; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; +@@ -1530,7 +1541,6 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) + { + int err; + +- /* 'generic_file_mmap()' takes care of NOMMU case */ + err = generic_file_mmap(file, vma); + if (err) + return err; +diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c +index 1d54383..2559d17 100644 +--- a/fs/ubifs/find.c ++++ b/fs/ubifs/find.c +@@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, + * But if the index takes fewer LEBs than it is reserved for it, + * this function must avoid picking those reserved LEBs. + */ +- if (c->min_idx_lebs >= c->lst.idx_lebs) { +- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; ++ if (c->bi.min_idx_lebs >= c->lst.idx_lebs) { ++ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; + exclude_index = 1; + } + spin_unlock(&c->space_lock); +@@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, + pick_free = 0; + } else { + spin_lock(&c->space_lock); +- exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); ++ exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs); + spin_unlock(&c->space_lock); + } + +@@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, + + /* Check if there are enough empty LEBs for commit */ + spin_lock(&c->space_lock); +- if (c->min_idx_lebs > c->lst.idx_lebs) +- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; ++ if (c->bi.min_idx_lebs > c->lst.idx_lebs) ++ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - +diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c +index 151f108..ded29f6 100644 +--- a/fs/ubifs/gc.c 
++++ b/fs/ubifs/gc.c +@@ -100,6 +100,10 @@ static int switch_gc_head(struct ubifs_info *c) + if (err) + return err; + ++ err = ubifs_wbuf_sync_nolock(wbuf); ++ if (err) ++ return err; ++ + err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); + if (err) + return err; +@@ -118,7 +122,7 @@ static int switch_gc_head(struct ubifs_info *c) + * This function compares data nodes @a and @b. Returns %1 if @a has greater + * inode or block number, and %-1 otherwise. + */ +-int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) ++static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) + { + ino_t inuma, inumb; + struct ubifs_info *c = priv; +@@ -161,7 +165,8 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) + * first and sorted by length in descending order. Directory entry nodes go + * after inode nodes and are sorted in ascending hash valuer order. + */ +-int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) ++static int nondata_nodes_cmp(void *priv, struct list_head *a, ++ struct list_head *b) + { + ino_t inuma, inumb; + struct ubifs_info *c = priv; +@@ -473,6 +478,37 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) + ubifs_assert(c->gc_lnum != lnum); + ubifs_assert(wbuf->lnum != lnum); + ++ if (lp->free + lp->dirty == c->leb_size) { ++ /* Special case - a free LEB */ ++ dbg_gc("LEB %d is free, return it", lp->lnum); ++ ubifs_assert(!(lp->flags & LPROPS_INDEX)); ++ ++ if (lp->free != c->leb_size) { ++ /* ++ * Write buffers must be sync'd before unmapping ++ * freeable LEBs, because one of them may contain data ++ * which obsoletes something in 'lp->pnum'. 
++ */ ++ err = gc_sync_wbufs(c); ++ if (err) ++ return err; ++ err = ubifs_change_one_lp(c, lp->lnum, c->leb_size, ++ 0, 0, 0, 0); ++ if (err) ++ return err; ++ } ++ err = ubifs_leb_unmap(c, lp->lnum); ++ if (err) ++ return err; ++ ++ if (c->gc_lnum == -1) { ++ c->gc_lnum = lnum; ++ return LEB_RETAINED; ++ } ++ ++ return LEB_FREED; ++ } ++ + /* + * We scan the entire LEB even though we only really need to scan up to + * (c->leb_size - lp->free). +@@ -682,37 +718,6 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) + "(min. space %d)", lp.lnum, lp.free, lp.dirty, + lp.free + lp.dirty, min_space); + +- if (lp.free + lp.dirty == c->leb_size) { +- /* An empty LEB was returned */ +- dbg_gc("LEB %d is free, return it", lp.lnum); +- /* +- * ubifs_find_dirty_leb() doesn't return freeable index +- * LEBs. +- */ +- ubifs_assert(!(lp.flags & LPROPS_INDEX)); +- if (lp.free != c->leb_size) { +- /* +- * Write buffers must be sync'd before +- * unmapping freeable LEBs, because one of them +- * may contain data which obsoletes something +- * in 'lp.pnum'. +- */ +- ret = gc_sync_wbufs(c); +- if (ret) +- goto out; +- ret = ubifs_change_one_lp(c, lp.lnum, +- c->leb_size, 0, 0, 0, +- 0); +- if (ret) +- goto out; +- } +- ret = ubifs_leb_unmap(c, lp.lnum); +- if (ret) +- goto out; +- ret = lp.lnum; +- break; +- } +- + space_before = c->leb_size - wbuf->offs - wbuf->used; + if (wbuf->lnum == -1) + space_before = 0; +diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c +index d821731..9228950 100644 +--- a/fs/ubifs/io.c ++++ b/fs/ubifs/io.c +@@ -31,6 +31,26 @@ + * buffer is full or when it is not used for some time (by timer). This is + * similar to the mechanism is used by JFFS2. + * ++ * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum ++ * write size (@c->max_write_size). The latter is the maximum amount of bytes ++ * the underlying flash is able to program at a time, and writing in ++ * @c->max_write_size units should presumably be faster. 
Obviously, ++ * @c->min_io_size <= @c->max_write_size. Write-buffers are of ++ * @c->max_write_size bytes in size for maximum performance. However, when a ++ * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size ++ * boundary) which contains data is written, not the whole write-buffer, ++ * because this is more space-efficient. ++ * ++ * This optimization adds a few complications to the code. Indeed, on the one ++ * hand, we want to write in optimal @c->max_write_size bytes chunks, which ++ * also means aligning writes at the @c->max_write_size bytes offsets. On the ++ * other hand, we do not want to waste space when synchronizing the write ++ * buffer, so during synchronization we write in smaller chunks. And this makes ++ * the next write offset to be not aligned to @c->max_write_size bytes. So we ++ * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned ++ * to @c->max_write_size bytes again. We do this by temporarily shrinking ++ * write-buffer size (@wbuf->size). ++ * + * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by + * mutexes defined inside these objects. Since sometimes upper-level code + * has to lock the write-buffer (e.g. journal space reservation code), many +@@ -46,8 +66,8 @@ + * UBIFS uses padding when it pads to the next min. I/O unit. In this case it + * uses padding nodes or padding bytes, if the padding node does not fit. + * +- * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes +- * every time they are read from the flash media. ++ * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when ++ * they are read from the flash media. 
+ */ + + #include <linux/crc32.h> +@@ -66,8 +86,125 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) + c->no_chk_data_crc = 0; + c->vfs_sb->s_flags |= MS_RDONLY; + ubifs_warn("switched to read-only mode, error %d", err); ++ dump_stack(); ++ } ++} ++ ++/* ++ * Below are simple wrappers over UBI I/O functions which include some ++ * additional checks and UBIFS debugging stuff. See corresponding UBI function ++ * for more information. ++ */ ++ ++int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, ++ int len, int even_ebadmsg) ++{ ++ int err; ++ ++ err = ubi_read(c->ubi, lnum, buf, offs, len); ++ /* ++ * In case of %-EBADMSG print the error message only if the ++ * @even_ebadmsg is true. ++ */ ++ if (err && (err != -EBADMSG || even_ebadmsg)) { ++ ubifs_err("reading %d bytes from LEB %d:%d failed, error %d", ++ len, lnum, offs, err); + dbg_dump_stack(); + } ++ return err; ++} ++ ++int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, ++ int len, int dtype) ++{ ++ int err; ++ ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->ro_error) ++ return -EROFS; ++ if (!dbg_is_tst_rcvry(c)) ++ err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); ++ else ++ err = dbg_leb_write(c, lnum, buf, offs, len, dtype); ++ if (err) { ++ ubifs_err("writing %d bytes to LEB %d:%d failed, error %d", ++ len, lnum, offs, err); ++ ubifs_ro_mode(c, err); ++ dbg_dump_stack(); ++ } ++ return err; ++} ++ ++int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, ++ int dtype) ++{ ++ int err; ++ ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->ro_error) ++ return -EROFS; ++ if (!dbg_is_tst_rcvry(c)) ++ err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); ++ else ++ err = dbg_leb_change(c, lnum, buf, len, dtype); ++ if (err) { ++ ubifs_err("changing %d bytes in LEB %d failed, error %d", ++ len, lnum, err); ++ ubifs_ro_mode(c, err); ++ dbg_dump_stack(); ++ } ++ return err; ++} ++ ++int ubifs_leb_unmap(struct 
ubifs_info *c, int lnum) ++{ ++ int err; ++ ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->ro_error) ++ return -EROFS; ++ if (!dbg_is_tst_rcvry(c)) ++ err = ubi_leb_unmap(c->ubi, lnum); ++ else ++ err = dbg_leb_unmap(c, lnum); ++ if (err) { ++ ubifs_err("unmap LEB %d failed, error %d", lnum, err); ++ ubifs_ro_mode(c, err); ++ dbg_dump_stack(); ++ } ++ return err; ++} ++ ++int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype) ++{ ++ int err; ++ ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->ro_error) ++ return -EROFS; ++ if (!dbg_is_tst_rcvry(c)) ++ err = ubi_leb_map(c->ubi, lnum, dtype); ++ else ++ err = dbg_leb_map(c, lnum, dtype); ++ if (err) { ++ ubifs_err("mapping LEB %d failed, error %d", lnum, err); ++ ubifs_ro_mode(c, err); ++ dbg_dump_stack(); ++ } ++ return err; ++} ++ ++int ubifs_is_mapped(const struct ubifs_info *c, int lnum) ++{ ++ int err; ++ ++ err = ubi_is_mapped(c->ubi, lnum); ++ if (err < 0) { ++ ubifs_err("ubi_is_mapped failed for LEB %d, error %d", ++ lnum, err); ++ dbg_dump_stack(); ++ } ++ return err; + } + + /** +@@ -88,8 +225,12 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) + * This function may skip data nodes CRC checking if @c->no_chk_data_crc is + * true, which is controlled by corresponding UBIFS mount option. However, if + * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is +- * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is +- * ignored and CRC is checked. ++ * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are ++ * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC ++ * is checked. This is because during mounting or re-mounting from R/O mode to ++ * R/W mode we may read journal nodes (when replying the journal or doing the ++ * recovery) and the journal nodes may potentially be corrupted, so checking is ++ * required. 
+ * + * This function returns zero in case of success and %-EUCLEAN in case of bad + * CRC or magic. +@@ -131,8 +272,8 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, + node_len > c->ranges[type].max_len) + goto out_len; + +- if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && +- c->no_chk_data_crc) ++ if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting && ++ !c->remounting_rw && c->no_chk_data_crc) + return 0; + + crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); +@@ -343,11 +484,17 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) + * + * This function synchronizes write-buffer @buf and returns zero in case of + * success or a negative error code in case of failure. ++ * ++ * Note, although write-buffers are of @c->max_write_size, this function does ++ * not necessarily write all @c->max_write_size bytes to the flash. Instead, ++ * if the write-buffer is only partially filled with data, only the used part ++ * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized. ++ * This way we waste less space. 
+ */ + int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) + { + struct ubifs_info *c = wbuf->c; +- int err, dirt; ++ int err, dirt, sync_len; + + cancel_wbuf_timer_nolock(wbuf); + if (!wbuf->used || wbuf->lnum == -1) +@@ -357,27 +504,49 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) + dbg_io("LEB %d:%d, %d bytes, jhead %s", + wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); + ubifs_assert(!(wbuf->avail & 7)); +- ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); ++ ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size); ++ ubifs_assert(wbuf->size >= c->min_io_size); ++ ubifs_assert(wbuf->size <= c->max_write_size); ++ ubifs_assert(wbuf->size % c->min_io_size == 0); + ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->leb_size - wbuf->offs >= c->max_write_size) ++ ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); + + if (c->ro_error) + return -EROFS; + +- ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); +- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, +- c->min_io_size, wbuf->dtype); +- if (err) { +- ubifs_err("cannot write %d bytes to LEB %d:%d", +- c->min_io_size, wbuf->lnum, wbuf->offs); +- dbg_dump_stack(); ++ /* ++ * Do not write whole write buffer but write only the minimum necessary ++ * amount of min. I/O units. ++ */ ++ sync_len = ALIGN(wbuf->used, c->min_io_size); ++ dirt = sync_len - wbuf->used; ++ if (dirt) ++ ubifs_pad(c, wbuf->buf + wbuf->used, dirt); ++ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len, ++ wbuf->dtype); ++ if (err) + return err; +- } +- +- dirt = wbuf->avail; + + spin_lock(&wbuf->lock); +- wbuf->offs += c->min_io_size; +- wbuf->avail = c->min_io_size; ++ wbuf->offs += sync_len; ++ /* ++ * Now @wbuf->offs is not necessarily aligned to @c->max_write_size. ++ * But our goal is to optimize writes and make sure we write in ++ * @c->max_write_size chunks and to @c->max_write_size-aligned offset. 
++ * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make ++ * sure that @wbuf->offs + @wbuf->size is aligned to ++ * @c->max_write_size. This way we make sure that after next ++ * write-buffer flush we are again at the optimal offset (aligned to ++ * @c->max_write_size). ++ */ ++ if (c->leb_size - wbuf->offs < c->max_write_size) ++ wbuf->size = c->leb_size - wbuf->offs; ++ else if (wbuf->offs & (c->max_write_size - 1)) ++ wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; ++ else ++ wbuf->size = c->max_write_size; ++ wbuf->avail = wbuf->size; + wbuf->used = 0; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); +@@ -396,8 +565,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) + * @dtype: data type + * + * This function targets the write-buffer to logical eraseblock @lnum:@offs. +- * The write-buffer is synchronized if it is not empty. Returns zero in case of +- * success and a negative error code in case of failure. ++ * The write-buffer has to be empty. Returns zero in case of success and a ++ * negative error code in case of failure. 
+ */ + int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, + int dtype) +@@ -409,18 +578,18 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, + ubifs_assert(offs >= 0 && offs <= c->leb_size); + ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); + ubifs_assert(lnum != wbuf->lnum); +- +- if (wbuf->used > 0) { +- int err = ubifs_wbuf_sync_nolock(wbuf); +- +- if (err) +- return err; +- } ++ ubifs_assert(wbuf->used == 0); + + spin_lock(&wbuf->lock); + wbuf->lnum = lnum; + wbuf->offs = offs; +- wbuf->avail = c->min_io_size; ++ if (c->leb_size - wbuf->offs < c->max_write_size) ++ wbuf->size = c->leb_size - wbuf->offs; ++ else if (wbuf->offs & (c->max_write_size - 1)) ++ wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; ++ else ++ wbuf->size = c->max_write_size; ++ wbuf->avail = wbuf->size; + wbuf->used = 0; + spin_unlock(&wbuf->lock); + wbuf->dtype = dtype; +@@ -500,8 +669,9 @@ out_timers: + * + * This function writes data to flash via write-buffer @wbuf. This means that + * the last piece of the node won't reach the flash media immediately if it +- * does not take whole minimal I/O unit. Instead, the node will sit in RAM +- * until the write-buffer is synchronized (e.g., by timer). ++ * does not take whole max. write unit (@c->max_write_size). Instead, the node ++ * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or ++ * because more data are appended to the write-buffer). + * + * This function returns zero in case of success and a negative error code in + * case of failure. 
If the node cannot be written because there is no more +@@ -510,7 +680,7 @@ out_timers: + int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) + { + struct ubifs_info *c = wbuf->c; +- int err, written, n, aligned_len = ALIGN(len, 8), offs; ++ int err, written, n, aligned_len = ALIGN(len, 8); + + dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, + dbg_ntype(((struct ubifs_ch *)buf)->node_type), +@@ -518,9 +688,15 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) + ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); + ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); + ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); +- ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); ++ ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size); ++ ubifs_assert(wbuf->size >= c->min_io_size); ++ ubifs_assert(wbuf->size <= c->max_write_size); ++ ubifs_assert(wbuf->size % c->min_io_size == 0); + ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); + ubifs_assert(!c->ro_media && !c->ro_mount); ++ ubifs_assert(!c->space_fixup); ++ if (c->leb_size - wbuf->offs >= c->max_write_size) ++ ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); + + if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { + err = -ENOSPC; +@@ -542,15 +718,19 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) + if (aligned_len == wbuf->avail) { + dbg_io("flush jhead %s wbuf to LEB %d:%d", + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); +- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, +- wbuf->offs, c->min_io_size, +- wbuf->dtype); ++ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, ++ wbuf->offs, wbuf->size, ++ wbuf->dtype); + if (err) + goto out; + + spin_lock(&wbuf->lock); +- wbuf->offs += c->min_io_size; +- wbuf->avail = c->min_io_size; ++ wbuf->offs += wbuf->size; ++ if (c->leb_size - wbuf->offs >= c->max_write_size) ++ wbuf->size = c->max_write_size; ++ 
else ++ wbuf->size = c->leb_size - wbuf->offs; ++ wbuf->avail = wbuf->size; + wbuf->used = 0; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); +@@ -564,39 +744,63 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) + goto exit; + } + +- /* +- * The node is large enough and does not fit entirely within current +- * minimal I/O unit. We have to fill and flush write-buffer and switch +- * to the next min. I/O unit. +- */ +- dbg_io("flush jhead %s wbuf to LEB %d:%d", +- dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); +- memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); +- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, +- c->min_io_size, wbuf->dtype); +- if (err) +- goto out; ++ written = 0; ++ ++ if (wbuf->used) { ++ /* ++ * The node is large enough and does not fit entirely within ++ * current available space. We have to fill and flush ++ * write-buffer and switch to the next max. write unit. ++ */ ++ dbg_io("flush jhead %s wbuf to LEB %d:%d", ++ dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); ++ memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); ++ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, ++ wbuf->size, wbuf->dtype); ++ if (err) ++ goto out; ++ ++ wbuf->offs += wbuf->size; ++ len -= wbuf->avail; ++ aligned_len -= wbuf->avail; ++ written += wbuf->avail; ++ } else if (wbuf->offs & (c->max_write_size - 1)) { ++ /* ++ * The write-buffer offset is not aligned to ++ * @c->max_write_size and @wbuf->size is less than ++ * @c->max_write_size. Write @wbuf->size bytes to make sure the ++ * following writes are done in optimal @c->max_write_size ++ * chunks. 
++ */ ++ dbg_io("write %d bytes to LEB %d:%d", ++ wbuf->size, wbuf->lnum, wbuf->offs); ++ err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs, ++ wbuf->size, wbuf->dtype); ++ if (err) ++ goto out; + +- offs = wbuf->offs + c->min_io_size; +- len -= wbuf->avail; +- aligned_len -= wbuf->avail; +- written = wbuf->avail; ++ wbuf->offs += wbuf->size; ++ len -= wbuf->size; ++ aligned_len -= wbuf->size; ++ written += wbuf->size; ++ } + + /* +- * The remaining data may take more whole min. I/O units, so write the +- * remains multiple to min. I/O unit size directly to the flash media. ++ * The remaining data may take more whole max. write units, so write the ++ * remains multiple to max. write unit size directly to the flash media. + * We align node length to 8-byte boundary because we anyway flash wbuf + * if the remaining space is less than 8 bytes. + */ +- n = aligned_len >> c->min_io_shift; ++ n = aligned_len >> c->max_write_shift; + if (n) { +- n <<= c->min_io_shift; +- dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); +- err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, +- wbuf->dtype); ++ n <<= c->max_write_shift; ++ dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, ++ wbuf->offs); ++ err = ubifs_leb_write(c, wbuf->lnum, buf + written, ++ wbuf->offs, n, wbuf->dtype); + if (err) + goto out; +- offs += n; ++ wbuf->offs += n; + aligned_len -= n; + len -= n; + written += n; +@@ -606,14 +810,17 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) + if (aligned_len) + /* + * And now we have what's left and what does not take whole +- * min. I/O unit, so write it to the write-buffer and we are ++ * max. write unit, so write it to the write-buffer and we are + * done. 
+ */ + memcpy(wbuf->buf, buf + written, len); + +- wbuf->offs = offs; ++ if (c->leb_size - wbuf->offs >= c->max_write_size) ++ wbuf->size = c->max_write_size; ++ else ++ wbuf->size = c->leb_size - wbuf->offs; ++ wbuf->avail = wbuf->size - aligned_len; + wbuf->used = aligned_len; +- wbuf->avail = c->min_io_size - aligned_len; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); + +@@ -666,18 +873,15 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); + ubifs_assert(!c->ro_media && !c->ro_mount); ++ ubifs_assert(!c->space_fixup); + + if (c->ro_error) + return -EROFS; + + ubifs_prepare_node(c, buf, len, 1); +- err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); +- if (err) { +- ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", +- buf_len, lnum, offs, err); ++ err = ubifs_leb_write(c, lnum, buf, offs, buf_len, dtype); ++ if (err) + dbg_dump_node(c, buf); +- dbg_dump_stack(); +- } + + return err; + } +@@ -729,13 +933,9 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, + + if (rlen > 0) { + /* Read everything that goes before write-buffer */ +- err = ubi_read(c->ubi, lnum, buf, offs, rlen); +- if (err && err != -EBADMSG) { +- ubifs_err("failed to read node %d from LEB %d:%d, " +- "error %d", type, lnum, offs, err); +- dbg_dump_stack(); ++ err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0); ++ if (err && err != -EBADMSG) + return err; +- } + } + + if (type != ch->node_type) { +@@ -790,12 +990,9 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, + ubifs_assert(!(offs & 7) && offs < c->leb_size); + ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); + +- err = ubi_read(c->ubi, lnum, buf, offs, len); +- if (err && err != -EBADMSG) { +- ubifs_err("cannot read node %d from LEB %d:%d, error %d", +- type, lnum, offs, err); ++ err = ubifs_leb_read(c, 
lnum, buf, offs, len, 0); ++ if (err && err != -EBADMSG) + return err; +- } + + if (type != ch->node_type) { + ubifs_err("bad node type (%d but expected %d)", +@@ -837,11 +1034,11 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) + { + size_t size; + +- wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); ++ wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL); + if (!wbuf->buf) + return -ENOMEM; + +- size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); ++ size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); + wbuf->inodes = kmalloc(size, GFP_KERNEL); + if (!wbuf->inodes) { + kfree(wbuf->buf); +@@ -851,7 +1048,14 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) + + wbuf->used = 0; + wbuf->lnum = wbuf->offs = -1; +- wbuf->avail = c->min_io_size; ++ /* ++ * If the LEB starts at the max. write size aligned address, then ++ * write-buffer size has to be set to @c->max_write_size. Otherwise, ++ * set it to something smaller so that it ends at the closest max. ++ * write size boundary. ++ */ ++ size = c->max_write_size - (c->leb_start % c->max_write_size); ++ wbuf->avail = wbuf->size = size; + wbuf->dtype = UBI_UNKNOWN; + wbuf->sync_callback = NULL; + mutex_init(&wbuf->io_mutex); +diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c +index 914f1bd..2f438ab 100644 +--- a/fs/ubifs/journal.c ++++ b/fs/ubifs/journal.c +@@ -141,14 +141,8 @@ again: + * LEB with some empty space. 
+ */ + lnum = ubifs_find_free_space(c, len, &offs, squeeze); +- if (lnum >= 0) { +- /* Found an LEB, add it to the journal head */ +- err = ubifs_add_bud_to_log(c, jhead, lnum, offs); +- if (err) +- goto out_return; +- /* A new bud was successfully allocated and added to the log */ ++ if (lnum >= 0) + goto out; +- } + + err = lnum; + if (err != -ENOSPC) +@@ -203,12 +197,23 @@ again: + return 0; + } + +- err = ubifs_add_bud_to_log(c, jhead, lnum, 0); +- if (err) +- goto out_return; + offs = 0; + + out: ++ /* ++ * Make sure we synchronize the write-buffer before we add the new bud ++ * to the log. Otherwise we may have a power cut after the log ++ * reference node for the last bud (@lnum) is written but before the ++ * write-buffer data are written to the next-to-last bud ++ * (@wbuf->lnum). And the effect would be that the recovery would see ++ * that there is corruption in the next-to-last bud. ++ */ ++ err = ubifs_wbuf_sync_nolock(wbuf); ++ if (err) ++ goto out_return; ++ err = ubifs_add_bud_to_log(c, jhead, lnum, offs); ++ if (err) ++ goto out_return; + err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); + if (err) + goto out_unlock; +@@ -380,10 +385,8 @@ out: + if (err == -ENOSPC) { + /* This are some budgeting problems, print useful information */ + down_write(&c->commit_sem); +- spin_lock(&c->space_lock); + dbg_dump_stack(); +- dbg_dump_budg(c); +- spin_unlock(&c->space_lock); ++ dbg_dump_budg(c, &c->bi); + dbg_dump_lprops(c); + cmt_retries = dbg_check_lprops(c); + up_write(&c->commit_sem); +@@ -666,6 +669,7 @@ out_free: + + out_release: + release_head(c, BASEHD); ++ kfree(dent); + out_ro: + ubifs_ro_mode(c, err); + if (last_reference) +@@ -690,17 +694,26 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, + { + struct ubifs_data_node *data; + int err, lnum, offs, compr_type, out_len; +- int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; ++ int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1; + struct 
ubifs_inode *ui = ubifs_inode(inode); + +- dbg_jnl("ino %lu, blk %u, len %d, key %s", +- (unsigned long)key_inum(c, key), key_block(c, key), len, +- DBGKEY(key)); ++ dbg_jnlk(key, "ino %lu, blk %u, len %d, key ", ++ (unsigned long)key_inum(c, key), key_block(c, key), len); + ubifs_assert(len <= UBIFS_BLOCK_SIZE); + +- data = kmalloc(dlen, GFP_NOFS); +- if (!data) +- return -ENOMEM; ++ data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN); ++ if (!data) { ++ /* ++ * Fall-back to the write reserve buffer. Note, we might be ++ * currently on the memory reclaim path, when the kernel is ++ * trying to free some memory by writing out dirty pages. The ++ * write reserve buffer helps us to guarantee that we are ++ * always able to write the data. ++ */ ++ allocated = 0; ++ mutex_lock(&c->write_reserve_mutex); ++ data = c->write_reserve_buf; ++ } + + data->ch.node_type = UBIFS_DATA_NODE; + key_write(c, key, &data->key); +@@ -736,7 +749,10 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, + goto out_ro; + + finish_reservation(c); +- kfree(data); ++ if (!allocated) ++ mutex_unlock(&c->write_reserve_mutex); ++ else ++ kfree(data); + return 0; + + out_release: +@@ -745,7 +761,10 @@ out_ro: + ubifs_ro_mode(c, err); + finish_reservation(c); + out_free: +- kfree(data); ++ if (!allocated) ++ mutex_unlock(&c->write_reserve_mutex); ++ else ++ kfree(data); + return err; + } + +@@ -1157,7 +1176,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, + dn = (void *)trun + UBIFS_TRUN_NODE_SZ; + blk = new_size >> UBIFS_BLOCK_SHIFT; + data_key_init(c, &key, inum, blk); +- dbg_jnl("last block key %s", DBGKEY(&key)); ++ dbg_jnlk(&key, "last block key "); + err = ubifs_tnc_lookup(c, &key, dn); + if (err == -ENOENT) + dlen = 0; /* Not found (so it is a hole) */ +diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c +index 4d0cb12..f9fd068 100644 +--- a/fs/ubifs/log.c ++++ b/fs/ubifs/log.c +@@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info 
*c, int lnum) + } + + /** +- * next_log_lnum - switch to the next log LEB. +- * @c: UBIFS file-system description object +- * @lnum: current log LEB +- */ +-static inline int next_log_lnum(const struct ubifs_info *c, int lnum) +-{ +- lnum += 1; +- if (lnum > c->log_last) +- lnum = UBIFS_LOG_LNUM; +- +- return lnum; +-} +- +-/** + * empty_log_bytes - calculate amount of empty space in the log. + * @c: UBIFS file-system description object + */ +@@ -175,26 +161,6 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) + } + + /** +- * ubifs_create_buds_lists - create journal head buds lists for remount rw. +- * @c: UBIFS file-system description object +- */ +-void ubifs_create_buds_lists(struct ubifs_info *c) +-{ +- struct rb_node *p; +- +- spin_lock(&c->buds_lock); +- p = rb_first(&c->buds); +- while (p) { +- struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb); +- struct ubifs_jhead *jhead = &c->jheads[bud->jhead]; +- +- list_add_tail(&bud->list, &jhead->buds_list); +- p = rb_next(p); +- } +- spin_unlock(&c->buds_lock); +-} +- +-/** + * ubifs_add_bud_to_log - add a new bud to the log. + * @c: UBIFS file-system description object + * @jhead: journal head the bud belongs to +@@ -277,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) + ref->jhead = cpu_to_le32(jhead); + + if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { +- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); ++ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + +@@ -296,7 +262,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) + * an unclean reboot, because the target LEB might have been + * unmapped, but not yet physically erased. 
+ */ +- err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM); ++ err = ubifs_leb_map(c, bud->lnum, UBI_SHORTTERM); + if (err) + goto out_unlock; + } +@@ -317,8 +283,6 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) + return 0; + + out_unlock: +- if (err != -EAGAIN) +- ubifs_ro_mode(c, err); + mutex_unlock(&c->log_mutex); + kfree(ref); + kfree(bud); +@@ -445,7 +409,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) + + /* Switch to the next log LEB */ + if (c->lhead_offs) { +- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); ++ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + +@@ -466,7 +430,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) + + c->lhead_offs += len; + if (c->lhead_offs == c->leb_size) { +- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); ++ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + +@@ -553,7 +517,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum) + } + mutex_lock(&c->log_mutex); + for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; +- lnum = next_log_lnum(c, lnum)) { ++ lnum = ubifs_next_log_lnum(c, lnum)) { + dbg_log("unmap log LEB %d", lnum); + err = ubifs_leb_unmap(c, lnum); + if (err) +@@ -662,7 +626,7 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs, + err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); + if (err) + return err; +- *lnum = next_log_lnum(c, *lnum); ++ *lnum = ubifs_next_log_lnum(c, *lnum); + *offs = 0; + } + memcpy(buf + *offs, node, len); +@@ -732,7 +696,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) + ubifs_scan_destroy(sleb); + if (lnum == c->lhead_lnum) + break; +- lnum = next_log_lnum(c, lnum); ++ lnum = ubifs_next_log_lnum(c, lnum); + } + if (offs) { + int sz = ALIGN(offs, c->min_io_size); +@@ -752,7 +716,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) + /* Unmap remaining LEBs */ + lnum = write_lnum; + do { +- 
lnum = next_log_lnum(c, lnum); ++ lnum = ubifs_next_log_lnum(c, lnum); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; +@@ -786,7 +750,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c) + struct ubifs_bud *bud; + long long bud_bytes = 0; + +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + + spin_lock(&c->buds_lock); +diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c +index 4d4ca38..f8a181e 100644 +--- a/fs/ubifs/lprops.c ++++ b/fs/ubifs/lprops.c +@@ -504,7 +504,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) + pnode = (struct ubifs_pnode *)container_of(lprops - pos, + struct ubifs_pnode, + lprops[0]); +- return !test_bit(COW_ZNODE, &pnode->flags) && ++ return !test_bit(COW_CNODE, &pnode->flags) && + test_bit(DIRTY_CNODE, &pnode->flags); + } + +@@ -860,7 +860,7 @@ int dbg_check_cats(struct ubifs_info *c) + struct list_head *pos; + int i, cat; + +- if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) ++ if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) + return 0; + + list_for_each_entry(lprops, &c->empty_list, list) { +@@ -958,7 +958,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, + { + int i = 0, j, err = 0; + +- if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) ++ if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) + return; + + for (i = 0; i < heap->cnt; i++) { +@@ -1007,21 +1007,11 @@ out: + } + + /** +- * struct scan_check_data - data provided to scan callback function. +- * @lst: LEB properties statistics +- * @err: error code +- */ +-struct scan_check_data { +- struct ubifs_lp_stats lst; +- int err; +-}; +- +-/** + * scan_check_cb - scan callback. 
+ * @c: the UBIFS file-system description object + * @lp: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory +- * @data: information passed to and from the caller of the scan ++ * @lst: lprops statistics to update + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree +@@ -1030,12 +1020,12 @@ struct scan_check_data { + */ + static int scan_check_cb(struct ubifs_info *c, + const struct ubifs_lprops *lp, int in_tree, +- struct scan_check_data *data) ++ struct ubifs_lp_stats *lst) + { + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; +- struct ubifs_lp_stats *lst = &data->lst; +- int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty; ++ int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; ++ void *buf = NULL; + + cat = lp->flags & LPROPS_CAT_MASK; + if (cat != LPROPS_UNCAT) { +@@ -1043,7 +1033,7 @@ static int scan_check_cb(struct ubifs_info *c, + if (cat != (lp->flags & LPROPS_CAT_MASK)) { + ubifs_err("bad LEB category %d expected %d", + (lp->flags & LPROPS_CAT_MASK), cat); +- goto out; ++ return -EINVAL; + } + } + +@@ -1077,7 +1067,7 @@ static int scan_check_cb(struct ubifs_info *c, + } + if (!found) { + ubifs_err("bad LPT list (category %d)", cat); +- goto out; ++ return -EINVAL; + } + } + } +@@ -1089,36 +1079,40 @@ static int scan_check_cb(struct ubifs_info *c, + if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || + lp != heap->arr[lp->hpos]) { + ubifs_err("bad LPT heap (category %d)", cat); +- goto out; ++ return -EINVAL; + } + } + +- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); +- if (IS_ERR(sleb)) { +- /* +- * After an unclean unmount, empty and freeable LEBs +- * may contain garbage. 
+- */ +- if (lp->free == c->leb_size) { +- ubifs_err("scan errors were in empty LEB " +- "- continuing checking"); +- lst->empty_lebs += 1; +- lst->total_free += c->leb_size; +- lst->total_dark += ubifs_calc_dark(c, c->leb_size); +- return LPT_SCAN_CONTINUE; +- } ++ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) ++ return -ENOMEM; + +- if (lp->free + lp->dirty == c->leb_size && +- !(lp->flags & LPROPS_INDEX)) { +- ubifs_err("scan errors were in freeable LEB " +- "- continuing checking"); +- lst->total_free += lp->free; +- lst->total_dirty += lp->dirty; +- lst->total_dark += ubifs_calc_dark(c, c->leb_size); +- return LPT_SCAN_CONTINUE; ++ /* ++ * After an unclean unmount, empty and freeable LEBs ++ * may contain garbage - do not scan them. ++ */ ++ if (lp->free == c->leb_size) { ++ lst->empty_lebs += 1; ++ lst->total_free += c->leb_size; ++ lst->total_dark += ubifs_calc_dark(c, c->leb_size); ++ return LPT_SCAN_CONTINUE; ++ } ++ if (lp->free + lp->dirty == c->leb_size && ++ !(lp->flags & LPROPS_INDEX)) { ++ lst->total_free += lp->free; ++ lst->total_dirty += lp->dirty; ++ lst->total_dark += ubifs_calc_dark(c, c->leb_size); ++ return LPT_SCAN_CONTINUE; ++ } ++ ++ sleb = ubifs_scan(c, lnum, 0, buf, 0); ++ if (IS_ERR(sleb)) { ++ ret = PTR_ERR(sleb); ++ if (ret == -EUCLEAN) { ++ dbg_dump_lprops(c); ++ dbg_dump_budg(c, &c->bi); + } +- data->err = PTR_ERR(sleb); +- return LPT_SCAN_STOP; ++ goto out; + } + + is_idx = -1; +@@ -1236,6 +1230,7 @@ static int scan_check_cb(struct ubifs_info *c, + } + + ubifs_scan_destroy(sleb); ++ vfree(buf); + return LPT_SCAN_CONTINUE; + + out_print: +@@ -1245,9 +1240,10 @@ out_print: + dbg_dump_leb(c, lnum); + out_destroy: + ubifs_scan_destroy(sleb); ++ ret = -EINVAL; + out: +- data->err = -EINVAL; +- return LPT_SCAN_STOP; ++ vfree(buf); ++ return ret; + } + + /** +@@ -1264,10 +1260,9 @@ out: + int dbg_check_lprops(struct ubifs_info *c) + { + int i, err; +- struct scan_check_data data; +- struct ubifs_lp_stats *lst = 
&data.lst; ++ struct ubifs_lp_stats lst; + +- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ if (!dbg_is_chk_lprops(c)) + return 0; + + /* +@@ -1280,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info *c) + return err; + } + +- memset(lst, 0, sizeof(struct ubifs_lp_stats)); +- +- data.err = 0; ++ memset(&lst, 0, sizeof(struct ubifs_lp_stats)); + err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, + (ubifs_lpt_scan_callback)scan_check_cb, +- &data); ++ &lst); + if (err && err != -ENOSPC) + goto out; +- if (data.err) { +- err = data.err; +- goto out; +- } + +- if (lst->empty_lebs != c->lst.empty_lebs || +- lst->idx_lebs != c->lst.idx_lebs || +- lst->total_free != c->lst.total_free || +- lst->total_dirty != c->lst.total_dirty || +- lst->total_used != c->lst.total_used) { ++ if (lst.empty_lebs != c->lst.empty_lebs || ++ lst.idx_lebs != c->lst.idx_lebs || ++ lst.total_free != c->lst.total_free || ++ lst.total_dirty != c->lst.total_dirty || ++ lst.total_used != c->lst.total_used) { + ubifs_err("bad overall accounting"); + ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " + "total_free %lld, total_dirty %lld, total_used %lld", +- lst->empty_lebs, lst->idx_lebs, lst->total_free, +- lst->total_dirty, lst->total_used); ++ lst.empty_lebs, lst.idx_lebs, lst.total_free, ++ lst.total_dirty, lst.total_used); + ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " + "total_free %lld, total_dirty %lld, total_used %lld", + c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, +@@ -1311,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info *c) + goto out; + } + +- if (lst->total_dead != c->lst.total_dead || +- lst->total_dark != c->lst.total_dark) { ++ if (lst.total_dead != c->lst.total_dead || ++ lst.total_dark != c->lst.total_dark) { + ubifs_err("bad dead/dark space accounting"); + ubifs_err("calculated: total_dead %lld, total_dark %lld", +- lst->total_dead, lst->total_dark); ++ lst.total_dead, lst.total_dark); + ubifs_err("read from lprops: total_dead %lld, 
total_dark %lld", + c->lst.total_dead, c->lst.total_dark); + err = -EINVAL; +diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c +index 72775d3..66d59d0 100644 +--- a/fs/ubifs/lpt.c ++++ b/fs/ubifs/lpt.c +@@ -701,8 +701,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); +- err = ubi_leb_change(c->ubi, lnum++, buf, alen, +- UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum++, buf, alen, ++ UBI_SHORTTERM); + if (err) + goto out; + p = buf; +@@ -732,8 +732,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + set_ltab(c, lnum, c->leb_size - alen, + alen - len); + memset(p, 0xff, alen - len); +- err = ubi_leb_change(c->ubi, lnum++, buf, alen, +- UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum++, buf, alen, ++ UBI_SHORTTERM); + if (err) + goto out; + p = buf; +@@ -780,8 +780,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); +- err = ubi_leb_change(c->ubi, lnum++, buf, alen, +- UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum++, buf, alen, ++ UBI_SHORTTERM); + if (err) + goto out; + p = buf; +@@ -806,7 +806,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); +- err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum++, buf, alen, UBI_SHORTTERM); + if (err) + goto out; + p = buf; +@@ -826,7 +826,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + + /* Write remaining buffer */ + memset(p, 0xff, alen - len); +- err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum, buf, alen, 
UBI_SHORTTERM); + if (err) + goto out; + +@@ -1222,7 +1222,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) + if (c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + } else { +- err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); ++ err = ubifs_leb_read(c, lnum, buf, offs, c->nnode_sz, 1); + if (err) + goto out; + err = ubifs_unpack_nnode(c, buf, nnode); +@@ -1247,6 +1247,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) + + out: + ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); ++ dbg_dump_stack(); + kfree(nnode); + return err; + } +@@ -1270,10 +1271,9 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) + lnum = branch->lnum; + offs = branch->offs; + pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); +- if (!pnode) { +- err = -ENOMEM; +- goto out; +- } ++ if (!pnode) ++ return -ENOMEM; ++ + if (lnum == 0) { + /* + * This pnode was not written which just means that the LEB +@@ -1291,7 +1291,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) + lprops->flags = ubifs_categorize_lprops(c, lprops); + } + } else { +- err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); ++ err = ubifs_leb_read(c, lnum, buf, offs, c->pnode_sz, 1); + if (err) + goto out; + err = unpack_pnode(c, buf, pnode); +@@ -1313,6 +1313,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) + out: + ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); + dbg_dump_pnode(c, pnode, parent, iip); ++ dbg_dump_stack(); + dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); + kfree(pnode); + return err; +@@ -1332,7 +1333,7 @@ static int read_ltab(struct ubifs_info *c) + buf = vmalloc(c->ltab_sz); + if (!buf) + return -ENOMEM; +- err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); ++ err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1); + if 
(err) + goto out; + err = unpack_ltab(c, buf); +@@ -1355,7 +1356,8 @@ static int read_lsave(struct ubifs_info *c) + buf = vmalloc(c->lsave_sz); + if (!buf) + return -ENOMEM; +- err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz); ++ err = ubifs_leb_read(c, c->lsave_lnum, buf, c->lsave_offs, ++ c->lsave_sz, 1); + if (err) + goto out; + err = unpack_lsave(c, buf); +@@ -1815,8 +1817,8 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, + if (c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + } else { +- err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, +- c->nnode_sz); ++ err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, ++ c->nnode_sz, 1); + if (err) + return ERR_PTR(err); + err = ubifs_unpack_nnode(c, buf, nnode); +@@ -1884,8 +1886,8 @@ static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c, + ubifs_assert(branch->lnum >= c->lpt_first && + branch->lnum <= c->lpt_last); + ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); +- err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, +- c->pnode_sz); ++ err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, ++ c->pnode_sz, 1); + if (err) + return ERR_PTR(err); + err = unpack_pnode(c, buf, pnode); +@@ -1984,12 +1986,11 @@ again: + + if (path[h].in_tree) + continue; +- nnode = kmalloc(sz, GFP_NOFS); ++ nnode = kmemdup(&path[h].nnode, sz, GFP_NOFS); + if (!nnode) { + err = -ENOMEM; + goto out; + } +- memcpy(nnode, &path[h].nnode, sz); + parent = nnode->parent; + parent->nbranch[nnode->iip].nnode = nnode; + path[h].ptr.nnode = nnode; +@@ -2002,12 +2003,11 @@ again: + const size_t sz = sizeof(struct ubifs_pnode); + struct ubifs_nnode *parent; + +- pnode = kmalloc(sz, GFP_NOFS); ++ pnode = kmemdup(&path[h].pnode, sz, GFP_NOFS); + if (!pnode) { + err = -ENOMEM; + goto out; + } +- memcpy(pnode, &path[h].pnode, sz); + parent = pnode->parent; + parent->nbranch[pnode->iip].pnode = pnode; + path[h].ptr.pnode = pnode; +@@ -2225,7 +2225,7 @@ int 
dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, + struct ubifs_cnode *cn; + int num, iip = 0, err; + +- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ if (!dbg_is_chk_lprops(c)) + return 0; + + while (cnode) { +diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c +index 5c90dec..cddd6bd 100644 +--- a/fs/ubifs/lpt_commit.c ++++ b/fs/ubifs/lpt_commit.c +@@ -27,8 +27,15 @@ + + #include <linux/crc16.h> + #include <linux/slab.h> ++#include <linux/random.h> + #include "ubifs.h" + ++#ifdef CONFIG_UBIFS_FS_DEBUG ++static int dbg_populate_lsave(struct ubifs_info *c); ++#else ++#define dbg_populate_lsave(c) 0 ++#endif ++ + /** + * first_dirty_cnode - find first dirty cnode. + * @c: UBIFS file-system description object +@@ -110,8 +117,8 @@ static int get_cnodes_to_commit(struct ubifs_info *c) + return 0; + cnt += 1; + while (1) { +- ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags)); +- __set_bit(COW_ZNODE, &cnode->flags); ++ ubifs_assert(!test_bit(COW_CNODE, &cnode->flags)); ++ __set_bit(COW_CNODE, &cnode->flags); + cnext = next_dirty_cnode(cnode); + if (!cnext) { + cnode->cnext = c->lpt_cnext; +@@ -459,7 +466,7 @@ static int write_cnodes(struct ubifs_info *c) + */ + clear_bit(DIRTY_CNODE, &cnode->flags); + smp_mb__before_clear_bit(); +- clear_bit(COW_ZNODE, &cnode->flags); ++ clear_bit(COW_CNODE, &cnode->flags); + smp_mb__after_clear_bit(); + offs += len; + dbg_chk_lpt_sz(c, 1, len); +@@ -586,7 +593,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, + if (nnode->nbranch[iip].lnum) + break; + } +- } while (iip >= UBIFS_LPT_FANOUT); ++ } while (iip >= UBIFS_LPT_FANOUT); + + /* Go right */ + nnode = ubifs_get_nnode(c, nnode, iip); +@@ -815,6 +822,10 @@ static void populate_lsave(struct ubifs_info *c) + c->lpt_drty_flgs |= LSAVE_DIRTY; + ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); + } ++ ++ if (dbg_populate_lsave(c)) ++ return; ++ + list_for_each_entry(lprops, &c->empty_list, list) { + c->lsave[cnt++] = lprops->lnum; + if 
(cnt >= c->lsave_cnt) +@@ -1150,11 +1161,11 @@ static int lpt_gc_lnum(struct ubifs_info *c, int lnum) + void *buf = c->lpt_buf; + + dbg_lp("LEB %d", lnum); +- err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); +- if (err) { +- ubifs_err("cannot read LEB %d, error %d", lnum, err); ++ ++ err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); ++ if (err) + return err; +- } ++ + while (1) { + if (!is_a_node(c, buf, len)) { + int pad_len; +@@ -1628,29 +1639,35 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) + { + int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; + int ret; +- void *buf = c->dbg->buf; ++ void *buf, *p; + +- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ if (!dbg_is_chk_lprops(c)) + return 0; + +- dbg_lp("LEB %d", lnum); +- err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); +- if (err) { +- dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); +- return err; ++ buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubifs_err("cannot allocate memory for ltab checking"); ++ return 0; + } ++ ++ dbg_lp("LEB %d", lnum); ++ ++ err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); ++ if (err) ++ goto out; ++ + while (1) { +- if (!is_a_node(c, buf, len)) { ++ if (!is_a_node(c, p, len)) { + int i, pad_len; + +- pad_len = get_pad_len(c, buf, len); ++ pad_len = get_pad_len(c, p, len); + if (pad_len) { +- buf += pad_len; ++ p += pad_len; + len -= pad_len; + dirty += pad_len; + continue; + } +- if (!dbg_is_all_ff(buf, len)) { ++ if (!dbg_is_all_ff(p, len)) { + dbg_msg("invalid empty space in LEB %d at %d", + lnum, c->leb_size - len); + err = -EINVAL; +@@ -1668,16 +1685,21 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) + lnum, dirty, c->ltab[i].dirty); + err = -EINVAL; + } +- return err; ++ goto out; + } +- node_type = get_lpt_node_type(c, buf, &node_num); ++ node_type = get_lpt_node_type(c, p, &node_num); + node_len = get_lpt_node_len(c, node_type); + ret = dbg_is_node_dirty(c, node_type, 
lnum, c->leb_size - len); + if (ret == 1) + dirty += node_len; +- buf += node_len; ++ p += node_len; + len -= node_len; + } ++ ++ err = 0; ++out: ++ vfree(buf); ++ return err; + } + + /** +@@ -1690,7 +1712,7 @@ int dbg_check_ltab(struct ubifs_info *c) + { + int lnum, err, i, cnt; + +- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ if (!dbg_is_chk_lprops(c)) + return 0; + + /* Bring the entire tree into memory */ +@@ -1733,7 +1755,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) + long long free = 0; + int i; + +- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ if (!dbg_is_chk_lprops(c)) + return 0; + + for (i = 0; i < c->lpt_lebs; i++) { +@@ -1775,7 +1797,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) + long long chk_lpt_sz, lpt_sz; + int err = 0; + +- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ if (!dbg_is_chk_lprops(c)) + return 0; + + switch (action) { +@@ -1870,25 +1892,30 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) + static void dump_lpt_leb(const struct ubifs_info *c, int lnum) + { + int err, len = c->leb_size, node_type, node_num, node_len, offs; +- void *buf = c->dbg->buf; ++ void *buf, *p; + + printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", + current->pid, lnum); +- err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); +- if (err) { +- ubifs_err("cannot read LEB %d, error %d", lnum, err); ++ buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubifs_err("cannot allocate memory to dump LPT"); + return; + } ++ ++ err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); ++ if (err) ++ goto out; ++ + while (1) { + offs = c->leb_size - len; +- if (!is_a_node(c, buf, len)) { ++ if (!is_a_node(c, p, len)) { + int pad_len; + +- pad_len = get_pad_len(c, buf, len); ++ pad_len = get_pad_len(c, p, len); + if (pad_len) { + printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", + lnum, offs, pad_len); +- buf += pad_len; ++ p += pad_len; + len -= pad_len; + continue; + } +@@ -1898,7 +1925,7 @@ static void 
dump_lpt_leb(const struct ubifs_info *c, int lnum) + break; + } + +- node_type = get_lpt_node_type(c, buf, &node_num); ++ node_type = get_lpt_node_type(c, p, &node_num); + switch (node_type) { + case UBIFS_LPT_PNODE: + { +@@ -1923,7 +1950,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) + else + printk(KERN_DEBUG "LEB %d:%d, nnode, ", + lnum, offs); +- err = ubifs_unpack_nnode(c, buf, &nnode); ++ err = ubifs_unpack_nnode(c, p, &nnode); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, + nnode.nbranch[i].offs); +@@ -1944,15 +1971,18 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) + break; + default: + ubifs_err("LPT node type %d not recognized", node_type); +- return; ++ goto out; + } + +- buf += node_len; ++ p += node_len; + len -= node_len; + } + + printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", + current->pid, lnum); ++out: ++ vfree(buf); ++ return; + } + + /** +@@ -1974,4 +2004,47 @@ void dbg_dump_lpt_lebs(const struct ubifs_info *c) + current->pid); + } + ++/** ++ * dbg_populate_lsave - debugging version of 'populate_lsave()' ++ * @c: UBIFS file-system description object ++ * ++ * This is a debugging version for 'populate_lsave()' which populates lsave ++ * with random LEBs instead of useful LEBs, which is good for test coverage. ++ * Returns zero if lsave has not been populated (this debugging feature is ++ * disabled) and non-zero if lsave has been populated.
++ */ ++static int dbg_populate_lsave(struct ubifs_info *c) ++{ ++ struct ubifs_lprops *lprops; ++ struct ubifs_lpt_heap *heap; ++ int i; ++ ++ if (!dbg_is_chk_gen(c)) ++ return 0; ++ if (random32() & 3) ++ return 0; ++ ++ for (i = 0; i < c->lsave_cnt; i++) ++ c->lsave[i] = c->main_first; ++ ++ list_for_each_entry(lprops, &c->empty_list, list) ++ c->lsave[random32() % c->lsave_cnt] = lprops->lnum; ++ list_for_each_entry(lprops, &c->freeable_list, list) ++ c->lsave[random32() % c->lsave_cnt] = lprops->lnum; ++ list_for_each_entry(lprops, &c->frdi_idx_list, list) ++ c->lsave[random32() % c->lsave_cnt] = lprops->lnum; ++ ++ heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; ++ for (i = 0; i < heap->cnt; i++) ++ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; ++ heap = &c->lpt_heap[LPROPS_DIRTY - 1]; ++ for (i = 0; i < heap->cnt; i++) ++ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; ++ heap = &c->lpt_heap[LPROPS_FREE - 1]; ++ for (i = 0; i < heap->cnt; i++) ++ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; ++ ++ return 1; ++} ++ + #endif /* CONFIG_UBIFS_FS_DEBUG */ +diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c +index 21f47af..278c238 100644 +--- a/fs/ubifs/master.c ++++ b/fs/ubifs/master.c +@@ -148,7 +148,7 @@ static int validate_master(const struct ubifs_info *c) + } + + main_sz = (long long)c->main_lebs * c->leb_size; +- if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { ++ if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) { + err = 9; + goto out; + } +@@ -218,7 +218,7 @@ static int validate_master(const struct ubifs_info *c) + } + + if (c->lst.total_dead + c->lst.total_dark + +- c->lst.total_used + c->old_idx_sz > main_sz) { ++ c->lst.total_used + c->bi.old_idx_sz > main_sz) { + err = 21; + goto out; + } +@@ -286,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c) + c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); + c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); + c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); 
+- c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); ++ c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size); + c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); + c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); + c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); +@@ -305,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c) + c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); + c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); + +- c->calc_idx_sz = c->old_idx_sz; ++ c->calc_idx_sz = c->bi.old_idx_sz; + + if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) + c->no_orphs = 1; +diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h +index c3de04d..ee7cb5e 100644 +--- a/fs/ubifs/misc.h ++++ b/fs/ubifs/misc.h +@@ -39,6 +39,29 @@ static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) + } + + /** ++ * ubifs_zn_obsolete - check if znode is obsolete. ++ * @znode: znode to check ++ * ++ * This helper function returns %1 if @znode is obsolete and %0 otherwise. ++ */ ++static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode) ++{ ++ return !!test_bit(OBSOLETE_ZNODE, &znode->flags); ++} ++ ++/** ++ * ubifs_zn_cow - check if znode has to be copied on write. ++ * @znode: znode to check ++ * ++ * This helper function returns %1 if @znode has COW flag set and %0 ++ * otherwise. ++ */ ++static inline int ubifs_zn_cow(const struct ubifs_znode *znode) ++{ ++ return !!test_bit(COW_ZNODE, &znode->flags); ++} ++ ++/** + * ubifs_wake_up_bgt - wake up background thread. + * @c: UBIFS file-system description object + */ +@@ -122,86 +145,6 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) + } + + /** +- * ubifs_leb_unmap - unmap an LEB. +- * @c: UBIFS file-system description object +- * @lnum: LEB number to unmap +- * +- * This function returns %0 on success and a negative error code on failure.
+- */ +-static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) +-{ +- int err; +- +- ubifs_assert(!c->ro_media && !c->ro_mount); +- if (c->ro_error) +- return -EROFS; +- err = ubi_leb_unmap(c->ubi, lnum); +- if (err) { +- ubifs_err("unmap LEB %d failed, error %d", lnum, err); +- return err; +- } +- +- return 0; +-} +- +-/** +- * ubifs_leb_write - write to a LEB. +- * @c: UBIFS file-system description object +- * @lnum: LEB number to write +- * @buf: buffer to write from +- * @offs: offset within LEB to write to +- * @len: length to write +- * @dtype: data type +- * +- * This function returns %0 on success and a negative error code on failure. +- */ +-static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, +- const void *buf, int offs, int len, int dtype) +-{ +- int err; +- +- ubifs_assert(!c->ro_media && !c->ro_mount); +- if (c->ro_error) +- return -EROFS; +- err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); +- if (err) { +- ubifs_err("writing %d bytes at %d:%d, error %d", +- len, lnum, offs, err); +- return err; +- } +- +- return 0; +-} +- +-/** +- * ubifs_leb_change - atomic LEB change. +- * @c: UBIFS file-system description object +- * @lnum: LEB number to write +- * @buf: buffer to write from +- * @len: length to write +- * @dtype: data type +- * +- * This function returns %0 on success and a negative error code on failure. +- */ +-static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, +- const void *buf, int len, int dtype) +-{ +- int err; +- +- ubifs_assert(!c->ro_media && !c->ro_mount); +- if (c->ro_error) +- return -EROFS; +- err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); +- if (err) { +- ubifs_err("changing %d bytes in LEB %d, error %d", +- len, lnum, err); +- return err; +- } +- +- return 0; +-} +- +-/** + * ubifs_encode_dev - encode device node IDs. 
+ * @dev: UBIFS device node information + * @rdev: device IDs to encode +@@ -340,4 +283,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c) + mutex_unlock(&c->lp_mutex); + } + ++/** ++ * ubifs_next_log_lnum - switch to the next log LEB. ++ * @c: UBIFS file-system description object ++ * @lnum: current log LEB ++ * ++ * This helper function returns the log LEB number which goes next after LEB ++ * 'lnum'. ++ */ ++static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum) ++{ ++ lnum += 1; ++ if (lnum > c->log_last) ++ lnum = UBIFS_LOG_LNUM; ++ ++ return lnum; ++} ++ + #endif /* __UBIFS_MISC_H__ */ +diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c +index 82009c7..c542c73 100644 +--- a/fs/ubifs/orphan.c ++++ b/fs/ubifs/orphan.c +@@ -673,7 +673,8 @@ static int kill_orphans(struct ubifs_info *c) + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); + if (IS_ERR(sleb)) { + if (PTR_ERR(sleb) == -EUCLEAN) +- sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); ++ sleb = ubifs_recover_leb(c, lnum, 0, ++ c->sbuf, -1); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + break; +@@ -892,15 +893,22 @@ static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb) + static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) + { + int lnum, err = 0; ++ void *buf; + + /* Check no-orphans flag and skip this if no orphans */ + if (c->no_orphs) + return 0; + ++ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubifs_err("cannot allocate memory to check orphans"); ++ return 0; ++ } ++ + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + struct ubifs_scan_leb *sleb; + +- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); ++ sleb = ubifs_scan(c, lnum, 0, buf, 0); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + break; +@@ -912,6 +920,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) + break; + } + ++ vfree(buf); + return err; + } + +@@ -920,7 +929,7 @@ static int 
dbg_check_orphans(struct ubifs_info *c) + struct check_info ci; + int err; + +- if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) ++ if (!dbg_is_chk_orph(c)) + return 0; + + ci.last_ino = 0; +diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c +index 77e9b87..2a935b3 100644 +--- a/fs/ubifs/recovery.c ++++ b/fs/ubifs/recovery.c +@@ -28,6 +28,23 @@ + * UBIFS always cleans away all remnants of an unclean un-mount, so that + * errors do not accumulate. However UBIFS defers recovery if it is mounted + * read-only, and the flash is not modified in that case. ++ * ++ * The general UBIFS approach to the recovery is that it recovers from ++ * corruptions which could be caused by power cuts, but it refuses to recover ++ * from corruption caused by other reasons. And UBIFS tries to distinguish ++ * between these 2 reasons of corruptions and silently recover in the former ++ * case and loudly complain in the latter case. ++ * ++ * UBIFS writes only to erased LEBs, so it writes only to the flash space ++ * containing only 0xFFs. UBIFS also always writes strictly from the beginning ++ * of the LEB to the end. And UBIFS assumes that the underlying flash media ++ * writes in @c->max_write_size bytes at a time. ++ * ++ * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min. ++ * I/O unit corresponding to offset X to contain corrupted data, all the ++ * following min. I/O units have to contain empty space (all 0xFFs). If this is ++ * not true, the corruption cannot be the result of a power cut, and UBIFS ++ * refuses to mount. 
+ */ + + #include <linux/crc32.h> +@@ -100,7 +117,7 @@ static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, + if (!sbuf) + return -ENOMEM; + +- err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); ++ err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0); + if (err && err != -EBADMSG) + goto out_free; + +@@ -196,10 +213,10 @@ static int write_rcvrd_mst_node(struct ubifs_info *c, + mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); + + ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); +- err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum, mst, sz, UBI_SHORTTERM); + if (err) + goto out; +- err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum + 1, mst, sz, UBI_SHORTTERM); + if (err) + goto out; + out: +@@ -257,7 +274,8 @@ int ubifs_recover_master_node(struct ubifs_info *c) + if (cor1) + goto out_err; + mst = mst1; +- } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { ++ } else if (offs1 == 0 && ++ c->leb_size - offs2 - sz < sz) { + /* 1st LEB was unmapped and written, 2nd not */ + if (cor1) + goto out_err; +@@ -300,6 +318,32 @@ int ubifs_recover_master_node(struct ubifs_info *c) + goto out_free; + } + memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); ++ ++ /* ++ * We had to recover the master node, which means there was an ++ * unclean reboot. However, it is possible that the master node ++ * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set. ++ * E.g., consider the following chain of events: ++ * ++ * 1. UBIFS was cleanly unmounted, so the master node is clean ++ * 2. UBIFS is being mounted R/W and starts changing the master ++ * node in the first (%UBIFS_MST_LNUM). A power cut happens, ++ * so this LEB ends up with some amount of garbage at the ++ * end. ++ * 3. UBIFS is being mounted R/O. We reach this place and ++ * recover the master node from the second LEB ++ * (%UBIFS_MST_LNUM + 1). 
But we cannot update the media ++ * because we are being mounted R/O. We have to defer the ++ * operation. ++ * 4. However, this master node (@c->mst_node) is marked as ++ * clean (since the step 1). And if we just return, the ++ * mount code will be confused and won't recover the master ++ * node when it is re-mounted R/W later. ++ * ++ * Thus, force the recovery by marking the master node as ++ * dirty. ++ */ ++ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + } else { + /* Write the recovered master node */ + c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1; +@@ -362,8 +406,9 @@ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c) + * @offs: offset to check + * + * This function returns %1 if @offs was in the last write to the LEB whose data +- * is in @buf, otherwise %0 is returned. The determination is made by checking +- * for subsequent empty space starting from the next @c->min_io_size boundary. ++ * is in @buf, otherwise %0 is returned. The determination is made by checking ++ * for subsequent empty space starting from the next @c->max_write_size ++ * boundary. + */ + static int is_last_write(const struct ubifs_info *c, void *buf, int offs) + { +@@ -371,10 +416,10 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) + uint8_t *p; + + /* +- * Round up to the next @c->min_io_size boundary i.e. @offs is in the +- * last wbuf written. After that should be empty space. ++ * Round up to the next @c->max_write_size boundary i.e. @offs is in ++ * the last wbuf written. After that should be empty space.
+ */ +- empty_offs = ALIGN(offs + 1, c->min_io_size); ++ empty_offs = ALIGN(offs + 1, c->max_write_size); + check_len = c->leb_size - empty_offs; + p = buf + empty_offs - offs; + return is_empty(p, check_len); +@@ -429,7 +474,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, + int skip, dlen = le32_to_cpu(ch->len); + + /* Check for empty space after the corrupt node's common header */ +- skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; ++ skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs; + if (is_empty(buf + skip, len - skip)) + return 1; + /* +@@ -441,7 +486,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, + return 0; + } + /* Now we know the corrupt node's length we can skip over it */ +- skip = ALIGN(offs + dlen, c->min_io_size) - offs; ++ skip = ALIGN(offs + dlen, c->max_write_size) - offs; + /* After which there should be empty space */ + if (is_empty(buf + skip, len - skip)) + return 1; +@@ -495,8 +540,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + int len = ALIGN(endpt, c->min_io_size); + + if (start) { +- err = ubi_read(c->ubi, lnum, sleb->buf, 0, +- start); ++ err = ubifs_leb_read(c, lnum, sleb->buf, 0, ++ start, 1); + if (err) + return err; + } +@@ -510,8 +555,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + ubifs_pad(c, buf, pad_len); + } + } +- err = ubi_leb_change(c->ubi, lnum, sleb->buf, len, +- UBI_UNKNOWN); ++ err = ubifs_leb_change(c, lnum, sleb->buf, len, ++ UBI_UNKNOWN); + if (err) + return err; + } +@@ -520,16 +565,15 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + } + + /** +- * drop_incomplete_group - drop nodes from an incomplete group. ++ * drop_last_group - drop the last group of nodes. + * @sleb: scanned LEB information + * @offs: offset of dropped nodes is returned here + * +- * This function returns %1 if nodes are dropped and %0 otherwise. 
++ * This is a helper function for 'ubifs_recover_leb()' which drops the last ++ * group of nodes of the scanned LEB. + */ +-static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) ++static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs) + { +- int dropped = 0; +- + while (!list_empty(&sleb->nodes)) { + struct ubifs_scan_node *snod; + struct ubifs_ch *ch; +@@ -538,15 +582,40 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) + list); + ch = snod->node; + if (ch->group_type != UBIFS_IN_NODE_GROUP) +- return dropped; +- dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); ++ break; ++ ++ dbg_rcvry("dropping grouped node at %d:%d", ++ sleb->lnum, snod->offs); ++ *offs = snod->offs; ++ list_del(&snod->list); ++ kfree(snod); ++ sleb->nodes_cnt -= 1; ++ } ++} ++ ++/** ++ * drop_last_node - drop the last node. ++ * @sleb: scanned LEB information ++ * @offs: offset of dropped nodes is returned here ++ * @grouped: non-zero if whole group of nodes have to be dropped ++ * ++ * This is a helper function for 'ubifs_recover_leb()' which drops the last ++ * node of the scanned LEB. 
++ */ ++static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) ++{ ++ struct ubifs_scan_node *snod; ++ ++ if (!list_empty(&sleb->nodes)) { ++ snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, ++ list); ++ ++ dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs); + *offs = snod->offs; + list_del(&snod->list); + kfree(snod); + sleb->nodes_cnt -= 1; +- dropped = 1; + } +- return dropped; + } + + /** +@@ -555,7 +624,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) + * @lnum: LEB number + * @offs: offset + * @sbuf: LEB-sized buffer to use +- * @grouped: nodes may be grouped for recovery ++ * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not ++ * belong to any journal head) + * + * This function does a scan of a LEB, but caters for errors that might have + * been caused by the unclean unmount from which we are attempting to recover. +@@ -563,25 +633,21 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) + * found, and a negative error code in case of failure. + */ + struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, +- int offs, void *sbuf, int grouped) ++ int offs, void *sbuf, int jhead) + { +- int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; +- int empty_chkd = 0, start = offs; ++ int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; ++ int grouped = jhead == -1 ? 
0 : c->jheads[jhead].grouped; + struct ubifs_scan_leb *sleb; + void *buf = sbuf + offs; + +- dbg_rcvry("%d:%d", lnum, offs); ++ dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped); + + sleb = ubifs_start_scan(c, lnum, offs, sbuf); + if (IS_ERR(sleb)) + return sleb; + +- if (sleb->ecc) +- need_clean = 1; +- ++ ubifs_assert(len >= 8); + while (len >= 8) { +- int ret; +- + dbg_scan("look at LEB %d:%d (%d bytes left)", + lnum, offs, len); + +@@ -591,8 +657,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, + * Scan quietly until there is an error from which we cannot + * recover + */ +- ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); +- ++ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + if (ret == SCANNED_A_NODE) { + /* A valid node, and not a padding node */ + struct ubifs_ch *ch = buf; +@@ -605,104 +670,127 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, + offs += node_len; + buf += node_len; + len -= node_len; +- continue; +- } +- +- if (ret > 0) { ++ } else if (ret > 0) { + /* Padding bytes or a valid padding node */ + offs += ret; + buf += ret; + len -= ret; +- continue; +- } +- +- if (ret == SCANNED_EMPTY_SPACE) { +- if (!is_empty(buf, len)) { +- if (!is_last_write(c, buf, offs)) +- break; +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- } +- empty_chkd = 1; ++ } else if (ret == SCANNED_EMPTY_SPACE || ++ ret == SCANNED_GARBAGE || ++ ret == SCANNED_A_BAD_PAD_NODE || ++ ret == SCANNED_A_CORRUPT_NODE) { ++ dbg_rcvry("found corruption (%d) at %d:%d", ++ ret, lnum, offs); + break; +- } +- +- if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) +- if (is_last_write(c, buf, offs)) { +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- empty_chkd = 1; +- break; +- } +- +- if (ret == SCANNED_A_CORRUPT_NODE) +- if (no_more_nodes(c, buf, len, lnum, offs)) { +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- empty_chkd = 1; +- break; +- } +- 
+- if (quiet) { +- /* Redo the last scan but noisily */ +- quiet = 0; +- continue; +- } +- +- switch (ret) { +- case SCANNED_GARBAGE: +- dbg_err("garbage"); +- goto corrupted; +- case SCANNED_A_CORRUPT_NODE: +- case SCANNED_A_BAD_PAD_NODE: +- dbg_err("bad node"); +- goto corrupted; +- default: +- dbg_err("unknown"); ++ } else { ++ dbg_err("unexpected return value %d", ret); + err = -EINVAL; + goto error; + } + } + +- if (!empty_chkd && !is_empty(buf, len)) { +- if (is_last_write(c, buf, offs)) { +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- } else { ++ if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) { ++ if (!is_last_write(c, buf, offs)) ++ goto corrupted_rescan; ++ } else if (ret == SCANNED_A_CORRUPT_NODE) { ++ if (!no_more_nodes(c, buf, len, lnum, offs)) ++ goto corrupted_rescan; ++ } else if (!is_empty(buf, len)) { ++ if (!is_last_write(c, buf, offs)) { + int corruption = first_non_ff(buf, len); + ++ /* ++ * See header comment for this file for more ++ * explanations about the reasons we have this check. ++ */ + ubifs_err("corrupt empty space LEB %d:%d, corruption " + "starts at %d", lnum, offs, corruption); + /* Make sure we dump interesting non-0xFF data */ +- offs = corruption; ++ offs += corruption; + buf += corruption; + goto corrupted; + } + } + +- /* Drop nodes from incomplete group */ +- if (grouped && drop_incomplete_group(sleb, &offs)) { +- buf = sbuf + offs; +- len = c->leb_size - offs; +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- } ++ min_io_unit = round_down(offs, c->min_io_size); ++ if (grouped) ++ /* ++ * If nodes are grouped, always drop the incomplete group at ++ * the end. ++ */ ++ drop_last_group(sleb, &offs); + +- if (offs % c->min_io_size) { +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; ++ if (jhead == GCHD) { ++ /* ++ * If this LEB belongs to the GC head then while we are in the ++ * middle of the same min. I/O unit keep dropping nodes. 
So ++ * basically, what we want is to make sure that the last min. ++ * I/O unit where we saw the corruption is dropped completely ++ * with all the uncorrupted nodes which may possibly sit there. ++ * ++ * In other words, let's name the min. I/O unit where the ++ * corruption starts B, and the previous min. I/O unit A. The ++ * below code tries to deal with a situation when half of B ++ * contains valid nodes or the end of a valid node, and the ++ * second half of B contains corrupted data or garbage. This ++ * means that UBIFS had been writing to B just before the power ++ * cut happened. I do not know how realistic is this scenario ++ * that half of the min. I/O unit had been written successfully ++ * and the other half not, but this is possible in our 'failure ++ * mode emulation' infrastructure at least. ++ * ++ * So what is the problem, why we need to drop those nodes? Why ++ * can't we just clean-up the second half of B by putting a ++ * padding node there? We can, and this works fine with one ++ * exception which was reproduced with power cut emulation ++ * testing and happens extremely rarely. ++ * ++ * Imagine the file-system is full, we run GC which starts ++ * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is ++ * the current GC head LEB). The @c->gc_lnum is -1, which means ++ * that GC will retain LEB X and will try to continue. Imagine ++ * that LEB X is currently the dirtiest LEB, and the amount of ++ * used space in LEB Y is exactly the same as amount of free ++ * space in LEB X. ++ * ++ * And a power cut happens when nodes are moved from LEB X to ++ * LEB Y. We are here trying to recover LEB Y which is the GC ++ * head LEB. We find the min. I/O unit B as described above. ++ * Then we clean-up LEB Y by padding min. I/O unit. And later ++ * 'ubifs_rcvry_gc_commit()' function fails, because it cannot ++ * find a dirty LEB which could be GC'd into LEB Y! 
Even LEB X ++ * does not match because the amount of valid nodes there does ++ * not fit the free space in LEB Y any more! And this is ++ * because of the padding node which we added to LEB Y. The ++ * user-visible effect of this which I once observed and ++ * analysed is that we cannot mount the file-system with ++ * -ENOSPC error. ++ * ++ * So obviously, to make sure that situation does not happen we ++ * should free min. I/O unit B in LEB Y completely and the last ++ * used min. I/O unit in LEB Y should be A. This is basically ++ * what the below code tries to do. ++ */ ++ while (offs > min_io_unit) ++ drop_last_node(sleb, &offs); + } + ++ buf = sbuf + offs; ++ len = c->leb_size - offs; ++ ++ clean_buf(c, &buf, lnum, &offs, &len); + ubifs_end_scan(c, sleb, lnum, offs); + +- if (need_clean) { +- err = fix_unclean_leb(c, sleb, start); +- if (err) +- goto error; +- } ++ err = fix_unclean_leb(c, sleb, start); ++ if (err) ++ goto error; + + return sleb; + ++corrupted_rescan: ++ /* Re-scan the corrupted data with verbose messages */ ++ dbg_err("corruptio %d", ret); ++ ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + corrupted: + ubifs_scanned_corruption(c, lnum, offs, buf); + err = -EUCLEAN; +@@ -733,7 +821,8 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, + return -ENOMEM; + if (c->leb_size - offs < UBIFS_CS_NODE_SZ) + goto out_err; +- err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); ++ err = ubifs_leb_read(c, lnum, (void *)cs_node, offs, ++ UBIFS_CS_NODE_SZ, 0); + if (err && err != -EBADMSG) + goto out_free; + ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); +@@ -819,7 +908,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, + } + ubifs_scan_destroy(sleb); + } +- return ubifs_recover_leb(c, lnum, offs, sbuf, 0); ++ return ubifs_recover_leb(c, lnum, offs, sbuf, -1); + } + + /** +@@ -833,15 +922,10 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int 
lnum, + * + * This function returns %0 on success and a negative error code on failure. + */ +-static int recover_head(const struct ubifs_info *c, int lnum, int offs, +- void *sbuf) ++static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf) + { +- int len, err; ++ int len = c->max_write_size, err; + +- if (c->min_io_size > 1) +- len = c->min_io_size; +- else +- len = 512; + if (offs + len > c->leb_size) + len = c->leb_size - offs; + +@@ -849,15 +933,15 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, + return 0; + + /* Read at the head location and check it is empty flash */ +- err = ubi_read(c->ubi, lnum, sbuf, offs, len); ++ err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1); + if (err || !is_empty(sbuf, len)) { + dbg_rcvry("cleaning head at %d:%d", lnum, offs); + if (offs == 0) + return ubifs_leb_unmap(c, lnum); +- err = ubi_read(c->ubi, lnum, sbuf, 0, offs); ++ err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1); + if (err) + return err; +- return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); ++ return ubifs_leb_change(c, lnum, sbuf, offs, UBI_UNKNOWN); + } + + return 0; +@@ -880,7 +964,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, + * + * This function returns %0 on success and a negative error code on failure. + */ +-int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) ++int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf) + { + int err; + +@@ -900,7 +984,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) + } + + /** +- * clean_an_unclean_leb - read and write a LEB to remove corruption. ++ * clean_an_unclean_leb - read and write a LEB to remove corruption. 
+ * @c: UBIFS file-system description object + * @ucleb: unclean LEB information + * @sbuf: LEB-sized buffer to use +@@ -911,7 +995,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) + * + * This function returns %0 on success and a negative error code on failure. + */ +-static int clean_an_unclean_leb(const struct ubifs_info *c, ++static int clean_an_unclean_leb(struct ubifs_info *c, + struct ubifs_unclean_leb *ucleb, void *sbuf) + { + int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; +@@ -927,7 +1011,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, + return 0; + } + +- err = ubi_read(c->ubi, lnum, buf, offs, len); ++ err = ubifs_leb_read(c, lnum, buf, offs, len, 0); + if (err && err != -EBADMSG) + return err; + +@@ -987,7 +1071,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, + } + + /* Write back the LEB atomically */ +- err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); ++ err = ubifs_leb_change(c, lnum, sbuf, len, UBI_UNKNOWN); + if (err) + return err; + +@@ -1007,7 +1091,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, + * + * This function returns %0 on success and a negative error code on failure. + */ +-int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) ++int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf) + { + dbg_rcvry("recovery"); + while (!list_empty(&c->unclean_leb_list)) { +@@ -1026,6 +1110,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) + } + + /** ++ * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit. ++ * @c: UBIFS file-system description object ++ * ++ * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty ++ * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns ++ * zero in case of success and a negative error code in case of failure. 
++ */ ++static int grab_empty_leb(struct ubifs_info *c) ++{ ++ int lnum, err; ++ ++ /* ++ * Note, it is very important to first search for an empty LEB and then ++ * run the commit, not vice-versa. The reason is that there might be ++ * only one empty LEB at the moment, the one which has been the ++ * @c->gc_lnum just before the power cut happened. During the regular ++ * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no ++ * one but GC can grab it. But at this moment this single empty LEB is ++ * not marked as taken, so if we run commit - what happens? Right, the ++ * commit will grab it and write the index there. Remember that the ++ * index always expands as long as there is free space, and it only ++ * starts consolidating when we run out of space. ++ * ++ * IOW, if we run commit now, we might not be able to find a free LEB ++ * after this. ++ */ ++ lnum = ubifs_find_free_leb_for_idx(c); ++ if (lnum < 0) { ++ dbg_err("could not find an empty LEB"); ++ dbg_dump_lprops(c); ++ dbg_dump_budg(c, &c->bi); ++ return lnum; ++ } ++ ++ /* Reset the index flag */ ++ err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, ++ LPROPS_INDEX, 0); ++ if (err) ++ return err; ++ ++ c->gc_lnum = lnum; ++ dbg_rcvry("found empty LEB %d, run commit", lnum); ++ ++ return ubifs_run_commit(c); ++} ++ ++/** + * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. + * @c: UBIFS file-system description object + * +@@ -1047,71 +1178,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) + { + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + struct ubifs_lprops lp; +- int lnum, err; ++ int err; ++ ++ dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs); + + c->gc_lnum = -1; +- if (wbuf->lnum == -1) { +- dbg_rcvry("no GC head LEB"); +- goto find_free; +- } +- /* +- * See whether the used space in the dirtiest LEB fits in the GC head +- * LEB. 
+- */ +- if (wbuf->offs == c->leb_size) { +- dbg_rcvry("no room in GC head LEB"); +- goto find_free; +- } ++ if (wbuf->lnum == -1 || wbuf->offs == c->leb_size) ++ return grab_empty_leb(c); ++ + err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); + if (err) { +- /* +- * There are no dirty or empty LEBs subject to here being +- * enough for the index. Try to use +- * 'ubifs_find_free_leb_for_idx()', which will return any empty +- * LEBs (ignoring index requirements). If the index then +- * doesn't have enough LEBs the recovery commit will fail - +- * which is the same result anyway i.e. recovery fails. So +- * there is no problem ignoring index requirements and just +- * grabbing a free LEB since we have already established there +- * is not a dirty LEB we could have used instead. +- */ +- if (err == -ENOSPC) { +- dbg_rcvry("could not find a dirty LEB"); +- goto find_free; +- } +- return err; +- } +- ubifs_assert(!(lp.flags & LPROPS_INDEX)); +- lnum = lp.lnum; +- if (lp.free + lp.dirty == c->leb_size) { +- /* An empty LEB was returned */ +- if (lp.free != c->leb_size) { +- err = ubifs_change_one_lp(c, lnum, c->leb_size, +- 0, 0, 0, 0); +- if (err) +- return err; +- } +- err = ubifs_leb_unmap(c, lnum); +- if (err) +- return err; +- c->gc_lnum = lnum; +- dbg_rcvry("allocated LEB %d for GC", lnum); +- /* Run the commit */ +- dbg_rcvry("committing"); +- return ubifs_run_commit(c); +- } +- /* +- * There was no empty LEB so the used space in the dirtiest LEB must fit +- * in the GC head LEB. 
+- */ +- if (lp.free + lp.dirty < wbuf->offs) { +- dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d", +- lnum, wbuf->lnum, wbuf->offs); +- err = ubifs_return_leb(c, lnum); +- if (err) ++ if (err != -ENOSPC) + return err; +- goto find_free; ++ ++ dbg_rcvry("could not find a dirty LEB"); ++ return grab_empty_leb(c); + } ++ ++ ubifs_assert(!(lp.flags & LPROPS_INDEX)); ++ ubifs_assert(lp.free + lp.dirty >= wbuf->offs); ++ + /* + * We run the commit before garbage collection otherwise subsequent + * mounts will see the GC and orphan deletion in a different order. +@@ -1120,11 +1206,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) + err = ubifs_run_commit(c); + if (err) + return err; +- /* +- * The data in the dirtiest LEB fits in the GC head LEB, so do the GC +- * - use locking to keep 'ubifs_assert()' happy. +- */ +- dbg_rcvry("GC'ing LEB %d", lnum); ++ ++ dbg_rcvry("GC'ing LEB %d", lp.lnum); + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + err = ubifs_garbage_collect_leb(c, &lp); + if (err >= 0) { +@@ -1140,37 +1223,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) + err = -EINVAL; + return err; + } +- if (err != LEB_RETAINED) { +- dbg_err("GC returned %d", err); ++ ++ ubifs_assert(err == LEB_RETAINED); ++ if (err != LEB_RETAINED) + return -EINVAL; +- } ++ + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) + return err; +- dbg_rcvry("allocated LEB %d for GC", lnum); +- return 0; + +-find_free: +- /* +- * There is no GC head LEB or the free space in the GC head LEB is too +- * small, or there are not dirty LEBs. Allocate gc_lnum by calling +- * 'ubifs_find_free_leb_for_idx()' so GC is not run. 
+- */ +- lnum = ubifs_find_free_leb_for_idx(c); +- if (lnum < 0) { +- dbg_err("could not find an empty LEB"); +- return lnum; +- } +- /* And reset the index flag */ +- err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, +- LPROPS_INDEX, 0); +- if (err) +- return err; +- c->gc_lnum = lnum; +- dbg_rcvry("allocated LEB %d for GC", lnum); +- /* Run the commit */ +- dbg_rcvry("committing"); +- return ubifs_run_commit(c); ++ dbg_rcvry("allocated LEB %d for GC", lp.lnum); ++ return 0; + } + + /** +@@ -1393,7 +1456,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) + if (i_size >= e->d_size) + return 0; + /* Read the LEB */ +- err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size); ++ err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1); + if (err) + goto out; + /* Change the size field and recalculate the CRC */ +@@ -1409,10 +1472,10 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) + len -= 1; + len = ALIGN(len + 1, c->min_io_size); + /* Atomically write the fixed LEB back again */ +- err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); ++ err = ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN); + if (err) + goto out; +- dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", ++ dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", + (unsigned long)e->inum, lnum, offs, i_size, e->d_size); + return 0; + +@@ -1461,20 +1524,27 @@ int ubifs_recover_size(struct ubifs_info *c) + e->i_size = le64_to_cpu(ino->size); + } + } ++ + if (e->exists && e->i_size < e->d_size) { +- if (!e->inode && c->ro_mount) { ++ if (c->ro_mount) { + /* Fix the inode size and pin it in memory */ + struct inode *inode; ++ struct ubifs_inode *ui; ++ ++ ubifs_assert(!e->inode); + + inode = ubifs_iget(c->vfs_sb, e->inum); + if (IS_ERR(inode)) + return PTR_ERR(inode); ++ ++ ui = ubifs_inode(inode); + if (inode->i_size < e->d_size) { + dbg_rcvry("ino %lu size %lld -> %lld", + (unsigned long)e->inum, +- e->d_size, inode->i_size); ++ 
inode->i_size, e->d_size); + inode->i_size = e->d_size; +- ubifs_inode(inode)->ui_size = e->d_size; ++ ui->ui_size = e->d_size; ++ ui->synced_i_size = e->d_size; + e->inode = inode; + this = rb_next(this); + continue; +@@ -1489,9 +1559,11 @@ int ubifs_recover_size(struct ubifs_info *c) + iput(e->inode); + } + } ++ + this = rb_next(this); + rb_erase(&e->rb, &c->size_tree); + kfree(e); + } ++ + return 0; + } +diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c +index eed0fcf..b007637 100644 +--- a/fs/ubifs/replay.c ++++ b/fs/ubifs/replay.c +@@ -33,43 +33,32 @@ + */ + + #include "ubifs.h" +- +-/* +- * Replay flags. +- * +- * REPLAY_DELETION: node was deleted +- * REPLAY_REF: node is a reference node +- */ +-enum { +- REPLAY_DELETION = 1, +- REPLAY_REF = 2, +-}; ++#include <linux/list_sort.h> + + /** +- * struct replay_entry - replay tree entry. ++ * struct replay_entry - replay list entry. + * @lnum: logical eraseblock number of the node + * @offs: node offset + * @len: node length ++ * @deletion: non-zero if this entry corresponds to a node deletion + * @sqnum: node sequence number +- * @flags: replay flags +- * @rb: links the replay tree ++ * @list: links the replay list + * @key: node key + * @nm: directory entry name + * @old_size: truncation old size + * @new_size: truncation new size +- * @free: amount of free space in a bud +- * @dirty: amount of dirty space in a bud from padding and deletion nodes + * +- * UBIFS journal replay must compare node sequence numbers, which means it must +- * build a tree of node information to insert into the TNC. ++ * The replay process first scans all buds and builds the replay list, then ++ * sorts the replay list in nodes sequence number order, and then inserts all ++ * the replay entries to the TNC. 
+ */ + struct replay_entry { + int lnum; + int offs; + int len; ++ unsigned int deletion:1; + unsigned long long sqnum; +- int flags; +- struct rb_node rb; ++ struct list_head list; + union ubifs_key key; + union { + struct qstr nm; +@@ -77,10 +66,6 @@ struct replay_entry { + loff_t old_size; + loff_t new_size; + }; +- struct { +- int free; +- int dirty; +- }; + }; + }; + +@@ -88,57 +73,64 @@ struct replay_entry { + * struct bud_entry - entry in the list of buds to replay. + * @list: next bud in the list + * @bud: bud description object +- * @free: free bytes in the bud + * @sqnum: reference node sequence number ++ * @free: free bytes in the bud ++ * @dirty: dirty bytes in the bud + */ + struct bud_entry { + struct list_head list; + struct ubifs_bud *bud; +- int free; + unsigned long long sqnum; ++ int free; ++ int dirty; + }; + + /** + * set_bud_lprops - set free and dirty space used by a bud. + * @c: UBIFS file-system description object +- * @r: replay entry of bud ++ * @b: bud entry which describes the bud ++ * ++ * This function makes sure the LEB properties of bud @b are set correctly ++ * after the replay. Returns zero in case of success and a negative error code ++ * in case of failure. + */ +-static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) ++static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) + { + const struct ubifs_lprops *lp; + int err = 0, dirty; + + ubifs_get_lprops(c); + +- lp = ubifs_lpt_lookup_dirty(c, r->lnum); ++ lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + dirty = lp->dirty; +- if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { ++ if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { + /* + * The LEB was added to the journal with a starting offset of + * zero which means the LEB must have been empty. 
The LEB +- * property values should be lp->free == c->leb_size and +- * lp->dirty == 0, but that is not the case. The reason is that +- * the LEB was garbage collected. The garbage collector resets +- * the free and dirty space without recording it anywhere except +- * lprops, so if there is not a commit then lprops does not have +- * that information next time the file system is mounted. ++ * property values should be @lp->free == @c->leb_size and ++ * @lp->dirty == 0, but that is not the case. The reason is that ++ * the LEB had been garbage collected before it became the bud, ++ * and there was no commit in between. The garbage collector ++ * resets the free and dirty space without recording it ++ * anywhere except lprops, so if there was no commit then ++ * lprops does not have that information. + * + * We do not need to adjust free space because the scan has told + * us the exact value which is recorded in the replay entry as +- * r->free. ++ * @b->free. + * + * However we do need to subtract from the dirty space the + * amount of space that the garbage collector reclaimed, which + * is the whole LEB minus the amount of space that was free. 
+ */ +- dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, ++ dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, + lp->free, lp->dirty); +- dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, ++ dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, + lp->free, lp->dirty); + dirty -= c->leb_size - lp->free; + /* +@@ -150,21 +142,48 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) + */ + if (dirty != 0) + dbg_msg("LEB %d lp: %d free %d dirty " +- "replay: %d free %d dirty", r->lnum, lp->free, +- lp->dirty, r->free, r->dirty); ++ "replay: %d free %d dirty", b->bud->lnum, ++ lp->free, lp->dirty, b->free, b->dirty); + } +- lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, ++ lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, + lp->flags | LPROPS_TAKEN, 0); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } ++ ++ /* Make sure the journal head points to the latest bud */ ++ err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, ++ b->bud->lnum, c->leb_size - b->free, ++ UBI_SHORTTERM); ++ + out: + ubifs_release_lprops(c); + return err; + } + + /** ++ * set_buds_lprops - set free and dirty space for all replayed buds. ++ * @c: UBIFS file-system description object ++ * ++ * This function sets LEB properties for all replayed buds. Returns zero in ++ * case of success and a negative error code in case of failure. ++ */ ++static int set_buds_lprops(struct ubifs_info *c) ++{ ++ struct bud_entry *b; ++ int err; ++ ++ list_for_each_entry(b, &c->replay_buds, list) { ++ err = set_bud_lprops(c, b); ++ if (err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++/** + * trun_remove_range - apply a replay entry for a truncation to the TNC. 
+ * @c: UBIFS file-system description object + * @r: replay entry of truncation +@@ -200,24 +219,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) + */ + static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) + { +- int err, deletion = ((r->flags & REPLAY_DELETION) != 0); ++ int err; + +- dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, +- r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); ++ dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", ++ r->lnum, r->offs, r->len, r->deletion, r->sqnum); + + /* Set c->replay_sqnum to help deal with dangling branches. */ + c->replay_sqnum = r->sqnum; + +- if (r->flags & REPLAY_REF) +- err = set_bud_lprops(c, r); +- else if (is_hash_key(c, &r->key)) { +- if (deletion) ++ if (is_hash_key(c, &r->key)) { ++ if (r->deletion) + err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); + else + err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, + r->len, &r->nm); + } else { +- if (deletion) ++ if (r->deletion) + switch (key_type(c, &r->key)) { + case UBIFS_INO_KEY: + { +@@ -240,7 +257,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) + return err; + + if (c->need_recovery) +- err = ubifs_recover_size_accum(c, &r->key, deletion, ++ err = ubifs_recover_size_accum(c, &r->key, r->deletion, + r->new_size); + } + +@@ -248,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) + } + + /** +- * destroy_replay_tree - destroy the replay. +- * @c: UBIFS file-system description object ++ * replay_entries_cmp - compare 2 replay entries. ++ * @priv: UBIFS file-system description object ++ * @a: first replay entry ++ * @b: second replay entry + * +- * Destroy the replay tree. ++ * This is a comparison function for 'list_sort()' which compares 2 replay ++ * entries @a and @b by comparing their sequence number. Returns %1 if @a has ++ * greater sequence number and %-1 otherwise. 
+ */ +-static void destroy_replay_tree(struct ubifs_info *c) ++static int replay_entries_cmp(void *priv, struct list_head *a, ++ struct list_head *b) + { +- struct rb_node *this = c->replay_tree.rb_node; +- struct replay_entry *r; +- +- while (this) { +- if (this->rb_left) { +- this = this->rb_left; +- continue; +- } else if (this->rb_right) { +- this = this->rb_right; +- continue; +- } +- r = rb_entry(this, struct replay_entry, rb); +- this = rb_parent(this); +- if (this) { +- if (this->rb_left == &r->rb) +- this->rb_left = NULL; +- else +- this->rb_right = NULL; +- } +- if (is_hash_key(c, &r->key)) +- kfree(r->nm.name); +- kfree(r); +- } +- c->replay_tree = RB_ROOT; ++ struct replay_entry *ra, *rb; ++ ++ cond_resched(); ++ if (a == b) ++ return 0; ++ ++ ra = list_entry(a, struct replay_entry, list); ++ rb = list_entry(b, struct replay_entry, list); ++ ubifs_assert(ra->sqnum != rb->sqnum); ++ if (ra->sqnum > rb->sqnum) ++ return 1; ++ return -1; + } + + /** +- * apply_replay_tree - apply the replay tree to the TNC. ++ * apply_replay_list - apply the replay list to the TNC. + * @c: UBIFS file-system description object + * +- * Apply the replay tree. +- * Returns zero in case of success and a negative error code in case of +- * failure. ++ * Apply all entries in the replay list to the TNC. Returns zero in case of ++ * success and a negative error code in case of failure. 
+ */ +-static int apply_replay_tree(struct ubifs_info *c) ++static int apply_replay_list(struct ubifs_info *c) + { +- struct rb_node *this = rb_first(&c->replay_tree); ++ struct replay_entry *r; ++ int err; + +- while (this) { +- struct replay_entry *r; +- int err; ++ list_sort(c, &c->replay_list, &replay_entries_cmp); + ++ list_for_each_entry(r, &c->replay_list, list) { + cond_resched(); + +- r = rb_entry(this, struct replay_entry, rb); + err = apply_replay_entry(c, r); + if (err) + return err; +- this = rb_next(this); + } ++ + return 0; + } + + /** +- * insert_node - insert a node to the replay tree. ++ * destroy_replay_list - destroy the replay. ++ * @c: UBIFS file-system description object ++ * ++ * Destroy the replay list. ++ */ ++static void destroy_replay_list(struct ubifs_info *c) ++{ ++ struct replay_entry *r, *tmp; ++ ++ list_for_each_entry_safe(r, tmp, &c->replay_list, list) { ++ if (is_hash_key(c, &r->key)) ++ kfree(r->nm.name); ++ list_del(&r->list); ++ kfree(r); ++ } ++} ++ ++/** ++ * insert_node - insert a node to the replay list + * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset +@@ -321,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c) + * @old_size: truncation old size + * @new_size: truncation new size + * +- * This function inserts a scanned non-direntry node to the replay tree. The +- * replay tree is an RB-tree containing @struct replay_entry elements which are +- * indexed by the sequence number. The replay tree is applied at the very end +- * of the replay process. Since the tree is sorted in sequence number order, +- * the older modifications are applied first. This function returns zero in +- * case of success and a negative error code in case of failure. ++ * This function inserts a scanned non-direntry node to the replay list. The ++ * replay list contains @struct replay_entry elements, and we sort this list in ++ * sequence number order before applying it. 
The replay list is applied at the ++ * very end of the replay process. Since the list is sorted in sequence number ++ * order, the older modifications are applied first. This function returns zero ++ * in case of success and a negative error code in case of failure. + */ + static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, + union ubifs_key *key, unsigned long long sqnum, + int deletion, int *used, loff_t old_size, + loff_t new_size) + { +- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + ++ dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); ++ + if (key_inum(c, key) >= c->highest_inum) + c->highest_inum = key_inum(c, key); + +- dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); +- while (*p) { +- parent = *p; +- r = rb_entry(parent, struct replay_entry, rb); +- if (sqnum < r->sqnum) { +- p = &(*p)->rb_left; +- continue; +- } else if (sqnum > r->sqnum) { +- p = &(*p)->rb_right; +- continue; +- } +- ubifs_err("duplicate sqnum in replay"); +- return -EINVAL; +- } +- + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; +@@ -363,19 +375,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, + r->lnum = lnum; + r->offs = offs; + r->len = len; ++ r->deletion = !!deletion; + r->sqnum = sqnum; +- r->flags = (deletion ? REPLAY_DELETION : 0); ++ key_copy(c, key, &r->key); + r->old_size = old_size; + r->new_size = new_size; +- key_copy(c, key, &r->key); + +- rb_link_node(&r->rb, parent, p); +- rb_insert_color(&r->rb, &c->replay_tree); ++ list_add_tail(&r->list, &c->replay_list); + return 0; + } + + /** +- * insert_dent - insert a directory entry node into the replay tree. ++ * insert_dent - insert a directory entry node into the replay list. 
+ * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset +@@ -387,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, + * @deletion: non-zero if this is a deletion + * @used: number of bytes in use in a LEB + * +- * This function inserts a scanned directory entry node to the replay tree. +- * Returns zero in case of success and a negative error code in case of +- * failure. +- * +- * This function is also used for extended attribute entries because they are +- * implemented as directory entry nodes. ++ * This function inserts a scanned directory entry node or an extended ++ * attribute entry to the replay list. Returns zero in case of success and a ++ * negative error code in case of failure. + */ + static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, + union ubifs_key *key, const char *name, int nlen, + unsigned long long sqnum, int deletion, int *used) + { +- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + char *nbuf; + ++ dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); + if (key_inum(c, key) >= c->highest_inum) + c->highest_inum = key_inum(c, key); + +- dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); +- while (*p) { +- parent = *p; +- r = rb_entry(parent, struct replay_entry, rb); +- if (sqnum < r->sqnum) { +- p = &(*p)->rb_left; +- continue; +- } +- if (sqnum > r->sqnum) { +- p = &(*p)->rb_right; +- continue; +- } +- ubifs_err("duplicate sqnum in replay"); +- return -EINVAL; +- } +- + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; ++ + nbuf = kmalloc(nlen + 1, GFP_KERNEL); + if (!nbuf) { + kfree(r); +@@ -435,17 +428,15 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, + r->lnum = lnum; + r->offs = offs; + r->len = len; ++ r->deletion = !!deletion; + r->sqnum = sqnum; ++ key_copy(c, key, &r->key); + r->nm.len = nlen; + memcpy(nbuf, name, 
nlen); + nbuf[nlen] = '\0'; + r->nm.name = nbuf; +- r->flags = (deletion ? REPLAY_DELETION : 0); +- key_copy(c, key, &r->key); + +- ubifs_assert(!*p); +- rb_link_node(&r->rb, parent, p); +- rb_insert_color(&r->rb, &c->replay_tree); ++ list_add_tail(&r->list, &c->replay_list); + return 0; + } + +@@ -482,29 +473,90 @@ int ubifs_validate_entry(struct ubifs_info *c, + } + + /** ++ * is_last_bud - check if the bud is the last in the journal head. ++ * @c: UBIFS file-system description object ++ * @bud: bud description object ++ * ++ * This function checks if bud @bud is the last bud in its journal head. This ++ * information is then used by 'replay_bud()' to decide whether the bud can ++ * have corruptions or not. Indeed, only last buds can be corrupted by power ++ * cuts. Returns %1 if this is the last bud, and %0 if not. ++ */ ++static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) ++{ ++ struct ubifs_jhead *jh = &c->jheads[bud->jhead]; ++ struct ubifs_bud *next; ++ uint32_t data; ++ int err; ++ ++ if (list_is_last(&bud->list, &jh->buds_list)) ++ return 1; ++ ++ /* ++ * The following is a quirk to make sure we work correctly with UBIFS ++ * images used with older UBIFS. ++ * ++ * Normally, the last bud will be the last in the journal head's list ++ * of bud. However, there is one exception if the UBIFS image belongs ++ * to older UBIFS. This is fairly unlikely: one would need to use old ++ * UBIFS, then have a power cut exactly at the right point, and then ++ * try to mount this image with new UBIFS. ++ * ++ * The exception is: it is possible to have 2 buds A and B, A goes ++ * before B, and B is the last, bud B is contains no data, and bud A is ++ * corrupted at the end. The reason is that in older versions when the ++ * journal code switched the next bud (from A to B), it first added a ++ * log reference node for the new bud (B), and only after this it ++ * synchronized the write-buffer of current bud (A). 
But later this was ++ * changed and UBIFS started to always synchronize the write-buffer of ++ * the bud (A) before writing the log reference for the new bud (B). ++ * ++ * But because older UBIFS always synchronized A's write-buffer before ++ * writing to B, we can recognize this exceptional situation but ++ * checking the contents of bud B - if it is empty, then A can be ++ * treated as the last and we can recover it. ++ * ++ * TODO: remove this piece of code in a couple of years (today it is ++ * 16.05.2011). ++ */ ++ next = list_entry(bud->list.next, struct ubifs_bud, list); ++ if (!list_is_last(&next->list, &jh->buds_list)) ++ return 0; ++ ++ err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); ++ if (err) ++ return 0; ++ ++ return data == 0xFFFFFFFF; ++} ++ ++/** + * replay_bud - replay a bud logical eraseblock. + * @c: UBIFS file-system description object +- * @lnum: bud logical eraseblock number to replay +- * @offs: bud start offset +- * @jhead: journal head to which this bud belongs +- * @free: amount of free space in the bud is returned here +- * @dirty: amount of dirty space from padding and deletion nodes is returned +- * here ++ * @b: bud entry which describes the bud + * +- * This function returns zero in case of success and a negative error code in +- * case of failure. ++ * This function replays bud @bud, recovers it if needed, and adds all nodes ++ * from this bud to the replay list. Returns zero in case of success and a ++ * negative error code in case of failure. 
+ */ +-static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, +- int *free, int *dirty) ++static int replay_bud(struct ubifs_info *c, struct bud_entry *b) + { +- int err = 0, used = 0; ++ int is_last = is_last_bud(c, b->bud); ++ int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; +- struct ubifs_bud *bud; + +- dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); +- if (c->need_recovery) +- sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); ++ dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", ++ lnum, b->bud->jhead, offs, is_last); ++ ++ if (c->need_recovery && is_last) ++ /* ++ * Recover only last LEBs in the journal heads, because power ++ * cuts may cause corruptions only in these LEBs, because only ++ * these LEBs could possibly be written to at the power cut ++ * time. ++ */ ++ sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); + else + sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); + if (IS_ERR(sleb)) +@@ -620,19 +672,13 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, + goto out; + } + +- bud = ubifs_search_bud(c, lnum); +- if (!bud) +- BUG(); +- ++ ubifs_assert(ubifs_search_bud(c, lnum)); + ubifs_assert(sleb->endpt - offs >= used); + ubifs_assert(sleb->endpt % c->min_io_size == 0); + +- if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount) +- err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, +- sleb->endpt, UBI_SHORTTERM); +- +- *dirty = sleb->endpt - offs - used; +- *free = c->leb_size - sleb->endpt; ++ b->dirty = sleb->endpt - offs - used; ++ b->free = c->leb_size - sleb->endpt; ++ dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free); + + out: + ubifs_scan_destroy(sleb); +@@ -646,55 +692,6 @@ out_dump: + } + + /** +- * insert_ref_node - insert a reference node to the replay tree. 
+- * @c: UBIFS file-system description object +- * @lnum: node logical eraseblock number +- * @offs: node offset +- * @sqnum: sequence number +- * @free: amount of free space in bud +- * @dirty: amount of dirty space from padding and deletion nodes +- * +- * This function inserts a reference node to the replay tree and returns zero +- * in case of success or a negative error code in case of failure. +- */ +-static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, +- unsigned long long sqnum, int free, int dirty) +-{ +- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; +- struct replay_entry *r; +- +- dbg_mnt("add ref LEB %d:%d", lnum, offs); +- while (*p) { +- parent = *p; +- r = rb_entry(parent, struct replay_entry, rb); +- if (sqnum < r->sqnum) { +- p = &(*p)->rb_left; +- continue; +- } else if (sqnum > r->sqnum) { +- p = &(*p)->rb_right; +- continue; +- } +- ubifs_err("duplicate sqnum in replay tree"); +- return -EINVAL; +- } +- +- r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); +- if (!r) +- return -ENOMEM; +- +- r->lnum = lnum; +- r->offs = offs; +- r->sqnum = sqnum; +- r->flags = REPLAY_REF; +- r->free = free; +- r->dirty = dirty; +- +- rb_link_node(&r->rb, parent, p); +- rb_insert_color(&r->rb, &c->replay_tree); +- return 0; +-} +- +-/** + * replay_buds - replay all buds. 
+ * @c: UBIFS file-system description object + * +@@ -704,17 +701,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, + static int replay_buds(struct ubifs_info *c) + { + struct bud_entry *b; +- int err, uninitialized_var(free), uninitialized_var(dirty); ++ int err; ++ unsigned long long prev_sqnum = 0; + + list_for_each_entry(b, &c->replay_buds, list) { +- err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, +- &free, &dirty); +- if (err) +- return err; +- err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, +- free, dirty); ++ err = replay_bud(c, b); + if (err) + return err; ++ ++ ubifs_assert(b->sqnum > prev_sqnum); ++ prev_sqnum = b->sqnum; + } + + return 0; +@@ -1054,25 +1050,29 @@ int ubifs_replay_journal(struct ubifs_info *c) + if (err) + goto out; + +- err = apply_replay_tree(c); ++ err = apply_replay_list(c); ++ if (err) ++ goto out; ++ ++ err = set_buds_lprops(c); + if (err) + goto out; + + /* +- * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable +- * to roughly estimate index growth. Things like @c->min_idx_lebs ++ * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable ++ * to roughly estimate index growth. Things like @c->bi.min_idx_lebs + * depend on it. This means we have to initialize it to make sure + * budgeting works properly. 
+ */ +- c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); +- c->budg_uncommitted_idx *= c->max_idx_node_sz; ++ c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); ++ c->bi.uncommitted_idx *= c->max_idx_node_sz; + + ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); + dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " + "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, + (unsigned long)c->highest_inum); + out: +- destroy_replay_tree(c); ++ destroy_replay_list(c); + destroy_bud_list(c); + c->replaying = 0; + return err; +diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c +index bf31b47..771f7fb 100644 +--- a/fs/ubifs/sb.c ++++ b/fs/ubifs/sb.c +@@ -247,7 +247,7 @@ static int create_default_filesystem(struct ubifs_info *c) + mst->total_dirty = cpu_to_le64(tmp64); + + /* The indexing LEB does not contribute to dark space */ +- tmp64 = (c->main_lebs - 1) * c->dark_wm; ++ tmp64 = ((long long)(c->main_lebs - 1) * c->dark_wm); + mst->total_dark = cpu_to_le64(tmp64); + + mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ); +@@ -410,13 +410,23 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) + } + + if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { +- err = 7; ++ ubifs_err("too few main LEBs count %d, must be at least %d", ++ c->main_lebs, UBIFS_MIN_MAIN_LEBS); + goto failed; + } + +- if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS || +- c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) { +- err = 8; ++ max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS; ++ if (c->max_bud_bytes < max_bytes) { ++ ubifs_err("too small journal (%lld bytes), must be at least " ++ "%lld bytes", c->max_bud_bytes, max_bytes); ++ goto failed; ++ } ++ ++ max_bytes = (long long)c->leb_size * c->main_lebs; ++ if (c->max_bud_bytes > max_bytes) { ++ ubifs_err("too large journal size (%lld bytes), only %lld bytes" ++ "available in the main area", ++ c->max_bud_bytes, max_bytes); + goto failed; + } + +@@ -450,7 
+460,6 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) + goto failed; + } + +- max_bytes = c->main_lebs * (long long)c->leb_size; + if (c->rp_size < 0 || max_bytes < c->rp_size) { + err = 14; + goto failed; +@@ -475,7 +484,8 @@ failed: + * @c: UBIFS file-system description object + * + * This function returns a pointer to the superblock node or a negative error +- * code. ++ * code. Note, the user of this function is responsible of kfree()'ing the ++ * returned superblock buffer. + */ + struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) + { +@@ -616,6 +626,7 @@ int ubifs_read_superblock(struct ubifs_info *c) + c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); + memcpy(&c->uuid, &sup->uuid, 16); + c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); ++ c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP); + + /* Automatically increase file system size to the maximum size */ + c->old_leb_cnt = c->leb_cnt; +@@ -650,3 +661,152 @@ out: + kfree(sup); + return err; + } ++ ++/** ++ * fixup_leb - fixup/unmap an LEB containing free space. ++ * @c: UBIFS file-system description object ++ * @lnum: the LEB number to fix up ++ * @len: number of used bytes in LEB (starting at offset 0) ++ * ++ * This function reads the contents of the given LEB number @lnum, then fixes ++ * it up, so that empty min. I/O units in the end of LEB are actually erased on ++ * flash (rather than being just all-0xff real data). If the LEB is completely ++ * empty, it is simply unmapped. 
++ */ ++static int fixup_leb(struct ubifs_info *c, int lnum, int len) ++{ ++ int err; ++ ++ ubifs_assert(len >= 0); ++ ubifs_assert(len % c->min_io_size == 0); ++ ubifs_assert(len < c->leb_size); ++ ++ if (len == 0) { ++ dbg_mnt("unmap empty LEB %d", lnum); ++ return ubifs_leb_unmap(c, lnum); ++ } ++ ++ dbg_mnt("fixup LEB %d, data len %d", lnum, len); ++ err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1); ++ if (err) ++ return err; ++ ++ return ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN); ++} ++ ++/** ++ * fixup_free_space - find & remap all LEBs containing free space. ++ * @c: UBIFS file-system description object ++ * ++ * This function walks through all LEBs in the filesystem and fiexes up those ++ * containing free/empty space. ++ */ ++static int fixup_free_space(struct ubifs_info *c) ++{ ++ int lnum, err = 0; ++ struct ubifs_lprops *lprops; ++ ++ ubifs_get_lprops(c); ++ ++ /* Fixup LEBs in the master area */ ++ for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) { ++ err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz); ++ if (err) ++ goto out; ++ } ++ ++ /* Unmap unused log LEBs */ ++ lnum = ubifs_next_log_lnum(c, c->lhead_lnum); ++ while (lnum != c->ltail_lnum) { ++ err = fixup_leb(c, lnum, 0); ++ if (err) ++ goto out; ++ lnum = ubifs_next_log_lnum(c, lnum); ++ } ++ ++ /* Fixup the current log head */ ++ err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); ++ if (err) ++ goto out; ++ ++ /* Fixup LEBs in the LPT area */ ++ for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { ++ int free = c->ltab[lnum - c->lpt_first].free; ++ ++ if (free > 0) { ++ err = fixup_leb(c, lnum, c->leb_size - free); ++ if (err) ++ goto out; ++ } ++ } ++ ++ /* Unmap LEBs in the orphans area */ ++ for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { ++ err = fixup_leb(c, lnum, 0); ++ if (err) ++ goto out; ++ } ++ ++ /* Fixup LEBs in the main area */ ++ for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { ++ lprops = ubifs_lpt_lookup(c, lnum); ++ if 
(IS_ERR(lprops)) { ++ err = PTR_ERR(lprops); ++ goto out; ++ } ++ ++ if (lprops->free > 0) { ++ err = fixup_leb(c, lnum, c->leb_size - lprops->free); ++ if (err) ++ goto out; ++ } ++ } ++ ++out: ++ ubifs_release_lprops(c); ++ return err; ++} ++ ++/** ++ * ubifs_fixup_free_space - find & fix all LEBs with free space. ++ * @c: UBIFS file-system description object ++ * ++ * This function fixes up LEBs containing free space on first mount, if the ++ * appropriate flag was set when the FS was created. Each LEB with one or more ++ * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure ++ * the free space is actually erased. E.g., this is necessary for some NAND ++ * chips, since the free space may have been programmed like real "0xff" data ++ * (generating a non-0xff ECC), causing future writes to the not-really-erased ++ * NAND pages to behave badly. After the space is fixed up, the superblock flag ++ * is cleared, so that this is skipped for all future mounts. ++ */ ++int ubifs_fixup_free_space(struct ubifs_info *c) ++{ ++ int err; ++ struct ubifs_sb_node *sup; ++ ++ ubifs_assert(c->space_fixup); ++ ubifs_assert(!c->ro_mount); ++ ++ ubifs_msg("start fixing up free space"); ++ ++ err = fixup_free_space(c); ++ if (err) ++ return err; ++ ++ sup = ubifs_read_sb_node(c); ++ if (IS_ERR(sup)) ++ return PTR_ERR(sup); ++ ++ /* Free-space fixup is no longer required */ ++ c->space_fixup = 0; ++ sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP); ++ ++ err = ubifs_write_sb_node(c, sup); ++ kfree(sup); ++ if (err) ++ return err; ++ ++ ubifs_msg("free space fixup complete"); ++ return err; ++} +diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c +index 3e1ee57..37383e8 100644 +--- a/fs/ubifs/scan.c ++++ b/fs/ubifs/scan.c +@@ -148,7 +148,7 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, + INIT_LIST_HEAD(&sleb->nodes); + sleb->buf = sbuf; + +- err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); ++ err = 
ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); + if (err && err != -EBADMSG) { + ubifs_err("cannot read %d bytes from LEB %d:%d," + " error %d", c->leb_size - offs, lnum, offs, err); +@@ -240,7 +240,7 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, + int len; + + ubifs_err("corruption at LEB %d:%d", lnum, offs); +- if (dbg_failure_mode) ++ if (dbg_is_tst_rcvry(c)) + return; + len = c->leb_size - offs; + if (len > 8192) +@@ -328,7 +328,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, + if (!quiet) + ubifs_err("empty space starts at non-aligned offset %d", + offs); +- goto corrupted;; ++ goto corrupted; + } + + ubifs_end_scan(c, sleb, lnum, offs); +diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c +index 46961c0..d8f5d0f 100644 +--- a/fs/ubifs/shrinker.c ++++ b/fs/ubifs/shrinker.c +@@ -283,7 +283,11 @@ int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask) + long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); + + if (nr == 0) +- return clean_zn_cnt; ++ /* ++ * Due to the way UBIFS updates the clean znode counter it may ++ * temporarily be negative. ++ */ ++ return clean_zn_cnt >= 0 ? 
clean_zn_cnt : 1; + + if (!clean_zn_cnt) { + /* +diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c +index 91fac54..83651cd 100644 +--- a/fs/ubifs/super.c ++++ b/fs/ubifs/super.c +@@ -85,7 +85,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) + if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) + return 4; + +- if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG) ++ if (ui->xattr && !S_ISREG(inode->i_mode)) + return 5; + + if (!ubifs_compr_present(ui->compr_type)) { +@@ -94,7 +94,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) + ubifs_compr_name(ui->compr_type)); + } + +- err = dbg_check_dir_size(c, inode); ++ err = dbg_check_dir(c, inode); + return err; + } + +@@ -367,7 +367,7 @@ out: + ubifs_release_dirty_inode_budget(c, ui); + else { + /* We've deleted something - clean the "no space" flags */ +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + } + done: +@@ -504,9 +504,12 @@ static int init_constants_early(struct ubifs_info *c) + + c->leb_cnt = c->vi.size; + c->leb_size = c->vi.usable_leb_size; ++ c->leb_start = c->di.leb_start; + c->half_leb_size = c->leb_size / 2; + c->min_io_size = c->di.min_io_size; + c->min_io_shift = fls(c->min_io_size) - 1; ++ c->max_write_size = c->di.max_write_size; ++ c->max_write_shift = fls(c->max_write_size) - 1; + + if (c->leb_size < UBIFS_MIN_LEB_SZ) { + ubifs_err("too small LEBs (%d bytes), min. is %d bytes", +@@ -526,6 +529,18 @@ static int init_constants_early(struct ubifs_info *c) + } + + /* ++ * Maximum write size has to be greater or equivalent to min. I/O ++ * size, and be multiple of min. I/O size. ++ */ ++ if (c->max_write_size < c->min_io_size || ++ c->max_write_size % c->min_io_size || ++ !is_power_of_2(c->max_write_size)) { ++ ubifs_err("bad write buffer size %d for %d min. 
I/O unit", ++ c->max_write_size, c->min_io_size); ++ return -EINVAL; ++ } ++ ++ /* + * UBIFS aligns all node to 8-byte boundary, so to make function in + * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is + * less than 8. +@@ -533,6 +548,10 @@ static int init_constants_early(struct ubifs_info *c) + if (c->min_io_size < 8) { + c->min_io_size = 8; + c->min_io_shift = 3; ++ if (c->max_write_size < c->min_io_size) { ++ c->max_write_size = c->min_io_size; ++ c->max_write_shift = c->min_io_shift; ++ } + } + + c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); +@@ -667,11 +686,11 @@ static int init_constants_sb(struct ubifs_info *c) + * be compressed and direntries are of the maximum size. + * + * Note, data, which may be stored in inodes is budgeted separately, so +- * it is not included into 'c->inode_budget'. ++ * it is not included into 'c->bi.inode_budget'. + */ +- c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; +- c->inode_budget = UBIFS_INO_NODE_SZ; +- c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; ++ c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; ++ c->bi.inode_budget = UBIFS_INO_NODE_SZ; ++ c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ; + + /* + * When the amount of flash space used by buds becomes +@@ -715,7 +734,7 @@ static void init_constants_master(struct ubifs_info *c) + { + long long tmp64; + +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + c->report_rp_size = ubifs_reported_space(c, c->rp_size); + + /* +@@ -784,15 +803,18 @@ static int alloc_wbufs(struct ubifs_info *c) + + c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; + c->jheads[i].wbuf.jhead = i; ++ c->jheads[i].grouped = 1; + } + + c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; + /* + * Garbage Collector head likely contains long-term data and +- * does not need to be synchronized by timer. ++ * does not need to be synchronized by timer. Also GC head nodes are ++ * not grouped. 
+ */ + c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; + c->jheads[GCHD].wbuf.no_timer = 1; ++ c->jheads[GCHD].grouped = 0; + + return 0; + } +@@ -884,7 +906,7 @@ static int check_volume_empty(struct ubifs_info *c) + + c->empty = 1; + for (lnum = 0; lnum < c->leb_cnt; lnum++) { +- err = ubi_is_mapped(c->ubi, lnum); ++ err = ubifs_is_mapped(c, lnum); + if (unlikely(err < 0)) + return err; + if (err == 1) { +@@ -1117,8 +1139,8 @@ static int check_free_space(struct ubifs_info *c) + { + ubifs_assert(c->dark_wm > 0); + if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { +- ubifs_err("insufficient free space to mount in read/write mode"); +- dbg_dump_budg(c); ++ ubifs_err("insufficient free space to mount in R/W mode"); ++ dbg_dump_budg(c, &c->bi); + dbg_dump_lprops(c); + return -ENOSPC; + } +@@ -1194,11 +1216,14 @@ static int mount_ubifs(struct ubifs_info *c) + if (c->bulk_read == 1) + bu_init(c); + +- /* +- * We have to check all CRCs, even for data nodes, when we mount the FS +- * (specifically, when we are replaying). 
+- */ +- c->always_chk_crc = 1; ++ if (!c->ro_mount) { ++ c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, ++ GFP_KERNEL); ++ if (!c->write_reserve_buf) ++ goto out_free; ++ } ++ ++ c->mounting = 1; + + err = ubifs_read_superblock(c); + if (err) +@@ -1227,12 +1252,12 @@ static int mount_ubifs(struct ubifs_info *c) + goto out_free; + } + ++ err = alloc_wbufs(c); ++ if (err) ++ goto out_cbuf; ++ + sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); + if (!c->ro_mount) { +- err = alloc_wbufs(c); +- if (err) +- goto out_cbuf; +- + /* Create background thread */ + c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); + if (IS_ERR(c->bgt)) { +@@ -1254,12 +1279,25 @@ static int mount_ubifs(struct ubifs_info *c) + if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { + ubifs_msg("recovery needed"); + c->need_recovery = 1; +- if (!c->ro_mount) { +- err = ubifs_recover_inl_heads(c, c->sbuf); +- if (err) +- goto out_master; +- } +- } else if (!c->ro_mount) { ++ } ++ ++ if (c->need_recovery && !c->ro_mount) { ++ err = ubifs_recover_inl_heads(c, c->sbuf); ++ if (err) ++ goto out_master; ++ } ++ ++ err = ubifs_lpt_init(c, 1, !c->ro_mount); ++ if (err) ++ goto out_master; ++ ++ if (!c->ro_mount && c->space_fixup) { ++ err = ubifs_fixup_free_space(c); ++ if (err) ++ goto out_master; ++ } ++ ++ if (!c->ro_mount) { + /* + * Set the "dirty" flag so that if we reboot uncleanly we + * will notice this immediately on the next mount. 
+@@ -1267,14 +1305,10 @@ static int mount_ubifs(struct ubifs_info *c) + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + err = ubifs_write_master(c); + if (err) +- goto out_master; ++ goto out_lpt; + } + +- err = ubifs_lpt_init(c, 1, !c->ro_mount); +- if (err) +- goto out_lpt; +- +- err = dbg_check_idx_size(c, c->old_idx_sz); ++ err = dbg_check_idx_size(c, c->bi.old_idx_sz); + if (err) + goto out_lpt; + +@@ -1283,7 +1317,7 @@ static int mount_ubifs(struct ubifs_info *c) + goto out_journal; + + /* Calculate 'min_idx_lebs' after journal replay */ +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); + if (err) +@@ -1374,7 +1408,7 @@ static int mount_ubifs(struct ubifs_info *c) + if (err) + goto out_infos; + +- c->always_chk_crc = 0; ++ c->mounting = 0; + + ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", + c->vi.ubi_num, c->vi.vol_id, c->vi.name); +@@ -1395,6 +1429,7 @@ static int mount_ubifs(struct ubifs_info *c) + + dbg_msg("compiled on: " __DATE__ " at " __TIME__); + dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); ++ dbg_msg("max. write size: %d bytes", c->max_write_size); + dbg_msg("LEB size: %d bytes (%d KiB)", + c->leb_size, c->leb_size >> 10); + dbg_msg("data journal heads: %d", +@@ -1411,7 +1446,8 @@ static int mount_ubifs(struct ubifs_info *c) + c->main_lebs, c->main_first, c->leb_cnt - 1); + dbg_msg("index LEBs: %d", c->lst.idx_lebs); + dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", +- c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); ++ c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, ++ c->bi.old_idx_sz >> 20); + dbg_msg("key hash type: %d", c->key_hash_type); + dbg_msg("tree fanout: %d", c->fanout); + dbg_msg("reserved GC LEB: %d", c->gc_lnum); +@@ -1424,9 +1460,9 @@ static int mount_ubifs(struct ubifs_info *c) + UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); + dbg_msg("node sizes: ref %zu, cmt. 
start %zu, orph %zu", + UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); +- dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu", +- UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, +- UBIFS_MAX_DENT_NODE_SZ); ++ dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", ++ UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, ++ UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); + dbg_msg("dead watermark: %d", c->dead_wm); + dbg_msg("dark watermark: %d", c->dark_wm); + dbg_msg("LEB overhead: %d", c->leb_overhead); +@@ -1466,6 +1502,7 @@ out_wbufs: + out_cbuf: + kfree(c->cbuf); + out_free: ++ kfree(c->write_reserve_buf); + kfree(c->bu.buf); + vfree(c->ileb_buf); + vfree(c->sbuf); +@@ -1504,6 +1541,7 @@ static void ubifs_umount(struct ubifs_info *c) + kfree(c->cbuf); + kfree(c->rcvrd_mst_node); + kfree(c->mst_node); ++ kfree(c->write_reserve_buf); + kfree(c->bu.buf); + vfree(c->ileb_buf); + vfree(c->sbuf); +@@ -1535,7 +1573,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) + mutex_lock(&c->umount_mutex); + dbg_save_space_info(c); + c->remounting_rw = 1; +- c->always_chk_crc = 1; ++ c->ro_mount = 0; + + err = check_free_space(c); + if (err) +@@ -1551,6 +1589,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) + } + sup->leb_cnt = cpu_to_le32(c->leb_cnt); + err = ubifs_write_sb_node(c, sup); ++ kfree(sup); + if (err) + goto out; + } +@@ -1590,16 +1629,14 @@ static int ubifs_remount_rw(struct ubifs_info *c) + goto out; + } + +- err = ubifs_lpt_init(c, 0, 1); +- if (err) ++ c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL); ++ if (!c->write_reserve_buf) + goto out; + +- err = alloc_wbufs(c); ++ err = ubifs_lpt_init(c, 0, 1); + if (err) + goto out; + +- ubifs_create_buds_lists(c); +- + /* Create background thread */ + c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); + if (IS_ERR(c->bgt)) { +@@ -1634,20 +1671,37 @@ static int ubifs_remount_rw(struct ubifs_info *c) + if (err) + goto out; + ++ 
dbg_gen("re-mounted read-write"); ++ c->remounting_rw = 0; ++ + if (c->need_recovery) { + c->need_recovery = 0; + ubifs_msg("deferred recovery completed"); ++ } else { ++ /* ++ * Do not run the debugging space check if the were doing ++ * recovery, because when we saved the information we had the ++ * file-system in a state where the TNC and lprops has been ++ * modified in memory, but all the I/O operations (including a ++ * commit) were deferred. So the file-system was in ++ * "non-committed" state. Now the file-system is in committed ++ * state, and of course the amount of free space will change ++ * because, for example, the old index size was imprecise. ++ */ ++ err = dbg_check_space_info(c); ++ } ++ ++ if (c->space_fixup) { ++ err = ubifs_fixup_free_space(c); ++ if (err) ++ goto out; + } + +- dbg_gen("re-mounted read-write"); +- c->ro_mount = 0; +- c->remounting_rw = 0; +- c->always_chk_crc = 0; +- err = dbg_check_space_info(c); + mutex_unlock(&c->umount_mutex); + return err; + + out: ++ c->ro_mount = 1; + vfree(c->orph_buf); + c->orph_buf = NULL; + if (c->bgt) { +@@ -1655,11 +1709,12 @@ out: + c->bgt = NULL; + } + free_wbufs(c); ++ kfree(c->write_reserve_buf); ++ c->write_reserve_buf = NULL; + vfree(c->ileb_buf); + c->ileb_buf = NULL; + ubifs_lpt_free(c, 1); + c->remounting_rw = 0; +- c->always_chk_crc = 0; + mutex_unlock(&c->umount_mutex); + return err; + } +@@ -1696,9 +1751,10 @@ static void ubifs_remount_ro(struct ubifs_info *c) + if (err) + ubifs_ro_mode(c, err); + +- free_wbufs(c); + vfree(c->orph_buf); + c->orph_buf = NULL; ++ kfree(c->write_reserve_buf); ++ c->write_reserve_buf = NULL; + vfree(c->ileb_buf); + c->ileb_buf = NULL; + ubifs_lpt_free(c, 1); +@@ -1722,10 +1778,11 @@ static void ubifs_put_super(struct super_block *sb) + * of the media. For example, there will be dirty inodes if we failed + * to write them back because of I/O errors. 
+ */ +- ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); +- ubifs_assert(c->budg_idx_growth == 0); +- ubifs_assert(c->budg_dd_growth == 0); +- ubifs_assert(c->budg_data_growth == 0); ++ if (!c->ro_error) { ++ ubifs_assert(c->bi.idx_growth == 0); ++ ubifs_assert(c->bi.dd_growth == 0); ++ ubifs_assert(c->bi.data_growth == 0); ++ } + + /* + * The 'c->umount_lock' prevents races between UBIFS memory shrinker +@@ -1929,6 +1986,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) + mutex_init(&c->mst_mutex); + mutex_init(&c->umount_mutex); + mutex_init(&c->bu_mutex); ++ mutex_init(&c->write_reserve_mutex); + init_waitqueue_head(&c->cmt_wq); + c->buds = RB_ROOT; + c->old_idx = RB_ROOT; +@@ -1946,6 +2004,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) + INIT_LIST_HEAD(&c->old_buds); + INIT_LIST_HEAD(&c->orph_list); + INIT_LIST_HEAD(&c->orph_new); ++ c->no_chk_data_crc = 1; + + c->vfs_sb = sb; + c->highest_inum = UBIFS_FIRST_INO; +diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c +index ad9cf01..16ad84d 100644 +--- a/fs/ubifs/tnc.c ++++ b/fs/ubifs/tnc.c +@@ -223,7 +223,7 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c, + __set_bit(DIRTY_ZNODE, &zn->flags); + __clear_bit(COW_ZNODE, &zn->flags); + +- ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); ++ ubifs_assert(!ubifs_zn_obsolete(znode)); + __set_bit(OBSOLETE_ZNODE, &znode->flags); + + if (znode->level != 0) { +@@ -271,7 +271,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, + struct ubifs_znode *zn; + int err; + +- if (!test_bit(COW_ZNODE, &znode->flags)) { ++ if (!ubifs_zn_cow(znode)) { + /* znode is not being committed */ + if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { + atomic_long_inc(&c->dirty_zn_cnt); +@@ -344,12 +344,11 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, + return err; + } + +- lnc_node = kmalloc(zbr->len, GFP_NOFS); ++ lnc_node = kmemdup(node, zbr->len, GFP_NOFS); + if 
(!lnc_node) + /* We don't have to have the cache, so no error */ + return 0; + +- memcpy(lnc_node, node, zbr->len); + zbr->leaf = lnc_node; + return 0; + } +@@ -447,8 +446,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, + * + * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc + * is true (it is controlled by corresponding mount option). However, if +- * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always +- * checked. ++ * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to ++ * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is ++ * because during mounting or re-mounting from R/O mode to R/W mode we may read ++ * journal nodes (when replaying the journal or doing the recovery) and the ++ * journal nodes may potentially be corrupted, so checking is required. + */ + static int try_read_node(const struct ubifs_info *c, void *buf, int type, + int len, int lnum, int offs) +@@ -459,7 +461,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, + + dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + +- err = ubi_read(c->ubi, lnum, buf, offs, len); ++ err = ubifs_leb_read(c, lnum, buf, offs, len, 1); + if (err) { + ubifs_err("cannot read node type %d from LEB %d:%d, error %d", + type, lnum, offs, err); +@@ -476,7 +478,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, + if (node_len != len) + return 0; + +- if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) ++ if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting && ++ !c->remounting_rw) + return 1; + + crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); +@@ -502,7 +505,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, + { + int ret; + +- dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); ++ dbg_tnck(key, "LEB %d:%d, key ", zbr->lnum,
zbr->offs); + + ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, + zbr->offs); +@@ -516,8 +519,8 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, + ret = 0; + } + if (ret == 0 && c->replaying) +- dbg_mnt("dangling branch LEB %d:%d len %d, key %s", +- zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); ++ dbg_mntk(key, "dangling branch LEB %d:%d len %d, key ", ++ zbr->lnum, zbr->offs, zbr->len); + return ret; + } + +@@ -992,9 +995,9 @@ static int fallible_resolve_collision(struct ubifs_info *c, + if (adding || !o_znode) + return 0; + +- dbg_mnt("dangling match LEB %d:%d len %d %s", ++ dbg_mntk(key, "dangling match LEB %d:%d len %d key ", + o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, +- o_znode->zbranch[o_n].len, DBGKEY(key)); ++ o_znode->zbranch[o_n].len); + *zn = o_znode; + *n = o_n; + return 1; +@@ -1176,7 +1179,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode *znode; + unsigned long time = get_seconds(); + +- dbg_tnc("search key %s", DBGKEY(key)); ++ dbg_tnck(key, "search key "); + ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); + + znode = c->zroot.znode; +@@ -1312,7 +1315,7 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode *znode; + unsigned long time = get_seconds(); + +- dbg_tnc("search and dirty key %s", DBGKEY(key)); ++ dbg_tnck(key, "search and dirty key "); + + znode = c->zroot.znode; + if (unlikely(!znode)) { +@@ -1662,7 +1665,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, + if (!overlap) { + /* We may safely unlock the write-buffer and read the data */ + spin_unlock(&wbuf->lock); +- return ubi_read(c->ubi, lnum, buf, offs, len); ++ return ubifs_leb_read(c, lnum, buf, offs, len, 0); + } + + /* Don't read under wbuf */ +@@ -1676,7 +1679,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, + + if (rlen > 0) + /* Read everything 
that goes before write-buffer */ +- return ubi_read(c->ubi, lnum, buf, offs, rlen); ++ return ubifs_leb_read(c, lnum, buf, offs, rlen, 0); + + return 0; + } +@@ -1719,8 +1722,8 @@ static int validate_data_node(struct ubifs_info *c, void *buf, + if (!keys_eq(c, &zbr->key, &key1)) { + ubifs_err("bad key in node at LEB %d:%d", + zbr->lnum, zbr->offs); +- dbg_tnc("looked for key %s found node's key %s", +- DBGKEY(&zbr->key), DBGKEY1(&key1)); ++ dbg_tnck(&zbr->key, "looked for key "); ++ dbg_tnck(&key1, "found node's key "); + goto out_err; + } + +@@ -1763,7 +1766,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) + if (wbuf) + err = read_wbuf(wbuf, bu->buf, len, lnum, offs); + else +- err = ubi_read(c->ubi, lnum, bu->buf, offs, len); ++ err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0); + + /* Check for a race with GC */ + if (maybe_leb_gced(c, lnum, bu->gc_seq)) +@@ -1773,7 +1776,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) + ubifs_err("failed to read from LEB %d:%d, error %d", + lnum, offs, err); + dbg_dump_stack(); +- dbg_tnc("key %s", DBGKEY(&bu->key)); ++ dbg_tnck(&bu->key, "key "); + return err; + } + +@@ -1808,7 +1811,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, + int found, n, err; + struct ubifs_znode *znode; + +- dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); ++ dbg_tnck(key, "name '%.*s' key ", nm->len, nm->name); + mutex_lock(&c->tnc_mutex); + found = ubifs_lookup_level0(c, key, &znode, &n); + if (!found) { +@@ -1982,8 +1985,7 @@ again: + zp = znode->parent; + if (znode->child_cnt < c->fanout) { + ubifs_assert(n != c->fanout); +- dbg_tnc("inserted at %d level %d, key %s", n, znode->level, +- DBGKEY(key)); ++ dbg_tnck(key, "inserted at %d level %d, key ", n, znode->level); + + insert_zbranch(znode, zbr, n); + +@@ -1998,7 +2000,7 @@ again: + * Unfortunately, @znode does not have more empty slots and we have to + * split it. 
+ */ +- dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); ++ dbg_tnck(key, "splitting level %d, key ", znode->level); + + if (znode->alt) + /* +@@ -2092,7 +2094,7 @@ do_split: + } + + /* Insert new key and branch */ +- dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); ++ dbg_tnck(key, "inserting at %d level %d, key ", n, zn->level); + + insert_zbranch(zi, zbr, n); + +@@ -2168,7 +2170,7 @@ int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); +- dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); ++ dbg_tnck(key, "%d:%d, len %d, key ", lnum, offs, len); + found = lookup_level0_dirty(c, key, &znode, &n); + if (!found) { + struct ubifs_zbranch zbr; +@@ -2217,8 +2219,8 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); +- dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, +- old_offs, lnum, offs, len, DBGKEY(key)); ++ dbg_tnck(key, "old LEB %d:%d, new LEB %d:%d, len %d, key ", old_lnum, ++ old_offs, lnum, offs, len); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; +@@ -2300,8 +2302,8 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); +- dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name, +- DBGKEY(key)); ++ dbg_tnck(key, "LEB %d:%d, name '%.*s', key ", ++ lnum, offs, nm->len, nm->name); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; +@@ -2394,7 +2396,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) + /* Delete without merge for now */ + ubifs_assert(znode->level == 0); + ubifs_assert(n >= 0 && n < c->fanout); +- dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key)); ++ dbg_tnck(&znode->zbranch[n].key, "deleting key "); + + zbr = 
&znode->zbranch[n]; + lnc_free(zbr); +@@ -2419,7 +2421,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) + */ + + do { +- ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); ++ ubifs_assert(!ubifs_zn_obsolete(znode)); + ubifs_assert(ubifs_zn_dirty(znode)); + + zp = znode->parent; +@@ -2475,9 +2477,8 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) + c->zroot.offs = zbr->offs; + c->zroot.len = zbr->len; + c->zroot.znode = znode; +- ubifs_assert(!test_bit(OBSOLETE_ZNODE, +- &zp->flags)); +- ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); ++ ubifs_assert(!ubifs_zn_obsolete(zp)); ++ ubifs_assert(ubifs_zn_dirty(zp)); + atomic_long_dec(&c->dirty_zn_cnt); + + if (zp->cnext) { +@@ -2505,7 +2506,7 @@ int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key) + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); +- dbg_tnc("key %s", DBGKEY(key)); ++ dbg_tnck(key, "key "); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; +@@ -2536,7 +2537,7 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); +- dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key)); ++ dbg_tnck(key, "%.*s, key ", nm->len, nm->name); + err = lookup_level0_dirty(c, key, &znode, &n); + if (err < 0) + goto out_unlock; +@@ -2553,11 +2554,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, + if (err) { + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { +- znode = dirty_cow_bottom_up(c, znode); +- if (IS_ERR(znode)) { +- err = PTR_ERR(znode); +- goto out_unlock; +- } ++ znode = dirty_cow_bottom_up(c, znode); ++ if (IS_ERR(znode)) { ++ err = PTR_ERR(znode); ++ goto out_unlock; ++ } + } + err = tnc_delete(c, znode, n); + } +@@ -2651,7 +2652,7 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, + dbg_dump_znode(c, znode); + 
goto out_unlock; + } +- dbg_tnc("removing %s", DBGKEY(key)); ++ dbg_tnck(key, "removing key "); + } + if (k) { + for (i = n + 1 + k; i < znode->child_cnt; i++) +@@ -2771,7 +2772,7 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, + struct ubifs_zbranch *zbr; + union ubifs_key *dkey; + +- dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key)); ++ dbg_tnck(key, "%s ", nm->name ? (char *)nm->name : "(lowest)"); + ubifs_assert(is_hash_key(c, key)); + + mutex_lock(&c->tnc_mutex); +@@ -2861,7 +2862,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c) + struct ubifs_znode *znode = cnext; + + cnext = cnext->cnext; +- if (test_bit(OBSOLETE_ZNODE, &znode->flags)) ++ if (ubifs_zn_obsolete(znode)) + kfree(znode); + } while (cnext && cnext != c->cnext); + } +@@ -2872,12 +2873,13 @@ static void tnc_destroy_cnext(struct ubifs_info *c) + */ + void ubifs_tnc_close(struct ubifs_info *c) + { +- long clean_freed; +- + tnc_destroy_cnext(c); + if (c->zroot.znode) { +- clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); +- atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); ++ long n; ++ ++ ubifs_destroy_tnc_subtree(c->zroot.znode); ++ n = atomic_long_read(&c->clean_zn_cnt); ++ atomic_long_sub(n, &ubifs_clean_zn_cnt); + } + kfree(c->gap_lebs); + kfree(c->ilebs); +@@ -3296,7 +3298,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, + + if (!S_ISREG(inode->i_mode)) + return 0; +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + + block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; +@@ -3329,12 +3331,13 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, + + out_dump: + block = key_block(c, key); +- ubifs_err("inode %lu has size %lld, but there are data at offset %lld " +- "(data key %s)", (unsigned long)inode->i_ino, size, +- ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); ++ ubifs_err("inode %lu has size %lld, but there are data at offset %lld", ++ 
(unsigned long)inode->i_ino, size, ++ ((loff_t)block) << UBIFS_BLOCK_SHIFT); ++ mutex_unlock(&c->tnc_mutex); + dbg_dump_inode(c, inode); + dbg_dump_stack(); +- err = -EINVAL; ++ return -EINVAL; + + out_unlock: + mutex_unlock(&c->tnc_mutex); +diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c +index 53288e5..4c15f07 100644 +--- a/fs/ubifs/tnc_commit.c ++++ b/fs/ubifs/tnc_commit.c +@@ -22,6 +22,7 @@ + + /* This file implements TNC functions for committing */ + ++#include <linux/random.h> + #include "ubifs.h" + + /** +@@ -87,8 +88,12 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, + atomic_long_dec(&c->dirty_zn_cnt); + + ubifs_assert(ubifs_zn_dirty(znode)); +- ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); ++ ubifs_assert(ubifs_zn_cow(znode)); + ++ /* ++ * Note, unlike 'write_index()' we do not add memory barriers here ++ * because this function is called with @c->tnc_mutex locked. ++ */ + __clear_bit(DIRTY_ZNODE, &znode->flags); + __clear_bit(COW_ZNODE, &znode->flags); + +@@ -377,15 +382,13 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) + c->gap_lebs = NULL; + return err; + } +- if (!dbg_force_in_the_gaps_enabled) { ++ if (!dbg_is_chk_index(c)) { + /* + * Do not print scary warnings if the debugging + * option which forces in-the-gaps is enabled. 
+ */ +- ubifs_err("out of space"); +- spin_lock(&c->space_lock); +- dbg_dump_budg(c); +- spin_unlock(&c->space_lock); ++ ubifs_warn("out of space"); ++ dbg_dump_budg(c, &c->bi); + dbg_dump_lprops(c); + } + /* Try to commit anyway */ +@@ -493,25 +496,6 @@ static int layout_in_empty_space(struct ubifs_info *c) + else + next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + +- if (c->min_io_size == 1) { +- buf_offs += ALIGN(len, 8); +- if (next_len) { +- if (buf_offs + next_len <= c->leb_size) +- continue; +- err = ubifs_update_one_lp(c, lnum, 0, +- c->leb_size - buf_offs, 0, 0); +- if (err) +- return err; +- lnum = -1; +- continue; +- } +- err = ubifs_update_one_lp(c, lnum, +- c->leb_size - buf_offs, 0, 0, 0); +- if (err) +- return err; +- break; +- } +- + /* Update buffer positions */ + wlen = used + len; + used += ALIGN(len, 8); +@@ -660,7 +644,7 @@ static int get_znodes_to_commit(struct ubifs_info *c) + } + cnt += 1; + while (1) { +- ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); ++ ubifs_assert(!ubifs_zn_cow(znode)); + __set_bit(COW_ZNODE, &znode->flags); + znode->alt = 0; + cnext = find_next_dirty(znode); +@@ -706,7 +690,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt) + c->ilebs[c->ileb_cnt++] = lnum; + dbg_cmt("LEB %d", lnum); + } +- if (dbg_force_in_the_gaps()) ++ if (dbg_is_chk_index(c) && !(random32() & 7)) + return -ENOSPC; + return 0; + } +@@ -796,16 +780,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot) + spin_lock(&c->space_lock); + /* + * Although we have not finished committing yet, update size of the +- * committed index ('c->old_idx_sz') and zero out the index growth ++ * committed index ('c->bi.old_idx_sz') and zero out the index growth + * budget. It is OK to do this now, because we've reserved all the + * space which is needed to commit the index, and it is save for the + * budgeting subsystem to assume the index is already committed, + * even though it is not. 
+ */ +- ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); +- c->old_idx_sz = c->calc_idx_sz; +- c->budg_uncommitted_idx = 0; +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); ++ c->bi.old_idx_sz = c->calc_idx_sz; ++ c->bi.uncommitted_idx = 0; ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + spin_unlock(&c->space_lock); + mutex_unlock(&c->tnc_mutex); + +@@ -832,7 +816,7 @@ static int write_index(struct ubifs_info *c) + struct ubifs_idx_node *idx; + struct ubifs_znode *znode, *cnext; + int i, lnum, offs, len, next_len, buf_len, buf_offs, used; +- int avail, wlen, err, lnum_pos = 0; ++ int avail, wlen, err, lnum_pos = 0, blen, nxt_offs; + + cnext = c->enext; + if (!cnext) +@@ -909,7 +893,7 @@ static int write_index(struct ubifs_info *c) + cnext = znode->cnext; + + ubifs_assert(ubifs_zn_dirty(znode)); +- ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); ++ ubifs_assert(ubifs_zn_cow(znode)); + + /* + * It is important that other threads should see %DIRTY_ZNODE +@@ -924,6 +908,28 @@ static int write_index(struct ubifs_info *c) + clear_bit(COW_ZNODE, &znode->flags); + smp_mb__after_clear_bit(); + ++ /* ++ * We have marked the znode as clean but have not updated the ++ * @c->clean_zn_cnt counter. If this znode becomes dirty again ++ * before 'free_obsolete_znodes()' is called, then ++ * @c->clean_zn_cnt will be decremented before it gets ++ * incremented (resulting in 2 decrements for the same znode). ++ * This means that @c->clean_zn_cnt may become negative for a ++ * while. ++ * ++ * Q: why we cannot increment @c->clean_zn_cnt? ++ * A: because we do not have the @c->tnc_mutex locked, and the ++ * following code would be racy and buggy: ++ * ++ * if (!ubifs_zn_obsolete(znode)) { ++ * atomic_long_inc(&c->clean_zn_cnt); ++ * atomic_long_inc(&ubifs_clean_zn_cnt); ++ * } ++ * ++ * Thus, we just delay the @c->clean_zn_cnt update until we ++ * have the mutex locked. 
++ */ ++ + /* Do not access znode from this point on */ + + /* Update buffer positions */ +@@ -940,65 +946,38 @@ static int write_index(struct ubifs_info *c) + else + next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + +- if (c->min_io_size == 1) { +- /* +- * Write the prepared index node immediately if there is +- * no minimum IO size +- */ +- err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, +- wlen, UBI_SHORTTERM); +- if (err) +- return err; +- buf_offs += ALIGN(wlen, 8); +- if (next_len) { +- used = 0; +- avail = buf_len; +- if (buf_offs + next_len > c->leb_size) { +- err = ubifs_update_one_lp(c, lnum, +- LPROPS_NC, 0, 0, LPROPS_TAKEN); +- if (err) +- return err; +- lnum = -1; +- } ++ nxt_offs = buf_offs + used + next_len; ++ if (next_len && nxt_offs <= c->leb_size) { ++ if (avail > 0) + continue; +- } ++ else ++ blen = buf_len; + } else { +- int blen, nxt_offs = buf_offs + used + next_len; +- +- if (next_len && nxt_offs <= c->leb_size) { +- if (avail > 0) +- continue; +- else +- blen = buf_len; +- } else { +- wlen = ALIGN(wlen, 8); +- blen = ALIGN(wlen, c->min_io_size); +- ubifs_pad(c, c->cbuf + wlen, blen - wlen); +- } +- /* +- * The buffer is full or there are no more znodes +- * to do +- */ +- err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, +- blen, UBI_SHORTTERM); +- if (err) +- return err; +- buf_offs += blen; +- if (next_len) { +- if (nxt_offs > c->leb_size) { +- err = ubifs_update_one_lp(c, lnum, +- LPROPS_NC, 0, 0, LPROPS_TAKEN); +- if (err) +- return err; +- lnum = -1; +- } +- used -= blen; +- if (used < 0) +- used = 0; +- avail = buf_len - used; +- memmove(c->cbuf, c->cbuf + blen, used); +- continue; ++ wlen = ALIGN(wlen, 8); ++ blen = ALIGN(wlen, c->min_io_size); ++ ubifs_pad(c, c->cbuf + wlen, blen - wlen); ++ } ++ ++ /* The buffer is full or there are no more znodes to do */ ++ err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, blen, ++ UBI_SHORTTERM); ++ if (err) ++ return err; ++ buf_offs += blen; ++ if (next_len) { ++ if (nxt_offs > 
c->leb_size) { ++ err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, ++ 0, LPROPS_TAKEN); ++ if (err) ++ return err; ++ lnum = -1; + } ++ used -= blen; ++ if (used < 0) ++ used = 0; ++ avail = buf_len - used; ++ memmove(c->cbuf, c->cbuf + blen, used); ++ continue; + } + break; + } +@@ -1031,7 +1010,7 @@ static void free_obsolete_znodes(struct ubifs_info *c) + do { + znode = cnext; + cnext = znode->cnext; +- if (test_bit(OBSOLETE_ZNODE, &znode->flags)) ++ if (ubifs_zn_obsolete(znode)) + kfree(znode); + else { + znode->cnext = NULL; +diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c +index b48db99..dc28fe6 100644 +--- a/fs/ubifs/tnc_misc.c ++++ b/fs/ubifs/tnc_misc.c +@@ -328,8 +328,8 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, + case UBIFS_XENT_KEY: + break; + default: +- dbg_msg("bad key type at slot %d: %s", i, +- DBGKEY(&zbr->key)); ++ dbg_msg("bad key type at slot %d: %d", ++ i, key_type(c, &zbr->key)); + err = 3; + goto out_dump; + } +@@ -475,7 +475,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, + zbr->offs); + + if (err) { +- dbg_tnc("key %s", DBGKEY(key)); ++ dbg_tnck(key, "key "); + return err; + } + +@@ -484,8 +484,8 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, + if (!keys_eq(c, key, &key1)) { + ubifs_err("bad key in node at LEB %d:%d", + zbr->lnum, zbr->offs); +- dbg_tnc("looked for key %s found node's key %s", +- DBGKEY(key), DBGKEY1(&key1)); ++ dbg_tnck(key, "looked for key "); ++ dbg_tnck(&key1, "but found node's key "); + dbg_dump_node(c, node); + return -EINVAL; + } +diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h +index 191ca78..e24380c 100644 +--- a/fs/ubifs/ubifs-media.h ++++ b/fs/ubifs/ubifs-media.h +@@ -408,9 +408,11 @@ enum { + * Superblock flags. 
+ * + * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set ++ * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed + */ + enum { + UBIFS_FLG_BIGLPT = 0x02, ++ UBIFS_FLG_SPACE_FIXUP = 0x04, + }; + + /** +@@ -434,7 +436,7 @@ struct ubifs_ch { + __u8 node_type; + __u8 group_type; + __u8 padding[2]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * union ubifs_dev_desc - device node descriptor. +@@ -448,7 +450,7 @@ struct ubifs_ch { + union ubifs_dev_desc { + __le32 new; + __le64 huge; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_ino_node - inode node. +@@ -509,7 +511,7 @@ struct ubifs_ino_node { + __le16 compr_type; + __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ + __u8 data[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_dent_node - directory entry node. +@@ -534,7 +536,7 @@ struct ubifs_dent_node { + __le16 nlen; + __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ + __u8 name[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_data_node - data node. +@@ -555,7 +557,7 @@ struct ubifs_data_node { + __le16 compr_type; + __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ + __u8 data[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_trun_node - truncation node. +@@ -575,7 +577,7 @@ struct ubifs_trun_node { + __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ + __le64 old_size; + __le64 new_size; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_pad_node - padding node. +@@ -586,7 +588,7 @@ struct ubifs_trun_node { + struct ubifs_pad_node { + struct ubifs_ch ch; + __le32 pad_len; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_sb_node - superblock node. 
+@@ -644,7 +646,7 @@ struct ubifs_sb_node { + __u8 uuid[16]; + __le32 ro_compat_version; + __u8 padding2[3968]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_mst_node - master node. +@@ -711,7 +713,7 @@ struct ubifs_mst_node { + __le32 idx_lebs; + __le32 leb_cnt; + __u8 padding[344]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_ref_node - logical eraseblock reference node. +@@ -727,7 +729,7 @@ struct ubifs_ref_node { + __le32 offs; + __le32 jhead; + __u8 padding[28]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_branch - key/reference/length branch +@@ -741,7 +743,7 @@ struct ubifs_branch { + __le32 offs; + __le32 len; + __u8 key[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_idx_node - indexing node. +@@ -755,7 +757,7 @@ struct ubifs_idx_node { + __le16 child_cnt; + __le16 level; + __u8 branches[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_cs_node - commit start node. +@@ -765,7 +767,7 @@ struct ubifs_idx_node { + struct ubifs_cs_node { + struct ubifs_ch ch; + __le64 cmt_no; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_orph_node - orphan node. +@@ -777,6 +779,6 @@ struct ubifs_orph_node { + struct ubifs_ch ch; + __le64 cmt_no; + __le64 inos[]; +-} __attribute__ ((packed)); ++} __packed; + + #endif /* __UBIFS_MEDIA_H__ */ +diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h +index 381d6b2..caf9e4b 100644 +--- a/fs/ubifs/ubifs.h ++++ b/fs/ubifs/ubifs.h +@@ -84,9 +84,6 @@ + #define INUM_WARN_WATERMARK 0xFFF00000 + #define INUM_WATERMARK 0xFFFFFF00 + +-/* Largest key size supported in this implementation */ +-#define CUR_MAX_KEY_LEN UBIFS_SK_LEN +- + /* Maximum number of entries in each LPT (LEB category) heap */ + #define LPT_HEAP_SZ 256 + +@@ -151,6 +148,12 @@ + */ + #define WORST_COMPR_FACTOR 2 + ++/* ++ * How much memory is needed for a buffer where we compress a data node.
++ */ ++#define COMPRESSED_DATA_NODE_BUF_SZ \ ++ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR) ++ + /* Maximum expected tree height for use by bottom_up_buf */ + #define BOTTOM_UP_HEIGHT 64 + +@@ -224,14 +227,14 @@ enum { + * LPT cnode flag bits. + * + * DIRTY_CNODE: cnode is dirty +- * COW_CNODE: cnode is being committed and must be copied before writing + * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), +- * so it can (and must) be freed when the commit is finished ++ * so it can (and must) be freed when the commit is finished ++ * COW_CNODE: cnode is being committed and must be copied before writing + */ + enum { + DIRTY_CNODE = 0, +- COW_CNODE = 1, +- OBSOLETE_CNODE = 2, ++ OBSOLETE_CNODE = 1, ++ COW_CNODE = 2, + }; + + /* +@@ -271,10 +274,10 @@ struct ubifs_old_idx { + + /* The below union makes it easier to deal with keys */ + union ubifs_key { +- uint8_t u8[CUR_MAX_KEY_LEN]; +- uint32_t u32[CUR_MAX_KEY_LEN/4]; +- uint64_t u64[CUR_MAX_KEY_LEN/8]; +- __le32 j32[CUR_MAX_KEY_LEN/4]; ++ uint8_t u8[UBIFS_SK_LEN]; ++ uint32_t u32[UBIFS_SK_LEN/4]; ++ uint64_t u64[UBIFS_SK_LEN/8]; ++ __le32 j32[UBIFS_SK_LEN/4]; + }; + + /** +@@ -383,9 +386,9 @@ struct ubifs_gced_idx_leb { + * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses + * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot + * make sure @inode->i_size is always changed under @ui_mutex, because it +- * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock +- * with 'ubifs_writepage()' (see file.c). All the other inode fields are +- * changed under @ui_mutex, so they do not need "shadow" fields. Note, one ++ * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would ++ * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields ++ * are changed under @ui_mutex, so they do not need "shadow" fields. 
Note, one + * could consider to rework locking and base it on "shadow" fields. + */ + struct ubifs_inode { +@@ -646,6 +649,7 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, + * @offs: write-buffer offset in this logical eraseblock + * @avail: number of bytes available in the write-buffer + * @used: number of used bytes in the write-buffer ++ * @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range) + * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, + * %UBI_UNKNOWN) + * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep +@@ -680,6 +684,7 @@ struct ubifs_wbuf { + int offs; + int avail; + int used; ++ int size; + int dtype; + int jhead; + int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); +@@ -714,12 +719,14 @@ struct ubifs_bud { + * struct ubifs_jhead - journal head. + * @wbuf: head's write-buffer + * @buds_list: list of bud LEBs belonging to this journal head ++ * @grouped: non-zero if UBIFS groups nodes when writing to this journal head + * + * Note, the @buds list is protected by the @c->buds_lock. + */ + struct ubifs_jhead { + struct ubifs_wbuf wbuf; + struct list_head buds_list; ++ unsigned int grouped:1; + }; + + /** +@@ -929,6 +936,40 @@ struct ubifs_mount_opts { + unsigned int compr_type:2; + }; + ++/** ++ * struct ubifs_budg_info - UBIFS budgeting information. 
++ * @idx_growth: amount of bytes budgeted for index growth ++ * @data_growth: amount of bytes budgeted for cached data ++ * @dd_growth: amount of bytes budgeted for cached data that will make ++ * other data dirty ++ * @uncommitted_idx: amount of bytes were budgeted for growth of the index, but ++ * which still have to be taken into account because the index ++ * has not been committed so far ++ * @old_idx_sz: size of index on flash ++ * @min_idx_lebs: minimum number of LEBs required for the index ++ * @nospace: non-zero if the file-system does not have flash space (used as ++ * optimization) ++ * @nospace_rp: the same as @nospace, but additionally means that even reserved ++ * pool is full ++ * @page_budget: budget for a page (constant, never changed after mount) ++ * @inode_budget: budget for an inode (constant, never changed after mount) ++ * @dent_budget: budget for a directory entry (constant, never changed after ++ * mount) ++ */ ++struct ubifs_budg_info { ++ long long idx_growth; ++ long long data_growth; ++ long long dd_growth; ++ long long uncommitted_idx; ++ unsigned long long old_idx_sz; ++ int min_idx_lebs; ++ unsigned int nospace:1; ++ unsigned int nospace_rp:1; ++ int page_budget; ++ int inode_budget; ++ int dent_budget; ++}; ++ + struct ubifs_debug_info; + + /** +@@ -972,6 +1013,7 @@ struct ubifs_debug_info; + * @cmt_wq: wait queue to sleep on if the log is full and a commit is running + * + * @big_lpt: flag that LPT is too big to write whole during commit ++ * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up + * @no_chk_data_crc: do not check CRCs when reading data nodes (except during + * recovery) + * @bulk_read: enable bulk-reads +@@ -1003,6 +1045,11 @@ struct ubifs_debug_info; + * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu + * @bu: pre-allocated bulk-read information + * ++ * @write_reserve_mutex: protects @write_reserve_buf ++ * @write_reserve_buf: on the write path we allocate memory, which might
which might ++ * sometimes be unavailable, in which case we use this ++ * write reserve buffer ++ * + * @log_lebs: number of logical eraseblocks in the log + * @log_bytes: log size in bytes + * @log_last: last LEB of the log +@@ -1024,7 +1071,12 @@ struct ubifs_debug_info; + * + * @min_io_size: minimal input/output unit size + * @min_io_shift: number of bits in @min_io_size minus one ++ * @max_write_size: maximum amount of bytes the underlying flash can write at a ++ * time (MTD write buffer size) ++ * @max_write_shift: number of bits in @max_write_size minus one + * @leb_size: logical eraseblock size in bytes ++ * @leb_start: starting offset of logical eraseblocks within physical ++ * eraseblocks + * @half_leb_size: half LEB size + * @idx_leb_size: how many bytes of an LEB are effectively available when it is + * used to store indexing nodes (@leb_size - @max_idx_node_sz) +@@ -1039,32 +1091,14 @@ struct ubifs_debug_info; + * @dirty_zn_cnt: number of dirty znodes + * @clean_zn_cnt: number of clean znodes + * +- * @budg_idx_growth: amount of bytes budgeted for index growth +- * @budg_data_growth: amount of bytes budgeted for cached data +- * @budg_dd_growth: amount of bytes budgeted for cached data that will make +- * other data dirty +- * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, +- * but which still have to be taken into account because +- * the index has not been committed so far +- * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, +- * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst, +- * @nospace, and @nospace_rp; +- * @min_idx_lebs: minimum number of LEBs required for the index +- * @old_idx_sz: size of index on flash ++ * @space_lock: protects @bi and @lst ++ * @lst: lprops statistics ++ * @bi: budgeting information + * @calc_idx_sz: temporary variable which is used to calculate new index size + * (contains accurate new index size at end of TNC commit start) +- * @lst: lprops statistics +- 
* @nospace: non-zero if the file-system does not have flash space (used as +- * optimization) +- * @nospace_rp: the same as @nospace, but additionally means that even reserved +- * pool is full +- * +- * @page_budget: budget for a page +- * @inode_budget: budget for an inode +- * @dent_budget: budget for a directory entry + * + * @ref_node_alsz: size of the LEB reference node aligned to the min. flash +- * I/O unit ++ * I/O unit + * @mst_node_alsz: master node aligned size + * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary + * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary +@@ -1166,22 +1200,20 @@ struct ubifs_debug_info; + * @rp_uid: reserved pool user ID + * @rp_gid: reserved pool group ID + * +- * @empty: if the UBI device is empty +- * @replay_tree: temporary tree used during journal replay ++ * @empty: %1 if the UBI device is empty ++ * @need_recovery: %1 if the file-system needs recovery ++ * @replaying: %1 during journal replay ++ * @mounting: %1 while mounting ++ * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode + * @replay_list: temporary list used during journal replay + * @replay_buds: list of buds to replay + * @cs_sqnum: sequence number of first node in the log (commit start node) + * @replay_sqnum: sequence number of node currently being replayed +- * @need_recovery: file-system needs recovery +- * @replaying: set to %1 during journal replay + * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W + * mode + * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted + * FS to R/W mode + * @size_tree: inode size information for recovery +- * @remounting_rw: set while re-mounting from R/O mode to R/W mode +- * @always_chk_crc: always check CRCs (while mounting and remounting to R/W +- * mode) + * @mount_opts: UBIFS-specific mount options + * + * @dbg: debugging-related information +@@ -1221,6 +1253,7 @@ struct ubifs_info { + wait_queue_head_t cmt_wq; + + 
unsigned int big_lpt:1; ++ unsigned int space_fixup:1; + unsigned int no_chk_data_crc:1; + unsigned int bulk_read:1; + unsigned int default_compr:2; +@@ -1250,6 +1283,9 @@ struct ubifs_info { + struct mutex bu_mutex; + struct bu_info bu; + ++ struct mutex write_reserve_mutex; ++ void *write_reserve_buf; ++ + int log_lebs; + long long log_bytes; + int log_last; +@@ -1271,7 +1307,10 @@ struct ubifs_info { + + int min_io_size; + int min_io_shift; ++ int max_write_size; ++ int max_write_shift; + int leb_size; ++ int leb_start; + int half_leb_size; + int idx_leb_size; + int leb_cnt; +@@ -1285,21 +1324,10 @@ struct ubifs_info { + atomic_long_t dirty_zn_cnt; + atomic_long_t clean_zn_cnt; + +- long long budg_idx_growth; +- long long budg_data_growth; +- long long budg_dd_growth; +- long long budg_uncommitted_idx; + spinlock_t space_lock; +- int min_idx_lebs; +- unsigned long long old_idx_sz; +- unsigned long long calc_idx_sz; + struct ubifs_lp_stats lst; +- unsigned int nospace:1; +- unsigned int nospace_rp:1; +- +- int page_budget; +- int inode_budget; +- int dent_budget; ++ struct ubifs_budg_info bi; ++ unsigned long long calc_idx_sz; + + int ref_node_alsz; + int mst_node_alsz; +@@ -1402,19 +1430,18 @@ struct ubifs_info { + gid_t rp_gid; + + /* The below fields are used only during mounting and re-mounting */ +- int empty; +- struct rb_root replay_tree; ++ unsigned int empty:1; ++ unsigned int need_recovery:1; ++ unsigned int replaying:1; ++ unsigned int mounting:1; ++ unsigned int remounting_rw:1; + struct list_head replay_list; + struct list_head replay_buds; + unsigned long long cs_sqnum; + unsigned long long replay_sqnum; +- int need_recovery; +- int replaying; + struct list_head unclean_leb_list; + struct ubifs_mst_node *rcvrd_mst_node; + struct rb_root size_tree; +- int remounting_rw; +- int always_chk_crc; + struct ubifs_mount_opts mount_opts; + + #ifdef CONFIG_UBIFS_FS_DEBUG +@@ -1438,6 +1465,15 @@ extern struct ubifs_compressor 
*ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; + + /* io.c */ + void ubifs_ro_mode(struct ubifs_info *c, int err); ++int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, ++ int len, int even_ebadmsg); ++int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, ++ int len, int dtype); ++int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, ++ int dtype); ++int ubifs_leb_unmap(struct ubifs_info *c, int lnum); ++int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype); ++int ubifs_is_mapped(const struct ubifs_info *c, int lnum); + int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); + int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, + int dtype); +@@ -1605,6 +1641,7 @@ int ubifs_write_master(struct ubifs_info *c); + int ubifs_read_superblock(struct ubifs_info *c); + struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); + int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); ++int ubifs_fixup_free_space(struct ubifs_info *c); + + /* replay.c */ + int ubifs_validate_entry(struct ubifs_info *c, +@@ -1713,11 +1750,11 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); + int ubifs_recover_master_node(struct ubifs_info *c); + int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); + struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, +- int offs, void *sbuf, int grouped); ++ int offs, void *sbuf, int jhead); + struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, + int offs, void *sbuf); +-int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); +-int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); ++int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf); ++int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf); + int ubifs_rcvry_gc_commit(struct ubifs_info *c); + int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, + int 
deletion, loff_t new_size); +diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c +index c74400f..2fdc4fa 100644 +--- a/fs/ubifs/xattr.c ++++ b/fs/ubifs/xattr.c +@@ -79,9 +79,9 @@ enum { + SECURITY_XATTR, + }; + +-static const struct inode_operations none_inode_operations; +-static const struct address_space_operations none_address_operations; +-static const struct file_operations none_file_operations; ++static const struct inode_operations empty_iops; ++static const struct file_operations empty_fops; ++static struct address_space_operations empty_aops; + + /** + * create_xattr - create an extended attribute. +@@ -130,20 +130,19 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, + } + + /* Re-define all operations to be "nothing" */ +- inode->i_mapping->a_ops = &none_address_operations; +- inode->i_op = &none_inode_operations; +- inode->i_fop = &none_file_operations; ++ inode->i_mapping->a_ops = &empty_aops; ++ inode->i_op = &empty_iops; ++ inode->i_fop = &empty_fops; + + inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; + ui = ubifs_inode(inode); + ui->xattr = 1; + ui->flags |= UBIFS_XATTR_FL; +- ui->data = kmalloc(size, GFP_NOFS); ++ ui->data = kmemdup(value, size, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_free; + } +- memcpy(ui->data, value, size); + inode->i_size = ui->ui_size = size; + ui->data_len = size; + +@@ -204,12 +203,11 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, + return err; + + kfree(ui->data); +- ui->data = kmalloc(size, GFP_NOFS); ++ ui->data = kmemdup(value, size, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_free; + } +- memcpy(ui->data, value, size); + inode->i_size = ui->ui_size = size; + ui->data_len = size; + +diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h +index b31bd9e..db4836b 100644 +--- a/include/linux/mtd/ubi.h ++++ b/include/linux/mtd/ubi.h +@@ -21,7 +21,7 @@ + #ifndef __LINUX_UBI_H__ + #define __LINUX_UBI_H__ + +-#include <asm/ioctl.h> 
++#include <linux/ioctl.h> + #include <linux/types.h> + #include <mtd/ubi-user.h> + +@@ -87,7 +87,7 @@ enum { + * physical eraseblock size and on how much bytes UBI headers consume. But + * because of the volume alignment (@alignment), the usable size of logical + * eraseblocks if a volume may be less. The following equation is true: +- * @usable_leb_size = LEB size - (LEB size mod @alignment), ++ * @usable_leb_size = LEB size - (LEB size mod @alignment), + * where LEB size is the logical eraseblock size defined by the UBI device. + * + * The alignment is multiple to the minimal flash input/output unit size or %1 +@@ -116,29 +116,53 @@ struct ubi_volume_info { + * struct ubi_device_info - UBI device description data structure. + * @ubi_num: ubi device number + * @leb_size: logical eraseblock size on this UBI device ++ * @leb_start: starting offset of logical eraseblocks within physical ++ * eraseblocks + * @min_io_size: minimal I/O unit size ++ * @max_write_size: maximum amount of bytes the underlying flash can write at a ++ * time (MTD write buffer size) + * @ro_mode: if this device is in read-only mode + * @cdev: UBI character device major and minor numbers + * + * Note, @leb_size is the logical eraseblock size offered by the UBI device. + * Volumes of this UBI device may have smaller logical eraseblock size if their + * alignment is not equivalent to %1. ++ * ++ * The @max_write_size field describes the maximum flash write unit. For ++ * example, NOR flash allows for changing individual bytes, so @min_io_size is ++ * %1. However, it does not mean that NOR flash has to write data byte-by-byte. ++ * Instead, CFI NOR flashes have a write-buffer of, e.g., 64 bytes, and when ++ * writing large chunks of data, they write 64 bytes at a time. Obviously, this ++ * improves write throughput. ++ * ++ * Also, the MTD device may have N interleaved (striped) flash chips ++ * underneath, in which case @min_io_size can be the physical min. 
I/O size of ++ * a single flash chip, while @max_write_size can be N * @min_io_size. ++ * ++ * The @max_write_size field is always greater than or equal to @min_io_size. ++ * E.g., some NOR flashes may have (@min_io_size = 1, @max_write_size = 64). In ++ * contrast, NAND flashes usually have @min_io_size = @max_write_size = NAND ++ * page size. + */ + struct ubi_device_info { + int ubi_num; + int leb_size; ++ int leb_start; + int min_io_size; ++ int max_write_size; + int ro_mode; + dev_t cdev; + }; + + /* +- * enum - volume notification types. +- * @UBI_VOLUME_ADDED: volume has been added +- * @UBI_VOLUME_REMOVED: start volume volume +- * @UBI_VOLUME_RESIZED: volume size has been re-sized +- * @UBI_VOLUME_RENAMED: volume name has been re-named +- * @UBI_VOLUME_UPDATED: volume name has been updated ++ * Volume notification types. ++ * @UBI_VOLUME_ADDED: a volume has been added (an UBI device was attached or a ++ * volume was created) ++ * @UBI_VOLUME_REMOVED: a volume has been removed (an UBI device was detached ++ * or a volume was removed) ++ * @UBI_VOLUME_RESIZED: a volume has been re-sized ++ * @UBI_VOLUME_RENAMED: a volume has been re-named ++ * @UBI_VOLUME_UPDATED: data has been written to a volume + * + * These constants define which type of event has happened when a volume + * notification function is invoked. |