diff options
Diffstat (limited to 'recipes/linux/linux-vuduo2-3.1.1/ubifs_backport.patch')
-rw-r--r-- | recipes/linux/linux-vuduo2-3.1.1/ubifs_backport.patch | 11520 |
1 files changed, 11520 insertions, 0 deletions
diff --git a/recipes/linux/linux-vuduo2-3.1.1/ubifs_backport.patch b/recipes/linux/linux-vuduo2-3.1.1/ubifs_backport.patch new file mode 100644 index 0000000..b3a7c94 --- /dev/null +++ b/recipes/linux/linux-vuduo2-3.1.1/ubifs_backport.patch @@ -0,0 +1,11520 @@ +diff --git a/drivers/mtd/ubi/Kconfig b/drivers/mtd/ubi/Kconfig +index 3cf193f..4dcc752 100644 +--- a/drivers/mtd/ubi/Kconfig ++++ b/drivers/mtd/ubi/Kconfig +@@ -52,6 +52,12 @@ config MTD_UBI_GLUEBI + work on top of UBI. Do not enable this unless you use legacy + software. + +-source "drivers/mtd/ubi/Kconfig.debug" ++config MTD_UBI_DEBUG ++ bool "UBI debugging" ++ depends on SYSFS ++ select DEBUG_FS ++ select KALLSYMS ++ help ++ This option enables UBI debugging. + + endif # MTD_UBI +diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c +index fba2b66..ba73c00 100644 +--- a/drivers/mtd/ubi/build.c ++++ b/drivers/mtd/ubi/build.c +@@ -690,11 +690,25 @@ static int io_init(struct ubi_device *ubi) + ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size); + ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0); + ++ ubi->max_write_size = ubi->mtd->writebufsize; ++ /* ++ * Maximum write size has to be greater or equivalent to min. I/O ++ * size, and be multiple of min. I/O size. ++ */ ++ if (ubi->max_write_size < ubi->min_io_size || ++ ubi->max_write_size % ubi->min_io_size || ++ !is_power_of_2(ubi->max_write_size)) { ++ ubi_err("bad write buffer size %d for %d min. 
I/O unit", ++ ubi->max_write_size, ubi->min_io_size); ++ return -EINVAL; ++ } ++ + /* Calculate default aligned sizes of EC and VID headers */ + ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size); + ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size); + + dbg_msg("min_io_size %d", ubi->min_io_size); ++ dbg_msg("max_write_size %d", ubi->max_write_size); + dbg_msg("hdrs_min_io_size %d", ubi->hdrs_min_io_size); + dbg_msg("ec_hdr_alsize %d", ubi->ec_hdr_alsize); + dbg_msg("vid_hdr_alsize %d", ubi->vid_hdr_alsize); +@@ -711,7 +725,7 @@ static int io_init(struct ubi_device *ubi) + } + + /* Similar for the data offset */ +- ubi->leb_start = ubi->vid_hdr_offset + UBI_EC_HDR_SIZE; ++ ubi->leb_start = ubi->vid_hdr_offset + UBI_VID_HDR_SIZE; + ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); + + dbg_msg("vid_hdr_offset %d", ubi->vid_hdr_offset); +@@ -923,31 +937,26 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) + spin_lock_init(&ubi->volumes_lock); + + ubi_msg("attaching mtd%d to ubi%d", mtd->index, ubi_num); ++ dbg_msg("sizeof(struct ubi_scan_leb) %zu", sizeof(struct ubi_scan_leb)); ++ dbg_msg("sizeof(struct ubi_wl_entry) %zu", sizeof(struct ubi_wl_entry)); + + err = io_init(ubi); + if (err) + goto out_free; + + err = -ENOMEM; +- ubi->peb_buf1 = vmalloc(ubi->peb_size); +- if (!ubi->peb_buf1) ++ ubi->peb_buf = vmalloc(ubi->peb_size); ++ if (!ubi->peb_buf) + goto out_free; + +- ubi->peb_buf2 = vmalloc(ubi->peb_size); +- if (!ubi->peb_buf2) +- goto out_free; +- +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +- mutex_init(&ubi->dbg_buf_mutex); +- ubi->dbg_peb_buf = vmalloc(ubi->peb_size); +- if (!ubi->dbg_peb_buf) ++ err = ubi_debugging_init_dev(ubi); ++ if (err) + goto out_free; +-#endif + + err = attach_by_scanning(ubi); + if (err) { + dbg_err("failed to attach by scanning, error %d", err); +- goto out_free; ++ goto out_debugging; + } + + if (ubi->autoresize_vol_id != -1) { +@@ -960,12 +969,16 @@ int 
ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) + if (err) + goto out_detach; + ++ err = ubi_debugfs_init_dev(ubi); ++ if (err) ++ goto out_uif; ++ + ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); + if (IS_ERR(ubi->bgt_thread)) { + err = PTR_ERR(ubi->bgt_thread); + ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, + err); +- goto out_uif; ++ goto out_debugfs; + } + + ubi_msg("attached mtd%d to ubi%d", mtd->index, ubi_num); +@@ -991,8 +1004,7 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) + * checks @ubi->thread_enabled. Otherwise we may fail to wake it up. + */ + spin_lock(&ubi->wl_lock); +- if (!DBG_DISABLE_BGT) +- ubi->thread_enabled = 1; ++ ubi->thread_enabled = 1; + wake_up_process(ubi->bgt_thread); + spin_unlock(&ubi->wl_lock); + +@@ -1000,18 +1012,20 @@ int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) + ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); + return ubi_num; + ++out_debugfs: ++ ubi_debugfs_exit_dev(ubi); + out_uif: ++ get_device(&ubi->dev); ++ ubi_assert(ref); + uif_close(ubi); + out_detach: + ubi_wl_close(ubi); + free_internal_volumes(ubi); + vfree(ubi->vtbl); ++out_debugging: ++ ubi_debugging_exit_dev(ubi); + out_free: +- vfree(ubi->peb_buf1); +- vfree(ubi->peb_buf2); +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +- vfree(ubi->dbg_peb_buf); +-#endif ++ vfree(ubi->peb_buf); + if (ref) + put_device(&ubi->dev); + else +@@ -1075,16 +1089,14 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) + */ + get_device(&ubi->dev); + ++ ubi_debugfs_exit_dev(ubi); + uif_close(ubi); + ubi_wl_close(ubi); + free_internal_volumes(ubi); + vfree(ubi->vtbl); + put_mtd_device(ubi->mtd); +- vfree(ubi->peb_buf1); +- vfree(ubi->peb_buf2); +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +- vfree(ubi->dbg_peb_buf); +-#endif ++ ubi_debugging_exit_dev(ubi); ++ vfree(ubi->peb_buf); + ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num); + put_device(&ubi->dev); + 
return 0; +@@ -1197,6 +1209,11 @@ static int __init ubi_init(void) + if (!ubi_wl_entry_slab) + goto out_dev_unreg; + ++ err = ubi_debugfs_init(); ++ if (err) ++ goto out_slab; ++ ++ + /* Attach MTD devices */ + for (i = 0; i < mtd_devs; i++) { + struct mtd_dev_param *p = &mtd_dev_param[i]; +@@ -1245,6 +1262,8 @@ out_detach: + ubi_detach_mtd_dev(ubi_devices[k]->ubi_num, 1); + mutex_unlock(&ubi_devices_mutex); + } ++ ubi_debugfs_exit(); ++out_slab: + kmem_cache_destroy(ubi_wl_entry_slab); + out_dev_unreg: + misc_deregister(&ubi_ctrl_cdev); +@@ -1256,11 +1275,7 @@ out: + ubi_err("UBI error: cannot initialize UBI, error %d", err); + return err; + } +-#if defined(CONFIG_BRCMSTB) && !defined(MODULE) +-late_initcall(ubi_init); /* need to wait for the MTD driver */ +-#else + module_init(ubi_init); +-#endif + + static void __exit ubi_exit(void) + { +@@ -1272,6 +1287,7 @@ static void __exit ubi_exit(void) + ubi_detach_mtd_dev(ubi_devices[i]->ubi_num, 1); + mutex_unlock(&ubi_devices_mutex); + } ++ ubi_debugfs_exit(); + kmem_cache_destroy(ubi_wl_entry_slab); + misc_deregister(&ubi_ctrl_cdev); + class_remove_file(ubi_class, &ubi_version); +diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c +index af9fb0f..cdea669 100644 +--- a/drivers/mtd/ubi/cdev.c ++++ b/drivers/mtd/ubi/cdev.c +@@ -115,7 +115,7 @@ static int vol_cdev_open(struct inode *inode, struct file *file) + mode = UBI_READONLY; + + dbg_gen("open device %d, volume %d, mode %d", +- ubi_num, vol_id, mode); ++ ubi_num, vol_id, mode); + + desc = ubi_open_volume(ubi_num, vol_id, mode); + if (IS_ERR(desc)) +@@ -158,7 +158,7 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin) + loff_t new_offset; + + if (vol->updating) { +- /* Update is in progress, seeking is prohibited */ ++ /* Update is in progress, seeking is prohibited */ + dbg_err("updating"); + return -EBUSY; + } +@@ -561,18 +561,18 @@ static long vol_cdev_ioctl(struct file *file, unsigned int cmd, + } + + /* Set volume property 
command */ +- case UBI_IOCSETPROP: ++ case UBI_IOCSETVOLPROP: + { +- struct ubi_set_prop_req req; ++ struct ubi_set_vol_prop_req req; + + err = copy_from_user(&req, argp, +- sizeof(struct ubi_set_prop_req)); ++ sizeof(struct ubi_set_vol_prop_req)); + if (err) { + err = -EFAULT; + break; + } + switch (req.property) { +- case UBI_PROP_DIRECT_WRITE: ++ case UBI_VOL_PROP_DIRECT_WRITE: + mutex_lock(&ubi->device_mutex); + desc->vol->direct_writes = !!req.value; + mutex_unlock(&ubi->device_mutex); +@@ -628,6 +628,9 @@ static int verify_mkvol_req(const struct ubi_device *ubi, + if (req->alignment != 1 && n) + goto bad; + ++ if (!req->name[0] || !req->name_len) ++ goto bad; ++ + if (req->name_len > UBI_VOL_NAME_MAX) { + err = -ENAMETOOLONG; + goto bad; +@@ -1100,5 +1103,5 @@ const struct file_operations ubi_ctrl_cdev_operations = { + .owner = THIS_MODULE, + .unlocked_ioctl = ctrl_cdev_ioctl, + .compat_ioctl = ctrl_cdev_compat_ioctl, +- .llseek = noop_llseek, ++ .llseek = no_llseek, + }; +diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c +index 4876977..ab80c0d 100644 +--- a/drivers/mtd/ubi/debug.c ++++ b/drivers/mtd/ubi/debug.c +@@ -27,6 +27,9 @@ + #ifdef CONFIG_MTD_UBI_DEBUG + + #include "ubi.h" ++#include <linux/debugfs.h> ++#include <linux/uaccess.h> ++#include <linux/module.h> + + /** + * ubi_dbg_dump_ec_hdr - dump an erase counter header. 
+@@ -61,15 +64,15 @@ void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) + { + printk(KERN_DEBUG "Volume identifier header dump:\n"); + printk(KERN_DEBUG "\tmagic %08x\n", be32_to_cpu(vid_hdr->magic)); +- printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version); +- printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type); +- printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag); +- printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat); +- printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); +- printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); +- printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); +- printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); +- printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); ++ printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version); ++ printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type); ++ printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag); ++ printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat); ++ printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); ++ printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); ++ printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); ++ printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); ++ printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); + printk(KERN_DEBUG "\tsqnum %llu\n", + (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); + printk(KERN_DEBUG "\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc)); +@@ -228,4 +231,261 @@ out: + return; + } + ++/** ++ * ubi_debugging_init_dev - initialize debugging for an UBI device. ++ * @ubi: UBI device description object ++ * ++ * This function initializes debugging-related data for UBI device @ubi. ++ * Returns zero in case of success and a negative error code in case of ++ * failure. 
++ */ ++int ubi_debugging_init_dev(struct ubi_device *ubi) ++{ ++ ubi->dbg = kzalloc(sizeof(struct ubi_debug_info), GFP_KERNEL); ++ if (!ubi->dbg) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++/** ++ * ubi_debugging_exit_dev - free debugging data for an UBI device. ++ * @ubi: UBI device description object ++ */ ++void ubi_debugging_exit_dev(struct ubi_device *ubi) ++{ ++ kfree(ubi->dbg); ++} ++ ++/* ++ * Root directory for UBI stuff in debugfs. Contains sub-directories which ++ * contain the stuff specific to particular UBI devices. ++ */ ++static struct dentry *dfs_rootdir; ++ ++/** ++ * ubi_debugfs_init - create UBI debugfs directory. ++ * ++ * Create UBI debugfs directory. Returns zero in case of success, the error ++ * code of a returned error pointer, or %-ENODEV if %NULL was returned. ++ */ ++int ubi_debugfs_init(void) ++{ ++ dfs_rootdir = debugfs_create_dir("ubi", NULL); ++ if (IS_ERR_OR_NULL(dfs_rootdir)) { ++ int err = dfs_rootdir ? PTR_ERR(dfs_rootdir) : -ENODEV; ++ ++ ubi_err("cannot create \"ubi\" debugfs directory, error %d\n", ++ err); ++ return err; ++ } ++ ++ return 0; ++} ++ ++/** ++ * ubi_debugfs_exit - remove UBI debugfs directory. 
++ */ ++void ubi_debugfs_exit(void) ++{ ++ debugfs_remove(dfs_rootdir); ++} ++ ++/* Read an UBI debugfs file */ ++static ssize_t dfs_file_read(struct file *file, char __user *user_buf, ++ size_t count, loff_t *ppos) ++{ ++ unsigned long ubi_num = (unsigned long)file->private_data; ++ struct dentry *dent = file->f_path.dentry; ++ struct ubi_device *ubi; ++ struct ubi_debug_info *d; ++ char buf[3]; ++ int val; ++ ++ ubi = ubi_get_device(ubi_num); ++ if (!ubi) ++ return -ENODEV; ++ d = ubi->dbg; ++ ++ if (dent == d->dfs_chk_gen) ++ val = d->chk_gen; ++ else if (dent == d->dfs_chk_io) ++ val = d->chk_io; ++ else if (dent == d->dfs_disable_bgt) ++ val = d->disable_bgt; ++ else if (dent == d->dfs_emulate_bitflips) ++ val = d->emulate_bitflips; ++ else if (dent == d->dfs_emulate_io_failures) ++ val = d->emulate_io_failures; ++ else { ++ count = -EINVAL; ++ goto out; ++ } ++ ++ if (val) ++ buf[0] = '1'; ++ else ++ buf[0] = '0'; ++ buf[1] = '\n'; ++ buf[2] = 0x00; ++ ++ count = simple_read_from_buffer(user_buf, count, ppos, buf, 2); ++ ++out: ++ ubi_put_device(ubi); ++ return count; ++} ++ ++/* Write an UBI debugfs file */ ++static ssize_t dfs_file_write(struct file *file, const char __user *user_buf, ++ size_t count, loff_t *ppos) ++{ ++ unsigned long ubi_num = (unsigned long)file->private_data; ++ struct dentry *dent = file->f_path.dentry; ++ struct ubi_device *ubi; ++ struct ubi_debug_info *d; ++ size_t buf_size; ++ char buf[8]; ++ int val; ++ ++ ubi = ubi_get_device(ubi_num); ++ if (!ubi) ++ return -ENODEV; ++ d = ubi->dbg; ++ ++ buf_size = min_t(size_t, count, (sizeof(buf) - 1)); ++ if (copy_from_user(buf, user_buf, buf_size)) { ++ count = -EFAULT; ++ goto out; ++ } ++ ++ if (buf[0] == '1') ++ val = 1; ++ else if (buf[0] == '0') ++ val = 0; ++ else { ++ count = -EINVAL; ++ goto out; ++ } ++ ++ if (dent == d->dfs_chk_gen) ++ d->chk_gen = val; ++ else if (dent == d->dfs_chk_io) ++ d->chk_io = val; ++ else if (dent == d->dfs_disable_bgt) ++ d->disable_bgt = val; ++ else 
if (dent == d->dfs_emulate_bitflips) ++ d->emulate_bitflips = val; ++ else if (dent == d->dfs_emulate_io_failures) ++ d->emulate_io_failures = val; ++ else ++ count = -EINVAL; ++ ++out: ++ ubi_put_device(ubi); ++ return count; ++} ++ ++static int default_open(struct inode *inode, struct file *file) ++{ ++ if (inode->i_private) ++ file->private_data = inode->i_private; ++ ++ return 0; ++} ++ ++/* File operations for all UBI debugfs files */ ++static const struct file_operations dfs_fops = { ++ .read = dfs_file_read, ++ .write = dfs_file_write, ++ .open = default_open, ++ .llseek = no_llseek, ++ .owner = THIS_MODULE, ++}; ++ ++/** ++ * ubi_debugfs_init_dev - initialize debugfs for an UBI device. ++ * @ubi: UBI device description object ++ * ++ * This function creates all debugfs files for UBI device @ubi. Returns zero in ++ * case of success and a negative error code in case of failure. ++ */ ++int ubi_debugfs_init_dev(struct ubi_device *ubi) ++{ ++ int err, n; ++ unsigned long ubi_num = ubi->ubi_num; ++ const char *fname; ++ struct dentry *dent; ++ struct ubi_debug_info *d = ubi->dbg; ++ ++ n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME, ++ ubi->ubi_num); ++ if (n == UBI_DFS_DIR_LEN) { ++ /* The array size is too small */ ++ fname = UBI_DFS_DIR_NAME; ++ dent = ERR_PTR(-EINVAL); ++ goto out; ++ } ++ ++ fname = d->dfs_dir_name; ++ dent = debugfs_create_dir(fname, dfs_rootdir); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out; ++ d->dfs_dir = dent; ++ ++ fname = "chk_gen"; ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_chk_gen = dent; ++ ++ fname = "chk_io"; ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_chk_io = dent; ++ ++ fname = "tst_disable_bgt"; ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, ++ &dfs_fops); ++ if 
(IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_disable_bgt = dent; ++ ++ fname = "tst_emulate_bitflips"; ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_emulate_bitflips = dent; ++ ++ fname = "tst_emulate_io_failures"; ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, (void *)ubi_num, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_emulate_io_failures = dent; ++ ++ return 0; ++ ++out_remove: ++ debugfs_remove_recursive(d->dfs_dir); ++out: ++ err = dent ? PTR_ERR(dent) : -ENODEV; ++ ubi_err("cannot create \"%s\" debugfs file or directory, error %d\n", ++ fname, err); ++ return err; ++} ++ ++/** ++ * ubi_debugfs_exit_dev - free all debugfs files corresponding to device @ubi ++ * @ubi: UBI device description object ++ */ ++void ubi_debugfs_exit_dev(struct ubi_device *ubi) ++{ ++ debugfs_remove_recursive(ubi->dbg->dfs_dir); ++} ++ + #endif /* CONFIG_MTD_UBI_DEBUG */ +diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h +index 9eca950..8c513ee 100644 +--- a/drivers/mtd/ubi/debug.h ++++ b/drivers/mtd/ubi/debug.h +@@ -24,8 +24,6 @@ + #ifdef CONFIG_MTD_UBI_DEBUG + #include <linux/random.h> + +-#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__) +- + #define ubi_assert(expr) do { \ + if (unlikely(!(expr))) { \ + printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \ +@@ -34,19 +32,31 @@ + } \ + } while (0) + ++#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__) ++ ++#define ubi_dbg_dump_stack() dump_stack() ++ ++#define ubi_dbg_print_hex_dump(l, ps, pt, r, g, b, len, a) \ ++ print_hex_dump(l, ps, pt, r, g, b, len, a) ++ ++#define ubi_dbg_msg(type, fmt, ...) \ ++ pr_debug("UBI DBG " type ": " fmt "\n", ##__VA_ARGS__) ++ ++/* Just debugging messages not related to any specific UBI subsystem */ ++#define dbg_msg(fmt, ...) 
\ + printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \ + current->pid, __func__, ##__VA_ARGS__) + +-#define ubi_dbg_dump_stack() dump_stack() +- +-struct ubi_ec_hdr; +-struct ubi_vid_hdr; +-struct ubi_volume; +-struct ubi_vtbl_record; +-struct ubi_scan_volume; +-struct ubi_scan_leb; +-struct ubi_mkvol_req; ++/* General debugging messages */ ++#define dbg_gen(fmt, ...) ubi_dbg_msg("gen", fmt, ##__VA_ARGS__) ++/* Messages from the eraseblock association sub-system */ ++#define dbg_eba(fmt, ...) ubi_dbg_msg("eba", fmt, ##__VA_ARGS__) ++/* Messages from the wear-leveling sub-system */ ++#define dbg_wl(fmt, ...) ubi_dbg_msg("wl", fmt, ##__VA_ARGS__) ++/* Messages from the input/output sub-system */ ++#define dbg_io(fmt, ...) ubi_dbg_msg("io", fmt, ##__VA_ARGS__) ++/* Initialization and build messages */ ++#define dbg_bld(fmt, ...) ubi_dbg_msg("bld", fmt, ##__VA_ARGS__) + + void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr); + void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr); +@@ -56,134 +66,174 @@ void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv); + void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); + void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); + void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len); +- +-#define ubi_dbg_print_hex_dump(l, ps, pt, r, g, b, len, a) \ +- print_hex_dump(l, ps, pt, r, g, b, len, a) +- +-#ifdef CONFIG_MTD_UBI_DEBUG_MSG +-/* General debugging messages */ +-#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#else +-#define dbg_gen(fmt, ...) ({}) +-#endif +- +-#ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA +-/* Messages from the eraseblock association sub-system */ +-#define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#else +-#define dbg_eba(fmt, ...) ({}) +-#endif +- +-#ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL +-/* Messages from the wear-leveling sub-system */ +-#define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#else +-#define dbg_wl(fmt, ...) 
({}) +-#endif +- +-#ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO +-/* Messages from the input/output sub-system */ +-#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#else +-#define dbg_io(fmt, ...) ({}) +-#endif +- +-#ifdef CONFIG_MTD_UBI_DEBUG_MSG_BLD +-/* Initialization and build messages */ +-#define dbg_bld(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define UBI_IO_DEBUG 1 +-#else +-#define dbg_bld(fmt, ...) ({}) +-#define UBI_IO_DEBUG 0 +-#endif +- +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len); + int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, + int offset, int len); +-#else +-#define ubi_dbg_check_all_ff(ubi, pnum, offset, len) 0 +-#define ubi_dbg_check_write(ubi, buf, pnum, offset, len) 0 +-#endif ++int ubi_debugging_init_dev(struct ubi_device *ubi); ++void ubi_debugging_exit_dev(struct ubi_device *ubi); ++int ubi_debugfs_init(void); ++void ubi_debugfs_exit(void); ++int ubi_debugfs_init_dev(struct ubi_device *ubi); ++void ubi_debugfs_exit_dev(struct ubi_device *ubi); + +-#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT +-#define DBG_DISABLE_BGT 1 +-#else +-#define DBG_DISABLE_BGT 0 +-#endif ++/* ++ * The UBI debugfs directory name pattern and maximum name length (3 for "ubi" ++ * + 2 for the number plus 1 for the trailing zero byte). ++ */ ++#define UBI_DFS_DIR_NAME "ubi%d" ++#define UBI_DFS_DIR_LEN (3 + 2 + 1) ++ ++/** ++ * struct ubi_debug_info - debugging information for an UBI device. 
++ * ++ * @chk_gen: if UBI general extra checks are enabled ++ * @chk_io: if UBI I/O extra checks are enabled ++ * @disable_bgt: disable the background task for testing purposes ++ * @emulate_bitflips: emulate bit-flips for testing purposes ++ * @emulate_io_failures: emulate write/erase failures for testing purposes ++ * @dfs_dir_name: name of debugfs directory containing files of this UBI device ++ * @dfs_dir: direntry object of the UBI device debugfs directory ++ * @dfs_chk_gen: debugfs knob to enable UBI general extra checks ++ * @dfs_chk_io: debugfs knob to enable UBI I/O extra checks ++ * @dfs_disable_bgt: debugfs knob to disable the background task ++ * @dfs_emulate_bitflips: debugfs knob to emulate bit-flips ++ * @dfs_emulate_io_failures: debugfs knob to emulate write/erase failures ++ */ ++struct ubi_debug_info { ++ unsigned int chk_gen:1; ++ unsigned int chk_io:1; ++ unsigned int disable_bgt:1; ++ unsigned int emulate_bitflips:1; ++ unsigned int emulate_io_failures:1; ++ char dfs_dir_name[UBI_DFS_DIR_LEN + 1]; ++ struct dentry *dfs_dir; ++ struct dentry *dfs_chk_gen; ++ struct dentry *dfs_chk_io; ++ struct dentry *dfs_disable_bgt; ++ struct dentry *dfs_emulate_bitflips; ++ struct dentry *dfs_emulate_io_failures; ++}; ++ ++/** ++ * ubi_dbg_is_bgt_disabled - if the background thread is disabled. ++ * @ubi: UBI device description object ++ * ++ * Returns non-zero if the UBI background thread is disabled for testing ++ * purposes. ++ */ ++static inline int ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) ++{ ++ return ubi->dbg->disable_bgt; ++} + +-#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS + /** + * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip. ++ * @ubi: UBI device description object + * + * Returns non-zero if a bit-flip should be emulated, otherwise returns zero. 
+ */ +-static inline int ubi_dbg_is_bitflip(void) ++static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) + { +- return !(random32() % 200); ++ if (ubi->dbg->emulate_bitflips) ++ return !(random32() % 200); ++ return 0; + } +-#else +-#define ubi_dbg_is_bitflip() 0 +-#endif + +-#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES + /** + * ubi_dbg_is_write_failure - if it is time to emulate a write failure. ++ * @ubi: UBI device description object + * + * Returns non-zero if a write failure should be emulated, otherwise returns + * zero. + */ +-static inline int ubi_dbg_is_write_failure(void) ++static inline int ubi_dbg_is_write_failure(const struct ubi_device *ubi) + { +- return !(random32() % 500); ++ if (ubi->dbg->emulate_io_failures) ++ return !(random32() % 500); ++ return 0; + } +-#else +-#define ubi_dbg_is_write_failure() 0 +-#endif + +-#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES + /** + * ubi_dbg_is_erase_failure - if its time to emulate an erase failure. ++ * @ubi: UBI device description object + * + * Returns non-zero if an erase failure should be emulated, otherwise returns + * zero. + */ +-static inline int ubi_dbg_is_erase_failure(void) ++static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi) + { ++ if (ubi->dbg->emulate_io_failures) + return !(random32() % 400); ++ return 0; + } +-#else +-#define ubi_dbg_is_erase_failure() 0 +-#endif + + #else + +-#define ubi_assert(expr) ({}) +-#define dbg_err(fmt, ...) ({}) +-#define dbg_msg(fmt, ...) ({}) +-#define dbg_gen(fmt, ...) ({}) +-#define dbg_eba(fmt, ...) ({}) +-#define dbg_wl(fmt, ...) ({}) +-#define dbg_io(fmt, ...) ({}) +-#define dbg_bld(fmt, ...) 
({}) +-#define ubi_dbg_dump_stack() ({}) +-#define ubi_dbg_dump_ec_hdr(ec_hdr) ({}) +-#define ubi_dbg_dump_vid_hdr(vid_hdr) ({}) +-#define ubi_dbg_dump_vol_info(vol) ({}) +-#define ubi_dbg_dump_vtbl_record(r, idx) ({}) +-#define ubi_dbg_dump_sv(sv) ({}) +-#define ubi_dbg_dump_seb(seb, type) ({}) +-#define ubi_dbg_dump_mkvol_req(req) ({}) +-#define ubi_dbg_dump_flash(ubi, pnum, offset, len) ({}) +-#define ubi_dbg_print_hex_dump(l, ps, pt, r, g, b, len, a) ({}) +- +-#define UBI_IO_DEBUG 0 +-#define DBG_DISABLE_BGT 0 +-#define ubi_dbg_is_bitflip() 0 +-#define ubi_dbg_is_write_failure() 0 +-#define ubi_dbg_is_erase_failure() 0 +-#define ubi_dbg_check_all_ff(ubi, pnum, offset, len) 0 +-#define ubi_dbg_check_write(ubi, buf, pnum, offset, len) 0 ++/* Use "if (0)" to make compiler check arguments even if debugging is off */ ++#define ubi_assert(expr) do { \ ++ if (0) { \ ++ printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \ ++ __func__, __LINE__, current->pid); \ ++ } \ ++} while (0) ++ ++#define dbg_err(fmt, ...) do { \ ++ if (0) \ ++ ubi_err(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++#define ubi_dbg_msg(fmt, ...) do { \ ++ if (0) \ ++ pr_debug(fmt "\n", ##__VA_ARGS__); \ ++} while (0) ++ ++#define dbg_msg(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_gen(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_eba(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_wl(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_io(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_bld(fmt, ...) 
ubi_dbg_msg(fmt, ##__VA_ARGS__) ++ ++static inline void ubi_dbg_dump_stack(void) { return; } ++static inline void ++ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) { return; } ++static inline void ++ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) { return; } ++static inline void ++ubi_dbg_dump_vol_info(const struct ubi_volume *vol) { return; } ++static inline void ++ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) { return; } ++static inline void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv) { return; } ++static inline void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, ++ int type) { return; } ++static inline void ++ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req) { return; } ++static inline void ubi_dbg_dump_flash(struct ubi_device *ubi, ++ int pnum, int offset, int len) { return; } ++static inline void ++ubi_dbg_print_hex_dump(const char *l, const char *ps, int pt, int r, ++ int g, const void *b, size_t len, bool a) { return; } ++static inline int ubi_dbg_check_all_ff(struct ubi_device *ubi, ++ int pnum, int offset, ++ int len) { return 0; } ++static inline int ubi_dbg_check_write(struct ubi_device *ubi, ++ const void *buf, int pnum, ++ int offset, int len) { return 0; } ++ ++static inline int ubi_debugging_init_dev(struct ubi_device *ubi) { return 0; } ++static inline void ubi_debugging_exit_dev(struct ubi_device *ubi) { return; } ++static inline int ubi_debugfs_init(void) { return 0; } ++static inline void ubi_debugfs_exit(void) { return; } ++static inline int ubi_debugfs_init_dev(struct ubi_device *ubi) { return 0; } ++static inline void ubi_debugfs_exit_dev(struct ubi_device *ubi) { return; } ++ ++static inline int ++ubi_dbg_is_bgt_disabled(const struct ubi_device *ubi) { return 0; } ++static inline int ubi_dbg_is_bitflip(const struct ubi_device *ubi) { return 0; } ++static inline int ++ubi_dbg_is_write_failure(const struct ubi_device *ubi) { return 0; } ++static inline int ++ubi_dbg_is_erase_failure(const struct 
ubi_device *ubi) { return 0; } + + #endif /* !CONFIG_MTD_UBI_DEBUG */ + #endif /* !__UBI_DEBUG_H__ */ +diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c +index fb7f19b..9b7f87e 100644 +--- a/drivers/mtd/ubi/eba.c ++++ b/drivers/mtd/ubi/eba.c +@@ -443,7 +443,7 @@ retry: + if (err == UBI_IO_BITFLIPS) { + scrub = 1; + err = 0; +- } else if (mtd_is_eccerr(err)) { ++ } else if (err == -EBADMSG) { + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + goto out_unlock; + scrub = 1; +@@ -529,18 +529,18 @@ retry: + + data_size = offset + len; + mutex_lock(&ubi->buf_mutex); +- memset(ubi->peb_buf1 + offset, 0xFF, len); ++ memset(ubi->peb_buf + offset, 0xFF, len); + + /* Read everything before the area where the write failure happened */ + if (offset > 0) { +- err = ubi_io_read_data(ubi, ubi->peb_buf1, pnum, 0, offset); ++ err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, offset); + if (err && err != UBI_IO_BITFLIPS) + goto out_unlock; + } + +- memcpy(ubi->peb_buf1 + offset, buf, len); ++ memcpy(ubi->peb_buf + offset, buf, len); + +- err = ubi_io_write_data(ubi, ubi->peb_buf1, new_pnum, 0, data_size); ++ err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size); + if (err) { + mutex_unlock(&ubi->buf_mutex); + goto write_error; +@@ -979,7 +979,7 @@ static int is_error_sane(int err) + * physical eraseblock @to. The @vid_hdr buffer may be changed by this + * function. Returns: + * o %0 in case of success; +- * o %MOVE_CANCEL_RACE, %MOVE_TARGET_WR_ERR, %MOVE_CANCEL_BITFLIPS, etc; ++ * o %MOVE_CANCEL_RACE, %MOVE_TARGET_WR_ERR, %MOVE_TARGET_BITFLIPS, etc; + * o a negative error code in case of failure. + */ + int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, +@@ -1028,12 +1028,14 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are + * holding @ubi->move_mutex and go sleep on the LEB lock. 
So, if the + * LEB is already locked, we just do not move it and return +- * %MOVE_CANCEL_RACE, which means that UBI will re-try, but later. ++ * %MOVE_RETRY. Note, we do not return %MOVE_CANCEL_RACE here because ++ * we do not know the reasons of the contention - it may be just a ++ * normal I/O on this LEB, so we want to re-try. + */ + err = leb_write_trylock(ubi, vol_id, lnum); + if (err) { + dbg_wl("contention on LEB %d:%d, cancel", vol_id, lnum); +- return MOVE_CANCEL_RACE; ++ return MOVE_RETRY; + } + + /* +@@ -1051,13 +1053,13 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + + /* + * OK, now the LEB is locked and we can safely start moving it. Since +- * this function utilizes the @ubi->peb_buf1 buffer which is shared ++ * this function utilizes the @ubi->peb_buf buffer which is shared + * with some other functions - we lock the buffer by taking the + * @ubi->buf_mutex. + */ + mutex_lock(&ubi->buf_mutex); + dbg_wl("read %d bytes of data", aldata_size); +- err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size); ++ err = ubi_io_read_data(ubi, ubi->peb_buf, from, 0, aldata_size); + if (err && err != UBI_IO_BITFLIPS) { + ubi_warn("error %d while reading data from PEB %d", + err, from); +@@ -1077,10 +1079,10 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + */ + if (vid_hdr->vol_type == UBI_VID_DYNAMIC) + aldata_size = data_size = +- ubi_calc_data_len(ubi, ubi->peb_buf1, data_size); ++ ubi_calc_data_len(ubi, ubi->peb_buf, data_size); + + cond_resched(); +- crc = crc32(UBI_CRC32_INIT, ubi->peb_buf1, data_size); ++ crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size); + cond_resched(); + + /* +@@ -1114,12 +1116,12 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + if (is_error_sane(err)) + err = MOVE_TARGET_RD_ERR; + } else +- err = MOVE_CANCEL_BITFLIPS; ++ err = MOVE_TARGET_BITFLIPS; + goto out_unlock_buf; + } + + if (data_size > 0) { +- err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size); 
++ err = ubi_io_write_data(ubi, ubi->peb_buf, to, 0, aldata_size); + if (err) { + if (err == -EIO) + err = MOVE_TARGET_WR_ERR; +@@ -1132,8 +1134,8 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + * We've written the data and are going to read it back to make + * sure it was written correctly. + */ +- +- err = ubi_io_read_data(ubi, ubi->peb_buf2, to, 0, aldata_size); ++ memset(ubi->peb_buf, 0xFF, aldata_size); ++ err = ubi_io_read_data(ubi, ubi->peb_buf, to, 0, aldata_size); + if (err) { + if (err != UBI_IO_BITFLIPS) { + ubi_warn("error %d while reading data back " +@@ -1141,13 +1143,13 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + if (is_error_sane(err)) + err = MOVE_TARGET_RD_ERR; + } else +- err = MOVE_CANCEL_BITFLIPS; ++ err = MOVE_TARGET_BITFLIPS; + goto out_unlock_buf; + } + + cond_resched(); + +- if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) { ++ if (crc != crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size)) { + ubi_warn("read data back from PEB %d and it is " + "different", to); + err = -EINVAL; +diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c +index 8ca890c..2614dcc 100644 +--- a/drivers/mtd/ubi/io.c ++++ b/drivers/mtd/ubi/io.c +@@ -91,7 +91,7 @@ + #include <linux/slab.h> + #include "ubi.h" + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum); + static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum); + static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, +@@ -146,13 +146,35 @@ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, + if (err) + return err; + ++ /* ++ * Deliberately corrupt the buffer to improve robustness. Indeed, if we ++ * do not do this, the following may happen: ++ * 1. The buffer contains data from previous operation, e.g., read from ++ * another PEB previously. 
The data looks like expected, e.g., if we ++ * just do not read anything and return - the caller would not ++ * notice this. E.g., if we are reading a VID header, the buffer may ++ * contain a valid VID header from another PEB. ++ * 2. The driver is buggy and returns us success or -EBADMSG or ++ * -EUCLEAN, but it does not actually put any data to the buffer. ++ * ++ * This may confuse UBI or upper layers - they may think the buffer ++ * contains valid data while in fact it is just old data. This is ++ * especially possible because UBI (and UBIFS) relies on CRC, and ++ * treats data as correct even in case of ECC errors if the CRC is ++ * correct. ++ * ++ * Try to prevent this situation by changing the first byte of the ++ * buffer. ++ */ ++ *((uint8_t *)buf) ^= 0xFF; ++ + addr = (loff_t)pnum * ubi->peb_size + offset; + retry: + err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf); + if (err) { +- const char *errstr = mtd_is_eccerr(err) ? " (ECC error)" : ""; ++ const char *errstr = (err == -EBADMSG) ? " (ECC error)" : ""; + +- if (mtd_is_bitflip(err)) { ++ if (err == -EUCLEAN) { + /* + * -EUCLEAN is reported if there was a bit-flip which + * was corrected, so this is harmless. +@@ -166,9 +188,9 @@ retry: + return UBI_IO_BITFLIPS; + } + +- if (read != len && retries++ < UBI_IO_RETRIES) { +- dbg_io("error %d%s while reading %d bytes from PEB %d:%d," +- " read only %zd bytes, retry", ++ if (retries++ < UBI_IO_RETRIES) { ++ dbg_io("error %d%s while reading %d bytes from PEB " ++ "%d:%d, read only %zd bytes, retry", + err, errstr, len, pnum, offset, read); + yield(); + goto retry; +@@ -183,14 +205,14 @@ retry: + * all the requested data. But some buggy drivers might do + * this, so we change it to -EIO. 
+ */ +- if (read != len && mtd_is_eccerr(err)) { ++ if (read != len && err == -EBADMSG) { + ubi_assert(0); + err = -EIO; + } + } else { + ubi_assert(len == read); + +- if (ubi_dbg_is_bitflip()) { ++ if (ubi_dbg_is_bitflip(ubi)) { + dbg_gen("bit-flip (emulated)"); + err = UBI_IO_BITFLIPS; + } +@@ -259,7 +281,7 @@ int ubi_io_write(struct ubi_device *ubi, const void *buf, int pnum, int offset, + return err; + } + +- if (ubi_dbg_is_write_failure()) { ++ if (ubi_dbg_is_write_failure(ubi)) { + dbg_err("cannot write %d bytes to PEB %d:%d " + "(emulated)", len, pnum, offset); + ubi_dbg_dump_stack(); +@@ -322,6 +344,12 @@ static int do_sync_erase(struct ubi_device *ubi, int pnum) + wait_queue_head_t wq; + + dbg_io("erase PEB %d", pnum); ++ ubi_assert(pnum >= 0 && pnum < ubi->peb_count); ++ ++ if (ubi->ro_mode) { ++ ubi_err("read-only mode"); ++ return -EROFS; ++ } + + retry: + init_waitqueue_head(&wq); +@@ -368,7 +396,7 @@ retry: + if (err) + return err; + +- if (ubi_dbg_is_erase_failure() && !err) { ++ if (ubi_dbg_is_erase_failure(ubi)) { + dbg_err("cannot erase PEB %d (emulated)", pnum); + return -EIO; + } +@@ -403,11 +431,11 @@ static int torture_peb(struct ubi_device *ubi, int pnum) + goto out; + + /* Make sure the PEB contains only 0xFF bytes */ +- err = ubi_io_read(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size); ++ err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); + if (err) + goto out; + +- err = ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->peb_size); ++ err = ubi_check_pattern(ubi->peb_buf, 0xFF, ubi->peb_size); + if (err == 0) { + ubi_err("erased PEB %d, but a non-0xFF byte found", + pnum); +@@ -416,17 +444,17 @@ static int torture_peb(struct ubi_device *ubi, int pnum) + } + + /* Write a pattern and check it */ +- memset(ubi->peb_buf1, patterns[i], ubi->peb_size); +- err = ubi_io_write(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size); ++ memset(ubi->peb_buf, patterns[i], ubi->peb_size); ++ err = ubi_io_write(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); + if (err) 
+ goto out; + +- memset(ubi->peb_buf1, ~patterns[i], ubi->peb_size); +- err = ubi_io_read(ubi, ubi->peb_buf1, pnum, 0, ubi->peb_size); ++ memset(ubi->peb_buf, ~patterns[i], ubi->peb_size); ++ err = ubi_io_read(ubi, ubi->peb_buf, pnum, 0, ubi->peb_size); + if (err) + goto out; + +- err = ubi_check_pattern(ubi->peb_buf1, patterns[i], ++ err = ubi_check_pattern(ubi->peb_buf, patterns[i], + ubi->peb_size); + if (err == 0) { + ubi_err("pattern %x checking failed for PEB %d", +@@ -437,11 +465,11 @@ static int torture_peb(struct ubi_device *ubi, int pnum) + } + + err = patt_count; +- ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum); ++ ubi_msg("PEB %d passed torture test, do not mark it as bad", pnum); + + out: + mutex_unlock(&ubi->buf_mutex); +- if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) { ++ if (err == UBI_IO_BITFLIPS || err == -EBADMSG) { + /* + * If a bit-flip or data integrity error was detected, the test + * has not passed because it happened on a freshly erased +@@ -480,6 +508,13 @@ static int nor_erase_prepare(struct ubi_device *ubi, int pnum) + size_t written; + loff_t addr; + uint32_t data = 0; ++ /* ++ * Note, we cannot generally define VID header buffers on stack, ++ * because of the way we deal with these buffers (see the header ++ * comment in this file). But we know this is a NOR-specific piece of ++ * code, so we can do this. But yes, this is error-prone and we should ++ * (pre-)allocate VID header buffer instead. ++ */ + struct ubi_vid_hdr vid_hdr; + + /* +@@ -507,11 +542,13 @@ static int nor_erase_prepare(struct ubi_device *ubi, int pnum) + * PEB. 
+ */ + err1 = ubi_io_read_vid_hdr(ubi, pnum, &vid_hdr, 0); +- if (err1 == UBI_IO_BAD_HDR_EBADMSG || err1 == UBI_IO_BAD_HDR) { ++ if (err1 == UBI_IO_BAD_HDR_EBADMSG || err1 == UBI_IO_BAD_HDR || ++ err1 == UBI_IO_FF) { + struct ubi_ec_hdr ec_hdr; + + err1 = ubi_io_read_ec_hdr(ubi, pnum, &ec_hdr, 0); +- if (err1 == UBI_IO_BAD_HDR_EBADMSG || err1 == UBI_IO_BAD_HDR) ++ if (err1 == UBI_IO_BAD_HDR_EBADMSG || err1 == UBI_IO_BAD_HDR || ++ err1 == UBI_IO_FF) + /* + * Both VID and EC headers are corrupted, so we can + * safely erase this PEB and not afraid that it will be +@@ -723,7 +760,7 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, + + read_err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); + if (read_err) { +- if (read_err != UBI_IO_BITFLIPS && !mtd_is_eccerr(read_err)) ++ if (read_err != UBI_IO_BITFLIPS && read_err != -EBADMSG) + return read_err; + + /* +@@ -739,7 +776,7 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, + + magic = be32_to_cpu(ec_hdr->magic); + if (magic != UBI_EC_HDR_MAGIC) { +- if (mtd_is_eccerr(read_err)) ++ if (read_err == -EBADMSG) + return UBI_IO_BAD_HDR_EBADMSG; + + /* +@@ -752,9 +789,8 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, + if (verbose) + ubi_warn("no EC header found at PEB %d, " + "only 0xFF bytes", pnum); +- else if (UBI_IO_DEBUG) +- dbg_msg("no EC header found at PEB %d, " +- "only 0xFF bytes", pnum); ++ dbg_bld("no EC header found at PEB %d, " ++ "only 0xFF bytes", pnum); + if (!read_err) + return UBI_IO_FF; + else +@@ -769,9 +805,9 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, + ubi_warn("bad magic number at PEB %d: %08x instead of " + "%08x", pnum, magic, UBI_EC_HDR_MAGIC); + ubi_dbg_dump_ec_hdr(ec_hdr); +- } else if (UBI_IO_DEBUG) +- dbg_msg("bad magic number at PEB %d: %08x instead of " +- "%08x", pnum, magic, UBI_EC_HDR_MAGIC); ++ } ++ dbg_bld("bad magic number at PEB %d: %08x instead of " ++ "%08x", pnum, magic, UBI_EC_HDR_MAGIC); + return UBI_IO_BAD_HDR; + } + +@@ 
-783,9 +819,9 @@ int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, + ubi_warn("bad EC header CRC at PEB %d, calculated " + "%#08x, read %#08x", pnum, crc, hdr_crc); + ubi_dbg_dump_ec_hdr(ec_hdr); +- } else if (UBI_IO_DEBUG) +- dbg_msg("bad EC header CRC at PEB %d, calculated " +- "%#08x, read %#08x", pnum, crc, hdr_crc); ++ } ++ dbg_bld("bad EC header CRC at PEB %d, calculated " ++ "%#08x, read %#08x", pnum, crc, hdr_crc); + + if (!read_err) + return UBI_IO_BAD_HDR; +@@ -996,21 +1032,20 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, + p = (char *)vid_hdr - ubi->vid_hdr_shift; + read_err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, + ubi->vid_hdr_alsize); +- if (read_err && read_err != UBI_IO_BITFLIPS && !mtd_is_eccerr(read_err)) ++ if (read_err && read_err != UBI_IO_BITFLIPS && read_err != -EBADMSG) + return read_err; + + magic = be32_to_cpu(vid_hdr->magic); + if (magic != UBI_VID_HDR_MAGIC) { +- if (mtd_is_eccerr(read_err)) ++ if (read_err == -EBADMSG) + return UBI_IO_BAD_HDR_EBADMSG; + + if (ubi_check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) { + if (verbose) + ubi_warn("no VID header found at PEB %d, " + "only 0xFF bytes", pnum); +- else if (UBI_IO_DEBUG) +- dbg_msg("no VID header found at PEB %d, " +- "only 0xFF bytes", pnum); ++ dbg_bld("no VID header found at PEB %d, " ++ "only 0xFF bytes", pnum); + if (!read_err) + return UBI_IO_FF; + else +@@ -1021,9 +1056,9 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, + ubi_warn("bad magic number at PEB %d: %08x instead of " + "%08x", pnum, magic, UBI_VID_HDR_MAGIC); + ubi_dbg_dump_vid_hdr(vid_hdr); +- } else if (UBI_IO_DEBUG) +- dbg_msg("bad magic number at PEB %d: %08x instead of " +- "%08x", pnum, magic, UBI_VID_HDR_MAGIC); ++ } ++ dbg_bld("bad magic number at PEB %d: %08x instead of " ++ "%08x", pnum, magic, UBI_VID_HDR_MAGIC); + return UBI_IO_BAD_HDR; + } + +@@ -1035,9 +1070,9 @@ int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, + ubi_warn("bad CRC at PEB %d, 
calculated %#08x, " + "read %#08x", pnum, crc, hdr_crc); + ubi_dbg_dump_vid_hdr(vid_hdr); +- } else if (UBI_IO_DEBUG) +- dbg_msg("bad CRC at PEB %d, calculated %#08x, " +- "read %#08x", pnum, crc, hdr_crc); ++ } ++ dbg_bld("bad CRC at PEB %d, calculated %#08x, " ++ "read %#08x", pnum, crc, hdr_crc); + if (!read_err) + return UBI_IO_BAD_HDR; + else +@@ -1097,7 +1132,7 @@ int ubi_io_write_vid_hdr(struct ubi_device *ubi, int pnum, + return err; + } + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + + /** + * paranoid_check_not_bad - ensure that a physical eraseblock is not bad. +@@ -1111,6 +1146,9 @@ static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum) + { + int err; + ++ if (!ubi->dbg->chk_io) ++ return 0; ++ + err = ubi_io_is_bad(ubi, pnum); + if (!err) + return err; +@@ -1135,6 +1173,9 @@ static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, + int err; + uint32_t magic; + ++ if (!ubi->dbg->chk_io) ++ return 0; ++ + magic = be32_to_cpu(ec_hdr->magic); + if (magic != UBI_EC_HDR_MAGIC) { + ubi_err("bad magic %#08x, must be %#08x", +@@ -1170,12 +1211,15 @@ static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) + uint32_t crc, hdr_crc; + struct ubi_ec_hdr *ec_hdr; + ++ if (!ubi->dbg->chk_io) ++ return 0; ++ + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); + if (!ec_hdr) + return -ENOMEM; + + err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); +- if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) ++ if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG) + goto exit; + + crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); +@@ -1211,6 +1255,9 @@ static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, + int err; + uint32_t magic; + ++ if (!ubi->dbg->chk_io) ++ return 0; ++ + magic = be32_to_cpu(vid_hdr->magic); + if (magic != UBI_VID_HDR_MAGIC) { + ubi_err("bad VID header magic %#08x at PEB %d, must be %#08x", +@@ -1249,6 +1296,9 @@ static int 
paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) + struct ubi_vid_hdr *vid_hdr; + void *p; + ++ if (!ubi->dbg->chk_io) ++ return 0; ++ + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) + return -ENOMEM; +@@ -1256,7 +1306,7 @@ static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) + p = (char *)vid_hdr - ubi->vid_hdr_shift; + err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, + ubi->vid_hdr_alsize); +- if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) ++ if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG) + goto exit; + + crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_EC_HDR_SIZE_CRC); +@@ -1294,15 +1344,26 @@ int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, + int offset, int len) + { + int err, i; ++ size_t read; ++ void *buf1; ++ loff_t addr = (loff_t)pnum * ubi->peb_size + offset; + +- mutex_lock(&ubi->dbg_buf_mutex); +- err = ubi_io_read(ubi, ubi->dbg_peb_buf, pnum, offset, len); +- if (err) +- goto out_unlock; ++ if (!ubi->dbg->chk_io) ++ return 0; ++ ++ buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); ++ if (!buf1) { ++ ubi_err("cannot allocate memory to check writes"); ++ return 0; ++ } ++ ++ err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf1); ++ if (err && err != -EUCLEAN) ++ goto out_free; + + for (i = 0; i < len; i++) { + uint8_t c = ((uint8_t *)buf)[i]; +- uint8_t c1 = ((uint8_t *)ubi->dbg_peb_buf)[i]; ++ uint8_t c1 = ((uint8_t *)buf1)[i]; + int dump_len; + + if (c == c1) +@@ -1319,17 +1380,17 @@ int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, + ubi_msg("hex dump of the read buffer from %d to %d", + i, i + dump_len); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, +- ubi->dbg_peb_buf + i, dump_len, 1); ++ buf1 + i, dump_len, 1); + ubi_dbg_dump_stack(); + err = -EINVAL; +- goto out_unlock; ++ goto out_free; + } +- mutex_unlock(&ubi->dbg_buf_mutex); + ++ vfree(buf1); + return 0; + +-out_unlock: +- mutex_unlock(&ubi->dbg_buf_mutex); 
++out_free: ++ vfree(buf1); + return err; + } + +@@ -1348,36 +1409,44 @@ int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) + { + size_t read; + int err; ++ void *buf; + loff_t addr = (loff_t)pnum * ubi->peb_size + offset; + +- mutex_lock(&ubi->dbg_buf_mutex); +- err = ubi->mtd->read(ubi->mtd, addr, len, &read, ubi->dbg_peb_buf); +- if (err && !mtd_is_bitflip(err)) { ++ if (!ubi->dbg->chk_io) ++ return 0; ++ ++ buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubi_err("cannot allocate memory to check for 0xFFs"); ++ return 0; ++ } ++ ++ err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf); ++ if (err && err != -EUCLEAN) { + ubi_err("error %d while reading %d bytes from PEB %d:%d, " + "read %zd bytes", err, len, pnum, offset, read); + goto error; + } + +- err = ubi_check_pattern(ubi->dbg_peb_buf, 0xFF, len); ++ err = ubi_check_pattern(buf, 0xFF, len); + if (err == 0) { + ubi_err("flash region at PEB %d:%d, length %d does not " + "contain all 0xFF bytes", pnum, offset, len); + goto fail; + } +- mutex_unlock(&ubi->dbg_buf_mutex); + ++ vfree(buf); + return 0; + + fail: + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_msg("hex dump of the %d-%d region", offset, offset + len); +- print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, +- ubi->dbg_peb_buf, len, 1); ++ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1); + err = -EINVAL; + error: + ubi_dbg_dump_stack(); +- mutex_unlock(&ubi->dbg_buf_mutex); ++ vfree(buf); + return err; + } + +-#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ ++#endif /* CONFIG_MTD_UBI_DEBUG */ +diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c +index 1c95758..d39716e 100644 +--- a/drivers/mtd/ubi/kapi.c ++++ b/drivers/mtd/ubi/kapi.c +@@ -40,7 +40,9 @@ void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di) + { + di->ubi_num = ubi->ubi_num; + di->leb_size = ubi->leb_size; ++ di->leb_start = ubi->leb_start; + di->min_io_size = 
ubi->min_io_size; ++ di->max_write_size = ubi->max_write_size; + di->ro_mode = ubi->ro_mode; + di->cdev = ubi->cdev.dev; + } +@@ -408,7 +410,7 @@ int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, + return 0; + + err = ubi_eba_read_leb(ubi, vol, lnum, buf, offset, len, check); +- if (err && mtd_is_eccerr(err) && vol->vol_type == UBI_STATIC_VOLUME) { ++ if (err && err == -EBADMSG && vol->vol_type == UBI_STATIC_VOLUME) { + ubi_warn("mark volume %d as corrupted", vol_id); + vol->corrupted = 1; + } +diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c +index f6a7d7a..ff2a65c 100644 +--- a/drivers/mtd/ubi/misc.c ++++ b/drivers/mtd/ubi/misc.c +@@ -81,7 +81,7 @@ int ubi_check_volume(struct ubi_device *ubi, int vol_id) + + err = ubi_eba_read_leb(ubi, vol, i, buf, 0, size, 1); + if (err) { +- if (mtd_is_eccerr(err)) ++ if (err == -EBADMSG) + err = 1; + break; + } +diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c +index 438ec20..22f03d6 100644 +--- a/drivers/mtd/ubi/scan.c ++++ b/drivers/mtd/ubi/scan.c +@@ -39,32 +39,46 @@ + * eraseblocks are put to the @free list and the physical eraseblock to be + * erased are put to the @erase list. + * ++ * About corruptions ++ * ~~~~~~~~~~~~~~~~~ ++ * ++ * UBI protects EC and VID headers with CRC-32 checksums, so it can detect ++ * whether the headers are corrupted or not. Sometimes UBI also protects the ++ * data with CRC-32, e.g., when it executes the atomic LEB change operation, or ++ * when it moves the contents of a PEB for wear-leveling purposes. ++ * + * UBI tries to distinguish between 2 types of corruptions. +- * 1. Corruptions caused by power cuts. These are harmless and expected +- * corruptions and UBI tries to handle them gracefully, without printing too +- * many warnings and error messages. 
The idea is that we do not lose +- * important data in these case - we may lose only the data which was being +- * written to the media just before the power cut happened, and the upper +- * layers (e.g., UBIFS) are supposed to handle these situations. UBI puts +- * these PEBs to the head of the @erase list and they are scheduled for +- * erasure. ++ * ++ * 1. Corruptions caused by power cuts. These are expected corruptions and UBI ++ * tries to handle them gracefully, without printing too many warnings and ++ * error messages. The idea is that we do not lose important data in this case ++ * - we may lose only the data which was being written to the media just before ++ * the power cut happened, and the upper layers (e.g., UBIFS) are supposed to ++ * handle such data losses (e.g., by using the FS journal). ++ * ++ * When UBI detects a corruption (CRC-32 mismatch) in a PEB, and it looks like ++ * the reason is a power cut, UBI puts this PEB to the @erase list, and all ++ * PEBs in the @erase list are scheduled for erasure later. + * + * 2. Unexpected corruptions which are not caused by power cuts. During +- * scanning, such PEBs are put to the @corr list and UBI preserves them. +- * Obviously, this lessens the amount of available PEBs, and if at some +- * point UBI runs out of free PEBs, it switches to R/O mode. UBI also loudly +- * informs about such PEBs every time the MTD device is attached. ++ * scanning, such PEBs are put to the @corr list and UBI preserves them. ++ * Obviously, this lessens the amount of available PEBs, and if at some point ++ * UBI runs out of free PEBs, it switches to R/O mode. UBI also loudly informs ++ * about such PEBs every time the MTD device is attached. + * + * However, it is difficult to reliably distinguish between these types of +- * corruptions and UBI's strategy is as follows. UBI assumes (2.) if the
if the VID +- * header is corrupted and the data area does not contain all 0xFFs, and there +- * were not bit-flips or integrity errors while reading the data area. Otherwise +- * UBI assumes (1.). The assumptions are: +- * o if the data area contains only 0xFFs, there is no data, and it is safe +- * to just erase this PEB. +- * o if the data area has bit-flips and data integrity errors (ECC errors on ++ * corruptions and UBI's strategy is as follows. UBI assumes corruption type 2 ++ * if the VID header is corrupted and the data area does not contain all 0xFFs, ++ * and there were no bit-flips or integrity errors while reading the data area. ++ * Otherwise UBI assumes corruption type 1. So the decision criteria are as ++ * follows. ++ * o If the data area contains only 0xFFs, there is no data, and it is safe ++ * to just erase this PEB - this is corruption type 1. ++ * o If the data area has bit-flips or data integrity errors (ECC errors on + * NAND), it is probably a PEB which was being erased when power cut +- * happened. ++ * happened, so this is corruption type 1. However, this is just a guess, ++ * which might be wrong. ++ * o Otherwise this it corruption type 2. 
+ */ + + #include <linux/err.h> +@@ -74,7 +88,7 @@ + #include <linux/random.h> + #include "ubi.h" + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si); + #else + #define paranoid_check_si(ubi, si) 0 +@@ -115,7 +129,7 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, int to_head, + } else + BUG(); + +- seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); ++ seb = kmem_cache_alloc(si->scan_leb_slab, GFP_KERNEL); + if (!seb) + return -ENOMEM; + +@@ -144,7 +158,7 @@ static int add_corrupted(struct ubi_scan_info *si, int pnum, int ec) + + dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec); + +- seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); ++ seb = kmem_cache_alloc(si->scan_leb_slab, GFP_KERNEL); + if (!seb) + return -ENOMEM; + +@@ -381,7 +395,7 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb, + } + + err = ubi_io_read_data(ubi, buf, pnum, 0, len); +- if (err && err != UBI_IO_BITFLIPS && !mtd_is_eccerr(err)) ++ if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG) + goto out_free_buf; + + data_crc = be32_to_cpu(vid_hdr->data_crc); +@@ -553,7 +567,7 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si, + if (err) + return err; + +- seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); ++ seb = kmem_cache_alloc(si->scan_leb_slab, GFP_KERNEL); + if (!seb) + return -ENOMEM; + +@@ -775,11 +789,11 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, + int err; + + mutex_lock(&ubi->buf_mutex); +- memset(ubi->peb_buf1, 0x00, ubi->leb_size); ++ memset(ubi->peb_buf, 0x00, ubi->leb_size); + +- err = ubi_io_read(ubi, ubi->peb_buf1, pnum, ubi->leb_start, ++ err = ubi_io_read(ubi, ubi->peb_buf, pnum, ubi->leb_start, + ubi->leb_size); +- if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) { ++ if (err == UBI_IO_BITFLIPS || err == -EBADMSG) { + /* + * Bit-flips or 
integrity errors while reading the data area. + * It is difficult to say for sure what type of corruption is +@@ -794,7 +808,7 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, + if (err) + goto out_unlock; + +- if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size)) ++ if (ubi_check_pattern(ubi->peb_buf, 0xFF, ubi->leb_size)) + goto out_unlock; + + ubi_err("PEB %d contains corrupted VID header, and the data does not " +@@ -804,7 +818,7 @@ static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, + dbg_msg("hexdump of PEB %d offset %d, length %d", + pnum, ubi->leb_start, ubi->leb_size); + ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, +- ubi->peb_buf1, ubi->leb_size, 1); ++ ubi->peb_buf, ubi->leb_size, 1); + err = 1; + + out_unlock: +@@ -1089,7 +1103,7 @@ static int check_what_we_have(struct ubi_device *ubi, struct ubi_scan_info *si) + * otherwise, only print a warning. + */ + if (si->corr_peb_count >= max_corr) { +- ubi_err("too many corrupted PEBs, refusing this device"); ++ ubi_err("too many corrupted PEBs, refusing"); + return -EINVAL; + } + } +@@ -1152,6 +1166,12 @@ struct ubi_scan_info *ubi_scan(struct ubi_device *ubi) + si->volumes = RB_ROOT; + + err = -ENOMEM; ++ si->scan_leb_slab = kmem_cache_create("ubi_scan_leb_slab", ++ sizeof(struct ubi_scan_leb), ++ 0, 0, NULL); ++ if (!si->scan_leb_slab) ++ goto out_si; ++ + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) + goto out_si; +@@ -1223,11 +1243,12 @@ out_si: + /** + * destroy_sv - free the scanning volume information + * @sv: scanning volume information ++ * @si: scanning information + * + * This function destroys the volume RB-tree (@sv->root) and the scanning + * volume information. 
+ */ +-static void destroy_sv(struct ubi_scan_volume *sv) ++static void destroy_sv(struct ubi_scan_info *si, struct ubi_scan_volume *sv) + { + struct ubi_scan_leb *seb; + struct rb_node *this = sv->root.rb_node; +@@ -1247,7 +1268,7 @@ static void destroy_sv(struct ubi_scan_volume *sv) + this->rb_right = NULL; + } + +- kfree(seb); ++ kmem_cache_free(si->scan_leb_slab, seb); + } + } + kfree(sv); +@@ -1265,19 +1286,19 @@ void ubi_scan_destroy_si(struct ubi_scan_info *si) + + list_for_each_entry_safe(seb, seb_tmp, &si->alien, u.list) { + list_del(&seb->u.list); +- kfree(seb); ++ kmem_cache_free(si->scan_leb_slab, seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->erase, u.list) { + list_del(&seb->u.list); +- kfree(seb); ++ kmem_cache_free(si->scan_leb_slab, seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->corr, u.list) { + list_del(&seb->u.list); +- kfree(seb); ++ kmem_cache_free(si->scan_leb_slab, seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->free, u.list) { + list_del(&seb->u.list); +- kfree(seb); ++ kmem_cache_free(si->scan_leb_slab, seb); + } + + /* Destroy the volume RB-tree */ +@@ -1298,14 +1319,17 @@ void ubi_scan_destroy_si(struct ubi_scan_info *si) + rb->rb_right = NULL; + } + +- destroy_sv(sv); ++ destroy_sv(si, sv); + } + } + ++ if (si->scan_leb_slab) ++ kmem_cache_destroy(si->scan_leb_slab); ++ + kfree(si); + } + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + + /** + * paranoid_check_si - check the scanning information. +@@ -1323,6 +1347,9 @@ static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si) + struct ubi_scan_leb *seb, *last_seb; + uint8_t *buf; + ++ if (!ubi->dbg->chk_gen) ++ return 0; ++ + /* + * At first, check that scanning information is OK. 
+ */ +@@ -1575,4 +1602,4 @@ out: + return -EINVAL; + } + +-#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ ++#endif /* CONFIG_MTD_UBI_DEBUG */ +diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h +index a3264f0..d48aef1 100644 +--- a/drivers/mtd/ubi/scan.h ++++ b/drivers/mtd/ubi/scan.h +@@ -109,6 +109,7 @@ struct ubi_scan_volume { + * @mean_ec: mean erase counter value + * @ec_sum: a temporary variable used when calculating @mean_ec + * @ec_count: a temporary variable used when calculating @mean_ec ++ * @scan_leb_slab: slab cache for &struct ubi_scan_leb objects + * + * This data structure contains the result of scanning and may be used by other + * UBI sub-systems to build final UBI data structures, further error-recovery +@@ -134,6 +135,7 @@ struct ubi_scan_info { + int mean_ec; + uint64_t ec_sum; + int ec_count; ++ struct kmem_cache *scan_leb_slab; + }; + + struct ubi_device; +diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h +index 503ea9b..6fb8ec2 100644 +--- a/drivers/mtd/ubi/ubi-media.h ++++ b/drivers/mtd/ubi/ubi-media.h +@@ -164,7 +164,7 @@ struct ubi_ec_hdr { + __be32 image_seq; + __u8 padding2[32]; + __be32 hdr_crc; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubi_vid_hdr - on-flash UBI volume identifier header. 
+@@ -292,7 +292,7 @@ struct ubi_vid_hdr { + __be64 sqnum; + __u8 padding3[12]; + __be32 hdr_crc; +-} __attribute__ ((packed)); ++} __packed; + + /* Internal UBI volumes count */ + #define UBI_INT_VOL_COUNT 1 +@@ -373,6 +373,6 @@ struct ubi_vtbl_record { + __u8 flags; + __u8 padding[23]; + __be32 crc; +-} __attribute__ ((packed)); ++} __packed; + + #endif /* !__UBI_MEDIA_H__ */ +diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h +index 0b0149c..b162790 100644 +--- a/drivers/mtd/ubi/ubi.h ++++ b/drivers/mtd/ubi/ubi.h +@@ -40,10 +40,10 @@ + #include <linux/notifier.h> + #include <linux/mtd/mtd.h> + #include <linux/mtd/ubi.h> ++#include <asm/pgtable.h> + + #include "ubi-media.h" + #include "scan.h" +-#include "debug.h" + + /* Maximum number of supported UBI devices */ + #define UBI_MAX_DEVICES 32 +@@ -118,15 +118,17 @@ enum { + * PEB + * MOVE_TARGET_WR_ERR: canceled because there was a write error to the target + * PEB +- * MOVE_CANCEL_BITFLIPS: canceled because a bit-flip was detected in the ++ * MOVE_TARGET_BITFLIPS: canceled because a bit-flip was detected in the + * target PEB ++ * MOVE_RETRY: retry scrubbing the PEB + */ + enum { + MOVE_CANCEL_RACE = 1, + MOVE_SOURCE_RD_ERR, + MOVE_TARGET_RD_ERR, + MOVE_TARGET_WR_ERR, +- MOVE_CANCEL_BITFLIPS, ++ MOVE_TARGET_BITFLIPS, ++ MOVE_RETRY, + }; + + /** +@@ -340,8 +342,8 @@ struct ubi_wl_entry; + * protected from the wear-leveling worker) + * @pq_head: protection queue head + * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from, +- * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works, +- * @erroneous, and @erroneous_peb_count fields ++ * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works, ++ * @erroneous, and @erroneous_peb_count fields + * @move_mutex: serializes eraseblock moves + * @work_sem: synchronizes the WL worker with use tasks + * @wl_scheduled: non-zero if the wear-leveling was scheduled +@@ -381,14 +383,15 @@ struct ubi_wl_entry; + * @bad_allowed: whether the 
MTD device admits of bad physical eraseblocks or + * not + * @nor_flash: non-zero if working on top of NOR flash ++ * @max_write_size: maximum amount of bytes the underlying flash can write at a ++ * time (MTD write buffer size) + * @mtd: MTD device descriptor + * +- * @peb_buf1: a buffer of PEB size used for different purposes +- * @peb_buf2: another buffer of PEB size used for different purposes +- * @buf_mutex: protects @peb_buf1 and @peb_buf2 ++ * @peb_buf: a buffer of PEB size used for different purposes ++ * @buf_mutex: protects @peb_buf + * @ckvol_mutex: serializes static volume checking when opening +- * @dbg_peb_buf: buffer of PEB size used for debugging +- * @dbg_buf_mutex: protects @dbg_peb_buf ++ * ++ * @dbg: debugging information for this UBI device + */ + struct ubi_device { + struct cdev cdev; +@@ -464,18 +467,18 @@ struct ubi_device { + int vid_hdr_shift; + unsigned int bad_allowed:1; + unsigned int nor_flash:1; ++ int max_write_size; + struct mtd_info *mtd; + +- void *peb_buf1; +- void *peb_buf2; ++ void *peb_buf; + struct mutex buf_mutex; + struct mutex ckvol_mutex; +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +- void *dbg_peb_buf; +- struct mutex dbg_buf_mutex; +-#endif ++ ++ struct ubi_debug_info *dbg; + }; + ++#include "debug.h" ++ + extern struct kmem_cache *ubi_wl_entry_slab; + extern const struct file_operations ubi_ctrl_cdev_operations; + extern const struct file_operations ubi_cdev_operations; +@@ -664,6 +667,7 @@ static inline void ubi_ro_mode(struct ubi_device *ubi) + if (!ubi->ro_mode) { + ubi->ro_mode = 1; + ubi_warn("switch to read-only mode"); ++ ubi_dbg_dump_stack(); + } + } + +diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c +index c47620d..97e093d 100644 +--- a/drivers/mtd/ubi/vmt.c ++++ b/drivers/mtd/ubi/vmt.c +@@ -28,7 +28,7 @@ + #include <linux/slab.h> + #include "ubi.h" + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + static int paranoid_check_volumes(struct ubi_device *ubi); + #else + #define 
paranoid_check_volumes(ubi) 0 +@@ -711,7 +711,7 @@ void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol) + volume_sysfs_close(vol); + } + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + + /** + * paranoid_check_volume - check volume information. +@@ -790,11 +790,6 @@ static int paranoid_check_volume(struct ubi_device *ubi, int vol_id) + goto fail; + } + +- if (!vol->name) { +- ubi_err("NULL volume name"); +- goto fail; +- } +- + n = strnlen(vol->name, vol->name_len + 1); + if (n != vol->name_len) { + ubi_err("bad name_len %lld", n); +@@ -876,6 +871,9 @@ static int paranoid_check_volumes(struct ubi_device *ubi) + { + int i, err = 0; + ++ if (!ubi->dbg->chk_gen) ++ return 0; ++ + for (i = 0; i < ubi->vtbl_slots; i++) { + err = paranoid_check_volume(ubi, i); + if (err) +diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c +index 157d88b..a547990 100644 +--- a/drivers/mtd/ubi/vtbl.c ++++ b/drivers/mtd/ubi/vtbl.c +@@ -62,7 +62,7 @@ + #include <asm/div64.h> + #include "ubi.h" + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + static void paranoid_vtbl_check(const struct ubi_device *ubi); + #else + #define paranoid_vtbl_check(ubi) +@@ -306,9 +306,8 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, + int copy, void *vtbl) + { + int err, tries = 0; +- static struct ubi_vid_hdr *vid_hdr; +- struct ubi_scan_volume *sv; +- struct ubi_scan_leb *new_seb, *old_seb = NULL; ++ struct ubi_vid_hdr *vid_hdr; ++ struct ubi_scan_leb *new_seb; + + ubi_msg("create volume table (copy #%d)", copy + 1); + +@@ -316,15 +315,6 @@ static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si, + if (!vid_hdr) + return -ENOMEM; + +- /* +- * Check if there is a logical eraseblock which would have to contain +- * this volume table copy was found during scanning. It has to be wiped +- * out. 
+- */ +- sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID); +- if (sv) +- old_seb = ubi_scan_find_seb(sv, copy); +- + retry: + new_seb = ubi_scan_get_free_peb(ubi, si); + if (IS_ERR(new_seb)) { +@@ -332,7 +322,7 @@ retry: + goto out_free; + } + +- vid_hdr->vol_type = UBI_VID_DYNAMIC; ++ vid_hdr->vol_type = UBI_LAYOUT_VOLUME_TYPE; + vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOLUME_ID); + vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT; + vid_hdr->data_size = vid_hdr->used_ebs = +@@ -351,8 +341,8 @@ retry: + goto write_error; + + /* +- * And add it to the scanning information. Don't delete the old +- * @old_seb as it will be deleted and freed in 'ubi_scan_add_used()'. ++ * And add it to the scanning information. Don't delete the old version ++ * of this LEB as it will be deleted and freed in 'ubi_scan_add_used()'. + */ + err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec, + vid_hdr, 0); +@@ -434,7 +424,7 @@ static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi, + + err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0, + ubi->vtbl_size); +- if (err == UBI_IO_BITFLIPS || mtd_is_eccerr(err)) ++ if (err == UBI_IO_BITFLIPS || err == -EBADMSG) + /* + * Scrub the PEB later. Note, -EBADMSG indicates an + * uncorrectable ECC error, but we have our own CRC and +@@ -644,7 +634,7 @@ static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, + return -ENOMEM; + + vol->reserved_pebs = UBI_LAYOUT_VOLUME_EBS; +- vol->alignment = 1; ++ vol->alignment = UBI_LAYOUT_VOLUME_ALIGN; + vol->vol_type = UBI_DYNAMIC_VOLUME; + vol->name_len = sizeof(UBI_LAYOUT_VOLUME_NAME) - 1; + memcpy(vol->name, UBI_LAYOUT_VOLUME_NAME, vol->name_len + 1); +@@ -870,7 +860,7 @@ out_free: + return err; + } + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + + /** + * paranoid_vtbl_check - check volume table. 
+@@ -878,10 +868,13 @@ out_free: + */ + static void paranoid_vtbl_check(const struct ubi_device *ubi) + { ++ if (!ubi->dbg->chk_gen) ++ return; ++ + if (vtbl_check(ubi, ubi->vtbl)) { + ubi_err("paranoid check failed"); + BUG(); + } + } + +-#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ ++#endif /* CONFIG_MTD_UBI_DEBUG */ +diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c +index 655bbbe..7c1a9bf 100644 +--- a/drivers/mtd/ubi/wl.c ++++ b/drivers/mtd/ubi/wl.c +@@ -1,4 +1,5 @@ + /* ++ * @ubi: UBI device description object + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify +@@ -161,14 +162,16 @@ struct ubi_work { + int torture; + }; + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec); +-static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, ++static int paranoid_check_in_wl_tree(const struct ubi_device *ubi, ++ struct ubi_wl_entry *e, + struct rb_root *root); +-static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e); ++static int paranoid_check_in_pq(const struct ubi_device *ubi, ++ struct ubi_wl_entry *e); + #else + #define paranoid_check_ec(ubi, pnum, ec) 0 +-#define paranoid_check_in_wl_tree(e, root) ++#define paranoid_check_in_wl_tree(ubi, e, root) + #define paranoid_check_in_pq(ubi, e) 0 + #endif + +@@ -347,18 +350,19 @@ static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) + /** + * find_wl_entry - find wear-leveling entry closest to certain erase counter. + * @root: the RB-tree where to look for +- * @max: highest possible erase counter ++ * @diff: maximum possible difference from the smallest erase counter + * + * This function looks for a wear leveling entry with erase counter closest to +- * @max and less than @max. ++ * min + @diff, where min is the smallest erase counter. 
+ */ +-static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max) ++static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int diff) + { + struct rb_node *p; + struct ubi_wl_entry *e; ++ int max; + + e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb); +- max += e->ec; ++ max = e->ec + diff; + + p = root->rb_node; + while (p) { +@@ -386,7 +390,7 @@ static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max) + */ + int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) + { +- int err, medium_ec; ++ int err; + struct ubi_wl_entry *e, *first, *last; + + ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM || +@@ -424,7 +428,7 @@ retry: + * For unknown data we pick a physical eraseblock with medium + * erase counter. But we by no means can pick a physical + * eraseblock with erase counter greater or equivalent than the +- * lowest erase counter plus %WL_FREE_MAX_DIFF. ++ * lowest erase counter plus %WL_FREE_MAX_DIFF/2. + */ + first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, + u.rb); +@@ -433,10 +437,8 @@ retry: + if (last->ec - first->ec < WL_FREE_MAX_DIFF) + e = rb_entry(ubi->free.rb_node, + struct ubi_wl_entry, u.rb); +- else { +- medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; +- e = find_wl_entry(&ubi->free, medium_ec); +- } ++ else ++ e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF/2); + break; + case UBI_SHORTTERM: + /* +@@ -449,7 +451,7 @@ retry: + BUG(); + } + +- paranoid_check_in_wl_tree(e, &ubi->free); ++ paranoid_check_in_wl_tree(ubi, e, &ubi->free); + + /* + * Move the physical eraseblock to the protection queue where it will +@@ -613,7 +615,7 @@ static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) + list_add_tail(&wrk->list, &ubi->works); + ubi_assert(ubi->works_count >= 0); + ubi->works_count += 1; +- if (ubi->thread_enabled) ++ if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled(ubi)) + wake_up_process(ubi->bgt_thread); + spin_unlock(&ubi->wl_lock); + } +@@ -712,7 +714,7 @@ 
static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, + e1->ec, e2->ec); + goto out_cancel; + } +- paranoid_check_in_wl_tree(e1, &ubi->used); ++ paranoid_check_in_wl_tree(ubi, e1, &ubi->used); + rb_erase(&e1->u.rb, &ubi->used); + dbg_wl("move PEB %d EC %d to PEB %d EC %d", + e1->pnum, e1->ec, e2->pnum, e2->ec); +@@ -721,12 +723,12 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, + scrubbing = 1; + e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb); + e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); +- paranoid_check_in_wl_tree(e1, &ubi->scrub); ++ paranoid_check_in_wl_tree(ubi, e1, &ubi->scrub); + rb_erase(&e1->u.rb, &ubi->scrub); + dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); + } + +- paranoid_check_in_wl_tree(e2, &ubi->free); ++ paranoid_check_in_wl_tree(ubi, e2, &ubi->free); + rb_erase(&e2->u.rb, &ubi->free); + ubi->move_from = e1; + ubi->move_to = e2; +@@ -792,8 +794,11 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, + protect = 1; + goto out_not_moved; + } +- +- if (err == MOVE_CANCEL_BITFLIPS || err == MOVE_TARGET_WR_ERR || ++ if (err == MOVE_RETRY) { ++ scrubbing = 1; ++ goto out_not_moved; ++ } ++ if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR || + err == MOVE_TARGET_RD_ERR) { + /* + * Target PEB had bit-flips or write error - torture it. +@@ -1046,7 +1051,6 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, + + ubi_err("failed to erase PEB %d, error %d", pnum, err); + kfree(wl_wrk); +- kmem_cache_free(ubi_wl_entry_slab, e); + + if (err == -EINTR || err == -ENOMEM || err == -EAGAIN || + err == -EBUSY) { +@@ -1059,14 +1063,16 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, + goto out_ro; + } + return err; +- } else if (err != -EIO) { ++ } ++ ++ kmem_cache_free(ubi_wl_entry_slab, e); ++ if (err != -EIO) + /* + * If this is not %-EIO, we have no idea what to do. 
Scheduling + * this physical eraseblock for erasure again would cause + * errors again and again. Well, lets switch to R/O mode. + */ + goto out_ro; +- } + + /* It is %-EIO, the PEB went bad */ + +@@ -1169,13 +1175,13 @@ retry: + return 0; + } else { + if (in_wl_tree(e, &ubi->used)) { +- paranoid_check_in_wl_tree(e, &ubi->used); ++ paranoid_check_in_wl_tree(ubi, e, &ubi->used); + rb_erase(&e->u.rb, &ubi->used); + } else if (in_wl_tree(e, &ubi->scrub)) { +- paranoid_check_in_wl_tree(e, &ubi->scrub); ++ paranoid_check_in_wl_tree(ubi, e, &ubi->scrub); + rb_erase(&e->u.rb, &ubi->scrub); + } else if (in_wl_tree(e, &ubi->erroneous)) { +- paranoid_check_in_wl_tree(e, &ubi->erroneous); ++ paranoid_check_in_wl_tree(ubi, e, &ubi->erroneous); + rb_erase(&e->u.rb, &ubi->erroneous); + ubi->erroneous_peb_count -= 1; + ubi_assert(ubi->erroneous_peb_count >= 0); +@@ -1242,7 +1248,7 @@ retry: + } + + if (in_wl_tree(e, &ubi->used)) { +- paranoid_check_in_wl_tree(e, &ubi->used); ++ paranoid_check_in_wl_tree(ubi, e, &ubi->used); + rb_erase(&e->u.rb, &ubi->used); + } else { + int err; +@@ -1364,7 +1370,7 @@ int ubi_thread(void *u) + + spin_lock(&ubi->wl_lock); + if (list_empty(&ubi->works) || ubi->ro_mode || +- !ubi->thread_enabled) { ++ !ubi->thread_enabled || ubi_dbg_is_bgt_disabled(ubi)) { + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock(&ubi->wl_lock); + schedule(); +@@ -1561,7 +1567,7 @@ void ubi_wl_close(struct ubi_device *ubi) + kfree(ubi->lookuptbl); + } + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + + /** + * paranoid_check_ec - make sure that the erase counter of a PEB is correct. +@@ -1570,7 +1576,8 @@ void ubi_wl_close(struct ubi_device *ubi) + * @ec: the erase counter to check + * + * This function returns zero if the erase counter of physical eraseblock @pnum +- * is equivalent to @ec, and a negative error code if not or if an error occurred. ++ * is equivalent to @ec, and a negative error code if not or if an error ++ * occurred. 
+ */ + static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) + { +@@ -1578,6 +1585,9 @@ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) + long long read_ec; + struct ubi_ec_hdr *ec_hdr; + ++ if (!ubi->dbg->chk_gen) ++ return 0; ++ + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); + if (!ec_hdr) + return -ENOMEM; +@@ -1605,15 +1615,20 @@ out_free: + + /** + * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree. ++ * @ubi: UBI device description object + * @e: the wear-leveling entry to check + * @root: the root of the tree + * + * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it + * is not. + */ +-static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, ++static int paranoid_check_in_wl_tree(const struct ubi_device *ubi, ++ struct ubi_wl_entry *e, + struct rb_root *root) + { ++ if (!ubi->dbg->chk_gen) ++ return 0; ++ + if (in_wl_tree(e, root)) + return 0; + +@@ -1631,11 +1646,15 @@ static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, + * + * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not. 
+ */ +-static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e) ++static int paranoid_check_in_pq(const struct ubi_device *ubi, ++ struct ubi_wl_entry *e) + { + struct ubi_wl_entry *p; + int i; + ++ if (!ubi->dbg->chk_gen) ++ return 0; ++ + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) + list_for_each_entry(p, &ubi->pq[i], u.list) + if (p == e) +@@ -1646,4 +1665,5 @@ static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e) + ubi_dbg_dump_stack(); + return -EINVAL; + } +-#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ ++ ++#endif /* CONFIG_MTD_UBI_DEBUG */ +diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig +index 830e3f7..f8b0160 100644 +--- a/fs/ubifs/Kconfig ++++ b/fs/ubifs/Kconfig +@@ -44,29 +44,17 @@ config UBIFS_FS_ZLIB + + # Debugging-related stuff + config UBIFS_FS_DEBUG +- bool "Enable debugging" ++ bool "Enable debugging support" + depends on UBIFS_FS + select DEBUG_FS +- select KALLSYMS_ALL +- help +- This option enables UBIFS debugging. +- +-config UBIFS_FS_DEBUG_MSG_LVL +- int "Default message level (0 = no extra messages, 3 = lots)" +- depends on UBIFS_FS_DEBUG +- default "0" +- help +- This controls the amount of debugging messages produced by UBIFS. +- If reporting bugs, please try to have available a full dump of the +- messages at level 1 while the misbehaviour was occurring. Level 2 +- may become necessary if level 1 messages were not enough to find the +- bug. Generally Level 3 should be avoided. +- +-config UBIFS_FS_DEBUG_CHKS +- bool "Enable extra checks" +- depends on UBIFS_FS_DEBUG +- help +- If extra checks are enabled UBIFS will check the consistency of its +- internal data structures during operation. However, UBIFS performance +- is dramatically slower when this option is selected especially if the +- file system is large. ++ select KALLSYMS ++ help ++ This option enables UBIFS debugging support. 
It makes sure various ++ assertions, self-checks, debugging messages and test modes are compiled ++ in (this all is compiled out otherwise). Assertions are light-weight ++ and this option also enables them. Self-checks, debugging messages and ++ test modes are switched off by default. Thus, it is safe and actually ++ recommended to have debugging support enabled, and it should not slow ++ down UBIFS. You can then further enable / disable individual debugging ++ features using UBIFS module parameters and the corresponding sysfs ++ interfaces. +diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c +index c8ff0d1..02c73e7 100644 +--- a/fs/ubifs/budget.c ++++ b/fs/ubifs/budget.c +@@ -106,7 +106,7 @@ static long long get_liability(struct ubifs_info *c) + long long liab; + + spin_lock(&c->space_lock); +- liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; ++ liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth; + spin_unlock(&c->space_lock); + return liab; + } +@@ -180,7 +180,7 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) + int idx_lebs; + long long idx_size; + +- idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; ++ idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx; + /* And make sure we have thrice the index size of space reserved */ + idx_size += idx_size << 1; + /* +@@ -292,13 +292,13 @@ static int can_use_rp(struct ubifs_info *c) + * budgeted index space to the size of the current index, multiplies this by 3, + * and makes sure this does not exceed the amount of free LEBs. + * +- * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: ++ * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables: + * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might + * be large, because UBIFS does not do any index consolidation as long as + * there is free space. IOW, the index may take a lot of LEBs, but the LEBs + * will contain a lot of dirt. 
+- * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, +- * the index may be consolidated to take up to @c->min_idx_lebs LEBs. ++ * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW, ++ * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs. + * + * This function returns zero in case of success, and %-ENOSPC in case of + * failure. +@@ -343,13 +343,13 @@ static int do_budget_space(struct ubifs_info *c) + c->lst.taken_empty_lebs; + if (unlikely(rsvd_idx_lebs > lebs)) { + dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " +- "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, ++ "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs, + rsvd_idx_lebs); + return -ENOSPC; + } + + available = ubifs_calc_available(c, min_idx_lebs); +- outstanding = c->budg_data_growth + c->budg_dd_growth; ++ outstanding = c->bi.data_growth + c->bi.dd_growth; + + if (unlikely(available < outstanding)) { + dbg_budg("out of data space: available %lld, outstanding %lld", +@@ -360,7 +360,7 @@ static int do_budget_space(struct ubifs_info *c) + if (available - outstanding <= c->rp_size && !can_use_rp(c)) + return -ENOSPC; + +- c->min_idx_lebs = min_idx_lebs; ++ c->bi.min_idx_lebs = min_idx_lebs; + return 0; + } + +@@ -393,11 +393,11 @@ static int calc_data_growth(const struct ubifs_info *c, + { + int data_growth; + +- data_growth = req->new_ino ? c->inode_budget : 0; ++ data_growth = req->new_ino ? c->bi.inode_budget : 0; + if (req->new_page) +- data_growth += c->page_budget; ++ data_growth += c->bi.page_budget; + if (req->new_dent) +- data_growth += c->dent_budget; ++ data_growth += c->bi.dent_budget; + data_growth += req->new_ino_d; + return data_growth; + } +@@ -413,12 +413,12 @@ static int calc_dd_growth(const struct ubifs_info *c, + { + int dd_growth; + +- dd_growth = req->dirtied_page ? c->page_budget : 0; ++ dd_growth = req->dirtied_page ? 
c->bi.page_budget : 0; + + if (req->dirtied_ino) +- dd_growth += c->inode_budget << (req->dirtied_ino - 1); ++ dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1); + if (req->mod_dent) +- dd_growth += c->dent_budget; ++ dd_growth += c->bi.dent_budget; + dd_growth += req->dirtied_ino_d; + return dd_growth; + } +@@ -460,19 +460,19 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) + + again: + spin_lock(&c->space_lock); +- ubifs_assert(c->budg_idx_growth >= 0); +- ubifs_assert(c->budg_data_growth >= 0); +- ubifs_assert(c->budg_dd_growth >= 0); ++ ubifs_assert(c->bi.idx_growth >= 0); ++ ubifs_assert(c->bi.data_growth >= 0); ++ ubifs_assert(c->bi.dd_growth >= 0); + +- if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { ++ if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) { + dbg_budg("no space"); + spin_unlock(&c->space_lock); + return -ENOSPC; + } + +- c->budg_idx_growth += idx_growth; +- c->budg_data_growth += data_growth; +- c->budg_dd_growth += dd_growth; ++ c->bi.idx_growth += idx_growth; ++ c->bi.data_growth += data_growth; ++ c->bi.dd_growth += dd_growth; + + err = do_budget_space(c); + if (likely(!err)) { +@@ -484,9 +484,9 @@ again: + } + + /* Restore the old values */ +- c->budg_idx_growth -= idx_growth; +- c->budg_data_growth -= data_growth; +- c->budg_dd_growth -= dd_growth; ++ c->bi.idx_growth -= idx_growth; ++ c->bi.data_growth -= data_growth; ++ c->bi.dd_growth -= dd_growth; + spin_unlock(&c->space_lock); + + if (req->fast) { +@@ -506,9 +506,9 @@ again: + goto again; + } + dbg_budg("FS is full, -ENOSPC"); +- c->nospace = 1; ++ c->bi.nospace = 1; + if (can_use_rp(c) || c->rp_size == 0) +- c->nospace_rp = 1; ++ c->bi.nospace_rp = 1; + smp_wmb(); + } else + ubifs_err("cannot budget space, error %d", err); +@@ -523,8 +523,8 @@ again: + * This function releases the space budgeted by 'ubifs_budget_space()'. 
Note, + * since the index changes (which were budgeted for in @req->idx_growth) will + * only be written to the media on commit, this function moves the index budget +- * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be +- * zeroed by the commit operation. ++ * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed ++ * by the commit operation. + */ + void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) + { +@@ -553,23 +553,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) + if (!req->data_growth && !req->dd_growth) + return; + +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + + spin_lock(&c->space_lock); +- c->budg_idx_growth -= req->idx_growth; +- c->budg_uncommitted_idx += req->idx_growth; +- c->budg_data_growth -= req->data_growth; +- c->budg_dd_growth -= req->dd_growth; +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); +- +- ubifs_assert(c->budg_idx_growth >= 0); +- ubifs_assert(c->budg_data_growth >= 0); +- ubifs_assert(c->budg_dd_growth >= 0); +- ubifs_assert(c->min_idx_lebs < c->main_lebs); +- ubifs_assert(!(c->budg_idx_growth & 7)); +- ubifs_assert(!(c->budg_data_growth & 7)); +- ubifs_assert(!(c->budg_dd_growth & 7)); ++ c->bi.idx_growth -= req->idx_growth; ++ c->bi.uncommitted_idx += req->idx_growth; ++ c->bi.data_growth -= req->data_growth; ++ c->bi.dd_growth -= req->dd_growth; ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ ++ ubifs_assert(c->bi.idx_growth >= 0); ++ ubifs_assert(c->bi.data_growth >= 0); ++ ubifs_assert(c->bi.dd_growth >= 0); ++ ubifs_assert(c->bi.min_idx_lebs < c->main_lebs); ++ ubifs_assert(!(c->bi.idx_growth & 7)); ++ ubifs_assert(!(c->bi.data_growth & 7)); ++ ubifs_assert(!(c->bi.dd_growth & 7)); + spin_unlock(&c->space_lock); + } + +@@ -586,13 +586,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c) + { + spin_lock(&c->space_lock); + /* Release the index growth reservation */ 
+- c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; ++ c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; + /* Release the data growth reservation */ +- c->budg_data_growth -= c->page_budget; ++ c->bi.data_growth -= c->bi.page_budget; + /* Increase the dirty data growth reservation instead */ +- c->budg_dd_growth += c->page_budget; ++ c->bi.dd_growth += c->bi.page_budget; + /* And re-calculate the indexing space reservation */ +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + spin_unlock(&c->space_lock); + } + +@@ -612,7 +612,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, + + memset(&req, 0, sizeof(struct ubifs_budget_req)); + /* The "no space" flags will be cleared because dd_growth is > 0 */ +- req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); ++ req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8); + ubifs_release_budget(c, &req); + } + +@@ -682,9 +682,9 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) + int rsvd_idx_lebs, lebs; + long long available, outstanding, free; + +- ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); +- outstanding = c->budg_data_growth + c->budg_dd_growth; +- available = ubifs_calc_available(c, c->min_idx_lebs); ++ ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); ++ outstanding = c->bi.data_growth + c->bi.dd_growth; ++ available = ubifs_calc_available(c, c->bi.min_idx_lebs); + + /* + * When reporting free space to user-space, UBIFS guarantees that it is +@@ -697,8 +697,8 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) + * Note, the calculations below are similar to what we have in + * 'do_budget_space()', so refer there for comments. 
+ */ +- if (c->min_idx_lebs > c->lst.idx_lebs) +- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; ++ if (c->bi.min_idx_lebs > c->lst.idx_lebs) ++ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - +diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c +index 02429d8..fb3b5c8 100644 +--- a/fs/ubifs/commit.c ++++ b/fs/ubifs/commit.c +@@ -48,6 +48,56 @@ + #include <linux/slab.h> + #include "ubifs.h" + ++/* ++ * nothing_to_commit - check if there is nothing to commit. ++ * @c: UBIFS file-system description object ++ * ++ * This is a helper function which checks if there is anything to commit. It is ++ * used as an optimization to avoid starting the commit if it is not really ++ * necessary. Indeed, the commit operation always assumes flash I/O (e.g., ++ * writing the commit start node to the log), and it is better to avoid doing ++ * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is ++ * nothing to commit, it is more optimal to avoid any flash I/O. ++ * ++ * This function has to be called with @c->commit_sem locked for writing - ++ * this function does not take LPT/TNC locks because the @c->commit_sem ++ * guarantees that we have exclusive access to the TNC and LPT data structures. ++ * ++ * This function returns %1 if there is nothing to commit and %0 otherwise. ++ */ ++static int nothing_to_commit(struct ubifs_info *c) ++{ ++ /* ++ * During mounting or remounting from R/O mode to R/W mode we may ++ * commit for various recovery-related reasons. ++ */ ++ if (c->mounting || c->remounting_rw) ++ return 0; ++ ++ /* ++ * If the root TNC node is dirty, we definitely have something to ++ * commit. ++ */ ++ if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode)) ++ return 0; ++ ++ /* ++ * Even though the TNC is clean, the LPT tree may have dirty nodes. 
For ++ * example, this may happen if the budgeting subsystem invoked GC to ++ * make some free space, and the GC found an LEB with only dirty and ++ * free space. In this case GC would just change the lprops of this ++ * LEB (by turning all space into free space) and unmap it. ++ */ ++ if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags)) ++ return 0; ++ ++ ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0); ++ ubifs_assert(c->dirty_pn_cnt == 0); ++ ubifs_assert(c->dirty_nn_cnt == 0); ++ ++ return 1; ++} ++ + /** + * do_commit - commit the journal. + * @c: UBIFS file-system description object +@@ -70,6 +120,12 @@ static int do_commit(struct ubifs_info *c) + goto out_up; + } + ++ if (nothing_to_commit(c)) { ++ up_write(&c->commit_sem); ++ err = 0; ++ goto out_cancel; ++ } ++ + /* Sync all write buffers (necessary for recovery) */ + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); +@@ -126,7 +182,7 @@ static int do_commit(struct ubifs_info *c) + c->mst_node->root_len = cpu_to_le32(zroot.len); + c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); + c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); +- c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); ++ c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz); + c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); + c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); + c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); +@@ -162,12 +218,12 @@ static int do_commit(struct ubifs_info *c) + if (err) + goto out; + ++out_cancel: + spin_lock(&c->cs_lock); + c->cmt_state = COMMIT_RESTING; + wake_up(&c->cmt_wq); + dbg_cmt("commit end"); + spin_unlock(&c->cs_lock); +- + return 0; + + out_up: +@@ -362,7 +418,7 @@ int ubifs_run_commit(struct ubifs_info *c) + + spin_lock(&c->cs_lock); + if (c->cmt_state == COMMIT_BROKEN) { +- err = -EINVAL; ++ err = -EROFS; + goto out; + } + +@@ -388,7 +444,7 @@ int ubifs_run_commit(struct ubifs_info *c) + * re-check it. 
+ */ + if (c->cmt_state == COMMIT_BROKEN) { +- err = -EINVAL; ++ err = -EROFS; + goto out_cmt_unlock; + } + +@@ -520,8 +576,8 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) + struct idx_node *i; + size_t sz; + +- if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) +- goto out; ++ if (!dbg_is_chk_index(c)) ++ return 0; + + INIT_LIST_HEAD(&list); + +diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c +index 0bee4db..1934084 100644 +--- a/fs/ubifs/debug.c ++++ b/fs/ubifs/debug.c +@@ -27,33 +27,16 @@ + * various local functions of those subsystems. + */ + +-#define UBIFS_DBG_PRESERVE_UBI +- +-#include "ubifs.h" + #include <linux/module.h> +-#include <linux/moduleparam.h> + #include <linux/debugfs.h> + #include <linux/math64.h> +-#include <linux/slab.h> ++#include <linux/uaccess.h> ++#include <linux/random.h> ++#include "ubifs.h" + + #ifdef CONFIG_UBIFS_FS_DEBUG + +-DEFINE_SPINLOCK(dbg_lock); +- +-static char dbg_key_buf0[128]; +-static char dbg_key_buf1[128]; +- +-unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; +-unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; +-unsigned int ubifs_tst_flags; +- +-module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); +-module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); +-module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); +- +-MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); +-MODULE_PARM_DESC(debug_chks, "Debug check flags"); +-MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); ++static DEFINE_SPINLOCK(dbg_lock); + + static const char *get_key_fmt(int fmt) + { +@@ -95,8 +78,30 @@ static const char *get_key_type(int type) + } + } + +-static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, +- char *buffer) ++static const char *get_dent_type(int type) ++{ ++ switch (type) { ++ case UBIFS_ITYPE_REG: ++ return "file"; ++ case UBIFS_ITYPE_DIR: ++ return "dir"; ++ case UBIFS_ITYPE_LNK: ++ return 
"symlink"; ++ case UBIFS_ITYPE_BLK: ++ return "blkdev"; ++ case UBIFS_ITYPE_CHR: ++ return "char dev"; ++ case UBIFS_ITYPE_FIFO: ++ return "fifo"; ++ case UBIFS_ITYPE_SOCK: ++ return "socket"; ++ default: ++ return "unknown/invalid type"; ++ } ++} ++ ++const char *dbg_snprintf_key(const struct ubifs_info *c, ++ const union ubifs_key *key, char *buffer, int len) + { + char *p = buffer; + int type = key_type(c, key); +@@ -104,45 +109,34 @@ static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, + if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) { + switch (type) { + case UBIFS_INO_KEY: +- sprintf(p, "(%lu, %s)", (unsigned long)key_inum(c, key), +- get_key_type(type)); ++ len -= snprintf(p, len, "(%lu, %s)", ++ (unsigned long)key_inum(c, key), ++ get_key_type(type)); + break; + case UBIFS_DENT_KEY: + case UBIFS_XENT_KEY: +- sprintf(p, "(%lu, %s, %#08x)", +- (unsigned long)key_inum(c, key), +- get_key_type(type), key_hash(c, key)); ++ len -= snprintf(p, len, "(%lu, %s, %#08x)", ++ (unsigned long)key_inum(c, key), ++ get_key_type(type), key_hash(c, key)); + break; + case UBIFS_DATA_KEY: +- sprintf(p, "(%lu, %s, %u)", +- (unsigned long)key_inum(c, key), +- get_key_type(type), key_block(c, key)); ++ len -= snprintf(p, len, "(%lu, %s, %u)", ++ (unsigned long)key_inum(c, key), ++ get_key_type(type), key_block(c, key)); + break; + case UBIFS_TRUN_KEY: +- sprintf(p, "(%lu, %s)", +- (unsigned long)key_inum(c, key), +- get_key_type(type)); ++ len -= snprintf(p, len, "(%lu, %s)", ++ (unsigned long)key_inum(c, key), ++ get_key_type(type)); + break; + default: +- sprintf(p, "(bad key type: %#08x, %#08x)", +- key->u32[0], key->u32[1]); ++ len -= snprintf(p, len, "(bad key type: %#08x, %#08x)", ++ key->u32[0], key->u32[1]); + } + } else +- sprintf(p, "bad key format %d", c->key_fmt); +-} +- +-const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key) +-{ +- /* dbg_lock must be held */ +- sprintf_key(c, key, dbg_key_buf0); +- return dbg_key_buf0; 
+-} +- +-const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key) +-{ +- /* dbg_lock must be held */ +- sprintf_key(c, key, dbg_key_buf1); +- return dbg_key_buf1; ++ len -= snprintf(p, len, "bad key format %d", c->key_fmt); ++ ubifs_assert(len > 0); ++ return p; + } + + const char *dbg_ntype(int type) +@@ -227,53 +221,83 @@ const char *dbg_jhead(int jhead) + + static void dump_ch(const struct ubifs_ch *ch) + { +- printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); +- printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc)); +- printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type, ++ printk(KERN_ERR "\tmagic %#x\n", le32_to_cpu(ch->magic)); ++ printk(KERN_ERR "\tcrc %#x\n", le32_to_cpu(ch->crc)); ++ printk(KERN_ERR "\tnode_type %d (%s)\n", ch->node_type, + dbg_ntype(ch->node_type)); +- printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type, ++ printk(KERN_ERR "\tgroup_type %d (%s)\n", ch->group_type, + dbg_gtype(ch->group_type)); +- printk(KERN_DEBUG "\tsqnum %llu\n", ++ printk(KERN_ERR "\tsqnum %llu\n", + (unsigned long long)le64_to_cpu(ch->sqnum)); +- printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); ++ printk(KERN_ERR "\tlen %u\n", le32_to_cpu(ch->len)); + } + +-void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) ++void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode) + { + const struct ubifs_inode *ui = ubifs_inode(inode); ++ struct qstr nm = { .name = NULL }; ++ union ubifs_key key; ++ struct ubifs_dent_node *dent, *pdent = NULL; ++ int count = 2; + +- printk(KERN_DEBUG "Dump in-memory inode:"); +- printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); +- printk(KERN_DEBUG "\tsize %llu\n", ++ printk(KERN_ERR "Dump in-memory inode:"); ++ printk(KERN_ERR "\tinode %lu\n", inode->i_ino); ++ printk(KERN_ERR "\tsize %llu\n", + (unsigned long long)i_size_read(inode)); +- printk(KERN_DEBUG "\tnlink %u\n", inode->i_nlink); +- printk(KERN_DEBUG "\tuid %u\n", (unsigned int)inode->i_uid); +- 
printk(KERN_DEBUG "\tgid %u\n", (unsigned int)inode->i_gid); +- printk(KERN_DEBUG "\tatime %u.%u\n", ++ printk(KERN_ERR "\tnlink %u\n", inode->i_nlink); ++ printk(KERN_ERR "\tuid %u\n", (unsigned int)inode->i_uid); ++ printk(KERN_ERR "\tgid %u\n", (unsigned int)inode->i_gid); ++ printk(KERN_ERR "\tatime %u.%u\n", + (unsigned int)inode->i_atime.tv_sec, + (unsigned int)inode->i_atime.tv_nsec); +- printk(KERN_DEBUG "\tmtime %u.%u\n", ++ printk(KERN_ERR "\tmtime %u.%u\n", + (unsigned int)inode->i_mtime.tv_sec, + (unsigned int)inode->i_mtime.tv_nsec); +- printk(KERN_DEBUG "\tctime %u.%u\n", ++ printk(KERN_ERR "\tctime %u.%u\n", + (unsigned int)inode->i_ctime.tv_sec, + (unsigned int)inode->i_ctime.tv_nsec); +- printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ui->creat_sqnum); +- printk(KERN_DEBUG "\txattr_size %u\n", ui->xattr_size); +- printk(KERN_DEBUG "\txattr_cnt %u\n", ui->xattr_cnt); +- printk(KERN_DEBUG "\txattr_names %u\n", ui->xattr_names); +- printk(KERN_DEBUG "\tdirty %u\n", ui->dirty); +- printk(KERN_DEBUG "\txattr %u\n", ui->xattr); +- printk(KERN_DEBUG "\tbulk_read %u\n", ui->xattr); +- printk(KERN_DEBUG "\tsynced_i_size %llu\n", ++ printk(KERN_ERR "\tcreat_sqnum %llu\n", ui->creat_sqnum); ++ printk(KERN_ERR "\txattr_size %u\n", ui->xattr_size); ++ printk(KERN_ERR "\txattr_cnt %u\n", ui->xattr_cnt); ++ printk(KERN_ERR "\txattr_names %u\n", ui->xattr_names); ++ printk(KERN_ERR "\tdirty %u\n", ui->dirty); ++ printk(KERN_ERR "\txattr %u\n", ui->xattr); ++ printk(KERN_ERR "\tbulk_read %u\n", ui->xattr); ++ printk(KERN_ERR "\tsynced_i_size %llu\n", + (unsigned long long)ui->synced_i_size); +- printk(KERN_DEBUG "\tui_size %llu\n", ++ printk(KERN_ERR "\tui_size %llu\n", + (unsigned long long)ui->ui_size); +- printk(KERN_DEBUG "\tflags %d\n", ui->flags); +- printk(KERN_DEBUG "\tcompr_type %d\n", ui->compr_type); +- printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); +- printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); +- printk(KERN_DEBUG 
"\tdata_len %d\n", ui->data_len); ++ printk(KERN_ERR "\tflags %d\n", ui->flags); ++ printk(KERN_ERR "\tcompr_type %d\n", ui->compr_type); ++ printk(KERN_ERR "\tlast_page_read %lu\n", ui->last_page_read); ++ printk(KERN_ERR "\tread_in_a_row %lu\n", ui->read_in_a_row); ++ printk(KERN_ERR "\tdata_len %d\n", ui->data_len); ++ ++ if (!S_ISDIR(inode->i_mode)) ++ return; ++ ++ printk(KERN_ERR "List of directory entries:\n"); ++ ubifs_assert(!mutex_is_locked(&c->tnc_mutex)); ++ ++ lowest_dent_key(c, &key, inode->i_ino); ++ while (1) { ++ dent = ubifs_tnc_next_ent(c, &key, &nm); ++ if (IS_ERR(dent)) { ++ if (PTR_ERR(dent) != -ENOENT) ++ printk(KERN_ERR "error %ld\n", PTR_ERR(dent)); ++ break; ++ } ++ ++ printk(KERN_ERR "\t%d: %s (%s)\n", ++ count++, dent->name, get_dent_type(dent->type)); ++ ++ nm.name = dent->name; ++ nm.len = le16_to_cpu(dent->nlen); ++ kfree(pdent); ++ pdent = dent; ++ key_read(c, &dent->key, &key); ++ } ++ kfree(pdent); + } + + void dbg_dump_node(const struct ubifs_info *c, const void *node) +@@ -281,14 +305,15 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + int i, n; + union ubifs_key key; + const struct ubifs_ch *ch = node; ++ char key_buf[DBG_KEY_BUF_LEN]; + +- if (dbg_failure_mode) ++ if (dbg_is_tst_rcvry(c)) + return; + + /* If the magic is incorrect, just hexdump the first bytes */ + if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) { +- printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ); +- print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ++ printk(KERN_ERR "Not a node, first %zu bytes:", UBIFS_CH_SZ); ++ print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1, + (void *)node, UBIFS_CH_SZ, 1); + return; + } +@@ -301,7 +326,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + { + const struct ubifs_pad_node *pad = node; + +- printk(KERN_DEBUG "\tpad_len %u\n", ++ printk(KERN_ERR "\tpad_len %u\n", + le32_to_cpu(pad->pad_len)); + break; + } +@@ -310,48 +335,50 @@ void dbg_dump_node(const 
struct ubifs_info *c, const void *node) + const struct ubifs_sb_node *sup = node; + unsigned int sup_flags = le32_to_cpu(sup->flags); + +- printk(KERN_DEBUG "\tkey_hash %d (%s)\n", ++ printk(KERN_ERR "\tkey_hash %d (%s)\n", + (int)sup->key_hash, get_key_hash(sup->key_hash)); +- printk(KERN_DEBUG "\tkey_fmt %d (%s)\n", ++ printk(KERN_ERR "\tkey_fmt %d (%s)\n", + (int)sup->key_fmt, get_key_fmt(sup->key_fmt)); +- printk(KERN_DEBUG "\tflags %#x\n", sup_flags); +- printk(KERN_DEBUG "\t big_lpt %u\n", ++ printk(KERN_ERR "\tflags %#x\n", sup_flags); ++ printk(KERN_ERR "\t big_lpt %u\n", + !!(sup_flags & UBIFS_FLG_BIGLPT)); +- printk(KERN_DEBUG "\tmin_io_size %u\n", ++ printk(KERN_ERR "\t space_fixup %u\n", ++ !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); ++ printk(KERN_ERR "\tmin_io_size %u\n", + le32_to_cpu(sup->min_io_size)); +- printk(KERN_DEBUG "\tleb_size %u\n", ++ printk(KERN_ERR "\tleb_size %u\n", + le32_to_cpu(sup->leb_size)); +- printk(KERN_DEBUG "\tleb_cnt %u\n", ++ printk(KERN_ERR "\tleb_cnt %u\n", + le32_to_cpu(sup->leb_cnt)); +- printk(KERN_DEBUG "\tmax_leb_cnt %u\n", ++ printk(KERN_ERR "\tmax_leb_cnt %u\n", + le32_to_cpu(sup->max_leb_cnt)); +- printk(KERN_DEBUG "\tmax_bud_bytes %llu\n", ++ printk(KERN_ERR "\tmax_bud_bytes %llu\n", + (unsigned long long)le64_to_cpu(sup->max_bud_bytes)); +- printk(KERN_DEBUG "\tlog_lebs %u\n", ++ printk(KERN_ERR "\tlog_lebs %u\n", + le32_to_cpu(sup->log_lebs)); +- printk(KERN_DEBUG "\tlpt_lebs %u\n", ++ printk(KERN_ERR "\tlpt_lebs %u\n", + le32_to_cpu(sup->lpt_lebs)); +- printk(KERN_DEBUG "\torph_lebs %u\n", ++ printk(KERN_ERR "\torph_lebs %u\n", + le32_to_cpu(sup->orph_lebs)); +- printk(KERN_DEBUG "\tjhead_cnt %u\n", ++ printk(KERN_ERR "\tjhead_cnt %u\n", + le32_to_cpu(sup->jhead_cnt)); +- printk(KERN_DEBUG "\tfanout %u\n", ++ printk(KERN_ERR "\tfanout %u\n", + le32_to_cpu(sup->fanout)); +- printk(KERN_DEBUG "\tlsave_cnt %u\n", ++ printk(KERN_ERR "\tlsave_cnt %u\n", + le32_to_cpu(sup->lsave_cnt)); +- printk(KERN_DEBUG 
"\tdefault_compr %u\n", ++ printk(KERN_ERR "\tdefault_compr %u\n", + (int)le16_to_cpu(sup->default_compr)); +- printk(KERN_DEBUG "\trp_size %llu\n", ++ printk(KERN_ERR "\trp_size %llu\n", + (unsigned long long)le64_to_cpu(sup->rp_size)); +- printk(KERN_DEBUG "\trp_uid %u\n", ++ printk(KERN_ERR "\trp_uid %u\n", + le32_to_cpu(sup->rp_uid)); +- printk(KERN_DEBUG "\trp_gid %u\n", ++ printk(KERN_ERR "\trp_gid %u\n", + le32_to_cpu(sup->rp_gid)); +- printk(KERN_DEBUG "\tfmt_version %u\n", ++ printk(KERN_ERR "\tfmt_version %u\n", + le32_to_cpu(sup->fmt_version)); +- printk(KERN_DEBUG "\ttime_gran %u\n", ++ printk(KERN_ERR "\ttime_gran %u\n", + le32_to_cpu(sup->time_gran)); +- printk(KERN_DEBUG "\tUUID %pUB\n", ++ printk(KERN_ERR "\tUUID %pUB\n", + sup->uuid); + break; + } +@@ -359,61 +386,61 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + { + const struct ubifs_mst_node *mst = node; + +- printk(KERN_DEBUG "\thighest_inum %llu\n", ++ printk(KERN_ERR "\thighest_inum %llu\n", + (unsigned long long)le64_to_cpu(mst->highest_inum)); +- printk(KERN_DEBUG "\tcommit number %llu\n", ++ printk(KERN_ERR "\tcommit number %llu\n", + (unsigned long long)le64_to_cpu(mst->cmt_no)); +- printk(KERN_DEBUG "\tflags %#x\n", ++ printk(KERN_ERR "\tflags %#x\n", + le32_to_cpu(mst->flags)); +- printk(KERN_DEBUG "\tlog_lnum %u\n", ++ printk(KERN_ERR "\tlog_lnum %u\n", + le32_to_cpu(mst->log_lnum)); +- printk(KERN_DEBUG "\troot_lnum %u\n", ++ printk(KERN_ERR "\troot_lnum %u\n", + le32_to_cpu(mst->root_lnum)); +- printk(KERN_DEBUG "\troot_offs %u\n", ++ printk(KERN_ERR "\troot_offs %u\n", + le32_to_cpu(mst->root_offs)); +- printk(KERN_DEBUG "\troot_len %u\n", ++ printk(KERN_ERR "\troot_len %u\n", + le32_to_cpu(mst->root_len)); +- printk(KERN_DEBUG "\tgc_lnum %u\n", ++ printk(KERN_ERR "\tgc_lnum %u\n", + le32_to_cpu(mst->gc_lnum)); +- printk(KERN_DEBUG "\tihead_lnum %u\n", ++ printk(KERN_ERR "\tihead_lnum %u\n", + le32_to_cpu(mst->ihead_lnum)); +- printk(KERN_DEBUG "\tihead_offs 
%u\n", ++ printk(KERN_ERR "\tihead_offs %u\n", + le32_to_cpu(mst->ihead_offs)); +- printk(KERN_DEBUG "\tindex_size %llu\n", ++ printk(KERN_ERR "\tindex_size %llu\n", + (unsigned long long)le64_to_cpu(mst->index_size)); +- printk(KERN_DEBUG "\tlpt_lnum %u\n", ++ printk(KERN_ERR "\tlpt_lnum %u\n", + le32_to_cpu(mst->lpt_lnum)); +- printk(KERN_DEBUG "\tlpt_offs %u\n", ++ printk(KERN_ERR "\tlpt_offs %u\n", + le32_to_cpu(mst->lpt_offs)); +- printk(KERN_DEBUG "\tnhead_lnum %u\n", ++ printk(KERN_ERR "\tnhead_lnum %u\n", + le32_to_cpu(mst->nhead_lnum)); +- printk(KERN_DEBUG "\tnhead_offs %u\n", ++ printk(KERN_ERR "\tnhead_offs %u\n", + le32_to_cpu(mst->nhead_offs)); +- printk(KERN_DEBUG "\tltab_lnum %u\n", ++ printk(KERN_ERR "\tltab_lnum %u\n", + le32_to_cpu(mst->ltab_lnum)); +- printk(KERN_DEBUG "\tltab_offs %u\n", ++ printk(KERN_ERR "\tltab_offs %u\n", + le32_to_cpu(mst->ltab_offs)); +- printk(KERN_DEBUG "\tlsave_lnum %u\n", ++ printk(KERN_ERR "\tlsave_lnum %u\n", + le32_to_cpu(mst->lsave_lnum)); +- printk(KERN_DEBUG "\tlsave_offs %u\n", ++ printk(KERN_ERR "\tlsave_offs %u\n", + le32_to_cpu(mst->lsave_offs)); +- printk(KERN_DEBUG "\tlscan_lnum %u\n", ++ printk(KERN_ERR "\tlscan_lnum %u\n", + le32_to_cpu(mst->lscan_lnum)); +- printk(KERN_DEBUG "\tleb_cnt %u\n", ++ printk(KERN_ERR "\tleb_cnt %u\n", + le32_to_cpu(mst->leb_cnt)); +- printk(KERN_DEBUG "\tempty_lebs %u\n", ++ printk(KERN_ERR "\tempty_lebs %u\n", + le32_to_cpu(mst->empty_lebs)); +- printk(KERN_DEBUG "\tidx_lebs %u\n", ++ printk(KERN_ERR "\tidx_lebs %u\n", + le32_to_cpu(mst->idx_lebs)); +- printk(KERN_DEBUG "\ttotal_free %llu\n", ++ printk(KERN_ERR "\ttotal_free %llu\n", + (unsigned long long)le64_to_cpu(mst->total_free)); +- printk(KERN_DEBUG "\ttotal_dirty %llu\n", ++ printk(KERN_ERR "\ttotal_dirty %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dirty)); +- printk(KERN_DEBUG "\ttotal_used %llu\n", ++ printk(KERN_ERR "\ttotal_used %llu\n", + (unsigned long long)le64_to_cpu(mst->total_used)); +- 
printk(KERN_DEBUG "\ttotal_dead %llu\n", ++ printk(KERN_ERR "\ttotal_dead %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dead)); +- printk(KERN_DEBUG "\ttotal_dark %llu\n", ++ printk(KERN_ERR "\ttotal_dark %llu\n", + (unsigned long long)le64_to_cpu(mst->total_dark)); + break; + } +@@ -421,11 +448,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + { + const struct ubifs_ref_node *ref = node; + +- printk(KERN_DEBUG "\tlnum %u\n", ++ printk(KERN_ERR "\tlnum %u\n", + le32_to_cpu(ref->lnum)); +- printk(KERN_DEBUG "\toffs %u\n", ++ printk(KERN_ERR "\toffs %u\n", + le32_to_cpu(ref->offs)); +- printk(KERN_DEBUG "\tjhead %u\n", ++ printk(KERN_ERR "\tjhead %u\n", + le32_to_cpu(ref->jhead)); + break; + } +@@ -434,39 +461,40 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + const struct ubifs_ino_node *ino = node; + + key_read(c, &ino->key, &key); +- printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); +- printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ++ printk(KERN_ERR "\tkey %s\n", ++ dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); ++ printk(KERN_ERR "\tcreat_sqnum %llu\n", + (unsigned long long)le64_to_cpu(ino->creat_sqnum)); +- printk(KERN_DEBUG "\tsize %llu\n", ++ printk(KERN_ERR "\tsize %llu\n", + (unsigned long long)le64_to_cpu(ino->size)); +- printk(KERN_DEBUG "\tnlink %u\n", ++ printk(KERN_ERR "\tnlink %u\n", + le32_to_cpu(ino->nlink)); +- printk(KERN_DEBUG "\tatime %lld.%u\n", ++ printk(KERN_ERR "\tatime %lld.%u\n", + (long long)le64_to_cpu(ino->atime_sec), + le32_to_cpu(ino->atime_nsec)); +- printk(KERN_DEBUG "\tmtime %lld.%u\n", ++ printk(KERN_ERR "\tmtime %lld.%u\n", + (long long)le64_to_cpu(ino->mtime_sec), + le32_to_cpu(ino->mtime_nsec)); +- printk(KERN_DEBUG "\tctime %lld.%u\n", ++ printk(KERN_ERR "\tctime %lld.%u\n", + (long long)le64_to_cpu(ino->ctime_sec), + le32_to_cpu(ino->ctime_nsec)); +- printk(KERN_DEBUG "\tuid %u\n", ++ printk(KERN_ERR "\tuid %u\n", + le32_to_cpu(ino->uid)); +- printk(KERN_DEBUG "\tgid %u\n", 
++ printk(KERN_ERR "\tgid %u\n", + le32_to_cpu(ino->gid)); +- printk(KERN_DEBUG "\tmode %u\n", ++ printk(KERN_ERR "\tmode %u\n", + le32_to_cpu(ino->mode)); +- printk(KERN_DEBUG "\tflags %#x\n", ++ printk(KERN_ERR "\tflags %#x\n", + le32_to_cpu(ino->flags)); +- printk(KERN_DEBUG "\txattr_cnt %u\n", ++ printk(KERN_ERR "\txattr_cnt %u\n", + le32_to_cpu(ino->xattr_cnt)); +- printk(KERN_DEBUG "\txattr_size %u\n", ++ printk(KERN_ERR "\txattr_size %u\n", + le32_to_cpu(ino->xattr_size)); +- printk(KERN_DEBUG "\txattr_names %u\n", ++ printk(KERN_ERR "\txattr_names %u\n", + le32_to_cpu(ino->xattr_names)); +- printk(KERN_DEBUG "\tcompr_type %#x\n", ++ printk(KERN_ERR "\tcompr_type %#x\n", + (int)le16_to_cpu(ino->compr_type)); +- printk(KERN_DEBUG "\tdata len %u\n", ++ printk(KERN_ERR "\tdata len %u\n", + le32_to_cpu(ino->data_len)); + break; + } +@@ -477,15 +505,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + int nlen = le16_to_cpu(dent->nlen); + + key_read(c, &dent->key, &key); +- printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); +- printk(KERN_DEBUG "\tinum %llu\n", ++ printk(KERN_ERR "\tkey %s\n", ++ dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); ++ printk(KERN_ERR "\tinum %llu\n", + (unsigned long long)le64_to_cpu(dent->inum)); +- printk(KERN_DEBUG "\ttype %d\n", (int)dent->type); +- printk(KERN_DEBUG "\tnlen %d\n", nlen); +- printk(KERN_DEBUG "\tname "); ++ printk(KERN_ERR "\ttype %d\n", (int)dent->type); ++ printk(KERN_ERR "\tnlen %d\n", nlen); ++ printk(KERN_ERR "\tname "); + + if (nlen > UBIFS_MAX_NLEN) +- printk(KERN_DEBUG "(bad name length, not printing, " ++ printk(KERN_ERR "(bad name length, not printing, " + "bad or corrupted node)"); + else { + for (i = 0; i < nlen && dent->name[i]; i++) +@@ -501,15 +530,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; + + key_read(c, &dn->key, &key); +- printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key)); +- printk(KERN_DEBUG 
"\tsize %u\n", ++ printk(KERN_ERR "\tkey %s\n", ++ dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); ++ printk(KERN_ERR "\tsize %u\n", + le32_to_cpu(dn->size)); +- printk(KERN_DEBUG "\tcompr_typ %d\n", ++ printk(KERN_ERR "\tcompr_typ %d\n", + (int)le16_to_cpu(dn->compr_type)); +- printk(KERN_DEBUG "\tdata size %d\n", ++ printk(KERN_ERR "\tdata size %d\n", + dlen); +- printk(KERN_DEBUG "\tdata:\n"); +- print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1, ++ printk(KERN_ERR "\tdata:\n"); ++ print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, + (void *)&dn->data, dlen, 0); + break; + } +@@ -517,11 +547,11 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + { + const struct ubifs_trun_node *trun = node; + +- printk(KERN_DEBUG "\tinum %u\n", ++ printk(KERN_ERR "\tinum %u\n", + le32_to_cpu(trun->inum)); +- printk(KERN_DEBUG "\told_size %llu\n", ++ printk(KERN_ERR "\told_size %llu\n", + (unsigned long long)le64_to_cpu(trun->old_size)); +- printk(KERN_DEBUG "\tnew_size %llu\n", ++ printk(KERN_ERR "\tnew_size %llu\n", + (unsigned long long)le64_to_cpu(trun->new_size)); + break; + } +@@ -530,19 +560,21 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + const struct ubifs_idx_node *idx = node; + + n = le16_to_cpu(idx->child_cnt); +- printk(KERN_DEBUG "\tchild_cnt %d\n", n); +- printk(KERN_DEBUG "\tlevel %d\n", ++ printk(KERN_ERR "\tchild_cnt %d\n", n); ++ printk(KERN_ERR "\tlevel %d\n", + (int)le16_to_cpu(idx->level)); +- printk(KERN_DEBUG "\tBranches:\n"); ++ printk(KERN_ERR "\tBranches:\n"); + + for (i = 0; i < n && i < c->fanout - 1; i++) { + const struct ubifs_branch *br; + + br = ubifs_idx_branch(c, idx, i); + key_read(c, &br->key, &key); +- printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n", ++ printk(KERN_ERR "\t%d: LEB %d:%d len %d key %s\n", + i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs), +- le32_to_cpu(br->len), DBGKEY(&key)); ++ le32_to_cpu(br->len), ++ dbg_snprintf_key(c, &key, key_buf, ++ 
DBG_KEY_BUF_LEN)); + } + break; + } +@@ -552,20 +584,20 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + { + const struct ubifs_orph_node *orph = node; + +- printk(KERN_DEBUG "\tcommit number %llu\n", ++ printk(KERN_ERR "\tcommit number %llu\n", + (unsigned long long) + le64_to_cpu(orph->cmt_no) & LLONG_MAX); +- printk(KERN_DEBUG "\tlast node flag %llu\n", ++ printk(KERN_ERR "\tlast node flag %llu\n", + (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); + n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; +- printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); ++ printk(KERN_ERR "\t%d orphan inode numbers:\n", n); + for (i = 0; i < n; i++) +- printk(KERN_DEBUG "\t ino %llu\n", ++ printk(KERN_ERR "\t ino %llu\n", + (unsigned long long)le64_to_cpu(orph->inos[i])); + break; + } + default: +- printk(KERN_DEBUG "node type %d was not recognized\n", ++ printk(KERN_ERR "node type %d was not recognized\n", + (int)ch->node_type); + } + spin_unlock(&dbg_lock); +@@ -574,16 +606,16 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) + void dbg_dump_budget_req(const struct ubifs_budget_req *req) + { + spin_lock(&dbg_lock); +- printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n", ++ printk(KERN_ERR "Budgeting request: new_ino %d, dirtied_ino %d\n", + req->new_ino, req->dirtied_ino); +- printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n", ++ printk(KERN_ERR "\tnew_ino_d %d, dirtied_ino_d %d\n", + req->new_ino_d, req->dirtied_ino_d); +- printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n", ++ printk(KERN_ERR "\tnew_page %d, dirtied_page %d\n", + req->new_page, req->dirtied_page); +- printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n", ++ printk(KERN_ERR "\tnew_dent %d, mod_dent %d\n", + req->new_dent, req->mod_dent); +- printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth); +- printk(KERN_DEBUG "\tdata_growth %d dd_growth %d\n", ++ printk(KERN_ERR "\tidx_growth %d\n", req->idx_growth); ++ printk(KERN_ERR "\tdata_growth 
%d dd_growth %d\n", + req->data_growth, req->dd_growth); + spin_unlock(&dbg_lock); + } +@@ -591,18 +623,18 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req) + void dbg_dump_lstats(const struct ubifs_lp_stats *lst) + { + spin_lock(&dbg_lock); +- printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, " ++ printk(KERN_ERR "(pid %d) Lprops statistics: empty_lebs %d, " + "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); +- printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " ++ printk(KERN_ERR "\ttaken_empty_lebs %d, total_free %lld, " + "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, + lst->total_dirty); +- printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, " ++ printk(KERN_ERR "\ttotal_used %lld, total_dark %lld, " + "total_dead %lld\n", lst->total_used, lst->total_dark, + lst->total_dead); + spin_unlock(&dbg_lock); + } + +-void dbg_dump_budg(struct ubifs_info *c) ++void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) + { + int i; + struct rb_node *rb; +@@ -610,51 +642,69 @@ void dbg_dump_budg(struct ubifs_info *c) + struct ubifs_gced_idx_leb *idx_gc; + long long available, outstanding, free; + +- ubifs_assert(spin_is_locked(&c->space_lock)); ++ spin_lock(&c->space_lock); + spin_lock(&dbg_lock); +- printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " +- "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, +- c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); +- printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " +- "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, +- c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, +- c->freeable_cnt); +- printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " +- "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, +- c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); +- printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " ++ printk(KERN_ERR "(pid %d) Budgeting info: data 
budget sum %lld, " ++ "total budget sum %lld\n", current->pid, ++ bi->data_growth + bi->dd_growth, ++ bi->data_growth + bi->dd_growth + bi->idx_growth); ++ printk(KERN_ERR "\tbudg_data_growth %lld, budg_dd_growth %lld, " ++ "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, ++ bi->idx_growth); ++ printk(KERN_ERR "\tmin_idx_lebs %d, old_idx_sz %llu, " ++ "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, ++ bi->uncommitted_idx); ++ printk(KERN_ERR "\tpage_budget %d, inode_budget %d, dent_budget %d\n", ++ bi->page_budget, bi->inode_budget, bi->dent_budget); ++ printk(KERN_ERR "\tnospace %u, nospace_rp %u\n", ++ bi->nospace, bi->nospace_rp); ++ printk(KERN_ERR "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", ++ c->dark_wm, c->dead_wm, c->max_idx_node_sz); ++ ++ if (bi != &c->bi) ++ /* ++ * If we are dumping saved budgeting data, do not print ++ * additional information which is about the current state, not ++ * the old one which corresponded to the saved budgeting data. ++ */ ++ goto out_unlock; ++ ++ printk(KERN_ERR "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", ++ c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); ++ printk(KERN_ERR "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " + "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), + atomic_long_read(&c->dirty_zn_cnt), + atomic_long_read(&c->clean_zn_cnt)); +- printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", +- c->dark_wm, c->dead_wm, c->max_idx_node_sz); +- printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", ++ printk(KERN_ERR "\tgc_lnum %d, ihead_lnum %d\n", + c->gc_lnum, c->ihead_lnum); ++ + /* If we are in R/O mode, journal heads do not exist */ + if (c->jheads) + for (i = 0; i < c->jhead_cnt; i++) +- printk(KERN_DEBUG "\tjhead %s\t LEB %d\n", ++ printk(KERN_ERR "\tjhead %s\t LEB %d\n", + dbg_jhead(c->jheads[i].wbuf.jhead), + c->jheads[i].wbuf.lnum); + for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { + bud = rb_entry(rb, struct ubifs_bud, rb); +- printk(KERN_DEBUG 
"\tbud LEB %d\n", bud->lnum); ++ printk(KERN_ERR "\tbud LEB %d\n", bud->lnum); + } + list_for_each_entry(bud, &c->old_buds, list) +- printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum); ++ printk(KERN_ERR "\told bud LEB %d\n", bud->lnum); + list_for_each_entry(idx_gc, &c->idx_gc, list) +- printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n", ++ printk(KERN_ERR "\tGC'ed idx LEB %d unmap %d\n", + idx_gc->lnum, idx_gc->unmap); +- printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); ++ printk(KERN_ERR "\tcommit state %d\n", c->cmt_state); + + /* Print budgeting predictions */ +- available = ubifs_calc_available(c, c->min_idx_lebs); +- outstanding = c->budg_data_growth + c->budg_dd_growth; ++ available = ubifs_calc_available(c, c->bi.min_idx_lebs); ++ outstanding = c->bi.data_growth + c->bi.dd_growth; + free = ubifs_get_free_space_nolock(c); +- printk(KERN_DEBUG "Budgeting predictions:\n"); +- printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", ++ printk(KERN_ERR "Budgeting predictions:\n"); ++ printk(KERN_ERR "\tavailable: %lld, outstanding %lld, free %lld\n", + available, outstanding, free); ++out_unlock: + spin_unlock(&dbg_lock); ++ spin_unlock(&c->space_lock); + } + + void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) +@@ -670,11 +720,11 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) + dark = ubifs_calc_dark(c, spc); + + if (lp->flags & LPROPS_INDEX) +- printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " ++ printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " + "free + dirty %-8d flags %#x (", lp->lnum, lp->free, + lp->dirty, c->leb_size - spc, spc, lp->flags); + else +- printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " ++ printk(KERN_ERR "LEB %-7d free %-8d dirty %-8d used %-8d " + "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " + "flags %#-4x (", lp->lnum, lp->free, lp->dirty, + c->leb_size - spc, spc, dark, dead, +@@ -729,7 +779,13 @@ void 
dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) + if (bud->lnum == lp->lnum) { + int head = 0; + for (i = 0; i < c->jhead_cnt; i++) { +- if (lp->lnum == c->jheads[i].wbuf.lnum) { ++ /* ++ * Note, if we are in R/O mode or in the middle ++ * of mounting/re-mounting, the write-buffers do ++ * not exist. ++ */ ++ if (c->jheads && ++ lp->lnum == c->jheads[i].wbuf.lnum) { + printk(KERN_CONT ", jhead %s", + dbg_jhead(i)); + head = 1; +@@ -751,7 +807,7 @@ void dbg_dump_lprops(struct ubifs_info *c) + struct ubifs_lprops lp; + struct ubifs_lp_stats lst; + +- printk(KERN_DEBUG "(pid %d) start dumping LEB properties\n", ++ printk(KERN_ERR "(pid %d) start dumping LEB properties\n", + current->pid); + ubifs_get_lp_stats(c, &lst); + dbg_dump_lstats(&lst); +@@ -763,7 +819,7 @@ void dbg_dump_lprops(struct ubifs_info *c) + + dbg_dump_lprop(c, &lp); + } +- printk(KERN_DEBUG "(pid %d) finish dumping LEB properties\n", ++ printk(KERN_ERR "(pid %d) finish dumping LEB properties\n", + current->pid); + } + +@@ -772,69 +828,96 @@ void dbg_dump_lpt_info(struct ubifs_info *c) + int i; + + spin_lock(&dbg_lock); +- printk(KERN_DEBUG "(pid %d) dumping LPT information\n", current->pid); +- printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); +- printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); +- printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); +- printk(KERN_DEBUG "\tltab_sz: %d\n", c->ltab_sz); +- printk(KERN_DEBUG "\tlsave_sz: %d\n", c->lsave_sz); +- printk(KERN_DEBUG "\tbig_lpt: %d\n", c->big_lpt); +- printk(KERN_DEBUG "\tlpt_hght: %d\n", c->lpt_hght); +- printk(KERN_DEBUG "\tpnode_cnt: %d\n", c->pnode_cnt); +- printk(KERN_DEBUG "\tnnode_cnt: %d\n", c->nnode_cnt); +- printk(KERN_DEBUG "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); +- printk(KERN_DEBUG "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); +- printk(KERN_DEBUG "\tlsave_cnt: %d\n", c->lsave_cnt); +- printk(KERN_DEBUG "\tspace_bits: %d\n", c->space_bits); +- printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", 
c->lpt_lnum_bits); +- printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); +- printk(KERN_DEBUG "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); +- printk(KERN_DEBUG "\tpcnt_bits: %d\n", c->pcnt_bits); +- printk(KERN_DEBUG "\tlnum_bits: %d\n", c->lnum_bits); +- printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); +- printk(KERN_DEBUG "\tLPT head is at %d:%d\n", ++ printk(KERN_ERR "(pid %d) dumping LPT information\n", current->pid); ++ printk(KERN_ERR "\tlpt_sz: %lld\n", c->lpt_sz); ++ printk(KERN_ERR "\tpnode_sz: %d\n", c->pnode_sz); ++ printk(KERN_ERR "\tnnode_sz: %d\n", c->nnode_sz); ++ printk(KERN_ERR "\tltab_sz: %d\n", c->ltab_sz); ++ printk(KERN_ERR "\tlsave_sz: %d\n", c->lsave_sz); ++ printk(KERN_ERR "\tbig_lpt: %d\n", c->big_lpt); ++ printk(KERN_ERR "\tlpt_hght: %d\n", c->lpt_hght); ++ printk(KERN_ERR "\tpnode_cnt: %d\n", c->pnode_cnt); ++ printk(KERN_ERR "\tnnode_cnt: %d\n", c->nnode_cnt); ++ printk(KERN_ERR "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); ++ printk(KERN_ERR "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); ++ printk(KERN_ERR "\tlsave_cnt: %d\n", c->lsave_cnt); ++ printk(KERN_ERR "\tspace_bits: %d\n", c->space_bits); ++ printk(KERN_ERR "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); ++ printk(KERN_ERR "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); ++ printk(KERN_ERR "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); ++ printk(KERN_ERR "\tpcnt_bits: %d\n", c->pcnt_bits); ++ printk(KERN_ERR "\tlnum_bits: %d\n", c->lnum_bits); ++ printk(KERN_ERR "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); ++ printk(KERN_ERR "\tLPT head is at %d:%d\n", + c->nhead_lnum, c->nhead_offs); +- printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", ++ printk(KERN_ERR "\tLPT ltab is at %d:%d\n", + c->ltab_lnum, c->ltab_offs); + if (c->big_lpt) +- printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", ++ printk(KERN_ERR "\tLPT lsave is at %d:%d\n", + c->lsave_lnum, c->lsave_offs); + for (i = 0; i < c->lpt_lebs; i++) +- printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d " ++ 
printk(KERN_ERR "\tLPT LEB %d free %d dirty %d tgc %d " + "cmt %d\n", i + c->lpt_first, c->ltab[i].free, + c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); + spin_unlock(&dbg_lock); + } + ++void dbg_dump_sleb(const struct ubifs_info *c, ++ const struct ubifs_scan_leb *sleb, int offs) ++{ ++ struct ubifs_scan_node *snod; ++ ++ printk(KERN_ERR "(pid %d) start dumping scanned data from LEB %d:%d\n", ++ current->pid, sleb->lnum, offs); ++ ++ list_for_each_entry(snod, &sleb->nodes, list) { ++ cond_resched(); ++ printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", sleb->lnum, ++ snod->offs, snod->len); ++ dbg_dump_node(c, snod->node); ++ } ++} ++ + void dbg_dump_leb(const struct ubifs_info *c, int lnum) + { + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; ++ void *buf; + +- if (dbg_failure_mode) ++ if (dbg_is_tst_rcvry(c)) + return; + +- printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", ++ printk(KERN_ERR "(pid %d) start dumping LEB %d\n", + current->pid, lnum); +- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); ++ ++ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubifs_err("cannot allocate memory for dumping LEB %d", lnum); ++ return; ++ } ++ ++ sleb = ubifs_scan(c, lnum, 0, buf, 0); + if (IS_ERR(sleb)) { + ubifs_err("scan error %d", (int)PTR_ERR(sleb)); +- return; ++ goto out; + } + +- printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, ++ printk(KERN_ERR "LEB %d has %d nodes ending at %d\n", lnum, + sleb->nodes_cnt, sleb->endpt); + + list_for_each_entry(snod, &sleb->nodes, list) { + cond_resched(); +- printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum, ++ printk(KERN_ERR "Dumping node at LEB %d:%d len %d\n", lnum, + snod->offs, snod->len); + dbg_dump_node(c, snod->node); + } + +- printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", ++ printk(KERN_ERR "(pid %d) finish dumping LEB %d\n", + current->pid, lnum); + ubifs_scan_destroy(sleb); ++ ++out: ++ vfree(buf); + return; + } + +@@ -843,6 +926,7 @@ void 
dbg_dump_znode(const struct ubifs_info *c, + { + int n; + const struct ubifs_zbranch *zbr; ++ char key_buf[DBG_KEY_BUF_LEN]; + + spin_lock(&dbg_lock); + if (znode->parent) +@@ -850,7 +934,7 @@ void dbg_dump_znode(const struct ubifs_info *c, + else + zbr = &c->zroot; + +- printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d" ++ printk(KERN_ERR "znode %p, LEB %d:%d len %d parent %p iip %d level %d" + " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs, + zbr->len, znode->parent, znode->iip, znode->level, + znode->child_cnt, znode->flags); +@@ -860,19 +944,23 @@ void dbg_dump_znode(const struct ubifs_info *c, + return; + } + +- printk(KERN_DEBUG "zbranches:\n"); ++ printk(KERN_ERR "zbranches:\n"); + for (n = 0; n < znode->child_cnt; n++) { + zbr = &znode->zbranch[n]; + if (znode->level > 0) +- printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key " ++ printk(KERN_ERR "\t%d: znode %p LEB %d:%d len %d key " + "%s\n", n, zbr->znode, zbr->lnum, + zbr->offs, zbr->len, +- DBGKEY(&zbr->key)); ++ dbg_snprintf_key(c, &zbr->key, ++ key_buf, ++ DBG_KEY_BUF_LEN)); + else +- printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key " ++ printk(KERN_ERR "\t%d: LNC %p LEB %d:%d len %d key " + "%s\n", n, zbr->znode, zbr->lnum, + zbr->offs, zbr->len, +- DBGKEY(&zbr->key)); ++ dbg_snprintf_key(c, &zbr->key, ++ key_buf, ++ DBG_KEY_BUF_LEN)); + } + spin_unlock(&dbg_lock); + } +@@ -881,16 +969,16 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) + { + int i; + +- printk(KERN_DEBUG "(pid %d) start dumping heap cat %d (%d elements)\n", ++ printk(KERN_ERR "(pid %d) start dumping heap cat %d (%d elements)\n", + current->pid, cat, heap->cnt); + for (i = 0; i < heap->cnt; i++) { + struct ubifs_lprops *lprops = heap->arr[i]; + +- printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d " ++ printk(KERN_ERR "\t%d. 
LEB %d hpos %d free %d dirty %d " + "flags %d\n", i, lprops->lnum, lprops->hpos, + lprops->free, lprops->dirty, lprops->flags); + } +- printk(KERN_DEBUG "(pid %d) finish dumping heap\n", current->pid); ++ printk(KERN_ERR "(pid %d) finish dumping heap\n", current->pid); + } + + void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, +@@ -898,15 +986,15 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, + { + int i; + +- printk(KERN_DEBUG "(pid %d) dumping pnode:\n", current->pid); +- printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", ++ printk(KERN_ERR "(pid %d) dumping pnode:\n", current->pid); ++ printk(KERN_ERR "\taddress %zx parent %zx cnext %zx\n", + (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); +- printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", ++ printk(KERN_ERR "\tflags %lu iip %d level %d num %d\n", + pnode->flags, iip, pnode->level, pnode->num); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + struct ubifs_lprops *lp = &pnode->lprops[i]; + +- printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n", ++ printk(KERN_ERR "\t%d: free %d dirty %d flags %d lnum %d\n", + i, lp->free, lp->dirty, lp->flags, lp->lnum); + } + } +@@ -916,20 +1004,20 @@ void dbg_dump_tnc(struct ubifs_info *c) + struct ubifs_znode *znode; + int level; + +- printk(KERN_DEBUG "\n"); +- printk(KERN_DEBUG "(pid %d) start dumping TNC tree\n", current->pid); ++ printk(KERN_ERR "\n"); ++ printk(KERN_ERR "(pid %d) start dumping TNC tree\n", current->pid); + znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); + level = znode->level; +- printk(KERN_DEBUG "== Level %d ==\n", level); ++ printk(KERN_ERR "== Level %d ==\n", level); + while (znode) { + if (level != znode->level) { + level = znode->level; +- printk(KERN_DEBUG "== Level %d ==\n", level); ++ printk(KERN_ERR "== Level %d ==\n", level); + } + dbg_dump_znode(c, znode); + znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); + } +- printk(KERN_DEBUG "(pid %d) finish dumping 
TNC tree\n", current->pid); ++ printk(KERN_ERR "(pid %d) finish dumping TNC tree\n", current->pid); + } + + static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode, +@@ -961,11 +1049,41 @@ void dbg_dump_index(struct ubifs_info *c) + void dbg_save_space_info(struct ubifs_info *c) + { + struct ubifs_debug_info *d = c->dbg; +- +- ubifs_get_lp_stats(c, &d->saved_lst); ++ int freeable_cnt; + + spin_lock(&c->space_lock); ++ memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); ++ memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info)); ++ d->saved_idx_gc_cnt = c->idx_gc_cnt; ++ ++ /* ++ * We use a dirty hack here and zero out @c->freeable_cnt, because it ++ * affects the free space calculations, and UBIFS might not know about ++ * all freeable eraseblocks. Indeed, we know about freeable eraseblocks ++ * only when we read their lprops, and we do this only lazily, upon the ++ * need. So at any given point of time @c->freeable_cnt might be not ++ * exactly accurate. ++ * ++ * Just one example about the issue we hit when we did not zero ++ * @c->freeable_cnt. ++ * 1. The file-system is mounted R/O, c->freeable_cnt is %0. We save the ++ * amount of free space in @d->saved_free ++ * 2. We re-mount R/W, which makes UBIFS to read the "lsave" ++ * information from flash, where we cache LEBs from various ++ * categories ('ubifs_remount_fs()' -> 'ubifs_lpt_init()' ++ * -> 'lpt_init_wr()' -> 'read_lsave()' -> 'ubifs_lpt_lookup()' ++ * -> 'ubifs_get_pnode()' -> 'update_cats()' ++ * -> 'ubifs_add_to_cat()'). ++ * 3. Lsave contains a freeable eraseblock, and @c->freeable_cnt ++ * becomes %1. ++ * 4. We calculate the amount of free space when the re-mount is ++ * finished in 'dbg_check_space_info()' and it does not match ++ * @d->saved_free. 
++ */ ++ freeable_cnt = c->freeable_cnt; ++ c->freeable_cnt = 0; + d->saved_free = ubifs_get_free_space_nolock(c); ++ c->freeable_cnt = freeable_cnt; + spin_unlock(&c->space_lock); + } + +@@ -982,12 +1100,15 @@ int dbg_check_space_info(struct ubifs_info *c) + { + struct ubifs_debug_info *d = c->dbg; + struct ubifs_lp_stats lst; +- long long avail, free; ++ long long free; ++ int freeable_cnt; + + spin_lock(&c->space_lock); +- avail = ubifs_calc_available(c, c->min_idx_lebs); ++ freeable_cnt = c->freeable_cnt; ++ c->freeable_cnt = 0; ++ free = ubifs_get_free_space_nolock(c); ++ c->freeable_cnt = freeable_cnt; + spin_unlock(&c->space_lock); +- free = ubifs_get_free_space(c); + + if (free != d->saved_free) { + ubifs_err("free space changed from %lld to %lld", +@@ -1000,20 +1121,21 @@ int dbg_check_space_info(struct ubifs_info *c) + out: + ubifs_msg("saved lprops statistics dump"); + dbg_dump_lstats(&d->saved_lst); +- ubifs_get_lp_stats(c, &lst); +- ++ ubifs_msg("saved budgeting info dump"); ++ dbg_dump_budg(c, &d->saved_bi); ++ ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt); + ubifs_msg("current lprops statistics dump"); ++ ubifs_get_lp_stats(c, &lst); + dbg_dump_lstats(&lst); +- +- spin_lock(&c->space_lock); +- dbg_dump_budg(c); +- spin_unlock(&c->space_lock); ++ ubifs_msg("current budgeting info dump"); ++ dbg_dump_budg(c, &c->bi); + dump_stack(); + return -EINVAL; + } + + /** + * dbg_check_synced_i_size - check synchronized inode size. ++ * @c: UBIFS file-system description object + * @inode: inode to check + * + * If inode is clean, synchronized inode size has to be equivalent to current +@@ -1021,12 +1143,12 @@ out: + * has to be locked). Returns %0 if synchronized inode size if correct, and + * %-EINVAL if not. 
+ */ +-int dbg_check_synced_i_size(struct inode *inode) ++int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode) + { + int err = 0; + struct ubifs_inode *ui = ubifs_inode(inode); + +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + if (!S_ISREG(inode->i_mode)) + return 0; +@@ -1059,7 +1181,7 @@ int dbg_check_synced_i_size(struct inode *inode) + * Note, it is good idea to make sure the @dir->i_mutex is locked before + * calling this function. + */ +-int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) ++int dbg_check_dir(struct ubifs_info *c, const struct inode *dir) + { + unsigned int nlink = 2; + union ubifs_key key; +@@ -1067,7 +1189,7 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) + struct qstr nm = { .name = NULL }; + loff_t size = UBIFS_INO_NODE_SZ; + +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + + if (!S_ISDIR(dir->i_mode)) +@@ -1101,12 +1223,14 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) + "but calculated size is %llu", dir->i_ino, + (unsigned long long)i_size_read(dir), + (unsigned long long)size); ++ dbg_dump_inode(c, dir); + dump_stack(); + return -EINVAL; + } + if (dir->i_nlink != nlink) { + ubifs_err("directory inode %lu has nlink %u, but calculated " + "nlink is %u", dir->i_ino, dir->i_nlink, nlink); ++ dbg_dump_inode(c, dir); + dump_stack(); + return -EINVAL; + } +@@ -1133,6 +1257,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, + int err, nlen1, nlen2, cmp; + struct ubifs_dent_node *dent1, *dent2; + union ubifs_key key; ++ char key_buf[DBG_KEY_BUF_LEN]; + + ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key)); + dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS); +@@ -1163,9 +1288,11 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, + key_read(c, &dent1->key, &key); + if (keys_cmp(c, &zbr1->key, &key)) { + dbg_err("1st 
entry at %d:%d has key %s", zbr1->lnum, +- zbr1->offs, DBGKEY(&key)); ++ zbr1->offs, dbg_snprintf_key(c, &key, key_buf, ++ DBG_KEY_BUF_LEN)); + dbg_err("but it should have key %s according to tnc", +- DBGKEY(&zbr1->key)); ++ dbg_snprintf_key(c, &zbr1->key, key_buf, ++ DBG_KEY_BUF_LEN)); + dbg_dump_node(c, dent1); + goto out_free; + } +@@ -1173,9 +1300,11 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, + key_read(c, &dent2->key, &key); + if (keys_cmp(c, &zbr2->key, &key)) { + dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum, +- zbr1->offs, DBGKEY(&key)); ++ zbr1->offs, dbg_snprintf_key(c, &key, key_buf, ++ DBG_KEY_BUF_LEN)); + dbg_err("but it should have key %s according to tnc", +- DBGKEY(&zbr2->key)); ++ dbg_snprintf_key(c, &zbr2->key, key_buf, ++ DBG_KEY_BUF_LEN)); + dbg_dump_node(c, dent2); + goto out_free; + } +@@ -1192,7 +1321,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, + dbg_err("2 xent/dent nodes with the same name"); + else + dbg_err("bad order of colliding key %s", +- DBGKEY(&key)); ++ dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); + + ubifs_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs); + dbg_dump_node(c, dent1); +@@ -1423,7 +1552,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra) + long clean_cnt = 0, dirty_cnt = 0; + int err, last; + +- if (!(ubifs_chk_flags & UBIFS_CHK_TNC)) ++ if (!dbg_is_chk_index(c)) + return 0; + + ubifs_assert(mutex_is_locked(&c->tnc_mutex)); +@@ -1670,7 +1799,7 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size) + int err; + long long calc = 0; + +- if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) ++ if (!dbg_is_chk_index(c)) + return 0; + + err = dbg_walk_index(c, NULL, add_size, &calc); +@@ -1751,6 +1880,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, + struct rb_node **p, *parent = NULL; + struct fsck_inode *fscki; + ino_t inum = key_inum_flash(c, &ino->key); ++ struct inode *inode; ++ struct ubifs_inode 
*ui; + + p = &fsckd->inodes.rb_node; + while (*p) { +@@ -1774,19 +1905,46 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, + if (!fscki) + return ERR_PTR(-ENOMEM); + ++ inode = ilookup(c->vfs_sb, inum); ++ + fscki->inum = inum; +- fscki->nlink = le32_to_cpu(ino->nlink); +- fscki->size = le64_to_cpu(ino->size); +- fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); +- fscki->xattr_sz = le32_to_cpu(ino->xattr_size); +- fscki->xattr_nms = le32_to_cpu(ino->xattr_names); +- fscki->mode = le32_to_cpu(ino->mode); ++ /* ++ * If the inode is present in the VFS inode cache, use it instead of ++ * the on-flash inode which might be out-of-date. E.g., the size might ++ * be out-of-date. If we do not do this, the following may happen, for ++ * example: ++ * 1. A power cut happens ++ * 2. We mount the file-system R/O, the replay process fixes up the ++ * inode size in the VFS cache, but on on-flash. ++ * 3. 'check_leaf()' fails because it hits a data node beyond inode ++ * size. ++ */ ++ if (!inode) { ++ fscki->nlink = le32_to_cpu(ino->nlink); ++ fscki->size = le64_to_cpu(ino->size); ++ fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); ++ fscki->xattr_sz = le32_to_cpu(ino->xattr_size); ++ fscki->xattr_nms = le32_to_cpu(ino->xattr_names); ++ fscki->mode = le32_to_cpu(ino->mode); ++ } else { ++ ui = ubifs_inode(inode); ++ fscki->nlink = inode->i_nlink; ++ fscki->size = inode->i_size; ++ fscki->xattr_cnt = ui->xattr_cnt; ++ fscki->xattr_sz = ui->xattr_size; ++ fscki->xattr_nms = ui->xattr_names; ++ fscki->mode = inode->i_mode; ++ iput(inode); ++ } ++ + if (S_ISDIR(fscki->mode)) { + fscki->calc_sz = UBIFS_INO_NODE_SZ; + fscki->calc_cnt = 2; + } ++ + rb_link_node(&fscki->rb, parent, p); + rb_insert_color(&fscki->rb, &fsckd->inodes); ++ + return fscki; + } + +@@ -2217,7 +2375,7 @@ int dbg_check_filesystem(struct ubifs_info *c) + int err; + struct fsck_data fsckd; + +- if (!(ubifs_chk_flags & UBIFS_CHK_FS)) ++ if (!dbg_is_chk_fs(c)) + return 0; + + fsckd.inodes = RB_ROOT; +@@ 
-2252,7 +2410,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) + struct list_head *cur; + struct ubifs_scan_node *sa, *sb; + +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + + for (cur = head->next; cur->next != head; cur = cur->next) { +@@ -2319,7 +2477,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) + struct list_head *cur; + struct ubifs_scan_node *sa, *sb; + +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + + for (cur = head->next; cur->next != head; cur = cur->next) { +@@ -2379,7 +2537,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) + hashb = key_block(c, &sb->key); + + if (hasha > hashb) { +- ubifs_err("larger hash %u goes before %u", hasha, hashb); ++ ubifs_err("larger hash %u goes before %u", ++ hasha, hashb); + goto error_dump; + } + } +@@ -2395,393 +2554,351 @@ error_dump: + return 0; + } + +-static int invocation_cnt; +- +-int dbg_force_in_the_gaps(void) +-{ +- if (!dbg_force_in_the_gaps_enabled) +- return 0; +- /* Force in-the-gaps every 8th commit */ +- return !((invocation_cnt++) & 0x7); +-} +- +-/* Failure mode for recovery testing */ +- +-#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d)) +- +-struct failure_mode_info { +- struct list_head list; +- struct ubifs_info *c; +-}; +- +-static LIST_HEAD(fmi_list); +-static DEFINE_SPINLOCK(fmi_lock); +- +-static unsigned int next; +- +-static int simple_rand(void) +-{ +- if (next == 0) +- next = current->pid; +- next = next * 1103515245 + 12345; +- return (next >> 16) & 32767; +-} +- +-static void failure_mode_init(struct ubifs_info *c) +-{ +- struct failure_mode_info *fmi; +- +- fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS); +- if (!fmi) { +- ubifs_err("Failed to register failure mode - no memory"); +- return; +- } +- fmi->c = c; +- spin_lock(&fmi_lock); +- list_add_tail(&fmi->list, &fmi_list); +- 
spin_unlock(&fmi_lock); +-} +- +-static void failure_mode_exit(struct ubifs_info *c) +-{ +- struct failure_mode_info *fmi, *tmp; +- +- spin_lock(&fmi_lock); +- list_for_each_entry_safe(fmi, tmp, &fmi_list, list) +- if (fmi->c == c) { +- list_del(&fmi->list); +- kfree(fmi); +- } +- spin_unlock(&fmi_lock); +-} +- +-static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc) ++static inline int chance(unsigned int n, unsigned int out_of) + { +- struct failure_mode_info *fmi; +- +- spin_lock(&fmi_lock); +- list_for_each_entry(fmi, &fmi_list, list) +- if (fmi->c->ubi == desc) { +- struct ubifs_info *c = fmi->c; ++ return !!((random32() % out_of) + 1 <= n); + +- spin_unlock(&fmi_lock); +- return c; +- } +- spin_unlock(&fmi_lock); +- return NULL; + } + +-static int in_failure_mode(struct ubi_volume_desc *desc) ++static int power_cut_emulated(struct ubifs_info *c, int lnum, int write) + { +- struct ubifs_info *c = dbg_find_info(desc); +- +- if (c && dbg_failure_mode) +- return c->dbg->failure_mode; +- return 0; +-} ++ struct ubifs_debug_info *d = c->dbg; + +-static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) +-{ +- struct ubifs_info *c = dbg_find_info(desc); +- struct ubifs_debug_info *d; ++ ubifs_assert(dbg_is_tst_rcvry(c)); + +- if (!c || !dbg_failure_mode) +- return 0; +- d = c->dbg; +- if (d->failure_mode) +- return 1; +- if (!d->fail_cnt) { +- /* First call - decide delay to failure */ ++ if (!d->pc_cnt) { ++ /* First call - decide delay to the power cut */ + if (chance(1, 2)) { +- unsigned int delay = 1 << (simple_rand() >> 11); ++ unsigned long delay; + + if (chance(1, 2)) { +- d->fail_delay = 1; +- d->fail_timeout = jiffies + +- msecs_to_jiffies(delay); +- dbg_rcvry("failing after %ums", delay); ++ d->pc_delay = 1; ++ /* Fail withing 1 minute */ ++ delay = random32() % 60000; ++ d->pc_timeout = jiffies; ++ d->pc_timeout += msecs_to_jiffies(delay); ++ ubifs_warn("failing after %lums", delay); + } else { +- d->fail_delay = 2; +- 
d->fail_cnt_max = delay; +- dbg_rcvry("failing after %u calls", delay); ++ d->pc_delay = 2; ++ delay = random32() % 10000; ++ /* Fail within 10000 operations */ ++ d->pc_cnt_max = delay; ++ ubifs_warn("failing after %lu calls", delay); + } + } +- d->fail_cnt += 1; ++ ++ d->pc_cnt += 1; + } ++ + /* Determine if failure delay has expired */ +- if (d->fail_delay == 1) { +- if (time_before(jiffies, d->fail_timeout)) ++ if (d->pc_delay == 1 && time_before(jiffies, d->pc_timeout)) + return 0; +- } else if (d->fail_delay == 2) +- if (d->fail_cnt++ < d->fail_cnt_max) ++ if (d->pc_delay == 2 && d->pc_cnt++ < d->pc_cnt_max) + return 0; ++ + if (lnum == UBIFS_SB_LNUM) { +- if (write) { +- if (chance(1, 2)) +- return 0; +- } else if (chance(19, 20)) ++ if (write && chance(1, 2)) + return 0; +- dbg_rcvry("failing in super block LEB %d", lnum); ++ if (chance(19, 20)) ++ return 0; ++ ubifs_warn("failing in super block LEB %d", lnum); + } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { + if (chance(19, 20)) + return 0; +- dbg_rcvry("failing in master LEB %d", lnum); ++ ubifs_warn("failing in master LEB %d", lnum); + } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { +- if (write) { +- if (chance(99, 100)) +- return 0; +- } else if (chance(399, 400)) ++ if (write && chance(99, 100)) ++ return 0; ++ if (chance(399, 400)) + return 0; +- dbg_rcvry("failing in log LEB %d", lnum); ++ ubifs_warn("failing in log LEB %d", lnum); + } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { +- if (write) { +- if (chance(7, 8)) +- return 0; +- } else if (chance(19, 20)) ++ if (write && chance(7, 8)) + return 0; +- dbg_rcvry("failing in LPT LEB %d", lnum); ++ if (chance(19, 20)) ++ return 0; ++ ubifs_warn("failing in LPT LEB %d", lnum); + } else if (lnum >= c->orph_first && lnum <= c->orph_last) { +- if (write) { +- if (chance(1, 2)) +- return 0; +- } else if (chance(9, 10)) ++ if (write && chance(1, 2)) + return 0; +- dbg_rcvry("failing in orphan LEB %d", lnum); ++ if 
(chance(9, 10)) ++ return 0; ++ ubifs_warn("failing in orphan LEB %d", lnum); + } else if (lnum == c->ihead_lnum) { + if (chance(99, 100)) + return 0; +- dbg_rcvry("failing in index head LEB %d", lnum); ++ ubifs_warn("failing in index head LEB %d", lnum); + } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { + if (chance(9, 10)) + return 0; +- dbg_rcvry("failing in GC head LEB %d", lnum); ++ ubifs_warn("failing in GC head LEB %d", lnum); + } else if (write && !RB_EMPTY_ROOT(&c->buds) && + !ubifs_search_bud(c, lnum)) { + if (chance(19, 20)) + return 0; +- dbg_rcvry("failing in non-bud LEB %d", lnum); ++ ubifs_warn("failing in non-bud LEB %d", lnum); + } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || + c->cmt_state == COMMIT_RUNNING_REQUIRED) { + if (chance(999, 1000)) + return 0; +- dbg_rcvry("failing in bud LEB %d commit running", lnum); ++ ubifs_warn("failing in bud LEB %d commit running", lnum); + } else { + if (chance(9999, 10000)) + return 0; +- dbg_rcvry("failing in bud LEB %d commit not running", lnum); ++ ubifs_warn("failing in bud LEB %d commit not running", lnum); + } +- ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); +- d->failure_mode = 1; ++ ++ d->pc_happened = 1; ++ ubifs_warn("========== Power cut emulated =========="); + dump_stack(); + return 1; + } + +-static void cut_data(const void *buf, int len) ++static void cut_data(const void *buf, unsigned int len) + { +- int flen, i; ++ unsigned int from, to, i, ffs = chance(1, 2); + unsigned char *p = (void *)buf; + +- flen = (len * (long long)simple_rand()) >> 15; +- for (i = flen; i < len; i++) +- p[i] = 0xff; +-} ++ from = random32() % (len + 1); ++ if (chance(1, 2)) ++ to = random32() % (len - from + 1); ++ else ++ to = len; + +-int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, +- int len, int check) +-{ +- if (in_failure_mode(desc)) +- return -EIO; +- return ubi_leb_read(desc, lnum, buf, offset, len, check); ++ if (from < to) ++ 
ubifs_warn("filled bytes %u-%u with %s", from, to - 1, ++ ffs ? "0xFFs" : "random data"); ++ ++ if (ffs) ++ for (i = from; i < to; i++) ++ p[i] = 0xFF; ++ else ++ for (i = from; i < to; i++) ++ p[i] = random32() % 0x100; + } + +-int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, +- int offset, int len, int dtype) ++int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, ++ int offs, int len, int dtype) + { + int err, failing; + +- if (in_failure_mode(desc)) +- return -EIO; +- failing = do_fail(desc, lnum, 1); ++ if (c->dbg->pc_happened) ++ return -EROFS; ++ ++ failing = power_cut_emulated(c, lnum, 1); + if (failing) + cut_data(buf, len); +- err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); ++ err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); + if (err) + return err; + if (failing) +- return -EIO; ++ return -EROFS; + return 0; + } + +-int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, ++int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, + int len, int dtype) + { + int err; + +- if (do_fail(desc, lnum, 1)) +- return -EIO; +- err = ubi_leb_change(desc, lnum, buf, len, dtype); ++ if (c->dbg->pc_happened) ++ return -EROFS; ++ if (power_cut_emulated(c, lnum, 1)) ++ return -EROFS; ++ err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); + if (err) + return err; +- if (do_fail(desc, lnum, 1)) +- return -EIO; ++ if (power_cut_emulated(c, lnum, 1)) ++ return -EROFS; + return 0; + } + +-int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) ++int dbg_leb_unmap(struct ubifs_info *c, int lnum) + { + int err; + +- if (do_fail(desc, lnum, 0)) +- return -EIO; +- err = ubi_leb_erase(desc, lnum); ++ if (c->dbg->pc_happened) ++ return -EROFS; ++ if (power_cut_emulated(c, lnum, 0)) ++ return -EROFS; ++ err = ubi_leb_unmap(c->ubi, lnum); + if (err) + return err; +- if (do_fail(desc, lnum, 0)) +- return -EIO; ++ if (power_cut_emulated(c, lnum, 0)) ++ return -EROFS; + return 0; + } + +-int 
dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) ++int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype) + { + int err; + +- if (do_fail(desc, lnum, 0)) +- return -EIO; +- err = ubi_leb_unmap(desc, lnum); ++ if (c->dbg->pc_happened) ++ return -EROFS; ++ if (power_cut_emulated(c, lnum, 0)) ++ return -EROFS; ++ err = ubi_leb_map(c->ubi, lnum, dtype); + if (err) + return err; +- if (do_fail(desc, lnum, 0)) +- return -EIO; ++ if (power_cut_emulated(c, lnum, 0)) ++ return -EROFS; + return 0; + } + +-int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) +-{ +- if (in_failure_mode(desc)) +- return -EIO; +- return ubi_is_mapped(desc, lnum); +-} ++/* ++ * Root directory for UBIFS stuff in debugfs. Contains sub-directories which ++ * contain the stuff specific to particular file-system mounts. ++ */ ++static struct dentry *dfs_rootdir; + +-int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) ++static int dfs_file_open(struct inode *inode, struct file *file) + { +- int err; +- +- if (do_fail(desc, lnum, 0)) +- return -EIO; +- err = ubi_leb_map(desc, lnum, dtype); +- if (err) +- return err; +- if (do_fail(desc, lnum, 0)) +- return -EIO; +- return 0; ++ file->private_data = inode->i_private; ++ return nonseekable_open(inode, file); + } + + /** +- * ubifs_debugging_init - initialize UBIFS debugging. +- * @c: UBIFS file-system description object ++ * provide_user_output - provide output to the user reading a debugfs file. ++ * @val: boolean value for the answer ++ * @u: the buffer to store the answer at ++ * @count: size of the buffer ++ * @ppos: position in the @u output buffer + * +- * This function initializes debugging-related data for the file system. +- * Returns zero in case of success and a negative error code in case of ++ * This is a simple helper function which stores @val boolean value in the user ++ * buffer when the user reads one of UBIFS debugfs files. 
Returns amount of ++ * bytes written to @u in case of success and a negative error code in case of + * failure. + */ +-int ubifs_debugging_init(struct ubifs_info *c) ++static int provide_user_output(int val, char __user *u, size_t count, ++ loff_t *ppos) + { +- c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); +- if (!c->dbg) +- return -ENOMEM; +- +- c->dbg->buf = vmalloc(c->leb_size); +- if (!c->dbg->buf) +- goto out; ++ char buf[3]; + +- failure_mode_init(c); +- return 0; ++ if (val) ++ buf[0] = '1'; ++ else ++ buf[0] = '0'; ++ buf[1] = '\n'; ++ buf[2] = 0x00; + +-out: +- kfree(c->dbg); +- return -ENOMEM; ++ return simple_read_from_buffer(u, count, ppos, buf, 2); + } + +-/** +- * ubifs_debugging_exit - free debugging data. +- * @c: UBIFS file-system description object +- */ +-void ubifs_debugging_exit(struct ubifs_info *c) ++static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count, ++ loff_t *ppos) + { +- failure_mode_exit(c); +- vfree(c->dbg->buf); +- kfree(c->dbg); +-} ++ struct dentry *dent = file->f_path.dentry; ++ struct ubifs_info *c = file->private_data; ++ struct ubifs_debug_info *d = c->dbg; ++ int val; ++ ++ if (dent == d->dfs_chk_gen) ++ val = d->chk_gen; ++ else if (dent == d->dfs_chk_index) ++ val = d->chk_index; ++ else if (dent == d->dfs_chk_orph) ++ val = d->chk_orph; ++ else if (dent == d->dfs_chk_lprops) ++ val = d->chk_lprops; ++ else if (dent == d->dfs_chk_fs) ++ val = d->chk_fs; ++ else if (dent == d->dfs_tst_rcvry) ++ val = d->tst_rcvry; ++ else ++ return -EINVAL; + +-/* +- * Root directory for UBIFS stuff in debugfs. Contains sub-directories which +- * contain the stuff specific to particular file-system mounts. +- */ +-static struct dentry *dfs_rootdir; ++ return provide_user_output(val, u, count, ppos); ++} + + /** +- * dbg_debugfs_init - initialize debugfs file-system. ++ * interpret_user_input - interpret user debugfs file input. 
++ * @u: user-provided buffer with the input ++ * @count: buffer size + * +- * UBIFS uses debugfs file-system to expose various debugging knobs to +- * user-space. This function creates "ubifs" directory in the debugfs +- * file-system. Returns zero in case of success and a negative error code in +- * case of failure. ++ * This is a helper function which interpret user input to a boolean UBIFS ++ * debugfs file. Returns %0 or %1 in case of success and a negative error code ++ * in case of failure. + */ +-int dbg_debugfs_init(void) ++static int interpret_user_input(const char __user *u, size_t count) + { +- dfs_rootdir = debugfs_create_dir("ubifs", NULL); +- if (IS_ERR(dfs_rootdir)) { +- int err = PTR_ERR(dfs_rootdir); +- ubifs_err("cannot create \"ubifs\" debugfs directory, " +- "error %d\n", err); +- return err; +- } ++ size_t buf_size; ++ char buf[8]; + +- return 0; +-} ++ buf_size = min_t(size_t, count, (sizeof(buf) - 1)); ++ if (copy_from_user(buf, u, buf_size)) ++ return -EFAULT; + +-/** +- * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. +- */ +-void dbg_debugfs_exit(void) +-{ +- debugfs_remove(dfs_rootdir); +-} ++ if (buf[0] == '1') ++ return 1; ++ else if (buf[0] == '0') ++ return 0; + +-static int open_debugfs_file(struct inode *inode, struct file *file) +-{ +- file->private_data = inode->i_private; +- return 0; ++ return -EINVAL; + } + +-static ssize_t write_debugfs_file(struct file *file, const char __user *buf, +- size_t count, loff_t *ppos) ++static ssize_t dfs_file_write(struct file *file, const char __user *u, ++ size_t count, loff_t *ppos) + { + struct ubifs_info *c = file->private_data; + struct ubifs_debug_info *d = c->dbg; ++ struct dentry *dent = file->f_path.dentry; ++ int val; + +- if (file->f_path.dentry == d->dfs_dump_lprops) ++ /* ++ * TODO: this is racy - the file-system might have already been ++ * unmounted and we'd oops in this case. 
The plan is to fix it with ++ * help of 'iterate_supers_type()' which we should have in v3.0: when ++ * a debugfs opened, we rember FS's UUID in file->private_data. Then ++ * whenever we access the FS via a debugfs file, we iterate all UBIFS ++ * superblocks and fine the one with the same UUID, and take the ++ * locking right. ++ * ++ * The other way to go suggested by Al Viro is to create a separate ++ * 'ubifs-debug' file-system instead. ++ */ ++ if (file->f_path.dentry == d->dfs_dump_lprops) { + dbg_dump_lprops(c); +- else if (file->f_path.dentry == d->dfs_dump_budg) { +- spin_lock(&c->space_lock); +- dbg_dump_budg(c); +- spin_unlock(&c->space_lock); +- } else if (file->f_path.dentry == d->dfs_dump_tnc) { ++ return count; ++ } ++ if (file->f_path.dentry == d->dfs_dump_budg) { ++ dbg_dump_budg(c, &c->bi); ++ return count; ++ } ++ if (file->f_path.dentry == d->dfs_dump_tnc) { + mutex_lock(&c->tnc_mutex); + dbg_dump_tnc(c); + mutex_unlock(&c->tnc_mutex); +- } else ++ return count; ++ } ++ ++ val = interpret_user_input(u, count); ++ if (val < 0) ++ return val; ++ ++ if (dent == d->dfs_chk_gen) ++ d->chk_gen = val; ++ else if (dent == d->dfs_chk_index) ++ d->chk_index = val; ++ else if (dent == d->dfs_chk_orph) ++ d->chk_orph = val; ++ else if (dent == d->dfs_chk_lprops) ++ d->chk_lprops = val; ++ else if (dent == d->dfs_chk_fs) ++ d->chk_fs = val; ++ else if (dent == d->dfs_tst_rcvry) ++ d->tst_rcvry = val; ++ else + return -EINVAL; + +- *ppos += count; + return count; + } + + static const struct file_operations dfs_fops = { +- .open = open_debugfs_file, +- .write = write_debugfs_file, ++ .open = dfs_file_open, ++ .read = dfs_file_read, ++ .write = dfs_file_write, + .owner = THIS_MODULE, +- .llseek = default_llseek, ++ .llseek = no_llseek, + }; + + /** +@@ -2798,46 +2915,94 @@ static const struct file_operations dfs_fops = { + */ + int dbg_debugfs_init_fs(struct ubifs_info *c) + { +- int err; ++ int err, n; + const char *fname; + struct dentry *dent; + struct 
ubifs_debug_info *d = c->dbg; + +- sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); +- d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); +- if (IS_ERR(d->dfs_dir)) { +- err = PTR_ERR(d->dfs_dir); +- ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", +- d->dfs_dir_name, err); ++ n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, ++ c->vi.ubi_num, c->vi.vol_id); ++ if (n == UBIFS_DFS_DIR_LEN) { ++ /* The array size is too small */ ++ fname = UBIFS_DFS_DIR_NAME; ++ dent = ERR_PTR(-EINVAL); + goto out; + } + ++ fname = d->dfs_dir_name; ++ dent = debugfs_create_dir(fname, dfs_rootdir); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out; ++ d->dfs_dir = dent; ++ + fname = "dump_lprops"; +- dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); +- if (IS_ERR(dent)) ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_dump_lprops = dent; + + fname = "dump_budg"; +- dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); +- if (IS_ERR(dent)) ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_dump_budg = dent; + + fname = "dump_tnc"; +- dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); +- if (IS_ERR(dent)) ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) + goto out_remove; + d->dfs_dump_tnc = dent; + ++ fname = "chk_general"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_chk_gen = dent; ++ ++ fname = "chk_index"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_chk_index = dent; ++ ++ fname = "chk_orphans"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, 
c, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_chk_orph = dent; ++ ++ fname = "chk_lprops"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_chk_lprops = dent; ++ ++ fname = "chk_fs"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_chk_fs = dent; ++ ++ fname = "tst_recovery"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c, ++ &dfs_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ d->dfs_tst_rcvry = dent; ++ + return 0; + + out_remove: +- err = PTR_ERR(dent); +- ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", +- fname, err); + debugfs_remove_recursive(d->dfs_dir); + out: ++ err = dent ? PTR_ERR(dent) : -ENODEV; ++ ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", ++ fname, err); + return err; + } + +@@ -2850,4 +3015,179 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c) + debugfs_remove_recursive(c->dbg->dfs_dir); + } + ++struct ubifs_global_debug_info ubifs_dbg; ++ ++static struct dentry *dfs_chk_gen; ++static struct dentry *dfs_chk_index; ++static struct dentry *dfs_chk_orph; ++static struct dentry *dfs_chk_lprops; ++static struct dentry *dfs_chk_fs; ++static struct dentry *dfs_tst_rcvry; ++ ++static ssize_t dfs_global_file_read(struct file *file, char __user *u, ++ size_t count, loff_t *ppos) ++{ ++ struct dentry *dent = file->f_path.dentry; ++ int val; ++ ++ if (dent == dfs_chk_gen) ++ val = ubifs_dbg.chk_gen; ++ else if (dent == dfs_chk_index) ++ val = ubifs_dbg.chk_index; ++ else if (dent == dfs_chk_orph) ++ val = ubifs_dbg.chk_orph; ++ else if (dent == dfs_chk_lprops) ++ val = ubifs_dbg.chk_lprops; ++ else if (dent == dfs_chk_fs) ++ val = ubifs_dbg.chk_fs; ++ else if (dent == dfs_tst_rcvry) ++ val = ubifs_dbg.tst_rcvry; ++ else ++ return -EINVAL; ++ ++ return 
provide_user_output(val, u, count, ppos); ++} ++ ++static ssize_t dfs_global_file_write(struct file *file, const char __user *u, ++ size_t count, loff_t *ppos) ++{ ++ struct dentry *dent = file->f_path.dentry; ++ int val; ++ ++ val = interpret_user_input(u, count); ++ if (val < 0) ++ return val; ++ ++ if (dent == dfs_chk_gen) ++ ubifs_dbg.chk_gen = val; ++ else if (dent == dfs_chk_index) ++ ubifs_dbg.chk_index = val; ++ else if (dent == dfs_chk_orph) ++ ubifs_dbg.chk_orph = val; ++ else if (dent == dfs_chk_lprops) ++ ubifs_dbg.chk_lprops = val; ++ else if (dent == dfs_chk_fs) ++ ubifs_dbg.chk_fs = val; ++ else if (dent == dfs_tst_rcvry) ++ ubifs_dbg.tst_rcvry = val; ++ else ++ return -EINVAL; ++ ++ return count; ++} ++ ++static const struct file_operations dfs_global_fops = { ++ .read = dfs_global_file_read, ++ .write = dfs_global_file_write, ++ .owner = THIS_MODULE, ++ .llseek = no_llseek, ++}; ++ ++/** ++ * dbg_debugfs_init - initialize debugfs file-system. ++ * ++ * UBIFS uses debugfs file-system to expose various debugging knobs to ++ * user-space. This function creates "ubifs" directory in the debugfs ++ * file-system. Returns zero in case of success and a negative error code in ++ * case of failure. 
++ */ ++int dbg_debugfs_init(void) ++{ ++ int err; ++ const char *fname; ++ struct dentry *dent; ++ ++ fname = "ubifs"; ++ dent = debugfs_create_dir(fname, NULL); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out; ++ dfs_rootdir = dent; ++ ++ fname = "chk_general"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, ++ &dfs_global_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ dfs_chk_gen = dent; ++ ++ fname = "chk_index"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, ++ &dfs_global_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ dfs_chk_index = dent; ++ ++ fname = "chk_orphans"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, ++ &dfs_global_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ dfs_chk_orph = dent; ++ ++ fname = "chk_lprops"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, ++ &dfs_global_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ dfs_chk_lprops = dent; ++ ++ fname = "chk_fs"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, ++ &dfs_global_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ dfs_chk_fs = dent; ++ ++ fname = "tst_recovery"; ++ dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL, ++ &dfs_global_fops); ++ if (IS_ERR_OR_NULL(dent)) ++ goto out_remove; ++ dfs_tst_rcvry = dent; ++ ++ return 0; ++ ++out_remove: ++ debugfs_remove_recursive(dfs_rootdir); ++out: ++ err = dent ? PTR_ERR(dent) : -ENODEV; ++ ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n", ++ fname, err); ++ return err; ++} ++ ++/** ++ * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. ++ */ ++void dbg_debugfs_exit(void) ++{ ++ debugfs_remove_recursive(dfs_rootdir); ++} ++ ++/** ++ * ubifs_debugging_init - initialize UBIFS debugging. 
++ * @c: UBIFS file-system description object ++ * ++ * This function initializes debugging-related data for the file system. ++ * Returns zero in case of success and a negative error code in case of ++ * failure. ++ */ ++int ubifs_debugging_init(struct ubifs_info *c) ++{ ++ c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); ++ if (!c->dbg) ++ return -ENOMEM; ++ ++ return 0; ++} ++ ++/** ++ * ubifs_debugging_exit - free debugging data. ++ * @c: UBIFS file-system description object ++ */ ++void ubifs_debugging_exit(struct ubifs_info *c) ++{ ++ kfree(c->dbg); ++} ++ + #endif /* CONFIG_UBIFS_FS_DEBUG */ +diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h +index 555ba13..13917ce 100644 +--- a/fs/ubifs/debug.h ++++ b/fs/ubifs/debug.h +@@ -31,17 +31,25 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, + + #ifdef CONFIG_UBIFS_FS_DEBUG + ++/* ++ * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi" ++ * + 1 for "_" plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte). ++ */ ++#define UBIFS_DFS_DIR_NAME "ubi%d_%d" ++#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1) ++ + /** + * ubifs_debug_info - per-FS debugging information.
+- * @buf: a buffer of LEB size, used for various purposes + * @old_zroot: old index root - used by 'dbg_check_old_index()' + * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' + * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' +- * @failure_mode: failure mode for recovery testing +- * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls +- * @fail_timeout: time in jiffies when delay of failure mode expires +- * @fail_cnt: current number of calls to failure mode I/O functions +- * @fail_cnt_max: number of calls by which to delay failure mode ++ * ++ * @pc_happened: non-zero if an emulated power cut happened ++ * @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls ++ * @pc_timeout: time in jiffies when delay of failure mode expires ++ * @pc_cnt: current number of calls to failure mode I/O functions ++ * @pc_cnt_max: number of calls by which to delay failure mode ++ * + * @chk_lpt_sz: used by LPT tree size checker + * @chk_lpt_sz2: used by LPT tree size checker + * @chk_lpt_wastage: used by LPT tree size checker +@@ -51,24 +59,40 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, + * @new_ihead_offs: used by debugging to check @c->ihead_offs + * + * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') +- * @saved_free: saved free space (used by 'dbg_save_space_info()') ++ * @saved_bi: saved budgeting information ++ * @saved_free: saved amount of free space ++ * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt ++ * ++ * @chk_gen: if general extra checks are enabled ++ * @chk_index: if index extra checks are enabled ++ * @chk_orph: if orphans extra checks are enabled ++ * @chk_lprops: if lprops extra checks are enabled ++ * @chk_fs: if UBIFS contents extra checks are enabled ++ * @tst_rcvry: if UBIFS recovery testing mode is enabled + * +- * dfs_dir_name: name of debugfs directory containing this file-system's files +- * dfs_dir: direntry object of the
file-system debugfs directory +- * dfs_dump_lprops: "dump lprops" debugfs knob +- * dfs_dump_budg: "dump budgeting information" debugfs knob +- * dfs_dump_tnc: "dump TNC" debugfs knob ++ * @dfs_dir_name: name of debugfs directory containing this file-system's files ++ * @dfs_dir: direntry object of the file-system debugfs directory ++ * @dfs_dump_lprops: "dump lprops" debugfs knob ++ * @dfs_dump_budg: "dump budgeting information" debugfs knob ++ * @dfs_dump_tnc: "dump TNC" debugfs knob ++ * @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks ++ * @dfs_chk_index: debugfs knob to enable UBIFS index extra checks ++ * @dfs_chk_orph: debugfs knob to enable UBIFS orphans extra checks ++ * @dfs_chk_lprops: debugfs knob to enable UBIFS LEB properties extra checks ++ * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks ++ * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing + */ + struct ubifs_debug_info { +- void *buf; + struct ubifs_zbranch old_zroot; + int old_zroot_level; + unsigned long long old_zroot_sqnum; +- int failure_mode; +- int fail_delay; +- unsigned long fail_timeout; +- unsigned int fail_cnt; +- unsigned int fail_cnt_max; ++ ++ int pc_happened; ++ int pc_delay; ++ unsigned long pc_timeout; ++ unsigned int pc_cnt; ++ unsigned int pc_cnt_max; ++ + long long chk_lpt_sz; + long long chk_lpt_sz2; + long long chk_lpt_wastage; +@@ -78,13 +102,47 @@ struct ubifs_debug_info { + int new_ihead_offs; + + struct ubifs_lp_stats saved_lst; ++ struct ubifs_budg_info saved_bi; + long long saved_free; ++ int saved_idx_gc_cnt; ++ ++ unsigned int chk_gen:1; ++ unsigned int chk_index:1; ++ unsigned int chk_orph:1; ++ unsigned int chk_lprops:1; ++ unsigned int chk_fs:1; ++ unsigned int tst_rcvry:1; + +- char dfs_dir_name[100]; ++ char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1]; + struct dentry *dfs_dir; + struct dentry *dfs_dump_lprops; + struct dentry *dfs_dump_budg; + struct dentry *dfs_dump_tnc; ++ struct dentry *dfs_chk_gen; ++ struct dentry
*dfs_chk_index; ++ struct dentry *dfs_chk_orph; ++ struct dentry *dfs_chk_lprops; ++ struct dentry *dfs_chk_fs; ++ struct dentry *dfs_tst_rcvry; ++}; ++ ++/** ++ * ubifs_global_debug_info - global (not per-FS) UBIFS debugging information. ++ * ++ * @chk_gen: if general extra checks are enabled ++ * @chk_index: if index extra checks are enabled ++ * @chk_orph: if orphans extra checks are enabled ++ * @chk_lprops: if lprops extra checks are enabled ++ * @chk_fs: if UBIFS contents extra checks are enabled ++ * @tst_rcvry: if UBIFS recovery testing mode is enabled ++ */ ++struct ubifs_global_debug_info { ++ unsigned int chk_gen:1; ++ unsigned int chk_index:1; ++ unsigned int chk_orph:1; ++ unsigned int chk_lprops:1; ++ unsigned int chk_fs:1; ++ unsigned int tst_rcvry:1; + }; + + #define ubifs_assert(expr) do { \ +@@ -103,173 +161,90 @@ struct ubifs_debug_info { + } \ + } while (0) + +-#define dbg_dump_stack() do { \ +- if (!dbg_failure_mode) \ +- dump_stack(); \ +-} while (0) +- +-/* Generic debugging messages */ +-#define dbg_msg(fmt, ...) do { \ +- spin_lock(&dbg_lock); \ +- printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ +- __func__, ##__VA_ARGS__); \ +- spin_unlock(&dbg_lock); \ +-} while (0) +- +-#define dbg_do_msg(typ, fmt, ...) do { \ +- if (ubifs_msg_flags & typ) \ +- dbg_msg(fmt, ##__VA_ARGS__); \ +-} while (0) ++#define dbg_dump_stack() dump_stack() + + #define dbg_err(fmt, ...) do { \ +- spin_lock(&dbg_lock); \ + ubifs_err(fmt, ##__VA_ARGS__); \ +- spin_unlock(&dbg_lock); \ + } while (0) + +-const char *dbg_key_str0(const struct ubifs_info *c, +- const union ubifs_key *key); +-const char *dbg_key_str1(const struct ubifs_info *c, +- const union ubifs_key *key); ++#define ubifs_dbg_msg(type, fmt, ...) \ ++ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__) + +-/* +- * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message +- * macros.
+- */ +-#define DBGKEY(key) dbg_key_str0(c, (key)) +-#define DBGKEY1(key) dbg_key_str1(c, (key)) ++#define DBG_KEY_BUF_LEN 32 ++#define ubifs_dbg_msg_key(type, key, fmt, ...) do { \ ++ char __tmp_key_buf[DBG_KEY_BUF_LEN]; \ ++ pr_debug("UBIFS DBG " type ": " fmt "%s\n", ##__VA_ARGS__, \ ++ dbg_snprintf_key(c, key, __tmp_key_buf, DBG_KEY_BUF_LEN)); \ ++} while (0) + +-/* General messages */ +-#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) ++/* Just debugging messages not related to any specific UBIFS subsystem */ ++#define dbg_msg(fmt, ...) \ ++ printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ ++ __func__, ##__VA_ARGS__) + ++/* General messages */ ++#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) + /* Additional journal messages */ +-#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) +- ++#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__) ++#define dbg_jnlk(key, fmt, ...) \ ++ ubifs_dbg_msg_key("jnl", key, fmt, ##__VA_ARGS__) + /* Additional TNC messages */ +-#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) +- ++#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__) ++#define dbg_tnck(key, fmt, ...) \ ++ ubifs_dbg_msg_key("tnc", key, fmt, ##__VA_ARGS__) + /* Additional lprops messages */ +-#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) +- ++#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__) + /* Additional LEB find messages */ +-#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) +- ++#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__) + /* Additional mount messages */ +-#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) +- ++#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__) ++#define dbg_mntk(key, fmt, ...)
\ ++ ubifs_dbg_msg_key("mnt", key, fmt, ##__VA_ARGS__) + /* Additional I/O messages */ +-#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) +- ++#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__) + /* Additional commit messages */ +-#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) +- ++#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__) + /* Additional budgeting messages */ +-#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) +- ++#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__) + /* Additional log messages */ +-#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) +- ++#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__) + /* Additional gc messages */ +-#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) +- ++#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__) + /* Additional scan messages */ +-#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) +- ++#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__) + /* Additional recovery messages */ +-#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) ++#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) + +-/* +- * Debugging message type flags (must match msg_type_names in debug.c). 
+- * +- * UBIFS_MSG_GEN: general messages +- * UBIFS_MSG_JNL: journal messages +- * UBIFS_MSG_MNT: mount messages +- * UBIFS_MSG_CMT: commit messages +- * UBIFS_MSG_FIND: LEB find messages +- * UBIFS_MSG_BUDG: budgeting messages +- * UBIFS_MSG_GC: garbage collection messages +- * UBIFS_MSG_TNC: TNC messages +- * UBIFS_MSG_LP: lprops messages +- * UBIFS_MSG_IO: I/O messages +- * UBIFS_MSG_LOG: log messages +- * UBIFS_MSG_SCAN: scan messages +- * UBIFS_MSG_RCVRY: recovery messages +- */ +-enum { +- UBIFS_MSG_GEN = 0x1, +- UBIFS_MSG_JNL = 0x2, +- UBIFS_MSG_MNT = 0x4, +- UBIFS_MSG_CMT = 0x8, +- UBIFS_MSG_FIND = 0x10, +- UBIFS_MSG_BUDG = 0x20, +- UBIFS_MSG_GC = 0x40, +- UBIFS_MSG_TNC = 0x80, +- UBIFS_MSG_LP = 0x100, +- UBIFS_MSG_IO = 0x200, +- UBIFS_MSG_LOG = 0x400, +- UBIFS_MSG_SCAN = 0x800, +- UBIFS_MSG_RCVRY = 0x1000, +-}; +- +-/* Debugging message type flags for each default debug message level */ +-#define UBIFS_MSG_LVL_0 0 +-#define UBIFS_MSG_LVL_1 0x1 +-#define UBIFS_MSG_LVL_2 0x7f +-#define UBIFS_MSG_LVL_3 0xffff +- +-/* +- * Debugging check flags (must match chk_names in debug.c). +- * +- * UBIFS_CHK_GEN: general checks +- * UBIFS_CHK_TNC: check TNC +- * UBIFS_CHK_IDX_SZ: check index size +- * UBIFS_CHK_ORPH: check orphans +- * UBIFS_CHK_OLD_IDX: check the old index +- * UBIFS_CHK_LPROPS: check lprops +- * UBIFS_CHK_FS: check the file-system +- */ +-enum { +- UBIFS_CHK_GEN = 0x1, +- UBIFS_CHK_TNC = 0x2, +- UBIFS_CHK_IDX_SZ = 0x4, +- UBIFS_CHK_ORPH = 0x8, +- UBIFS_CHK_OLD_IDX = 0x10, +- UBIFS_CHK_LPROPS = 0x20, +- UBIFS_CHK_FS = 0x40, +-}; ++extern struct ubifs_global_debug_info ubifs_dbg; + +-/* +- * Special testing flags (must match tst_names in debug.c). 
+- * +- * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method +- * UBIFS_TST_RCVRY: failure mode for recovery testing +- */ +-enum { +- UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, +- UBIFS_TST_RCVRY = 0x4, +-}; +- +-#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1 +-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1 +-#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2 +-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2 +-#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3 +-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3 +-#else +-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0 +-#endif +- +-#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS +-#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff +-#else +-#define UBIFS_CHK_FLAGS_DEFAULT 0 +-#endif +- +-extern spinlock_t dbg_lock; +- +-extern unsigned int ubifs_msg_flags; +-extern unsigned int ubifs_chk_flags; +-extern unsigned int ubifs_tst_flags; ++static inline int dbg_is_chk_gen(const struct ubifs_info *c) ++{ ++ return !!(ubifs_dbg.chk_gen || c->dbg->chk_gen); ++} ++static inline int dbg_is_chk_index(const struct ubifs_info *c) ++{ ++ return !!(ubifs_dbg.chk_index || c->dbg->chk_index); ++} ++static inline int dbg_is_chk_orph(const struct ubifs_info *c) ++{ ++ return !!(ubifs_dbg.chk_orph || c->dbg->chk_orph); ++} ++static inline int dbg_is_chk_lprops(const struct ubifs_info *c) ++{ ++ return !!(ubifs_dbg.chk_lprops || c->dbg->chk_lprops); ++} ++static inline int dbg_is_chk_fs(const struct ubifs_info *c) ++{ ++ return !!(ubifs_dbg.chk_fs || c->dbg->chk_fs); ++} ++static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) ++{ ++ return !!(ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry); ++} ++static inline int dbg_is_power_cut(const struct ubifs_info *c) ++{ ++ return !!c->dbg->pc_happened; ++} + + int ubifs_debugging_init(struct ubifs_info *c); + void ubifs_debugging_exit(struct ubifs_info *c); +@@ -280,17 +255,21 @@ const char *dbg_cstate(int cmt_state); + const char *dbg_jhead(int jhead); + const char *dbg_get_key_dump(const struct ubifs_info *c, + const union 
ubifs_key *key); +-void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); ++const char *dbg_snprintf_key(const struct ubifs_info *c, ++ const union ubifs_key *key, char *buffer, int len); ++void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode); + void dbg_dump_node(const struct ubifs_info *c, const void *node); + void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, + int offs); + void dbg_dump_budget_req(const struct ubifs_budget_req *req); + void dbg_dump_lstats(const struct ubifs_lp_stats *lst); +-void dbg_dump_budg(struct ubifs_info *c); ++void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi); + void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); + void dbg_dump_lprops(struct ubifs_info *c); + void dbg_dump_lpt_info(struct ubifs_info *c); + void dbg_dump_leb(const struct ubifs_info *c, int lnum); ++void dbg_dump_sleb(const struct ubifs_info *c, ++ const struct ubifs_scan_leb *sleb, int offs); + void dbg_dump_znode(const struct ubifs_info *c, + const struct ubifs_znode *znode); + void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat); +@@ -313,14 +292,13 @@ int dbg_check_cats(struct ubifs_info *c); + int dbg_check_ltab(struct ubifs_info *c); + int dbg_chk_lpt_free_spc(struct ubifs_info *c); + int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); +-int dbg_check_synced_i_size(struct inode *inode); +-int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); ++int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode); ++int dbg_check_dir(struct ubifs_info *c, const struct inode *dir); + int dbg_check_tnc(struct ubifs_info *c, int extra); + int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); + int dbg_check_filesystem(struct ubifs_info *c); + void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, + int add_pos); +-int dbg_check_lprops(struct ubifs_info *c); + 
int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, + int row, int col); + int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, +@@ -328,57 +306,12 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, + int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); + int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); + +-/* Force the use of in-the-gaps method for testing */ +- +-#define dbg_force_in_the_gaps_enabled \ +- (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) +- +-int dbg_force_in_the_gaps(void); +- +-/* Failure mode for recovery testing */ +- +-#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) +- +-#ifndef UBIFS_DBG_PRESERVE_UBI +- +-#define ubi_leb_read dbg_leb_read +-#define ubi_leb_write dbg_leb_write +-#define ubi_leb_change dbg_leb_change +-#define ubi_leb_erase dbg_leb_erase +-#define ubi_leb_unmap dbg_leb_unmap +-#define ubi_is_mapped dbg_is_mapped +-#define ubi_leb_map dbg_leb_map +- +-#endif +- +-int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, +- int len, int check); +-int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, +- int offset, int len, int dtype); +-int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, +- int len, int dtype); +-int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum); +-int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum); +-int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum); +-int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype); +- +-static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf, +- int offset, int len) +-{ +- return dbg_leb_read(desc, lnum, buf, offset, len, 0); +-} +- +-static inline int dbg_write(struct ubi_volume_desc *desc, int lnum, +- const void *buf, int offset, int len) +-{ +- return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN); +-} +- +-static 
inline int dbg_change(struct ubi_volume_desc *desc, int lnum, +- const void *buf, int len) +-{ +- return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN); +-} ++int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, ++ int len, int dtype); ++int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, ++ int dtype); ++int dbg_leb_unmap(struct ubifs_info *c, int lnum); ++int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype); + + /* Debugfs-related stuff */ + int dbg_debugfs_init(void); +@@ -390,116 +323,158 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); + + /* Use "if (0)" to make compiler check arguments even if debugging is off */ + #define ubifs_assert(expr) do { \ +- if (0 && (expr)) \ ++ if (0) \ + printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ + __func__, __LINE__, current->pid); \ + } while (0) + +-#define dbg_err(fmt, ...) do { \ +- if (0) \ +- ubifs_err(fmt, ##__VA_ARGS__); \ ++#define dbg_err(fmt, ...) do { \ ++ if (0) \ ++ ubifs_err(fmt, ##__VA_ARGS__); \ + } while (0) + +-#define dbg_msg(fmt, ...) do { \ +- if (0) \ +- printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ +- current->pid, __func__, ##__VA_ARGS__); \ ++#define DBGKEY(key) ((char *)(key)) ++#define DBGKEY1(key) ((char *)(key)) ++ ++#define ubifs_dbg_msg(fmt, ...) do { \ ++ if (0) \ ++ pr_debug(fmt "\n", ##__VA_ARGS__); \ + } while (0) + + #define dbg_dump_stack() + #define ubifs_assert_cmt_locked(c) + +-#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_budg(fmt, ...) 
dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +- +-#define DBGKEY(key) ((char *)(key)) +-#define DBGKEY1(key) ((char *)(key)) +- +-static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } +-static inline void ubifs_debugging_exit(struct ubifs_info *c) {} +-static inline const char *dbg_ntype(int type) { return ""; } +-static inline const char *dbg_cstate(int cmt_state) { return ""; } +-static inline const char *dbg_jhead(int jhead) { return ""; } +-static inline const char *dbg_get_key_dump(const struct ubifs_info *c, +- const union ubifs_key *key) { return ""; } +-static inline void dbg_dump_inode(const struct ubifs_info *c, +- const struct inode *inode) {} ++#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_jnlk(key, fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_tnck(key, fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_mntk(key, fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_rcvry(fmt, ...) 
ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++ ++static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } ++static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; } ++static inline const char *dbg_ntype(int type) { return ""; } ++static inline const char *dbg_cstate(int cmt_state) { return ""; } ++static inline const char *dbg_jhead(int jhead) { return ""; } ++static inline const char * ++dbg_get_key_dump(const struct ubifs_info *c, ++ const union ubifs_key *key) { return ""; } ++static inline const char * ++dbg_snprintf_key(const struct ubifs_info *c, ++ const union ubifs_key *key, char *buffer, ++ int len) { return ""; } ++static inline void dbg_dump_inode(struct ubifs_info *c, ++ const struct inode *inode) { return; } + static inline void dbg_dump_node(const struct ubifs_info *c, +- const void *node) {} ++ const void *node) { return; } + static inline void dbg_dump_lpt_node(const struct ubifs_info *c, +- void *node, int lnum, int offs) {} +-static inline void dbg_dump_budget_req(const struct ubifs_budget_req *req) {} +-static inline void dbg_dump_lstats(const struct ubifs_lp_stats *lst) {} +-static inline void dbg_dump_budg(struct ubifs_info *c) {} ++ void *node, int lnum, ++ int offs) { return; } ++static inline void ++dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; } ++static inline void ++dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; } ++static inline void ++dbg_dump_budg(struct ubifs_info *c, ++ const struct ubifs_budg_info *bi) { return; } + static inline void dbg_dump_lprop(const struct ubifs_info *c, +- const struct ubifs_lprops *lp) {} +-static inline void dbg_dump_lprops(struct ubifs_info *c) {} +-static inline void dbg_dump_lpt_info(struct ubifs_info *c) {} +-static inline void dbg_dump_leb(const struct ubifs_info *c, int lnum) {} +-static inline void dbg_dump_znode(const struct ubifs_info *c, +- const struct ubifs_znode *znode) {} ++ const struct ubifs_lprops *lp) { return; } ++static inline 
void dbg_dump_lprops(struct ubifs_info *c) { return; } ++static inline void dbg_dump_lpt_info(struct ubifs_info *c) { return; } ++static inline void dbg_dump_leb(const struct ubifs_info *c, ++ int lnum) { return; } ++static inline void ++dbg_dump_sleb(const struct ubifs_info *c, ++ const struct ubifs_scan_leb *sleb, int offs) { return; } ++static inline void ++dbg_dump_znode(const struct ubifs_info *c, ++ const struct ubifs_znode *znode) { return; } + static inline void dbg_dump_heap(struct ubifs_info *c, +- struct ubifs_lpt_heap *heap, int cat) {} ++ struct ubifs_lpt_heap *heap, ++ int cat) { return; } + static inline void dbg_dump_pnode(struct ubifs_info *c, +- struct ubifs_pnode *pnode, struct ubifs_nnode *parent, int iip) {} +-static inline void dbg_dump_tnc(struct ubifs_info *c) {} +-static inline void dbg_dump_index(struct ubifs_info *c) {} +-static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c) {} ++ struct ubifs_pnode *pnode, ++ struct ubifs_nnode *parent, ++ int iip) { return; } ++static inline void dbg_dump_tnc(struct ubifs_info *c) { return; } ++static inline void dbg_dump_index(struct ubifs_info *c) { return; } ++static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c) { return; } + + static inline int dbg_walk_index(struct ubifs_info *c, +- dbg_leaf_callback leaf_cb, dbg_znode_callback znode_cb, void *priv) +- { return 0; } +- +-/* Checking functions */ +-static inline void dbg_save_space_info(struct ubifs_info *c) {} +-static inline int dbg_check_space_info(struct ubifs_info *c) { return 0; } +-static inline int dbg_check_lprops(struct ubifs_info *c) { return 0; } +-static inline int dbg_old_index_check_init(struct ubifs_info *c, +- struct ubifs_zbranch *zroot) { return 0; } +-static inline int dbg_check_old_index(struct ubifs_info *c, +- struct ubifs_zbranch *zroot) { return 0; } +-static inline int dbg_check_cats(struct ubifs_info *c) { return 0; } +-static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; } +-static 
inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; } ++ dbg_leaf_callback leaf_cb, ++ dbg_znode_callback znode_cb, ++ void *priv) { return 0; } ++static inline void dbg_save_space_info(struct ubifs_info *c) { return; } ++static inline int dbg_check_space_info(struct ubifs_info *c) { return 0; } ++static inline int dbg_check_lprops(struct ubifs_info *c) { return 0; } ++static inline int ++dbg_old_index_check_init(struct ubifs_info *c, ++ struct ubifs_zbranch *zroot) { return 0; } ++static inline int ++dbg_check_old_index(struct ubifs_info *c, ++ struct ubifs_zbranch *zroot) { return 0; } ++static inline int dbg_check_cats(struct ubifs_info *c) { return 0; } ++static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; } ++static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; } + static inline int dbg_chk_lpt_sz(struct ubifs_info *c, +- int action, int len) { return 0; } +-static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; } +-static inline int dbg_check_dir_size(struct ubifs_info *c, +- const struct inode *dir) { return 0; } +-static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; } ++ int action, int len) { return 0; } ++static inline int ++dbg_check_synced_i_size(const struct ubifs_info *c, ++ struct inode *inode) { return 0; } ++static inline int dbg_check_dir(struct ubifs_info *c, ++ const struct inode *dir) { return 0; } ++static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; } + static inline int dbg_check_idx_size(struct ubifs_info *c, +- long long idx_size) { return 0; } +-static inline int dbg_check_filesystem(struct ubifs_info *c) { return 0; } ++ long long idx_size) { return 0; } ++static inline int dbg_check_filesystem(struct ubifs_info *c) { return 0; } + static inline void dbg_check_heap(struct ubifs_info *c, +- struct ubifs_lpt_heap *heap, int cat, int add_pos) {} ++ struct ubifs_lpt_heap *heap, ++ int cat, int add_pos) { return; } + static 
inline int dbg_check_lpt_nodes(struct ubifs_info *c, +- struct ubifs_cnode *cnode, int row, int col) { return 0; } ++ struct ubifs_cnode *cnode, int row, int col) { return 0; } + static inline int dbg_check_inode_size(struct ubifs_info *c, +- const struct inode *inode, loff_t size) { return 0; } +-static inline int dbg_check_data_nodes_order(struct ubifs_info *c, +- struct list_head *head) { return 0; } +-static inline int dbg_check_nondata_nodes_order(struct ubifs_info *c, +- struct list_head *head) { return 0; } +- +-#define dbg_force_in_the_gaps_enabled 0 +-static inline int dbg_force_in_the_gaps(void) { return 0; } +-#define dbg_failure_mode 0 +- +-static inline int dbg_debugfs_init(void) { return 0; } +-static inline void dbg_debugfs_exit(void) {} +-static inline int dbg_debugfs_init_fs(struct ubifs_info *c) { return 0; } +-static inline int dbg_debugfs_exit_fs(struct ubifs_info *c) { return 0; } ++ const struct inode *inode, ++ loff_t size) { return 0; } ++static inline int ++dbg_check_data_nodes_order(struct ubifs_info *c, ++ struct list_head *head) { return 0; } ++static inline int ++dbg_check_nondata_nodes_order(struct ubifs_info *c, ++ struct list_head *head) { return 0; } ++ ++static inline int dbg_leb_write(struct ubifs_info *c, int lnum, ++ const void *buf, int offset, ++ int len, int dtype) { return 0; } ++static inline int dbg_leb_change(struct ubifs_info *c, int lnum, ++ const void *buf, int len, ++ int dtype) { return 0; } ++static inline int dbg_leb_unmap(struct ubifs_info *c, int lnum) { return 0; } ++static inline int dbg_leb_map(struct ubifs_info *c, int lnum, ++ int dtype) { return 0; } ++ ++static inline int dbg_is_chk_gen(const struct ubifs_info *c) { return 0; } ++static inline int dbg_is_chk_index(const struct ubifs_info *c) { return 0; } ++static inline int dbg_is_chk_orph(const struct ubifs_info *c) { return 0; } ++static inline int dbg_is_chk_lprops(const struct ubifs_info *c) { return 0; } ++static inline int dbg_is_chk_fs(const struct 
ubifs_info *c) { return 0; } ++static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) { return 0; } ++static inline int dbg_is_power_cut(const struct ubifs_info *c) { return 0; } ++ ++static inline int dbg_debugfs_init(void) { return 0; } ++static inline void dbg_debugfs_exit(void) { return; } ++static inline int dbg_debugfs_init_fs(struct ubifs_info *c) { return 0; } ++static inline int dbg_debugfs_exit_fs(struct ubifs_info *c) { return 0; } + + #endif /* !CONFIG_UBIFS_FS_DEBUG */ + #endif /* !__UBIFS_DEBUG_H__ */ +diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c +index 14f64b6..9c5e3c5 100644 +--- a/fs/ubifs/dir.c ++++ b/fs/ubifs/dir.c +@@ -102,7 +102,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, + * UBIFS has to fully control "clean <-> dirty" transitions of inodes + * to make budgeting work. + */ +- inode->i_flags |= (S_NOCMTIME); ++ inode->i_flags |= S_NOCMTIME; + + inode_init_owner(inode, dir, mode); + inode->i_mtime = inode->i_atime = inode->i_ctime = +@@ -172,9 +172,11 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, + + #ifdef CONFIG_UBIFS_FS_DEBUG + +-static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) ++static int dbg_check_name(const struct ubifs_info *c, ++ const struct ubifs_dent_node *dent, ++ const struct qstr *nm) + { +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + if (le16_to_cpu(dent->nlen) != nm->len) + return -EINVAL; +@@ -185,7 +187,7 @@ static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) + + #else + +-#define dbg_check_name(dent, nm) 0 ++#define dbg_check_name(c, dent, nm) 0 + + #endif + +@@ -219,7 +221,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, + goto out; + } + +- if (dbg_check_name(dent, &dentry->d_name)) { ++ if (dbg_check_name(c, dent, &dentry->d_name)) { + err = -EINVAL; + goto out; + } +@@ -540,7 +542,7 @@ static int ubifs_link(struct dentry 
*old_dentry, struct inode *dir, + if (inode->i_nlink == 0) + return -ENOENT; + +- err = dbg_check_synced_i_size(inode); ++ err = dbg_check_synced_i_size(c, inode); + if (err) + return err; + +@@ -595,7 +597,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) + inode->i_nlink, dir->i_ino); + ubifs_assert(mutex_is_locked(&dir->i_mutex)); + ubifs_assert(mutex_is_locked(&inode->i_mutex)); +- err = dbg_check_synced_i_size(inode); ++ err = dbg_check_synced_i_size(c, inode); + if (err) + return err; + +@@ -621,7 +623,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) + ubifs_release_budget(c, &req); + else { + /* We've deleted something - clean the "no space" flags */ +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + } + return 0; +@@ -711,7 +713,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) + ubifs_release_budget(c, &req); + else { + /* We've deleted something - clean the "no space" flags */ +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + } + return 0; +diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c +index d77db7e..7cf738a 100644 +--- a/fs/ubifs/file.c ++++ b/fs/ubifs/file.c +@@ -212,7 +212,7 @@ static void release_new_page_budget(struct ubifs_info *c) + */ + static void release_existing_page_budget(struct ubifs_info *c) + { +- struct ubifs_budget_req req = { .dd_growth = c->page_budget}; ++ struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget}; + + ubifs_release_budget(c, &req); + } +@@ -448,10 +448,12 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, + if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { + /* + * We change whole page so no need to load it. But we +- * have to set the @PG_checked flag to make the further +- * code know that the page is new. This might be not +- * true, but it is better to budget more than to read +- * the page from the media. 
++ * do not know whether this page exists on the media or ++ * not, so we assume the latter because it requires ++ * larger budget. The assumption is that it is better ++ * to budget a bit more than to read the page from the ++ * media. Thus, we are setting the @PG_checked flag ++ * here. + */ + SetPageChecked(page); + skipped_read = 1; +@@ -559,6 +561,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, + dbg_gen("copied %d instead of %d, read page and repeat", + copied, len); + cancel_budget(c, page, ui, appending); ++ ClearPageChecked(page); + + /* + * Return 0 to force VFS to repeat the whole operation, or the +@@ -968,11 +971,11 @@ static int do_writepage(struct page *page, int len) + * the page locked, and it locks @ui_mutex. However, write-back does take inode + * @i_mutex, which means other VFS operations may be run on this inode at the + * same time. And the problematic one is truncation to smaller size, from where +- * we have to call 'truncate_setsize()', which first changes @inode->i_size, then +- * drops the truncated pages. And while dropping the pages, it takes the page +- * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with +- * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This +- * means that @inode->i_size is changed while @ui_mutex is unlocked. ++ * we have to call 'truncate_setsize()', which first changes @inode->i_size, ++ * then drops the truncated pages. And while dropping the pages, it takes the ++ * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' ++ * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. ++ * This means that @inode->i_size is changed while @ui_mutex is unlocked. + * + * XXX(truncate): with the new truncate sequence this is not true anymore, + * and the calls to truncate_setsize can be move around freely. 
They should +@@ -1186,7 +1189,7 @@ out_budg: + if (budgeted) + ubifs_release_budget(c, &req); + else { +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + } + return err; +@@ -1260,7 +1263,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) + if (err) + return err; + +- err = dbg_check_synced_i_size(inode); ++ err = dbg_check_synced_i_size(c, inode); + if (err) + return err; + +@@ -1309,6 +1312,13 @@ int ubifs_fsync(struct file *file, int datasync) + + dbg_gen("syncing inode %lu", inode->i_ino); + ++ if (c->ro_mount) ++ /* ++ * For some really strange reasons VFS does not filter out ++ * 'fsync()' for R/O mounted file-systems as per 2.6.39. ++ */ ++ return 0; ++ + /* + * VFS has already synchronized dirty pages for this inode. Synchronize + * the inode unless this is a 'datasync()' call. +@@ -1426,10 +1436,11 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) + } + + /* +- * mmap()d file has taken write protection fault and is being made +- * writable. UBIFS must ensure page is budgeted for. ++ * mmap()d file has taken write protection fault and is being made writable. ++ * UBIFS must ensure page is budgeted for. 
+ */ +-static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ++static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, ++ struct vm_fault *vmf) + { + struct page *page = vmf->page; + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; +@@ -1530,7 +1541,6 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) + { + int err; + +- /* 'generic_file_mmap()' takes care of NOMMU case */ + err = generic_file_mmap(file, vma); + if (err) + return err; +diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c +index 1d54383..2559d17 100644 +--- a/fs/ubifs/find.c ++++ b/fs/ubifs/find.c +@@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, + * But if the index takes fewer LEBs than it is reserved for it, + * this function must avoid picking those reserved LEBs. + */ +- if (c->min_idx_lebs >= c->lst.idx_lebs) { +- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; ++ if (c->bi.min_idx_lebs >= c->lst.idx_lebs) { ++ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; + exclude_index = 1; + } + spin_unlock(&c->space_lock); +@@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, + pick_free = 0; + } else { + spin_lock(&c->space_lock); +- exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); ++ exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs); + spin_unlock(&c->space_lock); + } + +@@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, + + /* Check if there are enough empty LEBs for commit */ + spin_lock(&c->space_lock); +- if (c->min_idx_lebs > c->lst.idx_lebs) +- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; ++ if (c->bi.min_idx_lebs > c->lst.idx_lebs) ++ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - +diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c +index 151f108..ded29f6 100644 +--- a/fs/ubifs/gc.c 
++++ b/fs/ubifs/gc.c +@@ -100,6 +100,10 @@ static int switch_gc_head(struct ubifs_info *c) + if (err) + return err; + ++ err = ubifs_wbuf_sync_nolock(wbuf); ++ if (err) ++ return err; ++ + err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); + if (err) + return err; +@@ -118,7 +122,7 @@ static int switch_gc_head(struct ubifs_info *c) + * This function compares data nodes @a and @b. Returns %1 if @a has greater + * inode or block number, and %-1 otherwise. + */ +-int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) ++static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) + { + ino_t inuma, inumb; + struct ubifs_info *c = priv; +@@ -161,7 +165,8 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) + * first and sorted by length in descending order. Directory entry nodes go + * after inode nodes and are sorted in ascending hash valuer order. + */ +-int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) ++static int nondata_nodes_cmp(void *priv, struct list_head *a, ++ struct list_head *b) + { + ino_t inuma, inumb; + struct ubifs_info *c = priv; +@@ -473,6 +478,37 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) + ubifs_assert(c->gc_lnum != lnum); + ubifs_assert(wbuf->lnum != lnum); + ++ if (lp->free + lp->dirty == c->leb_size) { ++ /* Special case - a free LEB */ ++ dbg_gc("LEB %d is free, return it", lp->lnum); ++ ubifs_assert(!(lp->flags & LPROPS_INDEX)); ++ ++ if (lp->free != c->leb_size) { ++ /* ++ * Write buffers must be sync'd before unmapping ++ * freeable LEBs, because one of them may contain data ++ * which obsoletes something in 'lp->pnum'. 
++ */ ++ err = gc_sync_wbufs(c); ++ if (err) ++ return err; ++ err = ubifs_change_one_lp(c, lp->lnum, c->leb_size, ++ 0, 0, 0, 0); ++ if (err) ++ return err; ++ } ++ err = ubifs_leb_unmap(c, lp->lnum); ++ if (err) ++ return err; ++ ++ if (c->gc_lnum == -1) { ++ c->gc_lnum = lnum; ++ return LEB_RETAINED; ++ } ++ ++ return LEB_FREED; ++ } ++ + /* + * We scan the entire LEB even though we only really need to scan up to + * (c->leb_size - lp->free). +@@ -682,37 +718,6 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) + "(min. space %d)", lp.lnum, lp.free, lp.dirty, + lp.free + lp.dirty, min_space); + +- if (lp.free + lp.dirty == c->leb_size) { +- /* An empty LEB was returned */ +- dbg_gc("LEB %d is free, return it", lp.lnum); +- /* +- * ubifs_find_dirty_leb() doesn't return freeable index +- * LEBs. +- */ +- ubifs_assert(!(lp.flags & LPROPS_INDEX)); +- if (lp.free != c->leb_size) { +- /* +- * Write buffers must be sync'd before +- * unmapping freeable LEBs, because one of them +- * may contain data which obsoletes something +- * in 'lp.pnum'. +- */ +- ret = gc_sync_wbufs(c); +- if (ret) +- goto out; +- ret = ubifs_change_one_lp(c, lp.lnum, +- c->leb_size, 0, 0, 0, +- 0); +- if (ret) +- goto out; +- } +- ret = ubifs_leb_unmap(c, lp.lnum); +- if (ret) +- goto out; +- ret = lp.lnum; +- break; +- } +- + space_before = c->leb_size - wbuf->offs - wbuf->used; + if (wbuf->lnum == -1) + space_before = 0; +diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c +index d821731..9228950 100644 +--- a/fs/ubifs/io.c ++++ b/fs/ubifs/io.c +@@ -31,6 +31,26 @@ + * buffer is full or when it is not used for some time (by timer). This is + * similar to the mechanism is used by JFFS2. + * ++ * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum ++ * write size (@c->max_write_size). The latter is the maximum amount of bytes ++ * the underlying flash is able to program at a time, and writing in ++ * @c->max_write_size units should presumably be faster. 
Obviously, ++ * @c->min_io_size <= @c->max_write_size. Write-buffers are of ++ * @c->max_write_size bytes in size for maximum performance. However, when a ++ * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size ++ * boundary) which contains data is written, not the whole write-buffer, ++ * because this is more space-efficient. ++ * ++ * This optimization adds a few complications to the code. Indeed, on the one ++ * hand, we want to write in optimal @c->max_write_size bytes chunks, which ++ * also means aligning writes at the @c->max_write_size bytes offsets. On the ++ * other hand, we do not want to waste space when synchronizing the write ++ * buffer, so during synchronization we write in smaller chunks. And this makes ++ * the next write offset to be not aligned to @c->max_write_size bytes. So we ++ * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned ++ * to @c->max_write_size bytes again. We do this by temporarily shrinking ++ * write-buffer size (@wbuf->size). ++ * + * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by + * mutexes defined inside these objects. Since sometimes upper-level code + * has to lock the write-buffer (e.g. journal space reservation code), many +@@ -46,8 +66,8 @@ + * UBIFS uses padding when it pads to the next min. I/O unit. In this case it + * uses padding nodes or padding bytes, if the padding node does not fit. + * +- * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes +- * every time they are read from the flash media. ++ * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when ++ * they are read from the flash media.
+ */ + + #include <linux/crc32.h> +@@ -66,8 +86,125 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) + c->no_chk_data_crc = 0; + c->vfs_sb->s_flags |= MS_RDONLY; + ubifs_warn("switched to read-only mode, error %d", err); ++ dump_stack(); ++ } ++} ++ ++/* ++ * Below are simple wrappers over UBI I/O functions which include some ++ * additional checks and UBIFS debugging stuff. See corresponding UBI function ++ * for more information. ++ */ ++ ++int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, ++ int len, int even_ebadmsg) ++{ ++ int err; ++ ++ err = ubi_read(c->ubi, lnum, buf, offs, len); ++ /* ++ * In case of %-EBADMSG print the error message only if the ++ * @even_ebadmsg is true. ++ */ ++ if (err && (err != -EBADMSG || even_ebadmsg)) { ++ ubifs_err("reading %d bytes from LEB %d:%d failed, error %d", ++ len, lnum, offs, err); + dbg_dump_stack(); + } ++ return err; ++} ++ ++int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, ++ int len, int dtype) ++{ ++ int err; ++ ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->ro_error) ++ return -EROFS; ++ if (!dbg_is_tst_rcvry(c)) ++ err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); ++ else ++ err = dbg_leb_write(c, lnum, buf, offs, len, dtype); ++ if (err) { ++ ubifs_err("writing %d bytes to LEB %d:%d failed, error %d", ++ len, lnum, offs, err); ++ ubifs_ro_mode(c, err); ++ dbg_dump_stack(); ++ } ++ return err; ++} ++ ++int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, ++ int dtype) ++{ ++ int err; ++ ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->ro_error) ++ return -EROFS; ++ if (!dbg_is_tst_rcvry(c)) ++ err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); ++ else ++ err = dbg_leb_change(c, lnum, buf, len, dtype); ++ if (err) { ++ ubifs_err("changing %d bytes in LEB %d failed, error %d", ++ len, lnum, err); ++ ubifs_ro_mode(c, err); ++ dbg_dump_stack(); ++ } ++ return err; ++} ++ ++int ubifs_leb_unmap(struct 
ubifs_info *c, int lnum) ++{ ++ int err; ++ ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->ro_error) ++ return -EROFS; ++ if (!dbg_is_tst_rcvry(c)) ++ err = ubi_leb_unmap(c->ubi, lnum); ++ else ++ err = dbg_leb_unmap(c, lnum); ++ if (err) { ++ ubifs_err("unmap LEB %d failed, error %d", lnum, err); ++ ubifs_ro_mode(c, err); ++ dbg_dump_stack(); ++ } ++ return err; ++} ++ ++int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype) ++{ ++ int err; ++ ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->ro_error) ++ return -EROFS; ++ if (!dbg_is_tst_rcvry(c)) ++ err = ubi_leb_map(c->ubi, lnum, dtype); ++ else ++ err = dbg_leb_map(c, lnum, dtype); ++ if (err) { ++ ubifs_err("mapping LEB %d failed, error %d", lnum, err); ++ ubifs_ro_mode(c, err); ++ dbg_dump_stack(); ++ } ++ return err; ++} ++ ++int ubifs_is_mapped(const struct ubifs_info *c, int lnum) ++{ ++ int err; ++ ++ err = ubi_is_mapped(c->ubi, lnum); ++ if (err < 0) { ++ ubifs_err("ubi_is_mapped failed for LEB %d, error %d", ++ lnum, err); ++ dbg_dump_stack(); ++ } ++ return err; + } + + /** +@@ -88,8 +225,12 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) + * This function may skip data nodes CRC checking if @c->no_chk_data_crc is + * true, which is controlled by corresponding UBIFS mount option. However, if + * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is +- * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is +- * ignored and CRC is checked. ++ * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are ++ * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC ++ * is checked. This is because during mounting or re-mounting from R/O mode to ++ * R/W mode we may read journal nodes (when replaying the journal or doing the ++ * recovery) and the journal nodes may potentially be corrupted, so checking is ++ * required.
+ * + * This function returns zero in case of success and %-EUCLEAN in case of bad + * CRC or magic. +@@ -131,8 +272,8 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, + node_len > c->ranges[type].max_len) + goto out_len; + +- if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && +- c->no_chk_data_crc) ++ if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting && ++ !c->remounting_rw && c->no_chk_data_crc) + return 0; + + crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); +@@ -343,11 +484,17 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) + * + * This function synchronizes write-buffer @buf and returns zero in case of + * success or a negative error code in case of failure. ++ * ++ * Note, although write-buffers are of @c->max_write_size, this function does ++ * not necessarily write all @c->max_write_size bytes to the flash. Instead, ++ * if the write-buffer is only partially filled with data, only the used part ++ * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized. ++ * This way we waste less space.
+ */ + int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) + { + struct ubifs_info *c = wbuf->c; +- int err, dirt; ++ int err, dirt, sync_len; + + cancel_wbuf_timer_nolock(wbuf); + if (!wbuf->used || wbuf->lnum == -1) +@@ -357,27 +504,49 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) + dbg_io("LEB %d:%d, %d bytes, jhead %s", + wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); + ubifs_assert(!(wbuf->avail & 7)); +- ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); ++ ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size); ++ ubifs_assert(wbuf->size >= c->min_io_size); ++ ubifs_assert(wbuf->size <= c->max_write_size); ++ ubifs_assert(wbuf->size % c->min_io_size == 0); + ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->leb_size - wbuf->offs >= c->max_write_size) ++ ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); + + if (c->ro_error) + return -EROFS; + +- ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); +- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, +- c->min_io_size, wbuf->dtype); +- if (err) { +- ubifs_err("cannot write %d bytes to LEB %d:%d", +- c->min_io_size, wbuf->lnum, wbuf->offs); +- dbg_dump_stack(); ++ /* ++ * Do not write whole write buffer but write only the minimum necessary ++ * amount of min. I/O units. ++ */ ++ sync_len = ALIGN(wbuf->used, c->min_io_size); ++ dirt = sync_len - wbuf->used; ++ if (dirt) ++ ubifs_pad(c, wbuf->buf + wbuf->used, dirt); ++ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len, ++ wbuf->dtype); ++ if (err) + return err; +- } +- +- dirt = wbuf->avail; + + spin_lock(&wbuf->lock); +- wbuf->offs += c->min_io_size; +- wbuf->avail = c->min_io_size; ++ wbuf->offs += sync_len; ++ /* ++ * Now @wbuf->offs is not necessarily aligned to @c->max_write_size. ++ * But our goal is to optimize writes and make sure we write in ++ * @c->max_write_size chunks and to @c->max_write_size-aligned offset. 
++ * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make ++ * sure that @wbuf->offs + @wbuf->size is aligned to ++ * @c->max_write_size. This way we make sure that after next ++ * write-buffer flush we are again at the optimal offset (aligned to ++ * @c->max_write_size). ++ */ ++ if (c->leb_size - wbuf->offs < c->max_write_size) ++ wbuf->size = c->leb_size - wbuf->offs; ++ else if (wbuf->offs & (c->max_write_size - 1)) ++ wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; ++ else ++ wbuf->size = c->max_write_size; ++ wbuf->avail = wbuf->size; + wbuf->used = 0; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); +@@ -396,8 +565,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) + * @dtype: data type + * + * This function targets the write-buffer to logical eraseblock @lnum:@offs. +- * The write-buffer is synchronized if it is not empty. Returns zero in case of +- * success and a negative error code in case of failure. ++ * The write-buffer has to be empty. Returns zero in case of success and a ++ * negative error code in case of failure. 
+ */ + int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, + int dtype) +@@ -409,18 +578,18 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, + ubifs_assert(offs >= 0 && offs <= c->leb_size); + ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); + ubifs_assert(lnum != wbuf->lnum); +- +- if (wbuf->used > 0) { +- int err = ubifs_wbuf_sync_nolock(wbuf); +- +- if (err) +- return err; +- } ++ ubifs_assert(wbuf->used == 0); + + spin_lock(&wbuf->lock); + wbuf->lnum = lnum; + wbuf->offs = offs; +- wbuf->avail = c->min_io_size; ++ if (c->leb_size - wbuf->offs < c->max_write_size) ++ wbuf->size = c->leb_size - wbuf->offs; ++ else if (wbuf->offs & (c->max_write_size - 1)) ++ wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; ++ else ++ wbuf->size = c->max_write_size; ++ wbuf->avail = wbuf->size; + wbuf->used = 0; + spin_unlock(&wbuf->lock); + wbuf->dtype = dtype; +@@ -500,8 +669,9 @@ out_timers: + * + * This function writes data to flash via write-buffer @wbuf. This means that + * the last piece of the node won't reach the flash media immediately if it +- * does not take whole minimal I/O unit. Instead, the node will sit in RAM +- * until the write-buffer is synchronized (e.g., by timer). ++ * does not take whole max. write unit (@c->max_write_size). Instead, the node ++ * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or ++ * because more data are appended to the write-buffer). + * + * This function returns zero in case of success and a negative error code in + * case of failure. 
If the node cannot be written because there is no more +@@ -510,7 +680,7 @@ out_timers: + int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) + { + struct ubifs_info *c = wbuf->c; +- int err, written, n, aligned_len = ALIGN(len, 8), offs; ++ int err, written, n, aligned_len = ALIGN(len, 8); + + dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, + dbg_ntype(((struct ubifs_ch *)buf)->node_type), +@@ -518,9 +688,15 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) + ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); + ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); + ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); +- ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); ++ ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size); ++ ubifs_assert(wbuf->size >= c->min_io_size); ++ ubifs_assert(wbuf->size <= c->max_write_size); ++ ubifs_assert(wbuf->size % c->min_io_size == 0); + ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); + ubifs_assert(!c->ro_media && !c->ro_mount); ++ ubifs_assert(!c->space_fixup); ++ if (c->leb_size - wbuf->offs >= c->max_write_size) ++ ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); + + if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { + err = -ENOSPC; +@@ -542,15 +718,19 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) + if (aligned_len == wbuf->avail) { + dbg_io("flush jhead %s wbuf to LEB %d:%d", + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); +- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, +- wbuf->offs, c->min_io_size, +- wbuf->dtype); ++ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, ++ wbuf->offs, wbuf->size, ++ wbuf->dtype); + if (err) + goto out; + + spin_lock(&wbuf->lock); +- wbuf->offs += c->min_io_size; +- wbuf->avail = c->min_io_size; ++ wbuf->offs += wbuf->size; ++ if (c->leb_size - wbuf->offs >= c->max_write_size) ++ wbuf->size = c->max_write_size; ++ 
else ++ wbuf->size = c->leb_size - wbuf->offs; ++ wbuf->avail = wbuf->size; + wbuf->used = 0; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); +@@ -564,39 +744,63 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) + goto exit; + } + +- /* +- * The node is large enough and does not fit entirely within current +- * minimal I/O unit. We have to fill and flush write-buffer and switch +- * to the next min. I/O unit. +- */ +- dbg_io("flush jhead %s wbuf to LEB %d:%d", +- dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); +- memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); +- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, +- c->min_io_size, wbuf->dtype); +- if (err) +- goto out; ++ written = 0; ++ ++ if (wbuf->used) { ++ /* ++ * The node is large enough and does not fit entirely within ++ * current available space. We have to fill and flush ++ * write-buffer and switch to the next max. write unit. ++ */ ++ dbg_io("flush jhead %s wbuf to LEB %d:%d", ++ dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); ++ memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); ++ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, ++ wbuf->size, wbuf->dtype); ++ if (err) ++ goto out; ++ ++ wbuf->offs += wbuf->size; ++ len -= wbuf->avail; ++ aligned_len -= wbuf->avail; ++ written += wbuf->avail; ++ } else if (wbuf->offs & (c->max_write_size - 1)) { ++ /* ++ * The write-buffer offset is not aligned to ++ * @c->max_write_size and @wbuf->size is less than ++ * @c->max_write_size. Write @wbuf->size bytes to make sure the ++ * following writes are done in optimal @c->max_write_size ++ * chunks. 
++ */ ++ dbg_io("write %d bytes to LEB %d:%d", ++ wbuf->size, wbuf->lnum, wbuf->offs); ++ err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs, ++ wbuf->size, wbuf->dtype); ++ if (err) ++ goto out; + +- offs = wbuf->offs + c->min_io_size; +- len -= wbuf->avail; +- aligned_len -= wbuf->avail; +- written = wbuf->avail; ++ wbuf->offs += wbuf->size; ++ len -= wbuf->size; ++ aligned_len -= wbuf->size; ++ written += wbuf->size; ++ } + + /* +- * The remaining data may take more whole min. I/O units, so write the +- * remains multiple to min. I/O unit size directly to the flash media. ++ * The remaining data may take more whole max. write units, so write the ++ * remains multiple to max. write unit size directly to the flash media. + * We align node length to 8-byte boundary because we anyway flash wbuf + * if the remaining space is less than 8 bytes. + */ +- n = aligned_len >> c->min_io_shift; ++ n = aligned_len >> c->max_write_shift; + if (n) { +- n <<= c->min_io_shift; +- dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); +- err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, +- wbuf->dtype); ++ n <<= c->max_write_shift; ++ dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, ++ wbuf->offs); ++ err = ubifs_leb_write(c, wbuf->lnum, buf + written, ++ wbuf->offs, n, wbuf->dtype); + if (err) + goto out; +- offs += n; ++ wbuf->offs += n; + aligned_len -= n; + len -= n; + written += n; +@@ -606,14 +810,17 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) + if (aligned_len) + /* + * And now we have what's left and what does not take whole +- * min. I/O unit, so write it to the write-buffer and we are ++ * max. write unit, so write it to the write-buffer and we are + * done. 
+ */ + memcpy(wbuf->buf, buf + written, len); + +- wbuf->offs = offs; ++ if (c->leb_size - wbuf->offs >= c->max_write_size) ++ wbuf->size = c->max_write_size; ++ else ++ wbuf->size = c->leb_size - wbuf->offs; ++ wbuf->avail = wbuf->size - aligned_len; + wbuf->used = aligned_len; +- wbuf->avail = c->min_io_size - aligned_len; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); + +@@ -666,18 +873,15 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); + ubifs_assert(!c->ro_media && !c->ro_mount); ++ ubifs_assert(!c->space_fixup); + + if (c->ro_error) + return -EROFS; + + ubifs_prepare_node(c, buf, len, 1); +- err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); +- if (err) { +- ubifs_err("cannot write %d bytes to LEB %d:%d, error %d", +- buf_len, lnum, offs, err); ++ err = ubifs_leb_write(c, lnum, buf, offs, buf_len, dtype); ++ if (err) + dbg_dump_node(c, buf); +- dbg_dump_stack(); +- } + + return err; + } +@@ -729,13 +933,9 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, + + if (rlen > 0) { + /* Read everything that goes before write-buffer */ +- err = ubi_read(c->ubi, lnum, buf, offs, rlen); +- if (err && err != -EBADMSG) { +- ubifs_err("failed to read node %d from LEB %d:%d, " +- "error %d", type, lnum, offs, err); +- dbg_dump_stack(); ++ err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0); ++ if (err && err != -EBADMSG) + return err; +- } + } + + if (type != ch->node_type) { +@@ -790,12 +990,9 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, + ubifs_assert(!(offs & 7) && offs < c->leb_size); + ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); + +- err = ubi_read(c->ubi, lnum, buf, offs, len); +- if (err && err != -EBADMSG) { +- ubifs_err("cannot read node %d from LEB %d:%d, error %d", +- type, lnum, offs, err); ++ err = ubifs_leb_read(c, 
lnum, buf, offs, len, 0); ++ if (err && err != -EBADMSG) + return err; +- } + + if (type != ch->node_type) { + ubifs_err("bad node type (%d but expected %d)", +@@ -837,11 +1034,11 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) + { + size_t size; + +- wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); ++ wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL); + if (!wbuf->buf) + return -ENOMEM; + +- size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); ++ size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); + wbuf->inodes = kmalloc(size, GFP_KERNEL); + if (!wbuf->inodes) { + kfree(wbuf->buf); +@@ -851,7 +1048,14 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) + + wbuf->used = 0; + wbuf->lnum = wbuf->offs = -1; +- wbuf->avail = c->min_io_size; ++ /* ++ * If the LEB starts at the max. write size aligned address, then ++ * write-buffer size has to be set to @c->max_write_size. Otherwise, ++ * set it to something smaller so that it ends at the closest max. ++ * write size boundary. ++ */ ++ size = c->max_write_size - (c->leb_start % c->max_write_size); ++ wbuf->avail = wbuf->size = size; + wbuf->dtype = UBI_UNKNOWN; + wbuf->sync_callback = NULL; + mutex_init(&wbuf->io_mutex); +diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c +index 914f1bd..2f438ab 100644 +--- a/fs/ubifs/journal.c ++++ b/fs/ubifs/journal.c +@@ -141,14 +141,8 @@ again: + * LEB with some empty space. 
+ */ + lnum = ubifs_find_free_space(c, len, &offs, squeeze); +- if (lnum >= 0) { +- /* Found an LEB, add it to the journal head */ +- err = ubifs_add_bud_to_log(c, jhead, lnum, offs); +- if (err) +- goto out_return; +- /* A new bud was successfully allocated and added to the log */ ++ if (lnum >= 0) + goto out; +- } + + err = lnum; + if (err != -ENOSPC) +@@ -203,12 +197,23 @@ again: + return 0; + } + +- err = ubifs_add_bud_to_log(c, jhead, lnum, 0); +- if (err) +- goto out_return; + offs = 0; + + out: ++ /* ++ * Make sure we synchronize the write-buffer before we add the new bud ++ * to the log. Otherwise we may have a power cut after the log ++ * reference node for the last bud (@lnum) is written but before the ++ * write-buffer data are written to the next-to-last bud ++ * (@wbuf->lnum). And the effect would be that the recovery would see ++ * that there is corruption in the next-to-last bud. ++ */ ++ err = ubifs_wbuf_sync_nolock(wbuf); ++ if (err) ++ goto out_return; ++ err = ubifs_add_bud_to_log(c, jhead, lnum, offs); ++ if (err) ++ goto out_return; + err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); + if (err) + goto out_unlock; +@@ -380,10 +385,8 @@ out: + if (err == -ENOSPC) { + /* This are some budgeting problems, print useful information */ + down_write(&c->commit_sem); +- spin_lock(&c->space_lock); + dbg_dump_stack(); +- dbg_dump_budg(c); +- spin_unlock(&c->space_lock); ++ dbg_dump_budg(c, &c->bi); + dbg_dump_lprops(c); + cmt_retries = dbg_check_lprops(c); + up_write(&c->commit_sem); +@@ -666,6 +669,7 @@ out_free: + + out_release: + release_head(c, BASEHD); ++ kfree(dent); + out_ro: + ubifs_ro_mode(c, err); + if (last_reference) +@@ -690,17 +694,26 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, + { + struct ubifs_data_node *data; + int err, lnum, offs, compr_type, out_len; +- int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; ++ int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1; + struct 
ubifs_inode *ui = ubifs_inode(inode); + +- dbg_jnl("ino %lu, blk %u, len %d, key %s", +- (unsigned long)key_inum(c, key), key_block(c, key), len, +- DBGKEY(key)); ++ dbg_jnlk(key, "ino %lu, blk %u, len %d, key ", ++ (unsigned long)key_inum(c, key), key_block(c, key), len); + ubifs_assert(len <= UBIFS_BLOCK_SIZE); + +- data = kmalloc(dlen, GFP_NOFS); +- if (!data) +- return -ENOMEM; ++ data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN); ++ if (!data) { ++ /* ++ * Fall-back to the write reserve buffer. Note, we might be ++ * currently on the memory reclaim path, when the kernel is ++ * trying to free some memory by writing out dirty pages. The ++ * write reserve buffer helps us to guarantee that we are ++ * always able to write the data. ++ */ ++ allocated = 0; ++ mutex_lock(&c->write_reserve_mutex); ++ data = c->write_reserve_buf; ++ } + + data->ch.node_type = UBIFS_DATA_NODE; + key_write(c, key, &data->key); +@@ -736,7 +749,10 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, + goto out_ro; + + finish_reservation(c); +- kfree(data); ++ if (!allocated) ++ mutex_unlock(&c->write_reserve_mutex); ++ else ++ kfree(data); + return 0; + + out_release: +@@ -745,7 +761,10 @@ out_ro: + ubifs_ro_mode(c, err); + finish_reservation(c); + out_free: +- kfree(data); ++ if (!allocated) ++ mutex_unlock(&c->write_reserve_mutex); ++ else ++ kfree(data); + return err; + } + +@@ -1157,7 +1176,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, + dn = (void *)trun + UBIFS_TRUN_NODE_SZ; + blk = new_size >> UBIFS_BLOCK_SHIFT; + data_key_init(c, &key, inum, blk); +- dbg_jnl("last block key %s", DBGKEY(&key)); ++ dbg_jnlk(&key, "last block key "); + err = ubifs_tnc_lookup(c, &key, dn); + if (err == -ENOENT) + dlen = 0; /* Not found (so it is a hole) */ +diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c +index 4d0cb12..f9fd068 100644 +--- a/fs/ubifs/log.c ++++ b/fs/ubifs/log.c +@@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info 
*c, int lnum) + } + + /** +- * next_log_lnum - switch to the next log LEB. +- * @c: UBIFS file-system description object +- * @lnum: current log LEB +- */ +-static inline int next_log_lnum(const struct ubifs_info *c, int lnum) +-{ +- lnum += 1; +- if (lnum > c->log_last) +- lnum = UBIFS_LOG_LNUM; +- +- return lnum; +-} +- +-/** + * empty_log_bytes - calculate amount of empty space in the log. + * @c: UBIFS file-system description object + */ +@@ -175,26 +161,6 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) + } + + /** +- * ubifs_create_buds_lists - create journal head buds lists for remount rw. +- * @c: UBIFS file-system description object +- */ +-void ubifs_create_buds_lists(struct ubifs_info *c) +-{ +- struct rb_node *p; +- +- spin_lock(&c->buds_lock); +- p = rb_first(&c->buds); +- while (p) { +- struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb); +- struct ubifs_jhead *jhead = &c->jheads[bud->jhead]; +- +- list_add_tail(&bud->list, &jhead->buds_list); +- p = rb_next(p); +- } +- spin_unlock(&c->buds_lock); +-} +- +-/** + * ubifs_add_bud_to_log - add a new bud to the log. + * @c: UBIFS file-system description object + * @jhead: journal head the bud belongs to +@@ -277,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) + ref->jhead = cpu_to_le32(jhead); + + if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { +- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); ++ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + +@@ -296,7 +262,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) + * an unclean reboot, because the target LEB might have been + * unmapped, but not yet physically erased. 
+ */ +- err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM); ++ err = ubifs_leb_map(c, bud->lnum, UBI_SHORTTERM); + if (err) + goto out_unlock; + } +@@ -317,8 +283,6 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) + return 0; + + out_unlock: +- if (err != -EAGAIN) +- ubifs_ro_mode(c, err); + mutex_unlock(&c->log_mutex); + kfree(ref); + kfree(bud); +@@ -445,7 +409,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) + + /* Switch to the next log LEB */ + if (c->lhead_offs) { +- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); ++ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + +@@ -466,7 +430,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) + + c->lhead_offs += len; + if (c->lhead_offs == c->leb_size) { +- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); ++ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + +@@ -553,7 +517,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum) + } + mutex_lock(&c->log_mutex); + for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; +- lnum = next_log_lnum(c, lnum)) { ++ lnum = ubifs_next_log_lnum(c, lnum)) { + dbg_log("unmap log LEB %d", lnum); + err = ubifs_leb_unmap(c, lnum); + if (err) +@@ -662,7 +626,7 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs, + err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); + if (err) + return err; +- *lnum = next_log_lnum(c, *lnum); ++ *lnum = ubifs_next_log_lnum(c, *lnum); + *offs = 0; + } + memcpy(buf + *offs, node, len); +@@ -732,7 +696,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) + ubifs_scan_destroy(sleb); + if (lnum == c->lhead_lnum) + break; +- lnum = next_log_lnum(c, lnum); ++ lnum = ubifs_next_log_lnum(c, lnum); + } + if (offs) { + int sz = ALIGN(offs, c->min_io_size); +@@ -752,7 +716,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) + /* Unmap remaining LEBs */ + lnum = write_lnum; + do { +- 
lnum = next_log_lnum(c, lnum); ++ lnum = ubifs_next_log_lnum(c, lnum); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; +@@ -786,7 +750,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c) + struct ubifs_bud *bud; + long long bud_bytes = 0; + +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + + spin_lock(&c->buds_lock); +diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c +index 4d4ca38..f8a181e 100644 +--- a/fs/ubifs/lprops.c ++++ b/fs/ubifs/lprops.c +@@ -504,7 +504,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) + pnode = (struct ubifs_pnode *)container_of(lprops - pos, + struct ubifs_pnode, + lprops[0]); +- return !test_bit(COW_ZNODE, &pnode->flags) && ++ return !test_bit(COW_CNODE, &pnode->flags) && + test_bit(DIRTY_CNODE, &pnode->flags); + } + +@@ -860,7 +860,7 @@ int dbg_check_cats(struct ubifs_info *c) + struct list_head *pos; + int i, cat; + +- if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) ++ if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) + return 0; + + list_for_each_entry(lprops, &c->empty_list, list) { +@@ -958,7 +958,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, + { + int i = 0, j, err = 0; + +- if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) ++ if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c)) + return; + + for (i = 0; i < heap->cnt; i++) { +@@ -1007,21 +1007,11 @@ out: + } + + /** +- * struct scan_check_data - data provided to scan callback function. +- * @lst: LEB properties statistics +- * @err: error code +- */ +-struct scan_check_data { +- struct ubifs_lp_stats lst; +- int err; +-}; +- +-/** + * scan_check_cb - scan callback. 
+ * @c: the UBIFS file-system description object + * @lp: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory +- * @data: information passed to and from the caller of the scan ++ * @lst: lprops statistics to update + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree +@@ -1030,12 +1020,12 @@ struct scan_check_data { + */ + static int scan_check_cb(struct ubifs_info *c, + const struct ubifs_lprops *lp, int in_tree, +- struct scan_check_data *data) ++ struct ubifs_lp_stats *lst) + { + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; +- struct ubifs_lp_stats *lst = &data->lst; +- int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty; ++ int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; ++ void *buf = NULL; + + cat = lp->flags & LPROPS_CAT_MASK; + if (cat != LPROPS_UNCAT) { +@@ -1043,7 +1033,7 @@ static int scan_check_cb(struct ubifs_info *c, + if (cat != (lp->flags & LPROPS_CAT_MASK)) { + ubifs_err("bad LEB category %d expected %d", + (lp->flags & LPROPS_CAT_MASK), cat); +- goto out; ++ return -EINVAL; + } + } + +@@ -1077,7 +1067,7 @@ static int scan_check_cb(struct ubifs_info *c, + } + if (!found) { + ubifs_err("bad LPT list (category %d)", cat); +- goto out; ++ return -EINVAL; + } + } + } +@@ -1089,36 +1079,40 @@ static int scan_check_cb(struct ubifs_info *c, + if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || + lp != heap->arr[lp->hpos]) { + ubifs_err("bad LPT heap (category %d)", cat); +- goto out; ++ return -EINVAL; + } + } + +- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); +- if (IS_ERR(sleb)) { +- /* +- * After an unclean unmount, empty and freeable LEBs +- * may contain garbage. 
+- */ +- if (lp->free == c->leb_size) { +- ubifs_err("scan errors were in empty LEB " +- "- continuing checking"); +- lst->empty_lebs += 1; +- lst->total_free += c->leb_size; +- lst->total_dark += ubifs_calc_dark(c, c->leb_size); +- return LPT_SCAN_CONTINUE; +- } ++ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) ++ return -ENOMEM; + +- if (lp->free + lp->dirty == c->leb_size && +- !(lp->flags & LPROPS_INDEX)) { +- ubifs_err("scan errors were in freeable LEB " +- "- continuing checking"); +- lst->total_free += lp->free; +- lst->total_dirty += lp->dirty; +- lst->total_dark += ubifs_calc_dark(c, c->leb_size); +- return LPT_SCAN_CONTINUE; ++ /* ++ * After an unclean unmount, empty and freeable LEBs ++ * may contain garbage - do not scan them. ++ */ ++ if (lp->free == c->leb_size) { ++ lst->empty_lebs += 1; ++ lst->total_free += c->leb_size; ++ lst->total_dark += ubifs_calc_dark(c, c->leb_size); ++ return LPT_SCAN_CONTINUE; ++ } ++ if (lp->free + lp->dirty == c->leb_size && ++ !(lp->flags & LPROPS_INDEX)) { ++ lst->total_free += lp->free; ++ lst->total_dirty += lp->dirty; ++ lst->total_dark += ubifs_calc_dark(c, c->leb_size); ++ return LPT_SCAN_CONTINUE; ++ } ++ ++ sleb = ubifs_scan(c, lnum, 0, buf, 0); ++ if (IS_ERR(sleb)) { ++ ret = PTR_ERR(sleb); ++ if (ret == -EUCLEAN) { ++ dbg_dump_lprops(c); ++ dbg_dump_budg(c, &c->bi); + } +- data->err = PTR_ERR(sleb); +- return LPT_SCAN_STOP; ++ goto out; + } + + is_idx = -1; +@@ -1236,6 +1230,7 @@ static int scan_check_cb(struct ubifs_info *c, + } + + ubifs_scan_destroy(sleb); ++ vfree(buf); + return LPT_SCAN_CONTINUE; + + out_print: +@@ -1245,9 +1240,10 @@ out_print: + dbg_dump_leb(c, lnum); + out_destroy: + ubifs_scan_destroy(sleb); ++ ret = -EINVAL; + out: +- data->err = -EINVAL; +- return LPT_SCAN_STOP; ++ vfree(buf); ++ return ret; + } + + /** +@@ -1264,10 +1260,9 @@ out: + int dbg_check_lprops(struct ubifs_info *c) + { + int i, err; +- struct scan_check_data data; +- struct ubifs_lp_stats *lst = 
&data.lst; ++ struct ubifs_lp_stats lst; + +- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ if (!dbg_is_chk_lprops(c)) + return 0; + + /* +@@ -1280,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info *c) + return err; + } + +- memset(lst, 0, sizeof(struct ubifs_lp_stats)); +- +- data.err = 0; ++ memset(&lst, 0, sizeof(struct ubifs_lp_stats)); + err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, + (ubifs_lpt_scan_callback)scan_check_cb, +- &data); ++ &lst); + if (err && err != -ENOSPC) + goto out; +- if (data.err) { +- err = data.err; +- goto out; +- } + +- if (lst->empty_lebs != c->lst.empty_lebs || +- lst->idx_lebs != c->lst.idx_lebs || +- lst->total_free != c->lst.total_free || +- lst->total_dirty != c->lst.total_dirty || +- lst->total_used != c->lst.total_used) { ++ if (lst.empty_lebs != c->lst.empty_lebs || ++ lst.idx_lebs != c->lst.idx_lebs || ++ lst.total_free != c->lst.total_free || ++ lst.total_dirty != c->lst.total_dirty || ++ lst.total_used != c->lst.total_used) { + ubifs_err("bad overall accounting"); + ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " + "total_free %lld, total_dirty %lld, total_used %lld", +- lst->empty_lebs, lst->idx_lebs, lst->total_free, +- lst->total_dirty, lst->total_used); ++ lst.empty_lebs, lst.idx_lebs, lst.total_free, ++ lst.total_dirty, lst.total_used); + ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " + "total_free %lld, total_dirty %lld, total_used %lld", + c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, +@@ -1311,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info *c) + goto out; + } + +- if (lst->total_dead != c->lst.total_dead || +- lst->total_dark != c->lst.total_dark) { ++ if (lst.total_dead != c->lst.total_dead || ++ lst.total_dark != c->lst.total_dark) { + ubifs_err("bad dead/dark space accounting"); + ubifs_err("calculated: total_dead %lld, total_dark %lld", +- lst->total_dead, lst->total_dark); ++ lst.total_dead, lst.total_dark); + ubifs_err("read from lprops: total_dead %lld, 
total_dark %lld", + c->lst.total_dead, c->lst.total_dark); + err = -EINVAL; +diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c +index 72775d3..66d59d0 100644 +--- a/fs/ubifs/lpt.c ++++ b/fs/ubifs/lpt.c +@@ -701,8 +701,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); +- err = ubi_leb_change(c->ubi, lnum++, buf, alen, +- UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum++, buf, alen, ++ UBI_SHORTTERM); + if (err) + goto out; + p = buf; +@@ -732,8 +732,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + set_ltab(c, lnum, c->leb_size - alen, + alen - len); + memset(p, 0xff, alen - len); +- err = ubi_leb_change(c->ubi, lnum++, buf, alen, +- UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum++, buf, alen, ++ UBI_SHORTTERM); + if (err) + goto out; + p = buf; +@@ -780,8 +780,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); +- err = ubi_leb_change(c->ubi, lnum++, buf, alen, +- UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum++, buf, alen, ++ UBI_SHORTTERM); + if (err) + goto out; + p = buf; +@@ -806,7 +806,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + alen = ALIGN(len, c->min_io_size); + set_ltab(c, lnum, c->leb_size - alen, alen - len); + memset(p, 0xff, alen - len); +- err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum++, buf, alen, UBI_SHORTTERM); + if (err) + goto out; + p = buf; +@@ -826,7 +826,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, + + /* Write remaining buffer */ + memset(p, 0xff, alen - len); +- err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum, buf, alen, 
UBI_SHORTTERM); + if (err) + goto out; + +@@ -1222,7 +1222,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) + if (c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + } else { +- err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); ++ err = ubifs_leb_read(c, lnum, buf, offs, c->nnode_sz, 1); + if (err) + goto out; + err = ubifs_unpack_nnode(c, buf, nnode); +@@ -1247,6 +1247,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) + + out: + ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); ++ dbg_dump_stack(); + kfree(nnode); + return err; + } +@@ -1270,10 +1271,9 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) + lnum = branch->lnum; + offs = branch->offs; + pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); +- if (!pnode) { +- err = -ENOMEM; +- goto out; +- } ++ if (!pnode) ++ return -ENOMEM; ++ + if (lnum == 0) { + /* + * This pnode was not written which just means that the LEB +@@ -1291,7 +1291,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) + lprops->flags = ubifs_categorize_lprops(c, lprops); + } + } else { +- err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); ++ err = ubifs_leb_read(c, lnum, buf, offs, c->pnode_sz, 1); + if (err) + goto out; + err = unpack_pnode(c, buf, pnode); +@@ -1313,6 +1313,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip) + out: + ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); + dbg_dump_pnode(c, pnode, parent, iip); ++ dbg_dump_stack(); + dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); + kfree(pnode); + return err; +@@ -1332,7 +1333,7 @@ static int read_ltab(struct ubifs_info *c) + buf = vmalloc(c->ltab_sz); + if (!buf) + return -ENOMEM; +- err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); ++ err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1); + if 
(err) + goto out; + err = unpack_ltab(c, buf); +@@ -1355,7 +1356,8 @@ static int read_lsave(struct ubifs_info *c) + buf = vmalloc(c->lsave_sz); + if (!buf) + return -ENOMEM; +- err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz); ++ err = ubifs_leb_read(c, c->lsave_lnum, buf, c->lsave_offs, ++ c->lsave_sz, 1); + if (err) + goto out; + err = unpack_lsave(c, buf); +@@ -1815,8 +1817,8 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c, + if (c->big_lpt) + nnode->num = calc_nnode_num_from_parent(c, parent, iip); + } else { +- err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, +- c->nnode_sz); ++ err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, ++ c->nnode_sz, 1); + if (err) + return ERR_PTR(err); + err = ubifs_unpack_nnode(c, buf, nnode); +@@ -1884,8 +1886,8 @@ static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c, + ubifs_assert(branch->lnum >= c->lpt_first && + branch->lnum <= c->lpt_last); + ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); +- err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, +- c->pnode_sz); ++ err = ubifs_leb_read(c, branch->lnum, buf, branch->offs, ++ c->pnode_sz, 1); + if (err) + return ERR_PTR(err); + err = unpack_pnode(c, buf, pnode); +@@ -1984,12 +1986,11 @@ again: + + if (path[h].in_tree) + continue; +- nnode = kmalloc(sz, GFP_NOFS); ++ nnode = kmemdup(&path[h].nnode, sz, GFP_NOFS); + if (!nnode) { + err = -ENOMEM; + goto out; + } +- memcpy(nnode, &path[h].nnode, sz); + parent = nnode->parent; + parent->nbranch[nnode->iip].nnode = nnode; + path[h].ptr.nnode = nnode; +@@ -2002,12 +2003,11 @@ again: + const size_t sz = sizeof(struct ubifs_pnode); + struct ubifs_nnode *parent; + +- pnode = kmalloc(sz, GFP_NOFS); ++ pnode = kmemdup(&path[h].pnode, sz, GFP_NOFS); + if (!pnode) { + err = -ENOMEM; + goto out; + } +- memcpy(pnode, &path[h].pnode, sz); + parent = pnode->parent; + parent->nbranch[pnode->iip].pnode = pnode; + path[h].ptr.pnode = pnode; +@@ -2225,7 +2225,7 @@ int 
dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, + struct ubifs_cnode *cn; + int num, iip = 0, err; + +- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ if (!dbg_is_chk_lprops(c)) + return 0; + + while (cnode) { +diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c +index 5c90dec..cddd6bd 100644 +--- a/fs/ubifs/lpt_commit.c ++++ b/fs/ubifs/lpt_commit.c +@@ -27,8 +27,15 @@ + + #include <linux/crc16.h> + #include <linux/slab.h> ++#include <linux/random.h> + #include "ubifs.h" + ++#ifdef CONFIG_UBIFS_FS_DEBUG ++static int dbg_populate_lsave(struct ubifs_info *c); ++#else ++#define dbg_populate_lsave(c) 0 ++#endif ++ + /** + * first_dirty_cnode - find first dirty cnode. + * @c: UBIFS file-system description object +@@ -110,8 +117,8 @@ static int get_cnodes_to_commit(struct ubifs_info *c) + return 0; + cnt += 1; + while (1) { +- ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags)); +- __set_bit(COW_ZNODE, &cnode->flags); ++ ubifs_assert(!test_bit(COW_CNODE, &cnode->flags)); ++ __set_bit(COW_CNODE, &cnode->flags); + cnext = next_dirty_cnode(cnode); + if (!cnext) { + cnode->cnext = c->lpt_cnext; +@@ -459,7 +466,7 @@ static int write_cnodes(struct ubifs_info *c) + */ + clear_bit(DIRTY_CNODE, &cnode->flags); + smp_mb__before_clear_bit(); +- clear_bit(COW_ZNODE, &cnode->flags); ++ clear_bit(COW_CNODE, &cnode->flags); + smp_mb__after_clear_bit(); + offs += len; + dbg_chk_lpt_sz(c, 1, len); +@@ -586,7 +593,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, + if (nnode->nbranch[iip].lnum) + break; + } +- } while (iip >= UBIFS_LPT_FANOUT); ++ } while (iip >= UBIFS_LPT_FANOUT); + + /* Go right */ + nnode = ubifs_get_nnode(c, nnode, iip); +@@ -815,6 +822,10 @@ static void populate_lsave(struct ubifs_info *c) + c->lpt_drty_flgs |= LSAVE_DIRTY; + ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); + } ++ ++ if (dbg_populate_lsave(c)) ++ return; ++ + list_for_each_entry(lprops, &c->empty_list, list) { + c->lsave[cnt++] = lprops->lnum; + if 
(cnt >= c->lsave_cnt) +@@ -1150,11 +1161,11 @@ static int lpt_gc_lnum(struct ubifs_info *c, int lnum) + void *buf = c->lpt_buf; + + dbg_lp("LEB %d", lnum); +- err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); +- if (err) { +- ubifs_err("cannot read LEB %d, error %d", lnum, err); ++ ++ err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); ++ if (err) + return err; +- } ++ + while (1) { + if (!is_a_node(c, buf, len)) { + int pad_len; +@@ -1628,29 +1639,35 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) + { + int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; + int ret; +- void *buf = c->dbg->buf; ++ void *buf, *p; + +- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ if (!dbg_is_chk_lprops(c)) + return 0; + +- dbg_lp("LEB %d", lnum); +- err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); +- if (err) { +- dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); +- return err; ++ buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubifs_err("cannot allocate memory for ltab checking"); ++ return 0; + } ++ ++ dbg_lp("LEB %d", lnum); ++ ++ err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); ++ if (err) ++ goto out; ++ + while (1) { +- if (!is_a_node(c, buf, len)) { ++ if (!is_a_node(c, p, len)) { + int i, pad_len; + +- pad_len = get_pad_len(c, buf, len); ++ pad_len = get_pad_len(c, p, len); + if (pad_len) { +- buf += pad_len; ++ p += pad_len; + len -= pad_len; + dirty += pad_len; + continue; + } +- if (!dbg_is_all_ff(buf, len)) { ++ if (!dbg_is_all_ff(p, len)) { + dbg_msg("invalid empty space in LEB %d at %d", + lnum, c->leb_size - len); + err = -EINVAL; +@@ -1668,16 +1685,21 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) + lnum, dirty, c->ltab[i].dirty); + err = -EINVAL; + } +- return err; ++ goto out; + } +- node_type = get_lpt_node_type(c, buf, &node_num); ++ node_type = get_lpt_node_type(c, p, &node_num); + node_len = get_lpt_node_len(c, node_type); + ret = dbg_is_node_dirty(c, node_type, 
lnum, c->leb_size - len); + if (ret == 1) + dirty += node_len; +- buf += node_len; ++ p += node_len; + len -= node_len; + } ++ ++ err = 0; ++out: ++ vfree(buf); ++ return err; + } + + /** +@@ -1690,7 +1712,7 @@ int dbg_check_ltab(struct ubifs_info *c) + { + int lnum, err, i, cnt; + +- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ if (!dbg_is_chk_lprops(c)) + return 0; + + /* Bring the entire tree into memory */ +@@ -1733,7 +1755,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c) + long long free = 0; + int i; + +- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ if (!dbg_is_chk_lprops(c)) + return 0; + + for (i = 0; i < c->lpt_lebs; i++) { +@@ -1775,7 +1797,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) + long long chk_lpt_sz, lpt_sz; + int err = 0; + +- if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) ++ if (!dbg_is_chk_lprops(c)) + return 0; + + switch (action) { +@@ -1870,25 +1892,30 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) + static void dump_lpt_leb(const struct ubifs_info *c, int lnum) + { + int err, len = c->leb_size, node_type, node_num, node_len, offs; +- void *buf = c->dbg->buf; ++ void *buf, *p; + + printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", + current->pid, lnum); +- err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); +- if (err) { +- ubifs_err("cannot read LEB %d, error %d", lnum, err); ++ buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubifs_err("cannot allocate memory to dump LPT"); + return; + } ++ ++ err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1); ++ if (err) ++ goto out; ++ + while (1) { + offs = c->leb_size - len; +- if (!is_a_node(c, buf, len)) { ++ if (!is_a_node(c, p, len)) { + int pad_len; + +- pad_len = get_pad_len(c, buf, len); ++ pad_len = get_pad_len(c, p, len); + if (pad_len) { + printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", + lnum, offs, pad_len); +- buf += pad_len; ++ p += pad_len; + len -= pad_len; + continue; + } +@@ -1898,7 +1925,7 @@ static void 
dump_lpt_leb(const struct ubifs_info *c, int lnum) + break; + } + +- node_type = get_lpt_node_type(c, buf, &node_num); ++ node_type = get_lpt_node_type(c, p, &node_num); + switch (node_type) { + case UBIFS_LPT_PNODE: + { +@@ -1923,7 +1950,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) + else + printk(KERN_DEBUG "LEB %d:%d, nnode, ", + lnum, offs); +- err = ubifs_unpack_nnode(c, buf, &nnode); ++ err = ubifs_unpack_nnode(c, p, &nnode); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { + printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, + nnode.nbranch[i].offs); +@@ -1944,15 +1971,18 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) + break; + default: + ubifs_err("LPT node type %d not recognized", node_type); +- return; ++ goto out; + } + +- buf += node_len; ++ p += node_len; + len -= node_len; + } + + printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", + current->pid, lnum); ++out: ++ vfree(buf); ++ return; + } + + /** +@@ -1974,4 +2004,47 @@ void dbg_dump_lpt_lebs(const struct ubifs_info *c) + current->pid); + } + ++/** ++ * dbg_populate_lsave - debugging version of 'populate_lsave()' ++ * @c: UBIFS file-system description object ++ * ++ * This is a debugging version for 'populate_lsave()' which populates lsave ++ * with random LEBs instead of useful LEBs, which is good for test coverage. ++ * Returns zero if lsave has not been populated (this debugging feature is ++ * disabled) and non-zero if lsave has been populated.
++ */ ++static int dbg_populate_lsave(struct ubifs_info *c) ++{ ++ struct ubifs_lprops *lprops; ++ struct ubifs_lpt_heap *heap; ++ int i; ++ ++ if (!dbg_is_chk_gen(c)) ++ return 0; ++ if (random32() & 3) ++ return 0; ++ ++ for (i = 0; i < c->lsave_cnt; i++) ++ c->lsave[i] = c->main_first; ++ ++ list_for_each_entry(lprops, &c->empty_list, list) ++ c->lsave[random32() % c->lsave_cnt] = lprops->lnum; ++ list_for_each_entry(lprops, &c->freeable_list, list) ++ c->lsave[random32() % c->lsave_cnt] = lprops->lnum; ++ list_for_each_entry(lprops, &c->frdi_idx_list, list) ++ c->lsave[random32() % c->lsave_cnt] = lprops->lnum; ++ ++ heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; ++ for (i = 0; i < heap->cnt; i++) ++ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; ++ heap = &c->lpt_heap[LPROPS_DIRTY - 1]; ++ for (i = 0; i < heap->cnt; i++) ++ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; ++ heap = &c->lpt_heap[LPROPS_FREE - 1]; ++ for (i = 0; i < heap->cnt; i++) ++ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; ++ ++ return 1; ++} ++ + #endif /* CONFIG_UBIFS_FS_DEBUG */ +diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c +index 21f47af..278c238 100644 +--- a/fs/ubifs/master.c ++++ b/fs/ubifs/master.c +@@ -148,7 +148,7 @@ static int validate_master(const struct ubifs_info *c) + } + + main_sz = (long long)c->main_lebs * c->leb_size; +- if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { ++ if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) { + err = 9; + goto out; + } +@@ -218,7 +218,7 @@ static int validate_master(const struct ubifs_info *c) + } + + if (c->lst.total_dead + c->lst.total_dark + +- c->lst.total_used + c->old_idx_sz > main_sz) { ++ c->lst.total_used + c->bi.old_idx_sz > main_sz) { + err = 21; + goto out; + } +@@ -286,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c) + c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); + c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); + c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); 
+- c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); ++ c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size); + c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); + c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); + c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); +@@ -305,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c) + c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); + c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); + +- c->calc_idx_sz = c->old_idx_sz; ++ c->calc_idx_sz = c->bi.old_idx_sz; + + if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) + c->no_orphs = 1; +diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h +index c3de04d..ee7cb5e 100644 +--- a/fs/ubifs/misc.h ++++ b/fs/ubifs/misc.h +@@ -39,6 +39,29 @@ static inline int ubifs_zn_dirty(const struct ubifs_znode *znode) + } + + /** ++ * ubifs_zn_obsolete - check if znode is obsolete. ++ * @znode: znode to check ++ * ++ * This helper function returns %1 if @znode is obsolete and %0 otherwise. ++ */ ++static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode) ++{ ++ return !!test_bit(OBSOLETE_ZNODE, &znode->flags); ++} ++ ++/** ++ * ubifs_zn_cow - check if znode has to be copied on write. ++ * @znode: znode to check ++ * ++ * This helper function returns %1 if @znode has COW flag set and %0 ++ * otherwise. ++ */ ++static inline int ubifs_zn_cow(const struct ubifs_znode *znode) ++{ ++ return !!test_bit(COW_ZNODE, &znode->flags); ++} ++ ++/** + * ubifs_wake_up_bgt - wake up background thread. + * @c: UBIFS file-system description object + */ +@@ -122,86 +145,6 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf) + } + + /** +- * ubifs_leb_unmap - unmap an LEB. +- * @c: UBIFS file-system description object +- * @lnum: LEB number to unmap +- * +- * This function returns %0 on success and a negative error code on failure.
+- */ +-static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) +-{ +- int err; +- +- ubifs_assert(!c->ro_media && !c->ro_mount); +- if (c->ro_error) +- return -EROFS; +- err = ubi_leb_unmap(c->ubi, lnum); +- if (err) { +- ubifs_err("unmap LEB %d failed, error %d", lnum, err); +- return err; +- } +- +- return 0; +-} +- +-/** +- * ubifs_leb_write - write to a LEB. +- * @c: UBIFS file-system description object +- * @lnum: LEB number to write +- * @buf: buffer to write from +- * @offs: offset within LEB to write to +- * @len: length to write +- * @dtype: data type +- * +- * This function returns %0 on success and a negative error code on failure. +- */ +-static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, +- const void *buf, int offs, int len, int dtype) +-{ +- int err; +- +- ubifs_assert(!c->ro_media && !c->ro_mount); +- if (c->ro_error) +- return -EROFS; +- err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); +- if (err) { +- ubifs_err("writing %d bytes at %d:%d, error %d", +- len, lnum, offs, err); +- return err; +- } +- +- return 0; +-} +- +-/** +- * ubifs_leb_change - atomic LEB change. +- * @c: UBIFS file-system description object +- * @lnum: LEB number to write +- * @buf: buffer to write from +- * @len: length to write +- * @dtype: data type +- * +- * This function returns %0 on success and a negative error code on failure. +- */ +-static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, +- const void *buf, int len, int dtype) +-{ +- int err; +- +- ubifs_assert(!c->ro_media && !c->ro_mount); +- if (c->ro_error) +- return -EROFS; +- err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); +- if (err) { +- ubifs_err("changing %d bytes in LEB %d, error %d", +- len, lnum, err); +- return err; +- } +- +- return 0; +-} +- +-/** + * ubifs_encode_dev - encode device node IDs. 
+ * @dev: UBIFS device node information + * @rdev: device IDs to encode +@@ -340,4 +283,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c) + mutex_unlock(&c->lp_mutex); + } + ++/** ++ * ubifs_next_log_lnum - switch to the next log LEB. ++ * @c: UBIFS file-system description object ++ * @lnum: current log LEB ++ * ++ * This helper function returns the log LEB number which goes next after LEB ++ * 'lnum'. ++ */ ++static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum) ++{ ++ lnum += 1; ++ if (lnum > c->log_last) ++ lnum = UBIFS_LOG_LNUM; ++ ++ return lnum; ++} ++ + #endif /* __UBIFS_MISC_H__ */ +diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c +index 82009c7..c542c73 100644 +--- a/fs/ubifs/orphan.c ++++ b/fs/ubifs/orphan.c +@@ -673,7 +673,8 @@ static int kill_orphans(struct ubifs_info *c) + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); + if (IS_ERR(sleb)) { + if (PTR_ERR(sleb) == -EUCLEAN) +- sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); ++ sleb = ubifs_recover_leb(c, lnum, 0, ++ c->sbuf, -1); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + break; +@@ -892,15 +893,22 @@ static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb) + static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) + { + int lnum, err = 0; ++ void *buf; + + /* Check no-orphans flag and skip this if no orphans */ + if (c->no_orphs) + return 0; + ++ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubifs_err("cannot allocate memory to check orphans"); ++ return 0; ++ } ++ + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + struct ubifs_scan_leb *sleb; + +- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); ++ sleb = ubifs_scan(c, lnum, 0, buf, 0); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + break; +@@ -912,6 +920,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) + break; + } + ++ vfree(buf); + return err; + } + +@@ -920,7 +929,7 @@ static int 
dbg_check_orphans(struct ubifs_info *c) + struct check_info ci; + int err; + +- if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) ++ if (!dbg_is_chk_orph(c)) + return 0; + + ci.last_ino = 0; +diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c +index 77e9b87..2a935b3 100644 +--- a/fs/ubifs/recovery.c ++++ b/fs/ubifs/recovery.c +@@ -28,6 +28,23 @@ + * UBIFS always cleans away all remnants of an unclean un-mount, so that + * errors do not accumulate. However UBIFS defers recovery if it is mounted + * read-only, and the flash is not modified in that case. ++ * ++ * The general UBIFS approach to the recovery is that it recovers from ++ * corruptions which could be caused by power cuts, but it refuses to recover ++ * from corruption caused by other reasons. And UBIFS tries to distinguish ++ * between these 2 reasons of corruptions and silently recover in the former ++ * case and loudly complain in the latter case. ++ * ++ * UBIFS writes only to erased LEBs, so it writes only to the flash space ++ * containing only 0xFFs. UBIFS also always writes strictly from the beginning ++ * of the LEB to the end. And UBIFS assumes that the underlying flash media ++ * writes in @c->max_write_size bytes at a time. ++ * ++ * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min. ++ * I/O unit corresponding to offset X to contain corrupted data, all the ++ * following min. I/O units have to contain empty space (all 0xFFs). If this is ++ * not true, the corruption cannot be the result of a power cut, and UBIFS ++ * refuses to mount. 
+ */ + + #include <linux/crc32.h> +@@ -100,7 +117,7 @@ static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf, + if (!sbuf) + return -ENOMEM; + +- err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); ++ err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0); + if (err && err != -EBADMSG) + goto out_free; + +@@ -196,10 +213,10 @@ static int write_rcvrd_mst_node(struct ubifs_info *c, + mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); + + ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); +- err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum, mst, sz, UBI_SHORTTERM); + if (err) + goto out; +- err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); ++ err = ubifs_leb_change(c, lnum + 1, mst, sz, UBI_SHORTTERM); + if (err) + goto out; + out: +@@ -257,7 +274,8 @@ int ubifs_recover_master_node(struct ubifs_info *c) + if (cor1) + goto out_err; + mst = mst1; +- } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { ++ } else if (offs1 == 0 && ++ c->leb_size - offs2 - sz < sz) { + /* 1st LEB was unmapped and written, 2nd not */ + if (cor1) + goto out_err; +@@ -300,6 +318,32 @@ int ubifs_recover_master_node(struct ubifs_info *c) + goto out_free; + } + memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); ++ ++ /* ++ * We had to recover the master node, which means there was an ++ * unclean reboot. However, it is possible that the master node ++ * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set. ++ * E.g., consider the following chain of events: ++ * ++ * 1. UBIFS was cleanly unmounted, so the master node is clean ++ * 2. UBIFS is being mounted R/W and starts changing the master ++ * node in the first (%UBIFS_MST_LNUM). A power cut happens, ++ * so this LEB ends up with some amount of garbage at the ++ * end. ++ * 3. UBIFS is being mounted R/O. We reach this place and ++ * recover the master node from the second LEB ++ * (%UBIFS_MST_LNUM + 1). 
But we cannot update the media ++ * because we are being mounted R/O. We have to defer the ++ * operation. ++ * 4. However, this master node (@c->mst_node) is marked as ++ * clean (since step 1). And if we just return, the ++ * mount code will be confused and won't recover the master ++ * node when it is re-mounted R/W later. ++ * ++ * Thus, we force the recovery by marking the master node as ++ * dirty. ++ */ ++ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + } else { + /* Write the recovered master node */ + c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1; +@@ -362,8 +406,9 @@ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c) + * @offs: offset to check + * + * This function returns %1 if @offs was in the last write to the LEB whose data +- * is in @buf, otherwise %0 is returned. The determination is made by checking +- * for subsequent empty space starting from the next @c->min_io_size boundary. ++ * is in @buf, otherwise %0 is returned. The determination is made by checking ++ * for subsequent empty space starting from the next @c->max_write_size ++ * boundary. + */ + static int is_last_write(const struct ubifs_info *c, void *buf, int offs) + { +@@ -371,10 +416,10 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) + uint8_t *p; + + /* +- * Round up to the next @c->min_io_size boundary i.e. @offs is in the +- * last wbuf written. After that should be empty space. ++ * Round up to the next @c->max_write_size boundary i.e. @offs is in ++ * the last wbuf written. After that should be empty space.
+ */ +- empty_offs = ALIGN(offs + 1, c->min_io_size); ++ empty_offs = ALIGN(offs + 1, c->max_write_size); + check_len = c->leb_size - empty_offs; + p = buf + empty_offs - offs; + return is_empty(p, check_len); +@@ -429,7 +474,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, + int skip, dlen = le32_to_cpu(ch->len); + + /* Check for empty space after the corrupt node's common header */ +- skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; ++ skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs; + if (is_empty(buf + skip, len - skip)) + return 1; + /* +@@ -441,7 +486,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, + return 0; + } + /* Now we know the corrupt node's length we can skip over it */ +- skip = ALIGN(offs + dlen, c->min_io_size) - offs; ++ skip = ALIGN(offs + dlen, c->max_write_size) - offs; + /* After which there should be empty space */ + if (is_empty(buf + skip, len - skip)) + return 1; +@@ -495,8 +540,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + int len = ALIGN(endpt, c->min_io_size); + + if (start) { +- err = ubi_read(c->ubi, lnum, sleb->buf, 0, +- start); ++ err = ubifs_leb_read(c, lnum, sleb->buf, 0, ++ start, 1); + if (err) + return err; + } +@@ -510,8 +555,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + ubifs_pad(c, buf, pad_len); + } + } +- err = ubi_leb_change(c->ubi, lnum, sleb->buf, len, +- UBI_UNKNOWN); ++ err = ubifs_leb_change(c, lnum, sleb->buf, len, ++ UBI_UNKNOWN); + if (err) + return err; + } +@@ -520,16 +565,15 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, + } + + /** +- * drop_incomplete_group - drop nodes from an incomplete group. ++ * drop_last_group - drop the last group of nodes. + * @sleb: scanned LEB information + * @offs: offset of dropped nodes is returned here + * +- * This function returns %1 if nodes are dropped and %0 otherwise. 
++ * This is a helper function for 'ubifs_recover_leb()' which drops the last ++ * group of nodes of the scanned LEB. + */ +-static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) ++static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs) + { +- int dropped = 0; +- + while (!list_empty(&sleb->nodes)) { + struct ubifs_scan_node *snod; + struct ubifs_ch *ch; +@@ -538,15 +582,40 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) + list); + ch = snod->node; + if (ch->group_type != UBIFS_IN_NODE_GROUP) +- return dropped; +- dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); ++ break; ++ ++ dbg_rcvry("dropping grouped node at %d:%d", ++ sleb->lnum, snod->offs); ++ *offs = snod->offs; ++ list_del(&snod->list); ++ kfree(snod); ++ sleb->nodes_cnt -= 1; ++ } ++} ++ ++/** ++ * drop_last_node - drop the last node. ++ * @sleb: scanned LEB information ++ * @offs: offset of dropped nodes is returned here ++ * @grouped: non-zero if whole group of nodes have to be dropped ++ * ++ * This is a helper function for 'ubifs_recover_leb()' which drops the last ++ * node of the scanned LEB. 
++ */ ++static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) ++{ ++ struct ubifs_scan_node *snod; ++ ++ if (!list_empty(&sleb->nodes)) { ++ snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, ++ list); ++ ++ dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs); + *offs = snod->offs; + list_del(&snod->list); + kfree(snod); + sleb->nodes_cnt -= 1; +- dropped = 1; + } +- return dropped; + } + + /** +@@ -555,7 +624,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) + * @lnum: LEB number + * @offs: offset + * @sbuf: LEB-sized buffer to use +- * @grouped: nodes may be grouped for recovery ++ * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not ++ * belong to any journal head) + * + * This function does a scan of a LEB, but caters for errors that might have + * been caused by the unclean unmount from which we are attempting to recover. +@@ -563,25 +633,21 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) + * found, and a negative error code in case of failure. + */ + struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, +- int offs, void *sbuf, int grouped) ++ int offs, void *sbuf, int jhead) + { +- int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; +- int empty_chkd = 0, start = offs; ++ int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; ++ int grouped = jhead == -1 ? 
0 : c->jheads[jhead].grouped; + struct ubifs_scan_leb *sleb; + void *buf = sbuf + offs; + +- dbg_rcvry("%d:%d", lnum, offs); ++ dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped); + + sleb = ubifs_start_scan(c, lnum, offs, sbuf); + if (IS_ERR(sleb)) + return sleb; + +- if (sleb->ecc) +- need_clean = 1; +- ++ ubifs_assert(len >= 8); + while (len >= 8) { +- int ret; +- + dbg_scan("look at LEB %d:%d (%d bytes left)", + lnum, offs, len); + +@@ -591,8 +657,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, + * Scan quietly until there is an error from which we cannot + * recover + */ +- ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); +- ++ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + if (ret == SCANNED_A_NODE) { + /* A valid node, and not a padding node */ + struct ubifs_ch *ch = buf; +@@ -605,104 +670,127 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, + offs += node_len; + buf += node_len; + len -= node_len; +- continue; +- } +- +- if (ret > 0) { ++ } else if (ret > 0) { + /* Padding bytes or a valid padding node */ + offs += ret; + buf += ret; + len -= ret; +- continue; +- } +- +- if (ret == SCANNED_EMPTY_SPACE) { +- if (!is_empty(buf, len)) { +- if (!is_last_write(c, buf, offs)) +- break; +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- } +- empty_chkd = 1; ++ } else if (ret == SCANNED_EMPTY_SPACE || ++ ret == SCANNED_GARBAGE || ++ ret == SCANNED_A_BAD_PAD_NODE || ++ ret == SCANNED_A_CORRUPT_NODE) { ++ dbg_rcvry("found corruption (%d) at %d:%d", ++ ret, lnum, offs); + break; +- } +- +- if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) +- if (is_last_write(c, buf, offs)) { +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- empty_chkd = 1; +- break; +- } +- +- if (ret == SCANNED_A_CORRUPT_NODE) +- if (no_more_nodes(c, buf, len, lnum, offs)) { +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- empty_chkd = 1; +- break; +- } +- 
+- if (quiet) { +- /* Redo the last scan but noisily */ +- quiet = 0; +- continue; +- } +- +- switch (ret) { +- case SCANNED_GARBAGE: +- dbg_err("garbage"); +- goto corrupted; +- case SCANNED_A_CORRUPT_NODE: +- case SCANNED_A_BAD_PAD_NODE: +- dbg_err("bad node"); +- goto corrupted; +- default: +- dbg_err("unknown"); ++ } else { ++ dbg_err("unexpected return value %d", ret); + err = -EINVAL; + goto error; + } + } + +- if (!empty_chkd && !is_empty(buf, len)) { +- if (is_last_write(c, buf, offs)) { +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- } else { ++ if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) { ++ if (!is_last_write(c, buf, offs)) ++ goto corrupted_rescan; ++ } else if (ret == SCANNED_A_CORRUPT_NODE) { ++ if (!no_more_nodes(c, buf, len, lnum, offs)) ++ goto corrupted_rescan; ++ } else if (!is_empty(buf, len)) { ++ if (!is_last_write(c, buf, offs)) { + int corruption = first_non_ff(buf, len); + ++ /* ++ * See header comment for this file for more ++ * explanations about the reasons we have this check. ++ */ + ubifs_err("corrupt empty space LEB %d:%d, corruption " + "starts at %d", lnum, offs, corruption); + /* Make sure we dump interesting non-0xFF data */ +- offs = corruption; ++ offs += corruption; + buf += corruption; + goto corrupted; + } + } + +- /* Drop nodes from incomplete group */ +- if (grouped && drop_incomplete_group(sleb, &offs)) { +- buf = sbuf + offs; +- len = c->leb_size - offs; +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- } ++ min_io_unit = round_down(offs, c->min_io_size); ++ if (grouped) ++ /* ++ * If nodes are grouped, always drop the incomplete group at ++ * the end. ++ */ ++ drop_last_group(sleb, &offs); + +- if (offs % c->min_io_size) { +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; ++ if (jhead == GCHD) { ++ /* ++ * If this LEB belongs to the GC head then while we are in the ++ * middle of the same min. I/O unit keep dropping nodes. 
So ++ * basically, what we want is to make sure that the last min. ++ * I/O unit where we saw the corruption is dropped completely ++ * with all the uncorrupted nodes which may possibly sit there. ++ * ++ * In other words, let's name the min. I/O unit where the ++ * corruption starts B, and the previous min. I/O unit A. The ++ * below code tries to deal with a situation when half of B ++ * contains valid nodes or the end of a valid node, and the ++ * second half of B contains corrupted data or garbage. This ++ * means that UBIFS had been writing to B just before the power ++ * cut happened. I do not know how realistic is this scenario ++ * that half of the min. I/O unit had been written successfully ++ * and the other half not, but this is possible in our 'failure ++ * mode emulation' infrastructure at least. ++ * ++ * So what is the problem, why we need to drop those nodes? Why ++ * can't we just clean-up the second half of B by putting a ++ * padding node there? We can, and this works fine with one ++ * exception which was reproduced with power cut emulation ++ * testing and happens extremely rarely. ++ * ++ * Imagine the file-system is full, we run GC which starts ++ * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is ++ * the current GC head LEB). The @c->gc_lnum is -1, which means ++ * that GC will retain LEB X and will try to continue. Imagine ++ * that LEB X is currently the dirtiest LEB, and the amount of ++ * used space in LEB Y is exactly the same as amount of free ++ * space in LEB X. ++ * ++ * And a power cut happens when nodes are moved from LEB X to ++ * LEB Y. We are here trying to recover LEB Y which is the GC ++ * head LEB. We find the min. I/O unit B as described above. ++ * Then we clean-up LEB Y by padding min. I/O unit. And later ++ * 'ubifs_rcvry_gc_commit()' function fails, because it cannot ++ * find a dirty LEB which could be GC'd into LEB Y! 
Even LEB X ++ * does not match because the amount of valid nodes there does ++ * not fit the free space in LEB Y any more! And this is ++ * because of the padding node which we added to LEB Y. The ++ * user-visible effect of this which I once observed and ++ * analysed is that we cannot mount the file-system with ++ * -ENOSPC error. ++ * ++ * So obviously, to make sure that situation does not happen we ++ * should free min. I/O unit B in LEB Y completely and the last ++ * used min. I/O unit in LEB Y should be A. This is basically ++ * what the below code tries to do. ++ */ ++ while (offs > min_io_unit) ++ drop_last_node(sleb, &offs); + } + ++ buf = sbuf + offs; ++ len = c->leb_size - offs; ++ ++ clean_buf(c, &buf, lnum, &offs, &len); + ubifs_end_scan(c, sleb, lnum, offs); + +- if (need_clean) { +- err = fix_unclean_leb(c, sleb, start); +- if (err) +- goto error; +- } ++ err = fix_unclean_leb(c, sleb, start); ++ if (err) ++ goto error; + + return sleb; + ++corrupted_rescan: ++ /* Re-scan the corrupted data with verbose messages */ ++ dbg_err("corruptio %d", ret); ++ ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + corrupted: + ubifs_scanned_corruption(c, lnum, offs, buf); + err = -EUCLEAN; +@@ -733,7 +821,8 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs, + return -ENOMEM; + if (c->leb_size - offs < UBIFS_CS_NODE_SZ) + goto out_err; +- err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); ++ err = ubifs_leb_read(c, lnum, (void *)cs_node, offs, ++ UBIFS_CS_NODE_SZ, 0); + if (err && err != -EBADMSG) + goto out_free; + ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); +@@ -819,7 +908,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, + } + ubifs_scan_destroy(sleb); + } +- return ubifs_recover_leb(c, lnum, offs, sbuf, 0); ++ return ubifs_recover_leb(c, lnum, offs, sbuf, -1); + } + + /** +@@ -833,15 +922,10 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int 
lnum, + * + * This function returns %0 on success and a negative error code on failure. + */ +-static int recover_head(const struct ubifs_info *c, int lnum, int offs, +- void *sbuf) ++static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf) + { +- int len, err; ++ int len = c->max_write_size, err; + +- if (c->min_io_size > 1) +- len = c->min_io_size; +- else +- len = 512; + if (offs + len > c->leb_size) + len = c->leb_size - offs; + +@@ -849,15 +933,15 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, + return 0; + + /* Read at the head location and check it is empty flash */ +- err = ubi_read(c->ubi, lnum, sbuf, offs, len); ++ err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1); + if (err || !is_empty(sbuf, len)) { + dbg_rcvry("cleaning head at %d:%d", lnum, offs); + if (offs == 0) + return ubifs_leb_unmap(c, lnum); +- err = ubi_read(c->ubi, lnum, sbuf, 0, offs); ++ err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1); + if (err) + return err; +- return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); ++ return ubifs_leb_change(c, lnum, sbuf, offs, UBI_UNKNOWN); + } + + return 0; +@@ -880,7 +964,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, + * + * This function returns %0 on success and a negative error code on failure. + */ +-int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) ++int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf) + { + int err; + +@@ -900,7 +984,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) + } + + /** +- * clean_an_unclean_leb - read and write a LEB to remove corruption. ++ * clean_an_unclean_leb - read and write a LEB to remove corruption. 
+ * @c: UBIFS file-system description object + * @ucleb: unclean LEB information + * @sbuf: LEB-sized buffer to use +@@ -911,7 +995,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) + * + * This function returns %0 on success and a negative error code on failure. + */ +-static int clean_an_unclean_leb(const struct ubifs_info *c, ++static int clean_an_unclean_leb(struct ubifs_info *c, + struct ubifs_unclean_leb *ucleb, void *sbuf) + { + int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; +@@ -927,7 +1011,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, + return 0; + } + +- err = ubi_read(c->ubi, lnum, buf, offs, len); ++ err = ubifs_leb_read(c, lnum, buf, offs, len, 0); + if (err && err != -EBADMSG) + return err; + +@@ -987,7 +1071,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, + } + + /* Write back the LEB atomically */ +- err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); ++ err = ubifs_leb_change(c, lnum, sbuf, len, UBI_UNKNOWN); + if (err) + return err; + +@@ -1007,7 +1091,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c, + * + * This function returns %0 on success and a negative error code on failure. + */ +-int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) ++int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf) + { + dbg_rcvry("recovery"); + while (!list_empty(&c->unclean_leb_list)) { +@@ -1026,6 +1110,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) + } + + /** ++ * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit. ++ * @c: UBIFS file-system description object ++ * ++ * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty ++ * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns ++ * zero in case of success and a negative error code in case of failure. 
++ */ ++static int grab_empty_leb(struct ubifs_info *c) ++{ ++ int lnum, err; ++ ++ /* ++ * Note, it is very important to first search for an empty LEB and then ++ * run the commit, not vice-versa. The reason is that there might be ++ * only one empty LEB at the moment, the one which has been the ++ * @c->gc_lnum just before the power cut happened. During the regular ++ * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no ++ * one but GC can grab it. But at this moment this single empty LEB is ++ * not marked as taken, so if we run commit - what happens? Right, the ++ * commit will grab it and write the index there. Remember that the ++ * index always expands as long as there is free space, and it only ++ * starts consolidating when we run out of space. ++ * ++ * IOW, if we run commit now, we might not be able to find a free LEB ++ * after this. ++ */ ++ lnum = ubifs_find_free_leb_for_idx(c); ++ if (lnum < 0) { ++ dbg_err("could not find an empty LEB"); ++ dbg_dump_lprops(c); ++ dbg_dump_budg(c, &c->bi); ++ return lnum; ++ } ++ ++ /* Reset the index flag */ ++ err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, ++ LPROPS_INDEX, 0); ++ if (err) ++ return err; ++ ++ c->gc_lnum = lnum; ++ dbg_rcvry("found empty LEB %d, run commit", lnum); ++ ++ return ubifs_run_commit(c); ++} ++ ++/** + * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. + * @c: UBIFS file-system description object + * +@@ -1047,71 +1178,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) + { + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + struct ubifs_lprops lp; +- int lnum, err; ++ int err; ++ ++ dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs); + + c->gc_lnum = -1; +- if (wbuf->lnum == -1) { +- dbg_rcvry("no GC head LEB"); +- goto find_free; +- } +- /* +- * See whether the used space in the dirtiest LEB fits in the GC head +- * LEB. 
+- */ +- if (wbuf->offs == c->leb_size) { +- dbg_rcvry("no room in GC head LEB"); +- goto find_free; +- } ++ if (wbuf->lnum == -1 || wbuf->offs == c->leb_size) ++ return grab_empty_leb(c); ++ + err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); + if (err) { +- /* +- * There are no dirty or empty LEBs subject to here being +- * enough for the index. Try to use +- * 'ubifs_find_free_leb_for_idx()', which will return any empty +- * LEBs (ignoring index requirements). If the index then +- * doesn't have enough LEBs the recovery commit will fail - +- * which is the same result anyway i.e. recovery fails. So +- * there is no problem ignoring index requirements and just +- * grabbing a free LEB since we have already established there +- * is not a dirty LEB we could have used instead. +- */ +- if (err == -ENOSPC) { +- dbg_rcvry("could not find a dirty LEB"); +- goto find_free; +- } +- return err; +- } +- ubifs_assert(!(lp.flags & LPROPS_INDEX)); +- lnum = lp.lnum; +- if (lp.free + lp.dirty == c->leb_size) { +- /* An empty LEB was returned */ +- if (lp.free != c->leb_size) { +- err = ubifs_change_one_lp(c, lnum, c->leb_size, +- 0, 0, 0, 0); +- if (err) +- return err; +- } +- err = ubifs_leb_unmap(c, lnum); +- if (err) +- return err; +- c->gc_lnum = lnum; +- dbg_rcvry("allocated LEB %d for GC", lnum); +- /* Run the commit */ +- dbg_rcvry("committing"); +- return ubifs_run_commit(c); +- } +- /* +- * There was no empty LEB so the used space in the dirtiest LEB must fit +- * in the GC head LEB. 
+- */ +- if (lp.free + lp.dirty < wbuf->offs) { +- dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d", +- lnum, wbuf->lnum, wbuf->offs); +- err = ubifs_return_leb(c, lnum); +- if (err) ++ if (err != -ENOSPC) + return err; +- goto find_free; ++ ++ dbg_rcvry("could not find a dirty LEB"); ++ return grab_empty_leb(c); + } ++ ++ ubifs_assert(!(lp.flags & LPROPS_INDEX)); ++ ubifs_assert(lp.free + lp.dirty >= wbuf->offs); ++ + /* + * We run the commit before garbage collection otherwise subsequent + * mounts will see the GC and orphan deletion in a different order. +@@ -1120,11 +1206,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) + err = ubifs_run_commit(c); + if (err) + return err; +- /* +- * The data in the dirtiest LEB fits in the GC head LEB, so do the GC +- * - use locking to keep 'ubifs_assert()' happy. +- */ +- dbg_rcvry("GC'ing LEB %d", lnum); ++ ++ dbg_rcvry("GC'ing LEB %d", lp.lnum); + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + err = ubifs_garbage_collect_leb(c, &lp); + if (err >= 0) { +@@ -1140,37 +1223,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) + err = -EINVAL; + return err; + } +- if (err != LEB_RETAINED) { +- dbg_err("GC returned %d", err); ++ ++ ubifs_assert(err == LEB_RETAINED); ++ if (err != LEB_RETAINED) + return -EINVAL; +- } ++ + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) + return err; +- dbg_rcvry("allocated LEB %d for GC", lnum); +- return 0; + +-find_free: +- /* +- * There is no GC head LEB or the free space in the GC head LEB is too +- * small, or there are not dirty LEBs. Allocate gc_lnum by calling +- * 'ubifs_find_free_leb_for_idx()' so GC is not run. 
+- */ +- lnum = ubifs_find_free_leb_for_idx(c); +- if (lnum < 0) { +- dbg_err("could not find an empty LEB"); +- return lnum; +- } +- /* And reset the index flag */ +- err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, +- LPROPS_INDEX, 0); +- if (err) +- return err; +- c->gc_lnum = lnum; +- dbg_rcvry("allocated LEB %d for GC", lnum); +- /* Run the commit */ +- dbg_rcvry("committing"); +- return ubifs_run_commit(c); ++ dbg_rcvry("allocated LEB %d for GC", lp.lnum); ++ return 0; + } + + /** +@@ -1393,7 +1456,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) + if (i_size >= e->d_size) + return 0; + /* Read the LEB */ +- err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size); ++ err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1); + if (err) + goto out; + /* Change the size field and recalculate the CRC */ +@@ -1409,10 +1472,10 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) + len -= 1; + len = ALIGN(len + 1, c->min_io_size); + /* Atomically write the fixed LEB back again */ +- err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); ++ err = ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN); + if (err) + goto out; +- dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", ++ dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", + (unsigned long)e->inum, lnum, offs, i_size, e->d_size); + return 0; + +@@ -1461,20 +1524,27 @@ int ubifs_recover_size(struct ubifs_info *c) + e->i_size = le64_to_cpu(ino->size); + } + } ++ + if (e->exists && e->i_size < e->d_size) { +- if (!e->inode && c->ro_mount) { ++ if (c->ro_mount) { + /* Fix the inode size and pin it in memory */ + struct inode *inode; ++ struct ubifs_inode *ui; ++ ++ ubifs_assert(!e->inode); + + inode = ubifs_iget(c->vfs_sb, e->inum); + if (IS_ERR(inode)) + return PTR_ERR(inode); ++ ++ ui = ubifs_inode(inode); + if (inode->i_size < e->d_size) { + dbg_rcvry("ino %lu size %lld -> %lld", + (unsigned long)e->inum, +- e->d_size, inode->i_size); ++ 
inode->i_size, e->d_size); + inode->i_size = e->d_size; +- ubifs_inode(inode)->ui_size = e->d_size; ++ ui->ui_size = e->d_size; ++ ui->synced_i_size = e->d_size; + e->inode = inode; + this = rb_next(this); + continue; +@@ -1489,9 +1559,11 @@ int ubifs_recover_size(struct ubifs_info *c) + iput(e->inode); + } + } ++ + this = rb_next(this); + rb_erase(&e->rb, &c->size_tree); + kfree(e); + } ++ + return 0; + } +diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c +index eed0fcf..b007637 100644 +--- a/fs/ubifs/replay.c ++++ b/fs/ubifs/replay.c +@@ -33,43 +33,32 @@ + */ + + #include "ubifs.h" +- +-/* +- * Replay flags. +- * +- * REPLAY_DELETION: node was deleted +- * REPLAY_REF: node is a reference node +- */ +-enum { +- REPLAY_DELETION = 1, +- REPLAY_REF = 2, +-}; ++#include <linux/list_sort.h> + + /** +- * struct replay_entry - replay tree entry. ++ * struct replay_entry - replay list entry. + * @lnum: logical eraseblock number of the node + * @offs: node offset + * @len: node length ++ * @deletion: non-zero if this entry corresponds to a node deletion + * @sqnum: node sequence number +- * @flags: replay flags +- * @rb: links the replay tree ++ * @list: links the replay list + * @key: node key + * @nm: directory entry name + * @old_size: truncation old size + * @new_size: truncation new size +- * @free: amount of free space in a bud +- * @dirty: amount of dirty space in a bud from padding and deletion nodes + * +- * UBIFS journal replay must compare node sequence numbers, which means it must +- * build a tree of node information to insert into the TNC. ++ * The replay process first scans all buds and builds the replay list, then ++ * sorts the replay list in nodes sequence number order, and then inserts all ++ * the replay entries to the TNC. 
+ */ + struct replay_entry { + int lnum; + int offs; + int len; ++ unsigned int deletion:1; + unsigned long long sqnum; +- int flags; +- struct rb_node rb; ++ struct list_head list; + union ubifs_key key; + union { + struct qstr nm; +@@ -77,10 +66,6 @@ struct replay_entry { + loff_t old_size; + loff_t new_size; + }; +- struct { +- int free; +- int dirty; +- }; + }; + }; + +@@ -88,57 +73,64 @@ struct replay_entry { + * struct bud_entry - entry in the list of buds to replay. + * @list: next bud in the list + * @bud: bud description object +- * @free: free bytes in the bud + * @sqnum: reference node sequence number ++ * @free: free bytes in the bud ++ * @dirty: dirty bytes in the bud + */ + struct bud_entry { + struct list_head list; + struct ubifs_bud *bud; +- int free; + unsigned long long sqnum; ++ int free; ++ int dirty; + }; + + /** + * set_bud_lprops - set free and dirty space used by a bud. + * @c: UBIFS file-system description object +- * @r: replay entry of bud ++ * @b: bud entry which describes the bud ++ * ++ * This function makes sure the LEB properties of bud @b are set correctly ++ * after the replay. Returns zero in case of success and a negative error code ++ * in case of failure. + */ +-static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) ++static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) + { + const struct ubifs_lprops *lp; + int err = 0, dirty; + + ubifs_get_lprops(c); + +- lp = ubifs_lpt_lookup_dirty(c, r->lnum); ++ lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + dirty = lp->dirty; +- if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { ++ if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { + /* + * The LEB was added to the journal with a starting offset of + * zero which means the LEB must have been empty. 
The LEB +- * property values should be lp->free == c->leb_size and +- * lp->dirty == 0, but that is not the case. The reason is that +- * the LEB was garbage collected. The garbage collector resets +- * the free and dirty space without recording it anywhere except +- * lprops, so if there is not a commit then lprops does not have +- * that information next time the file system is mounted. ++ * property values should be @lp->free == @c->leb_size and ++ * @lp->dirty == 0, but that is not the case. The reason is that ++ * the LEB had been garbage collected before it became the bud, ++ * and there was no commit in between. The garbage collector ++ * resets the free and dirty space without recording it ++ * anywhere except lprops, so if there was no commit then ++ * lprops does not have that information. + * + * We do not need to adjust free space because the scan has told + * us the exact value which is recorded in the replay entry as +- * r->free. ++ * @b->free. + * + * However we do need to subtract from the dirty space the + * amount of space that the garbage collector reclaimed, which + * is the whole LEB minus the amount of space that was free.
+ */ +- dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, ++ dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, + lp->free, lp->dirty); +- dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, ++ dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, + lp->free, lp->dirty); + dirty -= c->leb_size - lp->free; + /* +@@ -150,21 +142,48 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) + */ + if (dirty != 0) + dbg_msg("LEB %d lp: %d free %d dirty " +- "replay: %d free %d dirty", r->lnum, lp->free, +- lp->dirty, r->free, r->dirty); ++ "replay: %d free %d dirty", b->bud->lnum, ++ lp->free, lp->dirty, b->free, b->dirty); + } +- lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, ++ lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, + lp->flags | LPROPS_TAKEN, 0); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } ++ ++ /* Make sure the journal head points to the latest bud */ ++ err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, ++ b->bud->lnum, c->leb_size - b->free, ++ UBI_SHORTTERM); ++ + out: + ubifs_release_lprops(c); + return err; + } + + /** ++ * set_buds_lprops - set free and dirty space for all replayed buds. ++ * @c: UBIFS file-system description object ++ * ++ * This function sets LEB properties for all replayed buds. Returns zero in ++ * case of success and a negative error code in case of failure. ++ */ ++static int set_buds_lprops(struct ubifs_info *c) ++{ ++ struct bud_entry *b; ++ int err; ++ ++ list_for_each_entry(b, &c->replay_buds, list) { ++ err = set_bud_lprops(c, b); ++ if (err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++/** + * trun_remove_range - apply a replay entry for a truncation to the TNC. 
+ * @c: UBIFS file-system description object + * @r: replay entry of truncation +@@ -200,24 +219,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) + */ + static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) + { +- int err, deletion = ((r->flags & REPLAY_DELETION) != 0); ++ int err; + +- dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, +- r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); ++ dbg_mntk(&r->key, "LEB %d:%d len %d deletion %d sqnum %llu key ", ++ r->lnum, r->offs, r->len, r->deletion, r->sqnum); + + /* Set c->replay_sqnum to help deal with dangling branches. */ + c->replay_sqnum = r->sqnum; + +- if (r->flags & REPLAY_REF) +- err = set_bud_lprops(c, r); +- else if (is_hash_key(c, &r->key)) { +- if (deletion) ++ if (is_hash_key(c, &r->key)) { ++ if (r->deletion) + err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); + else + err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, + r->len, &r->nm); + } else { +- if (deletion) ++ if (r->deletion) + switch (key_type(c, &r->key)) { + case UBIFS_INO_KEY: + { +@@ -240,7 +257,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) + return err; + + if (c->need_recovery) +- err = ubifs_recover_size_accum(c, &r->key, deletion, ++ err = ubifs_recover_size_accum(c, &r->key, r->deletion, + r->new_size); + } + +@@ -248,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) + } + + /** +- * destroy_replay_tree - destroy the replay. +- * @c: UBIFS file-system description object ++ * replay_entries_cmp - compare 2 replay entries. ++ * @priv: UBIFS file-system description object ++ * @a: first replay entry ++ * @b: second replay entry + * +- * Destroy the replay tree. ++ * This is a comparison function for 'list_sort()' which compares 2 replay ++ * entries @a and @b by comparing their sequence number. Returns %1 if @a has ++ * greater sequence number and %-1 otherwise.
+ */ +-static void destroy_replay_tree(struct ubifs_info *c) ++static int replay_entries_cmp(void *priv, struct list_head *a, ++ struct list_head *b) + { +- struct rb_node *this = c->replay_tree.rb_node; +- struct replay_entry *r; +- +- while (this) { +- if (this->rb_left) { +- this = this->rb_left; +- continue; +- } else if (this->rb_right) { +- this = this->rb_right; +- continue; +- } +- r = rb_entry(this, struct replay_entry, rb); +- this = rb_parent(this); +- if (this) { +- if (this->rb_left == &r->rb) +- this->rb_left = NULL; +- else +- this->rb_right = NULL; +- } +- if (is_hash_key(c, &r->key)) +- kfree(r->nm.name); +- kfree(r); +- } +- c->replay_tree = RB_ROOT; ++ struct replay_entry *ra, *rb; ++ ++ cond_resched(); ++ if (a == b) ++ return 0; ++ ++ ra = list_entry(a, struct replay_entry, list); ++ rb = list_entry(b, struct replay_entry, list); ++ ubifs_assert(ra->sqnum != rb->sqnum); ++ if (ra->sqnum > rb->sqnum) ++ return 1; ++ return -1; + } + + /** +- * apply_replay_tree - apply the replay tree to the TNC. ++ * apply_replay_list - apply the replay list to the TNC. + * @c: UBIFS file-system description object + * +- * Apply the replay tree. +- * Returns zero in case of success and a negative error code in case of +- * failure. ++ * Apply all entries in the replay list to the TNC. Returns zero in case of ++ * success and a negative error code in case of failure. 
+ */ +-static int apply_replay_tree(struct ubifs_info *c) ++static int apply_replay_list(struct ubifs_info *c) + { +- struct rb_node *this = rb_first(&c->replay_tree); ++ struct replay_entry *r; ++ int err; + +- while (this) { +- struct replay_entry *r; +- int err; ++ list_sort(c, &c->replay_list, &replay_entries_cmp); + ++ list_for_each_entry(r, &c->replay_list, list) { + cond_resched(); + +- r = rb_entry(this, struct replay_entry, rb); + err = apply_replay_entry(c, r); + if (err) + return err; +- this = rb_next(this); + } ++ + return 0; + } + + /** +- * insert_node - insert a node to the replay tree. ++ * destroy_replay_list - destroy the replay. ++ * @c: UBIFS file-system description object ++ * ++ * Destroy the replay list. ++ */ ++static void destroy_replay_list(struct ubifs_info *c) ++{ ++ struct replay_entry *r, *tmp; ++ ++ list_for_each_entry_safe(r, tmp, &c->replay_list, list) { ++ if (is_hash_key(c, &r->key)) ++ kfree(r->nm.name); ++ list_del(&r->list); ++ kfree(r); ++ } ++} ++ ++/** ++ * insert_node - insert a node to the replay list + * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset +@@ -321,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c) + * @old_size: truncation old size + * @new_size: truncation new size + * +- * This function inserts a scanned non-direntry node to the replay tree. The +- * replay tree is an RB-tree containing @struct replay_entry elements which are +- * indexed by the sequence number. The replay tree is applied at the very end +- * of the replay process. Since the tree is sorted in sequence number order, +- * the older modifications are applied first. This function returns zero in +- * case of success and a negative error code in case of failure. ++ * This function inserts a scanned non-direntry node to the replay list. The ++ * replay list contains @struct replay_entry elements, and we sort this list in ++ * sequence number order before applying it. 
The replay list is applied at the ++ * very end of the replay process. Since the list is sorted in sequence number ++ * order, the older modifications are applied first. This function returns zero ++ * in case of success and a negative error code in case of failure. + */ + static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, + union ubifs_key *key, unsigned long long sqnum, + int deletion, int *used, loff_t old_size, + loff_t new_size) + { +- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + ++ dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); ++ + if (key_inum(c, key) >= c->highest_inum) + c->highest_inum = key_inum(c, key); + +- dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); +- while (*p) { +- parent = *p; +- r = rb_entry(parent, struct replay_entry, rb); +- if (sqnum < r->sqnum) { +- p = &(*p)->rb_left; +- continue; +- } else if (sqnum > r->sqnum) { +- p = &(*p)->rb_right; +- continue; +- } +- ubifs_err("duplicate sqnum in replay"); +- return -EINVAL; +- } +- + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; +@@ -363,19 +375,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, + r->lnum = lnum; + r->offs = offs; + r->len = len; ++ r->deletion = !!deletion; + r->sqnum = sqnum; +- r->flags = (deletion ? REPLAY_DELETION : 0); ++ key_copy(c, key, &r->key); + r->old_size = old_size; + r->new_size = new_size; +- key_copy(c, key, &r->key); + +- rb_link_node(&r->rb, parent, p); +- rb_insert_color(&r->rb, &c->replay_tree); ++ list_add_tail(&r->list, &c->replay_list); + return 0; + } + + /** +- * insert_dent - insert a directory entry node into the replay tree. ++ * insert_dent - insert a directory entry node into the replay list. 
+ * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset +@@ -387,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, + * @deletion: non-zero if this is a deletion + * @used: number of bytes in use in a LEB + * +- * This function inserts a scanned directory entry node to the replay tree. +- * Returns zero in case of success and a negative error code in case of +- * failure. +- * +- * This function is also used for extended attribute entries because they are +- * implemented as directory entry nodes. ++ * This function inserts a scanned directory entry node or an extended ++ * attribute entry to the replay list. Returns zero in case of success and a ++ * negative error code in case of failure. + */ + static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, + union ubifs_key *key, const char *name, int nlen, + unsigned long long sqnum, int deletion, int *used) + { +- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + char *nbuf; + ++ dbg_mntk(key, "add LEB %d:%d, key ", lnum, offs); + if (key_inum(c, key) >= c->highest_inum) + c->highest_inum = key_inum(c, key); + +- dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); +- while (*p) { +- parent = *p; +- r = rb_entry(parent, struct replay_entry, rb); +- if (sqnum < r->sqnum) { +- p = &(*p)->rb_left; +- continue; +- } +- if (sqnum > r->sqnum) { +- p = &(*p)->rb_right; +- continue; +- } +- ubifs_err("duplicate sqnum in replay"); +- return -EINVAL; +- } +- + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; ++ + nbuf = kmalloc(nlen + 1, GFP_KERNEL); + if (!nbuf) { + kfree(r); +@@ -435,17 +428,15 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, + r->lnum = lnum; + r->offs = offs; + r->len = len; ++ r->deletion = !!deletion; + r->sqnum = sqnum; ++ key_copy(c, key, &r->key); + r->nm.len = nlen; + memcpy(nbuf, name, 
nlen); + nbuf[nlen] = '\0'; + r->nm.name = nbuf; +- r->flags = (deletion ? REPLAY_DELETION : 0); +- key_copy(c, key, &r->key); + +- ubifs_assert(!*p); +- rb_link_node(&r->rb, parent, p); +- rb_insert_color(&r->rb, &c->replay_tree); ++ list_add_tail(&r->list, &c->replay_list); + return 0; + } + +@@ -482,29 +473,90 @@ int ubifs_validate_entry(struct ubifs_info *c, + } + + /** ++ * is_last_bud - check if the bud is the last in the journal head. ++ * @c: UBIFS file-system description object ++ * @bud: bud description object ++ * ++ * This function checks if bud @bud is the last bud in its journal head. This ++ * information is then used by 'replay_bud()' to decide whether the bud can ++ * have corruptions or not. Indeed, only last buds can be corrupted by power ++ * cuts. Returns %1 if this is the last bud, and %0 if not. ++ */ ++static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) ++{ ++ struct ubifs_jhead *jh = &c->jheads[bud->jhead]; ++ struct ubifs_bud *next; ++ uint32_t data; ++ int err; ++ ++ if (list_is_last(&bud->list, &jh->buds_list)) ++ return 1; ++ ++ /* ++ * The following is a quirk to make sure we work correctly with UBIFS ++ * images used with older UBIFS. ++ * ++ * Normally, the last bud will be the last in the journal head's list ++ * of buds. However, there is one exception if the UBIFS image belongs ++ * to older UBIFS. This is fairly unlikely: one would need to use old ++ * UBIFS, then have a power cut exactly at the right point, and then ++ * try to mount this image with new UBIFS. ++ * ++ * The exception is: it is possible to have 2 buds A and B, A goes ++ * before B, and B is the last, bud B contains no data, and bud A is ++ * corrupted at the end. The reason is that in older versions when the ++ * journal code switched to the next bud (from A to B), it first added a ++ * log reference node for the new bud (B), and only after this it ++ * synchronized the write-buffer of current bud (A).
But later this was ++ * changed and UBIFS started to always synchronize the write-buffer of ++ * the bud (A) before writing the log reference for the new bud (B). ++ * ++ * But because older UBIFS always synchronized A's write-buffer before ++ * writing to B, we can recognize this exceptional situation by ++ * checking the contents of bud B - if it is empty, then A can be ++ * treated as the last and we can recover it. ++ * ++ * TODO: remove this piece of code in a couple of years (today it is ++ * 16.05.2011). ++ */ ++ next = list_entry(bud->list.next, struct ubifs_bud, list); ++ if (!list_is_last(&next->list, &jh->buds_list)) ++ return 0; ++ ++ err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1); ++ if (err) ++ return 0; ++ ++ return data == 0xFFFFFFFF; ++} ++ ++/** + * replay_bud - replay a bud logical eraseblock. + * @c: UBIFS file-system description object +- * @lnum: bud logical eraseblock number to replay +- * @offs: bud start offset +- * @jhead: journal head to which this bud belongs +- * @free: amount of free space in the bud is returned here +- * @dirty: amount of dirty space from padding and deletion nodes is returned +- * here ++ * @b: bud entry which describes the bud + * +- * This function returns zero in case of success and a negative error code in +- * case of failure. ++ * This function replays bud @b, recovers it if needed, and adds all nodes ++ * from this bud to the replay list. Returns zero in case of success and a ++ * negative error code in case of failure.
+ */ +-static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, +- int *free, int *dirty) ++static int replay_bud(struct ubifs_info *c, struct bud_entry *b) + { +- int err = 0, used = 0; ++ int is_last = is_last_bud(c, b->bud); ++ int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; +- struct ubifs_bud *bud; + +- dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); +- if (c->need_recovery) +- sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); ++ dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", ++ lnum, b->bud->jhead, offs, is_last); ++ ++ if (c->need_recovery && is_last) ++ /* ++ * Recover only last LEBs in the journal heads, because power ++ * cuts may cause corruptions only in these LEBs, because only ++ * these LEBs could possibly be written to at the power cut ++ * time. ++ */ ++ sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); + else + sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); + if (IS_ERR(sleb)) +@@ -620,19 +672,13 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, + goto out; + } + +- bud = ubifs_search_bud(c, lnum); +- if (!bud) +- BUG(); +- ++ ubifs_assert(ubifs_search_bud(c, lnum)); + ubifs_assert(sleb->endpt - offs >= used); + ubifs_assert(sleb->endpt % c->min_io_size == 0); + +- if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount) +- err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, +- sleb->endpt, UBI_SHORTTERM); +- +- *dirty = sleb->endpt - offs - used; +- *free = c->leb_size - sleb->endpt; ++ b->dirty = sleb->endpt - offs - used; ++ b->free = c->leb_size - sleb->endpt; ++ dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free); + + out: + ubifs_scan_destroy(sleb); +@@ -646,55 +692,6 @@ out_dump: + } + + /** +- * insert_ref_node - insert a reference node to the replay tree. 
+- * @c: UBIFS file-system description object +- * @lnum: node logical eraseblock number +- * @offs: node offset +- * @sqnum: sequence number +- * @free: amount of free space in bud +- * @dirty: amount of dirty space from padding and deletion nodes +- * +- * This function inserts a reference node to the replay tree and returns zero +- * in case of success or a negative error code in case of failure. +- */ +-static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, +- unsigned long long sqnum, int free, int dirty) +-{ +- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; +- struct replay_entry *r; +- +- dbg_mnt("add ref LEB %d:%d", lnum, offs); +- while (*p) { +- parent = *p; +- r = rb_entry(parent, struct replay_entry, rb); +- if (sqnum < r->sqnum) { +- p = &(*p)->rb_left; +- continue; +- } else if (sqnum > r->sqnum) { +- p = &(*p)->rb_right; +- continue; +- } +- ubifs_err("duplicate sqnum in replay tree"); +- return -EINVAL; +- } +- +- r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); +- if (!r) +- return -ENOMEM; +- +- r->lnum = lnum; +- r->offs = offs; +- r->sqnum = sqnum; +- r->flags = REPLAY_REF; +- r->free = free; +- r->dirty = dirty; +- +- rb_link_node(&r->rb, parent, p); +- rb_insert_color(&r->rb, &c->replay_tree); +- return 0; +-} +- +-/** + * replay_buds - replay all buds. 
+ * @c: UBIFS file-system description object + * +@@ -704,17 +701,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, + static int replay_buds(struct ubifs_info *c) + { + struct bud_entry *b; +- int err, uninitialized_var(free), uninitialized_var(dirty); ++ int err; ++ unsigned long long prev_sqnum = 0; + + list_for_each_entry(b, &c->replay_buds, list) { +- err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, +- &free, &dirty); +- if (err) +- return err; +- err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, +- free, dirty); ++ err = replay_bud(c, b); + if (err) + return err; ++ ++ ubifs_assert(b->sqnum > prev_sqnum); ++ prev_sqnum = b->sqnum; + } + + return 0; +@@ -1054,25 +1050,29 @@ int ubifs_replay_journal(struct ubifs_info *c) + if (err) + goto out; + +- err = apply_replay_tree(c); ++ err = apply_replay_list(c); ++ if (err) ++ goto out; ++ ++ err = set_buds_lprops(c); + if (err) + goto out; + + /* +- * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable +- * to roughly estimate index growth. Things like @c->min_idx_lebs ++ * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable ++ * to roughly estimate index growth. Things like @c->bi.min_idx_lebs + * depend on it. This means we have to initialize it to make sure + * budgeting works properly. 
+ */ +- c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); +- c->budg_uncommitted_idx *= c->max_idx_node_sz; ++ c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); ++ c->bi.uncommitted_idx *= c->max_idx_node_sz; + + ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); + dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " + "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, + (unsigned long)c->highest_inum); + out: +- destroy_replay_tree(c); ++ destroy_replay_list(c); + destroy_bud_list(c); + c->replaying = 0; + return err; +diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c +index bf31b47..771f7fb 100644 +--- a/fs/ubifs/sb.c ++++ b/fs/ubifs/sb.c +@@ -247,7 +247,7 @@ static int create_default_filesystem(struct ubifs_info *c) + mst->total_dirty = cpu_to_le64(tmp64); + + /* The indexing LEB does not contribute to dark space */ +- tmp64 = (c->main_lebs - 1) * c->dark_wm; ++ tmp64 = ((long long)(c->main_lebs - 1) * c->dark_wm); + mst->total_dark = cpu_to_le64(tmp64); + + mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ); +@@ -410,13 +410,23 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) + } + + if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) { +- err = 7; ++ ubifs_err("too few main LEBs count %d, must be at least %d", ++ c->main_lebs, UBIFS_MIN_MAIN_LEBS); + goto failed; + } + +- if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS || +- c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) { +- err = 8; ++ max_bytes = (long long)c->leb_size * UBIFS_MIN_BUD_LEBS; ++ if (c->max_bud_bytes < max_bytes) { ++ ubifs_err("too small journal (%lld bytes), must be at least " ++ "%lld bytes", c->max_bud_bytes, max_bytes); ++ goto failed; ++ } ++ ++ max_bytes = (long long)c->leb_size * c->main_lebs; ++ if (c->max_bud_bytes > max_bytes) { ++ ubifs_err("too large journal size (%lld bytes), only %lld bytes" ++ "available in the main area", ++ c->max_bud_bytes, max_bytes); + goto failed; + } + +@@ -450,7 
+460,6 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) + goto failed; + } + +- max_bytes = c->main_lebs * (long long)c->leb_size; + if (c->rp_size < 0 || max_bytes < c->rp_size) { + err = 14; + goto failed; +@@ -475,7 +484,8 @@ failed: + * @c: UBIFS file-system description object + * + * This function returns a pointer to the superblock node or a negative error +- * code. ++ * code. Note, the user of this function is responsible of kfree()'ing the ++ * returned superblock buffer. + */ + struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) + { +@@ -616,6 +626,7 @@ int ubifs_read_superblock(struct ubifs_info *c) + c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); + memcpy(&c->uuid, &sup->uuid, 16); + c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); ++ c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP); + + /* Automatically increase file system size to the maximum size */ + c->old_leb_cnt = c->leb_cnt; +@@ -650,3 +661,152 @@ out: + kfree(sup); + return err; + } ++ ++/** ++ * fixup_leb - fixup/unmap an LEB containing free space. ++ * @c: UBIFS file-system description object ++ * @lnum: the LEB number to fix up ++ * @len: number of used bytes in LEB (starting at offset 0) ++ * ++ * This function reads the contents of the given LEB number @lnum, then fixes ++ * it up, so that empty min. I/O units in the end of LEB are actually erased on ++ * flash (rather than being just all-0xff real data). If the LEB is completely ++ * empty, it is simply unmapped. 
++ */ ++static int fixup_leb(struct ubifs_info *c, int lnum, int len) ++{ ++ int err; ++ ++ ubifs_assert(len >= 0); ++ ubifs_assert(len % c->min_io_size == 0); ++ ubifs_assert(len < c->leb_size); ++ ++ if (len == 0) { ++ dbg_mnt("unmap empty LEB %d", lnum); ++ return ubifs_leb_unmap(c, lnum); ++ } ++ ++ dbg_mnt("fixup LEB %d, data len %d", lnum, len); ++ err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1); ++ if (err) ++ return err; ++ ++ return ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN); ++} ++ ++/** ++ * fixup_free_space - find & remap all LEBs containing free space. ++ * @c: UBIFS file-system description object ++ * ++ * This function walks through all LEBs in the filesystem and fiexes up those ++ * containing free/empty space. ++ */ ++static int fixup_free_space(struct ubifs_info *c) ++{ ++ int lnum, err = 0; ++ struct ubifs_lprops *lprops; ++ ++ ubifs_get_lprops(c); ++ ++ /* Fixup LEBs in the master area */ ++ for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) { ++ err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz); ++ if (err) ++ goto out; ++ } ++ ++ /* Unmap unused log LEBs */ ++ lnum = ubifs_next_log_lnum(c, c->lhead_lnum); ++ while (lnum != c->ltail_lnum) { ++ err = fixup_leb(c, lnum, 0); ++ if (err) ++ goto out; ++ lnum = ubifs_next_log_lnum(c, lnum); ++ } ++ ++ /* Fixup the current log head */ ++ err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); ++ if (err) ++ goto out; ++ ++ /* Fixup LEBs in the LPT area */ ++ for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { ++ int free = c->ltab[lnum - c->lpt_first].free; ++ ++ if (free > 0) { ++ err = fixup_leb(c, lnum, c->leb_size - free); ++ if (err) ++ goto out; ++ } ++ } ++ ++ /* Unmap LEBs in the orphans area */ ++ for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { ++ err = fixup_leb(c, lnum, 0); ++ if (err) ++ goto out; ++ } ++ ++ /* Fixup LEBs in the main area */ ++ for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { ++ lprops = ubifs_lpt_lookup(c, lnum); ++ if 
(IS_ERR(lprops)) { ++ err = PTR_ERR(lprops); ++ goto out; ++ } ++ ++ if (lprops->free > 0) { ++ err = fixup_leb(c, lnum, c->leb_size - lprops->free); ++ if (err) ++ goto out; ++ } ++ } ++ ++out: ++ ubifs_release_lprops(c); ++ return err; ++} ++ ++/** ++ * ubifs_fixup_free_space - find & fix all LEBs with free space. ++ * @c: UBIFS file-system description object ++ * ++ * This function fixes up LEBs containing free space on first mount, if the ++ * appropriate flag was set when the FS was created. Each LEB with one or more ++ * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure ++ * the free space is actually erased. E.g., this is necessary for some NAND ++ * chips, since the free space may have been programmed like real "0xff" data ++ * (generating a non-0xff ECC), causing future writes to the not-really-erased ++ * NAND pages to behave badly. After the space is fixed up, the superblock flag ++ * is cleared, so that this is skipped for all future mounts. ++ */ ++int ubifs_fixup_free_space(struct ubifs_info *c) ++{ ++ int err; ++ struct ubifs_sb_node *sup; ++ ++ ubifs_assert(c->space_fixup); ++ ubifs_assert(!c->ro_mount); ++ ++ ubifs_msg("start fixing up free space"); ++ ++ err = fixup_free_space(c); ++ if (err) ++ return err; ++ ++ sup = ubifs_read_sb_node(c); ++ if (IS_ERR(sup)) ++ return PTR_ERR(sup); ++ ++ /* Free-space fixup is no longer required */ ++ c->space_fixup = 0; ++ sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP); ++ ++ err = ubifs_write_sb_node(c, sup); ++ kfree(sup); ++ if (err) ++ return err; ++ ++ ubifs_msg("free space fixup complete"); ++ return err; ++} +diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c +index 3e1ee57..37383e8 100644 +--- a/fs/ubifs/scan.c ++++ b/fs/ubifs/scan.c +@@ -148,7 +148,7 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum, + INIT_LIST_HEAD(&sleb->nodes); + sleb->buf = sbuf; + +- err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); ++ err = 
ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0); + if (err && err != -EBADMSG) { + ubifs_err("cannot read %d bytes from LEB %d:%d," + " error %d", c->leb_size - offs, lnum, offs, err); +@@ -240,7 +240,7 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, + int len; + + ubifs_err("corruption at LEB %d:%d", lnum, offs); +- if (dbg_failure_mode) ++ if (dbg_is_tst_rcvry(c)) + return; + len = c->leb_size - offs; + if (len > 8192) +@@ -328,7 +328,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, + if (!quiet) + ubifs_err("empty space starts at non-aligned offset %d", + offs); +- goto corrupted;; ++ goto corrupted; + } + + ubifs_end_scan(c, sleb, lnum, offs); +diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c +index 46961c0..d8f5d0f 100644 +--- a/fs/ubifs/shrinker.c ++++ b/fs/ubifs/shrinker.c +@@ -283,7 +283,11 @@ int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask) + long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); + + if (nr == 0) +- return clean_zn_cnt; ++ /* ++ * Due to the way UBIFS updates the clean znode counter it may ++ * temporarily be negative. ++ */ ++ return clean_zn_cnt >= 0 ? 
clean_zn_cnt : 1; + + if (!clean_zn_cnt) { + /* +diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c +index 91fac54..83651cd 100644 +--- a/fs/ubifs/super.c ++++ b/fs/ubifs/super.c +@@ -85,7 +85,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) + if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) + return 4; + +- if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG) ++ if (ui->xattr && !S_ISREG(inode->i_mode)) + return 5; + + if (!ubifs_compr_present(ui->compr_type)) { +@@ -94,7 +94,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode) + ubifs_compr_name(ui->compr_type)); + } + +- err = dbg_check_dir_size(c, inode); ++ err = dbg_check_dir(c, inode); + return err; + } + +@@ -367,7 +367,7 @@ out: + ubifs_release_dirty_inode_budget(c, ui); + else { + /* We've deleted something - clean the "no space" flags */ +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + } + done: +@@ -504,9 +504,12 @@ static int init_constants_early(struct ubifs_info *c) + + c->leb_cnt = c->vi.size; + c->leb_size = c->vi.usable_leb_size; ++ c->leb_start = c->di.leb_start; + c->half_leb_size = c->leb_size / 2; + c->min_io_size = c->di.min_io_size; + c->min_io_shift = fls(c->min_io_size) - 1; ++ c->max_write_size = c->di.max_write_size; ++ c->max_write_shift = fls(c->max_write_size) - 1; + + if (c->leb_size < UBIFS_MIN_LEB_SZ) { + ubifs_err("too small LEBs (%d bytes), min. is %d bytes", +@@ -526,6 +529,18 @@ static int init_constants_early(struct ubifs_info *c) + } + + /* ++ * Maximum write size has to be greater or equivalent to min. I/O ++ * size, and be multiple of min. I/O size. ++ */ ++ if (c->max_write_size < c->min_io_size || ++ c->max_write_size % c->min_io_size || ++ !is_power_of_2(c->max_write_size)) { ++ ubifs_err("bad write buffer size %d for %d min. 
I/O unit", ++ c->max_write_size, c->min_io_size); ++ return -EINVAL; ++ } ++ ++ /* + * UBIFS aligns all node to 8-byte boundary, so to make function in + * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is + * less than 8. +@@ -533,6 +548,10 @@ static int init_constants_early(struct ubifs_info *c) + if (c->min_io_size < 8) { + c->min_io_size = 8; + c->min_io_shift = 3; ++ if (c->max_write_size < c->min_io_size) { ++ c->max_write_size = c->min_io_size; ++ c->max_write_shift = c->min_io_shift; ++ } + } + + c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); +@@ -667,11 +686,11 @@ static int init_constants_sb(struct ubifs_info *c) + * be compressed and direntries are of the maximum size. + * + * Note, data, which may be stored in inodes is budgeted separately, so +- * it is not included into 'c->inode_budget'. ++ * it is not included into 'c->bi.inode_budget'. + */ +- c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; +- c->inode_budget = UBIFS_INO_NODE_SZ; +- c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; ++ c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; ++ c->bi.inode_budget = UBIFS_INO_NODE_SZ; ++ c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ; + + /* + * When the amount of flash space used by buds becomes +@@ -715,7 +734,7 @@ static void init_constants_master(struct ubifs_info *c) + { + long long tmp64; + +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + c->report_rp_size = ubifs_reported_space(c, c->rp_size); + + /* +@@ -784,15 +803,18 @@ static int alloc_wbufs(struct ubifs_info *c) + + c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; + c->jheads[i].wbuf.jhead = i; ++ c->jheads[i].grouped = 1; + } + + c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; + /* + * Garbage Collector head likely contains long-term data and +- * does not need to be synchronized by timer. ++ * does not need to be synchronized by timer. Also GC head nodes are ++ * not grouped. 
+ */ + c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; + c->jheads[GCHD].wbuf.no_timer = 1; ++ c->jheads[GCHD].grouped = 0; + + return 0; + } +@@ -884,7 +906,7 @@ static int check_volume_empty(struct ubifs_info *c) + + c->empty = 1; + for (lnum = 0; lnum < c->leb_cnt; lnum++) { +- err = ubi_is_mapped(c->ubi, lnum); ++ err = ubifs_is_mapped(c, lnum); + if (unlikely(err < 0)) + return err; + if (err == 1) { +@@ -1117,8 +1139,8 @@ static int check_free_space(struct ubifs_info *c) + { + ubifs_assert(c->dark_wm > 0); + if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { +- ubifs_err("insufficient free space to mount in read/write mode"); +- dbg_dump_budg(c); ++ ubifs_err("insufficient free space to mount in R/W mode"); ++ dbg_dump_budg(c, &c->bi); + dbg_dump_lprops(c); + return -ENOSPC; + } +@@ -1194,11 +1216,14 @@ static int mount_ubifs(struct ubifs_info *c) + if (c->bulk_read == 1) + bu_init(c); + +- /* +- * We have to check all CRCs, even for data nodes, when we mount the FS +- * (specifically, when we are replaying). 
+- */ +- c->always_chk_crc = 1; ++ if (!c->ro_mount) { ++ c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, ++ GFP_KERNEL); ++ if (!c->write_reserve_buf) ++ goto out_free; ++ } ++ ++ c->mounting = 1; + + err = ubifs_read_superblock(c); + if (err) +@@ -1227,12 +1252,12 @@ static int mount_ubifs(struct ubifs_info *c) + goto out_free; + } + ++ err = alloc_wbufs(c); ++ if (err) ++ goto out_cbuf; ++ + sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); + if (!c->ro_mount) { +- err = alloc_wbufs(c); +- if (err) +- goto out_cbuf; +- + /* Create background thread */ + c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); + if (IS_ERR(c->bgt)) { +@@ -1254,12 +1279,25 @@ static int mount_ubifs(struct ubifs_info *c) + if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { + ubifs_msg("recovery needed"); + c->need_recovery = 1; +- if (!c->ro_mount) { +- err = ubifs_recover_inl_heads(c, c->sbuf); +- if (err) +- goto out_master; +- } +- } else if (!c->ro_mount) { ++ } ++ ++ if (c->need_recovery && !c->ro_mount) { ++ err = ubifs_recover_inl_heads(c, c->sbuf); ++ if (err) ++ goto out_master; ++ } ++ ++ err = ubifs_lpt_init(c, 1, !c->ro_mount); ++ if (err) ++ goto out_master; ++ ++ if (!c->ro_mount && c->space_fixup) { ++ err = ubifs_fixup_free_space(c); ++ if (err) ++ goto out_master; ++ } ++ ++ if (!c->ro_mount) { + /* + * Set the "dirty" flag so that if we reboot uncleanly we + * will notice this immediately on the next mount. 
+@@ -1267,14 +1305,10 @@ static int mount_ubifs(struct ubifs_info *c) + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + err = ubifs_write_master(c); + if (err) +- goto out_master; ++ goto out_lpt; + } + +- err = ubifs_lpt_init(c, 1, !c->ro_mount); +- if (err) +- goto out_lpt; +- +- err = dbg_check_idx_size(c, c->old_idx_sz); ++ err = dbg_check_idx_size(c, c->bi.old_idx_sz); + if (err) + goto out_lpt; + +@@ -1283,7 +1317,7 @@ static int mount_ubifs(struct ubifs_info *c) + goto out_journal; + + /* Calculate 'min_idx_lebs' after journal replay */ +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); + if (err) +@@ -1374,7 +1408,7 @@ static int mount_ubifs(struct ubifs_info *c) + if (err) + goto out_infos; + +- c->always_chk_crc = 0; ++ c->mounting = 0; + + ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", + c->vi.ubi_num, c->vi.vol_id, c->vi.name); +@@ -1395,6 +1429,7 @@ static int mount_ubifs(struct ubifs_info *c) + + dbg_msg("compiled on: " __DATE__ " at " __TIME__); + dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); ++ dbg_msg("max. write size: %d bytes", c->max_write_size); + dbg_msg("LEB size: %d bytes (%d KiB)", + c->leb_size, c->leb_size >> 10); + dbg_msg("data journal heads: %d", +@@ -1411,7 +1446,8 @@ static int mount_ubifs(struct ubifs_info *c) + c->main_lebs, c->main_first, c->leb_cnt - 1); + dbg_msg("index LEBs: %d", c->lst.idx_lebs); + dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", +- c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); ++ c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, ++ c->bi.old_idx_sz >> 20); + dbg_msg("key hash type: %d", c->key_hash_type); + dbg_msg("tree fanout: %d", c->fanout); + dbg_msg("reserved GC LEB: %d", c->gc_lnum); +@@ -1424,9 +1460,9 @@ static int mount_ubifs(struct ubifs_info *c) + UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); + dbg_msg("node sizes: ref %zu, cmt. 
start %zu, orph %zu", + UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); +- dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu", +- UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, +- UBIFS_MAX_DENT_NODE_SZ); ++ dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", ++ UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, ++ UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); + dbg_msg("dead watermark: %d", c->dead_wm); + dbg_msg("dark watermark: %d", c->dark_wm); + dbg_msg("LEB overhead: %d", c->leb_overhead); +@@ -1466,6 +1502,7 @@ out_wbufs: + out_cbuf: + kfree(c->cbuf); + out_free: ++ kfree(c->write_reserve_buf); + kfree(c->bu.buf); + vfree(c->ileb_buf); + vfree(c->sbuf); +@@ -1504,6 +1541,7 @@ static void ubifs_umount(struct ubifs_info *c) + kfree(c->cbuf); + kfree(c->rcvrd_mst_node); + kfree(c->mst_node); ++ kfree(c->write_reserve_buf); + kfree(c->bu.buf); + vfree(c->ileb_buf); + vfree(c->sbuf); +@@ -1535,7 +1573,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) + mutex_lock(&c->umount_mutex); + dbg_save_space_info(c); + c->remounting_rw = 1; +- c->always_chk_crc = 1; ++ c->ro_mount = 0; + + err = check_free_space(c); + if (err) +@@ -1551,6 +1589,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) + } + sup->leb_cnt = cpu_to_le32(c->leb_cnt); + err = ubifs_write_sb_node(c, sup); ++ kfree(sup); + if (err) + goto out; + } +@@ -1590,16 +1629,14 @@ static int ubifs_remount_rw(struct ubifs_info *c) + goto out; + } + +- err = ubifs_lpt_init(c, 0, 1); +- if (err) ++ c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL); ++ if (!c->write_reserve_buf) + goto out; + +- err = alloc_wbufs(c); ++ err = ubifs_lpt_init(c, 0, 1); + if (err) + goto out; + +- ubifs_create_buds_lists(c); +- + /* Create background thread */ + c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); + if (IS_ERR(c->bgt)) { +@@ -1634,20 +1671,37 @@ static int ubifs_remount_rw(struct ubifs_info *c) + if (err) + goto out; + ++ 
dbg_gen("re-mounted read-write"); ++ c->remounting_rw = 0; ++ + if (c->need_recovery) { + c->need_recovery = 0; + ubifs_msg("deferred recovery completed"); ++ } else { ++ /* ++ * Do not run the debugging space check if the were doing ++ * recovery, because when we saved the information we had the ++ * file-system in a state where the TNC and lprops has been ++ * modified in memory, but all the I/O operations (including a ++ * commit) were deferred. So the file-system was in ++ * "non-committed" state. Now the file-system is in committed ++ * state, and of course the amount of free space will change ++ * because, for example, the old index size was imprecise. ++ */ ++ err = dbg_check_space_info(c); ++ } ++ ++ if (c->space_fixup) { ++ err = ubifs_fixup_free_space(c); ++ if (err) ++ goto out; + } + +- dbg_gen("re-mounted read-write"); +- c->ro_mount = 0; +- c->remounting_rw = 0; +- c->always_chk_crc = 0; +- err = dbg_check_space_info(c); + mutex_unlock(&c->umount_mutex); + return err; + + out: ++ c->ro_mount = 1; + vfree(c->orph_buf); + c->orph_buf = NULL; + if (c->bgt) { +@@ -1655,11 +1709,12 @@ out: + c->bgt = NULL; + } + free_wbufs(c); ++ kfree(c->write_reserve_buf); ++ c->write_reserve_buf = NULL; + vfree(c->ileb_buf); + c->ileb_buf = NULL; + ubifs_lpt_free(c, 1); + c->remounting_rw = 0; +- c->always_chk_crc = 0; + mutex_unlock(&c->umount_mutex); + return err; + } +@@ -1696,9 +1751,10 @@ static void ubifs_remount_ro(struct ubifs_info *c) + if (err) + ubifs_ro_mode(c, err); + +- free_wbufs(c); + vfree(c->orph_buf); + c->orph_buf = NULL; ++ kfree(c->write_reserve_buf); ++ c->write_reserve_buf = NULL; + vfree(c->ileb_buf); + c->ileb_buf = NULL; + ubifs_lpt_free(c, 1); +@@ -1722,10 +1778,11 @@ static void ubifs_put_super(struct super_block *sb) + * of the media. For example, there will be dirty inodes if we failed + * to write them back because of I/O errors. 
+ */ +- ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); +- ubifs_assert(c->budg_idx_growth == 0); +- ubifs_assert(c->budg_dd_growth == 0); +- ubifs_assert(c->budg_data_growth == 0); ++ if (!c->ro_error) { ++ ubifs_assert(c->bi.idx_growth == 0); ++ ubifs_assert(c->bi.dd_growth == 0); ++ ubifs_assert(c->bi.data_growth == 0); ++ } + + /* + * The 'c->umount_lock' prevents races between UBIFS memory shrinker +@@ -1929,6 +1986,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) + mutex_init(&c->mst_mutex); + mutex_init(&c->umount_mutex); + mutex_init(&c->bu_mutex); ++ mutex_init(&c->write_reserve_mutex); + init_waitqueue_head(&c->cmt_wq); + c->buds = RB_ROOT; + c->old_idx = RB_ROOT; +@@ -1946,6 +2004,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) + INIT_LIST_HEAD(&c->old_buds); + INIT_LIST_HEAD(&c->orph_list); + INIT_LIST_HEAD(&c->orph_new); ++ c->no_chk_data_crc = 1; + + c->vfs_sb = sb; + c->highest_inum = UBIFS_FIRST_INO; +diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c +index ad9cf01..16ad84d 100644 +--- a/fs/ubifs/tnc.c ++++ b/fs/ubifs/tnc.c +@@ -223,7 +223,7 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c, + __set_bit(DIRTY_ZNODE, &zn->flags); + __clear_bit(COW_ZNODE, &zn->flags); + +- ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); ++ ubifs_assert(!ubifs_zn_obsolete(znode)); + __set_bit(OBSOLETE_ZNODE, &znode->flags); + + if (znode->level != 0) { +@@ -271,7 +271,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, + struct ubifs_znode *zn; + int err; + +- if (!test_bit(COW_ZNODE, &znode->flags)) { ++ if (!ubifs_zn_cow(znode)) { + /* znode is not being committed */ + if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { + atomic_long_inc(&c->dirty_zn_cnt); +@@ -344,12 +344,11 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, + return err; + } + +- lnc_node = kmalloc(zbr->len, GFP_NOFS); ++ lnc_node = kmemdup(node, zbr->len, GFP_NOFS); + if 
(!lnc_node) + /* We don't have to have the cache, so no error */ + return 0; + +- memcpy(lnc_node, node, zbr->len); + zbr->leaf = lnc_node; + return 0; + } +@@ -447,8 +446,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr, + * + * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc + * is true (it is controlled by corresponding mount option). However, if +- * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always +- * checked. ++ * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to ++ * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is ++ * because during mounting or re-mounting from R/O mode to R/W mode we may read ++ * journal nodes (when replying the journal or doing the recovery) and the ++ * journal nodes may potentially be corrupted, so checking is required. + */ + static int try_read_node(const struct ubifs_info *c, void *buf, int type, + int len, int lnum, int offs) +@@ -459,7 +461,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, + + dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + +- err = ubi_read(c->ubi, lnum, buf, offs, len); ++ err = ubifs_leb_read(c, lnum, buf, offs, len, 1); + if (err) { + ubifs_err("cannot read node type %d from LEB %d:%d, error %d", + type, lnum, offs, err); +@@ -476,7 +478,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, + if (node_len != len) + return 0; + +- if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) ++ if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting && ++ !c->remounting_rw) + return 1; + + crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); +@@ -502,7 +505,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, + { + int ret; + +- dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key)); ++ dbg_tnck(key, "LEB %d:%d, key ", zbr->lnum, 
zbr->offs); + + ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum, + zbr->offs); +@@ -516,8 +519,8 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, + ret = 0; + } + if (ret == 0 && c->replaying) +- dbg_mnt("dangling branch LEB %d:%d len %d, key %s", +- zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); ++ dbg_mntk(key, "dangling branch LEB %d:%d len %d, key ", ++ zbr->lnum, zbr->offs, zbr->len); + return ret; + } + +@@ -992,9 +995,9 @@ static int fallible_resolve_collision(struct ubifs_info *c, + if (adding || !o_znode) + return 0; + +- dbg_mnt("dangling match LEB %d:%d len %d %s", ++ dbg_mntk(key, "dangling match LEB %d:%d len %d key ", + o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs, +- o_znode->zbranch[o_n].len, DBGKEY(key)); ++ o_znode->zbranch[o_n].len); + *zn = o_znode; + *n = o_n; + return 1; +@@ -1176,7 +1179,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode *znode; + unsigned long time = get_seconds(); + +- dbg_tnc("search key %s", DBGKEY(key)); ++ dbg_tnck(key, "search key "); + ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); + + znode = c->zroot.znode; +@@ -1312,7 +1315,7 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode *znode; + unsigned long time = get_seconds(); + +- dbg_tnc("search and dirty key %s", DBGKEY(key)); ++ dbg_tnck(key, "search and dirty key "); + + znode = c->zroot.znode; + if (unlikely(!znode)) { +@@ -1662,7 +1665,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, + if (!overlap) { + /* We may safely unlock the write-buffer and read the data */ + spin_unlock(&wbuf->lock); +- return ubi_read(c->ubi, lnum, buf, offs, len); ++ return ubifs_leb_read(c, lnum, buf, offs, len, 0); + } + + /* Don't read under wbuf */ +@@ -1676,7 +1679,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, + + if (rlen > 0) + /* Read everything 
that goes before write-buffer */ +- return ubi_read(c->ubi, lnum, buf, offs, rlen); ++ return ubifs_leb_read(c, lnum, buf, offs, rlen, 0); + + return 0; + } +@@ -1719,8 +1722,8 @@ static int validate_data_node(struct ubifs_info *c, void *buf, + if (!keys_eq(c, &zbr->key, &key1)) { + ubifs_err("bad key in node at LEB %d:%d", + zbr->lnum, zbr->offs); +- dbg_tnc("looked for key %s found node's key %s", +- DBGKEY(&zbr->key), DBGKEY1(&key1)); ++ dbg_tnck(&zbr->key, "looked for key "); ++ dbg_tnck(&key1, "found node's key "); + goto out_err; + } + +@@ -1763,7 +1766,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) + if (wbuf) + err = read_wbuf(wbuf, bu->buf, len, lnum, offs); + else +- err = ubi_read(c->ubi, lnum, bu->buf, offs, len); ++ err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0); + + /* Check for a race with GC */ + if (maybe_leb_gced(c, lnum, bu->gc_seq)) +@@ -1773,7 +1776,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) + ubifs_err("failed to read from LEB %d:%d, error %d", + lnum, offs, err); + dbg_dump_stack(); +- dbg_tnc("key %s", DBGKEY(&bu->key)); ++ dbg_tnck(&bu->key, "key "); + return err; + } + +@@ -1808,7 +1811,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, + int found, n, err; + struct ubifs_znode *znode; + +- dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); ++ dbg_tnck(key, "name '%.*s' key ", nm->len, nm->name); + mutex_lock(&c->tnc_mutex); + found = ubifs_lookup_level0(c, key, &znode, &n); + if (!found) { +@@ -1982,8 +1985,7 @@ again: + zp = znode->parent; + if (znode->child_cnt < c->fanout) { + ubifs_assert(n != c->fanout); +- dbg_tnc("inserted at %d level %d, key %s", n, znode->level, +- DBGKEY(key)); ++ dbg_tnck(key, "inserted at %d level %d, key ", n, znode->level); + + insert_zbranch(znode, zbr, n); + +@@ -1998,7 +2000,7 @@ again: + * Unfortunately, @znode does not have more empty slots and we have to + * split it. 
+ */ +- dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key)); ++ dbg_tnck(key, "splitting level %d, key ", znode->level); + + if (znode->alt) + /* +@@ -2092,7 +2094,7 @@ do_split: + } + + /* Insert new key and branch */ +- dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key)); ++ dbg_tnck(key, "inserting at %d level %d, key ", n, zn->level); + + insert_zbranch(zi, zbr, n); + +@@ -2168,7 +2170,7 @@ int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum, + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); +- dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key)); ++ dbg_tnck(key, "%d:%d, len %d, key ", lnum, offs, len); + found = lookup_level0_dirty(c, key, &znode, &n); + if (!found) { + struct ubifs_zbranch zbr; +@@ -2217,8 +2219,8 @@ int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); +- dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum, +- old_offs, lnum, offs, len, DBGKEY(key)); ++ dbg_tnck(key, "old LEB %d:%d, new LEB %d:%d, len %d, key ", old_lnum, ++ old_offs, lnum, offs, len); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; +@@ -2300,8 +2302,8 @@ int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); +- dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name, +- DBGKEY(key)); ++ dbg_tnck(key, "LEB %d:%d, name '%.*s', key ", ++ lnum, offs, nm->len, nm->name); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; +@@ -2394,7 +2396,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) + /* Delete without merge for now */ + ubifs_assert(znode->level == 0); + ubifs_assert(n >= 0 && n < c->fanout); +- dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key)); ++ dbg_tnck(&znode->zbranch[n].key, "deleting key "); + + zbr = 
&znode->zbranch[n]; + lnc_free(zbr); +@@ -2419,7 +2421,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) + */ + + do { +- ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); ++ ubifs_assert(!ubifs_zn_obsolete(znode)); + ubifs_assert(ubifs_zn_dirty(znode)); + + zp = znode->parent; +@@ -2475,9 +2477,8 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n) + c->zroot.offs = zbr->offs; + c->zroot.len = zbr->len; + c->zroot.znode = znode; +- ubifs_assert(!test_bit(OBSOLETE_ZNODE, +- &zp->flags)); +- ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags)); ++ ubifs_assert(!ubifs_zn_obsolete(zp)); ++ ubifs_assert(ubifs_zn_dirty(zp)); + atomic_long_dec(&c->dirty_zn_cnt); + + if (zp->cnext) { +@@ -2505,7 +2506,7 @@ int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key) + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); +- dbg_tnc("key %s", DBGKEY(key)); ++ dbg_tnck(key, "key "); + found = lookup_level0_dirty(c, key, &znode, &n); + if (found < 0) { + err = found; +@@ -2536,7 +2537,7 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, + struct ubifs_znode *znode; + + mutex_lock(&c->tnc_mutex); +- dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key)); ++ dbg_tnck(key, "%.*s, key ", nm->len, nm->name); + err = lookup_level0_dirty(c, key, &znode, &n); + if (err < 0) + goto out_unlock; +@@ -2553,11 +2554,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, + if (err) { + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { +- znode = dirty_cow_bottom_up(c, znode); +- if (IS_ERR(znode)) { +- err = PTR_ERR(znode); +- goto out_unlock; +- } ++ znode = dirty_cow_bottom_up(c, znode); ++ if (IS_ERR(znode)) { ++ err = PTR_ERR(znode); ++ goto out_unlock; ++ } + } + err = tnc_delete(c, znode, n); + } +@@ -2651,7 +2652,7 @@ int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key, + dbg_dump_znode(c, znode); + 
goto out_unlock; + } +- dbg_tnc("removing %s", DBGKEY(key)); ++ dbg_tnck(key, "removing key "); + } + if (k) { + for (i = n + 1 + k; i < znode->child_cnt; i++) +@@ -2771,7 +2772,7 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c, + struct ubifs_zbranch *zbr; + union ubifs_key *dkey; + +- dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key)); ++ dbg_tnck(key, "%s ", nm->name ? (char *)nm->name : "(lowest)"); + ubifs_assert(is_hash_key(c, key)); + + mutex_lock(&c->tnc_mutex); +@@ -2861,7 +2862,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c) + struct ubifs_znode *znode = cnext; + + cnext = cnext->cnext; +- if (test_bit(OBSOLETE_ZNODE, &znode->flags)) ++ if (ubifs_zn_obsolete(znode)) + kfree(znode); + } while (cnext && cnext != c->cnext); + } +@@ -2872,12 +2873,13 @@ static void tnc_destroy_cnext(struct ubifs_info *c) + */ + void ubifs_tnc_close(struct ubifs_info *c) + { +- long clean_freed; +- + tnc_destroy_cnext(c); + if (c->zroot.znode) { +- clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); +- atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); ++ long n; ++ ++ ubifs_destroy_tnc_subtree(c->zroot.znode); ++ n = atomic_long_read(&c->clean_zn_cnt); ++ atomic_long_sub(n, &ubifs_clean_zn_cnt); + } + kfree(c->gap_lebs); + kfree(c->ilebs); +@@ -3296,7 +3298,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, + + if (!S_ISREG(inode->i_mode)) + return 0; +- if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ if (!dbg_is_chk_gen(c)) + return 0; + + block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; +@@ -3329,12 +3331,13 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, + + out_dump: + block = key_block(c, key); +- ubifs_err("inode %lu has size %lld, but there are data at offset %lld " +- "(data key %s)", (unsigned long)inode->i_ino, size, +- ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); ++ ubifs_err("inode %lu has size %lld, but there are data at offset %lld", ++ 
(unsigned long)inode->i_ino, size, ++ ((loff_t)block) << UBIFS_BLOCK_SHIFT); ++ mutex_unlock(&c->tnc_mutex); + dbg_dump_inode(c, inode); + dbg_dump_stack(); +- err = -EINVAL; ++ return -EINVAL; + + out_unlock: + mutex_unlock(&c->tnc_mutex); +diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c +index 53288e5..4c15f07 100644 +--- a/fs/ubifs/tnc_commit.c ++++ b/fs/ubifs/tnc_commit.c +@@ -22,6 +22,7 @@ + + /* This file implements TNC functions for committing */ + ++#include <linux/random.h> + #include "ubifs.h" + + /** +@@ -87,8 +88,12 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx, + atomic_long_dec(&c->dirty_zn_cnt); + + ubifs_assert(ubifs_zn_dirty(znode)); +- ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); ++ ubifs_assert(ubifs_zn_cow(znode)); + ++ /* ++ * Note, unlike 'write_index()' we do not add memory barriers here ++ * because this function is called with @c->tnc_mutex locked. ++ */ + __clear_bit(DIRTY_ZNODE, &znode->flags); + __clear_bit(COW_ZNODE, &znode->flags); + +@@ -377,15 +382,13 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) + c->gap_lebs = NULL; + return err; + } +- if (!dbg_force_in_the_gaps_enabled) { ++ if (!dbg_is_chk_index(c)) { + /* + * Do not print scary warnings if the debugging + * option which forces in-the-gaps is enabled. 
+ */ +- ubifs_err("out of space"); +- spin_lock(&c->space_lock); +- dbg_dump_budg(c); +- spin_unlock(&c->space_lock); ++ ubifs_warn("out of space"); ++ dbg_dump_budg(c, &c->bi); + dbg_dump_lprops(c); + } + /* Try to commit anyway */ +@@ -493,25 +496,6 @@ static int layout_in_empty_space(struct ubifs_info *c) + else + next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + +- if (c->min_io_size == 1) { +- buf_offs += ALIGN(len, 8); +- if (next_len) { +- if (buf_offs + next_len <= c->leb_size) +- continue; +- err = ubifs_update_one_lp(c, lnum, 0, +- c->leb_size - buf_offs, 0, 0); +- if (err) +- return err; +- lnum = -1; +- continue; +- } +- err = ubifs_update_one_lp(c, lnum, +- c->leb_size - buf_offs, 0, 0, 0); +- if (err) +- return err; +- break; +- } +- + /* Update buffer positions */ + wlen = used + len; + used += ALIGN(len, 8); +@@ -660,7 +644,7 @@ static int get_znodes_to_commit(struct ubifs_info *c) + } + cnt += 1; + while (1) { +- ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); ++ ubifs_assert(!ubifs_zn_cow(znode)); + __set_bit(COW_ZNODE, &znode->flags); + znode->alt = 0; + cnext = find_next_dirty(znode); +@@ -706,7 +690,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt) + c->ilebs[c->ileb_cnt++] = lnum; + dbg_cmt("LEB %d", lnum); + } +- if (dbg_force_in_the_gaps()) ++ if (dbg_is_chk_index(c) && !(random32() & 7)) + return -ENOSPC; + return 0; + } +@@ -796,16 +780,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot) + spin_lock(&c->space_lock); + /* + * Although we have not finished committing yet, update size of the +- * committed index ('c->old_idx_sz') and zero out the index growth ++ * committed index ('c->bi.old_idx_sz') and zero out the index growth + * budget. It is OK to do this now, because we've reserved all the + * space which is needed to commit the index, and it is save for the + * budgeting subsystem to assume the index is already committed, + * even though it is not. 
+ */ +- ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); +- c->old_idx_sz = c->calc_idx_sz; +- c->budg_uncommitted_idx = 0; +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); ++ c->bi.old_idx_sz = c->calc_idx_sz; ++ c->bi.uncommitted_idx = 0; ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + spin_unlock(&c->space_lock); + mutex_unlock(&c->tnc_mutex); + +@@ -832,7 +816,7 @@ static int write_index(struct ubifs_info *c) + struct ubifs_idx_node *idx; + struct ubifs_znode *znode, *cnext; + int i, lnum, offs, len, next_len, buf_len, buf_offs, used; +- int avail, wlen, err, lnum_pos = 0; ++ int avail, wlen, err, lnum_pos = 0, blen, nxt_offs; + + cnext = c->enext; + if (!cnext) +@@ -909,7 +893,7 @@ static int write_index(struct ubifs_info *c) + cnext = znode->cnext; + + ubifs_assert(ubifs_zn_dirty(znode)); +- ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); ++ ubifs_assert(ubifs_zn_cow(znode)); + + /* + * It is important that other threads should see %DIRTY_ZNODE +@@ -924,6 +908,28 @@ static int write_index(struct ubifs_info *c) + clear_bit(COW_ZNODE, &znode->flags); + smp_mb__after_clear_bit(); + ++ /* ++ * We have marked the znode as clean but have not updated the ++ * @c->clean_zn_cnt counter. If this znode becomes dirty again ++ * before 'free_obsolete_znodes()' is called, then ++ * @c->clean_zn_cnt will be decremented before it gets ++ * incremented (resulting in 2 decrements for the same znode). ++ * This means that @c->clean_zn_cnt may become negative for a ++ * while. ++ * ++ * Q: why we cannot increment @c->clean_zn_cnt? ++ * A: because we do not have the @c->tnc_mutex locked, and the ++ * following code would be racy and buggy: ++ * ++ * if (!ubifs_zn_obsolete(znode)) { ++ * atomic_long_inc(&c->clean_zn_cnt); ++ * atomic_long_inc(&ubifs_clean_zn_cnt); ++ * } ++ * ++ * Thus, we just delay the @c->clean_zn_cnt update until we ++ * have the mutex locked. 
++ */ ++ + /* Do not access znode from this point on */ + + /* Update buffer positions */ +@@ -940,65 +946,38 @@ static int write_index(struct ubifs_info *c) + else + next_len = ubifs_idx_node_sz(c, cnext->child_cnt); + +- if (c->min_io_size == 1) { +- /* +- * Write the prepared index node immediately if there is +- * no minimum IO size +- */ +- err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, +- wlen, UBI_SHORTTERM); +- if (err) +- return err; +- buf_offs += ALIGN(wlen, 8); +- if (next_len) { +- used = 0; +- avail = buf_len; +- if (buf_offs + next_len > c->leb_size) { +- err = ubifs_update_one_lp(c, lnum, +- LPROPS_NC, 0, 0, LPROPS_TAKEN); +- if (err) +- return err; +- lnum = -1; +- } ++ nxt_offs = buf_offs + used + next_len; ++ if (next_len && nxt_offs <= c->leb_size) { ++ if (avail > 0) + continue; +- } ++ else ++ blen = buf_len; + } else { +- int blen, nxt_offs = buf_offs + used + next_len; +- +- if (next_len && nxt_offs <= c->leb_size) { +- if (avail > 0) +- continue; +- else +- blen = buf_len; +- } else { +- wlen = ALIGN(wlen, 8); +- blen = ALIGN(wlen, c->min_io_size); +- ubifs_pad(c, c->cbuf + wlen, blen - wlen); +- } +- /* +- * The buffer is full or there are no more znodes +- * to do +- */ +- err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, +- blen, UBI_SHORTTERM); +- if (err) +- return err; +- buf_offs += blen; +- if (next_len) { +- if (nxt_offs > c->leb_size) { +- err = ubifs_update_one_lp(c, lnum, +- LPROPS_NC, 0, 0, LPROPS_TAKEN); +- if (err) +- return err; +- lnum = -1; +- } +- used -= blen; +- if (used < 0) +- used = 0; +- avail = buf_len - used; +- memmove(c->cbuf, c->cbuf + blen, used); +- continue; ++ wlen = ALIGN(wlen, 8); ++ blen = ALIGN(wlen, c->min_io_size); ++ ubifs_pad(c, c->cbuf + wlen, blen - wlen); ++ } ++ ++ /* The buffer is full or there are no more znodes to do */ ++ err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, blen, ++ UBI_SHORTTERM); ++ if (err) ++ return err; ++ buf_offs += blen; ++ if (next_len) { ++ if (nxt_offs > 
c->leb_size) { ++ err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, ++ 0, LPROPS_TAKEN); ++ if (err) ++ return err; ++ lnum = -1; + } ++ used -= blen; ++ if (used < 0) ++ used = 0; ++ avail = buf_len - used; ++ memmove(c->cbuf, c->cbuf + blen, used); ++ continue; + } + break; + } +@@ -1031,7 +1010,7 @@ static void free_obsolete_znodes(struct ubifs_info *c) + do { + znode = cnext; + cnext = znode->cnext; +- if (test_bit(OBSOLETE_ZNODE, &znode->flags)) ++ if (ubifs_zn_obsolete(znode)) + kfree(znode); + else { + znode->cnext = NULL; +diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c +index b48db99..dc28fe6 100644 +--- a/fs/ubifs/tnc_misc.c ++++ b/fs/ubifs/tnc_misc.c +@@ -328,8 +328,8 @@ static int read_znode(struct ubifs_info *c, int lnum, int offs, int len, + case UBIFS_XENT_KEY: + break; + default: +- dbg_msg("bad key type at slot %d: %s", i, +- DBGKEY(&zbr->key)); ++ dbg_msg("bad key type at slot %d: %d", ++ i, key_type(c, &zbr->key)); + err = 3; + goto out_dump; + } +@@ -475,7 +475,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, + zbr->offs); + + if (err) { +- dbg_tnc("key %s", DBGKEY(key)); ++ dbg_tnck(key, "key "); + return err; + } + +@@ -484,8 +484,8 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, + if (!keys_eq(c, key, &key1)) { + ubifs_err("bad key in node at LEB %d:%d", + zbr->lnum, zbr->offs); +- dbg_tnc("looked for key %s found node's key %s", +- DBGKEY(key), DBGKEY1(&key1)); ++ dbg_tnck(key, "looked for key "); ++ dbg_tnck(&key1, "but found node's key "); + dbg_dump_node(c, node); + return -EINVAL; + } +diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h +index 191ca78..e24380c 100644 +--- a/fs/ubifs/ubifs-media.h ++++ b/fs/ubifs/ubifs-media.h +@@ -408,9 +408,11 @@ enum { + * Superblock flags. 
+ * + * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set ++ * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed + */ + enum { + UBIFS_FLG_BIGLPT = 0x02, ++ UBIFS_FLG_SPACE_FIXUP = 0x04, + }; + + /** +@@ -434,7 +436,7 @@ struct ubifs_ch { + __u8 node_type; + __u8 group_type; + __u8 padding[2]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * union ubifs_dev_desc - device node descriptor. +@@ -448,7 +450,7 @@ struct ubifs_ch { + union ubifs_dev_desc { + __le32 new; + __le64 huge; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_ino_node - inode node. +@@ -509,7 +511,7 @@ struct ubifs_ino_node { + __le16 compr_type; + __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ + __u8 data[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_dent_node - directory entry node. +@@ -534,7 +536,7 @@ struct ubifs_dent_node { + __le16 nlen; + __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ + __u8 name[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_data_node - data node. +@@ -555,7 +557,7 @@ struct ubifs_data_node { + __le16 compr_type; + __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ + __u8 data[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_trun_node - truncation node. +@@ -575,7 +577,7 @@ struct ubifs_trun_node { + __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ + __le64 old_size; + __le64 new_size; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_pad_node - padding node. +@@ -586,7 +588,7 @@ struct ubifs_trun_node { + struct ubifs_pad_node { + struct ubifs_ch ch; + __le32 pad_len; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_sb_node - superblock node. 
+@@ -644,7 +646,7 @@ struct ubifs_sb_node { + __u8 uuid[16]; + __le32 ro_compat_version; + __u8 padding2[3968]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_mst_node - master node. +@@ -711,7 +713,7 @@ struct ubifs_mst_node { + __le32 idx_lebs; + __le32 leb_cnt; + __u8 padding[344]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_ref_node - logical eraseblock reference node. +@@ -727,7 +729,7 @@ struct ubifs_ref_node { + __le32 offs; + __le32 jhead; + __u8 padding[28]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_branch - key/reference/length branch +@@ -741,7 +743,7 @@ struct ubifs_branch { + __le32 offs; + __le32 len; + __u8 key[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_idx_node - indexing node. +@@ -755,7 +757,7 @@ struct ubifs_idx_node { + __le16 child_cnt; + __le16 level; + __u8 branches[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_cs_node - commit start node. +@@ -765,7 +767,7 @@ struct ubifs_idx_node { + struct ubifs_cs_node { + struct ubifs_ch ch; + __le64 cmt_no; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_orph_node - orphan node. +@@ -777,6 +779,6 @@ struct ubifs_orph_node { + struct ubifs_ch ch; + __le64 cmt_no; + __le64 inos[]; +-} __attribute__ ((packed)); ++} __packed; + + #endif /* __UBIFS_MEDIA_H__ */ +diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h +index 381d6b2..caf9e4b 100644 +--- a/fs/ubifs/ubifs.h ++++ b/fs/ubifs/ubifs.h +@@ -84,9 +84,6 @@ + #define INUM_WARN_WATERMARK 0xFFF00000 + #define INUM_WATERMARK 0xFFFFFF00 + +-/* Largest key size supported in this implementation */ +-#define CUR_MAX_KEY_LEN UBIFS_SK_LEN +- + /* Maximum number of entries in each LPT (LEB category) heap */ + #define LPT_HEAP_SZ 256 + +@@ -151,6 +148,12 @@ + */ + #define WORST_COMPR_FACTOR 2 + ++/* ++ * How much memory is needed for a buffer where we compress a data node.
++ */ ++#define COMPRESSED_DATA_NODE_BUF_SZ \ ++ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR) ++ + /* Maximum expected tree height for use by bottom_up_buf */ + #define BOTTOM_UP_HEIGHT 64 + +@@ -224,14 +227,14 @@ enum { + * LPT cnode flag bits. + * + * DIRTY_CNODE: cnode is dirty +- * COW_CNODE: cnode is being committed and must be copied before writing + * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), +- * so it can (and must) be freed when the commit is finished ++ * so it can (and must) be freed when the commit is finished ++ * COW_CNODE: cnode is being committed and must be copied before writing + */ + enum { + DIRTY_CNODE = 0, +- COW_CNODE = 1, +- OBSOLETE_CNODE = 2, ++ OBSOLETE_CNODE = 1, ++ COW_CNODE = 2, + }; + + /* +@@ -271,10 +274,10 @@ struct ubifs_old_idx { + + /* The below union makes it easier to deal with keys */ + union ubifs_key { +- uint8_t u8[CUR_MAX_KEY_LEN]; +- uint32_t u32[CUR_MAX_KEY_LEN/4]; +- uint64_t u64[CUR_MAX_KEY_LEN/8]; +- __le32 j32[CUR_MAX_KEY_LEN/4]; ++ uint8_t u8[UBIFS_SK_LEN]; ++ uint32_t u32[UBIFS_SK_LEN/4]; ++ uint64_t u64[UBIFS_SK_LEN/8]; ++ __le32 j32[UBIFS_SK_LEN/4]; + }; + + /** +@@ -383,9 +386,9 @@ struct ubifs_gced_idx_leb { + * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses + * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot + * make sure @inode->i_size is always changed under @ui_mutex, because it +- * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock +- * with 'ubifs_writepage()' (see file.c). All the other inode fields are +- * changed under @ui_mutex, so they do not need "shadow" fields. Note, one ++ * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would ++ * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields ++ * are changed under @ui_mutex, so they do not need "shadow" fields. 
Note, one + * could consider to rework locking and base it on "shadow" fields. + */ + struct ubifs_inode { +@@ -646,6 +649,7 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, + * @offs: write-buffer offset in this logical eraseblock + * @avail: number of bytes available in the write-buffer + * @used: number of used bytes in the write-buffer ++ * @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range) + * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, + * %UBI_UNKNOWN) + * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep +@@ -680,6 +684,7 @@ struct ubifs_wbuf { + int offs; + int avail; + int used; ++ int size; + int dtype; + int jhead; + int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); +@@ -714,12 +719,14 @@ struct ubifs_bud { + * struct ubifs_jhead - journal head. + * @wbuf: head's write-buffer + * @buds_list: list of bud LEBs belonging to this journal head ++ * @grouped: non-zero if UBIFS groups nodes when writing to this journal head + * + * Note, the @buds list is protected by the @c->buds_lock. + */ + struct ubifs_jhead { + struct ubifs_wbuf wbuf; + struct list_head buds_list; ++ unsigned int grouped:1; + }; + + /** +@@ -929,6 +936,40 @@ struct ubifs_mount_opts { + unsigned int compr_type:2; + }; + ++/** ++ * struct ubifs_budg_info - UBIFS budgeting information. 
++ * @idx_growth: amount of bytes budgeted for index growth ++ * @data_growth: amount of bytes budgeted for cached data ++ * @dd_growth: amount of bytes budgeted for cached data that will make ++ * other data dirty ++ * @uncommitted_idx: amount of bytes were budgeted for growth of the index, but ++ * which still have to be taken into account because the index ++ * has not been committed so far ++ * @old_idx_sz: size of index on flash ++ * @min_idx_lebs: minimum number of LEBs required for the index ++ * @nospace: non-zero if the file-system does not have flash space (used as ++ * optimization) ++ * @nospace_rp: the same as @nospace, but additionally means that even reserved ++ * pool is full ++ * @page_budget: budget for a page (constant, never changed after mount) ++ * @inode_budget: budget for an inode (constant, never changed after mount) ++ * @dent_budget: budget for a directory entry (constant, never changed after ++ * mount) ++ */ ++struct ubifs_budg_info { ++ long long idx_growth; ++ long long data_growth; ++ long long dd_growth; ++ long long uncommitted_idx; ++ unsigned long long old_idx_sz; ++ int min_idx_lebs; ++ unsigned int nospace:1; ++ unsigned int nospace_rp:1; ++ int page_budget; ++ int inode_budget; ++ int dent_budget; ++}; ++ + struct ubifs_debug_info; + + /** +@@ -972,6 +1013,7 @@ struct ubifs_debug_info; + * @cmt_wq: wait queue to sleep on if the log is full and a commit is running + * + * @big_lpt: flag that LPT is too big to write whole during commit ++ * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up + * @no_chk_data_crc: do not check CRCs when reading data nodes (except during + * recovery) + * @bulk_read: enable bulk-reads +@@ -1003,6 +1045,11 @@ struct ubifs_debug_info; + * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu + * @bu: pre-allocated bulk-read information + * ++ * @write_reserve_mutex: protects @write_reserve_buf ++ * @write_reserve_buf: on the write path we allocate memory,
which might ++ * sometimes be unavailable, in which case we use this ++ * write reserve buffer ++ * + * @log_lebs: number of logical eraseblocks in the log + * @log_bytes: log size in bytes + * @log_last: last LEB of the log +@@ -1024,7 +1071,12 @@ struct ubifs_debug_info; + * + * @min_io_size: minimal input/output unit size + * @min_io_shift: number of bits in @min_io_size minus one ++ * @max_write_size: maximum amount of bytes the underlying flash can write at a ++ * time (MTD write buffer size) ++ * @max_write_shift: number of bits in @max_write_size minus one + * @leb_size: logical eraseblock size in bytes ++ * @leb_start: starting offset of logical eraseblocks within physical ++ * eraseblocks + * @half_leb_size: half LEB size + * @idx_leb_size: how many bytes of an LEB are effectively available when it is + * used to store indexing nodes (@leb_size - @max_idx_node_sz) +@@ -1039,32 +1091,14 @@ struct ubifs_debug_info; + * @dirty_zn_cnt: number of dirty znodes + * @clean_zn_cnt: number of clean znodes + * +- * @budg_idx_growth: amount of bytes budgeted for index growth +- * @budg_data_growth: amount of bytes budgeted for cached data +- * @budg_dd_growth: amount of bytes budgeted for cached data that will make +- * other data dirty +- * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, +- * but which still have to be taken into account because +- * the index has not been committed so far +- * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, +- * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst, +- * @nospace, and @nospace_rp; +- * @min_idx_lebs: minimum number of LEBs required for the index +- * @old_idx_sz: size of index on flash ++ * @space_lock: protects @bi and @lst ++ * @lst: lprops statistics ++ * @bi: budgeting information + * @calc_idx_sz: temporary variable which is used to calculate new index size + * (contains accurate new index size at end of TNC commit start) +- * @lst: lprops statistics +- 
* @nospace: non-zero if the file-system does not have flash space (used as +- * optimization) +- * @nospace_rp: the same as @nospace, but additionally means that even reserved +- * pool is full +- * +- * @page_budget: budget for a page +- * @inode_budget: budget for an inode +- * @dent_budget: budget for a directory entry + * + * @ref_node_alsz: size of the LEB reference node aligned to the min. flash +- * I/O unit ++ * I/O unit + * @mst_node_alsz: master node aligned size + * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary + * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary +@@ -1166,22 +1200,20 @@ struct ubifs_debug_info; + * @rp_uid: reserved pool user ID + * @rp_gid: reserved pool group ID + * +- * @empty: if the UBI device is empty +- * @replay_tree: temporary tree used during journal replay ++ * @empty: %1 if the UBI device is empty ++ * @need_recovery: %1 if the file-system needs recovery ++ * @replaying: %1 during journal replay ++ * @mounting: %1 while mounting ++ * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode + * @replay_list: temporary list used during journal replay + * @replay_buds: list of buds to replay + * @cs_sqnum: sequence number of first node in the log (commit start node) + * @replay_sqnum: sequence number of node currently being replayed +- * @need_recovery: file-system needs recovery +- * @replaying: set to %1 during journal replay + * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W + * mode + * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted + * FS to R/W mode + * @size_tree: inode size information for recovery +- * @remounting_rw: set while re-mounting from R/O mode to R/W mode +- * @always_chk_crc: always check CRCs (while mounting and remounting to R/W +- * mode) + * @mount_opts: UBIFS-specific mount options + * + * @dbg: debugging-related information +@@ -1221,6 +1253,7 @@ struct ubifs_info { + wait_queue_head_t cmt_wq; + + 
unsigned int big_lpt:1; ++ unsigned int space_fixup:1; + unsigned int no_chk_data_crc:1; + unsigned int bulk_read:1; + unsigned int default_compr:2; +@@ -1250,6 +1283,9 @@ struct ubifs_info { + struct mutex bu_mutex; + struct bu_info bu; + ++ struct mutex write_reserve_mutex; ++ void *write_reserve_buf; ++ + int log_lebs; + long long log_bytes; + int log_last; +@@ -1271,7 +1307,10 @@ struct ubifs_info { + + int min_io_size; + int min_io_shift; ++ int max_write_size; ++ int max_write_shift; + int leb_size; ++ int leb_start; + int half_leb_size; + int idx_leb_size; + int leb_cnt; +@@ -1285,21 +1324,10 @@ struct ubifs_info { + atomic_long_t dirty_zn_cnt; + atomic_long_t clean_zn_cnt; + +- long long budg_idx_growth; +- long long budg_data_growth; +- long long budg_dd_growth; +- long long budg_uncommitted_idx; + spinlock_t space_lock; +- int min_idx_lebs; +- unsigned long long old_idx_sz; +- unsigned long long calc_idx_sz; + struct ubifs_lp_stats lst; +- unsigned int nospace:1; +- unsigned int nospace_rp:1; +- +- int page_budget; +- int inode_budget; +- int dent_budget; ++ struct ubifs_budg_info bi; ++ unsigned long long calc_idx_sz; + + int ref_node_alsz; + int mst_node_alsz; +@@ -1402,19 +1430,18 @@ struct ubifs_info { + gid_t rp_gid; + + /* The below fields are used only during mounting and re-mounting */ +- int empty; +- struct rb_root replay_tree; ++ unsigned int empty:1; ++ unsigned int need_recovery:1; ++ unsigned int replaying:1; ++ unsigned int mounting:1; ++ unsigned int remounting_rw:1; + struct list_head replay_list; + struct list_head replay_buds; + unsigned long long cs_sqnum; + unsigned long long replay_sqnum; +- int need_recovery; +- int replaying; + struct list_head unclean_leb_list; + struct ubifs_mst_node *rcvrd_mst_node; + struct rb_root size_tree; +- int remounting_rw; +- int always_chk_crc; + struct ubifs_mount_opts mount_opts; + + #ifdef CONFIG_UBIFS_FS_DEBUG +@@ -1438,6 +1465,15 @@ extern struct ubifs_compressor 
*ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; + + /* io.c */ + void ubifs_ro_mode(struct ubifs_info *c, int err); ++int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs, ++ int len, int even_ebadmsg); ++int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs, ++ int len, int dtype); ++int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len, ++ int dtype); ++int ubifs_leb_unmap(struct ubifs_info *c, int lnum); ++int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype); ++int ubifs_is_mapped(const struct ubifs_info *c, int lnum); + int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); + int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, + int dtype); +@@ -1605,6 +1641,7 @@ int ubifs_write_master(struct ubifs_info *c); + int ubifs_read_superblock(struct ubifs_info *c); + struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); + int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); ++int ubifs_fixup_free_space(struct ubifs_info *c); + + /* replay.c */ + int ubifs_validate_entry(struct ubifs_info *c, +@@ -1713,11 +1750,11 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); + int ubifs_recover_master_node(struct ubifs_info *c); + int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); + struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, +- int offs, void *sbuf, int grouped); ++ int offs, void *sbuf, int jhead); + struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, + int offs, void *sbuf); +-int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); +-int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); ++int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf); ++int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf); + int ubifs_rcvry_gc_commit(struct ubifs_info *c); + int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, + int 
deletion, loff_t new_size); +diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c +index c74400f..2fdc4fa 100644 +--- a/fs/ubifs/xattr.c ++++ b/fs/ubifs/xattr.c +@@ -79,9 +79,9 @@ enum { + SECURITY_XATTR, + }; + +-static const struct inode_operations none_inode_operations; +-static const struct address_space_operations none_address_operations; +-static const struct file_operations none_file_operations; ++static const struct inode_operations empty_iops; ++static const struct file_operations empty_fops; ++static struct address_space_operations empty_aops; + + /** + * create_xattr - create an extended attribute. +@@ -130,20 +130,19 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, + } + + /* Re-define all operations to be "nothing" */ +- inode->i_mapping->a_ops = &none_address_operations; +- inode->i_op = &none_inode_operations; +- inode->i_fop = &none_file_operations; ++ inode->i_mapping->a_ops = &empty_aops; ++ inode->i_op = &empty_iops; ++ inode->i_fop = &empty_fops; + + inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; + ui = ubifs_inode(inode); + ui->xattr = 1; + ui->flags |= UBIFS_XATTR_FL; +- ui->data = kmalloc(size, GFP_NOFS); ++ ui->data = kmemdup(value, size, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_free; + } +- memcpy(ui->data, value, size); + inode->i_size = ui->ui_size = size; + ui->data_len = size; + +@@ -204,12 +203,11 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, + return err; + + kfree(ui->data); +- ui->data = kmalloc(size, GFP_NOFS); ++ ui->data = kmemdup(value, size, GFP_NOFS); + if (!ui->data) { + err = -ENOMEM; + goto out_free; + } +- memcpy(ui->data, value, size); + inode->i_size = ui->ui_size = size; + ui->data_len = size; + +diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h +index b31bd9e..db4836b 100644 +--- a/include/linux/mtd/ubi.h ++++ b/include/linux/mtd/ubi.h +@@ -21,7 +21,7 @@ + #ifndef __LINUX_UBI_H__ + #define __LINUX_UBI_H__ + +-#include <asm/ioctl.h> 
++#include <linux/ioctl.h> + #include <linux/types.h> + #include <mtd/ubi-user.h> + +@@ -87,7 +87,7 @@ enum { + * physical eraseblock size and on how much bytes UBI headers consume. But + * because of the volume alignment (@alignment), the usable size of logical + * eraseblocks if a volume may be less. The following equation is true: +- * @usable_leb_size = LEB size - (LEB size mod @alignment), ++ * @usable_leb_size = LEB size - (LEB size mod @alignment), + * where LEB size is the logical eraseblock size defined by the UBI device. + * + * The alignment is multiple to the minimal flash input/output unit size or %1 +@@ -116,29 +116,53 @@ struct ubi_volume_info { + * struct ubi_device_info - UBI device description data structure. + * @ubi_num: ubi device number + * @leb_size: logical eraseblock size on this UBI device ++ * @leb_start: starting offset of logical eraseblocks within physical ++ * eraseblocks + * @min_io_size: minimal I/O unit size ++ * @max_write_size: maximum amount of bytes the underlying flash can write at a ++ * time (MTD write buffer size) + * @ro_mode: if this device is in read-only mode + * @cdev: UBI character device major and minor numbers + * + * Note, @leb_size is the logical eraseblock size offered by the UBI device. + * Volumes of this UBI device may have smaller logical eraseblock size if their + * alignment is not equivalent to %1. ++ * ++ * The @max_write_size field describes the maximum flash write unit. For ++ * example, NOR flash allows for changing individual bytes, so @min_io_size is ++ * %1. However, it does not mean that NOR flash has to write data byte-by-byte. ++ * Instead, CFI NOR flashes have a write-buffer of, e.g., 64 bytes, and when ++ * writing large chunks of data, they write 64-bytes at a time. Obviously, this ++ * improves write throughput. ++ * ++ * Also, the MTD device may have N interleaved (striped) flash chips ++ * underneath, in which case @min_io_size can be physical min.
I/O size of ++ * single flash chip, while @max_write_size can be N * @min_io_size. ++ * ++ * The @max_write_size field is always greater or equivalent to @min_io_size. ++ * E.g., some NOR flashes may have (@min_io_size = 1, @max_write_size = 64). In ++ * contrast, NAND flashes usually have @min_io_size = @max_write_size = NAND ++ * page size. + */ + struct ubi_device_info { + int ubi_num; + int leb_size; ++ int leb_start; + int min_io_size; ++ int max_write_size; + int ro_mode; + dev_t cdev; + }; + + /* +- * enum - volume notification types. +- * @UBI_VOLUME_ADDED: volume has been added +- * @UBI_VOLUME_REMOVED: start volume volume +- * @UBI_VOLUME_RESIZED: volume size has been re-sized +- * @UBI_VOLUME_RENAMED: volume name has been re-named +- * @UBI_VOLUME_UPDATED: volume name has been updated ++ * Volume notification types. ++ * @UBI_VOLUME_ADDED: a volume has been added (an UBI device was attached or a ++ * volume was created) ++ * @UBI_VOLUME_REMOVED: a volume has been removed (an UBI device was detached ++ * or a volume was removed) ++ * @UBI_VOLUME_RESIZED: a volume has been re-sized ++ * @UBI_VOLUME_RENAMED: a volume has been re-named ++ * @UBI_VOLUME_UPDATED: data has been written to a volume + * + * These constants define which type of event has happened when a volume + * notification function is invoked. |