/*
 * ntfs_attr.c - NTFS kernel attribute operations.
 *
 * Copyright (c) 2006-2011 Anton Altaparmakov.  All Rights Reserved.
 * Portions Copyright (c) 2006-2011 Apple Inc.  All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer. 
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution. 
 * 3. Neither the name of Apple Inc. ("Apple") nor the names of its
 *    contributors may be used to endorse or promote products derived from this
 *    software without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ALTERNATIVELY, provided that this notice and licensing terms are retained in
 * full, this file may be redistributed and/or modified under the terms of the
 * GNU General Public License (GPL) Version 2, in which case the provisions of
 * that version of the GPL will apply to you instead of the license terms
 * above.  You can obtain a copy of the GPL Version 2 at
 * http://developer.apple.com/opensource/licenses/gpl-2.txt.
 */

#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/ucred.h>
#include <sys/ubc.h>

#include <string.h>

#include <libkern/libkern.h>
#include <libkern/OSMalloc.h>

#include <kern/debug.h>
#include <kern/sched_prim.h>

#include "ntfs.h"
#include "ntfs_attr.h"
#include "ntfs_attr_list.h"
#include "ntfs_debug.h"
#include "ntfs_dir.h"
#include "ntfs_endian.h"
#include "ntfs_index.h"
#include "ntfs_inode.h"
#include "ntfs_layout.h"
#include "ntfs_lcnalloc.h"
#include "ntfs_mft.h"
#include "ntfs_page.h"
#include "ntfs_runlist.h"
#include "ntfs_time.h"
#include "ntfs_types.h"
#include "ntfs_unistr.h"

ntfschar AT_UNNAMED[1] = { 0 };

/**
 * ntfs_attr_map_runlist - map the whole runlist of an ntfs inode
 * @ni:		ntfs inode for which to map the whole runlist
 *
 * Map the whole runlist of the ntfs inode @ni.
 *
 * Return 0 on success and errno on error.
 *
 * Note this function requires the runlist not to be mapped yet at all.  This
 * limitation is ok because we only use this function at mount time to map the
 * runlist of some system files thus we are guaranteed that they will not have
 * any runlist fragments mapped yet.
 *
 * Note the runlist can be NULL after this function returns if the attribute
 * has zero allocated size, i.e. there simply is no runlist.
 */
errno_t ntfs_attr_map_runlist(ntfs_inode *ni)
{
	VCN vcn, end_vcn;
	ntfs_inode *base_ni;
	MFT_RECORD *m;
	ntfs_attr_search_ctx *ctx;
	ATTR_RECORD *a;
	errno_t err = 0;

	ntfs_debug("Entering for mft_no 0x%llx, type 0x%x.",
			(unsigned long long)ni->mft_no,
			(unsigned)le32_to_cpu(ni->type));
	/* If the attribute is resident there is nothing to do. */
	if (!NInoNonResident(ni)) {
		ntfs_debug("Done (resident, nothing to do).");
		return 0;
	}
	lck_rw_lock_exclusive(&ni->rl.lock);
	/* Verify that the runlist is not mapped yet. */
	if (ni->rl.alloc && ni->rl.elements)
		panic("%s(): ni->rl.alloc && ni->rl.elements\n", __FUNCTION__);
	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	err = ntfs_mft_record_map(base_ni, &m);
	if (err)
		goto err;
	ctx = ntfs_attr_search_ctx_get(base_ni, m);
	if (!ctx) {
		err = ENOMEM;
		goto unm_err;
	}
	vcn = 0;
	end_vcn = ni->allocated_size >> ni->vol->cluster_size_shift;
	do {
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, vcn,
				NULL, 0, ctx);
		if (err) {
			if (err == ENOENT)
				err = EIO;
			break;
		}
		a = ctx->a;
		if (!a->non_resident) {
corrupt_err:
			ntfs_error(ni->vol->mp, "Inode 0x%llx contains corrupt "
					"attribute extent, run chkdsk.",
					(unsigned long long)base_ni->mft_no);
			NVolSetErrors(ni->vol);
			err = EIO;
			break;
		}
		/*
		 * If we are in the first attribute extent, verify the cached
		 * allocated size is correct.
		 */
		if (!a->lowest_vcn)
			if (sle64_to_cpu(a->allocated_size) !=
					ni->allocated_size)
				panic("%s(): sle64_to_cpu(a->allocated_size) "
						"!= ni->allocated_size\n",
						__FUNCTION__);
		/*
		 * Sanity check the lowest_vcn of the attribute is equal to the
		 * vcn we looked up and that the highest_vcn of the attribute
		 * is above the current vcn.
		 */
		if (sle64_to_cpu(a->lowest_vcn) != vcn || (vcn &&
				sle64_to_cpu(a->highest_vcn) < vcn))
			goto corrupt_err;
		/* Determine the next vcn. */
		vcn = sle64_to_cpu(a->highest_vcn) + 1;
		/*
		 * Finally, map the runlist fragment contained in this
		 * attribute extent.
		 */
		err = ntfs_mapping_pairs_decompress(ni->vol, a, &ni->rl);
	} while (!err && vcn < end_vcn);
unm_err:
	ntfs_attr_search_ctx_put(ctx);
	ntfs_mft_record_unmap(base_ni);
err:
	lck_rw_unlock_exclusive(&ni->rl.lock);
	if (!err)
		ntfs_debug("Done.");
	else
		ntfs_error(ni->vol->mp, "Failed (error %d).", (int)err);
	return err;
}
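
/*
 * A compiled-out usage sketch, not part of the original driver: mapping the
 * whole runlist of a system file inode at mount time and then walking the
 * mapped elements.  The example_* name is illustrative only.
 */
#if 0
static errno_t example_map_and_walk_runlist(ntfs_inode *ni)
{
	ntfs_rl_element *rl;
	errno_t err;

	err = ntfs_attr_map_runlist(ni);
	if (err)
		return err;
	/* The runlist can legitimately be empty if nothing is allocated. */
	lck_rw_lock_shared(&ni->rl.lock);
	for (rl = ni->rl.rl; ni->rl.elements && rl->length; rl++)
		ntfs_debug("vcn 0x%llx, lcn 0x%llx, length 0x%llx.",
				(unsigned long long)rl->vcn,
				(unsigned long long)rl->lcn,
				(unsigned long long)rl->length);
	lck_rw_unlock_shared(&ni->rl.lock);
	return 0;
}
#endif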

/**
 * ntfs_map_runlist_nolock - map (a part of) a runlist of an ntfs inode
 * @ni:		ntfs inode for which to map (part of) a runlist
 * @vcn:	map runlist part containing this vcn
 * @ctx:	active attribute search context if present or NULL if not
 *
 * Map the part of a runlist containing the @vcn of the ntfs inode @ni.
 *
 * If @ctx is specified, it is an active search context of @ni and its base mft
 * record.  This is needed when ntfs_map_runlist_nolock() encounters unmapped
 * runlist fragments and allows their mapping.  If you do not have the mft
 * record mapped, you can specify @ctx as NULL and ntfs_map_runlist_nolock()
 * will perform the necessary mapping and unmapping.
 *
 * Note, ntfs_map_runlist_nolock() saves the state of @ctx on entry and
 * restores it before returning.  Thus, @ctx will be left pointing to the same
 * attribute on return as on entry.  However, the actual pointers in @ctx may
 * point to different memory locations on return, so you must remember to reset
 * any cached pointers from the @ctx, i.e. after the call to
 * ntfs_map_runlist_nolock(), you will probably want to do:
 *	m = ctx->m;
 *	a = ctx->a;
 * Assuming you cache ctx->a in a variable @a of type ATTR_RECORD * and that
 * you cache ctx->m in a variable @m of type MFT_RECORD *.
 *
 * Return 0 on success and errno on error.  There is one special error code
 * which is not an error as such.  This is ENOENT.  It means that @vcn is out
 * of bounds of the runlist.
 *
 * Note the runlist can be NULL after this function returns if @vcn is zero and
 * the attribute has zero allocated size, i.e. there simply is no runlist.
 *
 * WARNING: If @ctx is supplied, regardless of whether success or failure is
 *	    returned, you need to check @ctx->is_error and if 1 the @ctx is no
 *	    longer valid, i.e. you need to either call
 *	    ntfs_attr_search_ctx_reinit() or ntfs_attr_search_ctx_put() on it.
 *	    In that case @ctx->error will give you the error code for why the
 *	    mapping of the old inode failed.
 *	    Also if @ctx is supplied and the current attribute (or the mft
 *	    record it is in) has been modified then the caller must call
 *	    NInoSetMrecNeedsDirtying(ctx->ni); before calling
 *	    ntfs_map_runlist_nolock() or the changes may be lost.
 *
 * Locking: - The runlist described by @ni must be locked for writing on entry
 *	      and is locked on return.  Note the runlist will be modified.
 *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
 *	      entry and it will be left unmapped on return.
 *	    - If @ctx is not NULL, the base mft record must be mapped on entry
 *	      and it will be left mapped on return.
 */
errno_t ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn,
		ntfs_attr_search_ctx *ctx)
{
	VCN end_vcn;
	ntfs_inode *base_ni;
	MFT_RECORD *m;
	ATTR_RECORD *a;
	errno_t err = 0;
	BOOL ctx_is_temporary, ctx_needs_reset;
	ntfs_attr_search_ctx old_ctx = { { NULL, }, };

	ntfs_debug("Entering for mft_no 0x%llx, vcn 0x%llx.",
			(unsigned long long)ni->mft_no,
			(unsigned long long)vcn);
	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	if (!ctx) {
		ctx_is_temporary = ctx_needs_reset = TRUE;
		err = ntfs_mft_record_map(base_ni, &m);
		if (err)
			goto done;
		ctx = ntfs_attr_search_ctx_get(base_ni, m);
		if (!ctx) {
			err = ENOMEM;
			goto err;
		}
	} else {
		VCN allocated_size_vcn;

		if (ctx->is_error)
			panic("%s(): ctx->is_error\n", __FUNCTION__);
		a = ctx->a;
		if (!a->non_resident)
			panic("%s(): !a->non_resident\n", __FUNCTION__);
		ctx_is_temporary = FALSE;
		end_vcn = sle64_to_cpu(a->highest_vcn);
		lck_spin_lock(&ni->size_lock);
		allocated_size_vcn = ni->allocated_size >>
				ni->vol->cluster_size_shift;
		lck_spin_unlock(&ni->size_lock);
		/*
		 * If we already have the attribute extent containing @vcn in
		 * @ctx, no need to look it up again.  We slightly cheat in
		 * that if vcn exceeds the allocated size, we will refuse to
		 * map the runlist below, so there is definitely no need to get
		 * the right attribute extent.
		 */
		if (vcn >= allocated_size_vcn || (a->type == ni->type &&
				a->name_length == ni->name_len &&
				!bcmp((u8*)a + le16_to_cpu(a->name_offset),
				ni->name, ni->name_len) &&
				sle64_to_cpu(a->lowest_vcn) <= vcn &&
				end_vcn >= vcn))
			ctx_needs_reset = FALSE;
		else {
			/* Save the old search context. */
			old_ctx = *ctx;
			/*
			 * Reinitialize the search context so we can lookup the
			 * needed attribute extent.
			 */
			ntfs_attr_search_ctx_reinit(ctx);
			ctx_needs_reset = TRUE;
		}
	}
	if (ctx_needs_reset) {
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, vcn,
				NULL, 0, ctx);
		if (err) {
			if (err == ENOENT)
				err = EIO;
			goto err;
		}
		if (!ctx->a->non_resident)
			panic("%s(): !a->non_resident!\n", __FUNCTION__);
	}
	a = ctx->a;
	/*
	 * Only decompress the mapping pairs if @vcn is inside it.  Otherwise
	 * we get into problems when we try to map an out of bounds vcn because
	 * we then try to map the already mapped runlist fragment and
	 * ntfs_mapping_pairs_decompress() fails.
	 */
	end_vcn = sle64_to_cpu(a->highest_vcn) + 1;
	if (vcn && vcn >= end_vcn) {
		err = ENOENT;
		goto err;
	}
	err = ntfs_mapping_pairs_decompress(ni->vol, a, &ni->rl);
err:
	if (ctx_is_temporary) {
		if (ctx)
			ntfs_attr_search_ctx_put(ctx);
		ntfs_mft_record_unmap(base_ni);
	} else if (ctx_needs_reset) {
		/*
		 * If there is no attribute list, restoring the search context
		 * is accomplished simply by copying the saved context back over
		 * the caller supplied context.  If there is an attribute list,
		 * things are more complicated as we need to deal with mapping
		 * of mft records and resulting potential changes in pointers.
		 */
		if (NInoAttrList(base_ni)) {
			/*
			 * If the currently mapped (extent) inode is not the
			 * one we had before, we need to unmap it and map the
			 * old one.
			 */
			if (ctx->ni != old_ctx.ni) {
				/*
				 * If the currently mapped inode is not the
				 * base inode, unmap it.
				 */
				if (ctx->base_ni && ctx->ni != ctx->base_ni) {
					ntfs_extent_mft_record_unmap(ctx->ni);
					ctx->m = ctx->base_m;
					if (!ctx->m)
						panic("%s(): !ctx->m\n",
								__FUNCTION__);
				}
				/*
				 * If the old mapped inode is not the base
				 * inode, map it.
				 */
				if (old_ctx.base_ni && old_ctx.ni !=
						old_ctx.base_ni) {
					errno_t err2;
retry_map:
					err2 = ntfs_mft_record_map(old_ctx.ni,
							&ctx->m);
					/*
					 * Something bad has happened.  If out
					 * of memory retry till it succeeds.
					 * Any other errors are fatal and we
					 * return the error code in ctx->error.
					 * Let the caller deal with it...  We
					 * just need to fudge things so the
					 * caller can reinit and/or put the
					 * search context safely.
					 */
					if (err2) {
						if (err2 == ENOMEM) {
							(void)thread_block(
							THREAD_CONTINUE_NULL);
							goto retry_map;
						}
						ctx->is_error = 1;
						ctx->error = err2;
						old_ctx.ni = old_ctx.base_ni;
					}
				}
			}
			if (ctx->is_error) {
				old_ctx.is_error = 1;
				old_ctx.error = ctx->error;
			} else if (ctx->m != old_ctx.m) {
				/*
				 * Update the changed pointers in the saved
				 * context.
				 */
				old_ctx.a = (ATTR_RECORD*)((u8*)ctx->m +
						((u8*)old_ctx.a -
						(u8*)old_ctx.m));
				old_ctx.m = ctx->m;
			}
		}
		/* Restore the search context to the saved one. */
		*ctx = old_ctx;
	}
done:
	ntfs_debug("Done (error %d).", (int)err);
	return err;
}
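
/*
 * A compiled-out usage sketch, not part of the original driver: calling
 * ntfs_map_runlist_nolock() with an active search context per the WARNING
 * above.  The caller holds the runlist lock for writing, checks
 * @ctx->is_error afterwards and refreshes any cached @m/@a pointers.  The
 * example_* name is illustrative only.
 */
#if 0
static errno_t example_map_with_active_ctx(ntfs_inode *ni, VCN vcn,
		ntfs_attr_search_ctx *ctx)
{
	MFT_RECORD *m;
	ATTR_RECORD *a;
	errno_t err;

	err = ntfs_map_runlist_nolock(ni, vcn, ctx);
	if (ctx->is_error) {
		/* @ctx is no longer valid, make it safe to reuse/release. */
		err = ctx->error;
		ntfs_attr_search_ctx_reinit(ctx);
		return err;
	}
	if (err)
		return err;
	/* Reset cached pointers, they may have changed. */
	m = ctx->m;
	a = ctx->a;
	(void)m;
	(void)a;
	return 0;
}
#endif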

/**
 * ntfs_attr_vcn_to_lcn_nolock - convert a vcn into a lcn given an ntfs inode
 * @ni:			ntfs inode of the attribute whose runlist to search
 * @vcn:		vcn to convert
 * @write_locked:	true if the runlist is locked for writing
 * @clusters:		optional destination for number of contiguous clusters
 *
 * Find the virtual cluster number @vcn in the runlist of the ntfs attribute
 * described by the ntfs inode @ni and return the corresponding logical cluster
 * number (lcn).
 *
 * If the @vcn is not mapped yet, the attempt is made to map the attribute
 * extent containing the @vcn and the vcn to lcn conversion is retried.
 *
 * If @write_locked is true the caller has locked the runlist for writing and
 * if false for reading.
 *
 * If @clusters is not NULL, on success (i.e. we return >= LCN_HOLE) we return
 * the number of contiguous clusters after the returned lcn in *@clusters.
 *
 * Since lcns must be >= 0, we use negative return codes with special meaning:
 *
 * Return code	Meaning / Description
 * ==========================================
 *  LCN_HOLE	Hole / not allocated on disk.
 *  LCN_ENOENT	There is no such vcn in the runlist, i.e. @vcn is out of bounds.
 *  LCN_ENOMEM	Not enough memory to map runlist.
 *  LCN_EIO	Critical error (runlist/file is corrupt, i/o error, etc).
 *
 * Locking: - The runlist must be locked on entry and is left locked on return.
 *	    - If @write_locked is FALSE, i.e. the runlist is locked for reading,
 *	      the lock may be dropped inside the function so you cannot rely on
 *	      the runlist still being the same when this function returns.
 */
LCN ntfs_attr_vcn_to_lcn_nolock(ntfs_inode *ni, const VCN vcn,
		const BOOL write_locked, s64 *clusters)
{
	LCN lcn;
	BOOL need_lock_switch = FALSE;
	BOOL is_retry = FALSE;

	ntfs_debug("Entering for mft_no 0x%llx, vcn 0x%llx, %s_locked.",
			(unsigned long long)ni->mft_no,
			(unsigned long long)vcn,
			write_locked ? "write" : "read");
	if (!NInoNonResident(ni))
		panic("%s(): !NInoNonResident(ni)\n", __FUNCTION__);
	if (vcn < 0)
		panic("%s(): vcn < 0\n", __FUNCTION__);
retry_remap:
	if (!ni->rl.elements) {
		lck_spin_lock(&ni->size_lock);
		if (!ni->allocated_size) {
			lck_spin_unlock(&ni->size_lock);
			lcn = LCN_ENOENT;
			goto lcn_enoent;
		}
		lck_spin_unlock(&ni->size_lock);
		if (!is_retry)
			goto try_to_map;
		lcn = LCN_EIO;
		goto lcn_eio;
	}
	/* Convert vcn to lcn.  If that fails map the runlist and retry once. */
	lcn = ntfs_rl_vcn_to_lcn(ni->rl.rl, vcn, clusters);
	if (lcn >= LCN_HOLE) {
		if (need_lock_switch)
			lck_rw_lock_exclusive_to_shared(&ni->rl.lock);
		ntfs_debug("Done (lcn 0x%llx, clusters 0x%llx).",
				(unsigned long long)lcn,
				clusters ? (unsigned long long)*clusters : 0);
		return lcn;
	}
	if (lcn != LCN_RL_NOT_MAPPED) {
		if (lcn != LCN_ENOENT)
			lcn = LCN_EIO;
	} else if (!is_retry) {
		errno_t err;

try_to_map:
		if (!write_locked && !need_lock_switch) {
			need_lock_switch = TRUE;
			/*
			 * If converting the lock from shared to exclusive
			 * fails, need to take the lock for writing and retry
			 * in case the racing process did the mapping for us.
			 */
			if (!lck_rw_lock_shared_to_exclusive(&ni->rl.lock)) {
				lck_rw_lock_exclusive(&ni->rl.lock);
				goto retry_remap;
			}
		}
		err = ntfs_map_runlist_nolock(ni, vcn, NULL);
		if (!err) {
			is_retry = TRUE;
			goto retry_remap;
		}
		switch (err) {
		case ENOENT:
			lcn = LCN_ENOENT;
			break;
		case ENOMEM:
			lcn = LCN_ENOMEM;
			break;
		default:
			lcn = LCN_EIO;
		}
	}
lcn_eio:
	if (need_lock_switch)
		lck_rw_lock_exclusive_to_shared(&ni->rl.lock);
	if (lcn == LCN_ENOENT) {
lcn_enoent:
		ntfs_debug("Done (LCN_ENOENT).");
	} else
		ntfs_error(ni->vol->mp, "Failed (error %lld).", (long long)lcn);
	return lcn;
}
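
/*
 * A compiled-out usage sketch, not part of the original driver: a vcn to
 * lcn conversion with the runlist held for reading, dispatching on the
 * special negative return codes tabulated above.  The example_* name is
 * illustrative only.
 */
#if 0
static errno_t example_vcn_to_lcn(ntfs_inode *ni, const VCN vcn, LCN *lcn)
{
	s64 clusters;

	lck_rw_lock_shared(&ni->rl.lock);
	*lcn = ntfs_attr_vcn_to_lcn_nolock(ni, vcn, FALSE, &clusters);
	lck_rw_unlock_shared(&ni->rl.lock);
	if (*lcn >= LCN_HOLE)
		return 0; /* @clusters is valid, LCN_HOLE means sparse. */
	switch (*lcn) {
	case LCN_ENOENT:
		return ENOENT;
	case LCN_ENOMEM:
		return ENOMEM;
	default:
		return EIO;
	}
}
#endif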

/**
 * ntfs_attr_find_vcn_nolock - find a vcn in the runlist of an ntfs inode
 * @ni:		ntfs inode of the attribute whose runlist to search
 * @vcn:	vcn to find
 * @run:	return pointer for the found runlist element
 * @ctx:	active attribute search context if present or NULL if not
 *
 * Find the virtual cluster number @vcn in the runlist of the ntfs attribute
 * described by the ntfs inode @ni and return the address of the runlist
 * element containing the @vcn in *@run.
 *
 * If the @vcn is not mapped yet, the attempt is made to map the attribute
 * extent containing the @vcn and the vcn to lcn conversion is retried.
 *
 * If @ctx is specified, it is an active search context of @ni and its base mft
 * record.  This is needed when ntfs_attr_find_vcn_nolock() encounters unmapped
 * runlist fragments and allows their mapping.  If you do not have the mft
 * record mapped, you can specify @ctx as NULL and ntfs_attr_find_vcn_nolock()
 * will perform the necessary mapping and unmapping.
 *
 * Note, ntfs_attr_find_vcn_nolock() saves the state of @ctx on entry and
 * restores it before returning.  Thus, @ctx will be left pointing to the same
 * attribute on return as on entry.  However, the actual pointers in @ctx may
 * point to different memory locations on return, so you must remember to reset
 * any cached pointers from the @ctx, i.e. after the call to
 * ntfs_attr_find_vcn_nolock(), you will probably want to do:
 *	m = ctx->m;
 *	a = ctx->a;
 * Assuming you cache ctx->a in a variable @a of type ATTR_RECORD * and that
 * you cache ctx->m in a variable @m of type MFT_RECORD *.
 * Note you need to distinguish between the lcn of the returned runlist element
 * being >= 0 and LCN_HOLE.  In the latter case you have to return zeroes on
 * read and allocate clusters on write.
 *
 * Return 0 on success and errno on error.
 *
 * The possible error return codes are:
 *	ENOENT	- No such vcn in the runlist, i.e. @vcn is out of bounds.
 *	ENOMEM	- Not enough memory to map runlist.
 *	EIO	- Critical error (runlist/file is corrupt, i/o error, etc).
 *
 * WARNING: If @ctx is supplied, regardless of whether success or failure is
 *	    returned, you need to check @ctx->is_error and if 1 the @ctx is no
 *	    longer valid, i.e. you need to either call
 *	    ntfs_attr_search_ctx_reinit() or ntfs_attr_search_ctx_put() on it.
 *	    In that case @ctx->error will give you the error code for why the
 *	    mapping of the old inode failed.
 *	    Also if @ctx is supplied and the current attribute (or the mft
 *	    record it is in) has been modified then the caller must call
 *	    NInoSetMrecNeedsDirtying(ctx->ni); before calling
 *	    ntfs_map_runlist_nolock() or the changes may be lost.
 *
 * Locking: - The runlist described by @ni must be locked for writing on entry
 *	      and is locked on return.  Note the runlist may be modified when
 *	      needed runlist fragments need to be mapped.
 *	    - If @ctx is NULL, the base mft record of @ni must not be mapped on
 *	      entry and it will be left unmapped on return.
 *	    - If @ctx is not NULL, the base mft record must be mapped on entry
 *	      and it will be left mapped on return.
 */
errno_t ntfs_attr_find_vcn_nolock(ntfs_inode *ni, const VCN vcn,
		ntfs_rl_element **run, ntfs_attr_search_ctx *ctx)
{
	ntfs_rl_element *rl;
	errno_t err = 0;
	BOOL is_retry = FALSE;

	ntfs_debug("Entering for mft_no 0x%llx, vcn 0x%llx, with%s ctx.",
			(unsigned long long)ni->mft_no,
			(unsigned long long)vcn, ctx ? "" : "out");
	if (!NInoNonResident(ni))
		panic("%s(): !NInoNonResident(ni)\n", __FUNCTION__);
	if (vcn < 0)
		panic("%s(): vcn < 0\n", __FUNCTION__);
retry_remap:
	if (!ni->rl.elements) {
		lck_spin_lock(&ni->size_lock);
		if (!ni->allocated_size) {
			lck_spin_unlock(&ni->size_lock);
			return ENOENT;
		}
		lck_spin_unlock(&ni->size_lock);
		if (!is_retry)
			goto try_to_map;
		err = EIO;
		goto err;
	}
	rl = ni->rl.rl;
	if (vcn >= rl[0].vcn) {
		while (rl->length) {
			if (vcn < rl[1].vcn) {
				if (rl->lcn >= LCN_HOLE) {
					ntfs_debug("Done.");
					*run = rl;
					return 0;
				}
				break;
			}
			rl++;
		}
		if (rl->lcn != LCN_RL_NOT_MAPPED) {
			if (rl->lcn == LCN_ENOENT)
				err = ENOENT;
			else
				err = EIO;
		}
	}
	if (!err && !is_retry) {
		/*
		 * If the search context is invalid we cannot map the unmapped
		 * region.
		 */
		if (ctx && ctx->is_error)
			err = ctx->error;
		else {
try_to_map:
			/*
			 * The @vcn is in an unmapped region, map the runlist
			 * and retry.
			 */
			err = ntfs_map_runlist_nolock(ni, vcn, ctx);
			if (!err) {
				is_retry = TRUE;
				goto retry_remap;
			}
		}
		if (err == EINVAL)
			err = EIO;
	} else if (!err)
		err = EIO;
err:
	if (err != ENOENT)
		ntfs_error(ni->vol->mp, "Failed (error %d).", err);
	return err;
}
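
/*
 * A compiled-out usage sketch, not part of the original driver: looking up
 * the runlist element containing @vcn with the runlist held for writing and
 * no active search context (thus a NULL @ctx).  The example_* name is
 * illustrative only.
 */
#if 0
static errno_t example_find_vcn(ntfs_inode *ni, const VCN vcn)
{
	ntfs_rl_element *run;
	errno_t err;

	lck_rw_lock_exclusive(&ni->rl.lock);
	err = ntfs_attr_find_vcn_nolock(ni, vcn, &run, NULL);
	if (!err) {
		if (run->lcn == LCN_HOLE) {
			/* Sparse: return zeroes on read, allocate on write. */
		} else {
			/* Allocated: @vcn maps to run->lcn + vcn - run->vcn. */
		}
	}
	lck_rw_unlock_exclusive(&ni->rl.lock);
	return err;
}
#endif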

/**
 * ntfs_attr_search_ctx_reinit - reinitialize an attribute search context
 * @ctx:	attribute search context to reinitialize
 *
 * Reinitialize the attribute search context @ctx, unmapping an associated
 * extent mft record if present, and initialize the search context again.
 *
 * This is used when a search for a new attribute is being started to reset
 * the search context to the beginning.
 *
 * Note: We preserve the content of @ctx->is_mft_locked so that reinitializing
 * a search context can also be done when dealing with the mft itself.
 */
void ntfs_attr_search_ctx_reinit(ntfs_attr_search_ctx *ctx)
{
	const BOOL mft_is_locked = ctx->is_mft_locked;

	if (!ctx->base_ni) {
		/* No attribute list. */
		ctx->is_first = 1;
		ctx->is_iteration = 0;
		/* Sanity checks are performed elsewhere. */
		ctx->a = (ATTR_RECORD*)((u8*)ctx->m +
				le16_to_cpu(ctx->m->attrs_offset));
		/*
		 * This needs resetting due to
		 * ntfs_attr_find_in_attribute_list() which can leave it set
		 * despite having zeroed ctx->base_ni.
		 */
		ctx->al_entry = NULL;
		return;
	}
	/* Attribute list. */
	if (ctx->ni != ctx->base_ni)
		ntfs_extent_mft_record_unmap(ctx->ni);
	ntfs_attr_search_ctx_init(ctx, ctx->base_ni, ctx->base_m);
	if (mft_is_locked)
		ctx->is_mft_locked = 1;
}

/**
 * ntfs_attr_search_ctx_get - allocate and init a new attribute search context
 * @ni:		ntfs inode with which to initialize the search context
 * @m:		mft record with which to initialize the search context
 *
 * Allocate a new attribute search context, initialize it with @ni and @m, and
 * return it.  Return NULL if allocation failed.
 */
ntfs_attr_search_ctx *ntfs_attr_search_ctx_get(ntfs_inode *ni, MFT_RECORD *m)
{
	ntfs_attr_search_ctx *ctx;

	ctx = OSMalloc(sizeof(ntfs_attr_search_ctx), ntfs_malloc_tag);
	if (ctx)
		ntfs_attr_search_ctx_init(ctx, ni, m);
	return ctx;
}

/**
 * ntfs_attr_search_ctx_put - release an attribute search context
 * @ctx:	attribute search context to free
 *
 * Release the attribute search context @ctx, unmapping an associated extent
 * mft record if present.
 */
void ntfs_attr_search_ctx_put(ntfs_attr_search_ctx *ctx)
{
	if (ctx->base_ni && ctx->ni != ctx->base_ni)
		ntfs_extent_mft_record_unmap(ctx->ni);
	OSFree(ctx, sizeof(ntfs_attr_search_ctx), ntfs_malloc_tag);
}
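
/*
 * A compiled-out usage sketch, not part of the original driver: the
 * canonical get/lookup/put life cycle of a search context, here locating
 * the unnamed $DATA attribute in the base mft record of @base_ni.  The
 * example_* name is illustrative only.
 */
#if 0
static errno_t example_ctx_life_cycle(ntfs_inode *base_ni)
{
	MFT_RECORD *m;
	ntfs_attr_search_ctx *ctx;
	errno_t err;

	err = ntfs_mft_record_map(base_ni, &m);
	if (err)
		return err;
	ctx = ntfs_attr_search_ctx_get(base_ni, m);
	if (!ctx) {
		ntfs_mft_record_unmap(base_ni);
		return ENOMEM;
	}
	err = ntfs_attr_lookup(AT_DATA, AT_UNNAMED, 0, 0, NULL, 0, ctx);
	if (!err) {
		/* ctx->a is the found attribute, inside mft record ctx->m. */
	}
	ntfs_attr_search_ctx_put(ctx);
	ntfs_mft_record_unmap(base_ni);
	return err;
}
#endif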

/**
 * ntfs_attr_find_in_mft_record - find (next) attribute in mft record
 * @type:	attribute type to find
 * @name:	attribute name to find (optional, i.e. NULL means do not care)
 * @name_len:	attribute name length (only needed if @name present)
 * @val:	attribute value to find (optional, resident attributes only)
 * @val_len:	attribute value length (only needed if @val present)
 * @ctx:	search context with mft record and attribute to search from
 *
 * You should not need to call this function directly.  Use ntfs_attr_lookup()
 * instead.
 *
 * ntfs_attr_find_in_mft_record() takes a search context @ctx as parameter and
 * searches the mft record specified by @ctx->m, beginning at @ctx->a, for an
 * attribute of @type, optionally @name and @val.
 *
 * If the attribute is found, ntfs_attr_find_in_mft_record() returns 0 and
 * @ctx->a is set to point to the found attribute.
 *
 * If the attribute is not found, ENOENT is returned and @ctx->a is set to
 * point to the attribute before which the attribute being searched for would
 * need to be inserted if such an action were to be desired.
 *
 * On actual error, ntfs_attr_find_in_mft_record() returns EIO.  In this case
 * @ctx->a is undefined and in particular do not rely on it not having changed.
 *
 * If @ctx->is_first is 1, the search begins with @ctx->a itself.  If it is 0,
 * the search begins after @ctx->a.
 *
 * If @ctx->is_iteration is 1 and @type is AT_UNUSED this is not a search but
 * an iteration in which case each attribute in the mft record is returned in
 * turn with each call to ntfs_attr_find_in_mft_record().  Note all attributes
 * are returned including the attribute list attribute, unlike when
 * @ctx->is_iteration is 0 when it is not returned unless it is specifically
 * looked for.
 *
 * Similarly to the above, when @ctx->is_iteration is 1 and @type is not
 * AT_UNUSED all attributes of type @type are returned one after the other.
 *
 * If @name is AT_UNNAMED search for an unnamed attribute.  If @name is present
 * but not AT_UNNAMED search for a named attribute matching @name.  Otherwise,
 * match both named and unnamed attributes.
 *
 * Finally, the resident attribute value @val is looked for, if present.  If
 * @val is not present (NULL), @val_len is ignored.
 *
 * ntfs_attr_find_in_mft_record() only searches the specified mft record and it
 * ignores the presence of an attribute list attribute (unless it is the one
 * being searched for, obviously).  If you need to take attribute lists into
 * consideration, use ntfs_attr_lookup() instead (see below).  This also means
 * that you cannot use ntfs_attr_find_in_mft_record() to search for extent
 * records of non-resident attributes, as extents with lowest_vcn != 0 are
 * usually described by the attribute list attribute only.  Note that it is
 * possible that the first extent is only in the attribute list while the last
 * extent is in the base mft record, so do not rely on being able to find the
 * first extent in the base mft record.
 *
 * Warning: Never use @val when looking for attribute types which can be
 *	    non-resident as this most likely will result in a crash!
 *
 * Note, if the volume is mounted case sensitive we treat attribute names as
 * case sensitive and, vice versa, if the volume is not mounted case sensitive
 * we treat attribute names as case insensitive.
 */
errno_t ntfs_attr_find_in_mft_record(const ATTR_TYPE type,
		const ntfschar *name, const u32 name_len,
		const void *val, const u32 val_len, ntfs_attr_search_ctx *ctx)
{
	ATTR_RECORD *a;
	ntfs_volume *vol = ctx->ni->vol;
	const ntfschar *upcase = vol->upcase;
	const u32 upcase_len = vol->upcase_len;
	const BOOL case_sensitive = NVolCaseSensitive(vol);
	const BOOL is_iteration = ctx->is_iteration;

	/*
	 * Iterate over attributes in mft record starting at @ctx->a, or the
	 * attribute following that, if @ctx->is_first is true.
	 */
	if (ctx->is_first) {
		a = ctx->a;
		ctx->is_first = 0;
	} else
		a = (ATTR_RECORD*)((u8*)ctx->a + le32_to_cpu(ctx->a->length));
	for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
		if ((u8*)a < (u8*)ctx->m || (u8*)a > (u8*)ctx->m +
				le32_to_cpu(ctx->m->bytes_allocated))
			break;
		ctx->a = a;
		if (((!is_iteration || type != AT_UNUSED) &&
				le32_to_cpu(a->type) > le32_to_cpu(type)) ||
				a->type == AT_END)
			return ENOENT;
		if (!a->length)
			break;
		if (is_iteration) {
			if (type == AT_UNUSED || type == a->type)
				return 0;
		}
		if (a->type != type)
			continue;
		/*
		 * If @name is AT_UNNAMED we want an unnamed attribute.
		 * If @name is present, compare the two names.
		 * Otherwise, match any attribute.
		 */
		if (name == AT_UNNAMED) {
			/* The search failed if the found attribute is named. */
			if (a->name_length)
				return ENOENT;
		} else if (name) {
			unsigned len, ofs;

			len = a->name_length;
			ofs = le16_to_cpu(a->name_offset);
			if (ofs + (len * sizeof(ntfschar)) >
					le32_to_cpu(a->length))
				break;
			if (!ntfs_are_names_equal(name, name_len,
					(ntfschar*)((u8*)a + ofs), len,
					case_sensitive, upcase, upcase_len)) {
				int rc;

				rc = ntfs_collate_names(name, name_len,
						(ntfschar*)((u8*)a + ofs), len,
						1, FALSE, upcase, upcase_len);
				/*
				 * If @name collates before a->name, there is
				 * no matching attribute.
				 */
				if (rc == -1)
					return ENOENT;
				/*
				 * If the strings are not equal, continue
				 * searching.
				 */
				if (rc)
					continue;
				rc = ntfs_collate_names(name, name_len,
						(ntfschar*)((u8*)a + ofs), len,
						1, TRUE, upcase, upcase_len);
				if (rc == -1)
					return ENOENT;
				if (rc)
					continue;
			}
		}
		/*
		 * The names match or @name not present and attribute is
		 * unnamed.  If no @val specified, we have found the attribute
		 * and are done.
		 */
		if (!val)
			return 0;
		else {
			/* @val is present; compare values. */
			unsigned len, ofs;
			int rc;

			len = le32_to_cpu(a->value_length);
			ofs = le16_to_cpu(a->value_offset);
			if (ofs + len > le32_to_cpu(a->length))
				break;
			rc = memcmp(val, (u8*)a + ofs,
					len <= val_len ? len : val_len);
			/*
			 * If @val collates before the value of the current
			 * attribute, there is no matching attribute.
			 */
			if (!rc) {
				if (val_len == len)
					return 0;
				if (val_len < len)
					return ENOENT;
			} else if (rc < 0)
				return ENOENT;
		}
	}
	ntfs_error(vol->mp, "Inode is corrupt.  Run chkdsk.");
	NVolSetErrors(vol);
	return EIO;
}
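
/*
 * A compiled-out usage sketch, not part of the original driver: enumerating
 * every attribute record in a single mft record via @ctx->is_iteration as
 * described above.  @ctx is assumed to be freshly obtained from
 * ntfs_attr_search_ctx_get().  The example_* name is illustrative only.
 */
#if 0
static void example_enumerate_attributes(ntfs_attr_search_ctx *ctx)
{
	ctx->is_iteration = 1;
	while (!ntfs_attr_find_in_mft_record(AT_UNUSED, NULL, 0, NULL, 0,
			ctx))
		ntfs_debug("Found attribute, type 0x%x.",
				(unsigned)le32_to_cpu(ctx->a->type));
	/* ENOENT ends the iteration; EIO would indicate corruption. */
}
#endif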

/**
 * ntfs_attr_find_in_attribute_list - find an attribute in the attribute list
 * @type:	attribute type to find
 * @name:	attribute name to find (optional, i.e. NULL means do not care)
 * @name_len:	attribute name length (only needed if @name present)
 * @lowest_vcn:	lowest vcn to find (optional, non-resident attributes only)
 * @val:	attribute value to find (optional, resident attributes only)
 * @val_len:	attribute value length (only needed if @val present)
 * @ctx:	search context with mft record and attribute to search from
 *
 * You should not need to call this function directly.  Use ntfs_attr_lookup()
 * instead.
 *
 * Find an attribute by searching the attribute list for the corresponding
 * attribute list entry.  Having found the entry, map the mft record if the
 * attribute is in a different mft record/inode, find the attribute in there
 * via ntfs_attr_find_in_mft_record(), and return it.
 *
 * On first search @ctx->ni must be the base mft record and @ctx must have been
 * obtained from a call to ntfs_attr_search_ctx_get().  On subsequent calls
 * @ctx->ni can be any extent inode, too (@ctx->base_ni is then the base
 * inode).
 *
 * After finishing with the attribute/mft record you need to call
 * ntfs_attr_search_ctx_put() to clean up the search context (unmapping any
 * mapped mft records, etc).
 *
 * If the attribute is found, ntfs_attr_find_in_attribute_list() returns 0 and
 * @ctx->a is set to point to the found attribute.  @ctx->m is set to point to
 * the mft record in which @ctx->a is located and @ctx->al_entry is set to
 * point to the attribute list entry for the attribute.
 *
 * If the attribute is not found, ENOENT is returned and @ctx->a is set to
 * point to the attribute in the base mft record before which the attribute
 * being searched for would need to be inserted if such an action were to be
 * desired.  @ctx->m is set to point to the mft record in which @ctx->a is
 * located, i.e. the base mft record, and @ctx->al_entry is set to point to the
 * attribute list entry of the attribute before which the attribute being
 * searched for would need to be inserted if such an action were to be desired.
 *
 * Thus to insert the not found attribute, one wants to add the attribute to
 * @ctx->m (the base mft record) and if there is not enough space, the
 * attribute should be placed in a newly allocated extent mft record.  The
 * attribute list entry for the inserted attribute should be inserted in the
 * attribute list attribute at @ctx->al_entry.
 *
 * On actual error, ntfs_attr_find_in_attribute_list() returns EIO.  In this
 * case @ctx->a is undefined and in particular do not rely on it not having
 * changed.
 *
 * If @ctx->is_first is 1, the search begins with @ctx->a itself.  If it is 0,
 * the search begins after @ctx->a.
 *
 * If @name is AT_UNNAMED search for an unnamed attribute.  If @name is present
 * but not AT_UNNAMED search for a named attribute matching @name.  Otherwise,
 * match both named and unnamed attributes.
 *
 * Finally, the resident attribute value @val is looked for, if present.  If
 * @val is not present (NULL), @val_len is ignored.
 *
 * Warning: Never use @val when looking for attribute types which can be
 *	    non-resident as this most likely will result in a crash!
 */
static errno_t ntfs_attr_find_in_attribute_list(const ATTR_TYPE type,
		const ntfschar *name, const u32 name_len, const VCN lowest_vcn,
		const void *val, const u32 val_len, ntfs_attr_search_ctx *ctx)
{
	ntfs_inode *base_ni, *ni = ctx->ni;
	ntfs_volume *vol = ni->vol;
	ATTR_LIST_ENTRY *al_entry, *next_al_entry;
	u8 *al_start, *al_end;
	ATTR_RECORD *a;
	ntfschar *al_name;
	const ntfschar *upcase = vol->upcase;
	const u32 upcase_len = vol->upcase_len;
	u32 al_name_len;
	errno_t err = 0;
	static const char es[] = " Unmount and run chkdsk.";
	const BOOL case_sensitive = NVolCaseSensitive(vol);

	if (ctx->is_iteration)
		panic("%s(): ctx->is_iteration\n", __FUNCTION__);
	base_ni = ctx->base_ni;
	ntfs_debug("Entering for mft_no 0x%llx, type 0x%x.",
			(unsigned long long)ni->mft_no, le32_to_cpu(type));
	if (!base_ni) {
		/* First call happens with the base mft record. */
		base_ni = ctx->base_ni = ctx->ni;
		ctx->base_m = ctx->m;
	}
	if (ni == base_ni)
		ctx->base_a = ctx->a;
	if (type == AT_END)
		goto not_found;
	al_start = base_ni->attr_list;
	al_end = al_start + base_ni->attr_list_size;
	if (!ctx->al_entry)
		ctx->al_entry = (ATTR_LIST_ENTRY*)al_start;
	/*
	 * Iterate over entries in attribute list starting at @ctx->al_entry,
	 * or the entry following that, depending on the value of
	 * @ctx->is_first.
	 */
	if (ctx->is_first) {
		al_entry = ctx->al_entry;
		ctx->is_first = 0;
	} else
		al_entry = (ATTR_LIST_ENTRY*)((u8*)ctx->al_entry +
				le16_to_cpu(ctx->al_entry->length));
	for (;; al_entry = next_al_entry) {
		/* Out of bounds check. */
		if ((u8*)al_entry < base_ni->attr_list ||
				(u8*)al_entry > al_end)
			break;	/* Inode is corrupt. */
		ctx->al_entry = al_entry;
		/* Catch the end of the attribute list. */
		if ((u8*)al_entry == al_end)
			goto not_found;
		if (!al_entry->length)
			break;
		if ((u8*)al_entry + 6 > al_end || (u8*)al_entry +
				le16_to_cpu(al_entry->length) > al_end)
			break;
		next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
				le16_to_cpu(al_entry->length));
		if (al_entry->type != type) {
			if (le32_to_cpu(al_entry->type) < le32_to_cpu(type))
				continue;
			goto not_found;
		}
		/*
		 * If @name is AT_UNNAMED we want an unnamed attribute.
		 * If @name is present, compare the two names.
		 * Otherwise, match any attribute.
		 */
		al_name_len = al_entry->name_length;
		al_name = (ntfschar*)((u8*)al_entry + al_entry->name_offset);
		if (name == AT_UNNAMED) {
			if (al_name_len)
				goto not_found;
		} else if (name && !ntfs_are_names_equal(al_name, al_name_len,
				name, name_len, case_sensitive, upcase,
				upcase_len)) {
			int rc;

			rc = ntfs_collate_names(name, name_len, al_name,
					al_name_len, 1, FALSE,
					upcase, upcase_len);
			/*
			 * If @name collates before al_name, there is no
			 * matching attribute.
			 */
			if (rc == -1)
				goto not_found;
			/* If the strings are not equal, continue search. */
			if (rc)
				continue;
			/*
			 * FIXME: Reverse engineering showed 0, IGNORE_CASE but
			 * that would be inconsistent with
			 * ntfs_attr_find_in_mft_record().  The subsequent rc
			 * checks were also different.  Perhaps I made a
			 * mistake in one of the two.  Need to recheck which is
			 * correct or at least see what is going on...
			 */
			rc = ntfs_collate_names(name, name_len, al_name,
					al_name_len, 1, TRUE,
					vol->upcase, vol->upcase_len);
			if (rc == -1)
				goto not_found;
			if (rc)
				continue;
		}
		/*
		 * The names match or @name not present and attribute is
		 * unnamed.  Now check @lowest_vcn.  Continue search if the
		 * next attribute list entry still fits @lowest_vcn.  Otherwise
		 * we have reached the right one or the search has failed.
		 */
		if (lowest_vcn && (u8*)next_al_entry >= al_start &&
				(u8*)next_al_entry + 6 < al_end &&
				(u8*)next_al_entry + le16_to_cpu(
					next_al_entry->length) <= al_end &&
				sle64_to_cpu(next_al_entry->lowest_vcn) <=
					lowest_vcn &&
				next_al_entry->type == al_entry->type &&
				next_al_entry->name_length == al_name_len &&
				ntfs_are_names_equal((ntfschar*)((u8*)
					next_al_entry +
					next_al_entry->name_offset),
					next_al_entry->name_length,
					al_name, al_name_len, case_sensitive,
					vol->upcase, vol->upcase_len))
			continue;
		if (MREF_LE(al_entry->mft_reference) == ni->mft_no) {
			if (MSEQNO_LE(al_entry->mft_reference) != ni->seq_no) {
				ntfs_error(vol->mp, "Found stale mft "
						"reference in attribute list "
						"of base inode 0x%llx.%s",
						(unsigned long long)
						base_ni->mft_no, es);
				err = EIO;
				break;
			}
		} else { /* Mft references do not match. */
			/* If there is a mapped record unmap it first. */
			if (ni != base_ni)
				ntfs_extent_mft_record_unmap(ni);
			/* Do we want the base record back? */
			if (MREF_LE(al_entry->mft_reference) ==
					base_ni->mft_no) {
				ni = ctx->ni = base_ni;
				ctx->m = ctx->base_m;
			} else {
				/* We want an extent record. */
				err = ntfs_extent_mft_record_map_ext(base_ni,
						le64_to_cpu(
						al_entry->mft_reference), &ni,
						&ctx->m, ctx->is_mft_locked);
				if (err) {
					ntfs_error(vol->mp, "Failed to map "
							"extent mft record "
							"0x%llx of base inode "
							"0x%llx.%s",
							(unsigned long long)
							MREF_LE(al_entry->
							mft_reference),
							(unsigned long long)
							base_ni->mft_no, es);
					if (err == ENOENT)
						err = EIO;
					/* Cause @ctx to be sanitized below. */
					ni = NULL;
					break;
				}
				ctx->ni = ni;
			}
		}
		a = ctx->a = (ATTR_RECORD*)((u8*)ctx->m +
				le16_to_cpu(ctx->m->attrs_offset));
		/*
		 * ctx->ni, ctx->m, and ctx->a now point to the mft record
		 * containing the attribute represented by the current
		 * al_entry.
		 *
		 * We could call into ntfs_attr_find_in_mft_record() to find
		 * the right attribute in this mft record but this would be
		 * less efficient and not quite accurate as it ignores the
		 * attribute instance numbers for example which become
		 * important when one plays with attribute lists.  Also,
		 * because a proper match has been found in the attribute list
		 * entry above, the comparison can now be optimized.  So it is
		 * worth re-implementing a simplified
		 * ntfs_attr_find_in_mft_record() here.
		 *
		 * Use a manual loop so we can still use break and continue
		 * with the same meanings as above.
		 */
do_next_attr_loop:
		if ((u8*)a < (u8*)ctx->m || (u8*)a > (u8*)ctx->m +
				le32_to_cpu(ctx->m->bytes_allocated))
			break;
		if (a->type == AT_END)
			continue;
		if (!a->length)
			break;
		if (al_entry->instance != a->instance)
			goto do_next_attr;
		/*
		 * If the type and/or the name are mismatched between the
		 * attribute list entry and the attribute record, there is
		 * corruption so we break and return error EIO.
		 */
		if (al_entry->type != a->type)
			break;
		if (!ntfs_are_names_equal((ntfschar*)((u8*)a +
				le16_to_cpu(a->name_offset)), a->name_length,
				al_name, al_name_len, case_sensitive,
				vol->upcase, vol->upcase_len))
			break;
		ctx->a = a;
		/*
		 * If no @val specified or @val specified and it matches, we
		 * have found it!
		 */
		if (!val || (!a->non_resident &&
				le32_to_cpu(a->value_length) == val_len &&
				!bcmp((u8*)a + le16_to_cpu(a->value_offset),
				val, val_len))) {
			ntfs_debug("Done, found.");
			return 0;
		}
do_next_attr:
		/* Proceed to the next attribute in the current mft record. */
		a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length));
		goto do_next_attr_loop;
	}
	if (!err) {
		ntfs_error(vol->mp, "Base inode 0x%llx contains corrupt "
				"attribute list attribute.%s",
				(unsigned long long)base_ni->mft_no, es);
		err = EIO;
	}
	if (ni != base_ni) {
		if (ni)
			ntfs_extent_mft_record_unmap(ni);
		ctx->ni = base_ni;
		ctx->m = ctx->base_m;
		ctx->a = ctx->base_a;
	}
	if (err != ENOMEM)
		NVolSetErrors(vol);
	return err;
not_found:
	/*
	 * If we were looking for AT_END, we reset the search context @ctx and
	 * use ntfs_attr_find_in_mft_record() to seek to the end of the base
	 * mft record.
	 */
	if (type == AT_END) {
		ntfs_attr_search_ctx_reinit(ctx);
		return ntfs_attr_find_in_mft_record(AT_END, NULL, 0, NULL, 0,
				ctx);
	}
	/*
	 * The attribute was not found.  Before we return, we want to ensure
	 * @ctx->m and @ctx->a indicate the position at which the attribute
	 * should be inserted in the base mft record.  Since we also want to
	 * preserve @ctx->al_entry we cannot reinitialize the search context
	 * using ntfs_attr_search_ctx_reinit() as this would set @ctx->al_entry
	 * to NULL.  Thus we do the necessary bits manually (see
	 * ntfs_attr_search_ctx_init() above).  Note, we postpone setting
	 * @base_a until after the call to ntfs_attr_find_in_mft_record() as we
	 * do not know the correct value yet.
	 */
	if (ni != base_ni)
		ntfs_extent_mft_record_unmap(ni);
	ctx->m = ctx->base_m;
	ctx->a = (ATTR_RECORD*)((u8*)ctx->m +
			le16_to_cpu(ctx->m->attrs_offset));
	ctx->is_first = 1;
	ctx->ni = base_ni;
	/*
	 * In case there are multiple matches in the base mft record, need to
	 * keep enumerating until we get an attribute not found response (or
	 * another error), otherwise we would keep returning the same attribute
	 * over and over again and all programs using us for enumeration would
	 * lock up in a tight loop.
	 */
	do {
		err = ntfs_attr_find_in_mft_record(type, name, name_len,
				val, val_len, ctx);
	} while (!err);
	ctx->base_a = ctx->a;
	ntfs_debug("Done, not found.");
	return err;
}

/**
 * ntfs_attr_lookup - find an attribute in an ntfs inode
 * @type:	attribute type to find
 * @name:	attribute name to find (optional, i.e. NULL means do not care)
 * @name_len:	attribute name length (only needed if @name present)
 * @lowest_vcn:	lowest vcn to find (optional, non-resident attributes only)
 * @val:	attribute value to find (optional, resident attributes only)
 * @val_len:	attribute value length (only needed if @val present)
 * @ctx:	search context with mft record and attribute to search from
 *
 * Find an attribute in an ntfs inode.  On first search @ctx->ni must be the
 * base mft record and @ctx must have been obtained from a call to
 * ntfs_attr_search_ctx_get().
 *
 * This function transparently handles attribute lists and @ctx is used to
 * continue searches where they were left off at.
 *
 * After finishing with the attribute/mft record you need to call
 * ntfs_attr_search_ctx_put() to clean up the search context (unmapping any
 * mapped mft records, etc).
 *
 * Return 0 if the search was successful and errno if not.
 *
 * On success, @ctx->a is the found attribute and it is in mft record @ctx->m.
 * If an attribute list attribute is present, @ctx->al_entry is the attribute
 * list entry of the found attribute.
 *
 * On error ENOENT, @ctx->a is the attribute which collates just after the
 * attribute being searched for, i.e. if one wants to add the attribute to the
 * mft record this is the correct place to insert it into.  If an attribute
 * list attribute is present, @ctx->al_entry is the attribute list entry which
 * collates just after the attribute list entry of the attribute being searched
 * for, i.e. if one wants to add the attribute to the mft record this is the
 * correct place to insert its attribute list entry into.
 *
 * When errno != ENOENT, an error occurred during the lookup.  @ctx->a is then
 * undefined and in particular you should not rely on it not having changed.
 *
 * Warning: Never use @val when looking for attribute types which can be
 *	    non-resident as this most likely will result in a crash!
 */
errno_t ntfs_attr_lookup(const ATTR_TYPE type,
		const ntfschar *name, const u32 name_len, const VCN lowest_vcn,
		const void *val, const u32 val_len, ntfs_attr_search_ctx *ctx)
{
	ntfs_inode *base_ni;

	ntfs_debug("Entering.");
	if (ctx->base_ni)
		base_ni = ctx->base_ni;
	else
		base_ni = ctx->ni;
	/* Sanity check, just for debugging really. */
	if (!base_ni)
		panic("%s(): !base_ni\n", __FUNCTION__);
	if (!NInoAttrList(base_ni) || type == AT_ATTRIBUTE_LIST)
		return ntfs_attr_find_in_mft_record(type, name, name_len,
				val, val_len, ctx);
	if (ctx->is_iteration)
		panic("%s(): ctx->is_iteration\n", __FUNCTION__);
	return ntfs_attr_find_in_attribute_list(type, name, name_len,
			lowest_vcn, val, val_len, ctx);
}
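
/*
 * A compiled-out usage sketch, not part of the original driver: looking up
 * the extent of a non-resident attribute that contains vcn @vcn, following
 * the attribute list attribute transparently if one is present.  The
 * example_* name is illustrative only.
 */
#if 0
static errno_t example_lookup_extent(ntfs_inode *ni,
		ntfs_attr_search_ctx *ctx, const VCN vcn)
{
	errno_t err;

	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, vcn, NULL,
			0, ctx);
	if (err) {
		/* For vcn based lookups ENOENT implies corruption. */
		return err == ENOENT ? EIO : err;
	}
	/*
	 * ctx->a is now the extent whose lowest_vcn/highest_vcn range
	 * contains @vcn and it resides in mft record ctx->m.
	 */
	return 0;
}
#endif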

/**
 * ntfs_attr_find_in_attrdef - find an attribute in the $AttrDef system file
 * @vol:	ntfs volume to which the attribute belongs
 * @type:	attribute type which to find
 *
 * Search for the attribute definition record corresponding to the attribute
 * @type in the $AttrDef system file.
 *
 * Return the attribute type definition record if found and NULL if not found.
 */
static ATTR_DEF *ntfs_attr_find_in_attrdef(const ntfs_volume *vol,
		const ATTR_TYPE type)
{
	ATTR_DEF *ad;

	if (!vol->attrdef)
		panic("%s(): !vol->attrdef\n", __FUNCTION__);
	if (!type)
		panic("%s(): !type\n", __FUNCTION__);
	for (ad = vol->attrdef; (u8*)ad - (u8*)vol->attrdef <
			vol->attrdef_size && ad->type; ++ad) {
		/* If we have not found it yet, carry on searching. */
		if (le32_to_cpu(type) > le32_to_cpu(ad->type))
			continue;
		/* If we have found the attribute, return it. */
		if (type == ad->type)
			return ad;
		/* We have gone too far already.  No point in continuing. */
		break;
	}
	/* Attribute not found. */
	ntfs_debug("Attribute type 0x%x not found in $AttrDef.",
			le32_to_cpu(type));
	return NULL;
}

/**
 * ntfs_attr_size_bounds_check - check a size of an attribute type for validity
 * @vol:	ntfs volume to which the attribute belongs
 * @type:	attribute type which to check
 * @size:	size which to check
 *
 * Check whether the @size in bytes is valid for an attribute of @type on the
 * ntfs volume @vol.  This information is obtained from $AttrDef system file.
 *
 * Return 0 if valid, ERANGE if not valid, and ENOENT if the attribute is not
 * listed in $AttrDef.
 */
errno_t ntfs_attr_size_bounds_check(const ntfs_volume *vol,
		const ATTR_TYPE type, const s64 size)
{
	ATTR_DEF *ad;

	if (size < 0)
		panic("%s(): size < 0\n", __FUNCTION__);
	/*
	 * $ATTRIBUTE_LIST has a maximum size of 256kiB, but this is not
	 * listed in $AttrDef.
	 */
	if (type == AT_ATTRIBUTE_LIST && size > NTFS_MAX_ATTR_LIST_SIZE)
		return ERANGE;
	/* Get the $AttrDef entry for the attribute @type. */
	ad = ntfs_attr_find_in_attrdef(vol, type);
	if (!ad)
		return ENOENT;
	/* Do the bounds check. */
	if ((sle64_to_cpu(ad->min_size) > 0 &&
			size < sle64_to_cpu(ad->min_size)) ||
			(sle64_to_cpu(ad->max_size) > 0 &&
			size > sle64_to_cpu(ad->max_size)) ||
			(u64)size > NTFS_MAX_ATTRIBUTE_SIZE)
		return ERANGE;
	return 0;
}
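
/*
 * A compiled-out usage sketch, not part of the original driver: validating
 * a proposed new attribute size before a resize.  The example_* name is
 * illustrative only.
 */
#if 0
static errno_t example_check_new_size(ntfs_inode *ni, const s64 new_size)
{
	errno_t err;

	err = ntfs_attr_size_bounds_check(ni->vol, ni->type, new_size);
	if (err == ERANGE)
		ntfs_error(ni->vol->mp, "Size 0x%llx is out of the bounds "
				"$AttrDef imposes on attribute type 0x%x.",
				(unsigned long long)new_size,
				(unsigned)le32_to_cpu(ni->type));
	return err;
}
#endif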

/**
 * ntfs_attr_can_be_non_resident - check if an attribute can be non-resident
 * @vol:	ntfs volume to which the attribute belongs
 * @type:	attribute type which to check
 *
 * Check whether the attribute of @type on the ntfs volume @vol is allowed to
 * be non-resident.  This information is obtained from $AttrDef system file.
 *
 * Return 0 if the attribute is allowed to be non-resident, EPERM if not, and
 * ENOENT if the attribute is not listed in $AttrDef.
 */
static errno_t ntfs_attr_can_be_non_resident(const ntfs_volume *vol,
		const ATTR_TYPE type)
{
	ATTR_DEF *ad;

	/* Find the attribute definition record in $AttrDef. */
	ad = ntfs_attr_find_in_attrdef(vol, type);
	if (!ad)
		return ENOENT;
	/* Check the flags and return the result. */
	if (ad->flags & ATTR_DEF_RESIDENT)
		return EPERM;
	return 0;
}

/**
 * ntfs_attr_can_be_resident - check if an attribute can be resident
 * @vol:	ntfs volume to which the attribute belongs
 * @type:	attribute type which to check
 *
 * Check whether the attribute of @type on the ntfs volume @vol is allowed to
 * be resident.  This information is derived from our ntfs knowledge and may
 * not be completely accurate, especially when user defined attributes are
 * present.  Basically we allow everything to be resident except for index
 * allocation attributes.
 *
 * Return 0 if the attribute is allowed to be resident and EPERM if not.
 *
 * Warning: In the system file $MFT the attribute $Bitmap must be non-resident
 *	    otherwise windows will not boot (blue screen of death)!  We cannot
 *	    check for this here as we do not know which inode's $Bitmap is
 *	    being asked about so the caller needs to special case this.
 */
errno_t ntfs_attr_can_be_resident(const ntfs_volume *vol, const ATTR_TYPE type)
{
	if (type == AT_INDEX_ALLOCATION)
		return EPERM;
	return 0;
}

/**
 * ntfs_attr_record_is_only_one - check if an attribute is the only one
 * @m:		the mft record in which the attribute to check resides
 * @a:		the attribute to check
 *
 * Check if the attribute @a is the only attribute record in its mft record @m.
 *
 * Return true if @a is the only attribute record in its mft record @m and
 * false if @a is not the only attribute record in its mft record @m.
 */
BOOL ntfs_attr_record_is_only_one(MFT_RECORD *m, ATTR_RECORD *a)
{
	ATTR_RECORD *first_a, *next_a;

	first_a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset));
	next_a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length));
	return (first_a == a && next_a->type == AT_END);
}

/**
 * ntfs_attr_record_delete_internal - delete attribute record from mft record
 * @m:		mft record containing attribute record to delete
 * @a:		attribute record to delete
 *
 * Delete the attribute record @a, i.e. the resident part of the attribute,
 * from the mft record @m.
 *
 * This function cannot fail.
 *
 * Note the caller is responsible for marking the mft record dirty after
 * calling this function.
 */
void ntfs_attr_record_delete_internal(MFT_RECORD *m, ATTR_RECORD *a)
{
	const u32 new_muse = le32_to_cpu(m->bytes_in_use) -
			le32_to_cpu(a->length);
	/* Move attributes following @a into the position of @a. */
	memmove(a, (u8*)a + le32_to_cpu(a->length),
			new_muse - ((u8*)a - (u8*)m));
	/* Adjust @m to reflect the change in used space. */
	m->bytes_in_use = cpu_to_le32(new_muse);
}

/**
 * ntfs_attr_record_delete - delete an attribute record from its mft record
 * @base_ni:	base ntfs inode from which to delete the attribute
 * @ctx:	attribute search context describing attribute record to delete
 *
 * Delete the attribute record, i.e. the resident part of the attribute,
 * described by @ctx->a from its mft record @ctx->m and mark the mft record
 * dirty so it gets written out later.
 *
 * If an attribute list attribute is present, also remove the attribute list
 * entry corresponding to the attribute being deleted and update the attribute
 * list attribute record accordingly.
 *
 * If the only attribute in the mft record is the attribute being deleted then
 * instead of deleting the attribute we free the extent mft record altogether
 * taking care to disconnect it from the base ntfs inode in the process.  As
 * above we update the attribute list attribute accordingly.
 *
 * If we end up freeing the extent mft record we go on to check the attribute
 * list attribute and if it no longer references any extent mft records we
 * remove the attribute list attribute altogether and update the base ntfs
 * inode to reflect the changed inode state.
 *
 * Return 0 on success and the error code on error.
 *
 * Note that on success the attribute search record is no longer valid and the
 * caller must either release it by calling ntfs_attr_search_ctx_put() or
 * reinitialize it by calling ntfs_attr_search_ctx_reinit().  Looking at the
 * search context or using it to call other functions would have unpredictable
 * results and could lead to crashes and file system corruption.
 */
errno_t ntfs_attr_record_delete(ntfs_inode *base_ni, ntfs_attr_search_ctx *ctx)
{
	ntfs_inode *ni;
	MFT_RECORD *m;
	ATTR_RECORD *a;
	ATTR_LIST_ENTRY *al_entry;
	errno_t err;
	unsigned al_ofs;
	BOOL al_needed;

	ni = ctx->ni;
	m = ctx->m;
	a = ctx->a;
	ntfs_debug("Entering for attribute type 0x%x located in %s mft "
			"record 0x%llx.  Attribute list attribute is "
			"%spresent.", (unsigned)le32_to_cpu(a->type),
			(base_ni == ni) ? "base" : "extent",
			(unsigned long long)ni->mft_no,
			NInoAttrList(base_ni) ? "" : "not ");
	/*
	 * If there is no attribute list attribute, the mft record must be a
	 * base mft record and thus it cannot be becoming empty as a
	 * consequence of deleting the attribute record.  Thus for inodes
	 * without an attribute list attribute we have a fast path of simply
	 * going ahead and deleting the attribute record and returning.
	 */
	if (!NInoAttrList(base_ni)) {
		ntfs_attr_record_delete_internal(m, a);
		NInoSetMrecNeedsDirtying(base_ni);
		ntfs_debug("Done (no attribute list attribute).");
		return 0;
	}
	if (a->type == AT_ATTRIBUTE_LIST)
		panic("%s(): a->type == AT_ATTRIBUTE_LIST\n", __FUNCTION__);
	al_entry = ctx->al_entry;
	if (!al_entry)
		panic("%s(): !al_entry\n", __FUNCTION__);
	/*
	 * We have an attribute list attribute.  To begin with check if the
	 * attribute to be deleted is in the base mft record or if it is not
	 * the only attribute in the extent mft record.  In both of these cases
	 * we need to delete the attribute record from its mft record.
	 *
	 * Otherwise the attribute to be deleted is in an extent mft record and
	 * it is the only attribute in the extent mft record thus we need to
	 * free the extent mft record instead of deleting the attribute record.
	 */
	if (base_ni == ni || (u8*)m + le16_to_cpu(m->attrs_offset) != (u8*)a ||
			((ATTR_RECORD*)((u8*)a +
			le32_to_cpu(a->length)))->type != AT_END) {
		ntfs_attr_record_delete_internal(m, a);
		/*
		 * If the attribute was not in the base mft record mark the
		 * extent mft record dirty so it gets written out later.  If
		 * the attribute was in the base mft record it will be marked
		 * dirty later when the attribute list attribute record is
		 * updated which is in the base mft record by definition.
		 *
		 * We also unmap the extent mft record so we get to the same
		 * state as in the above case where we freed the extent mft
		 * record and we set @ctx->ni to equal the base inode @base_ni
		 * so that the search context is initialized from scratch or
		 * simply freed if the caller reinitializes or releases the
		 * search context respectively.
		 */
		if (base_ni != ni) {
			NInoSetMrecNeedsDirtying(ni);
			ntfs_extent_mft_record_unmap(ni);
			ctx->ni = base_ni;
		}
	} else {
		err = ntfs_extent_mft_record_free(base_ni, ni, m);
		if (err) {
			/*
			 * Ignore the error as we just end up with an unused
			 * mft record that is marked in use.
			 */
			ntfs_error(ni->vol->mp, "Failed to free extent mft_no "
					"0x%llx (error %d).  Unmount and run "
					"chkdsk to recover the lost inode.",
					(unsigned long long)ni->mft_no, err);
			NVolSetErrors(ni->vol);
			/*
			 * Release the extent mft record after dirtying it thus
			 * simulating the effect of freeing it.
			 */
			NInoSetMrecNeedsDirtying(ni);
			ntfs_extent_mft_record_unmap(ni);
		}
		/*
		 * The attribute search context still points to the no longer
		 * mapped extent inode thus we need to change it to point to
		 * the base inode instead so the context can be reinitialized
		 * or released safely.
		 */
		ctx->ni = base_ni;
		/*
		 * Check the attribute list attribute.  If there are no other
		 * attribute list attribute entries referencing extent mft
		 * records delete the attribute list attribute altogether.
		 *
		 * If this fails it does not matter as we simply retain the
		 * attribute list attribute so we ignore the error and go on to
		 * delete the attribute list attribute entry instead.
		 *
		 * If there are other attribute list attribute entries
		 * referencing extent mft records we still need the attribute
		 * list attribute thus we go on to delete the attribute list
		 * entry corresponding to the attribute record we just deleted
		 * by freeing its extent mft record.
		 */
		err = ntfs_attr_list_is_needed(base_ni, al_entry, &al_needed);
		if (err)
			ntfs_warning(ni->vol->mp, "Failed to determine if "
					"attribute list attribute of mft_no "
					"0x%llx if still needed (error %d).  "
					"Assuming it is still needed and "
					"continuing.",
					(unsigned long long)base_ni->mft_no,
					err);
		else if (!al_needed) {
			/*
			 * No more extent mft records are in use.  Delete the
			 * attribute list attribute.
			 */
			ntfs_attr_search_ctx_reinit(ctx);
			err = ntfs_attr_list_delete(base_ni, ctx);
			if (!err) {
				/*
				 * We deleted the attribute list attribute and
				 * this will have updated the base inode
				 * appropriately thus we are done.
				 */
				ntfs_debug("Done (deleted attribute list "
						"attribute).");
				return 0;
			}
			ntfs_warning(ni->vol->mp, "Failed to delete attribute "
					"list attribute of mft_no 0x%llx "
					"(error %d).  Continuing by trying to "
					"delete the attribute list entry of "
					"the deleted attribute instead.",
					(unsigned long long)base_ni->mft_no,
					err);
		}
	}
	/*
	 * Both @ctx and @ni are now invalid and cannot be used any more which
	 * is fine as we have finished dealing with the attribute record.
	 *
	 * We now need to delete the corresponding attribute list attribute
	 * entry.
	 */
	al_ofs = (u8*)al_entry - base_ni->attr_list;
	ntfs_attr_list_entry_delete(base_ni, al_entry);
	ntfs_attr_search_ctx_reinit(ctx);
	err = ntfs_attr_list_sync_shrink(base_ni, al_ofs, ctx);
	if (!err) {
		ntfs_debug("Done (deleted attribute list attribute entry).");
		return 0;
	}
	NInoSetMrecNeedsDirtying(base_ni);
	ntfs_error(ni->vol->mp, "Failed to delete attribute list attribute "
			"entry in base mft_no 0x%llx (error %d).  Leaving "
			"inconsistent metadata.  Unmount and run chkdsk.",
			(unsigned long long)base_ni->mft_no, err);
	NVolSetErrors(ni->vol);
	return err;
}
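
/*
 * Illustrative sketch (not driver code, hence disabled): a typical caller of
 * ntfs_attr_record_delete() looks the attribute up with a search context
 * first and, per the warning above, must not touch the context afterwards
 * other than to reinitialize or release it.  The helper name and the choice
 * of an unnamed $DATA attribute are hypothetical.
 */
#if 0
static errno_t example_attr_delete(ntfs_inode *base_ni, MFT_RECORD *base_m)
{
	ntfs_attr_search_ctx ctx;
	errno_t err;

	ntfs_attr_search_ctx_init(&ctx, base_ni, base_m);
	err = ntfs_attr_lookup(AT_DATA, AT_UNNAMED, 0, 0, NULL, 0, &ctx);
	if (!err)
		err = ntfs_attr_record_delete(base_ni, &ctx);
	/* The context is not valid for further lookups without this. */
	ntfs_attr_search_ctx_reinit(&ctx);
	return err;
}
#endif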

/**
 * ntfs_attr_record_make_space - make space for a new attribute record
 * @m:		mft record in which to make space for the new attribute record
 * @a:		attribute record in front of which to make space
 * @size:	byte size of the new attribute record for which to make space
 *
 * Make space for a new attribute record of size @size in the mft record @m, in
 * front of the existing attribute record @a.
 *
 * Return 0 on success and errno on error.  The following error codes are
 * defined:
 *	ENOSPC - Not enough space in the mft record @m.
 *
 * Note: On error, no modifications have been performed whatsoever.
 */
errno_t ntfs_attr_record_make_space(MFT_RECORD *m, ATTR_RECORD *a, u32 size)
{
	u32 new_muse;
	const u32 muse = le32_to_cpu(m->bytes_in_use);
	/* Align to 8 bytes if it is not already done. */
	if (size & 7)
		size = (size + 7) & ~7;
	new_muse = muse + size;
	/* Not enough space in this mft record. */
	if (new_muse > le32_to_cpu(m->bytes_allocated))
		return ENOSPC;
	/* Move attributes starting with @a to make space of @size bytes. */
	memmove((u8*)a + size, a, muse - ((u8*)a - (u8*)m));
	/* Adjust @m to reflect the change in used space. */
	m->bytes_in_use = cpu_to_le32(new_muse);
	/* Clear the created space so we start with a clean slate. */
	bzero(a, size);
	/*
	 * Set the attribute size in the newly created attribute, now at @a.
	 * We do this here so that the caller does not need to worry about
	 * rounding up the size to set the attribute length.
	 */
	a->length = cpu_to_le32(size);
	return 0;
}
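
/*
 * Illustrative sketch (disabled): ntfs_attr_record_make_space() rounds the
 * requested size up to an 8-byte boundary and sets @a->length itself, so a
 * caller may pass an unaligned size and read the rounded length back.  The
 * helper below is hypothetical.
 */
#if 0
static errno_t example_make_space(MFT_RECORD *m, ATTR_RECORD *a)
{
	errno_t err;

	/* Ask for 30 bytes; 32 bytes are actually carved out and zeroed. */
	err = ntfs_attr_record_make_space(m, a, 30);
	if (err)	/* Can only be ENOSPC; @m is then unmodified. */
		return err;
	/* @a->length is already cpu_to_le32(32); fill in the rest. */
	return 0;
}
#endif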

/**
 * ntfs_attr_record_resize - resize an attribute record
 * @m:		mft record containing attribute record
 * @a:		attribute record to resize
 * @new_size:	new size in bytes to which to resize the attribute record @a
 *
 * Resize the attribute record @a, i.e. the resident part of the attribute, in
 * the mft record @m to @new_size bytes.
 *
 * Return 0 on success and errno on error.  The following error codes are
 * defined:
 *	ENOSPC	- Not enough space in the mft record @m to perform the resize.
 *
 * Note: On error, no modifications have been performed whatsoever.
 *
 * Warning: If you make a record smaller without having copied all the data you
 *	    are interested in the data may be overwritten.
 */
errno_t ntfs_attr_record_resize(MFT_RECORD *m, ATTR_RECORD *a, u32 new_size)
{
	const u32 old_size = le32_to_cpu(a->length);

	ntfs_debug("Entering for new_size %u.", new_size);
	/* Align to 8 bytes if it is not already done. */
	if (new_size & 7)
		new_size = (new_size + 7) & ~7;
	/* If the actual attribute length has changed, move things around. */
	if (new_size != old_size) {
		const u32 muse = le32_to_cpu(m->bytes_in_use);
		const u32 new_muse = muse - old_size + new_size;
		/* Not enough space in this mft record. */
		if (new_muse > le32_to_cpu(m->bytes_allocated))
			return ENOSPC;
		/* Move attributes following @a to their new location. */
		memmove((u8*)a + new_size, (u8*)a + old_size,
				muse - ((u8*)a - (u8*)m) - old_size);
		/* Adjust @m to reflect the change in used space. */
		m->bytes_in_use = cpu_to_le32(new_muse);
		/* Adjust @a to reflect the new size. */
		if (new_size >= offsetof(ATTR_REC, length) + sizeof(a->length))
			a->length = cpu_to_le32(new_size);
	}
	return 0;
}
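
/*
 * Illustrative sketch (disabled): growing a resident attribute record with
 * ntfs_attr_record_resize() and falling back gracefully on ENOSPC, the only
 * defined error.  The helper is hypothetical.
 */
#if 0
static errno_t example_grow_record(MFT_RECORD *m, ATTR_RECORD *a, u32 extra)
{
	const u32 new_size = le32_to_cpu(a->length) + extra;

	/*
	 * On success the following attributes have been moved and both
	 * @m->bytes_in_use and @a->length have been updated.
	 */
	if (ntfs_attr_record_resize(m, a, new_size))
		return ENOSPC;	/* Nothing was modified. */
	return 0;
}
#endif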

/**
 * ntfs_attr_mapping_pairs_update - update an attribute's mapping pairs array
 * @base_ni:	base ntfs inode to which the attribute belongs
 * @ni:		ntfs inode of attribute whose mapping pairs array to update
 * @first_vcn:	first vcn which to update in the mapping pairs array
 * @last_vcn:	last vcn which to update in the mapping pairs array
 * @ctx:	search context describing the attribute to work on or NULL
 *
 * Create or update the mapping pairs arrays from the locked runlist of the
 * attribute @ni, i.e. @ni->rl, starting at vcn @first_vcn and finishing with
 * vcn @last_vcn.  The update can actually start before @first_vcn and finish
 * after @last_vcn but guarantees to at least include the range between
 * @first_vcn and @last_vcn, inclusive.
 *
 * This function is called from a variety of places after clusters have been
 * allocated to and/or freed from an attribute.  The runlist has already been
 * updated to reflect the allocated/freed clusters.  This function takes the
 * modified runlist range and syncs it to the attribute record(s) by
 * compressing the runlist into mapping pairs array fragments and writing them
 * into the attribute record(s) of the attribute.
 *
 * This function also updates the attribute sizes using the values from the
 * ntfs inode @ni and syncs them to the base attribute record and if the
 * attribute has become sparse but the attribute record is not marked sparse or
 * the attribute is no longer sparse but the attribute record is marked sparse
 * the base attribute record is updated to reflect the changed state which
 * involves setting/clearing the sparse flag as well as the addition/removal of
 * the compressed size to the attribute record.  When the compressed size is
 * added this can lead to a larger portion of the mapping pairs array being
 * updated because there may not be enough space in the mft record to extend
 * the base attribute record to fit the compressed size.  When updating the
 * attribute record the compression state of the attribute is also taken into
 * consideration as the compressed size is used both with compressed and sparse
 * attributes.
 *
 * The update can involve the allocation/freeing of extent mft records and/or
 * extent attribute records.  If this happens the attribute list attribute in
 * the base ntfs inode @base_ni is updated appropriately both in memory and in
 * the attribute list attribute record in the base mft record.
 *
 * A @last_vcn of -1 means end of runlist and in that case the mapping pairs
 * array corresponding to the runlist starting at vcn @first_vcn and finishing
 * at the end of the runlist is updated.
 *
 * If @ctx is NULL, it is assumed that the attribute mft record is not mapped
 * and hence a new search context is allocated, the mft record is mapped, and
 * the attribute is looked up.  On completion the allocated search context is
 * released if it was allocated by ntfs_attr_mapping_pairs_update().
 *
 * Return 0 on success and errno on error.
 *
 * Locking: The runlist @ni->rl must be locked for writing, it remains locked
 *	    throughout, and is left locked upon return.
 */
#if 0
errno_t ntfs_attr_mapping_pairs_update(ntfs_inode *base_ni, ntfs_inode *ni,
		VCN first_vcn, VCN last_vcn, ntfs_attr_search_ctx *ctx)
{
	VCN lowest_vcn, highest_vcn, stop_vcn;
	ntfs_volume *vol;
	ATTR_RECORD *a;
	errno_t err;
	BOOL mpa_is_valid, was_sparse, is_sparse;
	unsigned mp_ofs;
	ntfs_attr_search_ctx attr_ctx;

	ntfs_debug("Entering for base mft_no 0x%llx, attribute type 0x%x, "
			"name len 0x%x, first_vcn 0x%llx, last_vcn 0x%llx, "
			"ctx is %spresent.",
			(unsigned long long)base_ni->mft_no,
			(unsigned)le32_to_cpu(ni->type), ni->name_len,
			(unsigned long long)first_vcn,
			(unsigned long long)last_vcn,
			ctx ? "" : "not ");
	vol = base_ni->vol;
	/*
	 * If no search context was specified use ours, initialize it, and look
	 * up the base attribute record so we can update the sizes, flags, and
	 * add/remove the compressed size if needed.
	 *
	 * We also need to look up the base attribute record if a search
	 * context was specified but it points to an extent attribute record.
	 */
	if (!ctx || ctx->a->lowest_vcn) {
		if (!ctx) {
			MFT_RECORD *base_m;

			err = ntfs_mft_record_map(base_ni, &base_m);
			if (err) {
				ntfs_error(vol->mp, "Failed to map mft_no "
						"0x%llx (error %d).",
						(unsigned long long)
						base_ni->mft_no, err);
				return err;
			}
			ctx = &attr_ctx;
			ntfs_attr_search_ctx_init(ctx, base_ni, base_m);
		}
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0,
				NULL, 0, ctx);
		if (err) {
			if (err == ENOENT)
				err = EIO;
			ntfs_error(vol->mp, "Failed to look up base attribute "
					"record in mft_no 0x%llx (error %d).",
					(unsigned long long)base_ni->mft_no,
					err);
			goto err;
		}
	}
	a = ctx->a;
	if (!NInoNonResident(ni) || !a->non_resident)
		panic("%s(): !NInoNonResident(ni) || !a->non_resident\n",
				__FUNCTION__);
	mpa_is_valid = TRUE;
	/*
	 * If the attribute was sparse and is no longer sparse or it was not
	 * sparse and is now sparse, update the sparse state and add/remove the
	 * compressed size.
	 */
	was_sparse = !!(a->flags & ATTR_IS_SPARSE);
	is_sparse = !!NInoSparse(ni);
	if (was_sparse == is_sparse)
		goto sparse_done;
	if (is_sparse) {
		a->flags |= ATTR_IS_SPARSE;
		if (NInoCompressed(ni))
			goto sparse_done;
		if (a->flags & ATTR_IS_COMPRESSED)
			panic("%s(): a->flags & ATTR_IS_COMPRESSED\n",
					__FUNCTION__);
		/*
		 * Add the compressed size and set up the relevant fields in
		 * the attribute record.
		 *
		 * If there is enough space in the mft record and we do not
		 * need to rewrite the mapping pairs array in this attribute
		 * record, resize the attribute record and move the mapping
		 * pairs array.
		 *
		 * If there is not enough space to perform the resize then do
		 * not preserve the mapping pairs array in this attribute
		 * record.
		 *
		 * If there still is not enough space to add the compressed
		 * size move the attribute record to an extent mft record (this
		 * cannot be the only attribute record in the current mft
		 * record).  If we do this do not preserve the mapping pairs
		 * array so we can make better use of the extent mft record.
		 *
		 * Note we need to ensure we have already mapped the runlist
		 * fragment described by the current mapping pairs array if we
		 * are not going to preserve it or we would lose the data.
		 */
		a->compression_unit = 0;
		if (vol->major_ver <= 1)
			a->compression_unit = NTFS_COMPRESSION_UNIT;
restart_compressed_size_add:
		if ((first_vcn > sle64_to_cpu(a->highest_vcn) + 1) &&
				!(err = ntfs_attr_record_resize(ctx->m, a,
				le32_to_cpu(a->length) +
				sizeof(a->compressed_size)))) {
			/*
			 * Move everything at the offset of the compressed size
			 * to make space for the compressed size.
			 */
			memmove((u8*)a + offsetof(ATTR_RECORD,
					compressed_size) +
					sizeof(a->compressed_size), (u8*)a +
					offsetof(ATTR_RECORD, compressed_size),
					le32_to_cpu(a->length) - offsetof(
					ATTR_RECORD, compressed_size));
			/*
			 * Update the name offset to match the moved data.  If
			 * there is no name then set the name offset to the
			 * correct position instead of adding to a potentially
			 * incorrect value.
			 */
			if (a->name_length)
				a->name_offset = cpu_to_le16(le16_to_cpu(
						a->name_offset) +
						sizeof(a->compressed_size));
			else
				a->name_offset = const_cpu_to_le16(offsetof(
						ATTR_RECORD, compressed_size) +
						sizeof(a->compressed_size));
			/* Update the mapping pairs offset. */
			mp_ofs = le16_to_cpu(a->mapping_pairs_offset) +
					sizeof(a->compressed_size);
			goto sparse_done;
		}
		/* Ensure this runlist fragment is mapped. */
		if (ni->allocated_size && (!ni->rl.elements ||
				ni->rl.rl->lcn == LCN_RL_NOT_MAPPED)) {
			err = ntfs_mapping_pairs_decompress(vol, a, &ni->rl);
			if (err) {
				ntfs_error(vol->mp, "Failed to decompress "
						"mapping pairs array (error "
						"%d).", err);
				goto err;
			}
		}
		/*
		 * Check whether the attribute is big enough to have the
		 * compressed size added to it.  We need at the very least
		 * space for the record header, the name, and a zero byte for
		 * an empty mapping pairs array and we need to allow for all
		 * the needed alignment padding.
		 */
		if (((sizeof(ATTR_RECORD) + a->name_length * sizeof(ntfschar) +
				7) & ~7) + 8 <= le32_to_cpu(a->length)) {
add_compressed_size:
			/*
			 * Move the name back to the new end of the attribute
			 * record header thus adding the compressed size.
			 */
			if (a->name_length)
				memmove((u8*)a + sizeof(ATTR_RECORD), (u8*)a +
						le16_to_cpu(a->name_offset),
						a->name_length *
						sizeof(ntfschar));
			/*
			 * Update the name offset and the mapping pairs offset
			 * to match the moved name.
			 */
			a->name_offset = const_cpu_to_le16(sizeof(ATTR_RECORD));
			a->mapping_pairs_offset = cpu_to_le16(
					(sizeof(ATTR_RECORD) + a->name_length *
					sizeof(ntfschar) + 7) & ~7);
			/*
			 * We no longer have a valid mapping pairs array in the
			 * current attribute record.
			 */
			mpa_is_valid = FALSE;
			goto sparse_done;
		}
		/*
		 * The attribute record is not big enough so try to extend it
		 * (in case we did not try to extend it above).
		 */
		err = ntfs_attr_record_resize(ctx->m, a,
				((sizeof(ATTR_RECORD) + a->name_length *
				sizeof(ntfschar) + 7) & ~7) + 8);
		if (!err)
			goto add_compressed_size;
		/*
		 * The attribute record cannot be the only one in the mft
		 * record if it is not large enough to hold an empty attribute
		 * record and there is not enough space to grow it.
		 */
		if (ntfs_attr_record_is_only_one(ctx->m, a))
			panic("%s(): ntfs_attr_is_only_one(ctx->m, a)\n",
					__FUNCTION__);
		/*
		 * This is our last resort.  Move the attribute to an extent
		 * mft record.
		 *
		 * First, add the attribute list attribute if it is not already
		 * present.
		 */
		if (!NInoAttrList(base_ni)) {
			err = ntfs_attr_list_add(base_ni, ctx->m, ctx);
			if (err || ctx->is_error) {
				if (!err)
					err = ctx->error;
				ntfs_error(vol->mp, "Failed to %s mft_no "
						"0x%llx (error %d).",
						ctx->is_error ?
						"remap extent mft record of" :
						"add attribute list attribute "
						"to", (unsigned long long)
						base_ni->mft_no, err);
				goto err;
			}
			/*
			 * The attribute location will have changed so update
			 * it from the search context.
			 */
			a = ctx->a;
			/*
			 * Retry the attribute record resize as we may now have
			 * enough space to add the compressed size.
			 *
			 * This can for example happen when the attribute was
			 * moved out to an extent mft record which has much
			 * more free space than the base mft record had or of
			 * course other attributes may have been moved out to
			 * extent mft records which has created enough space in
			 * the base mft record.
			 *
			 * If the attribute record was moved to an empty extent
			 * mft record this is the same case as if we moved the
			 * attribute record below so treat it the same, i.e. we
			 * do not preserve the mapping pairs array and use the
			 * maximum possible size for the mft record to allow us
			 * to consolidate the mapping pairs arrays.
			 */
			if (ntfs_attr_record_is_only_one(ctx->m, a))
				goto attr_is_only_one;
			goto restart_compressed_size_add;
		}
		/* Move the attribute to an extent mft record. */
		lck_rw_lock_shared(&base_ni->attr_list_rl.lock);
		err = ntfs_attr_record_move(ctx);
		lck_rw_unlock_shared(&base_ni->attr_list_rl.lock);
		if (err) {
			ntfs_error(vol->mp, "Failed to move attribute extent "
					"from mft record 0x%llx to an extent "
					"mft record (error %d).",
					(unsigned long long)ctx->ni->mft_no,
					err);
			/*
			 * We could try to remove the attribute list attribute
			 * if we added it above, but this would require
			 * attributes to be moved back into the base mft record
			 * from extent mft records, which is a lot of work.
			 * Given that we are in an error code path and that it
			 * is ok to just leave the inode with an attribute list
			 * attribute, we do not bother and just bail out.
			 */
			goto err;
		}
		/*
		 * The attribute location will have changed so update it from
		 * the search context.
		 */
		a = ctx->a;
attr_is_only_one:
		/*
		 * We now have enough space to add the compressed size so
		 * resize the attribute record.  Note we do not want to
		 * preserve the mapping pairs array as we will have
		 * significantly more space in the extent mft record thus we
		 * want to consolidate the mapping pairs arrays which is why we
		 * resize straight to the maximum possible size for the mft
		 * record.
		 */
		err = ntfs_attr_record_resize(ctx->m, a,
				le32_to_cpu(ctx->m->bytes_allocated) -
				le32_to_cpu(ctx->m->bytes_in_use) +
				le32_to_cpu(a->length));
		if (err)
			panic("%s(): err - resize failed\n", __FUNCTION__);
		if (((sizeof(ATTR_RECORD) + a->name_length * sizeof(ntfschar) +
				7) & ~7) + 8 > le32_to_cpu(a->length))
			panic("%s(): attribute record is still too small\n",
					__FUNCTION__);
		goto add_compressed_size;
	}
	/* The attribute is becoming non-sparse. */
	a->flags &= ~ATTR_IS_SPARSE;
	if (NInoCompressed(ni))
		goto sparse_done;
	if (a->flags & ATTR_IS_COMPRESSED)
		panic("%s(): a->flags & ATTR_IS_COMPRESSED\n", __FUNCTION__);
	/*
	 * Remove the compressed size and set up the relevant fields in the
	 * attribute record.
	 *
	 * If we do not need to rewrite the mapping pairs array in this
	 * attribute record, move the mapping pairs array and then resize the
	 * attribute record.
	 *
	 * Note we need to ensure we have already mapped the runlist fragment
	 * described by the current mapping pairs array if we are not going to
	 * preserve it or we would lose the data.
	 */
	a->compression_unit = 0;
	if (first_vcn > sle64_to_cpu(a->highest_vcn) + 1) {
		/*
		 * Move everything after the compressed size forward to the
		 * offset of the compressed size thus deleting the compressed
		 * size.
		 */
		memmove((u8*)a + offsetof(ATTR_RECORD, compressed_size),
				(u8*)a + offsetof(ATTR_RECORD,
				compressed_size) + sizeof(a->compressed_size),
				le32_to_cpu(a->length) - (offsetof(ATTR_RECORD,
				compressed_size) + sizeof(a->compressed_size)));
		/*
		 * Update the name offset and the mapping pairs offset to match
		 * the moved data.  If there is no name then set the name
		 * offset to the correct position instead of subtracting from a
		 * potentially incorrect value.
		 */
		if (!a->name_length)
			a->name_offset = const_cpu_to_le16(offsetof(ATTR_RECORD,
					compressed_size));
		else
			a->name_offset = cpu_to_le16(
					le16_to_cpu(a->name_offset) -
					sizeof(a->compressed_size));
		a->mapping_pairs_offset = cpu_to_le16(
				le16_to_cpu(a->mapping_pairs_offset) -
				sizeof(a->compressed_size));
		/*
		 * Shrink the attribute record to reflect the removal of the
		 * compressed size.  Note this cannot fail since we are making
		 * the attribute smaller thus by definition there is
		 * enough space to do so.
		 */
		err = ntfs_attr_record_resize(ctx->m, a, le32_to_cpu(
				a->length) - sizeof(a->compressed_size));
		if (err)
			panic("%s(): err\n", __FUNCTION__);
		goto sparse_done;
	}
	/* Ensure this runlist fragment is mapped. */
	if (ni->allocated_size && (!ni->rl.elements ||
			ni->rl.rl->lcn == LCN_RL_NOT_MAPPED)) {
		err = ntfs_mapping_pairs_decompress(vol, a, &ni->rl);
		if (err) {
			ntfs_error(vol->mp, "Failed to decompress mapping "
					"pairs array (error %d).", err);
			goto err;
		}
	}
	mpa_is_valid = FALSE;
	/*
	 * Move the name forward to the offset of the compressed size thus
	 * deleting the compressed size.
	 */
	if (a->name_length)
		memmove((u8*)a + offsetof(ATTR_RECORD, compressed_size),
				(u8*)a + le16_to_cpu(a->name_offset),
				a->name_length * sizeof(ntfschar));
	/*
	 * Update the name offset and the mapping pairs offset to match the
	 * moved name.
	 */
	a->name_offset = const_cpu_to_le16(
			offsetof(ATTR_RECORD, compressed_size));
	a->mapping_pairs_offset = cpu_to_le16(
			(offsetof(ATTR_RECORD, compressed_size) +
			(a->name_length * sizeof(ntfschar)) + 7) & ~7);
sparse_done:
	/*
	 * Update the attribute sizes.
	 *
	 * TODO: Need to figure out whether we really need to update the data
	 * and initialized sizes or whether updating just the allocated and
	 * compressed sizes is sufficient in which case we can save a few CPU
	 * cycles by not updating the data and initialized sizes here.
	 */
	lck_spin_lock(&ni->size_lock);
	a->allocated_size = cpu_to_sle64(ni->allocated_size);
	a->data_size = cpu_to_sle64(ni->data_size);
	a->initialized_size = cpu_to_sle64(ni->initialized_size);
	if (a->flags & (ATTR_IS_COMPRESSED | ATTR_IS_SPARSE))
		a->compressed_size = cpu_to_sle64(ni->compressed_size);
	lck_spin_unlock(&ni->size_lock);
	/*
	 * If the current mapping pairs array is valid and the first vcn at
	 * which we need to update the mapping pairs array is not in this
	 * attribute extent, look up the attribute extent containing the first
	 * vcn.
	 */
	if (mpa_is_valid && first_vcn > sle64_to_cpu(a->highest_vcn) + 1) {
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				first_vcn, NULL, 0, ctx);
		if (err) {
			if (err == ENOENT)
				err = EIO;
			ntfs_error(vol->mp, "Failed to look up extent "
					"attribute record containing VCN "
					"0x%llx in mft_no 0x%llx (error %d).",
					(unsigned long long)first_vcn,
					(unsigned long long)base_ni->mft_no,
					err);
			goto err;
		}
		a = ctx->a;
	}
	/*
	 * We need to rebuild the mapping pairs array in this attribute extent.
	 * But first, check if we can grow the attribute extent.  If this is
	 * the base extent and the attribute is not sparse nor compressed and
	 * it is allowed to be sparse then reserve the size of the compressed
	 * size field in the mft record so it is easier to make the attribute
	 * sparse later on.
	 *
	 * FIXME: But we don't want to do that if the attribute extent is in
	 * the base mft record and the attribute is $DATA or $INDEX_ALLOCATION,
	 * etc. as we want to keep the first extent of these base attribute
	 * extents in the base mft record thus we have to keep them small to
	 * allow the attribute list attribute to grow over time.
	 *
	 * FIXME: Need to make sure we map any unmapped regions of the runlist
	 * when determining the size of the mapping pairs array.
	 *
	 * FIXME: If we don't impose a last vcn when getting the size it would
	 * just cause the entirety of the mapping pairs array starting with the
	 * current extent to be mapped in, which is not necessarily a bad
	 * thing as it will then be already mapped for all subsequent writes.
	 *
	 * FIXME: We do not want to keep rewriting the entire mapping pairs
	 * array every time we fill a hole so need to be careful when
	 * consolidating the mapping pairs array fragments.  OTOH we do not
	 * want to end up with millions of very short attribute extents so need
	 * to be careful about that, too.
	 */
// TODO: I AM HERE: 
	ntfs_error(vol->mp, "FIXME: TODO...");
	return ENOTSUP;
	ntfs_debug("Done.");
	return 0;
err:
	/*
	 * If we mapped the mft record and looked up the attribute, release the
	 * mapped mft record(s) here.
	 */
	if (ctx == &attr_ctx) {
		if (ctx->ni != base_ni)
			ntfs_extent_mft_record_unmap(ctx->ni);
		ntfs_mft_record_unmap(base_ni);
	}
	return err;
}
#endif
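
/*
 * Illustrative sketch (disabled): the heart of any mapping pairs update is
 * sizing the compressed form of the runlist before resizing the destination
 * attribute record, much as ntfs_attr_make_non_resident() below does for
 * real.  This hypothetical helper assumes the runlist @ni->rl is locked and
 * fully mapped.
 */
#if 0
static errno_t example_mp_fits(ntfs_volume *vol, ntfs_inode *ni,
		MFT_RECORD *m, ATTR_RECORD *a)
{
	unsigned mp_size;
	errno_t err;

	/* Determine the size needed for the whole mapping pairs array. */
	err = ntfs_get_size_for_mapping_pairs(vol,
			ni->rl.elements ? ni->rl.rl : NULL, 0, -1, &mp_size);
	if (err)
		return err;
	/* Try to resize the record so the rebuilt array will fit. */
	return ntfs_attr_record_resize(m, a,
			le16_to_cpu(a->mapping_pairs_offset) + mp_size);
}
#endif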

/**
 * ntfs_resident_attr_record_insert_internal - insert a resident attribute
 * @m:		mft record in which to insert the resident attribute
 * @a:		attribute in front of which to insert the new attribute
 * @type:	attribute type of new attribute
 * @name:	Unicode name of new attribute
 * @name_len:	Unicode character size of name of new attribute
 * @val_len:	byte size of attribute value of new attribute
 *
 * Insert a new resident attribute in the mft record @m, in front of the
 * existing attribute record @a.  The new attribute is of type @type, and has a
 * name of @name which is @name_len Unicode characters long.  The new attribute
 * value is @val_len bytes and is initialized to zero.
 *
 * Note: If the inode uses the attribute list attribute the caller is
 * responsible for adding an entry for the inserted attribute to the attribute
 * list attribute.
 *
 * Return 0 on success and errno on error.  The following error codes are
 * defined:
 *	ENOSPC - Not enough space in the mft record @m.
 *
 * Note: On error, no modifications have been performed whatsoever.
 */
errno_t ntfs_resident_attr_record_insert_internal(MFT_RECORD *m,
		ATTR_RECORD *a, const ATTR_TYPE type, const ntfschar *name,
		const u8 name_len, const u32 val_len)
{
	unsigned name_ofs, val_ofs;

	/*
	 * Calculate the offset into the new attribute at which the attribute
	 * name begins.  The name is placed directly after the resident
	 * attribute record itself.
	 */
	name_ofs = offsetof(ATTR_RECORD, reservedR) + sizeof(a->reservedR);
	/*
	 * Calculate the offset into the new attribute at which the attribute
	 * value begins.  The attribute value is placed after the name aligned
	 * to an 8-byte boundary.
	 */
	val_ofs = name_ofs + (((name_len << NTFSCHAR_SIZE_SHIFT) + 7) & ~7);
	/*
	 * Work out the size for the attribute record.  We simply take the
	 * offset to the attribute value we worked out above and add the size
	 * of the attribute value in bytes aligned to an 8-byte boundary.  Note
	 * we do not need to do the alignment as ntfs_attr_record_make_space()
	 * does it anyway.
	 */
	if (ntfs_attr_record_make_space(m, a, val_ofs + val_len))
		return ENOSPC;
	/*
	 * Now setup the new attribute record.  The entire attribute has been
	 * zeroed and the length of the attribute record has been set up by
	 * ntfs_attr_record_make_space().
	 */
	a->type = type;
	a->name_length = name_len;
	a->name_offset = cpu_to_le16(name_ofs);
	a->instance = m->next_attr_instance;
	/*
	 * Increment the next attribute instance number in the mft record as we
	 * consumed the old one.
	 */
	m->next_attr_instance = cpu_to_le16(
			(le16_to_cpu(m->next_attr_instance) + 1) & 0xffff);
	a->value_length = cpu_to_le32(val_len);
	a->value_offset = cpu_to_le16(val_ofs);
	if (type == AT_FILENAME)
		a->resident_flags = RESIDENT_ATTR_IS_INDEXED;
	/* Copy the attribute name into place. */
	if (name_len)
		memcpy((u8*)a + name_ofs, name,
				name_len << NTFSCHAR_SIZE_SHIFT);
	return 0;
}
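
/*
 * Illustrative sketch (disabled): the offset arithmetic used by
 * ntfs_resident_attr_record_insert_internal() above.  The name follows the
 * resident attribute record header and the value follows the name, rounded
 * up to an 8-byte boundary.  The helper and the 3-character name are
 * hypothetical; with the standard 24-byte resident header the offsets below
 * evaluate to 24 and 32 respectively.
 */
#if 0
static void example_resident_layout(void)
{
	ATTR_RECORD *a = NULL;
	unsigned name_ofs, val_ofs;
	const u8 name_len = 3;

	/* Name starts right after the resident attribute record header. */
	name_ofs = offsetof(ATTR_RECORD, reservedR) + sizeof(a->reservedR);
	/* Value starts after the name, aligned to an 8-byte boundary. */
	val_ofs = name_ofs + (((name_len << NTFSCHAR_SIZE_SHIFT) + 7) & ~7);
	(void)val_ofs;
}
#endif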

/**
 * ntfs_resident_attr_record_insert - insert a resident attribute record
 * @ni:		base ntfs inode to which the attribute is being added
 * @ctx:	search context describing where to insert the resident attribute
 * @type:	attribute type of new attribute
 * @name:	Unicode name of new attribute
 * @name_len:	Unicode character size of name of new attribute
 * @val:	attribute value of new attribute (optional, can be NULL)
 * @val_len:	byte size of attribute value of new attribute
 *
 * Insert a new resident attribute in the base ntfs inode @ni at the position
 * indicated by the attribute search context @ctx and add an attribute list
 * attribute entry for it if the inode uses the attribute list attribute.
 *
 * The new attribute is of type @type, has a name of @name which is @name_len
 * Unicode characters long, and has a value of @val with size @val_len bytes.
 * If @val is NULL, the value of size @val_len is zeroed.
 *
 * If @val is NULL, the caller is responsible for marking dirty the extent
 * mft record containing the attribute.  We do it this way because we assume
 * the caller is going to modify the attribute further and will then mark it
 * dirty.
 *
 * If the attribute is in the base mft record then the caller is always
 * responsible for marking the mft record dirty.
 *
 * Return 0 on success and errno on error.
 *
 * WARNING: Regardless of whether success or failure is returned, you need to
 *	    check @ctx->is_error and if 1 the @ctx is no longer valid, i.e. you
 *	    need to either call ntfs_attr_search_ctx_reinit() or
 *	    ntfs_attr_search_ctx_put() on it.  In that case @ctx->error will
 *	    give you the error code for why the mapping of the inode failed.
 */
errno_t ntfs_resident_attr_record_insert(ntfs_inode *ni,
		ntfs_attr_search_ctx *ctx, const ATTR_TYPE type,
		const ntfschar *name, const u8 name_len,
		const void *val, const u32 val_len)
{
	ntfs_volume *vol;
	MFT_RECORD *base_m, *m;
	ATTR_RECORD *a;
	ATTR_LIST_ENTRY *al_entry;
	unsigned name_ofs, val_ofs, al_entry_used, al_entry_len, new_al_size;
	unsigned new_al_alloc;
	errno_t err;
	BOOL al_entry_added;

	ntfs_debug("Entering for mft_no 0x%llx, attribute type 0x%x, name_len "
			"0x%x, val_len 0x%x.", (unsigned long long)ni->mft_no,
			(unsigned)le32_to_cpu(type), name_len, val_len);
	vol = ni->vol;
	/*
	 * Calculate the offset into the new attribute at which the attribute
	 * name begins.  The name is placed directly after the resident
	 * attribute record itself.
	 */
	name_ofs = offsetof(ATTR_RECORD, reservedR) + sizeof(a->reservedR);
	/*
	 * Calculate the offset into the new attribute at which the attribute
	 * value begins.  The attribute value is placed after the name aligned
	 * to an 8-byte boundary.
	 */
	val_ofs = name_ofs + (((name_len << NTFSCHAR_SIZE_SHIFT) + 7) & ~7);
	/*
	 * Work out the size for the attribute record.  We simply take the
	 * offset to the attribute value we worked out above and add the size
	 * of the attribute value in bytes aligned to an 8-byte boundary.  Note
	 * we do not need to do the alignment as ntfs_attr_record_make_space()
	 * does it anyway.
	 */
	/*
	 * The current implementation of ntfs_attr_lookup() will always return
	 * pointing into the base mft record when an attribute is not found.
	 */
	base_m = ctx->m;
retry:
	if (ni != ctx->ni)
		panic("%s(): ni != ctx->ni\n", __FUNCTION__);
	m = ctx->m;
	a = ctx->a;
	err = ntfs_attr_record_make_space(m, a, val_ofs + val_len);
	if (err) {
		ntfs_inode *eni;

		if (err != ENOSPC)
			panic("%s(): err != ENOSPC\n", __FUNCTION__);
		/*
		 * There was not enough space in the mft record to insert the
		 * new attribute record which means we will need to insert it
		 * into an extent mft record.
		 *
		 * To avoid bugs and impossible situations, check that the
		 * attribute is not already the only attribute in the mft
		 * record otherwise moving it would not give us anything.
		 */
		if (ntfs_attr_record_is_only_one(m, a))
			panic("%s(): ntfs_attr_record_is_only_one(m, a)\n",
					__FUNCTION__);
		/*
		 * Before we can allocate an extent mft record, we need to
		 * ensure that the inode has an attribute list attribute.
		 */
		if (!NInoAttrList(ni)) {
			err = ntfs_attr_list_add(ni, m, NULL);
			if (err) {
				ntfs_error(vol->mp, "Failed to add attribute "
						"list attribute to mft_no "
						"0x%llx (error %d).",
						(unsigned long long)ni->mft_no,
						err);
				return err;
			}
			/*
			 * Adding the attribute list attribute may have
			 * generated enough space in the base mft record to
			 * fit the attribute so try again.
			 */
			ntfs_attr_search_ctx_reinit(ctx);
			err = ntfs_attr_lookup(type, name, name_len, 0, val,
					val_len, ctx);
			if (err == ENOENT) {
				/*
				 * The current implementation of
				 * ntfs_attr_lookup() will always return
				 * pointing into the base mft record when an
				 * attribute is not found.
				 */
				if (m != ctx->m)
					panic("%s(): m != ctx->m\n",
							__FUNCTION__);
				goto retry;
			}
			/*
			 * We cannot have found the attribute as we have
			 * exclusive access and know that it does not exist
			 * already.
			 */
			if (!err)
				panic("%s(): !err\n", __FUNCTION__);
			/*
			 * Something has gone wrong.  Note we have to bail out
			 * as a failing attribute lookup indicates corruption
			 * and/or disk failure and/or not enough memory all of
			 * which would prevent us from rolling back the
			 * attribute list attribute addition.
			 */
			ntfs_error(vol->mp, "Failed to add attribute type "
					"0x%x to mft_no 0x%llx because looking "
					"up the attribute failed (error %d).",
					(unsigned)le32_to_cpu(type),
					(unsigned long long)ni->mft_no, err);
			return err;
		}
		/*
		 * We now need to allocate a new extent mft record, attach it
		 * to the base ntfs inode and set up the search context to
		 * point to it, then insert the new attribute into it.
		 */
		err = ntfs_mft_record_alloc(vol, NULL, NULL, ni, &eni, &m, &a);
		if (err) {
			ntfs_error(vol->mp, "Failed to add attribute type "
					"0x%x to mft_no 0x%llx because "
					"allocating a new extent mft record "
					"failed (error %d).",
					(unsigned)le32_to_cpu(type),
					(unsigned long long)ni->mft_no, err);
			/*
			 * If we added the attribute list attribute above we
			 * now remove it again.  This may require moving
			 * attributes back into the base mft record, so it is
			 * not a trivial amount of work, and in the end it does
			 * not really matter if we leave an inode with an
			 * attribute list attribute that does not really need
			 * it.  So it
			 * will only be removed if there are no extent mft
			 * records at all, i.e. if adding the attribute list
			 * attribute did not cause any attribute records to be
			 * moved out to extent mft records.
			 */
			al_entry_added = FALSE;
			al_entry = NULL;
			goto remove_al;
		}
		ctx->m = m;
		ctx->a = a;
		ctx->ni = eni;
		/*
		 * Make space for the new attribute.  This cannot fail as we
		 * now have an empty mft record which by definition can hold
		 * a maximum size resident attribute record.
		 */
		err = ntfs_attr_record_make_space(m, a, val_ofs + val_len);
		if (err)
			panic("%s(): err (ntfs_attr_record_make_space())\n",
					__FUNCTION__);
	}
	/*
	 * Now setup the new attribute record.  The entire attribute has been
	 * zeroed and the length of the attribute record has been set up by
	 * ntfs_attr_record_make_space().
	 */
	a->type = type;
	a->name_length = name_len;
	a->name_offset = const_cpu_to_le16(offsetof(ATTR_RECORD, reservedR) +
			sizeof(a->reservedR));
	a->instance = m->next_attr_instance;
	/*
	 * Increment the next attribute instance number in the mft record as we
	 * consumed the old one.
	 */
	m->next_attr_instance = cpu_to_le16(
			(le16_to_cpu(m->next_attr_instance) + 1) & 0xffff);
	a->value_length = cpu_to_le32(val_len);
	a->value_offset = cpu_to_le16(val_ofs);
	if (type == AT_FILENAME)
		a->resident_flags = RESIDENT_ATTR_IS_INDEXED;
	/* Copy the attribute name into place. */
	if (name_len)
		memcpy((u8*)a + name_ofs, name,
				name_len << NTFSCHAR_SIZE_SHIFT);
	/* If a value is specified, copy it into place. */
	if (val) {
		memcpy((u8*)a + le16_to_cpu(a->value_offset), val, val_len);
		/*
		 * Ensure the mft record containing the new filename attribute
		 * gets written out.
		 */
		if (ctx->ni != ni)
			NInoSetMrecNeedsDirtying(ctx->ni);
	}
	/*
	 * If the inode does not use the attribute list attribute we are done.
	 *
	 * If the inode uses the attribute list attribute (including the case
	 * where we just created it), we need to add an attribute list
	 * attribute entry for the attribute.
	 */
	if (!NInoAttrList(ni))
		goto done;
	/* Add an attribute list attribute entry for the inserted attribute. */
	al_entry = ctx->al_entry;
	al_entry_used = offsetof(ATTR_LIST_ENTRY, name) +
			(name_len << NTFSCHAR_SIZE_SHIFT);
	al_entry_len = (al_entry_used + 7) & ~7;
	new_al_size = ni->attr_list_size + al_entry_len;
	/* Out of bounds checks. */
	if ((u8*)al_entry < ni->attr_list ||
			(u8*)al_entry > ni->attr_list + new_al_size ||
			(u8*)al_entry + al_entry_len >
			ni->attr_list + new_al_size) {
		/* Inode is corrupt. */
		ntfs_error(vol->mp, "Mft_no 0x%llx is corrupt.  Run chkdsk.",
				(unsigned long long)ni->mft_no);
		err = EIO;
		goto undo;
	}
	err = ntfs_attr_size_bounds_check(vol, AT_ATTRIBUTE_LIST, new_al_size);
	if (err) {
		if (err == ERANGE) {
			ntfs_error(vol->mp, "Cannot insert attribute into "
					"mft_no 0x%llx because the attribute "
					"list attribute would become too "
					"large.  You need to defragment your "
					"volume and then try again.",
					(unsigned long long)ni->mft_no);
			err = ENOSPC;
		} else {
			ntfs_error(vol->mp, "Attribute list attribute is "
					"unknown on the volume.  The volume "
					"is corrupt.  Run chkdsk.");
			NVolSetErrors(vol);
			err = EIO;
		}
		goto undo;
	}
	/*
	 * Reallocate the memory buffer if needed and create space for the new
	 * entry.
	 */
	new_al_alloc = (new_al_size + NTFS_ALLOC_BLOCK - 1) &
			~(NTFS_ALLOC_BLOCK - 1);
	if (new_al_alloc > ni->attr_list_alloc) {
		u8 *tmp, *al, *al_end;
		unsigned al_entry_ofs;

		tmp = OSMalloc(new_al_alloc, ntfs_malloc_tag);
		if (!tmp) {
			ntfs_error(vol->mp, "Not enough memory to extend "
					"attribute list attribute of mft_no "
					"0x%llx.",
					(unsigned long long)ni->mft_no);
			err = ENOMEM;
			goto undo;
		}
		al = ni->attr_list;
		al_entry_ofs = (u8*)al_entry - al;
		al_end = al + ni->attr_list_size;
		memcpy(tmp, al, al_entry_ofs);
		if ((u8*)al_entry < al_end)
			memcpy(tmp + al_entry_ofs + al_entry_len,
					al + al_entry_ofs,
					ni->attr_list_size - al_entry_ofs);
		al_entry = ctx->al_entry = (ATTR_LIST_ENTRY*)(tmp +
				al_entry_ofs);
		OSFree(ni->attr_list, ni->attr_list_alloc, ntfs_malloc_tag);
		ni->attr_list_alloc = new_al_alloc;
		ni->attr_list = tmp;
	} else if ((u8*)al_entry < ni->attr_list + ni->attr_list_size)
		memmove((u8*)al_entry + al_entry_len, al_entry,
				ni->attr_list_size - ((u8*)al_entry -
				ni->attr_list));
	ni->attr_list_size = new_al_size;
	/* Set up the attribute list entry. */
	al_entry->type = type;
	al_entry->length = cpu_to_le16(al_entry_len);
	al_entry->name_length = name_len;
	al_entry->name_offset = offsetof(ATTR_LIST_ENTRY, name);
	al_entry->lowest_vcn = 0;
	al_entry->mft_reference = MK_LE_MREF(ctx->ni->mft_no, ctx->ni->seq_no);
	al_entry->instance = a->instance;
	/* Copy the attribute name into place. */
	if (name_len)
		memcpy((u8*)&al_entry->name, name,
				name_len << NTFSCHAR_SIZE_SHIFT);
	/* For tidiness, zero any unused space. */
	if (al_entry_len != al_entry_used) {
		if (al_entry_len < al_entry_used)
			panic("%s(): al_entry_len < al_entry_used\n",
					__FUNCTION__);
		memset((u8*)al_entry + al_entry_used, 0,
				al_entry_len - al_entry_used);
	}
	/*
	 * Extend the attribute list attribute and copy in the modified
	 * value from the cache.
	 */
	err = ntfs_attr_list_sync_extend(ni, base_m,
			(u8*)al_entry - ni->attr_list, ctx);
	if (err) {
		ntfs_error(vol->mp, "Failed to extend attribute list "
				"attribute of mft_no 0x%llx (error %d).",
				(unsigned long long)ni->mft_no, err);
		al_entry_added = TRUE;
		goto undo_al;
	}
done:
	ntfs_debug("Done.");
	return 0;
undo:
	al_entry_added = FALSE;
undo_al:
	/*
	 * Need to remove the attribute again or free the extent mft record if
	 * there are no attributes remaining in it.
	 */
	if (m == base_m || !ntfs_attr_record_is_only_one(m, a)) {
		ntfs_attr_record_delete_internal(m, a);
		/*
		 * If the attribute was not in the base mft record mark the
		 * extent mft record dirty so it gets written out later.  If
		 * the attribute was in the base mft record it will be marked
		 * dirty later.
		 *
		 * We also unmap the extent mft record and we set @ctx->ni to
		 * equal the base inode @ni so that the search context is
		 * initialized from scratch or simply freed if the caller
		 * reinitializes or releases the search context respectively.
		 */
		if (m != base_m) {
			NInoSetMrecNeedsDirtying(ctx->ni);
			ntfs_extent_mft_record_unmap(ctx->ni);
			ctx->ni = ni;
		}
	} else {
		int err2;
		BOOL al_needed;

		err2 = ntfs_extent_mft_record_free(ni, ctx->ni, m);
		if (err2) {
			/*
			 * Ignore the error as we just end up with an unused
			 * mft record that is marked in use.
			 */
			ntfs_error(vol->mp, "Failed to free extent mft_no "
					"0x%llx (error %d).  Unmount and run "
					"chkdsk to recover the lost inode.",
					(unsigned long long)ctx->ni->mft_no,
					err2);
			NVolSetErrors(vol);
			/*
			 * Release the extent mft record after dirtying it thus
			 * simulating the effect of freeing it.
			 */
			NInoSetMrecNeedsDirtying(ctx->ni);
			ntfs_extent_mft_record_unmap(ctx->ni);
		}
		/*
		 * The attribute search context still points to the no longer
		 * mapped extent inode thus we need to change it to point to
		 * the base inode instead so the context can be reinitialized
		 * or released safely.
		 */
		ctx->ni = ni;
remove_al:
		/*
		 * Check the attribute list attribute.  If there are no other
		 * attribute list attribute entries referencing extent mft
		 * records delete the attribute list attribute altogether.
		 *
		 * If this fails it does not matter as we simply retain the
		 * attribute list attribute so we ignore the error and go on to
		 * delete the attribute list attribute entry instead.
		 *
		 * If there are other attribute list attribute entries
		 * referencing extent mft records we still need the attribute
		 * list attribute thus we go on to delete the attribute list
		 * entry corresponding to the attribute record we just deleted
		 * by freeing its extent mft record.
		 */
		err2 = ntfs_attr_list_is_needed(ni,
				al_entry_added ? al_entry : NULL, &al_needed);
		if (err2)
			ntfs_warning(vol->mp, "Failed to determine if "
					"attribute list attribute of mft_no "
					"0x%llx if still needed (error %d).  "
					"Assuming it is still needed and "
					"continuing.",
					(unsigned long long)ni->mft_no, err2);
		else if (!al_needed) {
			/*
			 * No more extent mft records are in use.  Delete the
			 * attribute list attribute.
			 */
			ntfs_attr_search_ctx_reinit(ctx);
			err2 = ntfs_attr_list_delete(ni, ctx);
			if (!err2) {
				/*
				 * We deleted the attribute list attribute and
				 * this will have updated the base inode
				 * appropriately thus we have restored
				 * everything as it was before.
				 */
				return err;
			}
			ntfs_warning(vol->mp, "Failed to delete attribute "
					"list attribute of mft_no 0x%llx "
					"(error %d).  Continuing using "
					"alternative error recovery method.",
					(unsigned long long)ni->mft_no, err2);
		}
	}
	/*
	 * The search context @ctx is now invalid and cannot be used any more
	 * which is fine as we have finished dealing with the attribute record.
	 *
	 * We now need to delete the corresponding attribute list attribute
	 * entry if we created it.
	 *
	 * Then we need to rewrite the attribute list attribute again because
	 * ntfs_attr_list_sync_extend() may have left it in an indeterminate
	 * state.
	 */
	if (al_entry_added) {
		int err2;

		ntfs_attr_list_entry_delete(ni, al_entry);
		ntfs_attr_search_ctx_reinit(ctx);
		err2 = ntfs_attr_list_sync_shrink(ni, 0, ctx);
		if (err2) {
			ntfs_error(vol->mp, "Failed to restore attribute list "
					"attribute in base mft_no 0x%llx "
					"(error %d).  Leaving inconsistent "
					"metadata.  Unmount and run chkdsk.",
					(unsigned long long)ni->mft_no, err2);
			NVolSetErrors(vol);
		}
	}
	/* Make sure any changes are written out. */
	NInoSetMrecNeedsDirtying(ni);
	return err;
}
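
/*
 * Illustrative sketch (disabled): per the WARNING in the header comment of
 * ntfs_resident_attr_record_insert(), @ctx->is_error must be checked no
 * matter what the function returned.  The helper and the zeroed attribute
 * value are hypothetical.
 */
#if 0
static errno_t example_insert_resident(ntfs_inode *ni,
		ntfs_attr_search_ctx *ctx)
{
	static const u8 val[8];
	errno_t err;

	err = ntfs_resident_attr_record_insert(ni, ctx, AT_DATA, AT_UNNAMED,
			0, val, sizeof(val));
	if (ctx->is_error) {
		/* @ctx is no longer valid; pick up the mapping error. */
		err = ctx->error;
		ntfs_attr_search_ctx_reinit(ctx);
	}
	return err;
}
#endif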

/**
 * ntfs_resident_attr_value_resize - resize the value of a resident attribute
 * @m:		mft record containing attribute record
 * @a:		attribute record whose value to resize
 * @new_size:	new size in bytes to which to resize the attribute value of @a
 *
 * Resize the value of the attribute @a in the mft record @m to @new_size
 * bytes.  If the value is made bigger, the newly allocated space is cleared.
 *
 * Return 0 on success and errno on error.  The following error codes are
 * defined:
 *	ENOSPC	- Not enough space in the mft record @m to perform the resize.
 *
 * Note: On error, no modifications have been performed whatsoever.
 *
 * Warning: If you make a record smaller without having copied all the data you
 *	    are interested in the data may be overwritten.
 */
errno_t ntfs_resident_attr_value_resize(MFT_RECORD *m, ATTR_RECORD *a,
		const u32 new_size)
{
	const u32 old_size = le32_to_cpu(a->value_length);

	/* Resize the resident part of the attribute record. */
	if (ntfs_attr_record_resize(m, a, le16_to_cpu(a->value_offset) +
			new_size))
		return ENOSPC;
	/*
	 * The resize succeeded!  If we made the attribute value bigger, clear
	 * the area between the old size and @new_size.
	 */
	if (new_size > old_size)
		bzero((u8*)a + le16_to_cpu(a->value_offset) + old_size,
				new_size - old_size);
	/* Finally update the length of the attribute value. */
	a->value_length = cpu_to_le32(new_size);
	return 0;
}
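
/*
 * Illustrative sketch (disabled): growing a resident attribute value with
 * ntfs_resident_attr_value_resize() zeroes the added bytes, so extending a
 * value and then writing into the tail is safe.  The helper is hypothetical.
 */
#if 0
static errno_t example_grow_value(MFT_RECORD *m, ATTR_RECORD *a,
		const void *tail, u32 tail_len)
{
	const u32 old_size = le32_to_cpu(a->value_length);

	if (ntfs_resident_attr_value_resize(m, a, old_size + tail_len))
		return ENOSPC;	/* The record is unmodified. */
	/* The new area was cleared; copy the new tail into place. */
	memcpy((u8*)a + le16_to_cpu(a->value_offset) + old_size, tail,
			tail_len);
	return 0;
}
#endif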

/**
 * ntfs_attr_make_non_resident - convert a resident to a non-resident attribute
 * @ni:		ntfs inode describing the attribute to convert
 *
 * Convert the resident ntfs attribute described by the ntfs inode @ni to a
 * non-resident one.
 *
 * Return 0 on success and errno on error.  The following error return codes
 * are defined:
 *	EPERM	- The attribute is not allowed to be non-resident.
 *	ENOMEM	- Not enough memory.
 *	ENOSPC	- Not enough disk space.
 *	EINVAL	- Attribute not defined on the volume.
 *	EIO	- I/o error or other error.
 *
 * Note that if an error other than EPERM is returned it is possible that the
 * attribute has been made non-resident but for example the attribute list
 * attribute failed to be written out thus the base mft record is now corrupt
 * and all operations should be aborted by the caller.
 *
 * Locking: The caller must hold @ni->lock on the inode for writing.
 */
errno_t ntfs_attr_make_non_resident(ntfs_inode *ni)
{
	leMFT_REF mref;
	s64 new_size, data_size;
	ntfs_volume *vol = ni->vol;
	ntfs_inode *base_ni;
	MFT_RECORD *base_m, *m;
	ATTR_RECORD *a;
	upl_t upl;
	upl_page_info_array_t pl;
	u8 *kaddr, *al_end;
	unsigned mp_size, mp_ofs, name_ofs, arec_size, attr_size, bytes_needed;
	unsigned al_ofs = 0;
	errno_t err, err2;
	le32 type;
	u8 old_res_attr_flags;
	ntfs_attr_search_ctx ctx, actx;
	BOOL al_dirty = FALSE;

	/* Check that the attribute is allowed to be non-resident. */
	err = ntfs_attr_can_be_non_resident(vol, ni->type);
	if (err) {
		if (err == EPERM)
			ntfs_debug("Attribute is not allowed to be "
					"non-resident.");
		else
			ntfs_debug("Attribute not defined on the NTFS "
					"volume!");
		return err;
	}
	/*
	 * FIXME: Compressed and encrypted attributes are not supported when
	 * writing and we should never have gotten here for them.
	 */
	if (NInoCompressed(ni))
		panic("%s(): NInoCompressed(ni)\n", __FUNCTION__);
	if (NInoEncrypted(ni))
		panic("%s(): NInoEncrypted(ni)\n", __FUNCTION__);
	/*
	 * The size needs to be aligned to a cluster boundary for allocation
	 * purposes.
	 */
	lck_spin_lock(&ni->size_lock);
	data_size = ni->data_size;
	lck_spin_unlock(&ni->size_lock);
	new_size = (data_size + vol->cluster_size_mask) &
			~vol->cluster_size_mask;
	lck_rw_lock_exclusive(&ni->rl.lock);
	if (ni->rl.elements)
		panic("%s(): ni->rl.elements\n", __FUNCTION__);
	upl = NULL;
	if (new_size > 0) {
		/* Start by allocating clusters to hold the attribute value. */
		err = ntfs_cluster_alloc(vol, 0, new_size >>
				vol->cluster_size_shift, -1, DATA_ZONE, TRUE,
				&ni->rl);
		if (err) {
			if (err != ENOSPC)
				ntfs_error(vol->mp, "Failed to allocate "
						"cluster%s, error code %d.",
						(new_size >>
						vol->cluster_size_shift) > 1 ?
						"s" : "", err);
			goto unl_err;
		}
		/*
		 * Will need the page later and since the page lock nests
		 * outside all ntfs locks, we need to get the page now.
		 */
		err = ntfs_page_grab(ni, 0, &upl, &pl, &kaddr, TRUE);
		if (err)
			goto page_err;
	}
	/* Determine the size of the mapping pairs array. */
	err = ntfs_get_size_for_mapping_pairs(vol,
			ni->rl.elements ? ni->rl.rl : NULL, 0, -1, &mp_size);
	if (err) {
		ntfs_error(vol->mp, "Failed to get size for mapping pairs "
				"array (error %d).", err);
		goto rl_err;
	}
	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	err = ntfs_mft_record_map(base_ni, &base_m);
	if (err)
		goto rl_err;
	ntfs_attr_search_ctx_init(&ctx, base_ni, base_m);
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0,
			&ctx);
	if (err) {
		if (err == ENOENT)
			err = EIO;
		goto unm_err;
	}
	m = ctx.m;
	a = ctx.a;
	if (NInoNonResident(ni))
		panic("%s(): NINonResident(ni)\n", __FUNCTION__);
	if (a->non_resident)
		panic("%s(): a->non_resident\n", __FUNCTION__);
	/* Calculate new offsets for the name and the mapping pairs array. */
	name_ofs = offsetof(ATTR_REC, compressed_size);
	if (NInoSparse(ni) || NInoCompressed(ni))
		name_ofs += sizeof(a->compressed_size);
	mp_ofs = (name_ofs + a->name_length * sizeof(ntfschar) + 7) & ~7;
	/*
	 * Determine the size of the resident part of the now non-resident
	 * attribute record.
	 */
	arec_size = (mp_ofs + mp_size + 7) & ~7;
	/*
	 * If the page is not uptodate bring it uptodate by copying from the
	 * attribute value.
	 */
	attr_size = le32_to_cpu(a->value_length);
	if (attr_size != data_size)
		panic("%s(): attr_size != data_size\n", __FUNCTION__);
	if (upl && !upl_valid_page(pl, 0)) {
		memcpy(kaddr, (u8*)a + le16_to_cpu(a->value_offset),
				attr_size);
		bzero(kaddr + attr_size, PAGE_SIZE - attr_size);
	}
	/* Backup the attribute flags. */
	old_res_attr_flags = a->resident_flags;
retry_resize:
	/* Resize the resident part of the attribute record. */
	err = ntfs_attr_record_resize(m, a, arec_size);
	if (!err) {
		al_ofs = 0;
		goto do_switch;
	}
	if (err != ENOSPC)
		panic("%s(): err != ENOSPC\n", __FUNCTION__);
	/*
	 * The attribute record size required cannot be larger than the amount
	 * of space in an mft record.
	 */
	if (arec_size > le32_to_cpu(m->bytes_allocated) -
			le16_to_cpu(m->attrs_offset))
		panic("%s(): arec_size > le32_to_cpu(m->bytes_allocated) - "
				"le16_to_cpu(m->attrs_offset)\n",
				__FUNCTION__);
	/*
	 * To make space in the mft record we would like to try to make other
	 * attributes non-resident if that would save space.
	 *
	 * FIXME: We cannot do this at present unless the attribute is the
	 * attribute being resized as there could be an ntfs inode matching
	 * this attribute in memory and it would become out of date with its
	 * metadata if we touch its attribute record.
	 *
	 * FIXME: We do not need to do this if this is the attribute being
	 * resized as we already tried to make the attribute non-resident and
	 * it did not work or we would never have gotten here in the first
	 * place.
	 *
	 * Thus we have to either move other attributes to extent mft records
	 * thus making more space in the base mft record or we have to move the
	 * attribute being resized to an extent mft record thus giving it more
	 * space.  In any case we need to have an attribute list attribute so
	 * start by adding it if it does not yet exist.
	 *
	 * If the addition succeeds but the remapping of the extent mft record
	 * fails (i.e. the !err && ctx.is_error case below) we bail out
	 * without trying to remove the attribute list attribute because to do
	 * so we would have to map the extent mft record in order to move the
	 * attribute(s) in it back into the base mft record and we know the
	 * mapping just failed so it is unlikely to succeed now.  In any case
	 * the metadata is consistent; we just cannot make further progress.
	 */
	if (!NInoAttrList(base_ni)) {
		err = ntfs_attr_list_add(base_ni, base_m, &ctx);
		if (err || ctx.is_error) {
			if (!err)
				err = ctx.error;
			ntfs_error(vol->mp, "Failed to %s mft_no 0x%llx (error "
					"%d).", ctx.is_error ?
					"remap extent mft record of" :
					"add attribute list attribute to",
					(unsigned long long)base_ni->mft_no,
					err);
			goto unm_err;
		}
		/*
		 * The attribute location will have changed so update it from
		 * the search context.
		 */
		m = ctx.m;
		a = ctx.a;
		/*
		 * Check that the logic in ntfs_attr_list_add() has not changed
		 * without the code here being updated.  At present it will
		 * never make resident attributes non-resident.
		 */
		if (a->non_resident)
			panic("%s(): a->non_resident\n", __FUNCTION__);
		/*
		 * We now have an attribute list attribute.  This may have
		 * caused the attribute to be made non-resident to be moved out
		 * to an extent mft record in which case there would now be
		 * enough space to resize the attribute record.
		 *
		 * Alternatively some other large attribute may have been moved
		 * out to an extent mft record thus generating enough space in
		 * the base mft record for the attribute to be made
		 * non-resident.
		 *
		 * In either case we simply want to retry the resize.
		 */
		goto retry_resize;
	}
	/*
	 * We now know we have an attribute list attribute and that we still do
	 * not have enough space to make the attribute non-resident.
	 *
	 * As discussed above we need to start moving attributes out of the
	 * base mft record to make enough space.
	 *
	 * Note that if the attribute to be made non-resident had been moved
	 * out of the base mft record we would then have had enough space for
	 * the resize thus we would never have gotten here.  We detect this
	 * case and BUG() in case we change the logic in ntfs_attr_list_add()
	 * some day to remind us to update the code here to match.
	 */
	if (ctx.ni != base_ni)
		panic("%s(): ctx.ni != base_ni\n", __FUNCTION__);
	/*
	 * If this is the only attribute record in the mft record we cannot
	 * gain anything by moving it or anything else.  This really cannot
	 * happen as we ensure above that the attribute is in the base mft
	 * record.
	 */
	if (ntfs_attr_record_is_only_one(m, a))
		panic("%s(): ntfs_attr_record_is_only_one(m, a)\n",
				__FUNCTION__);
	/*
	 * If the attribute to be resized is the standard information, index
	 * root, or unnamed $DATA attribute try to move other attributes out
	 * into extent mft records.  If none of these then move the attribute
	 * to be resized out to an extent mft record.
	 */
	type = ni->type;
	if (type != AT_STANDARD_INFORMATION && type != AT_INDEX_ROOT &&
			(type != AT_DATA || ni->name_len)) {
		lck_rw_lock_shared(&base_ni->attr_list_rl.lock);
		err = ntfs_attr_record_move(&ctx);
		lck_rw_unlock_shared(&base_ni->attr_list_rl.lock);
		if (!err) {
			/* The attribute has moved so update our variables. */
			m = ctx.m;
			a = ctx.a;
			/* The resize will now succeed. */
			goto retry_resize;
		}
		ntfs_error(vol->mp, "Failed to move attribute type 0x%x out "
				"of base mft_no 0x%llx into an extent mft "
				"record (error %d).", le32_to_cpu(type),
				base_ni->mft_no, err);
		goto unm_err;
	}
	type = AT_UNUSED;
	/*
	 * The number of free bytes needed in the mft record so the resize can
	 * succeed.
	 */
	bytes_needed = arec_size - le32_to_cpu(a->length);
	/*
	 * The MFT reference of the mft record in which the attribute to be
	 * made non-resident is located.
	 */
	mref = MK_LE_MREF(base_ni->mft_no, base_ni->seq_no);
	al_ofs = base_ni->attr_list_size;
	al_end = base_ni->attr_list + al_ofs;
next_pass:
	ntfs_attr_search_ctx_init(&actx, base_ni, base_m);
	actx.is_iteration = 1;
	do {
		ntfschar *a_name;
		ATTR_LIST_ENTRY *al_entry;

		/* Get the next attribute in the mft record. */
		err = ntfs_attr_find_in_mft_record(type, NULL, 0, NULL, 0,
				&actx);
		if (err) {
			if (err == ENOENT) {
				/*
				 * If we have more passes to go do the next
				 * pass which will try harder to move things
				 * out of the way.
				 */
				if (type == AT_UNUSED) {
					type = AT_DATA;
					goto next_pass;
				}
				/*
				 * TODO: Need to get these cases triggered and
				 * then need to run chkdsk to check for
				 * validity of moving these attributes out of
				 * the base mft record.
				 */
				if (type == AT_DATA) {
					type = AT_INDEX_ROOT;
					goto next_pass;
				}
				if (type == AT_INDEX_ROOT) {
					type = AT_STANDARD_INFORMATION;
					goto next_pass;
				}
				/*
				 * We can only get here when the attribute to
				 * be made non-resident is the standard
				 * information attribute and for some reason it
				 * does not exist in the mft record.  That can
				 * only happen with some sort of corruption or
				 * due to a bug.
				 */
				ntfs_error(vol->mp, "Standard information "
						"attribute is missing from "
						"mft_no 0x%llx.  Run chkdsk.",
						(unsigned long long)
						base_ni->mft_no);
				err = EIO;
				NVolSetErrors(vol);
				goto unm_err;
			}
			ntfs_error(vol->mp, "Failed to iterate over attribute "
					"records in base mft record 0x%llx "
					"(error %d).",
					(unsigned long long)base_ni->mft_no,
					err);
			goto unm_err;
		}
		a = actx.a;
		if (type == AT_UNUSED) {
			/*
			 * Skip the attribute list attribute itself as that is
			 * not represented inside itself and we cannot move it
			 * out anyway.
			 *
			 * Also, do not touch standard information, index root,
			 * and unnamed $DATA attributes.  They will be moved
			 * out to extent mft records in later passes if really
			 * necessary.
			 */
			if (a->type == AT_ATTRIBUTE_LIST ||
					a->type == AT_STANDARD_INFORMATION ||
					a->type == AT_INDEX_ROOT ||
					(a->type == AT_DATA &&
					!a->name_length))
				continue;
		}
		/*
		 * Move the attribute out to an extent mft record and update
		 * its attribute list entry.
		 *
		 * But first find the attribute list entry matching the
		 * attribute record so it can be updated.
		 */
		a_name = (ntfschar*)((u8*)a + le16_to_cpu(a->name_offset));
		al_entry = (ATTR_LIST_ENTRY*)base_ni->attr_list;
		do {
			/*
			 * The attribute must be present in the attribute list
			 * attribute or something is corrupt.
			 */
			if ((u8*)al_entry >= al_end || !al_entry->length) {
				ntfs_error(vol->mp, "Attribute type 0x%x not "
						"found in attribute list "
						"attribute of base mft record "
						"0x%llx.  Run chkdsk.",
						(unsigned)le32_to_cpu(a->type),
						(unsigned long long)
						base_ni->mft_no);
				NVolSetErrors(vol);
				err = EIO;
				goto unm_err;
			}
			if (al_entry->mft_reference == mref &&
					al_entry->instance == a->instance) {
				/*
				 * We found the entry, stop looking but first
				 * perform a quick sanity check that we really
				 * do have the correct attribute record.
				 */
				if (al_entry->type == a->type &&
						ntfs_are_names_equal(
						(ntfschar*)((u8*)al_entry +
						al_entry->name_offset),
						al_entry->name_length, a_name,
						a->name_length, TRUE,
						vol->upcase, vol->upcase_len))
					break;
				ntfs_error(vol->mp, "Found corrupt attribute "
						"list attribute when looking "
						"for attribute type 0x%x in "
						"attribute list attribute of "
						"base mft record 0x%llx.  Run "
						"chkdsk.",
						(unsigned)le32_to_cpu(a->type),
						(unsigned long long)
						base_ni->mft_no);
				NVolSetErrors(vol);
				err = EIO;
				goto unm_err;
			}
			/* Go to the next attribute list entry. */
			al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
					le16_to_cpu(al_entry->length));
		} while (1);
		/* Finally, move the attribute to an extent record. */
		err = ntfs_attr_record_move_for_attr_list_attribute(&actx,
				al_entry, &ctx, NULL);
		if (err) {
			ntfs_error(vol->mp, "Failed to move attribute type "
					"0x%x out of base mft record 0x%llx "
					"and into an extent mft record (error "
					"%d).  Run chkdsk.",
					(unsigned)le32_to_cpu(a->type),
					(unsigned long long)base_ni->mft_no,
					err);
			NVolSetErrors(vol);
			goto unm_err;
		}
		/*
		 * If the modified attribute list entry is before the current
		 * start of attribute list modification we need to sync this
		 * entry as well.  For simplicity we just set @al_ofs to the
		 * new value thus syncing everything starting at that offset.
		 */
		if ((u8*)al_entry - base_ni->attr_list < (long)al_ofs) {
			al_ofs = (u8*)al_entry - base_ni->attr_list;
			al_dirty = TRUE;
		}
		/*
		 * If we moved the attribute to be made non-resident we will
		 * now have enough space so retry the resize.
		 */
		if (ctx.ni != base_ni) {
			/*
			 * @ctx is not in the base mft record, map the extent
			 * inode it is in and if it is mapped at a different
			 * address than before update the pointers in @ctx.
			 */
retry_map:
			err = ntfs_mft_record_map(ctx.ni, &m);
			if (err) {
				/*
				 * Something bad has happened.  If out of
				 * memory retry till it succeeds.  Any other
				 * errors are fatal and we have to abort.
				 *
				 * We do not need to undo anything as the
				 * metadata is self-consistent except for the
				 * attribute list attribute which we need to
				 * write out.
				 */
				if (err == ENOMEM) {
					(void)thread_block(
							THREAD_CONTINUE_NULL);
					goto retry_map;
				}
				ctx.ni = base_ni;
				goto unm_err;
			}
			if (ctx.m != m) {
				ctx.a = (ATTR_RECORD*)((u8*)m +
						((u8*)ctx.a - (u8*)ctx.m));
				ctx.m = m;
			}
			a = ctx.a;
			goto retry_resize;
		}
		/* If we now have enough space retry the resize. */
		if (bytes_needed <= le32_to_cpu(m->bytes_allocated) -
				le32_to_cpu(m->bytes_in_use)) {
			a = ctx.a;
			goto retry_resize;
		}
	} while (1);
do_switch:
	/*
	 * Convert the resident part of the attribute record to describe a
	 * non-resident attribute.
	 */
	a->non_resident = 1;
	/* Move the attribute name if it exists and update the offset. */
	if (a->name_length)
		memmove((u8*)a + name_ofs,
				(u8*)a + le16_to_cpu(a->name_offset),
				a->name_length * sizeof(ntfschar));
	a->name_offset = cpu_to_le16(name_ofs);
	/* Setup the fields specific to non-resident attributes. */
	a->lowest_vcn = 0;
	a->highest_vcn = cpu_to_sle64((new_size - 1) >>
			vol->cluster_size_shift);
	a->mapping_pairs_offset = cpu_to_le16(mp_ofs);
	bzero(&a->reservedN, sizeof(a->reservedN));
	a->allocated_size = cpu_to_sle64(new_size);
	a->data_size = a->initialized_size = cpu_to_sle64(attr_size);
	a->compression_unit = 0;
	if (NInoSparse(ni) || NInoCompressed(ni)) {
		if (NInoCompressed(ni) || vol->major_ver <= 1)
			a->compression_unit = NTFS_COMPRESSION_UNIT;
		a->compressed_size = a->allocated_size;
	}
	/*
	 * Generate the mapping pairs array into the attribute record.
	 *
	 * This cannot fail as we have already checked the size we need to
	 * build the mapping pairs array.
	 */
	err = ntfs_mapping_pairs_build(vol, (s8*)a + mp_ofs, arec_size - mp_ofs,
			ni->rl.elements ? ni->rl.rl : NULL, 0, -1, NULL);
	if (err)
		panic("%s(): err\n", __FUNCTION__);
	/* Setup the in-memory attribute structure to be non-resident. */
	lck_spin_lock(&ni->size_lock);
	ni->allocated_size = new_size;
	if (NInoSparse(ni) || NInoCompressed(ni)) {
		ni->compressed_size = ni->allocated_size;
		if (a->compression_unit) {
			ni->compression_block_size = 1U <<
					(a->compression_unit +
					vol->cluster_size_shift);
			ni->compression_block_size_shift =
					ffs(ni->compression_block_size) - 1;
			ni->compression_block_clusters = 1U <<
					a->compression_unit;
		} else {
			ni->compression_block_size = 0;
			ni->compression_block_size_shift = 0;
			ni->compression_block_clusters = 0;
		}
	}
	lck_spin_unlock(&ni->size_lock);
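	/*
	 * A worked example of the sizes set up above: with the default
	 * compression unit of 4 and 4KiB clusters (cluster_size_shift = 12)
	 * the compression block size is 1 << (4 + 12) = 64KiB, the shift is
	 * 16, and each compression block spans 1 << 4 = 16 clusters.
	 */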
	/*
	 * This needs to be last since we are not allowed to fail once we flip
	 * this switch.
	 */
	NInoSetNonResident(ni);
	/* Mark the mft record dirty, so it gets written back. */
	NInoSetMrecNeedsDirtying(ctx.ni);
	if (ctx.ni != base_ni)
		ntfs_extent_mft_record_unmap(ctx.ni);
	if (al_dirty) {
		ntfs_attr_search_ctx_reinit(&actx);
		err = ntfs_attr_list_sync(base_ni, al_ofs, &actx);
		if (err) {
			ntfs_error(vol->mp, "Failed to write attribute list "
					"attribute of mft_no 0x%llx (error "
					"%d).  Leaving corrupt metadata.  Run "
					"chkdsk.",
					(unsigned long long)base_ni->mft_no,
					err);
			NVolSetErrors(vol);
		}
		/* Mark the base mft record dirty, so it gets written back. */
		NInoSetMrecNeedsDirtying(base_ni);
	}
	ntfs_mft_record_unmap(base_ni);
	lck_rw_unlock_exclusive(&ni->rl.lock);
	/*
	 * We have modified the allocated size.  If the ntfs inode is the base
	 * inode, cause the sizes to be written to all the directory index
	 * entries pointing to the base inode when the inode is written to
	 * disk.  Do not do this for directories as they have both sizes set to
	 * zero in their index entries.
	 */
	if (ni == base_ni && !S_ISDIR(ni->mode))
		NInoSetDirtySizes(ni);
	if (upl)
		ntfs_page_unmap(ni, upl, pl, TRUE);
	ntfs_debug("Done.");
	return 0;
unm_err:
	if (ctx.ni != base_ni) {
		NInoSetMrecNeedsDirtying(ctx.ni);
		ntfs_extent_mft_record_unmap(ctx.ni);
	}
	if (al_dirty) {
		ntfs_attr_search_ctx_reinit(&actx);
		err2 = ntfs_attr_list_sync(base_ni, al_ofs, &actx);
		if (err2) {
			ntfs_error(vol->mp, "Failed to write attribute list "
					"attribute in error code path (error "
					"%d).  Leaving corrupt metadata.  Run "
					"chkdsk.", err2);
			NVolSetErrors(vol);
		}
	}
	NInoSetMrecNeedsDirtying(base_ni);
	ntfs_mft_record_unmap(base_ni);
rl_err:
	if (upl) {
		/*
		 * If the page was valid release it back to the VM.  If it was
		 * not valid throw it away altogether.
		 * TODO: We could wrap this up in a ntfs_page_unmap_ext()
		 * function which takes an extra parameter to specify whether
		 * to keep the page or to dump it if it is invalid...
		 */
		if (upl_valid_page(pl, 0))
			ntfs_page_unmap(ni, upl, pl, FALSE);
		else
			ntfs_page_dump(ni, upl, pl);
	}
page_err:
	if (ni->rl.elements > 0) {
		err2 = ntfs_cluster_free_from_rl(vol, ni->rl.rl, 0, -1, NULL);
		if (err2) {
			ntfs_error(vol->mp, "Failed to undo cluster "
					"allocation (error %d).  Run chkdsk "
					"to recover the lost space.", err2);
			NVolSetErrors(vol);
		}
		err2 = ntfs_rl_truncate_nolock(vol, &ni->rl, 0);
		if (err2)
			panic("%s(): err2\n", __FUNCTION__);
	}
unl_err:
	lck_rw_unlock_exclusive(&ni->rl.lock);
	if (err == EINVAL)
		err = EIO;
	return err;
}

/**
 * ntfs_attr_record_move_for_attr_list_attribute - move an attribute record
 * @al_ctx:		search context describing the attribute to move
 * @al_entry:		attribute list entry of the attribute to move
 * @ctx:		search context of attribute being resized or NULL
 * @remap_needed:	[OUT] pointer to remap_needed variable or NULL
 *
 * Move the attribute described by the attribute search context @al_ctx and
 * @al_entry from its mft record to a newly allocated extent mft record and
 * update @ctx to reflect this fact (if @ctx is not NULL, otherwise it is
 * ignored).
 *
 * If @ctx is present and describes the attribute being moved out then
 * *@remap_needed is set to true.  If the caller is not interested in this then
 * @remap_needed can be passed as NULL in which case it is ignored.
 *
 * Return 0 on success and errno on error.
 */
errno_t ntfs_attr_record_move_for_attr_list_attribute(
		ntfs_attr_search_ctx *al_ctx, ATTR_LIST_ENTRY *al_entry,
		ntfs_attr_search_ctx *ctx, BOOL *remap_needed)
{
	ntfs_inode *base_ni, *ni;
	MFT_RECORD *m;
	ATTR_RECORD *a;
	unsigned attr_len;
	errno_t err;

	base_ni = al_ctx->ni;
	ntfs_debug("Entering for mft_no 0x%llx, attribute type 0x%x.",
			(unsigned long long)base_ni->mft_no,
			(unsigned)le32_to_cpu(al_entry->type));
	/*
	 * Allocate a new extent mft record, attach it to the base ntfs inode
	 * and set up the search context to point to it.
	 *
	 * FIXME: We should go through all existing extent mft records which
	 * will all be attached to @base_ni->extent_nis and for each of them we
	 * should map the extent mft record, check for free space and if we
	 * find enough free space for the attribute being moved we should move
	 * the attribute there instead of allocating a new extent mft record.
	 */
	err = ntfs_mft_record_alloc(base_ni->vol, NULL, NULL, base_ni, &ni, &m,
			&a);
	if (err) {
		ntfs_error(base_ni->vol->mp, "Failed to move attribute to a "
				"new mft record because allocation of the new "
				"mft record failed (error %d).", err);
		return err;
	}
	attr_len = le32_to_cpu(al_ctx->a->length);
	/* Make space for the attribute extent and copy it into place. */
	err = ntfs_attr_record_make_space(m, a, attr_len);
	/*
	 * This cannot fail as the new mft record must have enough space to
	 * hold the attribute record given it fitted inside the old mft record.
	 */
	if (err)
		panic("%s(): err\n", __FUNCTION__);
	memcpy(a, al_ctx->a, attr_len);
	/* Delete the attribute record from the base mft record. */
	ntfs_attr_record_delete_internal(al_ctx->m, al_ctx->a);
	/*
	 * We moved the attribute out of the mft record thus @al_ctx->a now
	 * points to the next attribute.  Since the caller will want to look at
	 * that next attribute we set @al_ctx->is_first so that the next call
	 * to ntfs_attr_find_in_mft_record() will return the currently pointed
	 * at attribute.
	 */
	al_ctx->is_first = 1;
	/*
	 * Change the moved attribute record to reflect the new sequence number
	 * and the current attribute list attribute entry to reflect the new
	 * mft record reference and sequence number.
	 */
	al_entry->mft_reference = MK_LE_MREF(ni->mft_no, ni->seq_no);
	a->instance = al_entry->instance = m->next_attr_instance;
	/*
	 * Increment the next attribute instance number in the mft record as we
	 * consumed the old one.
	 */
	m->next_attr_instance = cpu_to_le16(
			(le16_to_cpu(m->next_attr_instance) + 1) & 0xffff);
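	/*
	 * Note the instance counter is only 16 bits wide, hence the masking
	 * with 0xffff: once the counter reaches 0xffff the next increment
	 * wraps it back to 0.
	 */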
	/*
	 * Ensure the changes make it to disk later and unmap the mft record as
	 * we do not need it any more right now.
	 */
	NInoSetMrecNeedsDirtying(ni);
	ntfs_extent_mft_record_unmap(ni);
	/*
	 * Update @ctx if the attribute it describes is still in the base mft
	 * record and the attribute that was deleted was either in front of the
	 * attribute described by @ctx or it was the attribute described by
	 * @ctx.
	 *
	 * FIXME: When we fix the above FIXME and we thus start to place
	 * multiple attributes in each extent mft record we will need to update
	 * @ctx in a more complex fashion here.
	 */
	if (ctx && ctx->ni == base_ni) {
		if ((u8*)al_ctx->a < (u8*)ctx->a)
			ctx->a = (ATTR_RECORD*)((u8*)ctx->a - attr_len);
		else if (al_ctx->a == ctx->a) {
			ctx->m = m;
			ctx->a = a;
			ctx->ni = ni;
			if (remap_needed)
				*remap_needed = TRUE;
		}
	}
	ntfs_debug("Done.");
	return 0;
}

/**
 * ntfs_attr_record_move - move an attribute record to another mft record
 * @ctx:	attribute search context describing the attribute to move
 *
 * Move the attribute described by the attribute search context @ctx from its
 * mft record to a newly allocated extent mft record.  On successful return
 * @ctx is setup to point to the moved attribute.
 *
 * Return 0 on success and errno on error.  On error, the attribute search
 * context is invalid and must be either reinitialized or released.
 *
 * NOTE: This function expects that an attribute list attribute is already
 * present.
 *
 * Locking: Caller must hold lock on attribute list attribute runlist, i.e.
 *	    @ctx->base_ni->attr_list_rl.lock.
 */
errno_t ntfs_attr_record_move(ntfs_attr_search_ctx *ctx)
{
	ntfs_inode *base_ni, *ni;
	MFT_RECORD *m;
	ATTR_RECORD *a;
	u8 *a_copy;
	unsigned attr_len;
	errno_t err, err2;
	ntfs_attr_search_ctx al_ctx;
	static const char es[] = "  Leaving inconsistent metadata.  Unmount "
			"and run chkdsk.";

	base_ni = ctx->base_ni;
	if (!base_ni || !NInoAttrList(base_ni))
		panic("%s(): !base_ni || !NInoAttrList(base_ni)\n",
				__FUNCTION__);
	ni = ctx->ni;
	m = ctx->m;
	a = ctx->a;
	ntfs_debug("Entering for base mft_no 0x%llx, extent mft_no 0x%llx, "
			"attribute type 0x%x.",
			(unsigned long long)base_ni->mft_no,
			(unsigned long long)ni->mft_no,
			(unsigned)le32_to_cpu(a->type));
	attr_len = le32_to_cpu(a->length);
	/* Allocate a temporary buffer to hold the attribute to be moved. */
	a_copy = OSMalloc(attr_len, ntfs_malloc_tag);
	if (!a_copy) {
		ntfs_error(ni->vol->mp, "Not enough memory to allocate "
				"temporary attribute buffer.");
		return ENOMEM;
	}
	/*
	 * Copy the attribute to the temporary buffer and delete it from its
	 * original mft record.
	 */
	memcpy(a_copy, a, attr_len);
	ntfs_attr_record_delete_internal(m, a);
	/*
	 * This function will never be called if the attribute is the only
	 * attribute in the mft record as this would not gain anything thus
	 * report a bug in this case.
	 */
	if (((ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset)))->type ==
			AT_END)
		panic("%s(): Is only attribute in mft record!\n", __FUNCTION__);
	/* Ensure the changes make it to disk later. */
	NInoSetMrecNeedsDirtying(ni);
	/*
	 * We have finished with this mft record thus if it is an extent mft
	 * record we release it.  We do this by hand as we want to keep the
	 * current attribute list attribute entry.
	 */
	if (ni != base_ni)
		ntfs_extent_mft_record_unmap(ni);
	/*
	 * Find the attribute list attribute in the base mft record.  Doing
	 * this now hugely simplifies error handling.
	 */
	ntfs_attr_search_ctx_init(&al_ctx, base_ni, ctx->base_m);
	err = ntfs_attr_find_in_mft_record(AT_ATTRIBUTE_LIST, AT_UNNAMED, 0,
			NULL, 0, &al_ctx);
	if (err) {
		ntfs_error(base_ni->vol->mp, "Failed to move attribute to a "
				"new mft record because looking up the "
				"attribute list attribute in the base inode "
				"failed (error %d).", err);
		goto undo_delete;
	}
	/*
	 * Allocate a new extent mft record, attach it to the base ntfs inode
	 * and set up the search context to point to it.
	 */
	err = ntfs_mft_record_alloc(base_ni->vol, NULL, NULL, base_ni, &ni, &m,
			&a);
	if (err) {
		ntfs_error(base_ni->vol->mp, "Failed to move attribute to a "
				"new mft record because allocation of the new "
				"mft record failed (error %d).", err);
		goto undo_delete;
	}
	ctx->ni = ni;
	ctx->m = m;
	ctx->a = a;
	/* Make space for the attribute extent and copy it into place. */
	err = ntfs_attr_record_make_space(m, a, attr_len);
	/*
	 * This cannot fail as the new mft record must have enough space to
	 * hold the attribute record given it fitted inside the old mft record.
	 */
	if (err)
		panic("%s(): err (ntfs_attr_record_make_space())\n",
				__FUNCTION__);
	memcpy(a, a_copy, attr_len);
	/* We do not need the temporary buffer any more. */
	OSFree(a_copy, attr_len, ntfs_malloc_tag);
	/*
	 * Change the moved attribute record to reflect the new sequence number
	 * and the current attribute list attribute entry to reflect the new
	 * mft record reference and sequence number.
	 */
	ctx->al_entry->mft_reference = MK_LE_MREF(ni->mft_no, ni->seq_no);
	a->instance = ctx->al_entry->instance = m->next_attr_instance;
	/*
	 * Increment the next attribute instance number in the mft record as we
	 * consumed the old one.
	 */
	m->next_attr_instance = cpu_to_le16(
			(le16_to_cpu(m->next_attr_instance) + 1) & 0xffff);
	/* Ensure the changes make it to disk later. */
	NInoSetMrecNeedsDirtying(ni);
	/*
	 * Finally, sync the modified attribute list attribute from its in-
	 * memory buffer to the on-disk metadata.
	 */
	a = al_ctx.a;
	if (a->non_resident) {
		unsigned ofs;

		ofs = (u8*)ctx->al_entry - base_ni->attr_list;
		err = ntfs_rl_write(base_ni->vol, base_ni->attr_list,
				base_ni->attr_list_size,
				&base_ni->attr_list_rl, ofs,
				le16_to_cpu(ctx->al_entry->length));
		if (err) {
			ntfs_error(base_ni->vol->mp, "Failed to update "
					"on-disk attribute list attribute of "
					"mft_no 0x%llx (error %d).%s",
					(unsigned long long)base_ni->mft_no,
					err, es);
			return err;
		}
	} else {
		ATTR_LIST_ENTRY *al_entry;

		al_entry = (ATTR_LIST_ENTRY*)((u8*)a +
				le16_to_cpu(a->value_offset) +
				((u8*)ctx->al_entry - base_ni->attr_list));
		al_entry->mft_reference = ctx->al_entry->mft_reference;
		al_entry->instance = ctx->al_entry->instance;
		/* Ensure the changes make it to disk later. */
		NInoSetMrecNeedsDirtying(base_ni);
	}
	ntfs_debug("Done.");
	return 0;
undo_delete:
	/*
	 * Map the old mft record again (if we unmapped it) and re-insert the
	 * deleted attribute record in its old place.
	 */
	ni = ctx->ni;
	if (ni != base_ni) {
		err2 = ntfs_mft_record_map(ni, &m);
		if (err2) {
			/*
			 * Make it safe to release the attribute search
			 * context.
			 */
			ctx->ni = base_ni;
			ntfs_error(base_ni->vol->mp, "Failed to restore "
					"attribute in mft_no 0x%llx after "
					"allocation failure (error %d).%s",
					(unsigned long long)base_ni->mft_no,
					err2, es);
			NVolSetErrors(base_ni->vol);
			goto err;
		}
		/*
		 * If the extent mft record was mapped into a different
		 * address, adjust the mft record and attribute record pointers
		 * in the search context.
		 */
		if (m != ctx->m) {
			ctx->a = (ATTR_RECORD*)((u8*)m + ((u8*)ctx->a -
					(u8*)ctx->m));
			ctx->m = m;
		}
	}
	/*
	 * Creating space for the attribute in its old mft record cannot fail
	 * because we only just deleted the attribute from the mft record thus
	 * there must be enough space in it.
	 */
	err2 = ntfs_attr_record_make_space(ctx->m, ctx->a, attr_len);
	if (err2)
		panic("%s(): err2\n", __FUNCTION__);
	memcpy(ctx->a, a_copy, attr_len);
	/* Ensure the changes make it to disk later. */
	NInoSetMrecNeedsDirtying(ni);
err:
	OSFree(a_copy, attr_len, ntfs_malloc_tag);
	return err;
}

/**
 * ntfs_attr_set_initialized_size - extend the initialized size of an attribute
 * @ni:			ntfs inode whose sizes to extend
 * @new_init_size:	the new initialized size to set @ni to or -1
 *
 * If @new_init_size is >= 0, set the initialized size in the ntfs inode @ni
 * to @new_init_size.  Otherwise ignore @new_init_size and do not change the
 * initialized size in @ni.
 *
 * If the new initialized size is bigger than the data size of the ntfs inode,
 * update the data size to equal the initialized size.  In this case also set
 * the size in the ubc.
 * 
 * Then, set the data and initialized sizes in the attribute record of the
 * attribute specified by the ntfs inode @ni to the values in the ntfs inode
 * @ni.
 *
 * Thus, if @new_init_size is >= 0, both @ni and its underlying attribute have
 * their initialized size set to @new_init_size and if @new_init_size is < 0,
 * the underlying attribute initialized size is set to the initialized size of
 * the ntfs inode @ni.
 *
 * Note the caller is responsible for any zeroing that needs to happen between
 * the old initialized size and @new_init_size.
 *
 * Note when this function is called for resident attributes it requires that
 * the initialized size equals the data size as anything else does not make
 * sense for resident attributes.  Further, @new_init_size must be >= 0, i.e. a
 * specific value must be provided as the call would otherwise be pointless as
 * there is no such thing as an initialized size for resident attributes.
 *
 * Return 0 on success and errno on error.
 *
 * Locking: The caller must hold @ni->lock on the inode for writing.
 */
errno_t ntfs_attr_set_initialized_size(ntfs_inode *ni, s64 new_init_size)
{
	ntfs_inode *base_ni;
	MFT_RECORD *m;
	ntfs_attr_search_ctx *ctx;
	ATTR_RECORD *a;
	errno_t err;
	BOOL data_size_updated = FALSE;

#ifdef DEBUG
	lck_spin_lock(&ni->size_lock);
	ntfs_debug("Entering for mft_no 0x%llx, attribute type 0x%x, old data "
			"size 0x%llx, old initialized size 0x%llx, new "
			"initialized size 0x%llx.",
			(unsigned long long)ni->mft_no,
			(unsigned)le32_to_cpu(ni->type),
			(unsigned long long)ni->data_size,
			(unsigned long long)ni->initialized_size,
			(unsigned long long)new_init_size);
	lck_spin_unlock(&ni->size_lock);
#endif /* DEBUG */
	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	/* Map, pin, and lock the mft record. */
	err = ntfs_mft_record_map(base_ni, &m);
	if (err)
		goto err;
	ctx = ntfs_attr_search_ctx_get(base_ni, m);
	if (!ctx) {
		err = ENOMEM;
		goto unm_err;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0,
			ctx);
	if (err) {
		if (err == ENOENT)
			err = EIO;
		goto put_err;
	}
	a = ctx->a;
	lck_spin_lock(&ni->size_lock);
	if (new_init_size >= 0) {
		if (new_init_size < ni->initialized_size)
			panic("%s(): new_init_size < ni->initialized_size\n",
					__FUNCTION__);
		/*
		 * If the new initialized size exceeds the data size extend the
		 * data size to cover the new initialized size.
		 */
		if (new_init_size > ni->data_size) {
			ni->data_size = new_init_size;
			if (a->non_resident)
				a->data_size = cpu_to_sle64(new_init_size);
			else {
				if (NInoNonResident(ni))
					panic("%s(): NInoNonResident(ni)\n",
							__FUNCTION__);
				if (new_init_size >> 32)
					panic("%s(): new_init_size >> 32\n",
							__FUNCTION__);
				if (new_init_size > le32_to_cpu(a->length) -
						le16_to_cpu(a->value_offset))
					panic("%s(): new_init_size > "
							"le32_to_cpu("
							"a->length) - "
							"le16_to_cpu("
							"a->value_offset)\n",
							__FUNCTION__);
				a->value_length = cpu_to_le32(new_init_size);
			}
			data_size_updated = TRUE;
			if (ni == base_ni && !S_ISDIR(ni->mode))
				NInoSetDirtySizes(ni);
		}
		ni->initialized_size = new_init_size;
	} else {
		if (!a->non_resident)
			panic("%s(): !a->non_resident\n", __FUNCTION__);
		if (ni->initialized_size > ni->data_size)
			panic("%s(): ni->initialized_size > ni->data_size\n",
					__FUNCTION__);
		new_init_size = ni->initialized_size;
	}
	if (a->non_resident) {
		if (!NInoNonResident(ni))
			panic("%s(): !NInoNonResident(ni)\n", __FUNCTION__);
		a->initialized_size = cpu_to_sle64(new_init_size);
	}
	lck_spin_unlock(&ni->size_lock);
	/*
	 * If this is a directory B+tree index allocation attribute also update
	 * the sizes in the base inode.
	 */
	if (ni->name == I30 && ni->type == AT_INDEX_ALLOCATION) {
		lck_spin_lock(&base_ni->size_lock);
		if (data_size_updated)
			base_ni->data_size = new_init_size;
		base_ni->initialized_size = new_init_size;
		lck_spin_unlock(&base_ni->size_lock);
	}
	/* Mark the mft record dirty to ensure it gets written out. */
	NInoSetMrecNeedsDirtying(ctx->ni);
put_err:
	ntfs_attr_search_ctx_put(ctx);
unm_err:
	ntfs_mft_record_unmap(base_ni);
	if (data_size_updated) {
		if (!ubc_setsize(ni->vn, new_init_size))
			panic("%s(): ubc_setsize() failed.\n", __FUNCTION__);
	}
	if (!err)
		ntfs_debug("Done.");
	else {
err:
		ntfs_error(ni->vol->mp, "Failed (error %d).", err);
	}
	return err;
}
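
/*
 * A minimal usage sketch (hypothetical caller, locking and error handling
 * elided): a caller that has just zeroed the region up to a new initialized
 * size would propagate it to the attribute record with
 *
 *	err = ntfs_attr_set_initialized_size(ni, new_init_size);
 *
 * whilst a caller that has already updated @ni->initialized_size itself, as
 * ntfs_attr_extend_initialized() below does, would sync the value with
 *
 *	err = ntfs_attr_set_initialized_size(ni, -1);
 */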

/**
 * ntfs_attr_extend_initialized - extend the initialized size of an attribute
 * @ni:			ntfs inode of the attribute to extend
 * @new_init_size:	requested new initialized size in bytes
 *
 * Extend the initialized size of an attribute described by the ntfs inode @ni
 * to @new_init_size bytes.  This involves zeroing any non-sparse space between
 * the old initialized size and @new_init_size both in the VM page cache and on
 * disk (if relevant complete pages are already uptodate in the VM page cache
 * then these are simply marked dirty).
 *
 * As a side-effect, the data size as well as the ubc size may be incremented
 * as, in the resident attribute case, it is tied to the initialized size and,
 * in the non-resident attribute case, it may not fall below the initialized
 * size.
 *
 * Note that if the attribute is resident, we do not need to touch the VM page
 * cache at all.  This is because if the VM page is not uptodate we bring it
 * uptodate later, when doing the write to the mft record since we then already
 * have the page mapped.  And if the page is uptodate, the non-initialized
 * region will already have been zeroed when the page was brought uptodate and
 * the region may in fact already have been overwritten with new data via
 * mmap() based writes, so we cannot just zero it.  And since POSIX specifies
 * that the behaviour of resizing a file whilst it is mmap()ped is unspecified,
 * we choose not to do zeroing and thus we do not need to touch the VM page at
 * all.
 *
 * Return 0 on success and errno on error.  In the case that an error is
 * encountered it is possible that the initialized size and/or the data size
 * will already have been incremented some way towards @new_init_size but it is
 * guaranteed that if this is the case, the necessary zeroing will also have
 * happened and that all metadata is self-consistent.
 *
 * Locking: - Caller must hold @ni->lock on the inode for writing.
 *	    - The runlist @ni must be unlocked as it is taken for writing.
 */
errno_t ntfs_attr_extend_initialized(ntfs_inode *ni, const s64 new_init_size)
{
	VCN vcn, end_vcn;
	s64 size, old_init_size, ofs;
	ntfs_volume *vol;
	ntfs_inode *base_ni;
	MFT_RECORD *m;
	ATTR_RECORD *a;
	ntfs_attr_search_ctx *ctx;
	u8 *kattr;
	ntfs_rl_element *rl = NULL;
	errno_t err;
	unsigned attr_len;
	BOOL locked, write_locked, is_sparse, mark_sizes_dirty;

	lck_spin_lock(&ni->size_lock);
	if (new_init_size > ni->allocated_size)
		panic("%s(): new_init_size > ni->allocated_size\n",
				__FUNCTION__);
	size = ni->data_size;
	old_init_size = ni->initialized_size;
	lck_spin_unlock(&ni->size_lock);
	if (new_init_size <= old_init_size)
		panic("%s(): new_init_size <= old_init_size\n",
				__FUNCTION__);
	mark_sizes_dirty = write_locked = FALSE;
	vol = ni->vol;
	ntfs_debug("Entering for mft_no 0x%llx, old initialized size 0x%llx, "
			"new initialized size 0x%llx, old data size 0x%llx.",
			(unsigned long long)ni->mft_no,
			(unsigned long long)old_init_size,
			(unsigned long long)new_init_size,
			(unsigned long long)size);
	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	/* Use goto to reduce indentation; we need the label below anyway. */
	if (NInoNonResident(ni))
		goto do_non_resident_extend;
	if (old_init_size != size)
		panic("%s(): old_init_size != size\n", __FUNCTION__);
	/* Map, pin, and lock the mft record. */
	err = ntfs_mft_record_map(base_ni, &m);
	if (err)
		goto err;
	ctx = ntfs_attr_search_ctx_get(base_ni, m);
	if (!ctx) {
		err = ENOMEM;
		goto unm_err;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0,
			ctx);
	if (err) {
		if (err == ENOENT)
			err = EIO;
		goto put_err;
	}
	a = ctx->a;
	if (a->non_resident)
		panic("%s(): a->non_resident\n", __FUNCTION__);
	/* The total length of the attribute value. */
	attr_len = le32_to_cpu(a->value_length);
	if (size != attr_len)
		panic("%s(): size != attr_len\n", __FUNCTION__);
	/*
	 * Do the zeroing in the mft record and update the attribute size in
	 * the mft record.
	 */
	kattr = (u8*)a + le16_to_cpu(a->value_offset);
	bzero(kattr + attr_len, new_init_size - attr_len);
	a->value_length = cpu_to_le32((u32)new_init_size);
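	/*
	 * For example, extending a resident value from 0x100 to 0x180 bytes
	 * zeroes the 0x80 bytes at @kattr + 0x100 and sets the value length
	 * to 0x180; the in-memory sizes and the ubc size follow below.
	 */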
	/* Update the sizes in the ntfs inode as well as the ubc size. */
	lck_spin_lock(&ni->size_lock);
	ni->initialized_size = ni->data_size = size = new_init_size;
	lck_spin_unlock(&ni->size_lock);
	/* Mark the mft record dirty to ensure it gets written out. */
	NInoSetMrecNeedsDirtying(ctx->ni);
	ntfs_attr_search_ctx_put(ctx);
	ntfs_mft_record_unmap(base_ni);
	ubc_setsize(ni->vn, new_init_size);
	mark_sizes_dirty = TRUE;
	goto done;
do_non_resident_extend:
	/*
	 * If the new initialized size @new_init_size exceeds the current data
	 * size we need to extend the file size to the new initialized size.
	 */
	if (new_init_size > size) {
		/* Map, pin, and lock the mft record. */
		err = ntfs_mft_record_map(base_ni, &m);
		if (err)
			goto err;
		ctx = ntfs_attr_search_ctx_get(base_ni, m);
		if (!ctx) {
			err = ENOMEM;
			goto unm_err;
		}
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0,
				NULL, 0, ctx);
		if (err) {
			if (err == ENOENT)
				err = EIO;
			goto put_err;
		}
		a = ctx->a;
		if (!a->non_resident)
			panic("%s(): !a->non_resident\n", __FUNCTION__);
		if (size != sle64_to_cpu(a->data_size))
			panic("%s(): size != sle64_to_cpu(a->data_size)\n",
					__FUNCTION__);
		size = new_init_size;
		lck_spin_lock(&ni->size_lock);
		ni->data_size = new_init_size;
		lck_spin_unlock(&ni->size_lock);
		a->data_size = cpu_to_sle64(new_init_size);
		/* Mark the mft record dirty to ensure it gets written out. */
		NInoSetMrecNeedsDirtying(ctx->ni);
		ntfs_attr_search_ctx_put(ctx);
		ntfs_mft_record_unmap(base_ni);
		mark_sizes_dirty = TRUE;
		ubc_setsize(ni->vn, new_init_size);
	}
	/*
	 * If the attribute is not sparse we can simply map each page between
	 * the old initialized size and the new initialized size which takes
	 * care of any needed zeroing and then unmap the page again marking it
	 * dirty so the VM later causes it to be written out.
	 *
	 * If the file is sparse, on the other hand, things are more
	 * complicated because we want to skip any sparse regions: mapping a
	 * sparse page and then unmapping it again and marking it dirty would
	 * cause the hole to be filled when the page is written out.
	 *
	 * Thus for sparse files we walk the runlist before we start doing
	 * anything and check whether there are any sparse regions between the
	 * old initialized size and the new initialized size.  If there are no
	 * sparse regions we can simply proceed as if this attribute was not
	 * sparse.
	 *
	 * If there are sparse regions then we ensure that all runlist
	 * fragments between the old initialized size and new initialized size
	 * are mapped and then we hold the runlist lock shared and walk the
	 * runlist and only for non-sparse regions do we do the page mapping,
	 * unmapping and dirtying.
	 */
	ofs = old_init_size & ~PAGE_MASK_64;
	write_locked = locked = FALSE;
	is_sparse = (NInoSparse(ni));
	if (is_sparse) {
		BOOL have_holes = FALSE;

		locked = TRUE;
		lck_rw_lock_shared(&ni->rl.lock);
		vcn = ofs >> vol->cluster_size_shift;
		end_vcn = (new_init_size + vol->cluster_size_mask) >>
				vol->cluster_size_shift;
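		/*
		 * The rounding above makes @end_vcn the first VCN beyond the
		 * region to initialize.  For example, with 4KiB clusters
		 * (cluster_size_shift = 12, cluster_size_mask = 0xfff) a new
		 * initialized size of 0x5001 bytes gives
		 * end_vcn = (0x5001 + 0xfff) >> 12 = 6.
		 */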
retry_remap:
		rl = ni->rl.rl;
		if (!ni->rl.elements || vcn < rl->vcn || !rl->length) {
map_vcn:
			if (!write_locked) {
				write_locked = TRUE;
				if (!lck_rw_lock_shared_to_exclusive(
						&ni->rl.lock)) {
					lck_rw_lock_exclusive(&ni->rl.lock);
					goto retry_remap;
				}
			}
			/* Need to map the runlist fragment containing @vcn. */
			err = ntfs_map_runlist_nolock(ni, vcn, NULL);
			if (err) {
				ntfs_error(vol->mp, "Failed to map runlist "
						"fragment (error %d).", err);
				if (err == EINVAL)
					err = EIO;
				goto unl_err;
			}
			rl = ni->rl.rl;
			if (!ni->rl.elements || vcn < rl->vcn || !rl->length)
				panic("%s(): !ni->rl.elements || "
						"vcn < rl[0].vcn || "
						"!rl->length\n", __FUNCTION__);
		}
		/* Seek to the runlist element containing @vcn. */
		while (rl->length && vcn >= rl[1].vcn)
			rl++;
		do {
			/*
			 * If this run is not mapped map it now and start again
			 * as the runlist will have been updated.
			 */
			if (rl->lcn == LCN_RL_NOT_MAPPED) {
				vcn = rl->vcn;
				goto map_vcn;
			}
			/* If this run is not valid abort with an error. */
			if (!rl->length || rl->lcn < LCN_HOLE)
				goto rl_err;
			if (rl->lcn == LCN_HOLE) {
				have_holes = TRUE;
				/*
				 * If the current initialized size is inside
				 * the current run we can move the initialized
				 * size forward to the end of this run taking
				 * care not to go beyond the new initialized
				 * size.
				 *
				 * Note we also have to take care not to move
				 * the initialized size backwards thus we only
				 * have to update the initialized size if the
				 * current offset is above the old initialized
				 * size.
				 */
				if (ofs >> vol->cluster_size_shift >= rl->vcn) {
					ofs = rl[1].vcn <<
							vol->cluster_size_shift;
					if (ofs > old_init_size) {
						if (ofs > new_init_size)
							ofs = new_init_size;
						lck_spin_lock(&ni->size_lock);
						ni->initialized_size = ofs;
						lck_spin_unlock(&ni->size_lock);
						if (ofs == new_init_size)
							goto update_done;
					}
				}
			}
			/* Proceed to the next run. */
			rl++;
		} while (rl->vcn < end_vcn);
		/*
		 * If we encountered sparse regions in the runlist then we need
		 * to keep the runlist lock shared.
		 *
		 * If there were no sparse regions we do not need the runlist
		 * lock at all any more so we release it and we pretend this
		 * attribute is not sparse.
		 */
		if (have_holes) {
			if (write_locked) {
				lck_rw_lock_exclusive_to_shared(&ni->rl.lock);
				write_locked = FALSE;
			}
			/*
			 * We may have moved @ofs forward in which case it will
			 * be cluster aligned instead of page aligned and the
			 * two are not equal when the cluster size is less than
			 * the page size so we need to align at @ofs to the
			 * page size again.
			 */
			ofs &= ~PAGE_MASK_64;
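			/*
			 * For example, with 512-byte clusters and 4KiB pages
			 * a cluster-aligned @ofs of 0x1600 is rounded down to
			 * the page boundary 0x1000 here.
			 */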
			rl = ni->rl.rl;
		} else {
			if (write_locked)
				lck_rw_unlock_exclusive(&ni->rl.lock);
			else
				lck_rw_unlock_shared(&ni->rl.lock);
			locked = FALSE;
			is_sparse = FALSE;
		}
	}
	do {
		/*
		 * If the file is sparse, check if the current page is
		 * completely sparse and if so skip it.
		 *
		 * Otherwise take care of zeroing the uninitialized region.
		 */
		if (is_sparse) {
			/* We need to update @vcn to the current offset @ofs. */
			vcn = ofs >> vol->cluster_size_shift;
			/* Determine the first VCN outside the current page. */
			end_vcn = (ofs + PAGE_SIZE + vol->cluster_size_mask) >>
					vol->cluster_size_shift;
			/* Seek to the runlist element containing @vcn. */
			while (rl->length && vcn >= rl[1].vcn)
				rl++;
			/* If this run is not valid abort with an error. */
			if (!rl->length || rl->lcn < LCN_HOLE)
				goto rl_err;
			/*
			 * @rl is the runlist element containing @ofs, the
			 * current initialized size, and the current @vcn.
			 *
			 * Check whether the current page is completely sparse.
			 * This is complicated slightly by the fact that a page
			 * can span multiple clusters when the cluster size is
			 * less than the page size.
			 *
			 * As an optimization when a sparse run spans more than
			 * one page we forward both @ofs and the initialized
			 * size to the end of the run (ensuring it is page
			 * aligned).
			 */
			do {
				if (rl->lcn >= 0) {
					/* This page is not entirely sparse. */
					goto on_disk_page;
				}
				/* Proceed to the next run. */
				rl++;
				vcn = rl->vcn;
			} while (vcn < end_vcn && rl->length);
			/*
			 * The page is entirely sparse.
			 *
			 * Check how many pages are entirely sparse and move
			 * the initialized size up to the end of the sparse
			 * region ensuring we maintain page alignment.
			 */
			while (rl->lcn == LCN_HOLE && rl->length)
				rl++;
			ofs = (rl->vcn << vol->cluster_size_shift) &
					~PAGE_MASK_64;
			/*
			 * Update the initialized size in the ntfs inode.  This
			 * is enough to make ntfs_vnop_pageout() work.  We
			 * could postpone this until we actually are going to
			 * unmap a page or we have reached the end of the
			 * region to be initialized but we do it now to
			 * minimize our impact on processes that are performing
			 * concurrent mmap() based writes to this attribute.
			 *
			 * FIXME: This is not actually true as the caller is
			 * holding the ntfs inode lock for writing thus no
			 * pageouts on this inode can occur at all.  We
			 * probably need to fix this so we cannot bring the
			 * system out of memory.
			 */
			if (ofs > new_init_size)
				ofs = new_init_size;
			lck_spin_lock(&ni->size_lock);
			ni->initialized_size = ofs;
			lck_spin_unlock(&ni->size_lock);
		} else /* if (!is_sparse) */ {
			upl_t upl;
			upl_page_info_array_t pl;

on_disk_page:
			/*
			 * Read the page.  If the page is not present,
			 * ntfs_page_map() will zero the uninitialized/sparse
			 * regions for us.
			 *
			 * TODO: An optimization would be to do things by hand
			 * taking advantage of dealing with multiple pages at
			 * once instead of working one page at a time.
			 *
			 * FIXME: We are potentially creating a lot of dirty
			 * pages here and since the caller is holding the ntfs
			 * inode lock for writing no pageouts on this inode can
			 * occur at all.  We probably need to fix this so we
			 * cannot bring the system out of memory.
			 */
// TODO: This should never happen.  Just adding it so we can detect if we were
// going to deadlock.  If it triggers need to fix it in the code so it does
// not.  Or perhaps just remove the warning and use this as the solution.
			if (locked && write_locked) {
				write_locked = FALSE;
				lck_rw_lock_exclusive_to_shared(&ni->rl.lock);
				ntfs_warning(vol->mp, "Switching runlist lock "
						"to shared to avoid "
						"deadlock.");
			}
			err = ntfs_page_map(ni, ofs, &upl, &pl, &kattr, TRUE);
			if (err)
				goto unl_err;
			/*
			 * Update the initialized size in the ntfs inode.  This
			 * is enough to make ntfs_vnop_pageout() work.
			 */
			ofs += PAGE_SIZE;
			if (ofs > new_init_size)
				ofs = new_init_size;
			lck_spin_lock(&ni->size_lock);
			ni->initialized_size = ofs;
			lck_spin_unlock(&ni->size_lock);
			/* Set the page dirty so it gets written out. */
			ntfs_page_unmap(ni, upl, pl, TRUE);
		}
	} while (ofs < new_init_size);
	lck_spin_lock(&ni->size_lock);
	if (ni->initialized_size != new_init_size)
		panic("%s(): ni->initialized_size != new_init_size\n",
				__FUNCTION__);
	lck_spin_unlock(&ni->size_lock);
update_done:
	/* If we are holding the runlist lock, release it now. */
	if (locked) {
		if (write_locked)
			lck_rw_unlock_exclusive(&ni->rl.lock);
		else
			lck_rw_unlock_shared(&ni->rl.lock);
		locked = FALSE;
	}
	/* Bring up to date the initialized_size in the attribute record. */
	err = ntfs_attr_set_initialized_size(ni, -1);
	if (err)
		goto unl_err;
done:
	/*
	 * If we have modified the size of the base inode, cause the sizes to
	 * be written to all the directory index entries pointing to the base
	 * inode when the inode is written to disk.
	 */
	if (mark_sizes_dirty && ni == base_ni && !S_ISDIR(ni->mode))
		NInoSetDirtySizes(ni);
	ntfs_debug("Done, new initialized size 0x%llx, new data size 0x%llx.",
			(unsigned long long)new_init_size,
			(unsigned long long)size);
	return 0;
rl_err:
	ntfs_error(vol->mp, "Runlist is corrupt.  Unmount and run chkdsk.");
	NVolSetErrors(vol);
	err = EIO;
unl_err:
	if (locked) {
		if (write_locked)
			lck_rw_unlock_exclusive(&ni->rl.lock);
		else
			lck_rw_unlock_shared(&ni->rl.lock);
	}
	lck_spin_lock(&ni->size_lock);
	ni->initialized_size = old_init_size;
	lck_spin_unlock(&ni->size_lock);
	goto err;
put_err:
	ntfs_attr_search_ctx_put(ctx);
unm_err:
	ntfs_mft_record_unmap(base_ni);
err:
	ntfs_debug("Failed (error %d).", err);
	return err;
}

/**
 * ntfs_attr_sparse_set - switch an attribute to be sparse
 * @base_ni:	base ntfs inode to which the attribute belongs
 * @ni:		ntfs inode of attribute which to cause to be sparse
 * @ctx:	attribute search context describing the attribute to work on
 *
 * Switch the non-sparse, base attribute described by @ni and @ctx belonging to
 * the base ntfs inode @base_ni to be sparse.
 *
 * Return 0 on success and errno on error.
 *
 * Note that the attribute may be moved to be able to extend it when adding the
 * compressed size.  Thus any cached values of @ctx->ni, @ctx->m, and @ctx->a
 * are invalid after this function returns.
 */
static errno_t ntfs_attr_sparse_set(ntfs_inode *base_ni, ntfs_inode *ni,
		ntfs_attr_search_ctx *ctx)
{
#if 0
	VCN highest_vcn, stop_vcn;
	ntfs_volume *vol;
	MFT_RECORD *base_m, *m;
	ATTR_RECORD *a;
	ntfs_rl_element *rl;
	ntfs_inode *eni;
	ATTR_LIST_ENTRY *al_entry;
	unsigned name_size, mp_ofs, mp_size, al_entry_len, new_al_size;
	unsigned new_al_alloc;
	errno_t err;
	BOOL rewrite;
#endif

	ntfs_debug("Entering for mft_no 0x%llx, type 0x%x, name_len 0x%x.",
			(unsigned long long)base_ni->mft_no,
			(unsigned)le32_to_cpu(ni->type), ni->name_len);
	return ENOTSUP;
#if 0
	vol = base_ni->vol;
	base_m = base_ni->m;
	m = ctx->m;
	a = ctx->a;
	rewrite = FALSE;
	/*
	 * We should only be called for non-sparse, non-resident, $DATA
	 * attributes.
	 */
	if (a->type != AT_DATA || !NInoNonResident(ni) || !a->non_resident ||
			NInoSparse(ni) || a->flags & ATTR_IS_SPARSE)
		panic("%s(): a->type != AT_DATA || !NInoNonResident(ni) || "
				"!a->non_resident || NInoSparse(ni) || "
				"a->flags & ATTR_IS_SPARSE\n", __FUNCTION__);
	/*
	 * If the attribute is not compressed either, we need to add the
	 * compressed size to the attribute record and to switch all relevant
	 * fields to match.
	 */
	if (NInoCompressed(ni))
		goto is_compressed;
	if (a->flags & ATTR_IS_COMPRESSED)
		panic("%s(): a->flags & ATTR_IS_COMPRESSED)\n", __FUNCTION__);
retry_attr_rec_resize:
	err = ntfs_attr_record_resize(m, a, le32_to_cpu(a->length) +
			sizeof(a->compressed_size));
	if (!err) {
		/*
		 * Move everything at the offset of the compressed size to make
		 * space for the compressed size.
		 */
		memmove((u8*)a + offsetof(ATTR_RECORD, compressed_size) +
				sizeof(a->compressed_size), (u8*)a +
				offsetof(ATTR_RECORD, compressed_size),
				le32_to_cpu(a->length) - offsetof(ATTR_RECORD,
				compressed_size));
		/*
		 * Update the name offset to match the moved data.  If there is
		 * no name then set the name offset to the correct position
		 * instead of adding to a potentially incorrect value.
		 */
		if (a->name_length)
			a->name_offset = cpu_to_le16(
					le16_to_cpu(a->name_offset) +
					sizeof(a->compressed_size));
		else
			a->name_offset = const_cpu_to_le16(
					offsetof(ATTR_RECORD,
					compressed_size) +
					sizeof(a->compressed_size));
		/* Update the mapping pairs offset to its new location. */
		mp_ofs = le16_to_cpu(a->mapping_pairs_offset) +
				sizeof(a->compressed_size);
		goto set_compressed_size;
	}
	/*
	 * There is not enough space in the mft record.
	 *
	 * We need to add an attribute list attribute if it is not already
	 * present.
	 */
	if (!NInoAttrList(base_ni)) {
		err = ntfs_attr_list_add(base_ni, base_m, ctx);
		if (err || ctx->is_error) {
			if (!err)
				err = ctx->error;
			ntfs_error(vol->mp, "Failed to %s mft_no 0x%llx "
					"(error %d).", ctx->is_error ?
					"remap extent mft record of" :
					"add attribute list attribute to",
					(unsigned long long)base_ni->mft_no,
					err);
			return err;
		}
		/*
		 * The attribute location will have changed so update it from
		 * the search context.
		 */
		m = ctx->m;
		a = ctx->a;
		/*
		 * Retry the original attribute record resize as we may now
		 * have enough space to add the compressed size to the
		 * attribute record.
		 *
		 * This can for example happen when the attribute was moved out
		 * to an extent mft record which has much more free space than
		 * the base mft record had.
		 */
		goto retry_attr_rec_resize;
	}
	/*
	 * If this is not the only attribute record in the mft record then move
	 * it out to a new extent mft record which is guaranteed to generate
	 * enough space to add the compressed size to the attribute record.
	 */
	if (!ntfs_attr_record_is_only_one(m, a)) {
		lck_rw_lock_shared(&base_ni->attr_list_rl.lock);
		err = ntfs_attr_record_move(ctx);
		lck_rw_unlock_shared(&base_ni->attr_list_rl.lock);
		if (err) {
			ntfs_error(vol->mp, "Failed to move attribute extent "
					"from mft record 0x%llx to an extent "
					"mft record (error %d).",
					(unsigned long long)ctx->ni->mft_no,
					err);
			/*
			 * We could try to remove the attribute list attribute
			 * if we added it above but this will require
			 * attributes to be moved back into the base mft record
			 * from extent mft records so is a lot of work and
			 * given we are in an error code path and given that it
			 * is ok to just leave the inode with an attribute list
			 * attribute we do not bother and just bail out.
			 */
			return err;
		}
		/*
		 * The attribute location will have changed so update it from
		 * the search context.
		 */
		m = ctx->m;
		a = ctx->a;
		/*
		 * Retry the original attribute record resize as we will now
		 * have enough space to add the compressed size to the
		 * attribute record.
		 */
		goto retry_attr_rec_resize;
	}
	/*
	 * This is the only attribute in the mft record thus there is nothing
	 * to gain by moving it to another extent mft record.  So to generate
	 * space, we allocate a new extent mft record, create a new extent
	 * attribute record in it and use it to catch the overflow mapping
	 * pairs array data generated by the fact that we have added the
	 * compressed size to the base extent.
	 *
	 * TODO: We could instead iterate over all existing extent attribute
	 * records and rewrite the entire mapping pairs array but this could
	 * potentially be a lot of overhead.  On the other hand it would be an
	 * infrequent event thus the overhead may be worth it in the long term
	 * as it will generate better packed metadata.  For now we choose the
	 * simpler approach of just doing the splitting into a new extent
	 * attribute record.
	 *
	 * As we are going to rewrite the mapping pairs array we need to make
	 * sure we have decompressed the mapping pairs from the base attribute
	 * extent and have them cached in the runlist.
	 */
	if (!ni->rl.elements || ni->rl.rl->lcn == LCN_RL_NOT_MAPPED) {
		err = ntfs_mapping_pairs_decompress(vol, a, &ni->rl);
		if (err) {
			ntfs_error(vol->mp, "Mapping of the base runlist "
					"fragment failed (error %d).", err);
			if (err != ENOMEM)
				err = EIO;
			return err;
		}
	}
	rewrite = TRUE;
	/*
	 * Now add the compressed size so we can unmap the mft record of the
	 * base attribute extent if it is an extent mft record.
	 *
	 * First, move the name if present to its new location and update the
	 * name offset to match the new location.
	 */
	name_size = a->name_length * sizeof(ntfschar);
	if (name_size)
		memmove((u8*)a + offsetof(ATTR_RECORD, compressed_size) +
				sizeof(a->compressed_size), (u8*)a +
				le16_to_cpu(a->name_offset), name_size);
	a->name_offset = const_cpu_to_le16(offsetof(ATTR_RECORD,
			compressed_size) + sizeof(a->compressed_size));
	/* Update the mapping pairs offset to its new location. */
	mp_ofs = (offsetof(ATTR_RECORD, compressed_size) +
			sizeof(a->compressed_size) + name_size + 7) & ~7;
set_compressed_size:
	a->mapping_pairs_offset = cpu_to_le16(mp_ofs);
	/*
	 * Set the compression unit to 0 or 4 depending on the NTFS volume
	 * version.  FIXME: We know that NT4 uses 4 whilst XPSP2 uses 0 and we
	 * do not know what 2k uses so we assume 2k is the same as XPSP2.
	 */
	if (vol->major_ver > 1) {
		a->compression_unit = 0;
		ni->compression_block_size = 0;
		ni->compression_block_clusters =
				ni->compression_block_size_shift = 0;
	} else {
		a->compression_unit = NTFS_COMPRESSION_UNIT;
		ni->compression_block_size = 1U << (NTFS_COMPRESSION_UNIT +
				vol->cluster_size_shift);
		ni->compression_block_size_shift =
				ffs(ni->compression_block_size) - 1;
		ni->compression_block_clusters = 1U << NTFS_COMPRESSION_UNIT;
	}
	lck_spin_lock(&ni->size_lock);
	ni->compressed_size = ni->allocated_size;
	a->compressed_size = a->allocated_size;
	lck_spin_unlock(&ni->size_lock);
is_compressed:
	/* Mark both the attribute and the ntfs inode as sparse. */
	a->flags |= ATTR_IS_SPARSE;
	NInoSetSparse(ni);
	/*
	 * If this is the unnamed $DATA attribute, need to set the sparse flag
	 * in the standard information attribute and in the directory entries,
	 * too.
	 */
	if (ni == base_ni) {
		ni->file_attributes |= FILE_ATTR_SPARSE_FILE;
		NInoSetDirtyFileAttributes(ni);
	}
	/* If we do not need to rewrite the mapping pairs array we are done. */
	if (!rewrite)
		goto done;
	/*
	 * Determine the size of the mapping pairs array needed to fit all the
	 * runlist elements that were stored in the base attribute extent
	 * before we added the compressed size to the attribute record.
	 */
	highest_vcn = sle64_to_cpu(a->highest_vcn);
	err = ntfs_get_size_for_mapping_pairs(vol, ni->rl.elements ?
			ni->rl.rl : NULL, 0, highest_vcn, &mp_size);
	if (err) {
		ntfs_error(vol->mp, "Failed to get size for mapping pairs "
				"array (error %d).", err);
		goto undo1;
	}
	/* Write the mapping pairs array. */
	err = ntfs_mapping_pairs_build(vol, (s8*)a + mp_ofs,
			le32_to_cpu(a->length) - mp_ofs, ni->rl.elements ?
			ni->rl.rl : NULL, 0, highest_vcn, &stop_vcn);
	if (err && err != ENOSPC) {
		ntfs_error(vol->mp, "Failed to rebuild mapping pairs array "
				"(error %d).", err);
		goto undo1;
	}
	/* If by some miracle it all fitted we are done. */
	if (!err)
		goto done;
	/* Update the highest vcn to the new value. */
	a->highest_vcn = cpu_to_sle64(stop_vcn - 1);
	/*
	 * If the base attribute extent is in an extent mft record mark it
	 * dirty so it gets written back and unmap the extent mft record so we
	 * can allocate the new extent mft record.
	 */
	if (ctx->ni != base_ni) {
		NInoSetMrecNeedsDirtying(ctx->ni);
		ntfs_extent_mft_record_unmap(ctx->ni);
		/* Make the search context safe. */
		ctx->ni = base_ni;
	}
	/*
	 * Get the runlist element containing the lowest vcn for the new
	 * attribute record, i.e. @stop_vcn.
	 *
	 * This cannot fail as we know the runlist is ok and the runlist
	 * fragment containing @stop_vcn is mapped.
	 */
	rl = NULL;
	if (ni->rl.elements) {
		rl = ntfs_rl_find_vcn_nolock(ni->rl.rl, stop_vcn);
		if (!rl)
			panic("%s(): Memory corruption detected.\n",
					__FUNCTION__);
	}
	/*
	 * Determine the size of the mapping pairs array needed to fit all the
	 * remaining runlist elements that were stored in the base attribute
	 * extent before we added the compressed size to the attribute record
	 * but did now not fit.
	 */
	err = ntfs_get_size_for_mapping_pairs(vol, rl, stop_vcn, highest_vcn,
			&mp_size);
	if (err) {
		ntfs_error(vol->mp, "Failed to get size for mapping pairs "
				"array (error %d).", err);
		goto undo2;
	}
	/*
	 * We now need to allocate a new extent mft record, attach it to the
	 * base ntfs inode and set up the search context to point to it, then
	 * insert the new attribute record into it.
	 */
	err = ntfs_mft_record_alloc(vol, NULL, NULL, ni, &eni, &m, &a);
	if (err) {
		ntfs_error(vol->mp, "Failed to allocate a new extent mft "
				"record (error %d).", err);
		goto undo2;
	}
	ctx->ni = eni;
	ctx->m = m;
	ctx->a = a;
	/*
	 * Calculate the offset into the new attribute at which the mapping
	 * pairs array begins.  The mapping pairs array is placed after the
	 * name aligned to an 8-byte boundary which in turn is placed
	 * immediately after the non-resident attribute record itself.
	 *
	 * Note that extent attribute records do not have the compressed size
	 * field in their attribute records.
	 */
	mp_ofs = (offsetof(ATTR_RECORD, compressed_size) + name_size + 7) & ~7;
	/*
	 * Make space for the new attribute extent.  This cannot fail as we now
	 * have an empty mft record which by definition can hold a non-resident
	 * attribute record with just a small mapping pairs array.
	 */
	err = ntfs_attr_record_make_space(m, a, mp_ofs + mp_size);
	if (err)
		panic("%s(): err (ntfs_attr_record_make_space())\n",
				__FUNCTION__);
	/*
	 * Now setup the new attribute record.  The entire attribute has been
	 * zeroed and the length of the attribute record has been set.
	 *
	 * Before we proceed with setting up the attribute, add an attribute
	 * list attribute entry for the created attribute extent.
	 */
	al_entry = ctx->al_entry = (ATTR_LIST_ENTRY*)((u8*)ctx->al_entry +
			le16_to_cpu(ctx->al_entry->length));
	al_entry_len = (offsetof(ATTR_LIST_ENTRY, name) + name_size + 7) & ~7;
	new_al_size = base_ni->attr_list_size + al_entry_len;
	/* Out of bounds checks. */
	if ((u8*)al_entry < base_ni->attr_list || (u8*)al_entry >
			base_ni->attr_list + new_al_size || (u8*)al_entry +
			al_entry_len > base_ni->attr_list + new_al_size) {
		/* Inode is corrupt. */
		ntfs_error(vol->mp, "Inode 0x%llx is corrupt.  Run chkdsk.",
				(unsigned long long)base_ni->mft_no);
		err = EIO;
		goto undo3;
	}
	err = ntfs_attr_size_bounds_check(vol, AT_ATTRIBUTE_LIST, new_al_size);
	if (err) {
		if (err == ERANGE) {
			ntfs_error(vol->mp, "Attribute list attribute would "
					"become to large.  You need to "
					"defragment your volume and then try "
					"again.");
			err = ENOSPC;
		} else {
			ntfs_error(vol->mp, "Attribute list attribute is "
					"unknown on the volume.  The volume "
					"is corrupt.  Run chkdsk.");
			NVolSetErrors(vol);
			err = EIO;
		}
		goto undo3;
	}
	/*
	 * Reallocate the memory buffer if needed and create space for the new
	 * entry.
	 */
	new_al_alloc = (new_al_size + NTFS_ALLOC_BLOCK - 1) &
			~(NTFS_ALLOC_BLOCK - 1);
	if (new_al_alloc > base_ni->attr_list_alloc) {
		u8 *tmp, *al, *al_end;
		unsigned al_entry_ofs;

		tmp = OSMalloc(new_al_alloc, ntfs_malloc_tag);
		if (!tmp) {
			ntfs_error(vol->mp, "Not enough memory to extend the "
					"attribute list attribute.");
			err = ENOMEM;
			goto undo3;
		}
		al = base_ni->attr_list;
		al_entry_ofs = (u8*)al_entry - al;
		al_end = al + base_ni->attr_list_size;
		memcpy(tmp, al, al_entry_ofs);
		if ((u8*)al_entry < al_end)
			memcpy(tmp + al_entry_ofs + al_entry_len, al +
					al_entry_ofs, base_ni->attr_list_size -
					al_entry_ofs);
		al_entry = ctx->al_entry = (ATTR_LIST_ENTRY*)(tmp +
				al_entry_ofs);
		OSFree(base_ni->attr_list, base_ni->attr_list_alloc,
				ntfs_malloc_tag);
		base_ni->attr_list_alloc = new_al_alloc;
		base_ni->attr_list = tmp;
	} else if ((u8*)al_entry < base_ni->attr_list +
			base_ni->attr_list_size)
		memmove((u8*)al_entry + al_entry_len, al_entry,
				base_ni->attr_list_size - ((u8*)al_entry -
				base_ni->attr_list));
	base_ni->attr_list_size = new_al_size;
	/* Set up the attribute extent and the attribute list entry. */
	al_entry->type = a->type = ni->type;
	al_entry->length = cpu_to_le16(al_entry_len);
	a->non_resident = 1;
	al_entry->name_length = a->name_length = ni->name_len;
	a->name_offset = const_cpu_to_le16(offsetof(ATTR_RECORD,
			compressed_size));
	al_entry->name_offset = offsetof(ATTR_LIST_ENTRY, name);
	al_entry->instance = a->instance = m->next_attr_instance;
	/*
	 * Increment the next attribute instance number in the mft record as we
	 * consumed the old one.
	 */
	m->next_attr_instance = cpu_to_le16(
			(le16_to_cpu(m->next_attr_instance) + 1) & 0xffff);
	al_entry->lowest_vcn = a->lowest_vcn = cpu_to_sle64(stop_vcn);
	a->highest_vcn = cpu_to_sle64(highest_vcn);
	al_entry->mft_reference = MK_LE_MREF(eni->mft_no, eni->seq_no);
	a->mapping_pairs_offset = cpu_to_le16(mp_ofs);
	/* Copy the attribute name into place. */
	if (name_size) {
		memcpy((u8*)a + offsetof(ATTR_RECORD, compressed_size),
				ni->name, name_size);
		memcpy(&al_entry->name, ni->name, name_size);
	}
	/* For tidiness, zero out the unused space. */
	if (al_entry_len > offsetof(ATTR_LIST_ENTRY, name) + name_size)
		memset((u8*)al_entry + offsetof(ATTR_LIST_ENTRY, name) +
				name_size, 0, al_entry_len -
				(offsetof(ATTR_LIST_ENTRY, name) + name_size));
	/*
	 * Extend the attribute list attribute and copy in the modified value
	 * from the cache.
	 */
	err = ntfs_attr_list_sync_extend(base_ni, base_m,
			(u8*)al_entry - base_ni->attr_list, ctx);
	if (err || ctx->is_error) {
		/*
		 * If @ctx->is_error indicates error this is fatal as we cannot
		 * build the mapping pairs array into it as it is not mapped.
		 *
		 * However, we may still be able to recover from this situation
		 * by freeing the extent mft record and thus deleting the
		 * attribute record.  This only works when this is the only
		 * attribute record in the mft record and when we just created
		 * this extent attribute record.  We can easily determine if
		 * this is the only attribute in the mft record by scanning
		 * through the cached attribute list attribute.
		 */
		if (!err)
			err = ctx->error;
		ntfs_error(vol->mp, "Failed to %s mft_no 0x%llx (error %d).",
				ctx->is_error ?  "remap extent mft record of" :
				"extend and sync attribute list attribute to",
				(unsigned long long)base_ni->mft_no, err);
		goto undo4;
	}
	/*
	 * Finally, proceed to building the mapping pairs array into the
	 * attribute record.
	 */
	err = ntfs_mapping_pairs_build(vol, (s8*)a + mp_ofs,
			le32_to_cpu(a->length) - mp_ofs, rl, stop_vcn,
			highest_vcn, &stop_vcn);
	if (err && err != ENOSPC) {
		ntfs_error(vol->mp, "Failed to rebuild mapping pairs array "
				"(error %d).", err);
		goto undo5;
	}
	/*
	 * We must have fully rebuilt the mapping pairs array as we made sure
	 * there is enough space.
	 */
	if (err || stop_vcn != highest_vcn + 1)
		panic("%s(): err || stop_vcn != highest_vcn + 1\n",
				__FUNCTION__);
	/*
	 * If the attribute extent is in an extent mft record mark it dirty so
	 * it gets written back and unmap the extent mft record so we can map
	 * the mft record containing the base extent again.
	 */
	if (eni != base_ni) {
		NInoSetMrecNeedsDirtying(eni);
		ntfs_extent_mft_record_unmap(eni);
		/* Make the search context safe. */
		ctx->ni = base_ni;
	}
	/*
	 * Look up the base attribute extent again so we restore the search
	 * context as the caller expects it to be.
	 */
	ntfs_attr_search_ctx_reinit(ctx);
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0,
			ctx);
	if (err) {
		ntfs_error(vol->mp, "Re-lookup of first attribute extent "
				"failed (error %d).", err);
		if (err == ENOENT)
			err = EIO;
		goto undo6;
	}
done:
	ntfs_debug("Done.");
	return 0;
// TODO: HERE:
undo6:
undo5:
undo4:
undo3:
undo2:
undo1:
	panic("%s(): TODO!\n", __FUNCTION__);
	return err;
#endif
}

/**
 * ntfs_attr_sparse_clear - switch an attribute to not be sparse any more
 * @base_ni:	base ntfs inode to which the attribute belongs
 * @ni:		ntfs inode of the attribute to make non-sparse
 * @ctx:	attribute search context describing the attribute to work on
 *
 * Switch the sparse attribute described by @ni and @ctx belonging to the base
 * ntfs inode @base_ni to not be sparse any more.
 *
 * This function cannot fail.
 */
static void ntfs_attr_sparse_clear(ntfs_inode *base_ni, ntfs_inode *ni,
		ntfs_attr_search_ctx *ctx)
{
	ATTR_RECORD *a;
	
	a = ctx->a;
	/*
	 * We should only be called for sparse, non-resident, $DATA attributes.
	 */
	if (a->type != AT_DATA || !NInoNonResident(ni) || !a->non_resident ||
			!NInoSparse(ni) || !(a->flags & ATTR_IS_SPARSE))
		panic("%s(): a->type != AT_DATA || !NInoNonResident(ni) || "
				"!a->non_resident || !NInoSparse(ni) || "
				"!(a->flags & ATTR_IS_SPARSE)\n", __FUNCTION__);
	/*
	 * If the attribute is not compressed we need to remove the compressed
	 * size from the attribute record and to switch all relevant fields to
	 * match.
	 */
	if (!NInoCompressed(ni)) {
		errno_t err;

		if (a->flags & ATTR_IS_COMPRESSED)
			panic("%s(): a->flags & ATTR_IS_COMPRESSED)\n",
					__FUNCTION__);
		/*
		 * Move everything after the compressed size forward to the
		 * offset of the compressed size thus deleting the compressed
		 * size.
		 */
		memmove((u8*)a + offsetof(ATTR_RECORD, compressed_size),
				(u8*)a + offsetof(ATTR_RECORD,
				compressed_size) + sizeof(a->compressed_size),
				le32_to_cpu(a->length) - (offsetof(ATTR_RECORD,
				compressed_size) + sizeof(a->compressed_size)));
		/*
		 * Update the name offset and the mapping pairs offset to match
		 * the moved data.  If there is no name then set the name
		 * offset to the correct position instead of subtracting from a
		 * potentially incorrect value.
		 */
		if (!a->name_length)
			a->name_offset = const_cpu_to_le16(offsetof(ATTR_RECORD,
					compressed_size));
		else
			a->name_offset = cpu_to_le16(
					le16_to_cpu(a->name_offset) -
					sizeof(a->compressed_size));
		a->mapping_pairs_offset = cpu_to_le16(
				le16_to_cpu(a->mapping_pairs_offset) -
				sizeof(a->compressed_size));
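		/*
		 * Illustrative example, assuming an 8-byte compressed_size
		 * field at byte offset 0x40: an unnamed attribute that had
		 * both name_offset and mapping_pairs_offset at 0x48 ends up
		 * with both at 0x40 after the move above.
		 */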
		/* Set the compression unit to 0. */
		a->compression_unit = 0;
		lck_spin_lock(&ni->size_lock);
		ni->compressed_size = 0;
		lck_spin_unlock(&ni->size_lock);
		/* Clear the other related fields. */
		ni->compression_block_size = 0;
		ni->compression_block_clusters =
				ni->compression_block_size_shift = 0;
		/*
		 * Finally shrink the attribute record to reflect the removal
		 * of the compressed size.  Note, this cannot fail since we are
		 * making the attribute smaller thus by definition there is
		 * enough space to do so.
		 */
		err = ntfs_attr_record_resize(ctx->m, a,
				le32_to_cpu(a->length) -
				sizeof(a->compressed_size));
		if (err)
			panic("%s(): err\n", __FUNCTION__);
	}
	/* Mark both the attribute and the ntfs inode as non-sparse. */
	a->flags &= ~ATTR_IS_SPARSE;
	NInoClearSparse(ni);
	/*
	 * If this is the unnamed $DATA attribute, need to clear the sparse
	 * flag in the standard information attribute and in the directory
	 * entries, too.
	 */
	if (ni == base_ni) {
		ni->file_attributes &= ~FILE_ATTR_SPARSE_FILE;
		NInoSetDirtyFileAttributes(ni);
	}
}

/**
 * ntfs_attr_instantiate_holes - instantiate the holes in an attribute region
 * @ni:		ntfs inode of the attribute whose holes to instantiate
 * @start:	start offset in bytes at which to begin instantiating holes
 * @end:	end offset in bytes at which to stop instantiating holes
 * @new_end:	return the offset at which we stopped instantiating holes
 * @atomic:	if true must complete the entire extension or abort
 *
 * Scan the runlist (mapping any unmapped fragments as needed) starting at byte
 * offset @start into the attribute described by the ntfs inode @ni and
 * finishing at byte offset @end and instantiate any sparse regions located
 * between @start and @end with real clusters.
 *
 * Any clusters that are inside the initialized size are zeroed.
 *
 * If @atomic is true the whole instantiation must be complete so abort on
 * errors.  If @atomic is false partial instantiations are acceptable (but we
 * still return an error if the instantiation is partial).  In any case we set
 * *@new_end to the end of the instantiated range.  Thus the caller has to
 * always check *@new_end.  If *@new_end is equal to @end then the whole
 * instantiation was complete.  If *@new_end is less than @end the
 * instantiation was partial.
 *
 * Note if @new_end is NULL, then @atomic is set to true as there is no way to
 * communicate to the caller that the hole instantiation was partial.
 *
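 * For example (illustrative values only): a caller that passes @start 0, @end
 * 0x4000, and a non-NULL @new_end, and receives *@new_end == 0x2000 back,
 * knows that only the first half of the region was instantiated.
 *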
 * Return 0 on success and errno on error.
 *
 * Locking: - Caller must hold @ni->lock on the inode for writing.
 *	    - The runlist @ni must be unlocked as it is taken for writing.
 */
errno_t ntfs_attr_instantiate_holes(ntfs_inode *ni, s64 start, s64 end,
		s64 *new_end, BOOL atomic)
{
#if 0
	VCN vcn, end_vcn;
	s64 allocated_size, initialized_size, compressed_size, len;
	ntfs_inode *base_ni;
	ntfs_volume *vol = ni->vol;
	ntfs_rl_element *rl;
	MFT_RECORD *base_m, *m;
	ntfs_attr_search_ctx *ctx;
	ATTR_RECORD *a;
	errno_t err, err2;
	BOOL write_locked, is_first, mp_rebuilt;
	ntfs_runlist runlist;
#else
	ntfs_volume *vol = ni->vol;
	errno_t err;
#endif

	err = 0;
	/* We should never be called for non-sparse attributes. */
	if (!NInoSparse(ni))
		panic("%s(): !NInoSparse(ni)\n", __FUNCTION__);
	/* We should never be called for resident attributes. */
	if (!NInoNonResident(ni))
		panic("%s(): !NInoNonResident(ni)\n", __FUNCTION__);
	/* We should only be called for $DATA attributes. */
	if (ni->type != AT_DATA)
		panic("%s(): ni->type != AT_DATA\n", __FUNCTION__);
	/* Sanity check @start and @end. */
	if (start >= end)
		panic("%s(): start >= end\n", __FUNCTION__);
	if (start & vol->cluster_size_mask || end & vol->cluster_size_mask)
		panic("%s(): start & vol->cluster_size_mask || "
				"end & vol->cluster_size_mask\n", __FUNCTION__);
	err = ENOTSUP;
	return err;
#if 0
	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	if (!new_end)
		atomic = TRUE;
	lck_rw_lock_shared(&ni->rl.lock);
	write_locked = FALSE;
	/*
	 * We have to round down @start to the nearest page boundary and we
	 * have to round up @end to the nearest page boundary for the cases
	 * where the cluster size is smaller than the page size.  It makes no
	 * sense to instantiate only part of a page as a later pageout of the
	 * dirty page would cause any sparse clusters inside the page to be
	 * instantiated so we might as well do it now whilst we are
	 * instantiating things.
	 */
	vcn = (start & ~PAGE_MASK_64) >> vol->cluster_size_shift;
	end_vcn = ((end + PAGE_MASK) & ~PAGE_MASK_64) >>
			vol->cluster_size_shift;
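	/*
	 * Illustrative example, assuming a 512 byte cluster size (i.e. a
	 * cluster size shift of 9) and a 4096 byte page size: @start 0x1200
	 * is rounded down to 0x1000 giving a vcn of 8 and @end 0x1400 is
	 * rounded up to 0x2000 giving an end_vcn of 16.
	 */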
	/* Cache the sizes for the attribute so we take the size lock once. */
	lck_spin_lock(&ni->size_lock);
	allocated_size = ni->allocated_size;
	initialized_size = ni->initialized_size;
	compressed_size = ni->compressed_size;
	lck_spin_unlock(&ni->size_lock);
	/*
	 * We have to make sure that we stay within the existing allocated
	 * size when instantiating holes as it would corrupt the attribute if
	 * we were to extend the runlist beyond the allocated size.  And our
	 * rounding up of @end above could have caused us to go above the
	 * allocated size so fix this up now.
	 */
	if (end_vcn > allocated_size >> vol->cluster_size_shift)
		end_vcn = allocated_size >> vol->cluster_size_shift;
retry_remap:
	rl = ni->rl.rl;
	if (!ni->rl.elements || vcn < rl->vcn || !rl->length) {
map_vcn:
		if (!write_locked) {
			write_locked = TRUE;
			if (!lck_rw_lock_shared_to_exclusive(&ni->rl.lock)) {
				lck_rw_lock_exclusive(&ni->rl.lock);
				goto retry_remap;
			}
		}
		/* Need to map the runlist fragment containing @vcn. */
		err = ntfs_map_runlist_nolock(ni, vcn, NULL);
		if (err) {
			ntfs_error(vol->mp, "Failed to map runlist fragment "
					"(error %d).", err);
			if (err == EINVAL)
				err = EIO;
			goto err;
		}
		rl = ni->rl.rl;
		if (!ni->rl.elements || vcn < rl->vcn || !rl->length)
			panic("%s(): !ni->rl.elements || vcn < rl[0].vcn || "
					"!rl->length\n", __FUNCTION__);
	}
	do {
		VCN lowest_vcn, highest_vcn, stop_vcn;
		ntfs_rl_element *rl2;
		unsigned mp_size, mp_ofs, max_size;

		/* Seek to the runlist element containing @vcn. */
		while (rl->length && vcn >= rl[1].vcn)
			rl++;
		/*
		 * Seek to the first sparse run or to the end of the region we
		 * are interested in.
		 */
		while (rl->length && rl->lcn >= 0 && vcn < end_vcn) {
			rl++;
			vcn = rl->vcn;
		}
		/*
		 * If there are no sparse runs (left) in the region of interest
		 * we are done.
		 */
		if (vcn >= end_vcn) {
			vcn = end_vcn;
			break;
		}
		/*
		 * If this run is not mapped map it now and start again as the
		 * runlist will have been updated.
		 */
		if (rl->lcn == LCN_RL_NOT_MAPPED)
			goto map_vcn;
		/* If this run is not valid abort with an error. */
		if (!rl->length || rl->lcn < LCN_HOLE) {
			ntfs_error(vol->mp, "Runlist is corrupt.  Unmount and "
					"run chkdsk.");
			NVolSetErrors(vol);
			err = EIO;
			goto err;
		}
		/*
		 * This run is sparse thus we need to instantiate it for which
		 * we need to hold the runlist lock for writing.
		 */
		if (!write_locked) {
			write_locked = TRUE;
			if (!lck_rw_lock_shared_to_exclusive(&ni->rl.lock)) {
				lck_rw_lock_exclusive(&ni->rl.lock);
				goto retry_remap;
			}
		}
		/*
		 * Make sure that we do not instantiate past @end_vcn as would
		 * otherwise happen when the hole goes past @end_vcn.
		 */
		len = rl[1].vcn - vcn;
		if (rl[1].vcn > end_vcn)
			len = end_vcn - vcn;
// TODO: HERE:
		/*
		 * If the entire run lies outside the initialized size we do
		 * not need to do anything other than instantiating the hole
		 * with real clusters.
		 *
		 * If part of the run (or the whole run) lies inside the
		 * initialized size we need to zero the clusters in memory and
		 * mark the pages dirty so they get written out later in
		 * addition to instantiating the hole with real clusters.
		 *
		 * The need for zeroing causes two potential problems.  The
		 * first problem is that if the run being instantiated is very
		 * large we could run out of memory due to us holding both the
		 * inode lock and the runlist lock for writing so all the dirty
		 * pages we create/release back to the VM cannot be paged out
		 * until we release the locks and the second problem is that if
		 * the cluster size is less than the page size we can encounter
		 * partially sparse pages and if they are not already cached by
		 * the VM we have to page them in.  But to do so we have to not
		 * hold the runlist lock for writing.  We have two ways out of
		 * this situation.  Either we have to drop and re-acquire the
		 * runlist lock around paging in such pages (with restarting
		 * everything each time because we had dropped the lock) or we
		 * have to read the non-sparse clusters in by hand using an
		 * enhanced ntfs_rl_read() or even by calling buf_meta_bread()
		 * directly.
		 *
		 * FIXME: We ignore the first problem for now until the code is
		 * working and we can test it.  The solution is probably to
		 * break the work into chunks of a fixed size and then allocate
		 * only enough clusters to complete the current chunk, then
		 * merge that with the runlist, dirty all corresponding pages,
		 * then drop the locks to allow the pages to be written if
		 * needed and then take the locks again and start again with
		 * the next chunk.  This does have one nasty side effect and
		 * that is that whilst the locks are dropped a concurrent
		 * process could do nasty things to the inode including
		 * truncate our carefully allocated pages by shrinking the file
		 * so a lot of sanity checking after re-taking the locks will
		 * be needed.  Alternatively perhaps we need to hold the inode
		 * lock shared throughout this function so dropping the
		 * runlist lock would be sufficient.  We do not actually need
		 * the inode lock for writing in this function as we do not
		 * modify any of the inode sizes and the runlist lock will
		 * protect us sufficiently from everything.
		 *
		 * FIXME: We also ignore the second problem for now and abort
		 * if it bites us, again until the code is working and we can
		 * test it.
		 */
		/*
		 * Seek back to the last real LCN so we can try and extend the
		 * hole at that LCN so the instantiated clusters are at least
		 * in close proximity to the other data in the attribute.
		 */
		rl2 = rl;
		while (rl2->lcn < 0 && rl2 > ni->rl.rl)
			rl2--;
		runlist.rl = NULL;
		runlist.alloc = runlist.elements = 0;
		err = ntfs_cluster_alloc(vol, vcn, len,
				(rl2->lcn >= 0) ? rl2->lcn + rl2->length : -1,
				DATA_ZONE, FALSE, &runlist);
		if (err) {
			if (err != ENOSPC)
				ntfs_error(vol->mp, "Failed to allocate "
						"clusters (error %d).", err);
			goto err;
		}
// TODO: HERE:
		/*
		 * If the instantiated hole starts before the initialized size
		 * we need to zero it.
		 *
		 * FIXME: For now we do it in the most stupid way possible and
		 * that is to synchronously write zeroes to disk via the device
		 * hosting the volume.  That way we get around our issues and
		 * problems with the UBC and small/large cluster sizes.  This
		 * way if there is dirty data in the UBC it will still get
		 * written on top of the zeroing we are now doing.  Ordering is
		 * guaranteed as no-one knows about the allocated clusters yet
		 * as we have not merged the runlists yet.
		 *
		 * FIXME: TODO: It may be worth restricting ntfs_rl_set() to
		 * only operate up to the initialized size as it could
		 * otherwise do a lot of unneeded extra work.
		 */
		if (vcn << vol->cluster_size_shift < initialized_size) {
			ntfs_debug("Zeroing instantiated hole inside the "
					"initialized size.");
			if (!runlist.elements || !runlist.alloc)
				panic("%s(): !runlist.elements || "
						"!runlist.alloc\n",
						__FUNCTION__);
			err = ntfs_rl_set(vol, runlist.rl, 0);
			if (err) {
				ntfs_error(vol->mp, "Failed to zero newly "
						"allocated space (error %d).",
						err);
				goto undo_alloc;
			}
		}
		err = ntfs_rl_merge(&ni->rl, &runlist);
		if (err) {
			ntfs_error(vol->mp, "Failed to merge runlists (error "
					"%d).", err);
			goto undo_alloc;
		}
		/*
		 * The runlist may have been reallocated so @rl needs to be
		 * reset back to the beginning.
		 */
		rl = ni->rl.rl;
		/*
		 * Need to update the mapping pairs array of the attribute.  We
		 * cannot postpone this till the end (which would be much more
		 * efficient) because we could run out of space on the volume
		 * when trying to update the mapping pairs array and then we
		 * would not be able to roll back to the previous state because
		 * we would not know which bits of the runlist are new and
		 * which are old.  Doing it now means that if we get an error
		 * we still know the starting and ending VCNs of the run we
		 * instantiated so we can punch the clusters out again thus
		 * restoring the original hole.
		 */
		err = ntfs_mft_record_map(base_ni, &base_m);
		if (err) {
			ntfs_error(vol->mp, "Failed to map mft_no 0x%llx "
					"(error %d).",
					(unsigned long long)base_ni->mft_no,
					err);
			goto undo_merge;
		}
		ctx = ntfs_attr_search_ctx_get(base_ni, base_m);
		if (!ctx) {
			ntfs_error(vol->mp, "Failed to allocate attribute "
					"search context.");
			err = ENOMEM;
			goto unm_err;
		}
		/*
		 * Get the base attribute record so we can update the
		 * compressed size or so we can switch the attribute to not be
		 * sparse any more if we just filled the last hole.
		 */
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0,
				NULL, 0, ctx);
		if (err) {
			ntfs_error(vol->mp, "Failed to lookup base attribute "
					"extent in mft_no 0x%llx (error %d).",
					(unsigned long long)base_ni->mft_no,
					err);
			goto put_err;
		}
		m = ctx->m;
		a = ctx->a;
		/*
		 * We added @len clusters thus the compressed size grows by
		 * that many clusters whilst the allocated size does not change
		 * as we have not extended the attribute.
		 */
		compressed_size += len << vol->cluster_size_shift;
		/*
		 * Determine whether the attribute is still sparse by comparing
		 * the new compressed size to the allocated size.  If the two
		 * have now become the same the attribute is no longer sparse.
		 */
		if (compressed_size >= allocated_size) {
			if (compressed_size != allocated_size)
				panic("%s(): compressed_size != "
						"allocated_size\n",
						__FUNCTION__);
			/* Switch the attribute to not be sparse any more. */
			ntfs_attr_sparse_clear(base_ni, ni, ctx);
		}
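		/*
		 * Illustrative example: if the allocated size is 16 clusters
		 * of which 4 were sparse, the compressed size was 12
		 * clusters, and instantiating the last 4-cluster hole
		 * (@len 4) brings the compressed size to 16 clusters which
		 * equals the allocated size, thus the attribute ceases to be
		 * sparse.
		 */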
		/*
		 * If the attribute is (still) sparse or compressed, need to
		 * update the compressed size.
		 */
		if (NInoSparse(ni) || NInoCompressed(ni)) {
			lck_spin_lock(&ni->size_lock);
			ni->compressed_size = compressed_size;
			a->compressed_size = cpu_to_sle64(compressed_size);
			lck_spin_unlock(&ni->size_lock);
		}
		/*
		 * If this is the unnamed $DATA attribute also need to update
		 * the sizes in the directory entries pointing to this inode.
		 */
		if (ni == base_ni)
			NInoSetDirtySizes(ni);
		/*
		 * If the VCN we started allocating at is not in the base
		 * attribute record get the attribute record containing it so
		 * we can update the mapping pairs array.
		 */
		if (vcn > sle64_to_cpu(a->highest_vcn)) {
			/* Ensure the modified mft record is written out. */
			NInoSetMrecNeedsDirtying(ctx->ni);
			err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
					vcn, NULL, 0, ctx);
			if (err) {
				ntfs_error(vol->mp, "Failed to lookup "
						"attribute extent in mft_no "
						"0x%llx (error %d).",
						(unsigned long long)
						base_ni->mft_no, err);
				a = NULL;
				goto undo_sparse;
			}
			a = ctx->a;
		}
		/*
		 * Get the size for the new mapping pairs array for this
		 * attribute extent.
		 */
		lowest_vcn = sle64_to_cpu(a->lowest_vcn);
		/*
		 * Get the runlist element containing the lowest vcn.
		 *
		 * This cannot fail as we know the runlist is ok and the
		 * runlist fragment containing the lowest vcn is mapped.
		 */
		rl2 = ntfs_rl_find_vcn_nolock(rl, lowest_vcn);
		if (!rl2)
			panic("%s(): Memory corruption detected.\n",
					__FUNCTION__);
		err = ntfs_get_size_for_mapping_pairs(vol, rl2, lowest_vcn,
				highest_vcn, &mp_size);
		if (err) {
			ntfs_error(vol->mp, "Failed to get size for mapping "
					"pairs array (error %d).", err);
			goto undo_sparse;
		}
		mp_ofs = le16_to_cpu(a->mapping_pairs_offset);
retry_attr_rec_resize:
		/*
		 * Extend the attribute record to fit the bigger mapping pairs
		 * array.
		 */
		err = ntfs_attr_record_resize(m, a, mp_size + mp_ofs);
		if (!err)
			goto build_mpa;
		if (err != ENOSPC)
			panic("%s(): err != ENOSPC\n", __FUNCTION__);
		/*
		 * There is not enough space in the mft record.
		 *
		 * We need to add an attribute list attribute if it is not
		 * already present.
		 */
		if (!NInoAttrList(base_ni)) {
			err = ntfs_attr_list_add(base_ni, base_m, ctx);
			if (err || ctx->is_error) {
				if (!err)
					err = ctx->error;
				ntfs_error(vol->mp, "Failed to %s mft_no "
						"0x%llx (error %d).",
						ctx->is_error ?
						"remap extent mft record of" :
						"add attribute list attribute "
						"to", (unsigned long long)
						base_ni->mft_no, err);
				goto undo1;
			}
			/*
			 * The attribute location will have changed so update
			 * it from the search context.
			 */
			m = ctx->m;
			a = ctx->a;
			/*
			 * Retry the original attribute record resize as we may
			 * now have enough space to create the needed mapping 
			 * pairs array in the moved attribute record.
			 *
			 * This can for example happen when the attribute was
			 * moved out to an extent mft record which has much
			 * more free space than the base mft record had.
			 */
			goto retry_attr_rec_resize;
		}
		/*
		 * If this is not the only attribute record in the mft record
		 * then move it out to a new extent mft record which will allow
		 * the attribute record to grow larger thus reducing the total
		 * number of extent attribute records needed to a minimum.
		 */
		if (!ntfs_attr_record_is_only_one(m, a)) {
			lck_rw_lock_shared(&base_ni->attr_list_rl.lock);
			err = ntfs_attr_record_move(ctx);
			lck_rw_unlock_shared(&base_ni->attr_list_rl.lock);
			if (err) {
				ntfs_error(vol->mp, "Failed to move attribute "
						"extent from mft record "
						"0x%llx to an extent mft "
						"record (error %d).",
						(unsigned long long)
						ctx->ni->mft_no, err);
				/*
				 * We could try to remove the attribute list
				 * attribute if we added it above but this
				 * would probably require attributes to be
				 * moved back into the base mft record from
				 * extent mft records so is a lot of work and
				 * given we are in an error code path and given
				 * that it is ok to just leave the inode with
				 * an attribute list attribute we do not bother
				 * and just bail out.
				 */
				goto undo1;
			}
			/*
			 * The attribute location will have changed so update
			 * it from the search context.
			 */
			m = ctx->m;
			a = ctx->a;
			/*
			 * Retry the original attribute record resize as we may
			 * now have enough space to create the mapping pairs
			 * array in the moved attribute record.
			 */
			goto retry_attr_rec_resize;
		}
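		/*
		 * The attribute record is the only one in the mft record so
		 * extend it as far as possible: the limit is the free space
		 * left in the mft record, rounded down to the 8-byte
		 * alignment required of attribute records, plus the space
		 * already present after the mapping pairs offset.
		 */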
		max_size = (le32_to_cpu(m->bytes_allocated) -
				le32_to_cpu(m->bytes_in_use)) & ~7;
		max_size += le32_to_cpu(a->length) - mp_ofs;
		err = ntfs_attr_record_resize(m, a, max_size + mp_ofs);
		/*
		 * We worked out the exact size we can extend to so the resize
		 * cannot fail.
		 */
		if (err)
			panic("%s(): err (ntfs_attr_record_resize())\n",
					__FUNCTION__);
build_mpa:
// TODO: HERE...
		mp_rebuilt = TRUE;
		/*
		 * Generate the mapping pairs array directly into the attribute
		 * record.
		 *
		 * This cannot fail as we have already checked the size we need
		 * to build the mapping pairs array.
		 */
		err = ntfs_mapping_pairs_build(vol, (s8*)a + mp_ofs,
				le32_to_cpu(a->length) - mp_ofs, rl2,
				lowest_vcn, highest_vcn, &stop_vcn);
		if (err && err != ENOSPC) {
			ntfs_error(vol->mp, "Cannot fill hole of mft_no "
					"0x%llx, attribute type 0x%x, because "
					"building the mapping pairs array "
					"failed (error %d).",
					(unsigned long long)ni->mft_no,
					(unsigned)le32_to_cpu(ni->type), err);
			err = EIO;
			/*
			 * Need to set @a->highest_vcn to enable correct error
			 * recovery.
			 */
// TODO: HERE...
			if (!is_first)
				a->highest_vcn = cpu_to_sle64(sle64_to_cpu(
						a->lowest_vcn) - 1);
			goto undo;
		}
		/* Update the highest_vcn. */
		a->highest_vcn = cpu_to_sle64(stop_vcn - 1);
		/* Ensure the modified mft record is written out. */
		NInoSetMrecNeedsDirtying(ctx->ni);
		/*
		 * If the mapping pairs build succeeded, i.e. the current
		 * attribute extent contains the whole runlist fragment, we are
		 * done and can proceed to the next run.
		 */
		if (!err)
			goto next_run;
		/*
		 * Partial mapping pairs update.  This means we need to create
		 * one or more new attribute extents to hold the remainder of
		 * the
		 * mapping pairs.
		 *
		 * Get the size of the remaining mapping pairs array.
		 */
		rl2 = ntfs_rl_find_vcn_nolock(rl2, stop_vcn);
		if (!rl2)
			panic("%s(): !rl2 (stop_vcn)\n", __FUNCTION__);
		if (!rl2->length)
			panic("%s(): !rl2->length (stop_vcn)\n", __FUNCTION__);
		if (rl2->lcn < LCN_HOLE)
			panic("%s(): rl2->lcn < LCN_HOLE (stop_vcn)\n",
					__FUNCTION__);
		err = ntfs_get_size_for_mapping_pairs(vol, rl2, stop_vcn,
				highest_vcn, &mp_size);
		if (err) {
			ntfs_error(vol->mp, "Cannot complete filling of hole "
					"of mft_no 0x%llx, attribute type "
					"0x%x, because determining the size "
					"for the mapping pairs failed (error "
					"%d).", (unsigned long long)ni->mft_no,
					(unsigned)le32_to_cpu(ni->type), err);
			err = EIO;
// TODO: HERE...
			goto undo;
		}
		/* We only release extent mft records. */
		if (ctx->ni != base_ni)
			ntfs_extent_mft_record_unmap(ctx->ni);
// TODO: I AM HERE...  Need to allocate an extent mft record, add an extent
// attribute record to it filling it with remaining mapping pairs array fragment
// and creating an attribute list attribute entry for it.  Then if still not
// reached highest_vcn, need to repeat the process again.
next_run:
		ntfs_attr_search_ctx_put(ctx);
		ntfs_mft_record_unmap(base_ni);
		/*
		 * If the attribute is no longer sparse there are no more holes
		 * to instantiate thus we are done with the whole region of
		 * interest.
		 */
		if (!NInoSparse(ni)) {
			vcn = end_vcn;
			break;
		}
		/*
		 * We allocated @len clusters starting at @vcn.  Thus the next
		 * VCN we need to look at is at @vcn + @len.
		 */
		vcn += len;
	} while (vcn < end_vcn);
	if (vcn > end_vcn)
		panic("%s(): vcn > end_vcn\n", __FUNCTION__);
	ntfs_debug("Done, new_end 0x%llx.",
			(unsigned long long)vcn << vol->cluster_size_shift);
err:
	if (new_end)
		*new_end = vcn << vol->cluster_size_shift;
	if (write_locked)
		lck_rw_unlock_exclusive(&ni->rl.lock);
	else
		lck_rw_unlock_shared(&ni->rl.lock);
	return err;
undo_alloc:
	err2 = ntfs_cluster_free_from_rl(vol, runlist.rl, 0, -1, NULL);
	if (err2) {
		ntfs_error(vol->mp, "Failed to release allocated cluster(s) "
				"in error code path (error %d).  Run chkdsk "
				"to recover the lost space.", err2);
		NVolSetErrors(vol);
	}
	OSFree(runlist.rl, runlist.alloc, ntfs_malloc_tag);
	goto err;
undo_sparse:
	/*
	 * If looking up an attribute extent failed or we are not in the base
	 * attribute record need to look up the base attribute record.
	 */
	if (!a || a->lowest_vcn) {
		ntfs_attr_search_ctx_reinit(ctx);
		err2 = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0,
				NULL, 0, ctx);
		if (err2) {
			ntfs_error(vol->mp, "Failed to re-lookup base "
					"attribute record in error code path "
					"(error %d).  Leaving inconsistent "
					"metadata.  Unmount and run chkdsk.",
					err2);
			NVolSetErrors(vol);
			goto put_err;
		}
		a = ctx->a;
	}
	/*
	 * If we caused the attribute to no longer be sparse we need to make it
	 * sparse again.
	 */
	if (!NInoSparse(ni)) {
		err2 = ntfs_attr_sparse_set(base_ni, ni, ctx);
		if (err2) {
			ntfs_error(vol->mp, "Failed to re-set the attribute "
					"to be sparse in error code path "
					"(error %d).  Leaving inconsistent "
					"metadata.  Unmount and run chkdsk.",
					err2);
			NVolSetErrors(vol);
			goto put_err;
		}
		/*
		 * The attribute may have been moved to make space for the
		 * compressed size so @a is now invalid.
		 */
		a = ctx->a;
	}
	/* Restore the compressed size to the old value. */
	compressed_size -= len << vol->cluster_size_shift;
	lck_spin_lock(&ni->size_lock);
	ni->compressed_size = compressed_size;
	a->compressed_size = cpu_to_sle64(compressed_size);
	lck_spin_unlock(&ni->size_lock);
	/* Ensure the modified mft record is written out. */
	NInoSetMrecNeedsDirtying(ctx->ni);
	if (ni == base_ni)
		NInoSetDirtySizes(ni);
put_err:
	ntfs_attr_search_ctx_put(ctx);
unm_err:
	ntfs_mft_record_unmap(base_ni);
undo_merge:
	/* Free the clusters we allocated. */
	err2 = ntfs_cluster_free_from_rl(vol, rl, vcn, len, NULL);
	if (err2) {
		ntfs_error(vol->mp, "Failed to release allocated cluster(s) "
				"in error code path (error %d).  Unmount and "
				"run chkdsk to recover the lost space.", err2);
		NVolSetErrors(vol);
	}
	/* Punch the original hole back into the runlist. */
	err2 = ntfs_rl_punch_nolock(vol, &ni->rl, vcn, len);
	if (err2) {
		ntfs_error(vol->mp, "Failed to restore hole in error code "
				"path in error code path (error %d).  Leaving "
				"inconsistent metadata.  Unmount and run "
				"chkdsk.", err2);
		NVolSetErrors(vol);
	}
	goto err;
undo1:
	panic("%s(): TODO\n", __FUNCTION__);
	return err;
#endif
}

/**
 * ntfs_attr_extend_allocation - extend the allocated space of an attribute
 * @ni:			ntfs inode of the attribute whose allocation to extend
 * @new_alloc_size:	new size in bytes to which to extend the allocation to
 * @new_data_size:	new size in bytes to which to extend the data to
 * @data_start:		beginning of region which is required to be non-sparse
 * @ictx:		index context
 * @dst_alloc_size:	if not NULL, this pointer is set to the allocated size
 * @atomic:		if true must complete the entire extension or abort
 *
 * Extend the allocated space of an attribute described by the ntfs inode @ni
 * to @new_alloc_size bytes.  If @data_start is -1, the whole extension may be
 * implemented as a hole in the file (as long as both the volume and the ntfs
 * inode @ni have sparse support enabled).  If @data_start is >= 0, then the
 * region between the old allocated size and @data_start - 1 may be made sparse
 * but the regions between @data_start and @new_alloc_size must be backed by
 * actual clusters.
 *
 * If @new_data_size is -1, it is ignored.  If it is >= 0, then the data size
 * of the attribute is extended to @new_data_size and the UBC size of the VFS
 * vnode is updated to match.
 * WARNING: It is a bug for @new_data_size to be smaller than the old data size
 * as well as for @new_data_size to be greater than @new_alloc_size.
 *
 * If @ictx is not NULL, the extension is for an index allocation or bitmap
 * attribute extension.  In this case, if there is not enough space in the mft
 * record for the extended index allocation/bitmap attribute, the index root is
 * moved to an index block if it is not empty to create more space in the mft
 * record.  NOTE: At present @ictx is only set when the attribute being resized
 * is non-resident.
 *
 * If @atomic is true only return success if the entire extension is complete.
 * If only a partial extension is possible abort with an appropriate error.  If
 * @atomic is false partial extensions are acceptable in certain circumstances
 * (see below).
 *
 * For resident attributes extending the allocation involves resizing the
 * attribute record and if necessary moving it and/or other attributes into
 * extent mft records and/or converting the attribute to a non-resident
 * attribute which in turn involves extending the allocation of a non-resident
 * attribute as described below.
 *
 * For non-resident attributes this involves allocating clusters in the data
 * zone on the volume (except for regions that are being made sparse) and
 * extending the run list to describe the allocated clusters as well as
 * updating the mapping pairs array of the attribute.  This in turn involves
 * resizing the attribute record and if necessary moving it and/or other
 * attributes into extent mft records and/or splitting the attribute record
 * into multiple extent attribute records.
 *
 * Also, the attribute list attribute is updated if present and in some of the
 * above cases (the ones where extent mft records/attributes come into play),
 * an attribute list attribute is created if not already present.
 *
 * Return 0 on success and errno on error.
 *
 * In the case that an error is encountered but a partial extension at least up
 * to @data_start (if present) is possible, the allocation is partially
 * extended and success is returned.  If @data_start is -1 then partial
 * allocations are not performed.
 *
 * If @dst_alloc_size is not NULL, then *@dst_alloc_size is set to the new
 * allocated size when ntfs_attr_extend_allocation() returns success.  If
 * an error is returned *@dst_alloc_size is undefined.  This is useful so that
 * the caller has a simple way of checking whether or not the allocation was
 * partial.
 *
 * Thus if @data_start is not -1 the caller should supply @dst_alloc_size and
 * then compare *@dst_alloc_size to @new_alloc_size to determine if the
 * allocation was partial.  And if @data_start is -1 there is no point in
 * supplying @dst_alloc_size as *@dst_alloc_size will always be equal to
 * @new_alloc_size.
 *
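 * A minimal caller sketch (illustrative only; handle_partial_extension() is a
 * made-up stand-in for whatever recovery the caller wishes to perform):
 *
 *	s64 new_size;
 *
 *	err = ntfs_attr_extend_allocation(ni, new_alloc_size, -1, data_start,
 *			NULL, &new_size, FALSE);
 *	if (!err && new_size < new_alloc_size)
 *		handle_partial_extension(ni, new_size);
 *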
 * Locking: - Caller must hold @ni->lock on the inode for writing.
 *	    - The runlist @ni must be unlocked as it is taken for writing.
 */
errno_t ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
		const s64 new_data_size, const s64 data_start,
		ntfs_index_context *ictx, s64 *dst_alloc_size,
		const BOOL atomic)
{
	VCN vcn, lowest_vcn, stop_vcn;
	s64 start, ll, old_alloc_size, alloc_size, alloc_start, alloc_end;
	s64 nr_allocated, nr_freed;
	ntfs_volume *vol = ni->vol;
	ntfs_inode *base_ni;
	MFT_RECORD *base_m, *m;
	ATTR_RECORD *a;
	ntfs_attr_search_ctx *actx;
	ntfs_rl_element *rl;
	unsigned attr_len, arec_size, name_size, mp_size, mp_ofs, max_size;
	unsigned al_entry_len, new_al_alloc;
	errno_t err, err2;
	BOOL is_sparse, is_first, mp_rebuilt, al_entry_added;
	ntfs_runlist runlist;

	start = data_start;
#ifdef DEBUG
	lck_spin_lock(&ni->size_lock);
	old_alloc_size = ni->allocated_size;
	lck_spin_unlock(&ni->size_lock);
	ntfs_debug("Entering for mft_no 0x%llx, attribute type 0x%x, "
			"old_allocated_size 0x%llx, "
			"new_allocated_size 0x%llx, new_data_size 0x%llx, "
			"data_start 0x%llx.", (unsigned long long)ni->mft_no,
			(unsigned)le32_to_cpu(ni->type),
			(unsigned long long)old_alloc_size,
			(unsigned long long)new_alloc_size,
			(unsigned long long)new_data_size,
			(unsigned long long)start);
#endif
	/* This cannot be called for the attribute list attribute. */
	if (ni->type == AT_ATTRIBUTE_LIST)
		panic("%s(): ni->type == AT_ATTRIBUTE_LIST\n", __FUNCTION__);
	name_size = ni->name_len * sizeof(ntfschar);
	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	is_first = TRUE;
retry_extend:
	/*
	 * For non-resident attributes, @start and @new_size need to be aligned
	 * to cluster boundaries for allocation purposes.
	 */
	if (NInoNonResident(ni)) {
		if (start > 0)
			start &= ~(s64)vol->cluster_size_mask;
		new_alloc_size = (new_alloc_size + vol->cluster_size - 1) &
				~(s64)vol->cluster_size_mask;
	}
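	/*
	 * Illustrative example, assuming a 4096 byte cluster size: @start
	 * 0x1200 becomes 0x1000 and @new_alloc_size 0x2200 becomes 0x3000.
	 */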
	if (new_data_size >= 0 && new_data_size > new_alloc_size)
		panic("%s(): new_data_size >= 0 && new_data_size > "
				"new_alloc_size\n", __FUNCTION__);
	/* Check if new size is allowed in $AttrDef. */
	err = ntfs_attr_size_bounds_check(vol, ni->type, new_alloc_size);
	if (err) {
		/* Only emit errors when the write will fail completely. */
		lck_spin_lock(&ni->size_lock);
		old_alloc_size = ni->allocated_size;
		lck_spin_unlock(&ni->size_lock);
		if (start < 0 || start >= old_alloc_size) {
			if (err == ERANGE) {
				ntfs_error(vol->mp, "Cannot extend allocation "
						"of mft_no 0x%llx, attribute "
						"type 0x%x, because the new "
						"allocation would exceed the "
						"maximum allowed size for "
						"this attribute type.",
						(unsigned long long)ni->mft_no,
						(unsigned)
						le32_to_cpu(ni->type));
			} else {
				ntfs_error(vol->mp, "Cannot extend allocation "
						"of mft_no 0x%llx, attribute "
						"type 0x%x, because this "
						"attribute type is not "
						"defined on the NTFS volume.  "
						"Possible corruption!  You "
						"should run chkdsk!",
						(unsigned long long)ni->mft_no,
						(unsigned)
						le32_to_cpu(ni->type));
			}
		}
		/* Translate error code to be POSIX conformant for write(2). */
		if (err == ERANGE)
			err = EFBIG;
		else
			err = EIO;
		return err;
	}
	/*
	 * We will be modifying both the runlist (if non-resident) and the mft
	 * record so lock them both down.
	 */
	lck_rw_lock_exclusive(&ni->rl.lock);
	err = ntfs_mft_record_map(base_ni, &base_m);
	if (err) {
		base_m = NULL;
		actx = NULL;
		goto err_out;
	}
	actx = ntfs_attr_search_ctx_get(base_ni, base_m);
	if (!actx) {
		err = ENOMEM;
		goto err_out;
	}
	lck_spin_lock(&ni->size_lock);
	alloc_size = ni->allocated_size;
	lck_spin_unlock(&ni->size_lock);
	/*
	 * If non-resident, seek to the last extent.  If resident, there is
	 * only one extent, so seek to that.
	 */
	vcn = (NInoNonResident(ni) && alloc_size > 0) ?
			(alloc_size - 1) >> vol->cluster_size_shift : 0;
	/*
	 * Abort if someone did the work whilst we waited for the locks.  If we
	 * just converted the attribute from resident to non-resident it is
	 * likely that exactly this has happened already.  We cannot abort
	 * completely, however, if we still need to update the data size.
	 */
	if (new_alloc_size <= alloc_size) {
		ntfs_debug("Allocated size already exceeds requested size.");
		new_alloc_size = alloc_size;
		if (new_data_size < 0)
			goto done;
		/*
		 * We want the first attribute extent so that we can update the
		 * data size.
		 */
		vcn = 0;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, vcn, NULL, 0,
			actx);
	if (err) {
		if (err == ENOENT)
			err = EIO;
		goto err_out;
	}
	m = actx->m;
	a = actx->a;
	/* Use goto to reduce indentation. */
	if (a->non_resident)
		goto do_non_resident_extend;
	if (NInoNonResident(ni))
		panic("%s(): NInoNonResident(ni)\n", __FUNCTION__);
	/*
	 * As things are now this function should never be called with an index
	 * context for the resize of a resident attribute.
	 */
	if (ictx)
		panic("%s(): ictx\n", __FUNCTION__);
	/* The total length of the attribute value. */
	attr_len = le32_to_cpu(a->value_length);
	/*
	 * Extend the attribute record to be able to store the new attribute
	 * size.  ntfs_attr_record_resize() will not do anything if the size is
	 * not changing.
	 */
	arec_size = (le16_to_cpu(a->value_offset) + new_alloc_size + 7) & ~7;
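	/*
	 * Illustrative example, assuming a value offset of 0x18 which is
	 * typical for unnamed resident attributes: a new allocation of 100
	 * bytes yields arec_size = (0x18 + 100 + 7) & ~7 = 0x80.
	 */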
	if (arec_size < le32_to_cpu(m->bytes_allocated) -
			le32_to_cpu(m->bytes_in_use) &&
			!ntfs_attr_record_resize(m, a, arec_size)) {
		/* The resize succeeded! */
		if (new_data_size > attr_len) {
			if (!ubc_setsize(ni->vn, new_data_size)) {
				ntfs_error(vol->mp, "Failed to set size in "
						"UBC.");
				/*
				 * This cannot fail as it is a shrinking
				 * resize.
				 */
				lck_spin_lock(&ni->size_lock);
				err = ntfs_attr_record_resize(m, a,
						le16_to_cpu(a->value_offset) +
						ni->allocated_size);
				lck_spin_unlock(&ni->size_lock);
				if (err)
					panic("%s(): Failed to shrink "
							"resident attribute "
							"record (error %d)\n",
							__FUNCTION__, err);
				err = EIO;
				goto err_out;
			}
			/* Zero the extended attribute value. */
			bzero((u8*)a + le16_to_cpu(a->value_offset) + attr_len,
					(u32)new_data_size - attr_len);
			lck_spin_lock(&ni->size_lock);
			ni->initialized_size = ni->data_size = new_data_size;
			a->value_length = cpu_to_le32((u32)new_data_size);
		} else
			lck_spin_lock(&ni->size_lock);
		ni->allocated_size = le32_to_cpu(a->length) -
				le16_to_cpu(a->value_offset);
		lck_spin_unlock(&ni->size_lock);
		goto dirty_done;
	}
	/*
	 * We have to drop all the locks so we can call
	 * ntfs_attr_make_non_resident().
	 */
	ntfs_attr_search_ctx_put(actx);
	ntfs_mft_record_unmap(base_ni);
	lck_rw_unlock_exclusive(&ni->rl.lock);
	/*
	 * Not enough space in the mft record, try to make the attribute
	 * non-resident and if successful restart the extension process.
	 */
	err = ntfs_attr_make_non_resident(ni);
	if (!err)
		goto retry_extend;
	/*
	 * Could not make non-resident.  If this is due to this not being
	 * permitted for this attribute type try to make other attributes
	 * non-resident and/or move this or other attributes out of the mft
	 * record this attribute is in.  Otherwise fail.
	 */
	if (err != EPERM) {
		if (err != ENOSPC) {
			/*
			 * Only emit errors when the write will fail
			 * completely.
			 */
			lck_spin_lock(&ni->size_lock);
			old_alloc_size = ni->allocated_size;
			lck_spin_unlock(&ni->size_lock);
			if (start < 0 || start >= old_alloc_size)
				ntfs_error(vol->mp, "Cannot extend allocation "
						"of mft_no 0x%llx, attribute "
						"type 0x%x, because the "
						"conversion from resident to "
						"non-resident attribute "
						"failed (error %d).",
						(unsigned long long)ni->mft_no,
						(unsigned)le32_to_cpu(ni->type),
						err);
			if (err != ENOMEM) {
				NVolSetErrors(vol);
				err = EIO;
			}
		}
		goto conv_err_out;
	}
	/*
	 * To make space in the mft record we would like to try to make other
	 * attributes non-resident if that would save space.
	 *
	 * FIXME: We cannot do this at present unless the attribute is the
	 * attribute being resized as there could be an ntfs inode matching
	 * this attribute in memory and it would become out of date with its
	 * metadata if we touch its attribute record.
	 *
	 * FIXME: We do not need to do this if this is the attribute being
	 * resized as we already tried to make the attribute non-resident and
	 * it did not work or we would never have gotten here in the first
	 * place.
	 *
	 * Thus we have to either move other attributes to extent mft records
	 * thus making more space in the base mft record or we have to move the
	 * attribute being resized to an extent mft record thus giving it more
	 * space.  In any case we need to have an attribute list attribute so
	 * start by adding it if it does not yet exist.
	 *
	 * Before we start, we can check whether it is possible to fit the
	 * attribute to be resized inside an mft record.  If not then there is
	 * no point in proceeding.
	 *
	 * This should never really happen as the attribute size should never
	 * be allowed to grow so much and such requests should never be made by
	 * the driver and if they are they should be caught by the call to
	 * ntfs_attr_size_bounds_check().
	 */
	if (arec_size > vol->mft_record_size - sizeof(MFT_RECORD)) {
		/* Only emit errors when the write will fail completely. */
		lck_spin_lock(&ni->size_lock);
		old_alloc_size = ni->allocated_size;
		lck_spin_unlock(&ni->size_lock);
		if (start < 0 || start >= old_alloc_size)
			ntfs_error(vol->mp, "Cannot extend allocation of "
					"mft_no 0x%llx, attribute type 0x%x, "
					"because the attribute may not be "
					"non-resident and the requested size "
					"exceeds the maximum possible "
					"resident attribute record size.",
					(unsigned long long)ni->mft_no,
					(unsigned)le32_to_cpu(ni->type));
		/* Use POSIX conformant write(2) error code. */
		err = EFBIG;
		goto conv_err_out;
	}
	/*
	 * The resident attribute can fit in an mft record.  Now have to decide
	 * whether to make other attributes non-resident/move other attributes
	 * out of the mft record or whether to move the attribute record to be
	 * resized out to a new mft record.
	 *
	 * TODO: We never call ntfs_attr_extend_allocation() for attributes
	 * that cannot be non-resident thus we never get here thus we simply
	 * panic() here to remind us that we need to implement this code if we
	 * ever start calling this function for attributes that must remain
	 * resident.
	 */
	panic("%s(): Attribute may not be non-resident.\n", __FUNCTION__);
do_non_resident_extend:
	if (!NInoNonResident(ni))
		panic("%s(): !NInoNonResident(ni)\n", __FUNCTION__);
	if (new_alloc_size == alloc_size) {
		if (vcn)
			panic("%s(): vcn\n", __FUNCTION__);
		goto alloc_done;
	}
	/*
	 * We are going to allocate starting at the old allocated size and are
	 * going to allocate up to the new allocated size.
	 */
	alloc_start = alloc_size;
	rl = NULL;
	if (ni->rl.elements) {
		/* Seek to the end of the runlist. */
		rl = &ni->rl.rl[ni->rl.elements - 1];
	}
	/*
	 * Cache the lowest VCN for later.  Need to do it here to silence
	 * compiler warning about possible use of uninitialized variable.
	 */
	lowest_vcn = sle64_to_cpu(a->lowest_vcn);
	/* If this attribute extent is not mapped, map it now. */
	if (alloc_size > 0 && (!ni->rl.elements ||
			rl->lcn == LCN_RL_NOT_MAPPED ||
			(rl->lcn == LCN_ENOENT && rl > ni->rl.rl &&
			(rl-1)->lcn == LCN_RL_NOT_MAPPED))) {
		err = ntfs_mapping_pairs_decompress(vol, a, &ni->rl);
		if (err || !ni->rl.elements) {
			if (!err)
				err = EIO;
			if (start < 0 || start >= alloc_size)
				ntfs_error(vol->mp, "Cannot extend allocation "
						"of mft_no 0x%llx, attribute "
						"type 0x%x, because the "
						"mapping of a runlist "
						"fragment failed (error %d).",
						(unsigned long long)ni->mft_no,
						(unsigned)le32_to_cpu(ni->type),
						err);
			if (err != ENOMEM)
				err = EIO;
			goto err_out;
		}
		/* Seek to the end of the runlist. */
		rl = &ni->rl.rl[ni->rl.elements - 1];
	}
	/*
	 * We now know the runlist of the last extent is mapped and @rl is at
	 * the end of the runlist.  We want to begin extending the runlist.
	 *
	 * If the data starts after the end of the old allocation or no data
	 * start is specified (@start < 0), this is a $DATA attribute and
	 * sparse attributes are enabled on the volume and for this inode, then
	 * create a sparse region between the old allocated size and the start
	 * of the data or the new allocated size if no data start is specified.
	 * Otherwise proceed with filling the whole space between the old
	 * allocated size and the new allocated size with clusters.
	 */
	if ((start >= 0 && start <= alloc_size) || ni->type != AT_DATA ||
			!NVolSparseEnabled(vol) || NInoSparseDisabled(ni)) {
		is_sparse = FALSE;
		goto skip_sparse;
	}
	/*
	 * If @start is less than zero we create the sparse region from the old
	 * allocated size to the new allocated size.  Otherwise we end the
	 * sparse region at @start and fill with real clusters between @start
	 * and the new allocated size.
	 */
	alloc_end = start;
	if (start < 0)
		alloc_end = new_alloc_size;
	ntfs_debug("Adding hole starting at byte offset 0x%llx and finishing "
			"at byte offset 0x%llx.",
			(unsigned long long)alloc_start,
			(unsigned long long)alloc_end);
	/*
	 * Allocate more memory if needed.  We ensure there is space at least
	 * for two new elements as this is what needs to happen when this is
	 * the very first allocation, i.e. the file has zero clusters allocated
	 * at the moment.
	 */
	if ((ni->rl.elements + 2) * sizeof(*rl) > ni->rl.alloc) {
		ntfs_rl_element *rl2;

		rl2 = OSMalloc(ni->rl.alloc + NTFS_ALLOC_BLOCK,
				ntfs_malloc_tag);
		if (!rl2) {
			err = ENOMEM;
			goto err_out;
		}
		if (ni->rl.elements) {
			memcpy(rl2, ni->rl.rl, ni->rl.elements * sizeof(*rl2));
			/* Seek to the end of the runlist. */
			rl = &rl2[ni->rl.elements - 1];
		}
		if (ni->rl.alloc)
			OSFree(ni->rl.rl, ni->rl.alloc, ntfs_malloc_tag);
		ni->rl.rl = rl2;
		ni->rl.alloc += NTFS_ALLOC_BLOCK;
	}
	if (ni->rl.elements) {
		/* Sanity check that this is the end element. */
		if (rl->length || rl->lcn >= LCN_HOLE)
			panic("%s(): rl->length || rl->lcn >= LCN_HOLE)\n",
					__FUNCTION__);
	} else /* if (!ni->rl.elements) */ {
		/*
		 * The runlist is empty thus we are now creating both the
		 * sparse element and the end element.  Thus need to set
		 * everything up so we end up with two new elements rather than
		 * one.
		 *
		 * Note we do not need to set up @rl->lcn and @rl->length as
		 * they are both unconditionally overwritten below.
		 */
		if (alloc_size > 0)
			panic("%s(): alloc_size > 0\n", __FUNCTION__);
		rl = ni->rl.rl;
		rl->vcn = 0;
		ni->rl.elements = 1;
	}
	/*
	 * If a last real element exists and it is sparse, need to extend it
	 * instead of adding a new hole.
	 *
	 * Replace the terminator element with a sparse element and add a new
	 * terminator.  We know this is the end of the attribute thus we can
	 * use LCN_ENOENT even if the old terminator was LCN_RL_NOT_MAPPED.
	 */
	if (rl->vcn != alloc_start >> vol->cluster_size_shift)
		panic("%s(): rl->vcn != alloc_start >> "
				"vol->cluster_size_shift\n", __FUNCTION__);
	if (ni->rl.elements > 1 && (rl - 1)->lcn == LCN_HOLE)
		rl--;
	else {
		rl->lcn = LCN_HOLE;
		rl[1].length = 0;
		ni->rl.elements++;
	}
	rl[1].vcn = alloc_end >> vol->cluster_size_shift;
	if (rl[1].vcn <= rl->vcn)
		panic("%s(): rl[1].vcn <= rl->vcn\n", __FUNCTION__);
	rl->length = rl[1].vcn - rl->vcn;
	rl[1].lcn = LCN_ENOENT;
	is_sparse = TRUE;
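	/*
	 * Illustrative example: the runlist { vcn 0, lcn 100, len 8 },
	 * { vcn 8, LCN_ENOENT } extended by a four cluster hole becomes
	 * { vcn 0, lcn 100, len 8 }, { vcn 8, LCN_HOLE, len 4 },
	 * { vcn 12, LCN_ENOENT }.
	 */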
	/*
	 * If the entire extension is sparse skip the allocation of real
	 * clusters and proceed to updating the mapping pairs array.
	 */
	if (start < 0) {
		nr_allocated = 0;
		goto skip_real_alloc;
	}
	/*
	 * We allocated part of the extension as a hole, now we are going to
	 * allocate the remainder of the extension with real clusters.
	 */
	alloc_start = start;
skip_sparse:
	/*
	 * We want to begin allocating clusters starting at the last allocated
	 * cluster to reduce fragmentation.  If there are no valid LCNs in the
	 * attribute we let the cluster allocator choose the starting cluster.
	 *
	 * If the last LCN is a hole or similar seek back to last real LCN.
	 */
	if (ni->rl.elements) {
		while (rl->lcn < 0 && rl > ni->rl.rl)
			rl--;
	}
	// FIXME: Need to implement partial allocations so at least part of the
	// write can be performed when @start >= 0 (and hence @data_start >= 0).
	// This is needed for POSIX write(2) conformance.  But do not allow
	// partial allocations for non-DATA attributes as partial metadata is
	// no use.  The @start >= 0 check may be sufficient to exclude non-data
	// attributes...
	// FIXME: When we implement partial allocations we need to only allow
	// them to happen when @atomic is false.
	runlist.rl = NULL;
	runlist.alloc = runlist.elements = 0;
	nr_allocated = (new_alloc_size - alloc_start) >>
			vol->cluster_size_shift;
	err = ntfs_cluster_alloc(vol, alloc_start >> vol->cluster_size_shift,
			nr_allocated, (ni->rl.elements && (rl->lcn >= 0)) ?
			rl->lcn + rl->length : -1, DATA_ZONE, TRUE, &runlist);
	if (err) {
		if (start < 0 || start >= alloc_size)
			ntfs_error(vol->mp, "Cannot extend allocation of "
					"mft_no 0x%llx, attribute type 0x%x, "
					"because the allocation of clusters "
					"failed (error %d).",
					(unsigned long long)ni->mft_no,
					(unsigned)le32_to_cpu(ni->type), err);
		if (err != ENOMEM && err != ENOSPC)
			err = EIO;
		nr_allocated = 0;
		goto trunc_err_out;
	}
	err = ntfs_rl_merge(&ni->rl, &runlist);
	if (err) {
		if (start < 0 || start >= alloc_size)
			ntfs_error(vol->mp, "Cannot extend allocation of "
					"mft_no 0x%llx, attribute type 0x%x, "
					"because the runlist merge failed "
					"(error %d).",
					(unsigned long long)ni->mft_no,
					(unsigned)le32_to_cpu(ni->type), err);
		if (err != ENOMEM)
			err = EIO;
		err2 = ntfs_cluster_free_from_rl(vol, runlist.rl, 0, -1,
				NULL);
		if (err2) {
			ntfs_error(vol->mp, "Failed to release allocated "
					"cluster(s) in error code path (error "
					"%d).  Run chkdsk to recover the lost "
					"space.", err2);
			NVolSetErrors(vol);
		}
		OSFree(runlist.rl, runlist.alloc, ntfs_malloc_tag);
		nr_allocated = 0;
		goto trunc_err_out;
	}
	ntfs_debug("Allocated 0x%llx clusters.",
			(unsigned long long)(new_alloc_size - alloc_start) >>
			vol->cluster_size_shift);
skip_real_alloc:
	/* Find the runlist element with which the attribute extent starts. */
	rl = ntfs_rl_find_vcn_nolock(ni->rl.rl, lowest_vcn);
	if (!rl)
		panic("%s(): !rl\n", __FUNCTION__);
	if (!rl->length)
		panic("%s(): !rl->length\n", __FUNCTION__);
	if (rl->lcn < LCN_HOLE)
		panic("%s(): rl->lcn < LCN_HOLE\n", __FUNCTION__);
	mp_rebuilt = FALSE;
	attr_len = le32_to_cpu(a->length);
	/* Get the size for the new mapping pairs array for this extent. */
	err = ntfs_get_size_for_mapping_pairs(vol, rl, lowest_vcn, -1,
			&mp_size);
	if (err) {
		if (start < 0 || start >= alloc_size)
			ntfs_error(vol->mp, "Cannot extend allocation of "
					"mft_no 0x%llx, attribute type 0x%x, "
					"because determining the size for the "
					"mapping pairs failed (error %d).",
					(unsigned long long)ni->mft_no,
					(unsigned)le32_to_cpu(ni->type), err);
		err = EIO;
		goto undo_alloc;
	}
	mp_ofs = le16_to_cpu(a->mapping_pairs_offset);
retry_attr_rec_resize:
	/* Extend the attribute record to fit the bigger mapping pairs array. */
	err = ntfs_attr_record_resize(m, a, mp_size + mp_ofs);
	if (!err)
		goto build_mpa;
	if (err != ENOSPC)
		panic("%s(): err != ENOSPC\n", __FUNCTION__);
	/*
	 * Not enough space in the mft record.  If this is an index related
	 * extension, check if the index root attribute is in the same mft
	 * record as the attribute being extended and if it is and it is not
	 * empty move its entries into an index allocation block.  Note we do
	 * not check whether that actually creates enough space because how
	 * much space is needed exactly is very hard to determine in advance
	 * (due to potential need for associated attribute list attribute
	 * extensions) and also because even if it does not create enough space
	 * it will still help and save work later on when working for example
	 * on the attribute list attribute.
	 */
	if (ictx) {
		long delta;
		INDEX_ROOT *ir;
		INDEX_HEADER *ih;
		INDEX_ENTRY *ie, *first_ie;
		ntfs_index_context *root_ictx;
		ntfs_attr_search_ctx root_actx;

		if (ni->type != AT_INDEX_ALLOCATION && ni->type != AT_BITMAP)
			panic("%s(): ni->type != AT_INDEX_ALLOCATION && "
					"ni->type != AT_BITMAP\n",
					__FUNCTION__);
		ntfs_attr_search_ctx_init(&root_actx, actx->ni, m);
		err = ntfs_attr_find_in_mft_record(AT_INDEX_ROOT, ni->name,
				ni->name_len, NULL, 0, &root_actx);
		if (err) {
			if (err != ENOENT) {
				ntfs_error(vol->mp, "Failed to find index "
						"root attribute in mft_no "
						"0x%llx (error %d).  Inode is "
						"corrupt.  Run chkdsk.",
						(unsigned long long)ni->mft_no,
						err);
				NVolSetErrors(vol);
			}
			/*
			 * The index root is in a different mft record so we
			 * cannot gain anything by moving out its entries.  Set
			 * @ictx to NULL so we do not waste our time trying
			 * again.
			 */
			ictx = NULL;
			goto ictx_done;
		}
		/*
		 * We found the index root in the same mft record as the
		 * attribute (extent) to be extended.  Check whether it is
		 * empty or not.
		 */
		ir = (INDEX_ROOT*)((u8*)root_actx.a +
				le16_to_cpu(root_actx.a->value_offset));
		ih = &ir->index;
		first_ie = ie = (INDEX_ENTRY*)((u8*)ih +
				le32_to_cpu(ih->entries_offset));
		while (!(ie->flags & INDEX_ENTRY_END))
			ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length));
		/*
		 * If there are no entries other than the end entry we cannot
		 * gain anything by moving out the entries from the index root.
		 * Set @ictx to NULL so we do not waste our time trying again.
		 */
		if (ie == first_ie) {
			ictx = NULL;
			goto ictx_done;
		}
		/*
		 * We cannot have gotten this far if the current index context
		 * is locked and/or it is the index root.
		 *
		 * Also, we need to undo what we have done so far as the
		 * metadata is currently in an inconsistent state and things
		 * will get really confused when moving the entries from the
		 * index root to the index allocation block and the same
		 * attribute we are extending at the moment is extended.
		 * Another reason is that the mft record will be dropped by the
		 * move thus we would expose invalid metadata to concurrent
		 * threads which is a Bad Thing(TM).
		 *
		 * For the same reasons we also need to drop the runlist lock
		 * we are holding.
		 */
		if (ictx->is_locked)
			panic("%s(): ictx->is_locked\n", __FUNCTION__);
		if (ictx->is_root)
			panic("%s(): ictx->is_root\n", __FUNCTION__);
		ll = alloc_size >> vol->cluster_size_shift;
		err = ntfs_cluster_free(ni, ll, -1, actx, NULL);
		if (err) {
			ntfs_error(vol->mp, "Failed to release allocated "
					"cluster(s) (error %d).  Run chkdsk "
					"to recover the lost cluster(s).", err);
			NVolSetErrors(vol);
		}
		m = actx->m;
		a = actx->a;
		/*
		 * If the runlist truncation fails and/or the search context is
		 * no longer valid, we cannot resize the attribute record or
		 * build the mapping pairs array thus we mark the volume dirty
		 * and tell the user to run chkdsk.
		 */
		err = ntfs_rl_truncate_nolock(vol, &ni->rl, ll);
		if (err || actx->is_error) {
			if (actx->is_error)
				err = actx->error;
			ntfs_error(vol->mp, "Failed to %s (error %d).  Run "
					"chkdsk.", actx->is_error ? "restore "
					"attribute search context" :
					"truncate attribute runlist", err);
			NVolSetErrors(vol);
			goto err_out;
		}
		lck_rw_unlock_exclusive(&ni->rl.lock);
		/* Find the index root by walking up the tree path. */
		root_ictx = ictx;
		while (!root_ictx->is_root) {
			root_ictx = root_ictx->up;
			/*
			 * If we go all the way round to the beginning without
			 * finding the root something has gone badly wrong.
			 */
			if (root_ictx == ictx)
				panic("%s(): root_ictx == ictx\n",
						__FUNCTION__);
		}
		/*
		 * We need a proper deallocatable attribute search context thus
		 * switch the one pointing to the attribute to be resized to
		 * point to the index root.  FIXME: We are not updating
		 * @actx->al_entry as this is not going to be touched at all.
		 * Having said that set it to NULL just in case.
		 */
		actx->a = root_actx.a;
		actx->al_entry = NULL;
		/*
		 * Lock the index root node.  We already have the index root
		 * attribute thus only need to do the revalidation part of
		 * re-locking.
		 */
		root_ictx->is_locked = 1;
		root_ictx->actx = actx;
		root_ictx->bytes_free = le32_to_cpu(m->bytes_allocated) -
				le32_to_cpu(m->bytes_in_use);
		root_ictx->ir = ir;
		delta = (u8*)ih - (u8*)root_ictx->index;
		if (delta) {
			INDEX_ENTRY **entries;
			unsigned u;

			root_ictx->index = ih;
			root_ictx->entry = (INDEX_ENTRY*)(
					(u8*)root_ictx->entry + delta);
			entries = root_ictx->entries;
			for (u = 0; u < root_ictx->nr_entries; u++)
				entries[u] = (INDEX_ENTRY*)((u8*)
						entries[u] + delta);
		}
		/*
		 * Move the index root entries to an index allocation block.
		 *
		 * Note we do not need to worry about this causing infinite
		 * recursion in the case that we were called from
		 * ntfs_index_block_alloc() which was called from
		 * ntfs_index_move_root_to_allocation_block() because the
		 * latter will have emptied the index root before calling
		 * ntfs_index_block_alloc() thus we will bail out above when
		 * checking whether the index root is empty the second time
		 * round and the recursion will stop there.  This is a very
		 * rare occurrence thus there is no point in special-casing it
		 * in the code in a more efficient but more complicated way.
		 *
		 * A complication is that ntfs_attr_resize() may have been
		 * called from ntfs_index_block_alloc() and in this case when
		 * we call ntfs_index_move_root_to_allocation_block() it will
		 * call ntfs_index_block_alloc() again which will cause a
		 * deadlock (or with lock debugging enabled panic()) because
		 * ntfs_index_block_alloc() takes the bitmap inode lock for
		 * writing.  To avoid this ntfs_index_block_alloc() sets
		 * @ictx->bmp_is_locked and we need to set
		 * @root_ictx->bmp_is_locked to the same value so that when
		 * ntfs_index_move_root_to_allocation_block() calls
		 * ntfs_index_block_alloc() the latter will know not to take
		 * the bitmap inode lock again.
		 */
		root_ictx->bmp_is_locked = ictx->bmp_is_locked;
		err = ntfs_index_move_root_to_allocation_block(root_ictx);
		if (root_ictx != ictx)
			root_ictx->bmp_is_locked = 0;
		if (err) {
			ntfs_error(vol->mp, "Failed to move index root to "
					"index allocation block (error %d).",
					err);
			if (root_ictx->is_locked)
				ntfs_index_ctx_unlock(root_ictx);
			/*
			 * This is a disaster as it means the index context is
			 * no longer valid thus we have to bail out all the way.
			 */
			return err;
		}
		/* Unlock the newly created index block. */
		if (root_ictx->is_root)
			panic("%s(): root_ictx->is_root\n", __FUNCTION__);
		if (!root_ictx->is_locked)
			panic("%s(): !root_ictx->is_locked\n", __FUNCTION__);
		ntfs_index_ctx_unlock(root_ictx);
		/*
		 * We are done.  The index root is now empty thus the mft
		 * record should now have enough space.  Because we undid
		 * everything and dropped the runlist lock as well as the mft
		 * record when moving the index root entries into the index
		 * allocation block we need to restart the attribute allocation
		 * extension again.
		 *
		 * But first we set @ictx to NULL so we do not get here again
		 * in the case that there still is not enough free space.  This
		 * is not a disaster as we can just carry on doing other
		 * rearrangements to free up enough space in the mft record.
		 */
		ictx = NULL;
		goto retry_extend;
	}
ictx_done:
	/*
	 * There is not enough space in the mft record.
	 *
	 * We need to add an attribute list attribute if it is not already
	 * present.
	 */
	if (!NInoAttrList(base_ni)) {
		err = ntfs_attr_list_add(base_ni, base_m, actx);
		if (err || actx->is_error) {
			if (!err)
				err = actx->error;
			ntfs_error(vol->mp, "Failed to %s mft_no 0x%llx (error "
					"%d).", actx->is_error ?
					"remap extent mft record of" :
					"add attribute list attribute to",
					(unsigned long long)base_ni->mft_no,
					err);
			goto undo;
		}
		/*
		 * The attribute location will have changed so update it from
		 * the search context.
		 */
		m = actx->m;
		a = actx->a;
		/*
		 * Retry the original attribute record resize as we may now
		 * have enough space to create the complete remaining mapping
		 * pairs array in the moved attribute record.
		 *
		 * This can for example happen when the attribute was moved out
		 * to an extent mft record which has much more free space than
		 * the base mft record had.
		 */
		goto retry_attr_rec_resize;
	}
	/*
	 * If the attribute record is in an extent mft record we know the
	 * attribute can be outside the base mft record (as it already is) thus
	 * we can simply resize the attribute to the maximum size possible and
	 * then proceed to fill it with mapping pairs data until it is full,
	 * then start a new extent in a new mft record, etc, until all runlist
	 * elements have been saved in mapping pairs arrays.
	 */
	if (m != base_m) {
		ATTR_LIST_ENTRY *al_entry;
		unsigned new_al_size;

		/*
		 * If the attribute record is not the only one in the extent
		 * mft record then move it to a new extent mft record as that
		 * will allow the attribute record to grow larger thus reducing
		 * the total number of extent attribute records needed to a
		 * minimum.
		 */
		if (!ntfs_attr_record_is_only_one(m, a)) {
move_attr:
			lck_rw_lock_shared(&base_ni->attr_list_rl.lock);
			err = ntfs_attr_record_move(actx);
			lck_rw_unlock_shared(&base_ni->attr_list_rl.lock);
			if (err) {
				if (start < 0 || start >= alloc_size)
					ntfs_error(vol->mp, "Failed to move "
							"attribute extent "
							"from mft record "
							"0x%llx to an extent "
							"mft record (error "
							"%d).",
							(unsigned long long)
							actx->ni->mft_no, err);
				goto undo;
			}
			/*
			 * The attribute location will have changed so update
			 * it from the search context.
			 */
			m = actx->m;
			a = actx->a;
			/*
			 * Retry the original attribute record resize as we may
			 * now have enough space to create the complete
			 * remaining mapping pairs array in the moved attribute
			 * record.
			 */
			goto retry_attr_rec_resize;
		}
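		/*
		 * Work out how much free space is left in this mft record,
		 * rounded down to a multiple of 8 bytes as attribute records
		 * must be 8-byte aligned.
		 */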
		max_size = (le32_to_cpu(m->bytes_allocated) -
				le32_to_cpu(m->bytes_in_use)) & ~7;
add_mapping_pairs_to_attr:
		max_size += attr_len - mp_ofs;
		err = ntfs_attr_record_resize(m, a, max_size + mp_ofs);
		/*
		 * We worked out the exact size we can extend to so the resize
		 * cannot fail.
		 */
		if (err)
			panic("%s(): err (ntfs_attr_record_resize())\n",
					__FUNCTION__);
		/*
		 * If the new size and the old size are the same we cannot add
		 * anything to this extent so do not bother rebuilding the
		 * mapping pairs array and go straight to creating the next
		 * extent.
		 */
		if (attr_len == le32_to_cpu(a->length)) {
start_new_attr:
			stop_vcn = sle64_to_cpu(a->highest_vcn) + 1;
			goto skip_mpa_build;
		}
build_mpa:
		mp_rebuilt = TRUE;
		/* Generate the mapping pairs directly into the attribute. */
		err = ntfs_mapping_pairs_build(vol, (s8*)a + mp_ofs,
				le32_to_cpu(a->length) - mp_ofs, rl,
				lowest_vcn, -1, &stop_vcn);
		if (err && err != ENOSPC) {
			if (start < 0 || start >= alloc_size)
				ntfs_error(vol->mp, "Cannot extend allocation "
						"of mft_no 0x%llx, attribute "
						"type 0x%x, because building "
						"the mapping pairs array "
						"failed (error %d).",
						(unsigned long long)ni->mft_no,
						(unsigned)le32_to_cpu(ni->type),
						err);
			err = EIO;
			/*
			 * Need to set @a->highest_vcn to enable correct error
			 * recovery.
			 */
			if (!is_first)
				a->highest_vcn = cpu_to_sle64(sle64_to_cpu(
						a->lowest_vcn) - 1);
			goto undo;
		}
		/* Update the highest_vcn. */
		a->highest_vcn = cpu_to_sle64(stop_vcn - 1);
		/*
		 * We have finished with this extent so update the current
		 * allocated size and attribute length to reflect this.  We
		 * need to do this to enable error handling and recovery.
		 */
		alloc_size = stop_vcn << vol->cluster_size_shift;
		attr_len = le32_to_cpu(a->length);
		/*
		 * If the mapping pairs build succeeded, i.e. the current
		 * attribute extent contains the end of the runlist, we are
		 * done and only need to update the attribute sizes in the base
		 * attribute extent so go and do that.
		 */
		if (!err)
			goto update_sizes;
		/*
		 * We have finished with this extent mft record thus we release
		 * it after ensuring the changes make it to disk later.  We do
		 * this by hand as we want to keep the current attribute list
		 * attribute entry as we will be inserting the entry for the
		 * next attribute extent immediately after it.
		 */
		NInoSetMrecNeedsDirtying(actx->ni);
skip_mpa_build:
		/* Get the size of the remaining mapping pairs array. */
		rl = ntfs_rl_find_vcn_nolock(rl, stop_vcn);
		if (!rl)
			panic("%s(): !rl (skip_mpa_build)\n", __FUNCTION__);
		if (!rl->length)
			panic("%s(): !rl->length (skip_mpa_build)\n",
					__FUNCTION__);
		if (rl->lcn < LCN_HOLE)
			panic("%s(): rl->lcn < LCN_HOLE (skip_mpa_build)\n",
					__FUNCTION__);
		err = ntfs_get_size_for_mapping_pairs(vol, rl, stop_vcn, -1,
				&mp_size);
		if (err) {
			if (start < 0 || start >= alloc_size)
				ntfs_error(vol->mp, "Cannot complete "
						"extension of allocation of "
						"mft_no 0x%llx, attribute type "
						"0x%x, because determining "
						"the size for the mapping "
						"pairs failed (error %d).",
						(unsigned long long)ni->mft_no,
						(unsigned)le32_to_cpu(ni->type),
						err);
			err = EIO;
			goto undo;
		}
		/* We only release extent mft records. */
		if (actx->ni != base_ni)
			ntfs_extent_mft_record_unmap(actx->ni);
		/*
		 * We now need to allocate a new extent mft record, attach it
		 * to the base ntfs inode and set up the search context to
		 * point to it, then create a new attribute extent in it of
		 * either maximum size or the left to do mapping pairs size and
		 * then build the mapping pairs array in it.  Finally, add an
		 * attribute list attribute entry for the new attribute extent.
		 */
		err = ntfs_mft_record_alloc(vol, NULL, NULL, base_ni,
				&actx->ni, &m, &a);
		if (err) {
			/*
			 * Make it safe to release the attribute search
			 * context.
			 */
			actx->ni = base_ni;
			if (start < 0 || start >= alloc_size)
				ntfs_error(vol->mp, "Cannot extend allocation "
						"of mft_no 0x%llx, attribute "
						"type 0x%x, because "
						"allocating a new extent mft "
						"record failed (error %d),",
						(unsigned long long)ni->mft_no,
						(unsigned)le32_to_cpu(ni->type),
						err);
			goto undo;
		}
		actx->m = m;
		actx->a = a;
		/* We are no longer working on the extent we started with. */
		is_first = FALSE;
		/*
		 * Get the size needed for the remaining mapping pairs array
		 * and make space for an attribute large enough to hold it.  If
		 * there is not enough space to do so make the maximum amount
		 * of space available.
		 */
		lowest_vcn = stop_vcn;
		/*
		 * Calculate the offset into the new attribute at which the
		 * mapping pairs array begins.  The mapping pairs array is
		 * placed after the name aligned to an 8-byte boundary which in
		 * turn is placed immediately after the non-resident attribute
		 * record itself.
		 */
		mp_ofs = offsetof(ATTR_RECORD, compressed_size) + ((name_size +
				7) & ~7);
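		/*
		 * For example (figures assuming the standard NTFS layout in
		 * which compressed_size is at byte offset 0x40 of the
		 * attribute record): a five character name has a name_size of
		 * 10 bytes which rounds up to 16, giving mp_ofs = 0x40 + 16 =
		 * 0x50.
		 */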
		err = ntfs_attr_record_make_space(m, a, mp_ofs + mp_size);
		if (err) {
			if (err != ENOSPC)
				panic("%s(): err != ENOSPC\n", __FUNCTION__);
			max_size = (le32_to_cpu(m->bytes_allocated) -
					le32_to_cpu(m->bytes_in_use)) & ~7;
			if (max_size < mp_ofs)
				panic("%s(): max_size < mp_ofs\n",
						__FUNCTION__);
			err = ntfs_attr_record_make_space(m, a, max_size);
			/*
			 * We worked out the exact maximum size so the call
			 * cannot fail.
			 */
			if (err)
				panic("%s(): err ("
						"ntfs_attr_record_make_space()"
						")\n", __FUNCTION__);
		}
		/*
		 * Now setup the new attribute record.  The entire attribute
		 * has been zeroed and the length of the attribute record has
		 * been set.
		 *
		 * Before we proceed with setting up the attribute, add an
		 * attribute list attribute entry for the created attribute
		 * extent.
		 */
		al_entry = actx->al_entry = (ATTR_LIST_ENTRY*)(
				(u8*)actx->al_entry +
				le16_to_cpu(actx->al_entry->length));
		al_entry_len = (offsetof(ATTR_LIST_ENTRY, name) + name_size +
				7) & ~7;
		new_al_size = base_ni->attr_list_size + al_entry_len;
		/* Out of bounds checks. */
		if ((u8*)al_entry < base_ni->attr_list || (u8*)al_entry >
				base_ni->attr_list + new_al_size ||
				(u8*)al_entry + al_entry_len >
				base_ni->attr_list + new_al_size) {
			/* Inode is corrupt. */
			if (start < 0 || start >= alloc_size)
				ntfs_error(vol->mp, "Cannot complete "
						"extension of allocation of "
						"mft_no 0x%llx, attribute type "
						"0x%x, because the inode is "
						"corrupt.  Run chkdsk.",
						(unsigned long long)ni->mft_no,
						(unsigned)
						le32_to_cpu(ni->type));
			err = EIO;
			goto free_undo;
		}
		err = ntfs_attr_size_bounds_check(vol, AT_ATTRIBUTE_LIST,
				new_al_size);
		if (err) {
			if (err == ERANGE) {
				if (start < 0 || start >= alloc_size)
					ntfs_error(vol->mp, "Cannot complete "
							"extension of "
							"allocation of mft_no "
							"0x%llx, attribute "
							"type 0x%x, because "
							"the attribute list "
							"attribute would "
							"become to large.  "
							"You need to "
							"defragment your "
							"volume and then try "
							"again.",
							(unsigned long long)
							ni->mft_no, (unsigned)
							le32_to_cpu(ni->type));
				err = ENOSPC;
			} else {
				if (start < 0 || start >= alloc_size)
					ntfs_error(vol->mp, "Cannot complete "
							"extension of "
							"allocation of mft_no "
							"0x%llx, attribute "
							"type 0x%x, because "
							"the attribute list "
							"attribute is unknown "
							"on the volume.  The "
							"volume is corrupt.  "
							"Run chkdsk.",
							(unsigned long long)
							ni->mft_no, (unsigned)
							le32_to_cpu(ni->type));
				NVolSetErrors(vol);
				err = EIO;
			}
			goto free_undo;
		}
		/*
		 * Reallocate the memory buffer if needed and create space for
		 * the new entry.
		 */
		new_al_alloc = (new_al_size + NTFS_ALLOC_BLOCK - 1) &
				~(NTFS_ALLOC_BLOCK - 1);
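		/*
		 * For example, assuming an NTFS_ALLOC_BLOCK of 1024 bytes, a
		 * new_al_size of 1300 bytes is rounded up to a new_al_alloc
		 * of 2048 bytes.
		 */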
		if (new_al_alloc > base_ni->attr_list_alloc) {
			u8 *tmp, *al, *al_end;
			unsigned al_entry_ofs;

			tmp = OSMalloc(new_al_alloc, ntfs_malloc_tag);
			if (!tmp) {
				if (start < 0 || start >= alloc_size)
					ntfs_error(vol->mp, "Cannot complete "
							"extension of "
							"allocation of mft_no "
							"0x%llx, attribute "
							"type 0x%x, because "
							"there is not enough "
							"memory to extend "
							"the attribute list "
							"attribute.",
							(unsigned long long)
							ni->mft_no, (unsigned)
							le32_to_cpu(ni->type));
				err = ENOMEM;
				goto free_undo;
			}
			al = base_ni->attr_list;
			al_entry_ofs = (u8*)al_entry - al;
			al_end = al + base_ni->attr_list_size;
			memcpy(tmp, al, al_entry_ofs);
			if ((u8*)al_entry < al_end)
				memcpy(tmp + al_entry_ofs + al_entry_len,
						al + al_entry_ofs,
						base_ni->attr_list_size -
						al_entry_ofs);
			al_entry = actx->al_entry = (ATTR_LIST_ENTRY*)(tmp +
					al_entry_ofs);
			OSFree(base_ni->attr_list, base_ni->attr_list_alloc,
					ntfs_malloc_tag);
			base_ni->attr_list_alloc = new_al_alloc;
			base_ni->attr_list = tmp;
		} else if ((u8*)al_entry < base_ni->attr_list +
				base_ni->attr_list_size)
			memmove((u8*)al_entry + al_entry_len, al_entry,
					base_ni->attr_list_size -
					((u8*)al_entry - base_ni->attr_list));
		base_ni->attr_list_size = new_al_size;
		/* Set up the attribute extent and the attribute list entry. */
		al_entry->type = a->type = ni->type;
		al_entry->length = cpu_to_le16(al_entry_len);
		a->non_resident = 1;
		al_entry->name_length = a->name_length = ni->name_len;
		a->name_offset = const_cpu_to_le16(offsetof(ATTR_RECORD,
				compressed_size));
		al_entry->name_offset = offsetof(ATTR_LIST_ENTRY, name);
		al_entry->instance = a->instance = m->next_attr_instance;
		/*
		 * Increment the next attribute instance number in the mft
		 * record as we consumed the old one.
		 */
		m->next_attr_instance = cpu_to_le16((le16_to_cpu(
				m->next_attr_instance) + 1) & 0xffff);
		al_entry->lowest_vcn = a->lowest_vcn =
				cpu_to_sle64(lowest_vcn);
		al_entry->mft_reference = MK_LE_MREF(actx->ni->mft_no,
				actx->ni->seq_no);
		a->mapping_pairs_offset = cpu_to_le16(mp_ofs);
		/* Copy the attribute name into place. */
		if (name_size) {
			memcpy((u8*)a + offsetof(ATTR_RECORD, compressed_size),
					ni->name, name_size);
			memcpy(&al_entry->name, ni->name, name_size);
		}
		/* For tidiness, zero out the unused space. */
		if (al_entry_len > offsetof(ATTR_LIST_ENTRY, name) + name_size)
			memset((u8*)al_entry +
					offsetof(ATTR_LIST_ENTRY, name) +
					name_size, 0, al_entry_len -
					(offsetof(ATTR_LIST_ENTRY, name) +
					name_size));
		/*
		 * Need to set @a->highest_vcn to enable correct error
		 * recovery.
		 */
		a->highest_vcn = cpu_to_sle64(lowest_vcn - 1);
		/*
		 * Extend the attribute list attribute and copy in the modified
		 * value from the cache.
		 */
		err = ntfs_attr_list_sync_extend(base_ni, base_m,
				(u8*)al_entry - base_ni->attr_list, actx);
		if (err || actx->is_error) {
			/*
			 * If @actx->is_error indicates error this is fatal as
			 * we cannot build the mapping pairs array into it as
			 * it is not mapped.
			 *
			 * However, we may still be able to recover from this
			 * situation by freeing the extent mft record and thus
			 * deleting the attribute record.  This only works when
			 * this is the only attribute record in the mft record
			 * and when we just created this extent attribute
			 * record.  We can easily determine if this is the only
			 * attribute in the mft record by scanning through the
			 * cached attribute list attribute.
			 */
			if (!err)
				err = actx->error;
			ntfs_error(vol->mp, "Failed to %s mft_no 0x%llx (error "
					"%d).", actx->is_error ?
					"remap extent mft record of" :
					"extend and sync attribute list "
					"attribute to",
					(unsigned long long)base_ni->mft_no,
					err);
			goto undo;
		}
		/*
		 * Finally, proceed to building the mapping pairs array into
		 * the attribute record.
		 */
		goto build_mpa;
	}
	/*
	 * We now know that the attribute is in the base mft record.
	 *
	 * For performance reasons we want to keep the first extent of the
	 * unnamed $DATA attribute of files and the $I30 named
	 * $INDEX_ALLOCATION and $BITMAP attributes of directories in the base
	 * mft record even if this means that the first extent will be nearly
	 * empty.  This ensures that loading an inode is faster and thus stat()
	 * and getattrlist() will be faster.
	 *
	 * If the attribute is one of the above described ones then we keep the
	 * existing extent as it is (unless it is actually empty in which case
	 * we add at least some mapping data to it) and start a new extent in a
	 * new extent mft record.
	 *
	 * In all other cases we move the attribute to a new extent mft record
	 * and retry the attribute resize as it may now fit.
	 */
	if (a->lowest_vcn || (!S_ISDIR(base_ni->mode) &&
			(ni->type != AT_DATA || ni->name_len)) ||
			(S_ISDIR(base_ni->mode) &&
			(!ni->name_len || ni->name != I30)))
		goto move_attr;
	max_size = (le32_to_cpu(m->bytes_allocated) -
			le32_to_cpu(m->bytes_in_use)) & ~7;
	al_entry_len = le16_to_cpu(actx->al_entry->length); 
	/*
	 * A single mapping pair can be up to 17 bytes in size so we need at
	 * least that much free space.  But we need to align the attribute
	 * length to 8 bytes thus the 17 becomes 24.
	 *
	 * Further, we will be adding at least one attribute list attribute
	 * entry thus we want to definitely have space for that to happen.  If
	 * the attribute list attribute is non-resident we may have to add
	 * another mapping pair which would as above be 24 bytes or if it is
	 * resident we would have to add an actual attribute list entry which
	 * would be the same size as the one for the current attribute record.
	 * As this is guaranteed to be larger than 24 bytes we use the larger
	 * size as the minimum to leave free.
	 *
	 * Thus the minimum of free space we require before adding any mapping
	 * pairs to the current attribute record is 24 + @al_entry_len.
	 *
	 * There may be a lot of free space so it would be silly to only use
	 * the minimum.  On one hand we would like to consume as much of the
	 * free space as possible to keep the number of attribute extents to a
	 * minimum.  On the other hand we would like to keep enough spare space
	 * for four attribute list attribute entries (this is an arbitrary
	 * choice) to simplify future expansion of the attribute list
	 * attribute.
	 */
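	/*
	 * (The 17 bytes are the worst case for a single mapping pair: a one
	 * byte header plus up to eight bytes for the length element plus up
	 * to eight bytes for the LCN delta element.)
	 */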
	if (!*((u8*)a + mp_ofs)) {
		/*
		 * There are no mapping pairs in this attribute record thus we
		 * either have to add some mapping pairs or if the available
		 * space is less than our minimum we have to move the attribute
		 * record out into a new extent mft record.
		 */
		if (max_size < 24 + al_entry_len)
			goto move_attr;
		/*
		 * We have our minimum amount of space and possibly a lot more.
		 * If we have less than our desired spare space use our minimum
		 * and if we have more than that use everything except the
		 * desired spare space.
		 */
		if (max_size < 24 + (4 * al_entry_len))
			max_size = 24;
		else
			max_size -= 4 * al_entry_len;
	} else {
		/*
		 * Check if it would be sensible to add at least some mapping
		 * pairs to the current attribute record.
		 *
		 * If the amount of free space is less than the desired spare
		 * space we leave this attribute record be and start a new
		 * extent and if we have more than that use everything except
		 * the desired spare space.
		 */
		if (max_size < 24 + (4 * al_entry_len))
			goto start_new_attr;
		max_size -= 4 * al_entry_len;
	}
	/*
	 * We want to add some mapping pairs to the current attribute before
	 * starting the next one.
	 *
	 * @max_size is already set to the number of bytes to consume from the
	 * free space in the mft record and it is guaranteed that the mft
	 * record has at least that much free space.
	 */
	goto add_mapping_pairs_to_attr;
update_sizes:
	/*
	 * We now have extended the allocated size of the attribute.  Reflect
	 * this in the ntfs_inode structure and the attribute record.
	 */
	if (a->lowest_vcn) {
		/*
		 * We are not in the first attribute extent, switch to it, but
		 * first ensure the changes will make it to disk later.
		 */
		NInoSetMrecNeedsDirtying(actx->ni);
		ntfs_attr_search_ctx_reinit(actx);
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0,
				NULL, 0, actx);
		if (err) {
			if (start < 0 || start >= alloc_size)
				ntfs_error(vol->mp, "Cannot complete "
						"extension of allocation of "
						"mft_no 0x%llx, attribute type "
						"0x%x, because lookup of "
						"first attribute extent "
						"failed (error %d).",
						(unsigned long long)
						base_ni->mft_no, (unsigned)
						le32_to_cpu(ni->type), err);
			if (err == ENOENT)
				err = EIO;
			goto undo_do_trunc;
		}
		/* @m is not used any more so no need to set it. */
		a = actx->a;
	}
	/*
	 * If we created a hole and the attribute is not marked as sparse, mark
	 * it as sparse now.
	 */
	if (is_sparse && !NInoSparse(ni)) {
		err = ntfs_attr_sparse_set(base_ni, ni, actx);
		if (err) {
			ntfs_error(vol->mp, "Failed to set the attribute to "
					"be sparse (error %d).", err);
			goto undo_do_trunc;
		}
		/*
		 * The attribute may have been moved to make space for the
		 * compressed size so @a is now invalid.
		 */
		a = actx->a;
	}
	lck_spin_lock(&ni->size_lock);
	ni->allocated_size = new_alloc_size;
	a->allocated_size = cpu_to_sle64(new_alloc_size);
	if (NInoSparse(ni) || (ni->type != AT_INDEX_ALLOCATION &&
			NInoCompressed(ni))) {
		ni->compressed_size += nr_allocated << vol->cluster_size_shift;
		a->compressed_size = cpu_to_sle64(ni->compressed_size);
	}
	lck_spin_unlock(&ni->size_lock);
	if (ni->name == I30 && ni->type == AT_INDEX_ALLOCATION) {
		lck_spin_lock(&base_ni->size_lock);
		base_ni->allocated_size = new_alloc_size;
		lck_spin_unlock(&base_ni->size_lock);
	}
alloc_done:
	if (new_data_size > sle64_to_cpu(a->data_size)) {
		if (!ubc_setsize(ni->vn, new_data_size)) {
			ntfs_error(vol->mp, "Failed to set size in UBC.");
			/*
			 * This can only happen if a previous resize failed and
			 * the UBC size was already out of date in which case
			 * we can just leave it out of date and continue to
			 * completion returning an error.  FIXME: We could roll
			 * back the changes to the metadata at some point but
			 * it does not seem worth it at the moment given that
			 * the error can only happen if there already was an
			 * error thus it is very unlikely.
			 */ 
			err = EIO;
		}
		lck_spin_lock(&ni->size_lock);
		ni->data_size = new_data_size;
		a->data_size = cpu_to_sle64(new_data_size);
		lck_spin_unlock(&ni->size_lock);
		if (ni->name == I30 && ni->type == AT_INDEX_ALLOCATION) {
			lck_spin_lock(&base_ni->size_lock);
			base_ni->data_size = new_data_size;
			lck_spin_unlock(&base_ni->size_lock);
		}
	}
dirty_done:
	/* Ensure the changes make it to disk. */
	NInoSetMrecNeedsDirtying(actx->ni);
	/*
	 * We have modified the size.  If the ntfs inode is the base inode,
	 * cause the sizes to be written to all the directory index entries
	 * pointing to the base inode when the inode is written to disk.  Do
	 * not do this for directories as they have both sizes set to zero in
	 * their index entries.
	 */
	if (ni == base_ni && !S_ISDIR(ni->mode))
		NInoSetDirtySizes(ni);
done:
	ntfs_attr_search_ctx_put(actx);
	ntfs_mft_record_unmap(base_ni);
	lck_rw_unlock_exclusive(&ni->rl.lock);
	ntfs_debug("Done, new_allocated_size 0x%llx.",
			(unsigned long long)new_alloc_size);
	if (dst_alloc_size)
		*dst_alloc_size = new_alloc_size;
	return err;
free_undo:
	/* We have not yet added an attribute list entry for the new extent. */
	al_entry_added = FALSE;
	goto free_extent;
undo:
	ntfs_attr_search_ctx_reinit(actx);
	if (is_first && !mp_rebuilt)
		goto undo_alloc;
	/* Look up the attribute extent we were working on. */
	if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, lowest_vcn,
			NULL, 0, actx)) {
		/* There is nothing we can do now, bail out. */
		ntfs_error(vol->mp, "Failed to find current attribute extent "
				"in error code path.  Leaving inconsistent "
				"metadata.  Run chkdsk.");
		NVolSetErrors(vol);
		goto err_out;
	}
	if (is_first)
		actx->a->highest_vcn = cpu_to_sle64(
				(alloc_size >> vol->cluster_size_shift) - 1);
undo_alloc:
	ll = alloc_size >> vol->cluster_size_shift;
	if (ntfs_cluster_free(ni, ll, -1, actx, &nr_freed)) {
		ntfs_error(vol->mp, "Failed to release allocated cluster(s) "
				"in error code path.  Run chkdsk to recover "
				"the lost cluster(s).");
		NVolSetErrors(vol);
		/*
		 * Still need to know how many real clusters are effectively
		 * truncated from the attribute extension.
		 */
		nr_freed = ntfs_rl_get_nr_real_clusters(&ni->rl, ll, -1);
	}
	m = actx->m;
	a = actx->a;
undo_hole:
	/*
	 * If the runlist truncation fails and/or the search context is no
	 * longer valid, we cannot resize the attribute record or build the
	 * mapping pairs array thus we mark the volume dirty and tell the user
	 * to run chkdsk.
	 */
	if (ntfs_rl_truncate_nolock(vol, &ni->rl, ll) || actx->is_error) {
		ntfs_error(vol->mp, "Failed to %s in error code path.  Run "
				"chkdsk.", actx->is_error ?
				"restore attribute search context" :
				"truncate attribute runlist");
		NVolSetErrors(vol);
	} else if (is_first) {
		if (mp_rebuilt) {
			/* We are working on the original extent, restore it. */
			if (ntfs_attr_record_resize(m, a, attr_len)) {
				ntfs_error(vol->mp, "Failed to restore "
						"attribute record in error "
						"code path.  Run chkdsk.");
				NVolSetErrors(vol);
			} else /* if (success) */ {
				mp_ofs = le16_to_cpu(a->mapping_pairs_offset);
				if (ntfs_mapping_pairs_build(vol, (s8*)a +
						mp_ofs, attr_len - mp_ofs,
						ni->rl.rl, lowest_vcn, -1,
						NULL)) {
					ntfs_error(vol->mp, "Failed to "
							"restore mapping "
							"pairs array in error "
							"code path.  Run "
							"chkdsk.");
					NVolSetErrors(vol);
				}
				if (actx->ni != base_ni)
					NInoSetMrecNeedsDirtying(actx->ni);
			}
		}
	} else if (/* !is_first && */ a->highest_vcn ==
			cpu_to_sle64(sle64_to_cpu(a->lowest_vcn) - 1)) {
		/* We need to delete the attribute list entry, too. */
		al_entry_added = TRUE;
		/* We are working on a new extent, remove it. */
		if (!ntfs_attr_record_is_only_one(m, a)) {
			ntfs_attr_record_delete_internal(m, a);
			if (actx->ni != base_ni)
				NInoSetMrecNeedsDirtying(actx->ni);
		} else {
free_extent:
			if (!ntfs_extent_mft_record_free(base_ni, actx->ni,
					m)) {
				/*
				 * The extent inode no longer exists.  Make it
				 * safe to release/reinit the search context.
				 */
				actx->ni = base_ni;
			} else {
				ntfs_error(vol->mp, "Failed to free extent "
						"mft record 0x%llx of mft_no "
						"0x%llx in error code path.  "
						"Leaving inconsistent "
						"metadata.  Run chkdsk.",
						(unsigned long long)
						actx->ni->mft_no,
						(unsigned long long)
						base_ni->mft_no);
				NVolSetErrors(vol);
			}
		}
		if (al_entry_added) {
			ntfs_attr_list_entry_delete(base_ni, actx->al_entry);
			ntfs_attr_search_ctx_reinit(actx);
			if (ntfs_attr_list_sync_shrink(base_ni, 0, actx)) {
				ntfs_error(vol->mp, "Failed to restore "
						"attribute list attribute in "
						"base inode 0x%llx.  Leaving "
						"inconsistent metadata.  "
						"Run chkdsk.",
						(unsigned long long)
						base_ni->mft_no);
				NVolSetErrors(vol);
			}
		}
	}
undo_do_trunc:
	lck_spin_lock(&ni->size_lock);
	if (alloc_size == ni->allocated_size) {
		lck_spin_unlock(&ni->size_lock);
		goto undo_skip_update_sizes;
	}
	lck_spin_unlock(&ni->size_lock);
	ntfs_attr_search_ctx_reinit(actx);
	/* Look up the first attribute extent. */
	if (ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0,
			actx)) {
		/* There is nothing we can do now, bail out. */
		ntfs_error(vol->mp, "Failed to find first attribute extent in "
				"error code path.  Leaving inconsistent "
				"metadata.  Run chkdsk.");
		NVolSetErrors(vol);
		goto err_out;
	}
	a = actx->a;
	lck_spin_lock(&ni->size_lock);
	ni->allocated_size = alloc_size;
	a->allocated_size = cpu_to_sle64(alloc_size);
	if (NInoSparse(ni) || (ni->type != AT_INDEX_ALLOCATION &&
			NInoCompressed(ni))) {
		ni->compressed_size += (nr_allocated - nr_freed) <<
				vol->cluster_size_shift;
		a->compressed_size = cpu_to_sle64(ni->compressed_size);
	}
	lck_spin_unlock(&ni->size_lock);
	if (ni->name == I30 && ni->type == AT_INDEX_ALLOCATION) {
		lck_spin_lock(&base_ni->size_lock);
		base_ni->allocated_size = alloc_size;
		lck_spin_unlock(&base_ni->size_lock);
	}
	/* Ensure the changes make it to disk. */
	if (actx->ni != base_ni)
		NInoSetMrecNeedsDirtying(actx->ni);
	/*
	 * We have modified the size.  If the ntfs inode is the base inode,
	 * cause the sizes to be written to all the directory index entries
	 * pointing to the base inode when the inode is written to disk.  Do
	 * not do this for directories as they have both sizes set to zero in
	 * their index entries.
	 */
	if (ni == base_ni && !S_ISDIR(ni->mode))
		NInoSetDirtySizes(ni);
undo_skip_update_sizes:
	ntfs_attr_search_ctx_put(actx);
	NInoSetMrecNeedsDirtying(base_ni);
	ntfs_mft_record_unmap(base_ni);
	lck_rw_unlock_exclusive(&ni->rl.lock);
	/*
	 * Things are now consistent, try to truncate the attribute back to its
	 * old size which will cause the allocation to be restored to its old
	 * size.
	 *
	 * TODO: We should support partial allocations and when we do so we
	 * should only put the allocated size back if the error was not ENOSPC
	 * and partial allocations are acceptable for this attribute.  In that
	 * case we would also need to update @ni->data_size, @a->data_size, and
	 * the size in the vnode @ni->vn via ubc_setsize().
	 */
	if (!is_first) {
		lck_spin_lock(&ni->size_lock);
		ll = ni->data_size;
		lck_spin_unlock(&ni->size_lock);
		if (ntfs_attr_resize(ni, ll, 0, ictx)) {
			ntfs_error(vol->mp, "Failed to undo partial "
					"allocation in inode 0x%llx in error "
					"code path.",
					(unsigned long long)base_ni->mft_no);
			NVolSetErrors(vol);
		}
	}
conv_err_out:
	ntfs_debug("Failed (error %d).", err);
	return err;
err_out:
	if (actx)
		ntfs_attr_search_ctx_put(actx);
	if (base_m)
		ntfs_mft_record_unmap(base_ni);
	lck_rw_unlock_exclusive(&ni->rl.lock);
	goto conv_err_out;
trunc_err_out:
	mp_rebuilt = FALSE;
	if (is_sparse) {
		ll = alloc_size >> vol->cluster_size_shift;
		/*
		 * Silence compiler warning about possible use of uninitialized
		 * variable.
		 */
		attr_len = 0;
		goto undo_hole;
	}
	goto err_out;
}

/**
 * ntfs_attr_resize - called to change the size of an ntfs attribute inode
 * @ni:		ntfs inode for which to change the size
 * @new_size:	new size in bytes to which to resize the ntfs attribute @ni
 * @ioflags:	flags further describing the resize request
 * @ictx:	index context or NULL
 *
 * Resize the attribute described by the ntfs inode @ni to @new_size bytes.
 *
 * Note: We only support size changes for normal attributes at present, i.e.
 * not compressed and not encrypted.
 *
 * The flags in @ioflags further describe the resize request.  The following
 * ioflags are currently defined in the OS X kernel (many of them are not
 * applicable to resize requests however):
 *	IO_UNIT		- Do i/o as atomic unit.
 *	IO_APPEND	- Append write to end.
 *	IO_SYNC		- Do i/o synchronously.
 *	IO_NODELOCKED	- Underlying node already locked.
 *	IO_NDELAY	- FNDELAY flag set in file table.
 *	IO_NOZEROFILL	- F_SETSIZE fcntl uses this to prevent zero filling.
 *	IO_TAILZEROFILL	- Zero fills at the tail of write.
 *	IO_HEADZEROFILL	- Zero fills at the head of write.
 *	IO_NOZEROVALID	- Do not zero fill if valid page.
 *	IO_NOZERODIRTY	- Do not zero fill if page is dirty.
 *	IO_CLOSE	- The i/o was issued from close path.
 *	IO_NOCACHE	- Same effect as VNOCACHE_DATA, but only for this i/o.
 *	IO_RAOFF	- Same effect as VRAOFF, but only for this i/o.
 *	IO_DEFWRITE	- Defer write if vfs.defwrite is set.
 *	IO_PASSIVE	- This is background i/o so do not throttle other i/o.
 * In particular the only flags that are used in the kernel when calling
 * vnode_setsize() are IO_SYNC and IO_NOZEROFILL.
 *
 * TODO: The @ioflags are currently ignored.
 *
 * If @ictx is not NULL, the resize is for an index allocation or bitmap
 * attribute extension.  In this case, if there is not enough space in the mft
 * record for the extended index allocation/bitmap attribute, the index root is
 * moved to an index block if it is not empty to create more space in the mft
 * record.
 *
 * Return 0 on success and errno on error.
 *
 * Locking: - Caller must hold @ni->lock on the inode for writing.
 *	    - If called for a shrinking operation, the tail of the new final
 *	      partial page will be zeroed by the call to ubc_setsize() thus it
 *	      must not be locked / mapped or the ubc_setsize() call would
 *	      deadlock.
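 *
 * Illustrative example (a sketch, assuming the caller already holds @ni->lock
 * for writing as required): truncating the unnamed $DATA attribute of @ni to
 * 4096 bytes would look like:
 *
 *	err = ntfs_attr_resize(ni, 4096, 0, NULL);
 *	if (err)
 *		ntfs_error(ni->vol->mp, "Truncate failed (error %d).", err);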
 */
errno_t ntfs_attr_resize(ntfs_inode *ni, s64 new_size, int ioflags,
		ntfs_index_context *ictx)
{
	s64 old_size, nr_freed, new_alloc_size, old_alloc_size, compressed_size;
	VCN highest_vcn, old_highest_vcn, lowest_vcn;
	ntfs_inode *eni, *base_ni;
	ntfs_volume *vol = ni->vol;
	ntfs_attr_search_ctx *actx;
	MFT_RECORD *m;
	ATTR_RECORD *a;
	ATTR_LIST_ENTRY *al_entry;
	u8 *del_al_start, *al_end;
	int size_change, alloc_change;
	unsigned mp_size, attr_len, arec_size;
	errno_t err;
	BOOL need_ubc_setsize = TRUE;
	static const char es[] = "  Leaving inconsistent metadata.  Unmount "
			"and run chkdsk.";

	ntfs_debug("Entering for mft_no 0x%llx.",
			(unsigned long long)ni->mft_no);
	/*
	 * Cannot be called for directory inodes as metadata access happens via
	 * the corresponding index inodes.
	 */
	if (S_ISDIR(ni->mode))
		panic("%s(): Called for directory inode.\n", __FUNCTION__);
	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	/*
	 * We are going to change the size thus we need the ntfs inode lock
	 * taken for exclusive access which is already done by the caller.
	 *
	 * When shrinking start by changing the size in the UBC of the vnode.
	 * This will cause all pages in the VM beyond the new size to be thrown
	 * away and the last page to be pushed out to disk and its end
	 * invalidated.
	 *
	 * We guarantee that the size in the UBC in the vnode will always be
	 * smaller or equal to the data_size in the ntfs inode thus no need to
	 * check the data_size.
	 */
	old_size = ubc_getsize(ni->vn);
	if (new_size < old_size) {
		/* ubc_setsize() returns TRUE on success and FALSE on failure. */
		if (!ubc_setsize(ni->vn, new_size)) {
			ntfs_error(vol->mp, "Failed to shrink size in UBC.");
			err = EIO;
			goto err;
		}
		need_ubc_setsize = FALSE;
	}
retry_resize:
	/*
	 * Lock the runlist for writing and map the mft record to ensure it is
	 * safe to modify the attribute runlist and sizes.
	 */
	lck_rw_lock_exclusive(&ni->rl.lock);
	err = ntfs_mft_record_map(base_ni, &m);
	if (err) {
		ntfs_error(vol->mp, "Failed to map mft record for mft_no "
				"0x%llx (error %d).",
				(unsigned long long)ni->mft_no, err);
		goto unl_err;
	}
	actx = ntfs_attr_search_ctx_get(base_ni, m);
	if (!actx) {
		ntfs_error(vol->mp, "Failed to allocate a search context (not "
				"enough memory).");
		err = ENOMEM;
		goto unm_err;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0,
			actx);
	if (err) {
		if (err == ENOENT) {
			ntfs_error(vol->mp, "Open attribute is missing from "
					"mft record.  Inode 0x%llx is "
					"corrupt.  Run chkdsk.",
					(unsigned long long)ni->mft_no);
			err = EIO;
		} else
			ntfs_error(vol->mp, "Failed to lookup attribute "
					"(error %d).", err);
		goto put_err;
	}
	m = actx->m;
	a = actx->a;
	if (old_size != ntfs_attr_size(a)) {
		/*
		 * A failed truncate caused the ubc size to get out of sync.
		 * The current size of the attribute value is the correct old
		 * size.
		 */
		old_size = ntfs_attr_size(a);
	}
	/* Calculate the new allocated size. */
	if (NInoNonResident(ni))
		new_alloc_size = (new_size + vol->cluster_size - 1) &
				~(s64)vol->cluster_size_mask;
	else
		new_alloc_size = (new_size + 7) & ~7;
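	/*
	 * For example, with a 4096 byte cluster size a new_size of 5001 bytes
	 * yields a non-resident new_alloc_size of 8192 bytes whilst in the
	 * resident case it is rounded up to the next multiple of 8 bytes,
	 * i.e. 5008 bytes.
	 */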
	/* The current allocated size is the old allocated size. */
	lck_spin_lock(&ni->size_lock);
	old_alloc_size = ni->allocated_size;
	compressed_size = ni->compressed_size;
	lck_spin_unlock(&ni->size_lock);
	/*
	 * The change in the file size.  This will be 0 if no change, >0 if the
	 * size is growing, and <0 if the size is shrinking.
	 */
	size_change = -1;
	if (new_size - old_size >= 0) {
		size_change = 1;
		if (new_size == old_size)
			size_change = 0;
	}
	if (need_ubc_setsize && size_change < 0) {
		/*
		 * A previous truncate failed thus we did not catch that this
		 * is a shrinking resize earlier on.
		 */
		if (!ubc_setsize(ni->vn, new_size)) {
			ntfs_error(vol->mp, "Failed to shrink size in UBC.");
			err = EIO;
			goto put_err;
		}
		need_ubc_setsize = FALSE;
	}
	/* As above for the allocated size. */
	alloc_change = -1;
	if (new_alloc_size - old_alloc_size >= 0) {
		alloc_change = 1;
		if (new_alloc_size == old_alloc_size)
			alloc_change = 0;
	}
	/*
	 * If neither the size nor the allocation are being changed there is
	 * nothing to do.
	 */
	if (!size_change && !alloc_change)
		goto unm_done;
	/* If the size is changing, check if new size is allowed in $AttrDef. */
	if (size_change) {
		err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
		if (err) {
			if (err == ERANGE) {
				ntfs_error(vol->mp, "Resize would cause the "
						"mft_no 0x%llx to %simum size "
						"for its attribute type "
						"(0x%x).  Aborting resize.",
						(unsigned long long)ni->mft_no,
						size_change > 0 ? "exceed "
						"the max" : "go under the min",
						(unsigned)
						le32_to_cpu(ni->type));
				err = EFBIG;
			} else {
				ntfs_error(vol->mp, "Mft_no 0x%llx has "
						"unknown attribute type "
						"0x%x.  Aborting resize.",
						(unsigned long long)ni->mft_no,
						(unsigned)
						le32_to_cpu(ni->type));
				err = EIO;
			}
			goto put_err;
		}
	}
	/*
	 * The index root attribute, i.e. directory indexes and index inodes
	 * can be marked compressed or encrypted but this means to create
	 * compressed/encrypted files, not that the attribute is
	 * compressed/encrypted.
	 */
	if (ni->type != AT_INDEX_ALLOCATION &&
			(NInoCompressed(ni) || NInoEncrypted(ni))) {
		ntfs_warning(vol->mp, "Changes in inode size are not "
				"supported yet for %s attributes, ignoring.",
				NInoCompressed(ni) ? "compressed" :
				"encrypted");
		err = ENOTSUP;
		goto put_err;
	}
	if (a->non_resident)
		goto do_non_resident_resize;
	if (NInoNonResident(ni))
		panic("%s(): NInoNonResident(ni)\n", __FUNCTION__);
	arec_size = (le16_to_cpu(a->value_offset) + new_size + 7) & ~7;
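	/*
	 * For example, with the typical resident value_offset of 0x18 bytes,
	 * a new_size of 100 bytes gives an arec_size of (0x18 + 100 + 7) & ~7
	 * = 0x80 bytes.
	 */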
	/* Resize the attribute record to best fit the new attribute size. */
	if (new_size < vol->mft_record_size &&
			!ntfs_resident_attr_value_resize(m, a, new_size)) {
		/* The resize succeeded! */
		NInoSetMrecNeedsDirtying(actx->ni);
		lck_spin_lock(&ni->size_lock);
		/* Update the sizes in the ntfs inode and all is done. */
		ni->allocated_size = le32_to_cpu(a->length) -
				le16_to_cpu(a->value_offset);
		ni->data_size = le32_to_cpu(a->value_length);
		/*
		 * Note ntfs_resident_attr_value_resize() has already done any
		 * necessary data clearing in the attribute record.  When the
		 * file is being shrunk ubc_setsize() will already have zeroed
		 * the last partial page, i.e. since this is the resident case
		 * this is the page with index 0.  However, when the file is
		 * being expanded, the page cache page data between the old
		 * data_size, i.e. old_size, and the new_size has not been
		 * zeroed.  Fortunately, we do not need to zero it either since
		 * on one hand it will either already be zero due to pagein
		 * clearing partial page data beyond the data_size in which
		 * case there is nothing to do or in the case of the file being
		 * mmap()ped at the same time, POSIX specifies that the
		 * behaviour is unspecified thus we do not have to do anything.
		 * This means that in our implementation in the rare case that
		 * the file is mmap()ped and a write occurred into the mmap()ped
		 * region just beyond the file size and we now extend the file
		 * size to incorporate this dirty region outside the file size,
		 * a pageout of the page would result in this data being
		 * written to disk instead of being cleared.  Given POSIX
		 * specifies that this corner case is undefined, we choose to
		 * leave it like that as this is much simpler for us as we
		 * cannot lock the relevant page now since we are holding too
		 * many ntfs locks which would result in lock reversal
		 * deadlocks.
		 */
		ni->initialized_size = new_size;
		lck_spin_unlock(&ni->size_lock);
		goto unm_done;
	}
	/* If the above resize failed, this must be an attribute extension. */
	if (size_change < 0)
		panic("%s(): size_change < 0\n", __FUNCTION__);
	/*
	 * Not enough space in the mft record.  If this is an index related
	 * extension, check if the index root attribute is in the same mft
	 * record as the attribute being extended and if it is and it is not
	 * empty move its entries into an index allocation block.  Note we do
	 * not check whether that actually creates enough space because how
	 * much space is needed exactly is very hard to determine in advance
	 * (due to potential need for associated attribute list attribute
	 * extensions) and also because even if it does not create enough space
	 * it will still help and save work later on when working for example
	 * on the attribute list attribute.
	 */
	if (ictx) {
		long delta;
		INDEX_ROOT *ir;
		INDEX_HEADER *ih;
		INDEX_ENTRY *ie, *first_ie;
		ntfs_index_context *root_ictx;
		ntfs_attr_search_ctx root_actx;

		/*
		 * This must be an index bitmap extension.  An index allocation
		 * extension is also possible but not here as that cannot be
		 * resident.
		 */
		if (ni->type != AT_BITMAP)
			panic("%s(): ni->type != AT_BITMAP\n", __FUNCTION__);
		ntfs_attr_search_ctx_init(&root_actx, actx->ni, m);
		err = ntfs_attr_find_in_mft_record(AT_INDEX_ROOT, ni->name,
				ni->name_len, NULL, 0, &root_actx);
		if (err) {
			if (err != ENOENT) {
				ntfs_error(vol->mp, "Failed to find index "
						"root attribute in mft_no "
						"0x%llx (error %d).  Inode is "
						"corrupt.  Run chkdsk.",
						(unsigned long long)ni->mft_no,
						err);
				NVolSetErrors(vol);
			}
			/*
			 * The index root is in a different mft record so we
			 * cannot gain anything by moving out its entries.  Set
			 * @ictx to NULL so we do not waste our time trying
			 * again.
			 */
			ictx = NULL;
			goto ictx_done;
		}
		/*
		 * We found the index root in the same mft record as the
		 * attribute to be extended.  Check whether it is empty or not.
		 */
		ir = (INDEX_ROOT*)((u8*)root_actx.a +
				le16_to_cpu(root_actx.a->value_offset));
		ih = &ir->index;
		first_ie = ie = (INDEX_ENTRY*)((u8*)ih +
				le32_to_cpu(ih->entries_offset));
		while (!(ie->flags & INDEX_ENTRY_END))
			ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length));
		/*
		 * If there are no entries other than the end entry we cannot
		 * gain anything by moving out the entries from the index root.
		 * Set @ictx to NULL so we do not waste our time trying again.
		 */
		if (ie == first_ie) {
			ictx = NULL;
			goto ictx_done;
		}
		/*
		 * We cannot have gotten this far if the current index context
		 * is locked and/or it is the index root.
		 *
		 * Also, we need to drop the runlist lock we are holding as it
		 * may need to be taken when moving the entries from the index
		 * root to the index allocation block.
		 */
		if (ictx->is_locked)
			panic("%s(): ictx->is_locked\n", __FUNCTION__);
		if (ictx->is_root)
			panic("%s(): ictx->is_root\n", __FUNCTION__);
		lck_rw_unlock_exclusive(&ni->rl.lock);
		/* Find the index root by walking up the tree path. */
		root_ictx = ictx;
		while (!root_ictx->is_root) {
			root_ictx = root_ictx->up;
			/*
			 * If we go all the way round to the beginning without
			 * finding the root something has gone badly wrong.
			 */
			if (root_ictx == ictx)
				panic("%s(): root_ictx == ictx\n",
						__FUNCTION__);
		}
		/*
		 * We need a proper deallocatable attribute search context thus
		 * switch the one pointing to the attribute to be resized to
		 * point to the index root.  Note we are not updating
		 * @actx->al_entry as this is not going to be touched at all.
		 * Having said that set it to NULL just in case.
		 */
		actx->a = root_actx.a;
		actx->al_entry = NULL;
		/*
		 * Lock the index root node.  We already have the index root
		 * attribute thus only need to do the revalidation part of
		 * re-locking.
		 */
		root_ictx->is_locked = 1;
		root_ictx->actx = actx;
		root_ictx->bytes_free = le32_to_cpu(m->bytes_allocated) -
				le32_to_cpu(m->bytes_in_use);
		root_ictx->ir = ir;
		delta = (u8*)ih - (u8*)root_ictx->index;
		if (delta) {
			INDEX_ENTRY **entries;
			unsigned u;

			root_ictx->index = ih;
			root_ictx->entry = (INDEX_ENTRY*)(
					(u8*)root_ictx->entry + delta);
			entries = root_ictx->entries;
			for (u = 0; u < root_ictx->nr_entries; u++)
				entries[u] = (INDEX_ENTRY*)((u8*)entries[u] +
						delta);
		}
		/*
		 * Move the index root entries to an index allocation block.
		 *
		 * Note we do not need to worry about this causing infinite
		 * recursion in the case that we were called from
		 * ntfs_index_block_alloc() which was called from
		 * ntfs_index_move_root_to_allocation_block() because the
		 * latter will have emptied the index root before calling
		 * ntfs_index_block_alloc() thus we will bail out above when
		 * checking whether the index root is empty the second time
		 * round and the recursion will stop there.  This is a very
		 * rare occurrence thus there is no point in special-casing it
		 * in the code in a more efficient but more complicated way.
		 *
		 * A complication is that ntfs_attr_resize() may have been
		 * called from ntfs_index_block_alloc() and in this case when
		 * we call ntfs_index_move_root_to_allocation_block() it will
		 * call ntfs_index_block_alloc() again which will cause a
		 * deadlock (or with lock debugging enabled panic()) because
		 * ntfs_index_block_alloc() takes the bitmap inode lock for
		 * writing.  To avoid this ntfs_index_block_alloc() sets
		 * @ictx->bmp_is_locked and we need to set
		 * @root_ictx->bmp_is_locked to the same value so that when
		 * ntfs_index_move_root_to_allocation_block() calls
		 * ntfs_index_block_alloc() the latter will know not to take
		 * the bitmap inode lock again.
		 */
		root_ictx->bmp_is_locked = ictx->bmp_is_locked;
		err = ntfs_index_move_root_to_allocation_block(root_ictx);
		if (root_ictx != ictx)
			root_ictx->bmp_is_locked = 0;
		if (err) {
			ntfs_error(vol->mp, "Failed to move index root to "
					"index allocation block (error %d).",
					err);
			if (root_ictx->is_locked)
				ntfs_index_ctx_unlock(root_ictx);
			/*
			 * This is a disaster as it means the index context is
			 * no longer valid thus we have to bail out all the
			 * way.
			 */
			goto err;
		}
		/* Unlock the newly created index block. */
		if (root_ictx->is_root)
			panic("%s(): root_ictx->is_root\n", __FUNCTION__);
		if (!root_ictx->is_locked)
			panic("%s(): !root_ictx->is_locked\n", __FUNCTION__);
		ntfs_index_ctx_unlock(root_ictx);
		/*
		 * We are done.  The index root is now empty thus the mft
		 * record should now have enough space.  Because we dropped the
		 * mft record when moving the index root entries into the index
		 * allocation block we need to restart the attribute resize
		 * again.
		 *
		 * But first we set @ictx to NULL so we do not get here again
		 * in the case that there still is not enough free space.  This
		 * is not a disaster as we can just carry on doing other
		 * rearrangements to free up enough space in the mft record.
		 */
		ictx = NULL;
		goto retry_resize;
	}
ictx_done:
	/*
	 * We have to drop all the locks so we can call
	 * ntfs_attr_make_non_resident().
	 */
	ntfs_attr_search_ctx_put(actx);
	ntfs_mft_record_unmap(base_ni);
	lck_rw_unlock_exclusive(&ni->rl.lock);
	/*
	 * Not enough space in the mft record, try to make the attribute
	 * non-resident and if successful restart the truncation process.
	 */
	err = ntfs_attr_make_non_resident(ni);
	if (!err)
		goto retry_resize;
	/*
	 * Could not make the attribute non-resident.  If this is because the
	 * conversion is not permitted for this attribute type, try to make
	 * other attributes non-resident and/or move this or other attributes
	 * out of the mft record this attribute is in.  Otherwise fail.
	 */
	if (err != EPERM) {
		if (err != ENOSPC) {
			ntfs_error(vol->mp, "Cannot truncate mft_no 0x%llx, "
					"attribute type 0x%x, because the "
					"conversion from resident to "
					"non-resident attribute failed (error "
					"%d).", (unsigned long long)ni->mft_no,
					(unsigned)le32_to_cpu(ni->type), err);
			if (err != ENOMEM) {
				NVolSetErrors(vol);
				err = EIO;
			}
		}
		goto err;
	}
	/*
	 * To make space in the mft record we would like to try to make other
	 * attributes non-resident if that would save space.
	 *
	 * FIXME: We cannot do this at present unless the attribute is the
	 * attribute being resized as there could be an ntfs inode matching
	 * this attribute in memory and it would become out of date with its
	 * metadata if we touch its attribute record.
	 *
	 * FIXME: We do not need to do this if this is the attribute being
	 * resized as we already tried to make the attribute non-resident and
	 * it did not work or we would never have gotten here in the first
	 * place.
	 *
	 * Thus we have to either move other attributes to extent mft records
	 * thus making more space in the base mft record or we have to move the
	 * attribute being resized to an extent mft record thus giving it more
	 * space.  In any case we need to have an attribute list attribute so
	 * start by adding it if it does not yet exist.
	 *
	 * Before we start, we can check whether it is possible to fit the
	 * attribute to be resized inside an mft record.  If not then there is
	 * no point in proceeding.
	 *
	 * This should never really happen as the attribute size should never
	 * be allowed to grow so much and such requests should never be made by
	 * the driver and if they are they should be caught by the call to
	 * ntfs_attr_size_bounds_check().
	 */
	if (arec_size > vol->mft_record_size - sizeof(MFT_RECORD)) {
		ntfs_error(vol->mp, "Cannot truncate mft_no 0x%llx, attribute "
				"type 0x%x, because the attribute may not be "
				"non-resident and the requested size exceeds "
				"the maximum possible resident attribute "
				"record size.", (unsigned long long)ni->mft_no,
				(unsigned)le32_to_cpu(ni->type));
		/* Use POSIX conformant truncate(2) error code. */
		err = EFBIG;
		goto err;
	}
	/*
	 * The resident attribute can fit in an mft record.  Now have to decide
	 * whether to make other attributes non-resident/move other attributes
	 * out of the mft record or whether to move the attribute record to be
	 * resized out to a new mft record.
	 *
	 * TODO: We never call ntfs_attr_resize() for attributes that cannot
	 * be non-resident, thus we never get here.  We simply panic() here to
	 * remind us that we need to implement this code if we ever start
	 * calling this function for attributes that must remain resident.
	 */
	panic("%s(): Attribute may not be non-resident.\n", __FUNCTION__);
do_non_resident_resize:
	if (!NInoNonResident(ni))
		panic("%s(): !NInoNonResident(ni)\n", __FUNCTION__);
	/*
	 * If the size is shrinking, need to reduce the initialized_size and
	 * the data_size before reducing the allocation.
	 */
	if (size_change < 0) {
		/*
		 * Make the valid size smaller (the UBC size is already
		 * up-to-date).
		 */
		lck_spin_lock(&ni->size_lock);
		if (new_size < ni->initialized_size) {
			ni->initialized_size = new_size;
			a->initialized_size = cpu_to_sle64(new_size);
			lck_spin_unlock(&ni->size_lock);
			if (ni->name == I30 &&
					ni->type == AT_INDEX_ALLOCATION) {
				lck_spin_lock(&base_ni->size_lock);
				base_ni->initialized_size = new_size;
				lck_spin_unlock(&base_ni->size_lock);
			}
		} else
			lck_spin_unlock(&ni->size_lock);
		/*
		 * If the size is shrinking it makes no sense for the
		 * allocation to be growing.
		 */
		if (alloc_change > 0)
			panic("%s(): alloc_change > 0\n", __FUNCTION__);
	} else if (/*size_change >= 0 && */ alloc_change > 0) {
		/*
		 * The file size is growing or staying the same but the
		 * allocation can be shrinking, growing or staying the same.
		 *
		 * If the allocation is shrinking or staying the same we fall
		 * through to the same code that handles the case of a
		 * shrinking size with a shrinking allocation.
		 *
		 * Only if the allocation is growing do we need to extend the
		 * allocation and possibly update the data size here.  If we
		 * are updating the data size, since we are not touching the
		 * initialized_size we do not need to worry about the actual
		 * data on disk.  And as far as the VM pages are concerned,
		 * there will be no pages beyond the old data size and any
		 * partial region in the last page between the old and new data
		 * size (or the end of the page if the new data size is outside
		 * the page) does not need to be modified as explained above
		 * for the resident attribute resize case.  To do this, we
		 * simply drop the locks we hold and leave all the work to our
		 * friendly helper ntfs_attr_extend_allocation().
		 *
		 * Note by setting @data_start to -1 (last parameter to
		 * ntfs_attr_extend_allocation()) we guarantee that the
		 * allocation is not partial.
		 */
		ntfs_attr_search_ctx_put(actx);
		ntfs_mft_record_unmap(base_ni);
		lck_rw_unlock_exclusive(&ni->rl.lock);
		err = ntfs_attr_extend_allocation(ni, new_size,
				size_change > 0 ? new_size : -1, -1, ictx,
				NULL, FALSE);
		if (err)
			goto err;
		goto done;
	}
	/* alloc_change <= 0 */
	/* If the actual size is changing need to update it now. */
	if (size_change) {
		lck_spin_lock(&ni->size_lock);
		ni->data_size = new_size;
		a->data_size = cpu_to_sle64(new_size);
		lck_spin_unlock(&ni->size_lock);
		if (ni->name == I30 && ni->type == AT_INDEX_ALLOCATION) {
			lck_spin_lock(&base_ni->size_lock);
			base_ni->data_size = new_size;
			lck_spin_unlock(&base_ni->size_lock);
		}
	}
	/* Ensure the modified mft record is written out. */
	NInoSetMrecNeedsDirtying(actx->ni);
	/* If the allocated size is not changing, we are done. */
	if (!alloc_change)
		goto unm_done;
	/*
	 * Free the clusters.  Note we cannot recover once this is done because
	 * someone else can allocate the clusters at any point after we free
	 * them.  Thus any errors will lead to a more or less corrupt file
	 * system depending on how consistent we can make the volume after an
	 * error occurs.
	 */
	err = ntfs_cluster_free(ni, new_alloc_size >>
			vol->cluster_size_shift, -1, actx, &nr_freed);
	m = actx->m;
	a = actx->a;
	if (err) {
		ntfs_error(vol->mp, "Failed to release cluster(s) (error "
				"%d).  Unmount and run chkdsk to recover the "
				"lost cluster(s).", err);
		NVolSetErrors(vol);
	} else {
		/*
		 * Truncate the runlist.  The call to ntfs_cluster_free() has
		 * already ensured that all needed runlist fragments have been
		 * mapped so we do not need to worry about mapping runlist
		 * fragments here.  Note that given we have managed to read all
		 * the runlist fragments already, the chances of us failing
		 * anywhere in the code below are very small indeed.  Only
		 * running out of memory or a disk/sector failure between the
		 * above ntfs_cluster_free() call and the calls below can
		 * cause us to fail here.
		 *
		 * FIXME: Note that this is not quite true as if
		 * ntfs_cluster_free() aborts with an error it may not have
		 * gotten round to mapping the runlist fragments.  If this
		 * happens ntfs_rl_truncate_nolock() could end up doing a lot
		 * of weird things so we only call it if the
		 * ntfs_cluster_free() succeeded for now.
		 */
		err = ntfs_rl_truncate_nolock(vol, &ni->rl, new_alloc_size >>
				vol->cluster_size_shift);
	}
	/*
	 * If the runlist truncation failed and/or the search context is no
	 * longer valid, we cannot resize the attribute record or build the
	 * mapping pairs array thus we abort.
	 */
	if (err || actx->is_error) {
		if (actx->is_error)
			err = actx->error;
		ntfs_error(vol->mp, "Failed to %s (error %d).%s",
				actx->is_error ?
				"restore attribute search context" :
				"truncate attribute runlist", err, es);
		err = EIO;
		goto bad_out;
	}
	/*
	 * The runlist is now up to date.  If this attribute is sparse we need
	 * to check if it is still sparse and if not we need to change it to a
	 * non-sparse file.  And if it is still sparse we need to update the
	 * compressed size which we postpone till later so we can do it at the
	 * same time as the update of the allocated size.
	 *
	 * To determine whether the attribute is still sparse we compare the
	 * new compressed size to the new allocated size.  If the two have now
	 * become the same the attribute is no longer sparse.  If the
	 * compressed size is still smaller than the allocated size the
	 * attribute is still sparse.
	 */
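	/*
	 * Illustrative example (hypothetical numbers): with 4096-byte
	 * clusters, an attribute with new_alloc_size 0x8000 spans eight
	 * virtual clusters.  If the compressed size, after subtracting the
	 * freed clusters below, is also 0x8000, every remaining VCN is backed
	 * by a real cluster, i.e. no holes are left and the attribute is no
	 * longer sparse.  Were the compressed size, say, 0x6000, two clusters
	 * worth of holes would remain and the attribute would stay sparse.
	 */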
	compressed_size -= nr_freed << vol->cluster_size_shift;
	if (NInoSparse(ni) && compressed_size >= new_alloc_size) {
		if (compressed_size > new_alloc_size)
			panic("%s(): compressed_size > new_alloc_size\n",
					__FUNCTION__);
		/* Switch the attribute to not be sparse any more. */
		ntfs_attr_sparse_clear(base_ni, ni, actx);
	}
	/* Update the allocated/compressed size. */
	lck_spin_lock(&ni->size_lock);
	ni->allocated_size = new_alloc_size;
	a->allocated_size = cpu_to_sle64(new_alloc_size);
	if (NInoSparse(ni) || (ni->type != AT_INDEX_ALLOCATION &&
			NInoCompressed(ni))) {
		if (nr_freed) {
			if (compressed_size < 0)
				panic("%s(): compressed_size < 0\n",
						__FUNCTION__);
			ni->compressed_size = compressed_size;
			a->compressed_size = cpu_to_sle64(ni->compressed_size);
		}
	}
	lck_spin_unlock(&ni->size_lock);
	if (ni->name == I30 && ni->type == AT_INDEX_ALLOCATION) {
		lck_spin_lock(&base_ni->size_lock);
		base_ni->allocated_size = new_alloc_size;
		lck_spin_unlock(&base_ni->size_lock);
	}
	/*
	 * We have the base attribute extent in @actx and we have set it up
	 * already with the new allocated size.  If the truncation point is not
	 * in the base extent, need to switch to the extent containing the
	 * truncation point now so we can update its attribute record, too.
	 * But before doing so need to ensure the modified mft record is
	 * written out.
	 */
	highest_vcn = new_alloc_size >> vol->cluster_size_shift;
	old_highest_vcn = sle64_to_cpu(a->highest_vcn) + 1;
	ntfs_debug("highest_vcn 0x%llx, old_highest_vcn 0x%llx.",
			(unsigned long long)highest_vcn,
			(unsigned long long)old_highest_vcn);
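	/*
	 * Worked example (hypothetical numbers): with 4096-byte clusters and
	 * new_alloc_size 0x5000, highest_vcn is 5, i.e. the first VCN beyond
	 * the new allocation.  If the base extent maps VCNs 0-9 then
	 * old_highest_vcn is 10 and, as 5 < 10, the truncation point lies in
	 * the base extent and no switch is needed.  Had the base extent only
	 * mapped VCNs 0-3 (old_highest_vcn 4), we would look up the extent
	 * containing VCN 5 below.
	 */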
	if (highest_vcn >= old_highest_vcn) {
		NInoSetMrecNeedsDirtying(actx->ni);
		err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
				highest_vcn, NULL, 0, actx);
		if (err) {
			if (err == ENOENT)
				ntfs_error(vol->mp, "Attribute extent is "
						"missing from mft_no 0x%llx.  "
						"Run chkdsk.",
						(unsigned long long)
						ni->mft_no);
			else
				ntfs_error(vol->mp, "Failed to lookup "
						"attribute extent in mft_no "
						"0x%llx (error %d).%s",
						(unsigned long long)
						ni->mft_no, err, es);
			err = EIO;
			goto bad_out;
		}
		m = actx->m;
		a = actx->a;
		old_highest_vcn = sle64_to_cpu(a->highest_vcn) + 1;
		ntfs_debug("Switched to extent attribute record, "
				"old_highest_vcn is now 0x%llx.",
				(unsigned long long)old_highest_vcn);
	}
	/*
	 * If the truncation point is at the very beginning of this attribute
	 * extent and the extent is not the base extent we need to remove the
	 * entire extent and hence do not need to waste time truncating it.
	 *
	 * If this is the base extent we have to truncate it to zero allocated
	 * size and if the truncation point is in the middle of the extent we
	 * need to truncate it to the truncation point.
	 */
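	/*
	 * For instance (hypothetical numbers), if this extent covers VCNs
	 * 8-15, i.e. lowest_vcn is 8: a truncation point of 8 removes the
	 * whole extent (handled further below), a truncation point of 12
	 * shrinks the extent to VCNs 8-11, and for the base extent, where
	 * lowest_vcn is 0, we always rebuild the mapping pairs array even if
	 * the new allocation is empty.
	 */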
	lowest_vcn = sle64_to_cpu(a->lowest_vcn);
	ntfs_debug("lowest_vcn 0x%llx.", (unsigned long long)lowest_vcn);
	if (!lowest_vcn || highest_vcn != lowest_vcn) {
		/*
		 * Get the size for the shrunk mapping pairs array for the
		 * runlist fragment starting at the lowest_vcn of this extent.
		 */
		err = ntfs_get_size_for_mapping_pairs(vol,
				ni->rl.elements ? ni->rl.rl : NULL, lowest_vcn,
				-1, &mp_size);
		if (err) {
			ntfs_error(vol->mp, "Cannot shrink allocation of "
					"mft_no 0x%llx, attribute type 0x%x, "
					"because determining the size for the "
					"mapping pairs failed (error %d).%s",
					(unsigned long long)ni->mft_no,
					(unsigned)le32_to_cpu(ni->type), err,
					es);
			NInoSetMrecNeedsDirtying(actx->ni);
			err = EIO;
			goto bad_out;
		}
		/*
		 * Generate the mapping pairs array directly into the attribute
		 * record.
		 */
		err = ntfs_mapping_pairs_build(vol, (s8*)a +
				le16_to_cpu(a->mapping_pairs_offset), mp_size,
				ni->rl.elements ? ni->rl.rl : NULL, lowest_vcn,
				-1, NULL);
		if (err) {
			ntfs_error(vol->mp, "Cannot shrink allocation of "
					"mft_no 0x%llx, attribute type 0x%x, "
					"because building the mapping pairs "
					"failed (error %d).%s",
					(unsigned long long)ni->mft_no,
					(unsigned)le32_to_cpu(ni->type), err,
					es);
			NInoSetMrecNeedsDirtying(actx->ni);
			err = EIO;
			goto bad_out;
		}
		/* Update the highest_vcn to the new truncation point. */
		a->highest_vcn = cpu_to_sle64(highest_vcn - 1);
		/*
		 * Shrink the attribute record for the new mapping pairs array.
		 * Note, this cannot fail since we are making the attribute
		 * smaller thus by definition there is enough space to do so.
		 */
		attr_len = le32_to_cpu(a->length);
		err = ntfs_attr_record_resize(m, a, mp_size +
				le16_to_cpu(a->mapping_pairs_offset));
		if (err)
			panic("%s(): err\n", __FUNCTION__);
	}
	/* If there is no attribute list we are done. */
	if (!NInoAttrList(base_ni)) {
		/* Ensure the modified mft record is written out. */
		NInoSetMrecNeedsDirtying(base_ni);
		goto unm_done;
	}
	/*
	 * If the current extent is not the base extent and it has a lowest_vcn
	 * equal to the new highest_vcn, we need to delete the current extent.
	 *
	 * Also need to delete all subsequent attribute extents if any exist.
	 * We know that some exist if the old highest_vcn of the current extent
	 * is lower than the old end of the attribute.
	 *
	 * When deleting the attribute extents, free the extent mft records if
	 * the only attribute record in the mft record is the attribute extent
	 * being deleted.  In this case we do not need to actually modify the
	 * attribute record at all; we just mark the mft record as not in use
	 * and clear its bit in the mft bitmap.  For each deleted attribute
	 * extent we also need to delete the corresponding attribute list
	 * attribute entry but we postpone this until we have dealt with all
	 * the extents first.
	 *
	 * When finished, check the attribute list attribute and if it no
	 * longer references any mft records other than the base mft record
	 * delete the attribute list attribute altogether.
	 */
	al_end = base_ni->attr_list + base_ni->attr_list_size;
	del_al_start = (u8*)actx->al_entry;
	if (lowest_vcn && highest_vcn == lowest_vcn) {
		/*
		 * We need to delete the current extent thus manually
		 * reinitialize the attribute search context without unmapping
		 * the current extent.
		 */
		eni = actx->ni;
		actx->ni = base_ni;
		ntfs_attr_search_ctx_reinit(actx);
		al_entry = (ATTR_LIST_ENTRY*)del_al_start;
		goto delete_attr;
	}
	/* Ensure the modified mft record is written out. */
	NInoSetMrecNeedsDirtying(actx->ni);
	del_al_start += le16_to_cpu(((ATTR_LIST_ENTRY*)del_al_start)->length);
	al_entry = (ATTR_LIST_ENTRY*)del_al_start;
	/*
	 * Reinitialize the attribute search context thus unmapping the current
	 * extent if it is not in the base mft record.
	 */
	ntfs_attr_search_ctx_reinit(actx);
	/*
	 * Check if there are more extents by looking at the highest vcn of the
	 * current extent which is in @old_highest_vcn.  If it is below the old
	 * allocated size it means that @al_entry points to the attribute list
	 * entry describing the next attribute extent.
	 */
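	/*
	 * For example (hypothetical numbers), if old_alloc_size was 0x10000
	 * with 4096-byte clusters, i.e. sixteen clusters, and the extent just
	 * dealt with ended at VCN 7 (old_highest_vcn 8), then 8 < 16 thus at
	 * least one more extent, beginning at VCN 8, must exist and @al_entry
	 * now describes it.
	 */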
	while (old_highest_vcn < (old_alloc_size >> vol->cluster_size_shift)) {
		/* Sanity checks. */
		if ((u8*)al_entry + sizeof(ATTR_LIST_ENTRY) >= al_end ||
				(u8*)al_entry < base_ni->attr_list) {
			ntfs_error(vol->mp, "Attribute list attribute is "
					"corrupt in mft_no 0x%llx.  Run "
					"chkdsk.",
					(unsigned long long)base_ni->mft_no);
			err = EIO;
			goto bad_out;
		}
		/*
		 * Map the mft record containing the next extent if it is not
		 * the base mft record which is already mapped and described by
		 * the attribute search context @actx.
		 */
		if (MREF_LE(al_entry->mft_reference) == base_ni->mft_no) {
			/* We want the base mft record. */
			if (MSEQNO_LE(al_entry->mft_reference) !=
					base_ni->seq_no) {
				ntfs_error(vol->mp, "Found stale mft "
						"reference in attribute list "
						"attribute of mft_no 0x%llx.  "
						"Inode is corrupt.  Run "
						"chkdsk.", (unsigned long long)
						base_ni->mft_no);
				err = EIO;
				goto bad_out;
			}
			eni = base_ni;
			m = actx->m;
		} else {
			/* We want an extent mft record. */
			err = ntfs_extent_mft_record_map(base_ni,
					le64_to_cpu(al_entry->mft_reference),
					&eni, &m);
			if (err) {
				ntfs_error(vol->mp, "Failed to map extent mft "
						"record 0x%llx of mft_no "
						"0x%llx.  Inode is corrupt.  "
						"Run chkdsk.",
						(unsigned long long)MREF_LE(
						al_entry->mft_reference),
						(unsigned long long)
						base_ni->mft_no);
				err = EIO;
				goto bad_out;
			}
		}
		/* Locate the attribute extent in the mft record. */
		a = (ATTR_RECORD*)((u8*)m + le16_to_cpu(m->attrs_offset));
		do {
			/* Sanity checks. */
			if ((u8*)a < (u8*)m || (u8*)a > (u8*)m +
					le32_to_cpu(m->bytes_allocated))
				goto corrupt_err;
			/*
			 * We cannot reach the end of the attributes without
			 * finding the attribute extent we are looking for.
			 */
			if (a->type == AT_END || !a->length)
				goto corrupt_err;
			/*
			 * The attribute instance is unique thus if we find the
			 * correct instance we have found the attribute extent.
			 */
			if (al_entry->instance == a->instance) {
				/*
				 * If the type and/or the name are mismatched
				 * between the attribute list entry and the
				 * attribute record, there is corruption.
				 */
				if (al_entry->type != a->type)
					goto corrupt_err;
				if (!ntfs_are_names_equal((ntfschar*)((u8*)a +
						le16_to_cpu(a->name_offset)),
						a->name_length,
						(ntfschar*)((u8*)al_entry +
						al_entry->name_offset),
						al_entry->name_length,
						NVolCaseSensitive(vol),
						vol->upcase, vol->upcase_len))
					goto corrupt_err;
				/* We found the attribute extent. */
				break;
			}
			/* Proceed to the next attribute in the mft record. */
			a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length));
		} while (1);
		/* Record the highest_vcn of the new extent. */
		old_highest_vcn = sle64_to_cpu(a->highest_vcn) + 1;
delete_attr:
		/*
		 * If this is the only attribute record in the mft record, free
		 * the mft record.  Note that if this is the case the mft
		 * record cannot be the base record, as the base record would
		 * at least have to contain the attribute record for the
		 * attribute list attribute, so there is no need to check for
		 * this case.
		 *
		 * If it is not the only attribute record in the mft record,
		 * delete the attribute record from the mft record.
		 */
		if ((u8*)m + le16_to_cpu(m->attrs_offset) == (u8*)a &&
				((ATTR_RECORD*)((u8*)a +
				le32_to_cpu(a->length)))->type == AT_END) {
			err = ntfs_extent_mft_record_free(base_ni, eni, m);
			if (err) {
				ntfs_error(vol->mp, "Failed to free extent "
						"mft_no 0x%llx (error %d).  "
						"Unmount and run chkdsk to "
						"recover the lost inode.",
						(unsigned long long)
						eni->mft_no, err);
				NVolSetErrors(vol);
				if (eni != base_ni) {
					NInoSetMrecNeedsDirtying(eni);
					ntfs_extent_mft_record_unmap(eni);
				}
			}
		} else {
			ntfs_attr_record_delete_internal(m, a);
			/* Unmap the mft record if it is not the base record. */
			if (eni != base_ni) {
				NInoSetMrecNeedsDirtying(eni);
				ntfs_extent_mft_record_unmap(eni);
			}
		}
		/* Go to the next entry in the attribute list attribute. */
		al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
				le16_to_cpu(al_entry->length));
	}
	/*
	 * There are no more extents.  If we deleted any attribute extents we
	 * need to remove their attribute list attribute entries now.
	 */
	if ((u8*)al_entry != del_al_start) {
		unsigned al_ofs;
		BOOL have_extent_records;

		al_ofs = del_al_start - base_ni->attr_list;
		ntfs_attr_list_entries_delete(base_ni,
				(ATTR_LIST_ENTRY*)del_al_start, al_entry);
		/*
		 * Scan all entries in the attribute list attribute.  If there
		 * are no more references to extent mft records, delete the
		 * attribute list attribute.
		 *
		 * Otherwise truncate the attribute list attribute and update
		 * its value from the in memory copy.
		 */
		err = ntfs_attr_list_is_needed(base_ni, NULL,
				&have_extent_records);
		if (err)
			goto put_err;
		if (!have_extent_records) {
			/*
			 * There are no extent mft records left in use thus
			 * delete the attribute list attribute.
			 */
			err = ntfs_attr_list_delete(base_ni, actx);
			if (err)
				goto put_err;
		} else {
			/*
			 * There still are extent mft records left in use thus
			 * update the attribute list attribute size and write
			 * the modified data to disk.
			 */
			err = ntfs_attr_list_sync_shrink(base_ni, al_ofs, actx);
			if (err)
				goto put_err;
		}
	}
unm_done:
	ntfs_attr_search_ctx_put(actx);
	ntfs_mft_record_unmap(base_ni);
	lck_rw_unlock_exclusive(&ni->rl.lock);
	/* Set the UBC size if not set yet. */
	if (need_ubc_setsize && !ubc_setsize(ni->vn, new_size)) {
		ntfs_error(vol->mp, "Failed to set the size in UBC.");
		err = EIO;
		/*
		 * This should never fail and if it does it can only happen as
		 * the result of a previous resize having failed.  Thus we do
		 * not try to roll back the metadata changes and simply bail
		 * out.
		 */
		goto err;
	}
done:
	/*
	 * If we have modified the size of the base inode, cause the sizes to
	 * be written to all the directory index entries pointing to the base
	 * inode when the inode is written to disk.  Do not do this for
	 * directories as they have both sizes set to zero in their index
	 * entries.
	 */
	if (ni == base_ni && !S_ISDIR(ni->mode) &&
			(size_change || alloc_change))
		NInoSetDirtySizes(ni);
	// TODO/FIXME: We have to clear the S_ISUID and S_ISGID bits in the
	// file mode, but only on success and only when (size_change ||
	// alloc_change) is true.
	/*
	 * Update the last_data_change_time (mtime) and last_mft_change_time
	 * (ctime) on the base ntfs inode @base_ni unless this is an attribute
	 * inode update in which case we only update the ctime, as expected by
	 * named stream/extended attribute semantics on OS X.
	 *
	 * FIXME: For open(O_TRUNC) it is correct to always change the
	 * {m,c}time.  But for {,f}truncate() we have to only set {m,c}time if
	 * a change happened, i.e. only if size_change is true.  Problem is we
	 * cannot know from which code path we are being called as both system
	 * calls on OS X call vnode_setattr() which calls VNOP_SETATTR() which
	 * calls ntfs_vnop_setattr() which then calls us...  For now at least
	 * we always update the times thus we follow open(O_TRUNC) semantics
	 * and disobey {,f}truncate() semantics.
	 */
	base_ni->last_mft_change_time = ntfs_utc_current_time();
	if (ni == base_ni)
		base_ni->last_data_change_time = base_ni->last_mft_change_time;
	NInoSetDirtyTimes(base_ni);
	/*
	 * If this is not a directory or it is an encrypted directory, set the
	 * needs archiving bit except for the core system files.
	 */
	if (!S_ISDIR(base_ni->mode) || NInoEncrypted(base_ni)) {
		BOOL need_set_archive_bit = TRUE;
		if (vol->major_ver >= 2) {
			if (ni->mft_no <= FILE_Extend)
				need_set_archive_bit = FALSE;
		} else {
			if (ni->mft_no <= FILE_UpCase)
				need_set_archive_bit = FALSE;
		}
		if (need_set_archive_bit) {
			base_ni->file_attributes |= FILE_ATTR_ARCHIVE;
			NInoSetDirtyFileAttributes(base_ni);
		}
	}
	ntfs_debug("Done.");
	return 0;
corrupt_err:
	ntfs_error(vol->mp, "Mft record 0x%llx of mft_no 0x%llx is corrupt.  "
			"Unmount and run chkdsk.",
			(unsigned long long)eni->mft_no,
			(unsigned long long)base_ni->mft_no);
	if (eni != base_ni)
		ntfs_extent_mft_record_unmap(eni);
	err = EIO;
bad_out:
	if (err != ENOMEM && err != ENOTSUP)
		NVolSetErrors(vol);
put_err:
	ntfs_attr_search_ctx_put(actx);
unm_err:
	ntfs_mft_record_unmap(base_ni);
unl_err:
	lck_rw_unlock_exclusive(&ni->rl.lock);
err:
	/* Reset the UBC size. */
	if (!ubc_setsize(ni->vn, old_size))
		ntfs_error(vol->mp, "Failed to restore UBC size.  Leaving UBC "
				"size out of sync with attribute data size.");
	ntfs_debug("Failed (error %d).", err);
	return err;
}

/**
 * ntfs_attr_set - fill (a part of) an attribute with a byte
 * @ni:		ntfs inode describing the attribute to fill
 * @ofs:	offset inside the attribute at which to start to fill
 * @cnt:	number of bytes to fill
 * @val:	the unsigned 8-bit value with which to fill the attribute
 *
 * Fill @cnt bytes of the attribute described by the ntfs inode @ni starting at
 * byte offset @ofs inside the attribute with the constant byte @val.
 *
 * This function is effectively like memset() applied to an ntfs attribute.
 * Note this function actually only operates on the page cache pages belonging
 * to the ntfs attribute and it marks them dirty after doing the memset().
 * Thus it relies on the vm dirty page write code paths to cause the modified
 * pages to be written to the mft record/disk.
 *
 * Return 0 on success and errno on error.  An error code of ESPIPE means that
 * @ofs + @cnt were outside the end of the attribute and no write was
 * performed.
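 *
 * A typical use (an illustrative sketch, not taken from a specific caller;
 * @old_data_size and @new_data_size are hypothetical values the caller has
 * obtained under the locks described below) is zeroing a newly exposed
 * region:
 *
 *	err = ntfs_attr_set(ni, old_data_size,
 *			new_data_size - old_data_size, 0);
 *	if (err)
 *		return err;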
 *
 * Note: This function does not take care of the initialized size!
 *
 * Locking: - Caller must hold an iocount reference on the vnode of the ntfs
 *	      inode @ni.
 *	    - Caller must hold @ni->lock for reading or writing.
 */
errno_t ntfs_attr_set(ntfs_inode *ni, s64 ofs, const s64 cnt, const u8 val)
{
	s64 end, data_size;
	ntfs_volume *vol = ni->vol;
	upl_t upl;
	upl_page_info_array_t pl;
	u8 *kaddr;
	unsigned start_ofs, end_ofs, size;
	errno_t err;

	ntfs_debug("Entering for ofs 0x%llx, cnt 0x%llx, val 0x%x.",
			(unsigned long long)ofs, (unsigned long long)cnt,
			(unsigned)val);
	if (ofs < 0)
		panic("%s(): ofs < 0\n", __FUNCTION__);
	if (cnt < 0)
		panic("%s(): cnt < 0\n", __FUNCTION__);
	if (!cnt)
		goto done;
	/*
	 * FIXME: Compressed and encrypted attributes are not supported when
	 * writing and we should never have gotten here for them.
	 */
	if (NInoCompressed(ni))
		panic("%s(): Inode is compressed.\n", __FUNCTION__);
	if (NInoEncrypted(ni))
		panic("%s(): Inode is encrypted.\n", __FUNCTION__);
	/* Work out the starting index and page offset. */
	start_ofs = (unsigned)ofs & PAGE_MASK;
	/* Work out the ending index and page offset. */
	end = ofs + cnt;
	end_ofs = (unsigned)end & PAGE_MASK;
	/* If the end is outside the inode size return ESPIPE. */
	lck_spin_lock(&ni->size_lock);
	data_size = ni->data_size;
	lck_spin_unlock(&ni->size_lock);
	if (end > data_size) {
		ntfs_error(vol->mp, "Request exceeds end of attribute.");
		return ESPIPE;
	}
	ofs &= ~PAGE_MASK_64;
	end &= ~PAGE_MASK_64;
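	/*
	 * Example of the arithmetic above (hypothetical values, 4096-byte
	 * pages): for ofs 0x1800 and cnt 0x2000, start_ofs = 0x800, end =
	 * 0x3800, and end_ofs = 0x800; after the masking ofs = 0x1000 and
	 * end = 0x3000.  The fill is thus split into a first partial page
	 * [0x1800, 0x2000), one whole page [0x2000, 0x3000), and a last
	 * partial page [0x3000, 0x3800).
	 */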
	/* If there is a first partial page, need to do it the slow way. */
	if (start_ofs) {
		err = ntfs_page_map(ni, ofs, &upl, &pl, &kaddr, TRUE);
		if (err) {
			ntfs_error(vol->mp, "Failed to read first partial "
					"page (ofs 0x%llx).",
					(unsigned long long)ofs);
			return err;
		}
		/*
		 * If the last page is the same as the first page, need to
		 * limit the write to the end offset.
		 */
		size = PAGE_SIZE;
		if (ofs == end)
			size = end_ofs;
		memset(kaddr + start_ofs, val, size - start_ofs);
		ntfs_page_unmap(ni, upl, pl, TRUE);
		ofs += PAGE_SIZE;
		if (ofs >= (end + end_ofs))
			goto done;
	}
	/*
	 * Do the whole pages the fast way.
	 *
	 * TODO: It may be possible to optimize this loop by creating a
	 * sequence of large page lists by hand, mapping them, then running the
	 * memset, then unmapping them and committing them.  This incurs a
	 * higher cpu time because of the larger mapping required but incurs
	 * many fewer calls into the ubc thus less locks will need to be taken
	 * which may well speed things up a lot.  It will need to be
	 * benchmarked to determine which is actually faster so leaving it the
	 * easier way for now.
	 */
	for (; ofs < end; ofs += PAGE_SIZE) {
		/* Find or create the current page. */
		err = ntfs_page_grab(ni, ofs, &upl, &pl, &kaddr, TRUE);
		if (err) {
			ntfs_error(vol->mp, "Failed to grab page (ofs "
					"0x%llx).", (unsigned long long)ofs);
			return err;
		}
		memset(kaddr, val, PAGE_SIZE);
		ntfs_page_unmap(ni, upl, pl, TRUE);
	}
	/* If there is a last partial page, need to do it the slow way. */
	if (end_ofs) {
		err = ntfs_page_map(ni, ofs, &upl, &pl, &kaddr, TRUE);
		if (err) {
			ntfs_error(vol->mp, "Failed to read last partial page "
					"(ofs 0x%llx).",
					(unsigned long long)ofs);
			return err;
		}
		memset(kaddr, val, end_ofs);
		ntfs_page_unmap(ni, upl, pl, TRUE);
	}
done:
	ntfs_debug("Done.");
	return 0;
}

/**
 * ntfs_resident_attr_read - read from an attribute which is resident
 * @ni:		resident ntfs inode describing the attribute from which to read
 * @ofs:	byte offset in attribute at which to start reading
 * @cnt:	number of bytes to copy into the destination buffer @buf
 * @buf:	destination buffer into which to copy attribute data
 *
 * Map the base mft record of the ntfs inode @ni, find the attribute it
 * describes, and copy @cnt bytes from byte offset @ofs into the destination
 * buffer @buf.  If @buf is bigger than the attribute size, zero the remainder.
 *
 * We do not need to worry about compressed attributes because when they are
 * resident the data is not actually compressed and we do not need to worry
 * about encrypted attributes because encrypted attributes cannot be resident.
 *
 * Return 0 on success and errno on error.  Note that a return value of EAGAIN
 * means that someone converted the attribute to non-resident before we took
 * the necessary locks to read from the resident attribute thus we could not
 * perform the read.  The caller needs to cope with this and perform a
 * non-resident read instead.
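 *
 * A caller might handle this as follows (an illustrative sketch; the
 * non-resident fallback is whatever read path the caller normally uses):
 *
 *	err = ntfs_resident_attr_read(ni, ofs, cnt, buf);
 *	if (err == EAGAIN)
 *		err = <perform the non-resident read instead>;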
 */
errno_t ntfs_resident_attr_read(ntfs_inode *ni, const s64 ofs, const u32 cnt,
		u8 *buf)
{
	s64 max_size;
	ntfs_inode *base_ni;
	MFT_RECORD *m;
	ntfs_attr_search_ctx *ctx;
	ATTR_RECORD *a;
	unsigned attr_len, init_len, bytes;
	errno_t err;

	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	/* Map, pin, and lock the mft record. */
	err = ntfs_mft_record_map(base_ni, &m);
	if (err)
		goto err;
	/*
	 * If a parallel write made the attribute non-resident, drop the mft
	 * record and return EAGAIN.
	 */
	if (NInoNonResident(ni)) {
		err = EAGAIN;
		goto unm_err;
	}
	ctx = ntfs_attr_search_ctx_get(base_ni, m);
	if (!ctx) {
		err = ENOMEM;
		goto unm_err;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0,
			ctx);
	if (err) {
		if (err == ENOENT)
			err = EIO;
		goto put_err;
	}
	a = ctx->a;
	lck_spin_lock(&ni->size_lock);
	/* These can happen when we race with a shrinking truncate. */
	attr_len = le32_to_cpu(a->value_length);
	if (attr_len > ni->data_size)
		attr_len = ni->data_size;
	max_size = ubc_getsize(ni->vn);
	if (attr_len > max_size)
		attr_len = max_size;
	init_len = attr_len;
	if (init_len > ni->initialized_size)
		init_len = ni->initialized_size;
	lck_spin_unlock(&ni->size_lock);
	/*
	 * If we are reading from the initialized attribute part, copy the data
	 * over into the destination buffer.
	 */
	bytes = cnt;
	if (init_len > ofs) {
		u32 available = init_len - ofs;
		if (bytes > available)
			bytes = available;
		memcpy(buf, (u8*)a + le16_to_cpu(a->value_offset) + ofs, bytes);
	}
	/* Zero the remainder of the destination buffer if any. */
	if (bytes < cnt)
		bzero(buf + bytes, cnt - bytes);
put_err:
	ntfs_attr_search_ctx_put(ctx);
unm_err:
	ntfs_mft_record_unmap(base_ni);
err:
	return err;
}

/**
 * ntfs_resident_attr_write - write to an attribute which is resident
 * @ni:		resident ntfs inode describing the attribute to which to write
 * @buf:	source buffer from which to copy attribute data
 * @cnt:	number of bytes to copy into the attribute from the buffer
 * @ofs:	byte offset in attribute at which to start writing
 *
 * Map the base mft record of the ntfs inode @ni, find the attribute it
 * describes, and copy @cnt bytes from the buffer @buf into the attribute value
 * at byte offset @ofs.
 *
 * We do not need to worry about compressed attributes because when they are
 * resident the data is not actually compressed and we do not need to worry
 * about encrypted attributes because encrypted attributes cannot be resident.
 *
 * Return 0 on success and errno on error.  Note that a return value of EAGAIN
 * means that someone converted the attribute to non-resident before we took
 * the necessary locks to write to the resident attribute thus we could not
 * perform the write.  The caller needs to cope with this and perform a
 * non-resident write instead.
 */
errno_t ntfs_resident_attr_write(ntfs_inode *ni, u8 *buf, u32 cnt,
		const s64 ofs)
{
	ntfs_inode *base_ni;
	MFT_RECORD *m;
	ntfs_attr_search_ctx *ctx;
	ATTR_RECORD *a;
	errno_t err;
	u32 attr_len;

	base_ni = ni;
	if (NInoAttr(ni))
		base_ni = ni->base_ni;
	/* Map, pin, and lock the mft record. */
	err = ntfs_mft_record_map(base_ni, &m);
	if (err)
		goto err;
	/*
	 * If a parallel write made the attribute non-resident, drop the mft
	 * record and return EAGAIN.
	 */
	if (NInoNonResident(ni)) {
		err = EAGAIN;
		goto unm_err;
	}
	ctx = ntfs_attr_search_ctx_get(base_ni, m);
	if (!ctx) {
		err = ENOMEM;
		goto unm_err;
	}
	err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len, 0, NULL, 0,
			ctx);
	if (err) {
		if (err == ENOENT)
			err = EIO;
		goto put_err;
	}
	a = ctx->a;
	if (a->non_resident)
		panic("%s(): a->non_resident\n", __FUNCTION__);
	lck_spin_lock(&ni->size_lock);
	/* These can happen when we race with a shrinking truncate. */
	attr_len = le32_to_cpu(a->value_length);
	if (ofs > attr_len) {
		ntfs_error(ni->vol->mp, "Cannot write past end of resident "
				"attribute.");
		lck_spin_unlock(&ni->size_lock);
		err = EINVAL;
		goto put_err;
	}
	if (ofs + cnt > attr_len) {
		ntfs_error(ni->vol->mp, "Truncating resident write.");
		cnt = attr_len - ofs;
	}
	if (ofs + cnt > ni->initialized_size)
		ni->initialized_size = ofs + cnt;
	lck_spin_unlock(&ni->size_lock);
	/* Copy the data over from the source buffer. */
	memcpy((u8*)a + le16_to_cpu(a->value_offset) + ofs, buf, cnt);
	/* Mark the mft record dirty to ensure it gets written out. */
	NInoSetMrecNeedsDirtying(ctx->ni);
put_err:
	ntfs_attr_search_ctx_put(ctx);
unm_err:
	ntfs_mft_record_unmap(base_ni);
err:
	return err;
}