/*
 * extended memory interface for MN2WS.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/capability.h>
#include <linux/ptrace.h>

#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/fcntl.h>
#include <asm/mman.h>
#include <asm/exmem.h>
#ifdef CONFIG_ARM
#include <asm/setup.h>
#endif

#include "exmemdrv.h"

#ifdef CONFIG_ARM
/* TODO */
#else
/*
 * Page sizes handled by exmem, expressed as indices into ps_param[].
 * The enumerator values are used directly as array subscripts.
 */
enum exmem_psindex {PS_INDEX_4Kb = 0, PS_INDEX_128Kb = 1, PS_INDEX_4Mb = 2};

/* Smallest and largest usable index into ps_param[]. */
#define MIN_PS_INDEX  PS_INDEX_4Kb  /* 1K pagesize is not supported in exmem */
#define MAX_PS_INDEX  PS_INDEX_4Mb

/* Parameters describing one supported page size (one entry of ps_param[]). */
typedef struct {
	unsigned long size;	/* page size in bytes */
	unsigned long mask;	/* address mask that clears the in-page offset */
	unsigned long ptel_ps;	/* page-size bits stored in a PTE (_PAGE_PS_*) */
} pgsz_struct;

/*
 * Page-size table, indexed by enum exmem_psindex:
 * 4 KiB, 128 KiB and 4 MiB pages with their masks and PTE size bits.
 */
const pgsz_struct ps_param[] = {{0x00001000, 0xfffff000, _PAGE_PS_4Kb},
				{0x00020000, 0xfffe0000, _PAGE_PS_128Kb},
				{0x00400000, 0xffc00000, _PAGE_PS_4Mb}};
#endif /* CONFIG_ARM */

/*
 * Round addr up to the next boundary of the page size selected by psindex
 * (an index into ps_param[]).  Note that psindex is evaluated twice, and
 * that ps_param[] is only defined in the non-ARM build.
 */
#define EXMEM_PAGE_ALIGN(addr, psindex) \
	(((addr) +  ps_param[psindex].size - 1) & ps_param[psindex].mask)

/* Bookkeeping record for one buffer handed out by exmem_ioc_dma_alloc(). */
typedef struct dma_info {
	struct list_head list;	/* link in dma_info_head */
	void	*paddr;		/* physical address of the buffer (lookup key on free) */
	int	order;		/* page allocation order of the buffer */
} dma_info_t;

/* List of live DMA buffers (dma_info_t entries); protected by dma_info_lock. */
static LIST_HEAD(dma_info_head);
static DEFINE_SPINLOCK(dma_info_lock);

/*
 * Failure reasons of check_pte_consistency_range().  The function returns
 * these values negated; 0 means the range is consistent.
 */
enum check_pte_consistency_range_error {
    E_CHKPTE_NOPTE = 1,		/* pte is NULL */
    E_CHKPTE_DISCONT,		/* physical addresses are discontinuous */
    E_CHKPTE_PGFLG_PRESENTV,	/* PRESENT/VALID bits differ within the range */
    E_CHKPTE_PGFLG_PROTNX,	/* PROT/NX bits differ within the range */
    E_CHKPTE_PGFLG_CACHE,	/* CACHE bit differs within the range */
    E_CHKPTE_PGFLG_GLOBAL	/* GLOBAL bit differs within the range */
};

/*
 * True when vaddr is at or above 0x40000000.  exmem_fixup_page_prot() maps
 * such addresses uncached when no explicit cache mode was requested.
 */
#define IS_EXMEM_VADDR_UNCACHED(vaddr)	((vaddr) >= 0x40000000)

/*
 * Walk the page tables of @mm down to the PTE slot covering @addr.
 *
 * Returns the pointer produced by pte_offset_map(), or NULL as soon as one
 * of the upper levels (pgd, pud, pmd) is missing or holds a zero value.
 * Nothing is allocated, and the mapping returned by pte_offset_map() is
 * handed to the caller as is (this function does not unmap it).
 */
static pte_t *get_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd_ent;
	pud_t *pud_ent;
	pmd_t *pmd_ent;

	pgd_ent = pgd_offset(mm, addr);
	if (!pgd_ent || !pgd_val(*pgd_ent))
		return NULL;

	pud_ent = pud_offset(pgd_ent, addr);
	if (!pud_ent || !pud_val(*pud_ent))
		return NULL;

	pmd_ent = pmd_offset(pud_ent, addr);
	if (!pmd_ent || !pmd_val(*pmd_ent))
		return NULL;

	return pte_offset_map(pmd_ent, addr);
}

static unsigned long exmem_addr_trans(unsigned long vaddr)
{
	pte_t *pte;

	pte = get_pte_offset(current->mm, vaddr);

	if (!pte || !pte_val(*pte)){
		return 0;
	}

	return ((pte_val(*pte) & PAGE_MASK) | (vaddr & ~PAGE_MASK));
}

/*
 *  ioctl() function for exmem. ----------------------------------------------
 */

/*
 * Rewrite the cache attribute of every PTE in [start, start + size) of
 * @vma's address space and purge the data cache for the pages behind them.
 *
 * @vma:   VMA covering the range (its mm's page_table_lock is taken here)
 * @start: first virtual address of the range
 * @size:  length of the range in bytes; an empty range is a BUG()
 * @attr:  attribute passed on to exmem_pte_cache_attr()
 *
 * Returns 0 on success or -ENOMEM when a page-table level could not be
 * obtained.  On failure, the PTEs that were already rewritten are still
 * flushed from the TLB so that no stale translation survives.
 *
 * NOTE(review): pud_alloc()/pmd_alloc()/pte_alloc_map() are called with
 * page_table_lock held; this relies on the tables already existing (the
 * only caller maps with MAP_POPULATE) - confirm.
 */
static int pgprot_range_cache_attr(struct vm_area_struct *vma,
				   int start, int size, unsigned long attr)
{
	unsigned long beg = start;
	unsigned long end = start + size;
	unsigned long addr = beg;	/* unsigned cursor, avoids int wrap */
	struct mm_struct *mm = vma->vm_mm;
	int err = 0;

	if (beg >= end) {
		BUG();
	}

	spin_lock(&mm->page_table_lock);
	do {
		pgd_t   *pgd;
		pud_t	*pud;
		pmd_t   *pmd;
		pte_t   *pte;

		pgd = pgd_offset(mm, addr);
		pud = pud_alloc(mm, pgd, addr);
		if (!pud) {
			err = -ENOMEM;
			break;
		}
		pmd = pmd_alloc(mm, pud, addr);
		if (!pmd) {
			err = -ENOMEM;
			break;
		}
		pte = pte_alloc_map(mm, pmd, addr);
		if (!pte) {
			err = -ENOMEM;
			break;
		}

#ifdef CONFIG_ARM
		set_pte_at(mm, addr,
			pte, __pte(exmem_pte_cache_attr(pte_val(*pte), attr)));
#else
		set_pte(pte, __pte(exmem_pte_cache_attr(pte_val(*pte), attr)));
#endif

		purge_cache((unsigned long)pfn_to_kaddr(pte_pfn(*pte)), PAGE_SIZE, PURGE_CACHE_D_PURGE_INV);

		/* balance the mapping taken by pte_alloc_map() */
		pte_unmap(pte);

		addr += PAGE_SIZE;

	} while (addr && (addr < end));	/* addr == 0 means we wrapped */

	/*
	 * Flush everything that was touched: the whole range on success,
	 * only the part processed so far when we bailed out early.
	 */
	if (!err)
		flush_tlb_range(vma, beg, end);
	else if (addr != beg)
		flush_tlb_range(vma, beg, addr);

	spin_unlock(&mm->page_table_lock);

	return err;
}

/*
 * Create a populated, locked, private anonymous mapping in the current
 * process and give its PTEs the cache attribute @attr.
 *
 * @start: page-aligned address hint passed to do_mmap()
 * @size:  requested length; rounded up to a whole number of pages
 * @flags: extra mmap flags from the caller (MAP_POPULATE, MAP_LOCKED,
 *         MAP_PRIVATE and MAP_ANONYMOUS are always added)
 * @attr:  cache attribute forwarded to pgprot_range_cache_attr()
 *
 * Returns the mapped address (as a positive int) on success, -EINVAL for a
 * misaligned start or an empty/oversized length, and -ENOMEM for any other
 * failure.  The caller holds mmap_sem for writing (see exmem_ioctl()).
 *
 * Because the address is returned through an int, a mapping whose address
 * is 0 or does not fit in a positive int cannot be reported; it is torn
 * down again and -ENOMEM is returned instead of leaking it.
 */
static int exmem_ioctl_alloc(unsigned long start, size_t size,
			     unsigned long flags, unsigned long attr)
{
	struct vm_area_struct *vma;
	int prot = PROT_READ | PROT_WRITE;
	unsigned long addr;

	flags |= MAP_POPULATE|MAP_LOCKED|MAP_PRIVATE|MAP_ANONYMOUS;

	if (start != PAGE_ALIGN(start)) {
		return -EINVAL;
	}
	size = PAGE_ALIGN(size);
	if (!size || (size >> PAGE_SHIFT) > num_physpages) {
		return -EINVAL;
	}

	/* keep the full unsigned long so high addresses are not mistaken for errors */
	addr = do_mmap(NULL, start, size, prot, flags, (unsigned long)NULL);
	if (IS_ERR_VALUE(addr)) {
		return -ENOMEM;
	}

	/* address cannot be represented in the int return value */
	if (addr == 0 || addr > (unsigned long)INT_MAX) {
		do_munmap(current->mm, addr, size);
		return -ENOMEM;
	}

	vma = find_vma(current->mm, addr);
	if (!vma || pgprot_range_cache_attr(vma, addr, size, attr)) {
		/* unmap what do_mmap() actually returned, not the hint */
		do_munmap(current->mm, addr, size);
		return -ENOMEM;
	}

	return (int)addr;
}

/*                                    */
/* for cache purge by virtual address */
/*                                    */

/*
 * Purge the cache for the user virtual range [vstart, vstart + size).
 *
 * The range is processed in pieces that never cross a page boundary.  Each
 * piece is translated with exmem_addr_trans(); pieces without a
 * translation are silently skipped.  @flags is forwarded to the purge
 * primitives unchanged.
 */
static void exmem_ioc_vaddr_purge_cache(unsigned long vstart, size_t size, int flags)
{
	unsigned long done = 0;

	while (done < size) {
		unsigned long va = vstart + done;
		/* bytes left until the end of the page containing va */
		unsigned long chunk = PAGE_SIZE - (va & ~PAGE_MASK);
		unsigned long pa;

		if (chunk > (size - done))
			chunk = size - done;

		pa = exmem_addr_trans(va);
		if (pa != 0) {
#ifdef CONFIG_ARM
			vaddr_purge_L1cache(va, chunk, flags);
			purge_L2cache(pa, chunk, flags);
#else /* CONFIG_ARM */
			purge_cache(pa, chunk, flags);
#endif /* CONFIG_ARM */
		}

		done += chunk;
	}
}

#if defined(CONFIG_UNIPHIER_HAS_L2CA) && defined(EXMEM_ALLOW_USER_L2COP)
/*
 * Purge only the L1 cache for the user virtual range
 * [vstart, vstart + size), one page-bounded piece at a time.  Pieces whose
 * address cannot be translated by exmem_addr_trans() are skipped.
 */
static void exmem_ioc_vaddr_purge_L1cache(unsigned long vstart, size_t size, int flags)
{
	unsigned long done = 0;

	while (done < size) {
		unsigned long va = vstart + done;
		/* bytes left until the end of the page containing va */
		unsigned long chunk = PAGE_SIZE - (va & ~PAGE_MASK);
		unsigned long pa;

		if (chunk > (size - done))
			chunk = size - done;

		pa = exmem_addr_trans(va);
		if (pa != 0) {
#ifdef CONFIG_ARM
			vaddr_purge_L1cache(va, chunk, flags);
#else /* CONFIG_ARM */
			purge_L1cache(pa, chunk, flags);
#endif /* CONFIG_ARM */
		}

		done += chunk;
	}
}

/*
 * Purge only the L2 cache for the user virtual range
 * [vstart, vstart + size), one page-bounded piece at a time.  Pieces whose
 * address cannot be translated by exmem_addr_trans() are skipped.
 */
static void exmem_ioc_vaddr_purge_L2cache(unsigned long vstart, size_t size, int flags)
{
	unsigned long done = 0;

	while (done < size) {
		unsigned long va = vstart + done;
		/* bytes left until the end of the page containing va */
		unsigned long chunk = PAGE_SIZE - (va & ~PAGE_MASK);
		unsigned long pa;

		if (chunk > (size - done))
			chunk = size - done;

		pa = exmem_addr_trans(va);
		if (pa != 0)
			purge_L2cache(pa, chunk, flags);

		done += chunk;
	}
}
#endif	/* defined(CONFIG_UNIPHIER_HAS_L2CA) && defined(EXMEM_ALLOW_USER_L2COP) */

/* Set the Reserved flag on each of the (1 << order) pages starting at @page. */
static void mark_pages(struct page *page, int order)
{
	int nr_pages = 1 << order;
	int idx;

	for (idx = 0; idx < nr_pages; idx++)
		SetPageReserved(page + idx);
}

/* Clear the Reserved flag on each of the (1 << order) pages starting at @page. */
static void unmark_pages(struct page *page, int order)
{
	int nr_pages = 1 << order;
	int idx;

	for (idx = 0; idx < nr_pages; idx++)
		ClearPageReserved(page + idx);
}

/*
 * Size in bytes of an allocation of the given page order, i.e. PAGE_SIZE
 * doubled @order times.  An order of zero or less yields PAGE_SIZE.
 */
static int get_size(int order)
{
	int bytes = PAGE_SIZE;
	int remaining = order;

	while (remaining > 0) {
		bytes <<= 1;
		remaining--;
	}
	return bytes;
}

/*
 * Allocate a zeroed, physically contiguous buffer from the DMA zone and
 * report its physical address through @start.
 *
 * @start: destination for the physical address; written with put_user()
 * @size:  requested size in bytes (rounded up to a power-of-two number of
 *         pages by get_order())
 *
 * The pages are marked reserved, the data cache covering them is purged,
 * and a dma_info_t record is appended to dma_info_head so that
 * exmem_ioc_dma_free() can find the buffer again.
 *
 * Returns 0 on success, -EINVAL for a zero size, -ENOMEM when either the
 * record or the pages cannot be allocated, and -EFAULT when the address
 * cannot be stored through @start (everything is released in that case).
 */
static int exmem_ioc_dma_alloc(void **start, size_t size)
{
	dma_info_t *rec;
	unsigned long kva, pa;
	unsigned long irqflags;
	int order;

	if (size == 0)
		return -EINVAL;

	rec = kmalloc(sizeof(dma_info_t), GFP_KERNEL);
	if (!rec)
		return -ENOMEM;

	order = get_order(size);
	kva = __get_free_pages(GFP_KERNEL|GFP_DMA|__GFP_NOWARN|__GFP_NORETRY|
			__GFP_ZERO|__GFP_COMP, order);
	if (!kva) {
		kfree(rec);
		return -ENOMEM;
	}

	pa = virt_to_phys((void *)kva);
	mark_pages(virt_to_page(kva), order);
	purge_cache(pa, get_size(order), PURGE_CACHE_D_PURGE_INV);

	if (put_user(pa, (unsigned long *)start)) {
		/* could not report the address: undo everything */
		unmark_pages(virt_to_page(kva), order);
		free_pages(kva, order);
		kfree(rec);
		return -EFAULT;
	}

	rec->paddr = (void *)pa;
	rec->order = order;

	spin_lock_irqsave(&dma_info_lock, irqflags);
	list_add_tail(&rec->list, &dma_info_head);
	spin_unlock_irqrestore(&dma_info_lock, irqflags);

	return 0;
}

static int exmem_ioc_dma_free(void *start)
{
	dma_info_t *info = NULL;
	struct list_head *p;
	unsigned long vaddr;
	struct page *pg;
	unsigned long flags;

	spin_lock_irqsave(&dma_info_lock, flags);
	list_for_each(p, &dma_info_head) {
		info = list_entry(p, dma_info_t, list);
		if (info->paddr == start) {
			break;
		}
	}

	if (p == &dma_info_head) {
		/* unknown start address */
		spin_unlock_irqrestore(&dma_info_lock, flags);
		return -EINVAL;
	}

	vaddr = (unsigned long)phys_to_virt((unsigned long)start);
	pg = virt_to_page(vaddr);

	/* FIXME */
//	if (page_count(pg) == 1) {
		list_del(p);
		spin_unlock_irqrestore(&dma_info_lock, flags);
		unmark_pages(pg, info->order);
		free_pages(vaddr, info->order);
		kfree(info);
		return 0;
//	} else {
//		/* someone map this page */
//		spin_unlock_irqrestore(&dma_info_lock, flags);
//		return -EBUSY;
//	}
}


/*
 * In-kernel helper: allocate a DMA buffer with exmem_ioc_dma_alloc() and
 * return an uncached kernel virtual address for it.
 *
 * On ARM the buffer is remapped with ioremap_nocache(); on other builds
 * the uncached alias at ZONE_DMA_UNCACHE_OFFSET is returned.
 *
 * Returns NULL when the allocation fails.  If the ARM remap fails, the
 * freshly allocated buffer is released again instead of being leaked.
 *
 * NOTE(review): exmem_ioc_dma_alloc() stores the address with put_user()
 * and is given a kernel stack pointer here; this presumably only works
 * when the caller's address limit permits kernel addresses - confirm.
 */
void *exmem_dma_alloc_noncache(size_t size)
{
	unsigned long paddr;
#ifdef CONFIG_ARM
	void *vaddr;
#endif

	if (exmem_ioc_dma_alloc((void **)&paddr, size) != 0) {
		return NULL;
	}

#ifdef CONFIG_ARM
	vaddr = ioremap_nocache(paddr, size);
	if (!vaddr) {
		/* do not leak the pages and the dma_info record */
		exmem_ioc_dma_free((void *)paddr);
	}
	return vaddr;
#else
	return (phys_to_virt(paddr) + ZONE_DMA_UNCACHE_OFFSET);
#endif
}
EXPORT_SYMBOL(exmem_dma_alloc_noncache);


static int exmem_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
		       unsigned long a)
{
	struct exmem_ioctl arg;
	struct exmem_dma_alloc arg_dalloc;
	struct exmem_dma_free arg_dfree;
	struct mm_struct *mm = current->mm;
	int err = 0;

	switch (cmd) {
	case EXMEM_IOC_UNC_ALLOC:
		if (copy_from_user(&arg, (struct exmem_ioctl *)a,
				   sizeof(struct exmem_ioctl))) {
			return -EFAULT;
		}
		down_write(&mm->mmap_sem);
		err = exmem_ioctl_alloc(arg.start, arg.size,
					arg.flags, EXMEM_PAGE_UCUC);
		up_write(&mm->mmap_sem);
		break;

	case EXMEM_IOC_UNC_FREE:
		if (copy_from_user(&arg, (struct exmem_ioctl *)a,
				   sizeof(struct exmem_ioctl))) {
			return -EFAULT;
		}
		down_write(&mm->mmap_sem);
		err = do_munmap(mm, arg.start, arg.size);
		up_write(&mm->mmap_sem);
		break;

	case EXMEM_IOC_VADDR_PURGE_CACHE:
		if (copy_from_user(&arg, (struct exmem_ioctl *)a,
				   sizeof(struct exmem_ioctl))) {
			return -EFAULT;
		}
		exmem_ioc_vaddr_purge_cache(arg.start, arg.size, arg.flags);
		break;

	case EXMEM_IOC_PADDR_PURGE_CACHE:
		if (copy_from_user(&arg, (struct exmem_ioctl *)a,
				   sizeof(struct exmem_ioctl))) {
			return -EFAULT;
		}
		purge_cache(arg.start, arg.size, arg.flags);
		break;

#if defined(CONFIG_UNIPHIER_HAS_L2CA)
	case EXMEM_IOC_UNCWG_ALLOC:
		if (copy_from_user(&arg, (struct exmem_ioctl *)a,
				   sizeof(struct exmem_ioctl))) {
			return -EFAULT;
		}
		down_write(&mm->mmap_sem);
		err = exmem_ioctl_alloc(arg.start, arg.size,
								arg.flags, EXMEM_PAGE_UCWG);
		up_write(&mm->mmap_sem);
		break;

#if defined(EXMEM_ALLOW_USER_L2COP)
	case EXMEM_IOC_VADDR_PURGE_L1C:
		if (copy_from_user(&arg, (struct exmem_ioctl *)a,
				   sizeof(struct exmem_ioctl))) {
			return -EFAULT;
		}
		exmem_ioc_vaddr_purge_L1cache(arg.start, arg.size, arg.flags);
		break;

	case EXMEM_IOC_PADDR_PURGE_L1C:
		if (copy_from_user(&arg, (struct exmem_ioctl *)a,
				   sizeof(struct exmem_ioctl))) {
			return -EFAULT;
		}
		purge_L1cache(arg.start, arg.size, arg.flags);
		break;

	case EXMEM_IOC_VADDR_PURGE_L2C:
		if (copy_from_user(&arg, (struct exmem_ioctl *)a,
				   sizeof(struct exmem_ioctl))) {
			return -EFAULT;
		}
		exmem_ioc_vaddr_purge_L2cache(arg.start, arg.size, arg.flags);
		break;

	case EXMEM_IOC_PADDR_PURGE_L2C:
		if (copy_from_user(&arg, (struct exmem_ioctl *)a,
				   sizeof(struct exmem_ioctl))) {
			return -EFAULT;
		}
		purge_L2cache(arg.start, arg.size, arg.flags);
		break;
#endif	/* defined(EXMEM_ALLOW_USER_L2COP) */
#endif	/* defined(CONFIG_UNIPHIER_HAS_L2CA) */

	case EXMEM_IOC_DMA_ALLOC:
		if (copy_from_user(&arg_dalloc, (struct exmem_dma_alloc *)a,
				   sizeof(struct exmem_dma_alloc))) {
			return -EFAULT;
		}
		err = exmem_ioc_dma_alloc(arg_dalloc.start, arg_dalloc.size);
		break;

	case EXMEM_IOC_DMA_FREE:
		if (copy_from_user(&arg_dfree, (struct exmem_dma_free *)a,
				   sizeof(struct exmem_dma_free))) {
			return -EFAULT;
		}
		err = exmem_ioc_dma_free(arg_dfree.start);
		break;

	default:
		return -EINVAL;
	}

	return err;
}

/*
 *  mmap() function for exmem. ---------------------------------------------
 */

#ifdef CONFIG_ARM
	/* to avoid compile error for now */
#else
/*
 * Map the page-size bits of a PTE (@ptel_ps) back to the matching index
 * in ps_param[].  An unknown value is reported and treated as a BUG().
 */
static int ptel_ps_to_index(unsigned long ptel_ps)
{
	int idx = MIN_PS_INDEX;

	while (idx <= MAX_PS_INDEX) {
		if (ps_param[idx].ptel_ps == ptel_ps)
			return idx;
		idx++;
	}

	printk(KERN_DEBUG "exmem : ptel_ps_to_index : invalid argument\n");
	BUG();
	return -1;
}

/*
 * This function must be called with the MM semaphore held.
 * And must be called with the page table spinlock held.
 * The range specified by the addr and len is in the vma's range.
 */
static int check_pte_consistency_range(struct vm_area_struct *vma,
			   unsigned long addr, unsigned long len)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long offset;
	pte_t *pte, *pte_base;

	if (len == 0) {
		return 0;
	}

	/* get the first physical address */
	pte_base = get_pte_offset(mm, addr);
	if (!pte_base) {
		return  -E_CHKPTE_NOPTE;
	}

	for (offset = 0; offset < len; offset += PAGE_SIZE) {
		pte = get_pte_offset(mm, addr + offset);
		if (!pte) {
			return -E_CHKPTE_NOPTE;
		}

		/* continuity check */
		if (((pte_val(*pte_base) & PAGE_MASK) + offset) !=
		    (pte_val(*pte) & PAGE_MASK)) {
			return -E_CHKPTE_DISCONT;
		}

		/* pte PRESENT bit and VALID bit check*/
		if ((pte_val(*pte_base) & _PAGE_PRESENTV) !=
		    (pte_val(*pte) & _PAGE_PRESENTV)) {
			return -E_CHKPTE_PGFLG_PRESENTV;
		}
		/* pte PROT bit and NX bit check*/
		if ((pte_val(*pte_base) & (_PAGE_PROT| _PAGE_NX)) !=
		    (pte_val(*pte) & (_PAGE_PROT| _PAGE_NX))) {
			return -E_CHKPTE_PGFLG_PROTNX;
		}

		/* pte CACHE bit check*/
		if ((pte_val(*pte_base) & _PAGE_CACHE) !=
		    (pte_val(*pte) & _PAGE_CACHE)) {
			return -E_CHKPTE_PGFLG_CACHE;
		}

		/* pte GLOBAL bit check*/
		if ((pte_val(*pte_base) & _PAGE_GLOBAL) !=
		    (pte_val(*pte) & _PAGE_GLOBAL)) {
			return -E_CHKPTE_PGFLG_GLOBAL;
		}
	}
	return 0;
}

/*
 * Stamp the page-size bits @ptel_ps into every PTE of [addr, addr + len)
 * and mark each entry dirty and young while doing so.
 *
 * The PTEs are assumed to exist; callers validate the range with
 * check_pte_consistency_range() beforehand.
 *
 * This function must be called with the MM semaphore held.
 * And must be called with the page table spinlock held.
 * The range specified by the addr and len is in the vma's range.
 */
static void set_pte_pagesize_range(struct vm_area_struct *vma,
	unsigned long addr, unsigned long len, unsigned long ptel_ps)
{
	struct mm_struct *mm = vma->vm_mm;
	const unsigned long limit = addr + len;
	unsigned long va;

	for (va = addr; va < limit; va += PAGE_SIZE) {
		pte_t *slot = get_pte_offset(mm, va);
		pte_t val = pte_mkyoung(pte_mkdirty(*slot));

		/* replace only the page-size field */
		val = __pte((pte_val(val) & ~_PAGE_PS_MASK) | ptel_ps);
		set_pte(slot, val);
	}
}

/*
 * Assign page sizes to [addr, addr + len), using pages as large as
 * alignment allows, up to the size selected by @psidx.
 *
 * Working from the smallest size upwards, the unaligned head and tail of
 * the remaining range (relative to the next larger page size) are given
 * the current size, and the range shrinks to the aligned middle.  The
 * loop stops once no aligned middle is left or @psidx is reached; what
 * remains then gets the size of the level that was reached.
 *
 * This function must be called with the MM semaphore held.
 * And must be called with the page table spinlock held.
 * The range specified by the addr and len is in the vma's range.
 */
static void adjust_pte_pagesize_range(struct vm_area_struct *vma,
	unsigned long addr, unsigned long len, int psidx)
{
	unsigned long lo = addr;
	unsigned long hi = addr + len;
	int level;

	for (level = MIN_PS_INDEX; level < psidx; level++) {
		/* boundaries of the part aligned to the next larger size */
		unsigned long lo_next = EXMEM_PAGE_ALIGN(lo, level + 1);
		unsigned long hi_next = hi & ps_param[level + 1].mask;

		if (lo_next >= hi_next)
			break;

		if (lo_next != lo)
			set_pte_pagesize_range(vma, lo, lo_next - lo,
					       ps_param[level].ptel_ps);
		if (hi != hi_next)
			set_pte_pagesize_range(vma, hi_next, hi - hi_next,
					       ps_param[level].ptel_ps);

		lo = lo_next;
		hi = hi_next;
	}

	set_pte_pagesize_range(vma, lo, hi - lo, ps_param[level].ptel_ps);
}

/*
 * Try to cover the whole of @vma with larger pages.
 *
 * The largest usable page size is the biggest one for which the physical
 * offset (vm_pgoff) and the virtual start address share the same offset
 * within a page of that size.  If the PTEs of the VMA are consistent
 * (see check_pte_consistency_range()), their page-size bits are rewritten
 * and the TLB is flushed for the range; otherwise nothing is changed.
 *
 * This function must be called with the MM semaphore held.
 * The page table spinlock is taken here.
 */
static void optimize_pte_pagesize(struct vm_area_struct *vma)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long addr, len;
	int i, psidx;

	addr = vma->vm_start;
	len  = vma->vm_end-vma->vm_start;

	psidx = MIN_PS_INDEX;
	for (i = MAX_PS_INDEX; i > MIN_PS_INDEX ; i--) {
		if (((vma->vm_pgoff << PAGE_SHIFT)  & ~ps_param[i].mask) ==
		    (addr & ~ps_param[i].mask)) {
			psidx = i;
			break;
		}
	}

	spin_lock(&mm->page_table_lock);

	if (check_pte_consistency_range(vma, addr, len)) {
		goto out;
	}
	adjust_pte_pagesize_range(vma, addr, len, psidx);
	/* flush against the VMA that was modified, not the mm's first VMA */
	flush_tlb_range(vma, addr, addr + len);

out:
	spin_unlock(&mm->page_table_lock);
	return;
}
#endif /* CONFIG_ARM */

/*
 * Access policy hook for physical ranges.  This driver imposes no
 * restriction: every range is allowed, so the arguments are ignored.
 */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
	(void)pfn;
	(void)size;

	return 1;
}

/*
 * Counterpart of xlate_dev_mem_ptr(), called by exmem_read()/exmem_write().
 * Declared weak and defined outside this file.
 * NOTE(review): the callers do not test for a missing definition - confirm
 * one is always linked in.
 */
extern void __attribute__((weak)) unxlate_dev_mem_ptr(unsigned long phys, void *addr);

/*
 * read() handler: copy @count bytes of physical memory, starting at the
 * physical address given by *ppos, to the user buffer @buf.
 *
 * The transfer is split so that no piece crosses a page boundary.
 * Returns the number of bytes read and advances *ppos by that amount.
 * Returns -EFAULT when the range is not a valid physical range, when a
 * page cannot be translated, or when a copy to user space fails (bytes
 * already copied are not reported in that case), and -EPERM when
 * range_is_allowed() rejects the access.
 */
static ssize_t exmem_read(struct file * file, char __user * buf,
			  size_t count, loff_t *ppos)
{
	unsigned long phys = *ppos;
	ssize_t done = 0;

	if (!valid_phys_addr_range(phys, count))
		return -EFAULT;

	while (count > 0) {
		ssize_t chunk;
		char *kptr;
		unsigned long left;

		/* never go past the end of the current page */
		chunk = PAGE_SIZE - (phys & (PAGE_SIZE - 1));
		chunk = min_t(unsigned long, chunk, count);

		if (!range_is_allowed(phys >> PAGE_SHIFT, count))
			return -EPERM;

		/*
		 * On ia64 if a page has been mapped somewhere as
		 * uncached, then it must also be accessed uncached
		 * by the kernel or data corruption may occur
		 */
		kptr = xlate_dev_mem_ptr(phys);
		if (!kptr)
			return -EFAULT;

		left = copy_to_user(buf, kptr, chunk);
		unxlate_dev_mem_ptr(phys, kptr);
		if (left)
			return -EFAULT;

		buf += chunk;
		phys += chunk;
		count -= chunk;
		done += chunk;
	}

	*ppos += done;
	return done;
}

/*
 * write() handler: copy @count bytes from the user buffer @buf to physical
 * memory, starting at the physical address given by *ppos.
 *
 * The transfer is split so that no piece crosses a page boundary.
 * Returns the number of bytes written and advances *ppos by that amount.
 * If a translation or a copy fails after some bytes were already written,
 * the partial count is returned; if nothing was written, -EFAULT is
 * returned.  Returns -EFAULT for an invalid physical range and -EPERM when
 * range_is_allowed() rejects the access.
 */
static ssize_t exmem_write(struct file * file, const char __user * buf,
			   size_t count, loff_t *ppos)
{
	unsigned long phys = *ppos;
	ssize_t done = 0;

	if (!valid_phys_addr_range(phys, count))
		return -EFAULT;

	while (count > 0) {
		ssize_t chunk;
		unsigned long uncopied;
		void *kptr;

		/* never go past the end of the current page */
		chunk = PAGE_SIZE - (phys & (PAGE_SIZE - 1));
		chunk = min_t(unsigned long, chunk, count);

		if (!range_is_allowed(phys >> PAGE_SHIFT, chunk))
			return -EPERM;

		/*
		 * On ia64 if a page has been mapped somewhere as
		 * uncached, then it must also be accessed uncached
		 * by the kernel or data corruption may occur
		 */
		kptr = xlate_dev_mem_ptr(phys);
		if (!kptr) {
			if (done)
				break;
			return -EFAULT;
		}

		uncopied = copy_from_user(kptr, buf, chunk);
		unxlate_dev_mem_ptr(phys, kptr);
		if (uncopied) {
			/* account for the part that did make it */
			done += chunk - uncopied;
			if (done)
				break;
			return -EFAULT;
		}

		buf += chunk;
		phys += chunk;
		count -= chunk;
		done += chunk;
	}

	*ppos += done;
	return done;
}

/*
 * Hook consulted by exmem_mmap() before mapping; a zero return makes the
 * mmap fail with -EINVAL.  Declared weak and defined outside this file.
 */
extern int __attribute__((weak)) phys_mem_access_prot_allowed(struct file *file,
		unsigned long pfn, unsigned long size, pgprot_t *vma_prot);

/* No custom get_unmapped_area handler; exmem_fops installs NULL. */
#define get_unmapped_area_mem	NULL

/* VMA operations installed by exmem_mmap(); intentionally left empty. */
static const struct vm_operations_struct mmap_exmem_ops = {
};

/*
 * mmap() handler: map the physical range starting at page frame
 * vma->vm_pgoff into the VMA.
 *
 * When the range lies in RAM known to the kernel (a single memory bank on
 * ARM, valid first and last pfn otherwise) and every page has a non-zero
 * page count and is not anonymous, the pages are inserted one by one with
 * vm_insert_page().  In all other cases the range is mapped with
 * remap_pfn_range(), as /dev/mem would do.
 *
 * For shared mappings on non-ARM builds the PTEs are afterwards converted
 * to larger page sizes where possible (optimize_pte_pagesize()).
 *
 * Returns 0 on success, -EINVAL for an invalid or disallowed physical
 * range, -EPERM when range_is_allowed() refuses, and -EAGAIN when
 * inserting or remapping pages fails.
 *
 * NOTE(review): the vm_insert_page() path sets VM_RESERVED but not VM_IO
 * here, while mn2ws_prepare_split_vma()/mn2ws_prepare_move_vma() act only
 * on VMAs having both flags - confirm large pages created on this path
 * are handled when the VMA is split or moved.
 */
static int exmem_mmap(struct file *file, struct vm_area_struct *vma)
{
	size_t size = vma->vm_end - vma->vm_start;
	unsigned long pfn0 = vma->vm_pgoff;
#ifdef CONFIG_ARM
	struct meminfo *mi = &meminfo;
	int found = 0;
	int node, i;
#endif

	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
		return -EINVAL;

	if (!range_is_allowed(vma->vm_pgoff, size))
		return -EPERM;

	if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
					  &vma->vm_page_prot))
		return -EINVAL;
	vma->vm_ops = &mmap_exmem_ops;

#ifdef CONFIG_ARM
	/* the whole range must fit inside one memory bank */
	for_each_online_node(node) {
		for_each_nodebank(i, mi, node) {
			struct membank *bank = &mi->bank[i];
			unsigned long pfn_start = bank_pfn_start(bank);
			unsigned long pfn_end   = bank_pfn_end(bank);

			if ((pfn_start <= pfn0) &&
				(pfn0 + (size >> PAGE_SHIFT) <= pfn_end)) {
				found = 1;
			}
		}
	}

	if (found) {
#else
	/* only the first and the last page frame are checked here */
	if (pfn_valid(pfn0) && pfn_valid(pfn0 + (size >> PAGE_SHIFT) - 1)) {
#endif
		unsigned long start;
		unsigned long pfn;
		unsigned long remain;

		/* pre-screening for vm_insert_page */
		for (pfn = pfn0; pfn < pfn0 + (size >> PAGE_SHIFT); pfn++) {
			struct page *pg = pfn_to_page(pfn);
			if (!page_count(pg) || PageAnon(pg)) {
				/*
				 * This region cannot be mapped by
				 * vm_insert_page, because some page has no
				 * page count or is an anonymous page.
				 * Fall back to remap_pfn_range()
				 * for /dev/mem compatibility.
				 */
				goto do_remap_pfn;
			}
		}

		start = vma->vm_start;
		pfn = pfn0;
		remain = size;

		vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
		/* insert the pages one at a time */
		while (remain > 0) {
			if (vm_insert_page(vma, start, pfn_to_page(pfn))) {
				return -EAGAIN;
			}
			start += PAGE_SIZE;
			pfn++;
			remain -= PAGE_SIZE;
		}
		goto succeed_map;
	}

do_remap_pfn:
	/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
	if (remap_pfn_range(vma, vma->vm_start, pfn0, size,
		vma->vm_page_prot)) {
		return -EAGAIN;
	}

succeed_map:
	/* page-size optimisation is applied to shared mappings only */
	if (!(vma->vm_flags & VM_SHARED)) {
		return 0;
	}

#ifdef CONFIG_ARM
	/* TODO */
#else
	optimize_pte_pagesize(vma);
#endif

	return 0;
}

/*
 * llseek() handler.  Only absolute (SEEK_SET, 0) and relative
 * (SEEK_CUR, 1) positioning are supported; any other origin yields
 * -EINVAL.  The file position is updated under the inode mutex and the
 * new position is returned.
 */
static loff_t exmem_lseek(struct file * file, loff_t offset, int orig)
{
	struct inode *ino = file->f_path.dentry->d_inode;
	loff_t ret;

	mutex_lock(&ino->i_mutex);
	if (orig == SEEK_SET || orig == SEEK_CUR) {
		if (orig == SEEK_CUR)
			offset += file->f_pos;
		file->f_pos = offset;
		ret = file->f_pos;
		/* positions may look like negative error codes */
		force_successful_syscall_return();
	} else {
		ret = -EINVAL;
	}
	mutex_unlock(&ino->i_mutex);

	return ret;
}

/* open() handler: only callers holding CAP_SYS_RAWIO may open the device. */
static int exmem_open(struct inode *inode, struct file *filp)
{
	if (!capable(CAP_SYS_RAWIO))
		return -EPERM;

	return 0;
}

/*
 * File operations of the exmem device.  Not static: exmem_fixup_page_prot()
 * compares file->f_op against this object to recognise exmem files.
 */
const struct file_operations exmem_fops =
{
	.llseek		= exmem_lseek,
	.read		= exmem_read,
	.write		= exmem_write,
	.ioctl		= exmem_ioctl,
	.mmap		= exmem_mmap,
	.open		= exmem_open,
	.get_unmapped_area = get_unmapped_area_mem,
};

#ifdef CONFIG_ARM
/*
 * ARM build: stub to avoid a compile error.  No page-size optimisation is
 * performed on ARM in this file, so there is nothing to undo before a VMA
 * split.
 */
void mn2ws_prepare_split_vma(struct vm_area_struct *vma, unsigned long addr)
{
	return;
}

/*
 * ARM build: stub.  No page-size optimisation is performed on ARM in this
 * file, so there is nothing to undo before a VMA move.
 */
void mn2ws_prepare_move_vma(struct vm_area_struct *vma,
	unsigned long old_addr, unsigned long old_len, unsigned long new_addr)
{
	return;
}
#else
/*
 * Make @addr a page boundary at the PTE level.
 *
 * If @addr falls inside a large page (its PTE carries a page size for
 * which @addr is not aligned), the PTEs of that large page are re-sized so
 * that the parts below and above @addr are each covered separately, and
 * the TLB is flushed for the former large page.  Nothing is done when no
 * PTE exists for @addr or @addr is already aligned.  Inconsistent PTEs
 * inside the large page are treated as a BUG().
 *
 * This function must be called with the MM semaphore held.
 * And must be called with the page table spinlock held.
 * The addr is in the vma's range.
 */
static void split_pte_pagesize(struct vm_area_struct *vma, unsigned long addr)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long l_addr, h_addr, l_len, h_len;
	int ret, psidx;
	pte_t *pte;

	pte = get_pte_offset(mm, addr);
	if (!pte || !(pte_val(*pte))) {
		return;
	}
	psidx = ptel_ps_to_index(pte_val(*pte) & _PAGE_PS_MASK);
	if ((addr & ~ps_param[psidx].mask) == 0) {
		return;
	}
	/* lower part: start of the large page up to addr; upper part: the rest */
	l_addr = addr & ps_param[psidx].mask;
	l_len  = addr - l_addr;
	h_addr = addr;
	h_len  = ps_param[psidx].size - l_len;

	ret = check_pte_consistency_range(vma, l_addr, ps_param[psidx].size);
	if (ret) {
		printk(KERN_DEBUG "exmem : split_pte_pagesize : "
		       "pte error : %d\n", ret);
		BUG();
	}
	adjust_pte_pagesize_range(vma, l_addr, l_len, MAX_PS_INDEX);
	adjust_pte_pagesize_range(vma, h_addr, h_len, MAX_PS_INDEX);

	/* flush against the VMA that was modified, not the mm's first VMA */
	flush_tlb_range(vma, l_addr, l_addr + ps_param[psidx].size);

	return;
}

/*
 * Hook to run before @vma is split at @addr: make sure no large page
 * straddles the split point.
 *
 * Only VMAs carrying both VM_IO and VM_RESERVED are handled.  An @addr
 * outside [vm_start, vm_end] is a BUG(); an @addr equal to either end
 * needs no work.
 *
 * This function must be called with the MM semaphore held.
 * The addr is in the vma's range.
 */
void mn2ws_prepare_split_vma(struct vm_area_struct *vma, unsigned long addr)
{
	const unsigned long required = VM_IO | VM_RESERVED;
	struct mm_struct *mm = vma->vm_mm;

	if ((vma->vm_flags & required) != required)
		return;

	if (addr < vma->vm_start || addr > vma->vm_end) {
		printk(KERN_DEBUG "exmem : mn2ws_prepare_split_vma : "
		       "invalid address\n");
		BUG();
	}

	/* splitting exactly at an end leaves every page intact */
	if (addr == vma->vm_start || addr == vma->vm_end)
		return;

	spin_lock(&mm->page_table_lock);
	split_pte_pagesize(vma, addr);
	spin_unlock(&mm->page_table_lock);
}

/*
 * Reduce the page size of the PTEs in [addr, addr + len) to at most the
 * size selected by @psidx.
 *
 * The range is walked page by page in units of the page size currently
 * recorded in each PTE.  Pages larger than @psidx are checked for
 * consistency (a failure is a BUG()) and re-stamped with the @psidx page
 * size; addresses without a PTE are skipped in steps of the smallest page
 * size.  The TLB is flushed for the whole range afterwards.
 *
 * This function must be called with the MM semaphore held.
 * And must be called with the page table spinlock held.
 * The range specified by the addr and len is in the vma's range.
 */
static void deoptimize_pte_pagesize_range(struct vm_area_struct *vma,
	unsigned long addr, unsigned long len, int psidx)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long setaddr;
	int ret, oldidx;
	pte_t *pte;

	setaddr = addr;
	while (setaddr < addr+len) {
		pte = get_pte_offset(mm, setaddr);
		if (pte && pte_val(*pte)) {
			oldidx = ptel_ps_to_index(pte_val(*pte) & _PAGE_PS_MASK);
			if (psidx < oldidx) {
				ret = check_pte_consistency_range(vma, setaddr, ps_param[oldidx].size);
				if (ret) {
					printk(KERN_DEBUG "exmem : deoptimize_pte_pagesize_range : "
					       "pte error : %d\n", ret);
					BUG();
				}
				set_pte_pagesize_range(vma, setaddr, ps_param[oldidx].size,
						   ps_param[psidx].ptel_ps);
			}
			/* step over the whole (former) large page */
			setaddr += ps_param[oldidx].size;
		} else {
			setaddr += ps_param[MIN_PS_INDEX].size;
		}
	}

	/* flush against the VMA that was modified, not the mm's first VMA */
	flush_tlb_range(vma, addr, addr + len);
	return;
}

/*
 * Hook to run before [old_addr, old_addr + old_len) of @vma is moved to
 * @new_addr: make the PTE page sizes compatible with the new location.
 *
 * Only VMAs carrying both VM_IO and VM_RESERVED are handled; a range that
 * is not inside the VMA is a BUG().  The largest page size that remains
 * usable is the biggest one for which old and new address share the same
 * in-page offset.  Large pages straddling either end of the range are
 * split first, then pages larger than the usable size are reduced.
 *
 * This Function must be called with the MM semaphore held.
 * The range specified by the old_addr and old_len is in the vma's range.
 */
void mn2ws_prepare_move_vma(struct vm_area_struct *vma,
	unsigned long old_addr, unsigned long old_len, unsigned long new_addr)
{
	const unsigned long required = VM_IO | VM_RESERVED;
	const unsigned long old_end = old_addr + old_len;
	struct mm_struct *mm = vma->vm_mm;
	int keep = MIN_PS_INDEX;
	int idx;

	if ((vma->vm_flags & required) != required)
		return;

	if (old_addr < vma->vm_start || old_end > vma->vm_end) {
		printk(KERN_DEBUG "exmem : mn2ws_prepare_move_vma : "
		       "invalid address\n");
		BUG();
	}

	/* largest page size whose alignment survives the move */
	for (idx = MAX_PS_INDEX; idx > MIN_PS_INDEX; idx--) {
		unsigned long in_page = ~ps_param[idx].mask;

		if ((old_addr & in_page) == (new_addr & in_page)) {
			keep = idx;
			break;
		}
	}

	spin_lock(&mm->page_table_lock);

	if (old_addr != vma->vm_start)
		split_pte_pagesize(vma, old_addr);
	if (old_end != vma->vm_end)
		split_pte_pagesize(vma, old_end);
	deoptimize_pte_pagesize_range(vma, old_addr, old_len, keep);

	spin_unlock(&mm->page_table_lock);
}
#endif /* CONFIG_ARM */

/*
 * Adjust the page protection @prot of a mapping of the exmem device
 * according to the requested cache mode.
 *
 * @file:  file being mapped; anything but an exmem file leaves @prot alone
 * @prot:  protection value to update in place
 * @flags: mmap flags; the MAP_CACHEMASK bits select the cache mode
 * @vaddr: virtual address of the mapping
 * @paddr: physical address of the mapping
 *
 * A file opened with O_SYNC is always mapped uncached.  Otherwise the
 * cache mode from @flags is applied; when none of the known modes is
 * given, the mapping becomes uncached if either address lies in an
 * uncached region (IS_ADDR_UNCACHED / IS_EXMEM_VADDR_UNCACHED).
 */
void exmem_fixup_page_prot(struct file *file, pgprot_t *prot,
				     unsigned long flags,
				     unsigned long vaddr, unsigned long paddr)
{
	if (file == NULL)
		return;
	if (file->f_op != &exmem_fops)
		return;

	if (file->f_flags & O_SYNC) {
		*prot = pgprot_noncached(*prot);
		return;
	}

	switch (flags & MAP_CACHEMASK) {
	case MAP_WRITEBACK:
		/* keep the protection as it is */
		return;

	case MAP_WRITETHROUGH:
#if defined(CONFIG_ARM) && defined(CONFIG_ARCH_UNIPHIER)
		*prot = pgprot_noncached(*prot);
#else
		*prot = pgprot_through(*prot);
#endif
		return;

	case MAP_UNCACHE:
		*prot = pgprot_noncached(*prot);
		return;

	case MAP_UNCACHE_UCWG:
		*prot = pgprot_noncached_gathering(*prot);
		return;

	case MAP_UNCACHE_WB:
#if defined(CONFIG_ARM) && defined(CONFIG_ARCH_UNIPHIER)
		*prot = pgprot_noncached_writeback(*prot);
#else
		*prot = pgprot_noncached(*prot);
#endif
		return;

	case MAP_WB_UCWG:
		*prot = pgprot_writeback_gathering(*prot);
		return;

	default:
		break;
	}

	/* no explicit mode: decide from the addresses */
	if (IS_ADDR_UNCACHED(paddr) || IS_EXMEM_VADDR_UNCACHED(vaddr))
		*prot = pgprot_noncached(*prot);
}
