Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S934013AbYBOGvi (ORCPT ); Fri, 15 Feb 2008 01:51:38 -0500 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1762660AbYBOGtl (ORCPT ); Fri, 15 Feb 2008 01:49:41 -0500 Received: from relay2.sgi.com ([192.48.171.30]:38865 "EHLO relay.sgi.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1756875AbYBOGtg (ORCPT ); Fri, 15 Feb 2008 01:49:36 -0500 Message-Id: <20080215064933.630179244@sgi.com> References: <20080215064859.384203497@sgi.com> User-Agent: quilt/0.46-1 Date: Thu, 14 Feb 2008 22:49:05 -0800 From: Christoph Lameter To: akpm@linux-foundation.org Cc: Andrea Arcangeli Cc: Robin Holt , Avi Kivity , Izik Eidus Cc: kvm-devel@lists.sourceforge.net Cc: Peter Zijlstra , general@lists.openfabrics.org Cc: Steve Wise Cc: Roland Dreier Cc: Kanoj Sarcar Cc: steiner@sgi.com, linux-kernel@vger.kernel.org, linux-mm@kvack.org Cc: daniel.blueman@quadrics.com Subject: [patch 6/6] mmu_rmap_notifier: Skeleton for complex driver that uses its own rmaps Content-Disposition: inline; filename=mmu_rmap_skeleton Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 8864 Lines: 336 The skeleton for the rmap notifier leaves the invalidate_page method of the mmu_notifier empty and hooks a new invalidate_page callback into the global chain for mmu_rmap_notifiers. There are seveal simplifications in here to avoid making this too complex. The reverse maps need to consit of references to vma f.e. Signed-off-by: Christoph Lameter --- Documentation/mmu_notifier/skeleton_rmap.c | 311 +++++++++++++++++++++++++++++ 1 file changed, 311 insertions(+) Index: linux-2.6/Documentation/mmu_notifier/skeleton_rmap.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6/Documentation/mmu_notifier/skeleton_rmap.c 2008-02-14 22:23:01.000000000 -0800 @@ -0,0 +1,311 @@ +#include +#include +#include +#include +#include + +/* + * Skeleton for an mmu notifier with rmap callbacks and sleeping during + * invalidate_page. + * + * (C) 2008 Silicon Graphics, Inc. + * Christoph Lameter + * + * Note that the locking is fairly basic. One can add various optimizations + * here and there. There is a single lock for an address space which should be + * satisfactory for most cases. If not then the lock can be split like the + * pte_lock in Linux. It is most likely best to place the locks in the + * page table structure or into whatever the external mmu uses to + * track the mappings. + */ + +struct my_mmu { + /* MMU notifier specific fields */ + struct mmu_notifier notifier; + spinlock_t lock; /* Protects counter and invidual zaps */ + int invalidates; /* Number of active range_invalidate */ + + /* Rmap support */ + struct list_head list; /* rmap list of my_mmu structs */ + unsigned long base; +}; + +/* + * Called with m->lock held + */ +static void my_mmu_insert_page(struct my_mmu *m, + unsigned long address, unsigned long pfn) +{ + /* Must be provided */ + printk(KERN_INFO "insert page %p address=%lx pfn=%ld\n", + m, address, pfn); +} + +/* + * Called with m->lock held + */ +static void my_mmu_zap_range(struct my_mmu *m, + unsigned long start, unsigned long end, int atomic) +{ + /* Must be provided */ + printk(KERN_INFO "zap range %p address=%lx-%lx atomic=%d\n", + m, start, end, atomic); +} + +/* + * Called with m->lock held (optional but usually required to + * protect data structures of the driver). + */ +static void my_mmu_zap_page(struct my_mmu *m, unsigned long address) +{ + /* Must be provided */ + printk(KERN_INFO "zap page %p address=%lx\n", m, address); +} + +/* + * Increment and decrement of the number of range invalidates + */ +static inline void inc_active(struct my_mmu *m) +{ + spin_lock(&m->lock); + m->invalidates++; + spin_unlock(&m->lock); +} + +static inline void dec_active(struct my_mmu *m) +{ + spin_lock(&m->lock); + m->invalidates--; + spin_unlock(&m->lock); +} + +static void my_mmu_invalidate_range_begin(struct mmu_notifier *mn, + struct mm_struct *mm, unsigned long start, unsigned long end, + int atomic) +{ + struct my_mmu *m = container_of(mn, struct my_mmu, notifier); + + inc_active(m); /* Holds off new references */ + my_mmu_zap_range(m, start, end, atomic); +} + +static void my_mmu_invalidate_range_end(struct mmu_notifier *mn, + struct mm_struct *mm, unsigned long start, unsigned long end, + int atomic) +{ + struct my_mmu *m = container_of(mn, struct my_mmu, notifier); + + dec_active(m); /* Enables new references */ +} + +/* + * Populate a page. + * + * A return value of-EAGAIN means please retry this operation. + * + * Acuisition of mmap_sem can be omitted if the caller already holds + * the semaphore. + */ +struct page *my_mmu_populate_page(struct my_mmu *m, + struct vm_area_struct *vma, + unsigned long address, int write) +{ + struct mm_struct *mm = vma->vm_mm; + struct page *page; + int err; + int done = 0; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep, pte; + spinlock_t *ptl; + + /* No need to do anything if a range invalidate is running */ + if (m->invalidates) + return ERR_PTR(-EAGAIN); + + down_read(&mm->mmap_sem); + do { + page = ERR_PTR(-EAGAIN); + + if (m->invalidates) + break; + + pgd = pgd_offset(mm, address); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + goto check; + + pud = pud_offset(pgd, address); + if (pud_none(*pud) || unlikely(pud_bad(*pud))) + goto check; + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + goto check; + + ptep = pte_offset_map_lock(mm, pmd, address, &ptl); + if (!ptep) + goto check; + + pte = *ptep; + if (!pte_present(pte)) + goto pte_unlock; + if (write && !pte_write(pte)) + goto pte_unlock; + + page = vm_normal_page(vma, address, pte); + if (page) { + done = 1; + /* + * The m->lock is held to ensure that the count of + * current invalidates stays constant. + * invalidate_page() is held off by the pte lock. + */ + spin_lock(&m->lock); + + if (!m->invalidates) + my_mmu_insert_page(m, address, page_to_pfn(page)); + else + page = ERR_PTR(-EAGAIN); + + spin_unlock(&m->lock); + } +pte_unlock: + pte_unmap_unlock(ptep, ptl); +check: + + if (done) + break; + + /* + * Need to run the page fault handler to get the pte entry + * setup right. + */ + err = get_user_pages(current, vma->vm_mm, address, 1, + write, 1, NULL, NULL); + + if (err < 0) { + page = ERR_PTR(err); + break; + } + + } while (!done); + + up_read(&vma->vm_mm->mmap_sem); + return page; +} + +/* + * All other threads accessing this mm_struct must have terminated by now. + */ +static void my_mmu_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct my_mmu *m = container_of(mn, struct my_mmu, notifier); + + my_mmu_zap_range(m, 0, TASK_SIZE, 0); + /* No concurrent processes thus no worries about RCU */ + list_del(&m->list); + kfree(m); + printk(KERN_INFO "MMU Notifier terminating\n"); +} + +static struct mmu_notifier_ops my_mmu_ops = { + my_mmu_release, + NULL, /* No aging function */ + NULL, /* No atomic invalidate_page function */ + my_mmu_invalidate_range_begin, + my_mmu_invalidate_range_end +}; + +/* Rmap specific fields */ +static LIST_HEAD(my_mmu_list); +static struct rw_semaphore listlock; + +/* + * This function must be called to activate callbacks from a process + */ +int my_mmu_attach_to_process(struct mm_struct *mm) +{ + struct my_mmu *m = kzalloc(sizeof(struct my_mmu), GFP_KERNEL); + + if (!m) + return -ENOMEM; + + m->notifier.ops = &my_mmu_ops; + spin_lock_init(&m->lock); + + /* + * mmap_sem handling can be omitted if it is guaranteed that + * the context from which my_mmu_attach_to_process is called + * is already holding a writelock on mmap_sem. + */ + down_write(&mm->mmap_sem); + mmu_notifier_register(&m->notifier, mm); + up_write(&mm->mmap_sem); + down_write(&listlock); + list_add(&m->list, &my_mmu_list); + up_write(&listlock); + + /* + * RCU sync is expensive but necessary if we need to guarantee + * that multiple threads running on other cpus have seen the + * notifier changes. + */ + synchronize_rcu(); + return 0; +} + + +static void my_sleeping_invalidate_page(struct my_mmu *m, unsigned long address) +{ + /* Must be provided */ + spin_lock(&m->lock); /* Only taken to ensure mmu data integrity */ + my_mmu_zap_page(m, address); + spin_unlock(&m->lock); + printk(KERN_INFO "Sleeping invalidate_page %p address=%lx\n", + m, address); +} + +static unsigned long my_mmu_find_addr(struct my_mmu *m, struct page *page) +{ + /* Determine the address of a page in a mmu segment */ + return -EFAULT; +} + +/* + * A reference must be held on the page passed and the page passed + * must be locked. No spinlocks are held. invalidate_page() is held + * off by us holding the page lock. + */ +static void my_mmu_rmap_invalidate_page(struct mmu_rmap_notifier *mrn, + struct page *page) +{ + struct my_mmu *m; + + BUG_ON(!PageLocked(page)); + down_read(&listlock); + list_for_each_entry(m, &my_mmu_list, list) { + unsigned long address = my_mmu_find_addr(m, page); + + if (address != -EFAULT) + my_sleeping_invalidate_page(m, address); + } + up_read(&listlock); +} + +static struct mmu_rmap_notifier_ops my_mmu_rmap_ops = { + .invalidate_page = my_mmu_rmap_invalidate_page +}; + +static struct mmu_rmap_notifier my_mmu_rmap_notifier = { + .ops = &my_mmu_rmap_ops +}; + +static int __init my_mmu_init(void) +{ + mmu_rmap_notifier_register(&my_mmu_rmap_notifier); + return 0; +} + +late_initcall(my_mmu_init); + -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/