Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755117AbZGVKO4 (ORCPT ); Wed, 22 Jul 2009 06:14:56 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1754528AbZGVKOx (ORCPT ); Wed, 22 Jul 2009 06:14:53 -0400 Received: from smtp161.dfw.emailsrvr.com ([67.192.241.161]:40236 "EHLO smtp161.dfw.emailsrvr.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754168AbZGVKKu (ORCPT ); Wed, 22 Jul 2009 06:10:50 -0400 From: Oren Laadan To: Andrew Morton Cc: Linus Torvalds , containers@lists.linux-foundation.org, linux-kernel@vger.kernel.org, linux-mm@kvack.org, linux-api@vger.kernel.org, Serge Hallyn , Dave Hansen , Ingo Molnar , "H. Peter Anvin" , Alexander Viro , Pavel Emelyanov , Alexey Dobriyan , Oren Laadan , Oren Laadan Subject: [RFC v17][PATCH 42/60] c/r: restore anonymous- and file-mapped- shared memory Date: Wed, 22 Jul 2009 06:00:04 -0400 Message-Id: <1248256822-23416-43-git-send-email-orenl@librato.com> X-Mailer: git-send-email 1.6.0.4 In-Reply-To: <1248256822-23416-1-git-send-email-orenl@librato.com> References: <1248256822-23416-1-git-send-email-orenl@librato.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 10128 Lines: 326 The bulk of the work is in ckpt_read_vma(), which has been refactored: the part that creates the suitable 'struct file *' for the mapping is now larger and has been moved to a separate function. What's left is to read the VMA description, get the file pointer, create the mapping, and proceed to read the contents in. Both anonymous shared VMAs that have been read earlier (as indicated by a lookup in objhash) and file-mapped shared VMAs are skipped. Anonymous shared VMAs seen for the first time have their contents read in directly to the backing inode, as indexed by the page numbers (as opposed to virtual addresses). 
Changelog[v14]: - Introduce patch Signed-off-by: Oren Laadan --- checkpoint/memory.c | 66 ++++++++++++++++++++++++++++++++----------- include/linux/checkpoint.h | 6 ++++ include/linux/mm.h | 2 + mm/filemap.c | 13 ++++++++- mm/shmem.c | 49 ++++++++++++++++++++++++++++++++ 5 files changed, 118 insertions(+), 18 deletions(-) diff --git a/checkpoint/memory.c b/checkpoint/memory.c index a1d1eca..77234cd 100644 --- a/checkpoint/memory.c +++ b/checkpoint/memory.c @@ -840,13 +840,36 @@ static int restore_read_page(struct ckpt_ctx *ctx, struct page *page, void *p) return 0; } +static struct page *bring_private_page(unsigned long addr) +{ + struct page *page; + int ret; + + ret = get_user_pages(current, current->mm, addr, 1, 1, 1, &page, NULL); + if (ret < 0) + page = ERR_PTR(ret); + return page; +} + +static struct page *bring_shared_page(unsigned long idx, struct inode *ino) +{ + struct page *page = NULL; + int ret; + + ret = shmem_getpage(ino, idx, &page, SGP_WRITE, NULL); + if (ret < 0) + return ERR_PTR(ret); + if (page) + unlock_page(page); + return page; +} + /** * read_pages_contents - read in data of pages in page-array chain * @ctx - restart context */ -static int read_pages_contents(struct ckpt_ctx *ctx) +static int read_pages_contents(struct ckpt_ctx *ctx, struct inode *inode) { - struct mm_struct *mm = current->mm; struct ckpt_pgarr *pgarr; unsigned long *vaddrs; char *buf; @@ -856,17 +879,22 @@ static int read_pages_contents(struct ckpt_ctx *ctx) if (!buf) return -ENOMEM; - down_read(&mm->mmap_sem); + down_read(&current->mm->mmap_sem); list_for_each_entry_reverse(pgarr, &ctx->pgarr_list, list) { vaddrs = pgarr->vaddrs; for (i = 0; i < pgarr->nr_used; i++) { struct page *page; _ckpt_debug(CKPT_DPAGE, "got page %#lx\n", vaddrs[i]); - ret = get_user_pages(current, mm, vaddrs[i], - 1, 1, 1, &page, NULL); - if (ret < 0) + if (inode) + page = bring_shared_page(vaddrs[i], inode); + else + page = bring_private_page(vaddrs[i]); + + if (IS_ERR(page)) { + ret = PTR_ERR(page); 
goto out; + } ret = restore_read_page(ctx, page, buf); page_cache_release(page); @@ -877,14 +905,15 @@ static int read_pages_contents(struct ckpt_ctx *ctx) } out: - up_read(&mm->mmap_sem); + up_read(&current->mm->mmap_sem); kfree(buf); return 0; } /** - * restore_memory_contents - restore contents of a VMA with private memory + * restore_memory_contents - restore contents of a memory region * @ctx - restart context + * @inode - backing inode * * Reads a header that specifies how many pages will follow, then reads * a list of virtual addresses into ctx->pgarr_list page-array chain, @@ -892,7 +921,7 @@ static int read_pages_contents(struct ckpt_ctx *ctx) * these steps until reaching a header specifying "0" pages, which marks * the end of the contents. */ -static int restore_memory_contents(struct ckpt_ctx *ctx) +int restore_memory_contents(struct ckpt_ctx *ctx, struct inode *inode) { struct ckpt_hdr_pgarr *h; unsigned long nr_pages; @@ -919,7 +948,7 @@ static int restore_memory_contents(struct ckpt_ctx *ctx) ret = read_pages_vaddrs(ctx, nr_pages); if (ret < 0) break; - ret = read_pages_contents(ctx); + ret = read_pages_contents(ctx, inode); if (ret < 0) break; pgarr_reset_all(ctx); @@ -977,9 +1006,9 @@ static unsigned long calc_map_flags_bits(unsigned long orig_vm_flags) * @file - file to map (NULL for anonymous) * @h - vma header data */ -static unsigned long generic_vma_restore(struct mm_struct *mm, - struct file *file, - struct ckpt_hdr_vma *h) +unsigned long generic_vma_restore(struct mm_struct *mm, + struct file *file, + struct ckpt_hdr_vma *h) { unsigned long vm_size, vm_start, vm_flags, vm_prot, vm_pgoff; unsigned long addr; @@ -1026,7 +1055,7 @@ int private_vma_restore(struct ckpt_ctx *ctx, struct mm_struct *mm, if (IS_ERR((void *) addr)) return PTR_ERR((void *) addr); - return restore_memory_contents(ctx); + return restore_memory_contents(ctx, NULL); } /** @@ -1086,16 +1115,19 @@ static struct restore_vma_ops restore_vma_ops[] = { { .vma_name = "ANON SHARED", 
.vma_type = CKPT_VMA_SHM_ANON, + .restore = shmem_restore, }, /* anonymous shared (skipped) */ { .vma_name = "ANON SHARED (skip)", .vma_type = CKPT_VMA_SHM_ANON_SKIP, + .restore = shmem_restore, }, /* file-mapped shared */ { .vma_name = "FILE SHARED", .vma_type = CKPT_VMA_SHM_FILE, + .restore = filemap_restore, }, }; @@ -1114,15 +1146,15 @@ static int restore_vma(struct ckpt_ctx *ctx, struct mm_struct *mm) if (IS_ERR(h)) return PTR_ERR(h); - ckpt_debug("vma %#lx-%#lx flags %#lx type %d vmaref %d\n", + ckpt_debug("vma %#lx-%#lx flags %#lx type %d vmaref %d inoref %d\n", (unsigned long) h->vm_start, (unsigned long) h->vm_end, (unsigned long) h->vm_flags, (int) h->vma_type, - (int) h->vma_objref); + (int) h->vma_objref, (int) h->ino_objref); ret = -EINVAL; if (h->vm_end < h->vm_start) goto out; - if (h->vma_objref < 0) + if (h->vma_objref < 0 || h->ino_objref < 0) goto out; if (h->vma_type >= CKPT_VMA_MAX) goto out; diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h index 54cc4b0..5920453 100644 --- a/include/linux/checkpoint.h +++ b/include/linux/checkpoint.h @@ -170,9 +170,15 @@ extern int ckpt_collect_mm(struct ckpt_ctx *ctx, struct task_struct *t); extern int checkpoint_mm(struct ckpt_ctx *ctx, void *ptr); extern void *restore_mm(struct ckpt_ctx *ctx); +extern unsigned long generic_vma_restore(struct mm_struct *mm, + struct file *file, + struct ckpt_hdr_vma *h); + extern int private_vma_restore(struct ckpt_ctx *ctx, struct mm_struct *mm, struct file *file, struct ckpt_hdr_vma *h); +extern int restore_memory_contents(struct ckpt_ctx *ctx, struct inode *inode); + #define CKPT_VMA_NOT_SUPPORTED \ (VM_IO | VM_HUGETLB | VM_NONLINEAR | VM_PFNMAP | \ diff --git a/include/linux/mm.h b/include/linux/mm.h index 6c2c3dd..5f341ac 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1198,6 +1198,8 @@ extern int filemap_restore(struct ckpt_ctx *ctx, struct mm_struct *mm, struct ckpt_hdr_vma *hh); extern int special_mapping_restore(struct ckpt_ctx *ctx, 
struct mm_struct *mm, struct ckpt_hdr_vma *hh); +extern int shmem_restore(struct ckpt_ctx *ctx, struct mm_struct *mm, + struct ckpt_hdr_vma *hh); #endif /* readahead.c */ diff --git a/mm/filemap.c b/mm/filemap.c index a07bb3d..0c4906f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1711,17 +1711,28 @@ int filemap_restore(struct ckpt_ctx *ctx, struct ckpt_hdr_vma *h) { struct file *file; + unsigned long addr; int ret; if (h->vma_type == CKPT_VMA_FILE && (h->vm_flags & (VM_SHARED | VM_MAYSHARE))) return -EINVAL; + if (h->vma_type == CKPT_VMA_SHM_FILE && + !(h->vm_flags & (VM_SHARED | VM_MAYSHARE))) + return -EINVAL; file = ckpt_obj_fetch(ctx, h->vma_objref, CKPT_OBJ_FILE); if (IS_ERR(file)) return PTR_ERR(file); - ret = private_vma_restore(ctx, mm, file, h); + if (h->vma_type == CKPT_VMA_FILE) { + /* private mapped file */ + ret = private_vma_restore(ctx, mm, file, h); + } else { + /* shared mapped file */ + addr = generic_vma_restore(mm, file, h); + ret = (IS_ERR((void *) addr) ? PTR_ERR((void *) addr) : 0); + } return ret; } #endif /* CONFIG_CHECKPOINT */ diff --git a/mm/shmem.c b/mm/shmem.c index 808e14a..9334810 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2406,6 +2406,55 @@ static int shmem_checkpoint(struct ckpt_ctx *ctx, struct vm_area_struct *vma) return shmem_vma_checkpoint(ctx, vma, vma_type, ino_objref); } + +int shmem_restore(struct ckpt_ctx *ctx, + struct mm_struct *mm, struct ckpt_hdr_vma *h) +{ + unsigned long addr; + struct file *file; + int ret = 0; + + file = ckpt_obj_fetch(ctx, h->ino_objref, CKPT_OBJ_FILE); + if (PTR_ERR(file) == -EINVAL) + file = NULL; + if (IS_ERR(file)) + return PTR_ERR(file); + + /* if file is NULL, this is the premiere - create and insert */ + if (!file) { + if (h->vma_type != CKPT_VMA_SHM_ANON) + return -EINVAL; + /* + * in theory could pass NULL to mmap and let it create + * the file. 
But, if 'shm_size != vm_end - vm_start', + * or if 'vm_pgoff != 0', then the vma reflects only a + * portion of the shm object and we need to "manually" + * create the full shm object. + */ + file = shmem_file_setup("/dev/zero", h->ino_size, h->vm_flags); + if (IS_ERR(file)) + return PTR_ERR(file); + ret = ckpt_obj_insert(ctx, file, h->ino_objref, CKPT_OBJ_FILE); + if (ret < 0) + goto out; + } else { + if (h->vma_type != CKPT_VMA_SHM_ANON_SKIP) + return -EINVAL; + /* Already need fput() for the file above; keep path simple */ + get_file(file); + } + + addr = generic_vma_restore(mm, file, h); + if (IS_ERR((void *) addr)) + return PTR_ERR((void *) addr); + + if (h->vma_type == CKPT_VMA_SHM_ANON) + ret = restore_memory_contents(ctx, file->f_dentry->d_inode); + out: + fput(file); + return ret; +} + #endif /* CONFIG_CHECKPOINT */ static void init_once(void *foo) -- 1.6.0.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/