Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1758514AbYJQXN3 (ORCPT ); Fri, 17 Oct 2008 19:13:29 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S1757464AbYJQXLv (ORCPT ); Fri, 17 Oct 2008 19:11:51 -0400 Received: from mailhub.sw.ru ([195.214.232.25]:13296 "EHLO relay.sw.ru" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1757405AbYJQXLu (ORCPT ); Fri, 17 Oct 2008 19:11:50 -0400 From: Andrey Mirkin To: containers@lists.linux-foundation.org, linux-kernel@vger.kernel.org Cc: Pavel Emelyanov , Andrey Mirkin Subject: [PATCH 05/10] Introduce function to dump process Date: Sat, 18 Oct 2008 03:11:33 +0400 Message-Id: <1224285098-573-6-git-send-email-major@openvz.org> X-Mailer: git-send-email 1.5.6 In-Reply-To: <1224285098-573-5-git-send-email-major@openvz.org> References: <1224285098-573-1-git-send-email-major@openvz.org> <1224285098-573-2-git-send-email-major@openvz.org> <1224285098-573-3-git-send-email-major@openvz.org> <1224285098-573-4-git-send-email-major@openvz.org> <1224285098-573-5-git-send-email-major@openvz.org> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 11746 Lines: 448 Functions to dump task struct, fpu state and registers are added. All IDs are saved from the POV of process (container) namespace. 
Signed-off-by: Andrey Mirkin --- checkpoint/Makefile | 2 +- checkpoint/checkpoint.c | 2 +- checkpoint/checkpoint.h | 1 + checkpoint/cpt_image.h | 123 ++++++++++++++++++++++++ checkpoint/cpt_process.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 362 insertions(+), 2 deletions(-) create mode 100644 checkpoint/cpt_process.c diff --git a/checkpoint/Makefile b/checkpoint/Makefile index 173346b..457cc96 100644 --- a/checkpoint/Makefile +++ b/checkpoint/Makefile @@ -2,4 +2,4 @@ obj-y += sys_core.o obj-$(CONFIG_CHECKPOINT) += cptrst.o -cptrst-objs := sys.o checkpoint.o +cptrst-objs := sys.o checkpoint.o cpt_process.o diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c index c4bddce..aae198d 100644 --- a/checkpoint/checkpoint.c +++ b/checkpoint/checkpoint.c @@ -70,7 +70,7 @@ int dump_container(struct cpt_context *ctx) /* Dump task here */ if (!err) - err = -ENOSYS; + err = cpt_dump_task(root, ctx); out: ctx->nsproxy = NULL; diff --git a/checkpoint/checkpoint.h b/checkpoint/checkpoint.h index 6926aa2..9e46b10 100644 --- a/checkpoint/checkpoint.h +++ b/checkpoint/checkpoint.h @@ -60,3 +60,4 @@ extern int debug_level; #define dprintk(a...) 
cpt_printk(1, "CPT DBG: " a) int dump_container(struct cpt_context *ctx); +int cpt_dump_task(struct task_struct *tsk, struct cpt_context *ctx); diff --git a/checkpoint/cpt_image.h b/checkpoint/cpt_image.h index 0338dd0..cddfe37 100644 --- a/checkpoint/cpt_image.h +++ b/checkpoint/cpt_image.h @@ -13,6 +13,9 @@ #ifndef __CPT_IMAGE_H_ #define __CPT_IMAGE_H_ 1 +#include +#include + enum _cpt_object_type { CPT_OBJ_TASK = 0, @@ -20,6 +23,8 @@ enum _cpt_object_type /* The objects above are stored in memory while checkpointing */ CPT_OBJ_HEAD = 1024, + CPT_OBJ_X86_REGS, + CPT_OBJ_BITS, }; enum _cpt_content_type { @@ -28,6 +33,8 @@ enum _cpt_content_type { CPT_CONTENT_DATA, CPT_CONTENT_NAME, CPT_CONTENT_REF, + CPT_CONTENT_X86_FPUSTATE, + CPT_CONTENT_X86_FPUSTATE_OLD, CPT_CONTENT_MAX }; @@ -60,4 +67,120 @@ struct cpt_object_hdr __u16 cpt_content; /* Content type: array, reference... */ } __attribute__ ((aligned (8))); /* Image record describing one task; cpt_dump_task_struct() fills one in and writes it out. The first four fields repeat the layout of the other image records in this header. */ +struct cpt_task_image { + __u64 cpt_len; + __u32 cpt_hdrlen; + __u16 cpt_type; + __u16 cpt_content; + + __u64 cpt_state; + __u64 cpt_flags; /* CPT_PF_*: bit positions (not masks) inside cpt_flags; encode_task_flags() sets bit (1 << CPT_PF_x) for the matching PF_x task flag. */ +#define CPT_PF_EXITING 0 +#define CPT_PF_FORKNOEXEC 1 +#define CPT_PF_SUPERPRIV 2 +#define CPT_PF_DUMPCORE 3 +#define CPT_PF_SIGNALED 4 +#define CPT_PF_USED_MATH 5 + + __u64 cpt_thrflags; + __u64 cpt_thrstatus; + __u32 cpt_pid; + __u32 cpt_tgid; + __u32 cpt_ppid; + __u32 cpt_rppid; + __u32 cpt_pgrp; + __u32 cpt_session; + __u32 cpt_old_pgrp; + __u32 cpt_leader; + __u64 cpt_set_tid; + __u64 cpt_clear_tid; + __u32 cpt_exit_code; + __u32 cpt_exit_signal; + __u32 cpt_pdeath_signal; + __u32 cpt_user; + __u32 cpt_uid; + __u32 cpt_euid; + __u32 cpt_suid; + __u32 cpt_fsuid; + __u32 cpt_gid; + __u32 cpt_egid; + __u32 cpt_sgid; + __u32 cpt_fsgid; + __u8 cpt_comm[TASK_COMM_LEN]; + __u64 cpt_tls[GDT_ENTRY_TLS_ENTRIES]; + __u64 cpt_utime; + __u64 cpt_stime; + __u64 cpt_utimescaled; + __u64 cpt_stimescaled; + __u64 cpt_gtime; + __u64 cpt_prev_utime; + __u64 cpt_prev_stime; + __u64 cpt_start_time; + __u64 cpt_real_start_time; + __u64 
cpt_nvcsw; + __u64 cpt_nivcsw; + __u64 cpt_min_flt; + __u64 cpt_maj_flt; +} __attribute__ ((aligned (8))); + /* Header written in front of a raw data blob (cpt_dump_fpustate() uses it for the FPU state): cpt_size is the blob length in bytes and cpt_len covers header plus blob. */ +struct cpt_obj_bits +{ + __u64 cpt_len; + __u32 cpt_hdrlen; + __u16 cpt_type; + __u16 cpt_content; + + __u32 cpt_size; + __u32 __cpt_pad1; +} __attribute__ ((aligned (8))); + /* CPT_SEG_*: encoded segment-register values produced by encode_segment(). An LDT selector is stored as CPT_SEG_LDT plus its descriptor index. */ +#define CPT_SEG_ZERO 0 +#define CPT_SEG_TLS1 1 +#define CPT_SEG_TLS2 2 +#define CPT_SEG_TLS3 3 +#define CPT_SEG_USER32_DS 4 +#define CPT_SEG_USER32_CS 5 +#define CPT_SEG_USER64_DS 6 +#define CPT_SEG_USER64_CS 7 +#define CPT_SEG_LDT 256 + /* Image record with a task's debug registers and saved user registers, filled by cpt_dump_registers(). The six segment fields (cs, ds, es, fs, gs, ss) hold CPT_SEG_* codes rather than raw selectors. */ +struct cpt_x86_regs +{ + __u64 cpt_len; + __u32 cpt_hdrlen; + __u16 cpt_type; + __u16 cpt_content; + + __u32 cpt_debugreg[8]; + __u32 cpt_gs; + + __u32 cpt_bx; + __u32 cpt_cx; + __u32 cpt_dx; + __u32 cpt_si; + __u32 cpt_di; + __u32 cpt_bp; + __u32 cpt_ax; + __u32 cpt_ds; + __u32 cpt_es; + __u32 cpt_fs; + __u32 cpt_orig_ax; + __u32 cpt_ip; + __u32 cpt_cs; + __u32 cpt_flags; + __u32 cpt_sp; + __u32 cpt_ss; +} __attribute__ ((aligned (8))); + /* Pack a timespec into one 64-bit value: tv_sec shifted into the upper 32 bits, tv_nsec added into the lower half. */ +static inline __u64 cpt_timespec_export(struct timespec *tv) +{ + return (((u64)tv->tv_sec) << 32) + tv->tv_nsec; +} + /* Inverse of cpt_timespec_export(): the upper 32 bits of val become tv_sec, the lower 32 bits become tv_nsec. */ +static inline void cpt_timespec_import(struct timespec *tv, __u64 val) +{ + tv->tv_sec = val >> 32; + tv->tv_nsec = (val & 0xFFFFFFFF); +} + #endif /* __CPT_IMAGE_H_ */ diff --git a/checkpoint/cpt_process.c b/checkpoint/cpt_process.c new file mode 100644 index 0000000..58f608d --- /dev/null +++ b/checkpoint/cpt_process.c @@ -0,0 +1,236 @@ +/* + * Copyright (C) 2008 Parallels, Inc. + * + * Author: Andrey Mirkin + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2 of the + * License. 
+ * + */ + +#include +#include +#include +#include +#include + +#include "checkpoint.h" +#include "cpt_image.h" + +static unsigned int encode_task_flags(unsigned int task_flags) +{ + unsigned int flags = 0; + + if (task_flags & PF_EXITING) + flags |= (1 << CPT_PF_EXITING); + if (task_flags & PF_FORKNOEXEC) + flags |= (1 << CPT_PF_FORKNOEXEC); + if (task_flags & PF_SUPERPRIV) + flags |= (1 << CPT_PF_SUPERPRIV); + if (task_flags & PF_DUMPCORE) + flags |= (1 << CPT_PF_DUMPCORE); + if (task_flags & PF_SIGNALED) + flags |= (1 << CPT_PF_SIGNALED); + if (task_flags & PF_USED_MATH) + flags |= (1 << CPT_PF_USED_MATH); + + return flags; + +} + +int cpt_dump_task_struct(struct task_struct *tsk, struct cpt_context *ctx) +{ + struct cpt_task_image *t; + int i; + int err; + + t = kzalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return -ENOMEM; + + t->cpt_len = sizeof(*t); + t->cpt_type = CPT_OBJ_TASK; + t->cpt_hdrlen = sizeof(*t); + t->cpt_content = CPT_CONTENT_ARRAY; + + t->cpt_state = tsk->state; + t->cpt_flags = encode_task_flags(tsk->flags); + t->cpt_exit_code = tsk->exit_code; + t->cpt_exit_signal = tsk->exit_signal; + t->cpt_pdeath_signal = tsk->pdeath_signal; + t->cpt_pid = task_pid_nr_ns(tsk, ctx->nsproxy->pid_ns); + t->cpt_tgid = task_tgid_nr_ns(tsk, ctx->nsproxy->pid_ns); + t->cpt_ppid = tsk->parent ? + task_pid_nr_ns(tsk->parent, ctx->nsproxy->pid_ns) : 0; + t->cpt_rppid = tsk->real_parent ? + task_pid_nr_ns(tsk->real_parent, ctx->nsproxy->pid_ns) : 0; + t->cpt_pgrp = task_pgrp_nr_ns(tsk, ctx->nsproxy->pid_ns); + t->cpt_session = task_session_nr_ns(tsk, ctx->nsproxy->pid_ns); + t->cpt_old_pgrp = 0; + if (tsk->signal->tty_old_pgrp) + t->cpt_old_pgrp = pid_vnr(tsk->signal->tty_old_pgrp); + t->cpt_leader = tsk->group_leader ? 
task_pid_vnr(tsk->group_leader) : 0; + t->cpt_utime = tsk->utime; + t->cpt_stime = tsk->stime; + t->cpt_utimescaled = tsk->utimescaled; + t->cpt_stimescaled = tsk->stimescaled; + t->cpt_gtime = tsk->gtime; + t->cpt_prev_utime = tsk->prev_utime; + t->cpt_prev_stime = tsk->prev_stime; + t->cpt_nvcsw = tsk->nvcsw; + t->cpt_nivcsw = tsk->nivcsw; + t->cpt_start_time = cpt_timespec_export(&tsk->start_time); + t->cpt_real_start_time = cpt_timespec_export(&tsk->real_start_time); + t->cpt_min_flt = tsk->min_flt; + t->cpt_maj_flt = tsk->maj_flt; + memcpy(t->cpt_comm, tsk->comm, TASK_COMM_LEN); + for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) { + t->cpt_tls[i] = (((u64)tsk->thread.tls_array[i].b) << 32) + + tsk->thread.tls_array[i].a; + } + /* TODO: encode thread flags and status like task flags */ + t->cpt_thrflags = task_thread_info(tsk)->flags & ~(1<cpt_thrstatus = task_thread_info(tsk)->status; + t->cpt_user = tsk->user->uid; + t->cpt_uid = tsk->uid; + t->cpt_euid = tsk->euid; + t->cpt_suid = tsk->suid; + t->cpt_fsuid = tsk->fsuid; + t->cpt_gid = tsk->gid; + t->cpt_egid = tsk->egid; + t->cpt_sgid = tsk->sgid; + t->cpt_fsgid = tsk->fsgid; + + err = ctx->write(t, sizeof(*t), ctx); + + kfree(t); + return err; +} + +static int cpt_dump_fpustate(struct task_struct *tsk, struct cpt_context *ctx) +{ + struct cpt_obj_bits hdr; + int err; + int content; + unsigned long size; + + content = CPT_CONTENT_X86_FPUSTATE; + size = sizeof(struct i387_fxsave_struct); +#ifndef CONFIG_X86_64 + if (!cpu_has_fxsr) { + size = sizeof(struct i387_fsave_struct); + content = CPT_CONTENT_X86_FPUSTATE_OLD; + } +#endif + + hdr.cpt_len = sizeof(hdr) + size; + hdr.cpt_type = CPT_OBJ_BITS; + hdr.cpt_hdrlen = sizeof(hdr); + hdr.cpt_content = content; + hdr.cpt_size = size; + err = ctx->write(&hdr, sizeof(hdr), ctx); + if (!err) + ctx->write(tsk->thread.xstate, size, ctx); + return err; +} + +static u32 encode_segment(u32 segreg) +{ + segreg &= 0xFFFF; + + if (segreg == 0) + return CPT_SEG_ZERO; + if ((segreg 
& 3) != 3) { + eprintk("Invalid RPL of a segment reg %x\n", segreg); + return CPT_SEG_ZERO; + } + + /* LDT descriptor, it is just an index to LDT array */ + if (segreg & 4) + return CPT_SEG_LDT + (segreg >> 3); + + /* TLS descriptor. */ + if ((segreg >> 3) >= GDT_ENTRY_TLS_MIN && + (segreg >> 3) <= GDT_ENTRY_TLS_MAX) + return CPT_SEG_TLS1 + ((segreg>>3) - GDT_ENTRY_TLS_MIN); + + /* One of standard desriptors */ +#ifdef CONFIG_X86_64 + if (segreg == __USER32_DS) + return CPT_SEG_USER32_DS; + if (segreg == __USER32_CS) + return CPT_SEG_USER32_CS; + if (segreg == __USER_DS) + return CPT_SEG_USER64_DS; + if (segreg == __USER_CS) + return CPT_SEG_USER64_CS; +#else + if (segreg == __USER_DS) + return CPT_SEG_USER32_DS; + if (segreg == __USER_CS) + return CPT_SEG_USER32_CS; +#endif + eprintk("Invalid segment reg %x\n", segreg); + return CPT_SEG_ZERO; +} + +static int cpt_dump_registers(struct task_struct *tsk, struct cpt_context *ctx) +{ + struct cpt_x86_regs ri; + struct pt_regs *pt_regs; + + ri.cpt_len = sizeof(ri); + ri.cpt_type = CPT_OBJ_X86_REGS; + ri.cpt_hdrlen = sizeof(ri); + ri.cpt_content = CPT_CONTENT_VOID; + + ri.cpt_debugreg[0] = tsk->thread.debugreg0; + ri.cpt_debugreg[1] = tsk->thread.debugreg1; + ri.cpt_debugreg[2] = tsk->thread.debugreg2; + ri.cpt_debugreg[3] = tsk->thread.debugreg3; + ri.cpt_debugreg[4] = 0; + ri.cpt_debugreg[5] = 0; + ri.cpt_debugreg[6] = tsk->thread.debugreg6; + ri.cpt_debugreg[7] = tsk->thread.debugreg7; + + pt_regs = task_pt_regs(tsk); + + ri.cpt_fs = encode_segment(pt_regs->fs); + ri.cpt_gs = encode_segment(tsk->thread.gs); + + ri.cpt_bx = pt_regs->bx; + ri.cpt_cx = pt_regs->cx; + ri.cpt_dx = pt_regs->dx; + ri.cpt_si = pt_regs->si; + ri.cpt_di = pt_regs->di; + ri.cpt_bp = pt_regs->bp; + ri.cpt_ax = pt_regs->ax; + ri.cpt_ds = encode_segment(pt_regs->ds); + ri.cpt_es = encode_segment(pt_regs->es); + ri.cpt_orig_ax = pt_regs->orig_ax; + ri.cpt_ip = pt_regs->ip; + ri.cpt_cs = encode_segment(pt_regs->cs); + ri.cpt_flags = pt_regs->flags; 
+ ri.cpt_sp = pt_regs->sp; + ri.cpt_ss = encode_segment(pt_regs->ss); + + return ctx->write(&ri, sizeof(ri), ctx); +} + +int cpt_dump_task(struct task_struct *tsk, struct cpt_context *ctx) +{ + int err; + + err = cpt_dump_task_struct(tsk, ctx); + + /* Dump task mm */ + + if (!err) + cpt_dump_fpustate(tsk, ctx); + if (!err) + cpt_dump_registers(tsk, ctx); + + return err; +} -- 1.5.6 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/