Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752025AbaGKA6p (ORCPT ); Thu, 10 Jul 2014 20:58:45 -0400 Received: from mail4.hitachi.co.jp ([133.145.228.5]:60507 "EHLO mail4.hitachi.co.jp" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751984AbaGKA6m (ORCPT ); Thu, 10 Jul 2014 20:58:42 -0400 X-AuditID: 85900ec0-d272ab9000001514-9e-53bf36bfa92e Subject: [PATCH V4 5/5] trace-cmd/record: Add --virt option for record mode From: Yoshihiro YUNOMAE To: Steven Rostedt Cc: Hidehiro Kawai , Masami Hiramatsu , yrl.pp-manager.tt@hitachi.com, linux-kernel@vger.kernel.org, Aaron Fabbri Date: Fri, 11 Jul 2014 00:58:36 +0000 Message-ID: <20140711005836.25516.63968.stgit@yuno-kbuild.novalocal> In-Reply-To: <20140711005824.25516.24498.stgit@yuno-kbuild.novalocal> References: <20140711005824.25516.24498.stgit@yuno-kbuild.novalocal> User-Agent: StGit/0.17-dirty MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit X-Brightmail-Tracker: AAAAAA== Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Add --virt option for record mode for a virtualization environment. If we use this option on a guest, we can send trace data with low overhead. This is because guests can send trace data to a host without copying the data by using splice(2). The format is: trace-cmd record --virt -e sched* The client using virtio-serial does not wait for the connection message "tracecmd" from the server. The client sends the connection message MSG_TCONNECT first. This feature can be used from kernel 3.6 onward, which supports splice_read for ftrace and splice_write for virtio-serial. 
Changes in V4: Rebase for current trace-cmd-v2.4 Add usage of --virt for record in trace-usage.c Divide tracecmd_msg_connect_to_server() into two functions (tracecmd_msg_connect_to_server() and tracecmd_msg_send_init_data_virt(fd)) Changes in V3: Change _nw/_NW to _net/_NET Signed-off-by: Yoshihiro YUNOMAE --- Documentation/trace-cmd-record.1.txt | 11 ++++- trace-cmd.h | 4 +- trace-msg.c | 79 +++++++++++++++++++++++++++++++--- trace-msg.h | 4 ++ trace-record.c | 71 ++++++++++++++++++++++++++++--- trace-usage.c | 3 + 6 files changed, 158 insertions(+), 14 deletions(-) diff --git a/Documentation/trace-cmd-record.1.txt b/Documentation/trace-cmd-record.1.txt index 9e63eb4..c0de074 100644 --- a/Documentation/trace-cmd-record.1.txt +++ b/Documentation/trace-cmd-record.1.txt @@ -258,6 +258,15 @@ OPTIONS timestamp to gettimeofday which will allow wall time output from the timestamps reading the created 'trace.dat' file. +*--virt*:: + This option is used on a guest in a virtualization environment. If a host + is running "trace-cmd virt-server", this option is used to have the data + sent to the host with virtio-serial like the *-N* option. 
(see also + trace-cmd-virt-server(1)) + + Note: This option is not supported with latency tracer plugins: + wakeup, wakeup_rt, irqsoff, preemptoff and preemptirqsoff + EXAMPLES -------- @@ -320,7 +329,7 @@ SEE ALSO -------- trace-cmd(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), -trace-cmd-list(1), trace-cmd-listen(1) +trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-virt-server(1) AUTHOR ------ diff --git a/trace-cmd.h b/trace-cmd.h index c4e5beb..1c1b0c3 100644 --- a/trace-cmd.h +++ b/trace-cmd.h @@ -250,7 +250,9 @@ void tracecmd_stat_cpu(struct trace_seq *s, int cpu); long tracecmd_flush_recording(struct tracecmd_recorder *recorder); /* for clients */ -int tracecmd_msg_send_init_data(int fd); +int tracecmd_msg_connect_to_server(int fd); +int tracecmd_msg_send_init_data_net(int fd); +int tracecmd_msg_send_init_data_virt(int fd); int tracecmd_msg_metadata_send(int fd, char *buf, int size); int tracecmd_msg_finish_sending_metadata(int fd); void tracecmd_msg_send_close_msg(void); diff --git a/trace-msg.c b/trace-msg.c index 0d606dc..7ca31d6 100644 --- a/trace-msg.c +++ b/trace-msg.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -72,6 +73,7 @@ int cpu_count; static int psfd; unsigned int page_size; int *client_ports; +int *virt_sfds; bool send_metadata; /* for server */ @@ -272,12 +274,20 @@ static int make_rinit(struct tracecmd_msg *msg) return 0; } +static int make_error_msg(u32 len, struct tracecmd_msg *msg) +{ + bufcpy(msg, TRACECMD_MSG_HDR_LEN, errmsg, len); + return 0; +} + static u32 tracecmd_msg_get_body_length(u32 cmd) { struct tracecmd_msg *msg; u32 len = 0; switch (cmd) { + case MSG_ERROR: + return ntohl(errmsg->size); case MSG_RCONNECT: return sizeof(msg->data.rconnect.str.size) + CONNECTION_MSGSIZE; case MSG_TINIT: @@ -305,6 +315,7 @@ static u32 tracecmd_msg_get_body_length(u32 cmd) + sizeof(msg->data.rinit.port_array); case MSG_SENDMETA: return 
TRACECMD_MSG_MAX_LEN - TRACECMD_MSG_HDR_LEN; + case MSG_TCONNECT: case MSG_CLOSE: case MSG_FINMETA: break; @@ -313,15 +324,18 @@ static u32 tracecmd_msg_get_body_length(u32 cmd) return 0; } -static int tracecmd_msg_make_body(u32 cmd, struct tracecmd_msg *msg) +static int tracecmd_msg_make_body(u32 cmd, u32 len, struct tracecmd_msg *msg) { switch (cmd) { + case MSG_ERROR: + return make_error_msg(len, msg); case MSG_RCONNECT: return make_rconnect(CONNECTION_MSG, CONNECTION_MSGSIZE, msg); case MSG_TINIT: return make_tinit(msg); case MSG_RINIT: return make_rinit(msg); + case MSG_TCONNECT: case MSG_CLOSE: case MSG_SENDMETA: /* meta data is not stored here. */ case MSG_FINMETA: @@ -346,7 +360,7 @@ static int tracecmd_msg_create(u32 cmd, struct tracecmd_msg **msg) if (ret < 0) return ret; - ret = tracecmd_msg_make_body(cmd, *msg); + ret = tracecmd_msg_make_body(cmd, len, *msg); if (ret < 0) free(*msg); @@ -375,6 +389,12 @@ static int tracecmd_msg_send(int fd, u32 cmd) return ret; } +static void tracecmd_msg_send_error(int fd, struct tracecmd_msg *msg) +{ + errmsg = msg; + tracecmd_msg_send(fd, MSG_ERROR); +} + static int tracecmd_msg_read_extra(int fd, void *buf, u32 size, int *n) { int r = 0; @@ -499,9 +519,10 @@ static int tracecmd_msg_send_and_wait_for_msg(int fd, u32 cmd, struct tracecmd_m return 0; } -int tracecmd_msg_send_init_data(int fd) +static int tracecmd_msg_send_init_data(int fd, bool net) { char buf[TRACECMD_MSG_MAX_LEN]; + char path[PATH_MAX]; struct tracecmd_msg *msg; int i, cpus; int ret; @@ -512,9 +533,24 @@ int tracecmd_msg_send_init_data(int fd) return ret; cpus = ntohl(msg->data.rinit.cpus); - client_ports = malloc_or_die(sizeof(int) * cpus); - for (i = 0; i < cpus; i++) - client_ports[i] = ntohl(msg->data.rinit.port_array[i]); + if (net) { + client_ports = malloc_or_die(sizeof(int) * cpus); + for (i = 0; i < cpus; i++) + client_ports[i] = + ntohl(msg->data.rinit.port_array[i]); + } else { + virt_sfds = malloc_or_die(sizeof(int) * cpus); + + /* Open 
data paths of virtio-serial */ + for (i = 0; i < cpus; i++) { + snprintf(path, PATH_MAX, TRACE_PATH_CPU, i); + virt_sfds[i] = open(path, O_WRONLY); + if (virt_sfds[i] < 0) { + warning("Cannot open %s", TRACE_PATH_CPU, i); + return -errno; + } + } + } /* Next, send meta data */ send_metadata = true; @@ -522,6 +558,37 @@ int tracecmd_msg_send_init_data(int fd) return 0; } +int tracecmd_msg_send_init_data_net(int fd) +{ + return tracecmd_msg_send_init_data(fd, true); +} + +int tracecmd_msg_send_init_data_virt(int fd) +{ + return tracecmd_msg_send_init_data(fd, false); +} + +int tracecmd_msg_connect_to_server(int fd) +{ + char buf[TRACECMD_MSG_MAX_LEN]; + struct tracecmd_msg *msg; + int ret; + + msg = (struct tracecmd_msg *)buf; + /* connect to a server */ + ret = tracecmd_msg_send_and_wait_for_msg(fd, MSG_TCONNECT, msg); + if (ret < 0) { + if (ret == -EPROTONOSUPPORT) + goto error; + } + + return ret; + +error: + tracecmd_msg_send_error(fd, msg); + return ret; +} + static bool process_option(struct tracecmd_msg_opt *opt) { /* currently the only option we have is to us TCP */ diff --git a/trace-msg.h b/trace-msg.h index b23e72b..502c1bf 100644 --- a/trace-msg.h +++ b/trace-msg.h @@ -2,6 +2,9 @@ #define _TRACE_MSG_H_ #include +#define VIRTIO_PORTS "/dev/virtio-ports/" +#define AGENT_CTL_PATH VIRTIO_PORTS "agent-ctl-path" +#define TRACE_PATH_CPU VIRTIO_PORTS "trace-path-cpu%d" #define UDP_MAX_PACKET (65536 - 20) #define V2_MAGIC "677768\0" @@ -17,6 +20,7 @@ extern int cpu_count; extern unsigned int page_size; extern int *client_ports; extern bool send_metadata; +extern int *virt_sfds; /* for server */ extern bool done; diff --git a/trace-record.c b/trace-record.c index 79ce3a1..e56d294 100644 --- a/trace-record.c +++ b/trace-record.c @@ -77,6 +77,9 @@ static struct tracecmd_output *network_handle; /* Max size to let a per cpu file get */ static int max_kb; +struct tracecmd_output *virt_handle; +static bool virt; + static int do_ptrace; static int filter_task; @@ -1787,6 
+1790,9 @@ static int create_recorder(struct buffer_instance *instance, int cpu, int extrac if (client_ports) { connect_port(cpu); recorder = tracecmd_create_recorder_fd(client_ports[cpu], cpu, recorder_flags); + } else if (virt_sfds) { + recorder = tracecmd_create_recorder_fd(virt_sfds[cpu], cpu, + recorder_flags); } else { file = get_temp_file(instance, cpu); recorder = create_recorder_instance(instance, file, cpu); @@ -1822,7 +1828,7 @@ static void check_first_msg_from_server(int fd) die("server not tracecmd server"); } -static void communicate_with_listener_v1(int fd) +static void communicate_with_listener_v1_net(int fd) { char buf[BUFSIZ]; ssize_t n; @@ -1885,9 +1891,9 @@ static void communicate_with_listener_v1(int fd) } } -static void communicate_with_listener_v2(int fd) +static void communicate_with_listener_v2_net(int fd) { - if (tracecmd_msg_send_init_data(fd) < 0) + if (tracecmd_msg_send_init_data_net(fd) < 0) die("Cannot communicate with server"); } @@ -1925,6 +1931,15 @@ static void check_protocol_version(int fd) } } +static void communicate_with_listener_virt(int fd) +{ + if (tracecmd_msg_connect_to_server(fd) < 0) + die("Cannot communicate with server"); + + if (tracecmd_msg_send_init_data_virt(fd) < 0) + die("Cannot send init data"); +} + static void setup_network(void) { struct addrinfo hints; @@ -1980,11 +1995,11 @@ again: close(sfd); goto again; } - communicate_with_listener_v2(sfd); + communicate_with_listener_v2_net(sfd); } if (proto_ver == V1_PROTOCOL) - communicate_with_listener_v1(sfd); + communicate_with_listener_v1_net(sfd); /* Now create the handle through this socket */ network_handle = tracecmd_create_init_fd_glob(sfd, listed_events); @@ -1995,6 +2010,21 @@ again: /* OK, we are all set, let'r rip! 
*/ } +static void setup_virtio(void) +{ + int fd; + + fd = open(AGENT_CTL_PATH, O_RDWR); + if (fd < 0) + die("Cannot open %s", AGENT_CTL_PATH); + + communicate_with_listener_virt(fd); + + /* Now create the handle through this socket */ + virt_handle = tracecmd_create_init_fd_glob(fd, listed_events); + tracecmd_msg_finish_sending_metadata(fd); +} + static void finish_network(void) { if (proto_ver == V2_PROTOCOL) @@ -2003,6 +2033,13 @@ static void finish_network(void) free(host); } +static void finish_virt(void) +{ + tracecmd_msg_send_close_msg(); + free(virt_handle); + free(virt_sfds); +} + static void start_threads(void) { struct buffer_instance *instance; @@ -2010,6 +2047,8 @@ static void start_threads(void) if (host) setup_network(); + else if (virt) + setup_virtio(); /* make a thread for every CPU we have */ pids = malloc_or_die(sizeof(*pids) * cpu_count * (buffers + 1)); @@ -2079,6 +2118,9 @@ static void record_data(char *date2ts) if (host) { finish_network(); return; + } else if (virt) { + finish_virt(); + return; } if (latency) @@ -2732,6 +2774,7 @@ static void record_all_events(void) } enum { + OPT_virt = 252, OPT_nosplice = 253, OPT_funcstack = 254, OPT_date = 255, @@ -2885,6 +2928,7 @@ void trace_record (int argc, char **argv) {"date", no_argument, NULL, OPT_date}, {"func-stack", no_argument, NULL, OPT_funcstack}, {"nosplice", no_argument, NULL, OPT_nosplice}, + {"virt", no_argument, NULL, OPT_virt}, {"help", no_argument, NULL, '?'}, {NULL, 0, NULL, 0} }; @@ -3015,6 +3059,8 @@ void trace_record (int argc, char **argv) case 'o': if (host) die("-o incompatible with -N"); + if (virt) + die("-o incompatible with --virt"); if (!record && !extract) die("start does not take output\n" "Did you mean 'record'?"); @@ -3046,6 +3092,8 @@ void trace_record (int argc, char **argv) case 'N': if (!record) die("-N only available with record"); + if (virt) + die("-N incompatible with --virt"); if (output) die("-N incompatible with -o"); host = optarg; @@ -3061,6 +3109,8 @@ 
void trace_record (int argc, char **argv) instance->cpumask = optarg; break; case 't': + if (virt) + die("-t incompatible with --virt"); use_tcp = 1; break; case 'b': @@ -3085,6 +3135,17 @@ void trace_record (int argc, char **argv) case OPT_nosplice: recorder_flags |= TRACECMD_RECORD_NOSPLICE; break; + case OPT_virt: + if (!record) + die("--virt only available with record"); + if (host) + die("--virt incompatible with -N"); + if (output) + die("--virt incompatible with -o"); + if (use_tcp) + die("--virt incompatible with -t"); + virt = true; + break; default: usage(argv); } diff --git a/trace-usage.c b/trace-usage.c index f96a5ba..45865f0 100644 --- a/trace-usage.c +++ b/trace-usage.c @@ -19,7 +19,7 @@ static struct usage_help usage_help[] = { " %s record [-v][-e event [-f filter]][-p plugin][-F][-d][-D][-o file] \\\n" " [-s usecs][-O option ][-l func][-g func][-n func] \\\n" " [-P pid][-N host:port][-t][-r prio][-b size][-B buf][command ...]\n" - " [-m max]\n" + " [-m max][--virt]\n" " -e run command with event enabled\n" " -f filter for previous -e event\n" " -R trigger for previous -e event\n" @@ -48,6 +48,7 @@ static struct usage_help usage_help[] = { " -i do not fail if an event is not found\n" " --func-stack perform a stack trace for function tracer\n" " (use with caution)\n" + " --virt to connect to virt-server\n" }, { "start", -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/