Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752111AbbEZHCt (ORCPT ); Tue, 26 May 2015 03:02:49 -0400 Received: from [133.145.228.5] ([133.145.228.5]:41526 "EHLO mail4.hitachi.co.jp" rhost-flags-FAIL-FAIL-OK-OK) by vger.kernel.org with ESMTP id S1751434AbbEZG6k (ORCPT ); Tue, 26 May 2015 02:58:40 -0400 X-AuditID: 85900ec0-9e1cab9000001a57-a6-5564197c5d7c Subject: [PATCH trace-cmd V6 5/7] trace-cmd/record: Add --virt option for record mode From: Masami Hiramatsu To: Steven Rostedt Cc: Yoshihiro YUNOMAE , Aaron Fabbri , linux-kernel@vger.kernel.org, cti.systems-productivity-manager.ts@hitachi.com, Divya Vyas , Hidehiro Kawai , yoshihiro.yunomae@aktsk.jp Date: Tue, 26 May 2015 15:55:32 +0900 Message-ID: <20150526065532.16023.53739.stgit@localhost.localdomain> In-Reply-To: <20150526065522.16023.30813.stgit@localhost.localdomain> References: <20150526065522.16023.30813.stgit@localhost.localdomain> User-Agent: StGit/0.17.1-dirty MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 7bit X-Brightmail-Tracker: AAAAAA== Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 14568 Lines: 494 From: Yoshihiro YUNOMAE Add --virt option for record mode for a virtualization environment. If we use this option on a guest, we can send trace data with low overhead. This is because guests can send trace data to a host without copying the data by using splice(2). The format is: trace-cmd record --virt -e sched* The client using virtio-serial does not wait for the connection message "tracecmd" from the server. The client sends the connection message MSG_TCONNECT first. This feature can be used from kernel-3.6, which supports splice_read for ftrace and splice_write for virtio-serial. 
Signed-off-by: Yoshihiro YUNOMAE Signed-off-by: Masami Hiramatsu --- Changes in V4: Rebase for current trace-cmd-v2.4 Add usage of --virt for record in trace-usage.c Divide tracecmd_msg_connect_to_server() into two functions (tracecmd_msg_connect_to_server() and tracecmd_msg_send_init_data_virt(fd)) Changes in V3: Change _nw/_NW to _net/_NET --- Documentation/trace-cmd-record.1.txt | 12 +++++ trace-cmd.h | 4 +- trace-msg.c | 79 +++++++++++++++++++++++++++++++--- trace-msg.h | 4 ++ trace-record.c | 71 ++++++++++++++++++++++++++++--- trace-usage.c | 3 + 6 files changed, 160 insertions(+), 13 deletions(-) diff --git a/Documentation/trace-cmd-record.1.txt b/Documentation/trace-cmd-record.1.txt index 2a368fe..7b9981a 100644 --- a/Documentation/trace-cmd-record.1.txt +++ b/Documentation/trace-cmd-record.1.txt @@ -290,6 +290,14 @@ OPTIONS Have output go to stderr instead of stdout, but the output of the command executed will not be changed. This is useful if you want to monitor the output of the command being executed, but not see the output from trace-cmd. +*--virt*:: + This option is used on a guest in a virtualization environment. If a host + is running "trace-cmd virt-server", this option is used to have the data + sent to the host with virtio-serial like *-N* option. 
(see also + trace-cmd-virt-server(1)) + + Note: This option is not supported with latency tracer plugins: + wakeup, wakeup_rt, irqsoff, preemptoff and preemptirqsoff EXAMPLES -------- @@ -390,7 +398,11 @@ SEE ALSO -------- trace-cmd(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), +<<<<<<< current trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-profile(1) +======= +trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-virt-server(1) +>>>>>>> patched AUTHOR ------ diff --git a/trace-cmd.h b/trace-cmd.h index a93920f..fbfe3bf 100644 --- a/trace-cmd.h +++ b/trace-cmd.h @@ -265,7 +265,9 @@ void tracecmd_stat_cpu(struct trace_seq *s, int cpu); long tracecmd_flush_recording(struct tracecmd_recorder *recorder); /* for clients */ -int tracecmd_msg_send_init_data(int fd); +int tracecmd_msg_connect_to_server(int fd); +int tracecmd_msg_send_init_data_net(int fd); +int tracecmd_msg_send_init_data_virt(int fd); int tracecmd_msg_metadata_send(int fd, const char *buf, int size); int tracecmd_msg_finish_sending_metadata(int fd); void tracecmd_msg_send_close_msg(void); diff --git a/trace-msg.c b/trace-msg.c index 717089c..b5173ee 100644 --- a/trace-msg.c +++ b/trace-msg.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -70,6 +71,7 @@ int cpu_count; static int psfd; unsigned int page_size; int *client_ports; +int *virt_sfds; bool send_metadata; /* for server */ @@ -270,12 +272,20 @@ static int make_rinit(struct tracecmd_msg *msg) return 0; } +static int make_error_msg(u32 len, struct tracecmd_msg *msg) +{ + bufcpy(msg, TRACECMD_MSG_HDR_LEN, errmsg, len); + return 0; +} + static u32 tracecmd_msg_get_body_length(u32 cmd) { struct tracecmd_msg *msg; u32 len = 0; switch (cmd) { + case MSG_ERROR: + return ntohl(errmsg->size); case MSG_RCONNECT: return sizeof(msg->data.rconnect.str.size) + sizeof(CONNECT_MSG); @@ -304,6 +314,7 @@ static u32 tracecmd_msg_get_body_length(u32 cmd) + 
sizeof(msg->data.rinit.port_array); case MSG_SENDMETA: return TRACECMD_MSG_MAX_LEN - TRACECMD_MSG_HDR_LEN; + case MSG_TCONNECT: case MSG_CLOSE: case MSG_FINMETA: break; @@ -312,15 +323,18 @@ static u32 tracecmd_msg_get_body_length(u32 cmd) return 0; } -static int tracecmd_msg_make_body(u32 cmd, struct tracecmd_msg *msg) +static int tracecmd_msg_make_body(u32 cmd, u32 len, struct tracecmd_msg *msg) { switch (cmd) { + case MSG_ERROR: + return make_error_msg(len, msg); case MSG_RCONNECT: return make_rconnect(CONNECT_MSG, sizeof(CONNECT_MSG), msg); case MSG_TINIT: return make_tinit(msg); case MSG_RINIT: return make_rinit(msg); + case MSG_TCONNECT: case MSG_CLOSE: case MSG_SENDMETA: /* meta data is not stored here. */ case MSG_FINMETA: @@ -345,7 +359,7 @@ static int tracecmd_msg_create(u32 cmd, struct tracecmd_msg **msg) if (ret < 0) return ret; - ret = tracecmd_msg_make_body(cmd, *msg); + ret = tracecmd_msg_make_body(cmd, len, *msg); if (ret < 0) free(*msg); @@ -374,6 +388,12 @@ static int tracecmd_msg_send(int fd, u32 cmd) return ret; } +static void tracecmd_msg_send_error(int fd, struct tracecmd_msg *msg) +{ + errmsg = msg; + tracecmd_msg_send(fd, MSG_ERROR); +} + static int tracecmd_msg_read_extra(int fd, void *buf, u32 size, int *n) { int r = 0; @@ -498,9 +518,10 @@ static int tracecmd_msg_send_and_wait_for_msg(int fd, u32 cmd, struct tracecmd_m return 0; } -int tracecmd_msg_send_init_data(int fd) +static int tracecmd_msg_send_init_data(int fd, bool net) { char buf[TRACECMD_MSG_MAX_LEN]; + char path[PATH_MAX]; struct tracecmd_msg *msg; int i, cpus; int ret; @@ -511,9 +532,24 @@ int tracecmd_msg_send_init_data(int fd) return ret; cpus = ntohl(msg->data.rinit.cpus); - client_ports = malloc_or_die(sizeof(int) * cpus); - for (i = 0; i < cpus; i++) - client_ports[i] = ntohl(msg->data.rinit.port_array[i]); + if (net) { + client_ports = malloc_or_die(sizeof(int) * cpus); + for (i = 0; i < cpus; i++) + client_ports[i] = + ntohl(msg->data.rinit.port_array[i]); + } else { + 
virt_sfds = malloc_or_die(sizeof(int) * cpus); + + /* Open data paths of virtio-serial */ + for (i = 0; i < cpus; i++) { + snprintf(path, PATH_MAX, TRACE_PATH_CPU, i); + virt_sfds[i] = open(path, O_WRONLY); + if (virt_sfds[i] < 0) { + warning("Cannot open %s", TRACE_PATH_CPU, i); + return -errno; + } + } + } /* Next, send meta data */ send_metadata = true; @@ -521,6 +557,37 @@ int tracecmd_msg_send_init_data(int fd) return 0; } +int tracecmd_msg_send_init_data_net(int fd) +{ + return tracecmd_msg_send_init_data(fd, true); +} + +int tracecmd_msg_send_init_data_virt(int fd) +{ + return tracecmd_msg_send_init_data(fd, false); +} + +int tracecmd_msg_connect_to_server(int fd) +{ + char buf[TRACECMD_MSG_MAX_LEN]; + struct tracecmd_msg *msg; + int ret; + + msg = (struct tracecmd_msg *)buf; + /* connect to a server */ + ret = tracecmd_msg_send_and_wait_for_msg(fd, MSG_TCONNECT, msg); + if (ret < 0) { + if (ret == -EPROTONOSUPPORT) + goto error; + } + + return ret; + +error: + tracecmd_msg_send_error(fd, msg); + return ret; +} + static bool process_option(struct tracecmd_msg_opt *opt) { /* currently the only option we have is to us TCP */ diff --git a/trace-msg.h b/trace-msg.h index b23e72b..502c1bf 100644 --- a/trace-msg.h +++ b/trace-msg.h @@ -2,6 +2,9 @@ #define _TRACE_MSG_H_ #include +#define VIRTIO_PORTS "/dev/virtio-ports/" +#define AGENT_CTL_PATH VIRTIO_PORTS "agent-ctl-path" +#define TRACE_PATH_CPU VIRTIO_PORTS "trace-path-cpu%d" #define UDP_MAX_PACKET (65536 - 20) #define V2_MAGIC "677768\0" @@ -17,6 +20,7 @@ extern int cpu_count; extern unsigned int page_size; extern int *client_ports; extern bool send_metadata; +extern int *virt_sfds; /* for server */ extern bool done; diff --git a/trace-record.c b/trace-record.c index 89f4883..8b8f6db 100644 --- a/trace-record.c +++ b/trace-record.c @@ -95,6 +95,9 @@ static struct tracecmd_output *network_handle; /* Max size to let a per cpu file get */ static int max_kb; +struct tracecmd_output *virt_handle; +static bool virt; 
+ static int do_ptrace; static int filter_task; @@ -2341,6 +2344,9 @@ static int create_recorder(struct buffer_instance *instance, int cpu, if (client_ports) { connect_port(cpu); recorder = tracecmd_create_recorder_fd(client_ports[cpu], cpu, recorder_flags); + } else if (virt_sfds) { + recorder = tracecmd_create_recorder_fd(virt_sfds[cpu], cpu, + recorder_flags); } else { file = get_temp_file(instance, cpu); recorder = create_recorder_instance(instance, file, cpu, brass); @@ -2376,7 +2382,7 @@ static void check_first_msg_from_server(int fd) die("server not tracecmd server"); } -static void communicate_with_listener_v1(int fd) +static void communicate_with_listener_v1_net(int fd) { char buf[BUFSIZ]; ssize_t n; @@ -2439,9 +2445,9 @@ static void communicate_with_listener_v1(int fd) } } -static void communicate_with_listener_v2(int fd) +static void communicate_with_listener_v2_net(int fd) { - if (tracecmd_msg_send_init_data(fd) < 0) + if (tracecmd_msg_send_init_data_net(fd) < 0) die("Cannot communicate with server"); } @@ -2485,6 +2491,15 @@ static void check_protocol_version(int fd) } } +static void communicate_with_listener_virt(int fd) +{ + if (tracecmd_msg_connect_to_server(fd) < 0) + die("Cannot communicate with server"); + + if (tracecmd_msg_send_init_data_virt(fd) < 0) + die("Cannot send init data"); +} + static void setup_network(void) { struct addrinfo hints; @@ -2540,11 +2555,11 @@ again: close(sfd); goto again; } - communicate_with_listener_v2(sfd); + communicate_with_listener_v2_net(sfd); } if (proto_ver == V1_PROTOCOL) - communicate_with_listener_v1(sfd); + communicate_with_listener_v1_net(sfd); /* Now create the handle through this socket */ network_handle = tracecmd_create_init_fd_glob(sfd, listed_events); @@ -2555,6 +2570,21 @@ again: /* OK, we are all set, let'r rip! 
*/ } +static void setup_virtio(void) +{ + int fd; + + fd = open(AGENT_CTL_PATH, O_RDWR); + if (fd < 0) + die("Cannot open %s", AGENT_CTL_PATH); + + communicate_with_listener_virt(fd); + + /* Now create the handle through this socket */ + virt_handle = tracecmd_create_init_fd_glob(fd, listed_events); + tracecmd_msg_finish_sending_metadata(fd); +} + static void finish_network(void) { if (proto_ver == V2_PROTOCOL) @@ -2563,6 +2593,13 @@ static void finish_network(void) free(host); } +static void finish_virt(void) +{ + tracecmd_msg_send_close_msg(); + free(virt_handle); + free(virt_sfds); +} + static void start_threads(enum trace_type type, int global) { int profile = (type & TRACE_TYPE_PROFILE) == TRACE_TYPE_PROFILE; @@ -2573,6 +2610,8 @@ static void start_threads(enum trace_type type, int global) if (host) setup_network(); + else if (virt) + setup_virtio(); /* make a thread for every CPU we have */ pids = malloc_or_die(sizeof(*pids) * cpu_count * (buffers + 1)); @@ -2707,6 +2746,9 @@ static void record_data(char *date2ts) if (host) { finish_network(); return; + } else if (virt) { + finish_virt(); + return; } if (latency) @@ -3774,6 +3816,7 @@ static void add_hook(struct buffer_instance *instance, const char *arg) } enum { + OPT_virt = 250, OPT_stderr = 251, OPT_profile = 252, OPT_nosplice = 253, @@ -3942,6 +3985,7 @@ void trace_record (int argc, char **argv) {"nosplice", no_argument, NULL, OPT_nosplice}, {"profile", no_argument, NULL, OPT_profile}, {"stderr", no_argument, NULL, OPT_stderr}, + {"virt", no_argument, NULL, OPT_virt}, {"help", no_argument, NULL, '?'}, {NULL, 0, NULL, 0} }; @@ -4061,6 +4105,8 @@ void trace_record (int argc, char **argv) case 'o': if (host) die("-o incompatible with -N"); + if (virt) + die("-o incompatible with --virt"); if (start) die("start does not take output\n" "Did you mean 'record'?"); @@ -4120,6 +4166,8 @@ void trace_record (int argc, char **argv) case 'N': if (!record && !extract) die("-N only available with record or extract"); + 
if (virt) + die("-N incompatible with --virt"); if (output) die("-N incompatible with -o"); host = optarg; @@ -4135,6 +4183,8 @@ void trace_record (int argc, char **argv) instance->cpumask = optarg; break; case 't': + if (virt) + die("-t incompatible with --virt"); use_tcp = 1; break; case 'b': @@ -4173,6 +4223,17 @@ void trace_record (int argc, char **argv) close(1); dup2(2, 1); break; + case OPT_virt: + if (!record) + die("--virt only available with record"); + if (host) + die("--virt incompatible with -N"); + if (output) + die("--virt incompatible with -o"); + if (use_tcp) + die("--virt incompatible with -t"); + virt = true; + break; default: usage(argv); } diff --git a/trace-usage.c b/trace-usage.c index 3d9b821..23cb124 100644 --- a/trace-usage.c +++ b/trace-usage.c @@ -19,7 +19,7 @@ static struct usage_help usage_help[] = { " %s record [-v][-e event [-f filter]][-p plugin][-F][-d][-D][-o file] \\\n" " [-s usecs][-O option ][-l func][-g func][-n func] \\\n" " [-P pid][-N host:port][-t][-r prio][-b size][-B buf][command ...]\n" - " [-m max][-C clock]\n" + " [-m max][-C clock][--virt]\n" " -e run command with event enabled\n" " -f filter for previous -e event\n" " -R trigger for previous -e event\n" @@ -51,6 +51,7 @@ static struct usage_help usage_help[] = { " --profile enable tracing options needed for report --profile\n" " --func-stack perform a stack trace for function tracer\n" " (use with caution)\n" + " --virt to connect to virt-server\n" }, { "start", -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/