Hi,
This is the version 6 series of virtio-trace for trace-cmd.
The previous series is here: https://lkml.org/lkml/2014/12/22/79
I took over this work from Yoshihiro Yunomae. This version rebases the
previous patches onto the latest trace-cmd tree and restores Yoshihiro's
Signed-off-by and original authorship.
How to use
==========
1. Run virt-server on a host
# trace-cmd virt-server --dom guest1 -c 2
2. Set up the virtio-serial pipes of guest1 on the host
Add the following tags to the domain XML file.
# virsh edit guest1
<channel type='unix'>
<source mode='connect' path='/tmp/trace-cmd/virt/agent-ctl-path'/>
<target type='virtio' name='agent-ctl-path'/>
</channel>
<channel type='pipe'>
<source path='/tmp/trace-cmd/virt/guest1/trace-path-cpu0'/>
<target type='virtio' name='trace-path-cpu0'/>
</channel>
<channel type='pipe'>
<source path='/tmp/trace-cmd/virt/guest1/trace-path-cpu1'/>
<target type='virtio' name='trace-path-cpu1'/>
</channel>
3. Boot the guest
# virsh start guest1
4. Run the client on guest1 (see trace-cmd-record(1) with the *--virt* option)
# trace-cmd record -e sched* --virt
If you want to boot another guest that sends trace data via virtio-serial,
you must manually create the guest domain directory and the trace data I/Fs.
- Make guest domain directory on the host
# mkdir -p /tmp/trace-cmd/virt/<DOMAIN>
# chmod 710 /tmp/trace-cmd/virt/<DOMAIN>
# chgrp qemu /tmp/trace-cmd/virt/<DOMAIN>
- Make FIFO on the host
# mkfifo /tmp/trace-cmd/virt/<DOMAIN>/trace-path-cpu{0,1,...,X}.{in,out}
TODO
====
- Don't use fixed directory and fifos. Make it flexible.
- Don't depend on the libvirt. We can find fifos in /proc/<pid>/fd/*.
- Cleanup the code. It is not well structured now.
Thank you,
---
Yoshihiro YUNOMAE (7):
trace-cmd: Support -N option for trace-cmd extract
trace-cmd/listen: Introduce trace-msg protocol (protocol v2)
trace-cmd/msg: Use poll(2) to wait for a message
trace-cmd/virt-server: Add virt-server mode for a virtualization environment
trace-cmd/record: Add --virt option for record mode
trace-cmd/virt-server: Add --dom option which makes a domain directory to virt-server
trace-cmd: Use pid instead of libvirt virt domain name
Documentation/Protocol.txt | 163 +++++
Documentation/trace-cmd-record.1.txt | 12
Documentation/trace-cmd-virt-server.1.txt | 113 ++++
Makefile | 3
trace-cmd.c | 3
trace-cmd.h | 15 +
trace-listen.c | 687 +++++++++++++++++++----
trace-msg.c | 870 +++++++++++++++++++++++++++++
trace-msg.h | 31 +
trace-output.c | 4
trace-record.c | 158 +++++
trace-recorder.c | 50 +-
trace-usage.c | 18 +
13 files changed, 1991 insertions(+), 136 deletions(-)
create mode 100644 Documentation/Protocol.txt
create mode 100644 Documentation/trace-cmd-virt-server.1.txt
create mode 100644 trace-msg.c
create mode 100644 trace-msg.h
--
Masami HIRAMATSU
Linux Technology Research Center, System Productivity Research Dept.
Center for Technology Innovation - Systems Engineering
Hitachi, Ltd., Research & Development Group
E-mail: [email protected]
From: Yoshihiro YUNOMAE <[email protected]>
There is no reason to prohibit the -N option in extract mode,
since both record and extract read trace logs from ftrace
and save them.
Signed-off-by: Yoshihiro YUNOMAE <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
---
trace-record.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/trace-record.c b/trace-record.c
index 9874a12..c387aff 100644
--- a/trace-record.c
+++ b/trace-record.c
@@ -520,6 +520,7 @@ static void stop_threads(enum trace_type type)
static int create_recorder(struct buffer_instance *instance, int cpu,
enum trace_type type, int *brass);
+static void setup_network(void);
static void flush_threads(void)
{
@@ -529,6 +530,9 @@ static void flush_threads(void)
if (!cpu_count)
return;
+ if (host)
+ setup_network();
+
for (i = 0; i < cpu_count; i++) {
/* Extract doesn't support sub buffers yet */
ret = create_recorder(&top_instance, i, TRACE_TYPE_EXTRACT, NULL);
@@ -4047,8 +4051,8 @@ void trace_record (int argc, char **argv)
rt_prio = atoi(optarg);
break;
case 'N':
- if (!record)
- die("-N only available with record");
+ if (!record && !extract)
+ die("-N only available with record or extract");
if (output)
die("-N incompatible with -o");
host = optarg;
From: Yoshihiro YUNOMAE <[email protected]>
Introduce a new trace-msg protocol (protocol V2) for more
flexible messaging. The V1 protocol, which is currently used
by the trace-cmd server and client, is based on simple
text messages. It is impossible to extend that protocol
without breaking backward compatibility. The V2 protocol
introduced by this patch is a binary message-based protocol
and it can be extended by just adding message tags.
<How to test>
[1] Backward compatibility checks
We need to test the backward compatibility of this patch with old
trace-cmds (client/server). So, this patch was tested with the command
checks in [2] for the following 3 combinations:
<client> <server>
new old
old new
new new
[2] Command checks
- server (common)
# trace-cmd listen -p 12345
1) record
- client
# trace-cmd record -e sched -N <server IP>:12345
^C
2) record + multiple buffers
- client
# trace-cmd record -B foo -e sched -N <server IP>:12345
^C
3) extract
- client
# ./trace-cmd start -e sched
# sleep 5
# ./trace-cmd stop
# ./trace-cmd extract -N <server IP>:12345
4) extract + snapshot
- client
# ./trace-cmd start -e sched
# sleep 5
# ./trace-cmd snapshot -s
# ./trace-cmd stop
# ./trace-cmd extract -N <server IP>:12345 -s
Signed-off-by: Yoshihiro YUNOMAE <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
---
Changes in v6: Update to the latest master.
Fix build errors.
Changes in V5: Client sends "-1V2\0<MAGIC_NUMBER>\00" instead of
"V2\0<MAGIC_NUMBER>\0" so that an old server does not create
a zero-length file.
Also add a protocol documentation.
Cleanup source code.
Change meaningless loop in tracecmd_msg_collect_metadata().
Changes in V4: Fix some typos, cleanups and rebase for current trace-cmd-v2.4
Change the argument of tracecmd_msg_recv()
Changes in V3: Change the license of trace-msg.c to LGPL v2.1
Changes in V2: Legacy protocol support in order to keep backward compatibility
---
Documentation/Protocol.txt | 119 ++++++++
Makefile | 3
trace-cmd.h | 11 +
trace-listen.c | 84 ++++-
trace-msg.c | 682 ++++++++++++++++++++++++++++++++++++++++++++
trace-msg.h | 27 ++
trace-output.c | 4
trace-record.c | 89 +++++-
8 files changed, 982 insertions(+), 37 deletions(-)
create mode 100644 Documentation/Protocol.txt
create mode 100644 trace-msg.c
create mode 100644 trace-msg.h
diff --git a/Documentation/Protocol.txt b/Documentation/Protocol.txt
new file mode 100644
index 0000000..49f7766
--- /dev/null
+++ b/Documentation/Protocol.txt
@@ -0,0 +1,119 @@
+Trace-cmd Protocols
+===================
+
+Index
+=====
+1. What is the trace-cmd protocol?
+2. Trace-cmd Protocol V1 (Obsolete)
+3. Trace-cmd Protocol V2
+
+
+1. What is the trace-cmd protocol?
+==================================
+ Trace-cmd can run as a remote-trace agent (server) and a client, which
+communicate over the network and pass the trace data. The trace-cmd
+protocol is used for the communication between the server and the client.
+ There are 2 versions of the trace-cmd protocol. V1 protocol was simple
+text-based but hard to extend. On the other hand, V2 protocol is message
+based and extensible.
+
+
+2. Trace-cmd Protocol V1 (Obsolete)
+===================================
+
+The old trace-cmd which supports V1 protocol works as follows;
+
+ <server(local)> <client(remote)>
+ listen to socket fd
+ connect to socket fd
+ accept the client
+ send "tracecmd"
+ +------------> receive "tracecmd"
+ check "tracecmd"
+ send cpus
+ receive cpus <------------+
+ print "cpus=XXX"
+ send pagesize
+ |
+ receive pagesize <--------+
+ print "pagesize=XXX"
+ send options
+ |
+ receive options <---------+
+ understand options
+ send port_array
+ +------------> receive port_array
+ understand port_array
+ send meta data
+ receive meta data <-------+
+ record meta data
+ (snip)
+ read block
+ --- start sending trace data on child processes ---
+
+ --- When client finishes sending trace data ---
+ close(socket fd)
+ read size = 0
+ close(socket fd)
+
+All messages are unstructured character strings and the messaging
+order and contents are fixed. It is impossible to extend the
+protocol without breaking the compatibility.
+
+
+3. Trace-cmd Protocol V2
+========================
+
+From the protocol V2, the structured binary message "trace-msg" is
+introduced as the communication protocol.
+
+ <server> <client>
+ listen to socket fd
+ connect to socket fd
+ accept the client
+ send "tracecmd"
+ +------------> receive "tracecmd"
+ check "tracecmd"
+ send "-1V2\0<MAGIC_NUMBER>\0" as the v2 protocol
+ receive "-1V2" <----------+
+ check "-1V2"
+ check <MAGIC_NUMBER>
+ send "V2"
+ +---------------> receive "V2"
+ check "V2"
+ send MSG_TINIT with cpus, pagesize and options
+ receive MSG_TINIT <-------+
+ parse the parameters
+ send MSG_RINIT with port_array
+ +----------------> receive MSG_RINIT
+ get port_array
+ send meta data(MSG_SENDMETA)
+ receive MSG_SENDMETA <----+
+ record meta data
+ (snip)
+ send a message to finish sending meta data
+ | (MSG_FINMETA)
+ receive MSG_FINMETA <-----+
+ read block
+ --- start sending trace data on child processes ---
+
+ --- When client finishes sending trace data ---
+ send MSG_CLOSE
+ receive MSG_CLOSE <-------+
+ close(socket fd) close(socket fd)
+
+In this version, after the client checks "tracecmd", it sends
+"-1V2\0<MAGIC_NUMBER>\0". This is for the backward compatibility.
+When the newer client tries to connect to the old server and sends
+this string to the server, the old server parses it to get the
+number of CPUs. Since "-1V2" actually becomes -1 and this is a
+wrong value, the server refuses the client. Then, the client gets
+a connection error because the server is old, so it can try to
+connect with V1 protocol again.
+
+On the other hand, if new server gets a connection from an old
+client, it can easily check whether the client uses V1 protocol
+or not by checking the first message from the client. If client
+sends a positive number, it should be a V1 protocol client.
+
+
diff --git a/Makefile b/Makefile
index 63f7e79..59a5a0c 100644
--- a/Makefile
+++ b/Makefile
@@ -320,7 +320,8 @@ PEVENT_LIB_OBJS = event-parse.o trace-seq.o parse-filter.o parse-utils.o
TCMD_LIB_OBJS = $(PEVENT_LIB_OBJS) trace-util.o trace-input.o trace-ftrace.o \
trace-output.o trace-record.o trace-recorder.o \
trace-restore.o trace-usage.o trace-blk-hack.o \
- kbuffer-parse.o event-plugin.o trace-hooks.o
+ kbuffer-parse.o event-plugin.o trace-hooks.o \
+ trace-msg.o
PLUGIN_OBJS =
PLUGIN_OBJS += plugin_jbd2.o
diff --git a/trace-cmd.h b/trace-cmd.h
index 7bce2a5..1261e23 100644
--- a/trace-cmd.h
+++ b/trace-cmd.h
@@ -263,6 +263,17 @@ void tracecmd_stop_recording(struct tracecmd_recorder *recorder);
void tracecmd_stat_cpu(struct trace_seq *s, int cpu);
long tracecmd_flush_recording(struct tracecmd_recorder *recorder);
+/* for clients */
+int tracecmd_msg_send_init_data(int fd);
+int tracecmd_msg_metadata_send(int fd, const char *buf, int size);
+int tracecmd_msg_finish_sending_metadata(int fd);
+void tracecmd_msg_send_close_msg(void);
+
+/* for server */
+int tracecmd_msg_initial_setting(int fd, int *cpus, int *pagesize);
+int tracecmd_msg_send_port_array(int fd, int total_cpus, int *ports);
+int tracecmd_msg_collect_metadata(int ifd, int ofd);
+
/* --- Plugin handling --- */
extern struct pevent_plugin_option trace_ftrace_options[];
diff --git a/trace-listen.c b/trace-listen.c
index 18672b0..17ab184 100644
--- a/trace-listen.c
+++ b/trace-listen.c
@@ -33,6 +33,7 @@
#include <errno.h>
#include "trace-local.h"
+#include "trace-msg.h"
#define MAX_OPTION_SIZE 4096
@@ -45,10 +46,10 @@ static FILE *logfp;
static int debug;
-static int use_tcp;
-
static int backlog = 5;
+static int proto_ver;
+
#define TEMP_FILE_STR "%s.%s:%s.cpu%d", output_file, host, port, cpu
static char *get_temp_file(const char *host, const char *port, int cpu)
{
@@ -112,7 +113,6 @@ static int process_option(char *option)
return 0;
}
-static int done;
static void finish(int sig)
{
done = 1;
@@ -144,7 +144,7 @@ static void __plog(const char *prefix, const char *fmt, va_list ap,
fprintf(fp, "%.*s", r, buf);
}
-static void plog(const char *fmt, ...)
+void plog(const char *fmt, ...)
{
va_list ap;
@@ -153,7 +153,7 @@ static void plog(const char *fmt, ...)
va_end(ap);
}
-static void pdie(const char *fmt, ...)
+void pdie(const char *fmt, ...)
{
va_list ap;
char *str = "";
@@ -305,25 +305,15 @@ static int open_udp(const char *node, const char *port, int *pid,
return num_port;
}
-static int communicate_with_client(int fd, int *cpus, int *pagesize)
+/* Setup client who is using the v1 protocol */
+static int client_initial_setting(int fd, char *buf, int *cpus, int *pagesize)
{
- char buf[BUFSIZ];
char *option;
int options;
int size;
int n, s, t, i;
- /* Let the client know what we are */
- write(fd, "tracecmd", 8);
-
- /* read back the CPU count */
- n = read_string(fd, buf, BUFSIZ);
- if (n == BUFSIZ)
- /** ERROR **/
- return -1;
-
*cpus = atoi(buf);
-
plog("cpus=%d\n", *cpus);
if (*cpus < 0)
return -1;
@@ -376,6 +366,41 @@ static int communicate_with_client(int fd, int *cpus, int *pagesize)
return -1;
}
+ return 0;
+}
+
+static int communicate_with_client(int fd, int *cpus, int *pagesize)
+{
+ char buf[BUFSIZ];
+ int n;
+
+ /* Let the client know what we are */
+ write(fd, "tracecmd", 8);
+
+ /* read back the CPU count */
+ n = read_string(fd, buf, BUFSIZ);
+ if (n == BUFSIZ)
+ /** ERROR **/
+ return -1;
+
+ /* Is the client using the new protocol? */
+ if (memcmp(buf, "-1V2", 4) == 0) {
+ read(fd, buf, sizeof(V2_MAGIC));
+ if (memcmp(buf, V2_MAGIC, strlen(V2_MAGIC)) != 0) {
+ plog("Invalid magic number %s", buf);
+ return -1;
+ }
+ proto_ver = V2_PROTOCOL;
+
+ /* Let the client know we use v2 protocol */
+ write(fd, "V2", 2);
+
+ /* read the CPU count, the page size, and options */
+ if (tracecmd_msg_initial_setting(fd, cpus, pagesize) < 0)
+ return -1;
+ } else if (client_initial_setting(fd, buf, cpus, pagesize) < 0)
+ return -1;
+
if (use_tcp)
plog("Using TCP for live connection\n");
@@ -442,14 +467,20 @@ static int *create_all_readers(int cpus, const char *node, const char *port,
start_port = udp_port + 1;
}
- /* send the client a comma deliminated set of port numbers */
- for (cpu = 0; cpu < cpus; cpu++) {
- snprintf(buf, BUFSIZ, "%s%d",
- cpu ? "," : "", port_array[cpu]);
- write(fd, buf, strlen(buf));
+ if (proto_ver == V2_PROTOCOL) {
+ /* send set of port numbers to the client */
+ if (tracecmd_msg_send_port_array(fd, cpus, port_array) < 0)
+ goto out_free;
+ } else {
+ /* send the client a comma deliminated set of port numbers */
+ for (cpu = 0; cpu < cpus; cpu++) {
+ snprintf(buf, BUFSIZ, "%s%d",
+ cpu ? "," : "", port_array[cpu]);
+ write(fd, buf, strlen(buf));
+ }
+ /* end with null terminator */
+ write(fd, "\0", 1);
}
- /* end with null terminator */
- write(fd, "\0", 1);
return pid_array;
@@ -528,7 +559,10 @@ static void process_client(const char *node, const char *port, int fd)
return;
/* Now we are ready to start reading data from the client */
- collect_metadata_from_client(fd, ofd);
+ if (proto_ver == V2_PROTOCOL)
+ tracecmd_msg_collect_metadata(fd, ofd);
+ else
+ collect_metadata_from_client(fd, ofd);
/* wait a little to let our readers finish reading */
sleep(1);
diff --git a/trace-msg.c b/trace-msg.c
new file mode 100644
index 0000000..5669dee
--- /dev/null
+++ b/trace-msg.c
@@ -0,0 +1,682 @@
+/*
+ * trace-msg.c : define message protocol for communication between clients and
+ * a server
+ *
+ * Copyright (C) 2013 Hitachi, Ltd.
+ * Created by Yoshihiro YUNOMAE <[email protected]>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <errno.h>
+#include <poll.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <linux/types.h>
+
+#include "trace-cmd-local.h"
+#include "trace-msg.h"
+
+typedef __u32 u32;
+typedef __be32 be32;
+
+#define TRACECMD_MSG_MAX_LEN BUFSIZ
+
+ /* size + cmd */
+#define TRACECMD_MSG_HDR_LEN ((sizeof(be32)) + (sizeof(be32)))
+
+ /* + size of the metadata */
+#define TRACECMD_MSG_META_MIN_LEN \
+ ((TRACECMD_MSG_HDR_LEN) + (sizeof(be32)))
+
+ /* - header size for error msg */
+#define TRACECMD_MSG_META_MAX_LEN \
+((TRACECMD_MSG_MAX_LEN) - (TRACECMD_MSG_META_MIN_LEN) - TRACECMD_MSG_HDR_LEN)
+
+ /* size + opt_cmd + size of str */
+#define TRACECMD_OPT_MIN_LEN \
+ ((sizeof(be32)) + (sizeof(be32)) + (sizeof(be32)))
+
+
+#define CPU_MAX 256
+
+/* for both client and server */
+bool use_tcp;
+int cpu_count;
+
+/* for client */
+static int psfd;
+unsigned int page_size;
+int *client_ports;
+bool send_metadata;
+
+/* for server */
+static int *port_array;
+bool done;
+
+struct tracecmd_msg_str {
+ be32 size;
+ char *buf;
+} __attribute__((packed));
+
+struct tracecmd_msg_opt {
+ be32 size;
+ be32 opt_cmd;
+ struct tracecmd_msg_str str;
+};
+
+struct tracecmd_msg_tinit {
+ be32 cpus;
+ be32 page_size;
+ be32 opt_num;
+ struct tracecmd_msg_opt *opt;
+} __attribute__((packed));
+
+struct tracecmd_msg_rinit {
+ be32 cpus;
+ be32 port_array[CPU_MAX];
+} __attribute__((packed));
+
+struct tracecmd_msg_meta {
+ struct tracecmd_msg_str str;
+};
+
+struct tracecmd_msg_error {
+ be32 size;
+ be32 cmd;
+ union {
+ struct tracecmd_msg_tinit tinit;
+ struct tracecmd_msg_rinit rinit;
+ struct tracecmd_msg_meta meta;
+ } data;
+} __attribute__((packed));
+
+enum tracecmd_msg_cmd {
+ MSG_CLOSE = 1,
+ MSG_TINIT = 4,
+ MSG_RINIT = 5,
+ MSG_SENDMETA = 6,
+ MSG_FINMETA = 7,
+};
+
+struct tracecmd_msg {
+ be32 size;
+ be32 cmd;
+ union {
+ struct tracecmd_msg_tinit tinit;
+ struct tracecmd_msg_rinit rinit;
+ struct tracecmd_msg_meta meta;
+ struct tracecmd_msg_error err;
+ } data;
+} __attribute__((packed));
+
+struct tracecmd_msg *errmsg;
+
+static ssize_t msg_do_write_check(int fd, struct tracecmd_msg *msg)
+{
+ return __do_write_check(fd, msg, ntohl(msg->size));
+}
+
+static void tracecmd_msg_init(u32 cmd, u32 len, struct tracecmd_msg *msg)
+{
+ memset(msg, 0, len);
+ msg->size = htonl(len);
+ msg->cmd = htonl(cmd);
+}
+
+static int tracecmd_msg_alloc(u32 cmd, u32 len, struct tracecmd_msg **msg)
+{
+ len += TRACECMD_MSG_HDR_LEN;
+ *msg = malloc(len);
+ if (!*msg)
+ return -ENOMEM;
+
+ tracecmd_msg_init(cmd, len, *msg);
+ return 0;
+}
+
+static void bufcpy(void *dest, u32 offset, const void *buf, u32 buflen)
+{
+ memcpy(dest+offset, buf, buflen);
+}
+
+enum msg_opt_command {
+ MSGOPT_USETCP = 1,
+};
+
+static int add_option_to_tinit(u32 cmd, const char *buf,
+ struct tracecmd_msg *msg, int offset)
+{
+ struct tracecmd_msg_opt *opt;
+ u32 len = TRACECMD_OPT_MIN_LEN;
+ u32 buflen = 0;
+
+ if (buf) {
+ buflen = strlen(buf);
+ len += buflen;
+ }
+
+ opt = malloc(len);
+ if (!opt)
+ return -ENOMEM;
+
+ opt->size = htonl(len);
+ opt->opt_cmd = htonl(cmd);
+ opt->str.size = htonl(buflen);
+
+ if (buf)
+ bufcpy(opt, TRACECMD_OPT_MIN_LEN, buf, buflen);
+
+ /* add option to msg */
+ bufcpy(msg, offset, opt, ntohl(opt->size));
+
+ free(opt);
+ return len;
+}
+
+static int add_options_to_tinit(struct tracecmd_msg *msg)
+{
+ int offset = offsetof(struct tracecmd_msg, data.tinit.opt);
+ int ret;
+
+ if (use_tcp) {
+ ret = add_option_to_tinit(MSGOPT_USETCP, NULL, msg, offset);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int make_tinit(struct tracecmd_msg *msg)
+{
+ int opt_num = 0;
+ int ret = 0;
+
+ if (use_tcp)
+ opt_num++;
+
+ if (opt_num) {
+ ret = add_options_to_tinit(msg);
+ if (ret < 0)
+ return ret;
+ }
+
+ msg->data.tinit.cpus = htonl(cpu_count);
+ msg->data.tinit.page_size = htonl(page_size);
+ msg->data.tinit.opt_num = htonl(opt_num);
+
+ return 0;
+}
+
+/* Fill a MSG_RINIT body: be32 CPU count followed by one be32 port per CPU. */
+static int make_rinit(struct tracecmd_msg *msg)
+{
+	u32 offset = TRACECMD_MSG_HDR_LEN + sizeof(be32);	/* skip rinit.cpus */
+	be32 port;
+	int i;
+
+	if (cpu_count < 0 || cpu_count > CPU_MAX)
+		return -EINVAL;	/* port_array[] has only CPU_MAX slots */
+	msg->data.rinit.cpus = htonl(cpu_count);
+	for (i = 0; i < cpu_count; i++, offset += sizeof(be32)) {
+		port = htonl(port_array[i]);
+		/* copy one port only; a longer length reads past &port */
+		bufcpy(msg, offset, &port, sizeof(be32));
+	}
+	return 0;
+}
+
+static u32 tracecmd_msg_get_body_length(u32 cmd)
+{
+ struct tracecmd_msg *msg;
+ u32 len = 0;
+
+ switch (cmd) {
+ case MSG_TINIT:
+ len = sizeof(msg->data.tinit.cpus)
+ + sizeof(msg->data.tinit.page_size)
+ + sizeof(msg->data.tinit.opt_num);
+
+ /*
+ * If we are using IPV4 and our page size is greater than
+ * or equal to 64K, we need to punt and use TCP. :-(
+ */
+
+ /* TODO, test for ipv4 */
+ if (page_size >= UDP_MAX_PACKET) {
+ warning("page size too big for UDP using TCP in live read");
+ use_tcp = true;
+ }
+
+ if (use_tcp)
+ len += TRACECMD_OPT_MIN_LEN;
+
+ return len;
+ case MSG_RINIT:
+ return sizeof(msg->data.rinit.cpus)
+ + sizeof(msg->data.rinit.port_array);
+ case MSG_SENDMETA:
+ return TRACECMD_MSG_MAX_LEN - TRACECMD_MSG_HDR_LEN;
+ case MSG_CLOSE:
+ case MSG_FINMETA:
+ break;
+ }
+
+ return 0;
+}
+
+static int tracecmd_msg_make_body(u32 cmd, struct tracecmd_msg *msg)
+{
+ switch (cmd) {
+ case MSG_TINIT:
+ return make_tinit(msg);
+ case MSG_RINIT:
+ return make_rinit(msg);
+ case MSG_CLOSE:
+ case MSG_SENDMETA: /* meta data is not stored here. */
+ case MSG_FINMETA:
+ break;
+ }
+
+ return 0;
+}
+
+static int tracecmd_msg_create(u32 cmd, struct tracecmd_msg **msg)
+{
+ u32 len = 0;
+ int ret = 0;
+
+ len = tracecmd_msg_get_body_length(cmd);
+ if (len > (TRACECMD_MSG_MAX_LEN - TRACECMD_MSG_HDR_LEN)) {
+ plog("Exceed maximum message size cmd=%d\n", cmd);
+ return -EINVAL;
+ }
+
+ ret = tracecmd_msg_alloc(cmd, len, msg);
+ if (ret < 0)
+ return ret;
+
+ ret = tracecmd_msg_make_body(cmd, *msg);
+ if (ret < 0)
+ free(*msg);
+
+ return ret;
+}
+
+static int tracecmd_msg_send(int fd, u32 cmd)
+{
+ struct tracecmd_msg *msg = NULL;
+ int ret = 0;
+
+ if (cmd > MSG_FINMETA) {
+ plog("Unsupported command: %d\n", cmd);
+ return -EINVAL;
+ }
+
+ ret = tracecmd_msg_create(cmd, &msg);
+ if (ret < 0)
+ return ret;
+
+ ret = msg_do_write_check(fd, msg);
+ if (ret < 0)
+ ret = -ECOMM;
+
+ free(msg);
+ return ret;
+}
+
+static int tracecmd_msg_read_extra(int fd, void *buf, u32 size, int *n)
+{
+ int r = 0;
+
+ do {
+ r = read(fd, buf + *n, size);
+ if (r < 0) {
+ if (errno == EINTR)
+ continue;
+ return -errno;
+ } else if (!r)
+ return -ENOTCONN;
+ size -= r;
+ *n += r;
+ } while (size);
+
+ return 0;
+}
+
+/*
+ * Read header information of msg first, then read all data
+ */
+static int tracecmd_msg_recv(int fd, struct tracecmd_msg *msg)
+{
+ u32 size = 0;
+ int n = 0;
+ int ret;
+
+ ret = tracecmd_msg_read_extra(fd, msg, TRACECMD_MSG_HDR_LEN, &n);
+ if (ret < 0)
+ return ret;
+
+ size = ntohl(msg->size);
+ if (size > TRACECMD_MSG_MAX_LEN)
+ /* too big */
+ goto error;
+ else if (size < TRACECMD_MSG_HDR_LEN)
+ /* too small */
+ goto error;
+ else if (size > TRACECMD_MSG_HDR_LEN) {
+ size -= TRACECMD_MSG_HDR_LEN;
+ return tracecmd_msg_read_extra(fd, msg, size, &n);
+ }
+
+ return 0;
+error:
+ plog("Receive an invalid message(size=%d)\n", size);
+ return -ENOMSG;
+}
+
+static void *tracecmd_msg_buf_access(struct tracecmd_msg *msg, int offset)
+{
+ return (void *)msg + offset;
+}
+
+static int tracecmd_msg_wait_for_msg(int fd, struct tracecmd_msg *msg)
+{
+ u32 cmd;
+ int ret;
+
+ ret = tracecmd_msg_recv(fd, msg);
+ if (ret < 0)
+ return ret;
+
+ cmd = ntohl(msg->cmd);
+ if (cmd == MSG_CLOSE)
+ return -ECONNABORTED;
+
+ return 0;
+}
+
+static int tracecmd_msg_send_and_wait_for_msg(int fd, u32 cmd, struct tracecmd_msg *msg)
+{
+ int ret;
+
+ ret = tracecmd_msg_send(fd, cmd);
+ if (ret < 0)
+ return ret;
+
+ ret = tracecmd_msg_wait_for_msg(fd, msg);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/* Client: send MSG_TINIT, then record the ports from the server's MSG_RINIT. */
+int tracecmd_msg_send_init_data(int fd)
+{
+	char buf[TRACECMD_MSG_MAX_LEN];
+	struct tracecmd_msg *msg = (struct tracecmd_msg *)buf;
+	int i, cpus, ret;
+
+	ret = tracecmd_msg_send_and_wait_for_msg(fd, MSG_TINIT, msg);
+	if (ret < 0)
+		return ret;
+
+	/* validate the reply before indexing port_array[CPU_MAX] with it */
+	cpus = ntohl(msg->data.rinit.cpus);
+	if (ntohl(msg->cmd) != MSG_RINIT || cpus < 0 || cpus > CPU_MAX)
+		return -EINVAL;
+	client_ports = malloc_or_die(sizeof(int) * cpus);
+	for (i = 0; i < cpus; i++)
+		client_ports[i] = ntohl(msg->data.rinit.port_array[i]);
+
+	send_metadata = true;	/* Next, send meta data */
+	return 0;
+}
+
+static bool process_option(struct tracecmd_msg_opt *opt)
+{
+ /* currently the only option we have is to us TCP */
+ if (ntohl(opt->opt_cmd) == MSGOPT_USETCP) {
+ use_tcp = true;
+ return true;
+ }
+ return false;
+}
+
+static void error_operation_for_server(struct tracecmd_msg *msg)
+{
+ u32 cmd;
+
+ cmd = ntohl(msg->cmd);
+
+ warning("Message: cmd=%d size=%d\n", cmd, ntohl(msg->size));
+}
+
+#define MAX_OPTION_SIZE 4096
+
+/*
+ * Server: receive MSG_TINIT from @fd and extract the client's CPU count, page
+ * size and options.  Returns 0, or a negative errno-style value.
+ */
+int tracecmd_msg_initial_setting(int fd, int *cpus, int *pagesize)
+{
+	char buf[TRACECMD_MSG_MAX_LEN];
+	struct tracecmd_msg *msg = (struct tracecmd_msg *)buf;
+	struct tracecmd_msg_opt *opt;
+	u32 offset = offsetof(struct tracecmd_msg, data.tinit.opt);
+	u32 msg_len, size;
+	int options, i;
+	int ret;
+
+	ret = tracecmd_msg_recv(fd, msg);
+	if (ret < 0)
+		return ret;
+
+	/* every validation failure below reports -EINVAL */
+	ret = -EINVAL;
+	msg_len = ntohl(msg->size);
+	if (ntohl(msg->cmd) != MSG_TINIT || msg_len < offset)
+		goto error;
+
+	*cpus = ntohl(msg->data.tinit.cpus);
+	plog("cpus=%d\n", *cpus);
+	if (*cpus < 0)
+		goto error;
+
+	*pagesize = ntohl(msg->data.tinit.page_size);
+	plog("pagesize=%d\n", *pagesize);
+	if (*pagesize <= 0)
+		goto error;
+
+	options = ntohl(msg->data.tinit.opt_num);
+	for (i = 0; i < options; i++, offset += size) {
+		/* the option header must lie inside the received message */
+		if (msg_len - offset < TRACECMD_OPT_MIN_LEN)
+			goto error;
+		opt = tracecmd_msg_buf_access(msg, offset);
+		size = ntohl(opt->size);
+		/* prevent a client from killing us */
+		if (size > MAX_OPTION_SIZE) {
+			plog("Exceed MAX_OPTION_SIZE\n");
+			goto error;
+		}
+		/* and so must the whole option, or offset would leave buf */
+		if (size < TRACECMD_OPT_MIN_LEN || size > msg_len - offset)
+			goto error;
+		/* do we understand this option? */
+		if (!process_option(opt)) {
+			plog("Cannot understand(%d:%d:%d)\n",
+			     i, ntohl(opt->size), ntohl(opt->opt_cmd));
+			goto error;
+		}
+	}
+
+	return 0;
+
+error:
+	error_operation_for_server(msg);
+	return ret;
+}
+
+int tracecmd_msg_send_port_array(int fd, int total_cpus, int *ports)
+{
+ int ret;
+
+ cpu_count = total_cpus;
+ port_array = ports;
+
+ ret = tracecmd_msg_send(fd, MSG_RINIT);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+void tracecmd_msg_send_close_msg(void)
+{
+ tracecmd_msg_send(psfd, MSG_CLOSE);
+}
+
+static void make_meta(const char *buf, int buflen, struct tracecmd_msg *msg)
+{
+ int offset = offsetof(struct tracecmd_msg, data.meta.str.buf);
+
+ msg->data.meta.str.size = htonl(buflen);
+ bufcpy(msg, offset, buf, buflen);
+}
+
+int tracecmd_msg_metadata_send(int fd, const char *buf, int size)
+{
+ struct tracecmd_msg *msg;
+ int n, len;
+ int ret;
+ int count = 0;
+
+ ret = tracecmd_msg_create(MSG_SENDMETA, &msg);
+ if (ret < 0)
+ return ret;
+
+ n = size;
+ do {
+ if (n > TRACECMD_MSG_META_MAX_LEN) {
+ make_meta(buf+count, TRACECMD_MSG_META_MAX_LEN, msg);
+ n -= TRACECMD_MSG_META_MAX_LEN;
+ count += TRACECMD_MSG_META_MAX_LEN;
+ } else {
+ make_meta(buf+count, n, msg);
+ /*
+ * TRACECMD_MSG_META_MAX_LEN is stored in msg->size,
+ * so update the size to the correct value.
+ */
+ len = TRACECMD_MSG_META_MIN_LEN + n;
+ msg->size = htonl(len);
+ n = 0;
+ }
+
+ ret = msg_do_write_check(fd, msg);
+ if (ret < 0)
+ break;
+ } while (n);
+
+ free(msg);
+ return ret;
+}
+
+int tracecmd_msg_finish_sending_metadata(int fd)
+{
+ int ret;
+
+ ret = tracecmd_msg_send(fd, MSG_FINMETA);
+ if (ret < 0)
+ return ret;
+
+ /* psfd will be used for closing */
+ psfd = fd;
+ return 0;
+}
+
+/*
+ * Server: append MSG_SENDMETA payloads from @ifd to @ofd until MSG_FINMETA,
+ * then expect MSG_CLOSE.  Returns 0, or a negative errno-style value.
+ */
+int tracecmd_msg_collect_metadata(int ifd, int ofd)
+{
+	char buf[TRACECMD_MSG_MAX_LEN];
+	struct tracecmd_msg *msg = (struct tracecmd_msg *)buf;
+	const u32 offset = TRACECMD_MSG_META_MIN_LEN;
+	u32 n, written, cmd;
+	ssize_t w;	/* signed: write(2) reports failure as -1 */
+	int ret;
+
+	do {
+		ret = tracecmd_msg_recv(ifd, msg);
+		if (ret < 0) {
+			warning("reading client");
+			return ret;
+		}
+
+		cmd = ntohl(msg->cmd);
+		if (cmd == MSG_FINMETA)
+			break;	/* Finish receiving meta data */
+
+		/* refuse other commands and payloads longer than the message */
+		n = ntohl(msg->data.meta.str.size);
+		if (cmd != MSG_SENDMETA || ntohl(msg->size) < offset ||
+		    n > ntohl(msg->size) - offset) {
+			ret = -EINVAL;
+			goto error;
+		}
+
+		for (written = 0; written < n; ) {
+			w = write(ofd, buf + offset + written, n - written);
+			if (w >= 0) {
+				written += w;
+			} else if (errno != EINTR) {
+				ret = -errno;	/* save errno before any other call */
+				warning("writing to file");
+				return ret;
+			}
+		}
+	} while (!done);
+
+	/* check the finish message of the client */
+	if (!done) {
+		ret = tracecmd_msg_recv(ifd, msg);
+		if (ret < 0) {
+			warning("reading client");
+			return ret;
+		}
+		if (ntohl(msg->cmd) != MSG_CLOSE) {
+			warning("Not accept the message %d", ntohl(msg->cmd));
+			ret = -EINVAL;
+			goto error;
+		}
+	}
+
+	return 0;
+
+error:
+	error_operation_for_server(msg);
+	return ret;
+}
diff --git a/trace-msg.h b/trace-msg.h
new file mode 100644
index 0000000..b23e72b
--- /dev/null
+++ b/trace-msg.h
@@ -0,0 +1,27 @@
+#ifndef _TRACE_MSG_H_
+#define _TRACE_MSG_H_
+
+#include <stdbool.h>
+
+#define UDP_MAX_PACKET (65536 - 20)
+#define V2_MAGIC "677768\0"
+
+#define V1_PROTOCOL 1
+#define V2_PROTOCOL 2
+
+/* for both client and server */
+extern bool use_tcp;
+extern int cpu_count;
+
+/* for client */
+extern unsigned int page_size;
+extern int *client_ports;
+extern bool send_metadata;
+
+/* for server */
+extern bool done;
+
+void plog(const char *fmt, ...);
+void pdie(const char *fmt, ...);
+
+#endif /* _TRACE_MSG_H_ */
diff --git a/trace-output.c b/trace-output.c
index 2141d10..11d7827 100644
--- a/trace-output.c
+++ b/trace-output.c
@@ -37,6 +37,7 @@
#include "trace-cmd-local.h"
#include "list.h"
+#include "trace-msg.h"
#include "version.h"
/* We can't depend on the host size for size_t, all must be 64 bit */
@@ -82,6 +83,9 @@ struct list_event_system {
static stsize_t
do_write_check(struct tracecmd_output *handle, const void *data, tsize_t size)
{
+ if (send_metadata)
+ return tracecmd_msg_metadata_send(handle->fd, data, size);
+
return __do_write_check(handle->fd, data, size);
}
diff --git a/trace-record.c b/trace-record.c
index c387aff..89f4883 100644
--- a/trace-record.c
+++ b/trace-record.c
@@ -46,6 +46,7 @@
#include <errno.h>
#include "trace-local.h"
+#include "trace-msg.h"
#define _STR(x) #x
#define STR(x) _STR(x)
@@ -72,17 +73,14 @@ enum trace_type {
static int rt_prio;
-static int use_tcp;
-
static int keep;
-static unsigned int page_size;
+unsigned int page_size;
static const char *output_file = "trace.dat";
static int latency;
static int sleep_time = 1000;
-static int cpu_count;
static int recorder_threads;
static struct pid_record_data *pids;
static int buffers;
@@ -91,7 +89,6 @@ static int buffers;
static int clear_function_filters;
static char *host;
-static int *client_ports;
static int sfd;
static struct tracecmd_output *network_handle;
@@ -113,6 +110,7 @@ static unsigned recorder_flags;
/* Try a few times to get an accurate date */
static int date2ts_tries = 5;
+static int proto_ver = V2_PROTOCOL;
static struct func_list *graph_funcs;
static int func_stack;
@@ -2367,20 +2365,26 @@ static int create_recorder(struct buffer_instance *instance, int cpu,
exit(0);
}
-static void communicate_with_listener(int fd)
+static void check_first_msg_from_server(int fd)
{
char buf[BUFSIZ];
- ssize_t n;
- int cpu, i;
- n = read(fd, buf, 8);
+ read(fd, buf, 8);
/* Make sure the server is the tracecmd server */
if (memcmp(buf, "tracecmd", 8) != 0)
die("server not tracecmd server");
+}
- /* write the number of CPUs we have (in ASCII) */
+static void communicate_with_listener_v1(int fd)
+{
+ char buf[BUFSIZ];
+ ssize_t n;
+ int cpu, i;
+ check_first_msg_from_server(fd);
+
+ /* write the number of CPUs we have (in ASCII) */
sprintf(buf, "%d", cpu_count);
/* include \0 */
@@ -2435,6 +2439,52 @@ static void communicate_with_listener(int fd)
}
}
+static void communicate_with_listener_v2(int fd)
+{
+ if (tracecmd_msg_send_init_data(fd) < 0)
+ die("Cannot communicate with server");
+}
+
+static void check_protocol_version(int fd)
+{
+ char buf[BUFSIZ];
+ int ret;
+
+ check_first_msg_from_server(fd);
+
+ /*
+ * Write dummy CPU number(-1) in order to make old server not create
+ * zero length file, the protocol version(V2), the magic number,
+ * and the dummy option(0) (in ASCII). The client understands whether
+ * the server uses the v2 protocol or not by checking a reply message
+ * from the server. If the message is "V2", the server uses v2
+ * protocol. On the other hand, if the message is just number strings,
+ * the server returned port numbers. So, in that time, the client
+ * understands the server uses the v1 protocol. However, the old server
+ * tells the client port numbers after reading cpu_count, page_size,
+ * and option. So, we add the dummy number (the magic number and 0
+ * option) to the first client message.
+ */
+ ret = write(fd, "-1V2\0"V2_MAGIC"\0", strlen(V2_MAGIC)+6);
+ if (ret < 0)
+ die("Cannot send initial message");
+
+ /* read a reply message */
+ ret = read(fd, buf, BUFSIZ);
+ if (ret < 0) {
+ if (errno == ECONNRESET) {
+ /* the server uses the v1 protocol, so we'll use it */
+ proto_ver = V1_PROTOCOL;
+ plog("Use the v1 protocol\n");
+ } else
+ die("Cannot read initial message");
+ } else {
+ if (memcmp(buf, "V2", 2) != 0)
+ die("Cannot handle the protocol %s", buf);
+ /* OK, let's use v2 protocol */
+ }
+}
+
static void setup_network(void)
{
struct addrinfo hints;
@@ -2462,6 +2512,7 @@ static void setup_network(void)
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
+again:
s = getaddrinfo(server, port, &hints, &result);
if (s != 0)
die("getaddrinfo: %s", gai_strerror(s));
@@ -2482,16 +2533,32 @@ static void setup_network(void)
freeaddrinfo(result);
- communicate_with_listener(sfd);
+ if (proto_ver == V2_PROTOCOL) {
+ check_protocol_version(sfd);
+ if (proto_ver == V1_PROTOCOL) {
+ /* reconnect to the server for using the v1 protocol */
+ close(sfd);
+ goto again;
+ }
+ communicate_with_listener_v2(sfd);
+ }
+
+ if (proto_ver == V1_PROTOCOL)
+ communicate_with_listener_v1(sfd);
/* Now create the handle through this socket */
network_handle = tracecmd_create_init_fd_glob(sfd, listed_events);
+ if (proto_ver == V2_PROTOCOL)
+ tracecmd_msg_finish_sending_metadata(sfd);
+
/* OK, we are all set, let'r rip! */
}
static void finish_network(void)
{
+ if (proto_ver == V2_PROTOCOL)
+ tracecmd_msg_send_close_msg();
close(sfd);
free(host);
}
From: Yoshihiro YUNOMAE <[email protected]>
Use poll(2) to wait for a message. If a client/server cannot send a message for
any reason, the current server/client will wait in a blocking read operation.
So, we use poll(2) to avoid remaining in a blocking state.
Signed-off-by: Yoshihiro YUNOMAE <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
---
Changes in V4: Change the argument of tracecmd_msg_recv_wait()
Fix some typos
---
trace-msg.c | 42 ++++++++++++++++++++++++++++++++++++------
1 file changed, 36 insertions(+), 6 deletions(-)
diff --git a/trace-msg.c b/trace-msg.c
index 5669dee..e3d4f3f 100644
--- a/trace-msg.c
+++ b/trace-msg.c
@@ -395,6 +395,27 @@ error:
return -ENOMSG;
}
+#define MSG_WAIT_MSEC 5000
+
+/*
+ * A return value of -ETIMEDOUT indicates time-out
+ */
+static int tracecmd_msg_recv_wait(int fd, struct tracecmd_msg *msg)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.fd = fd;
+ pfd.events = POLLIN;
+ ret = poll(&pfd, 1, MSG_WAIT_MSEC);
+ if (ret < 0)
+ return -errno;
+ else if (ret == 0)
+ return -ETIMEDOUT;
+
+ return tracecmd_msg_recv(fd, msg);
+}
+
static void *tracecmd_msg_buf_access(struct tracecmd_msg *msg, int offset)
{
return (void *)msg + offset;
@@ -405,9 +426,12 @@ static int tracecmd_msg_wait_for_msg(int fd, struct tracecmd_msg *msg)
u32 cmd;
int ret;
- ret = tracecmd_msg_recv(fd, msg);
- if (ret < 0)
+ ret = tracecmd_msg_recv_wait(fd, msg);
+ if (ret < 0) {
+ if (ret == -ETIMEDOUT)
+ warning("Connection timed out\n");
return ret;
+ }
cmd = ntohl(msg->cmd);
if (cmd == MSG_CLOSE)
@@ -487,9 +511,12 @@ int tracecmd_msg_initial_setting(int fd, int *cpus, int *pagesize)
u32 cmd;
msg = (struct tracecmd_msg *)buf;
- ret = tracecmd_msg_recv(fd, msg);
- if (ret < 0)
+ ret = tracecmd_msg_recv_wait(fd, msg);
+ if (ret < 0) {
+ if (ret == -ETIMEDOUT)
+ warning("Connection timed out\n");
return ret;
+ }
cmd = ntohl(msg->cmd);
if (cmd != MSG_TINIT) {
@@ -627,9 +654,12 @@ int tracecmd_msg_collect_metadata(int ifd, int ofd)
msg = (struct tracecmd_msg *)buf;
do {
- ret = tracecmd_msg_recv(ifd, msg);
+ ret = tracecmd_msg_recv_wait(ifd, msg);
if (ret < 0) {
- warning("reading client");
+ if (ret == -ETIMEDOUT)
+ warning("Connection timed out\n");
+ else
+ warning("reading client");
return ret;
}
From: Yoshihiro YUNOMAE <[email protected]>
Add the virt-server mode for a virtualization environment
based on the listen mode. This mode works as a client/server
mode not over TCP/UDP but over a virtio-serial channel. Since the
throughput of trace data can be huge, a traditional IP network
easily incurs high overhead. Using virtio-serial can reduce
overhead because it can skip the guest/host TCP/IP network stack.
virt-server uses two kinds of virtio-serial I/Fs:
(1) agent-ctl-path(UNIX domain socket)
=> control path of the agent trace-cmd on each guest
(2) trace-path-cpuX(named pipe)
=> trace data path for each vcpu
Those I/Fs must be defined as below paths:
(1) /tmp/trace-cmd/virt/agent-ctl-path
(2) /tmp/trace-cmd/virt/<guest domain>/trace-path-cpuX
If we run virt-server, agent-ctl-path I/F is automatically created because
virt-server operates as a server mode of UNIX domain socket. However,
trace-path-cpuX is not automatically created because we need to separate
trace data for each guest.
Over the virtio-serial, V2 protocol is slightly changed since
the server can not notice when the client connects. The detail
is described in Documentation/Protocol.txt.
NOTE:
This feature requires SELinux to be disabled (or set to permissive)
since qemu has to open a (non-registered) unix domain socket.
<How to set up>
1. Run virt-server on a host before booting guests
# trace-cmd virt-server
2. Make guest domain directory
# mkdir -p /tmp/trace-cmd/virt/<domain>
# chmod 710 /tmp/trace-cmd/virt/<domain>
# chgrp qemu /tmp/trace-cmd/virt/<domain>
3. Make FIFO on the host
# mkfifo /tmp/trace-cmd/virt/<domain>/trace-path-cpu{0,1,...,X}.{in,out}
4. Set up virtio-serial pipes of the guest on the host
Add the following tags to domain XML files.
# virsh edit <domain>
<channel type='unix'>
<source mode='connect' path='/tmp/trace-cmd/virt/agent-ctl-path'/>
<target type='virtio' name='agent-ctl-path'/>
</channel>
<channel type='pipe'>
<source path='/tmp/trace-cmd/virt/<domain>/trace-path-cpu0'/>
<target type='virtio' name='trace-path-cpu0'/>
</channel>
... (cpu1, cpu2, ...)
5. Boot the guest
# virsh start <domain>
6. Check I/F of virtio-serial on the guest
# ls /dev/virtio-ports
...
agent-ctl-path
...
trace-path-cpu0
...
Next, the user will run trace-cmd with record --virt options or other options
for virtualization on the guest.
This patch adds only minimum features of virt-server as follows:
<Features>
- virt-server subcommand
- Create I/F directory(/tmp/trace-cmd/virt/)
- Use named pipe I/Fs of virtio-serial for trace data paths
- Use UNIX domain socket for connecting clients on guests
- Use splice(2) for collecting trace data of guests
<Restrictions>
- libvirt is required for finding guest domain name
- User must setup fifos by hand
- Do not support hotplug VCPUs
- Interface directory is fixed
- SELinux should be disabled
Signed-off-by: Yoshihiro YUNOMAE <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
---
Changes in V5: Change patch description
Update protocol document
Changes in V4: Fix some typos and cleanup
Changes in V3: Change _nw/_NW to _net/_NET
---
Documentation/Protocol.txt | 44 +++
Documentation/trace-cmd-virt-server.1.txt | 89 ++++++
trace-cmd.c | 3
trace-cmd.h | 2
trace-listen.c | 467 ++++++++++++++++++++++++-----
trace-msg.c | 105 ++++++-
trace-recorder.c | 50 ++-
trace-usage.c | 10 +
8 files changed, 667 insertions(+), 103 deletions(-)
create mode 100644 Documentation/trace-cmd-virt-server.1.txt
diff --git a/Documentation/Protocol.txt b/Documentation/Protocol.txt
index 49f7766..52df89e 100644
--- a/Documentation/Protocol.txt
+++ b/Documentation/Protocol.txt
@@ -6,6 +6,7 @@ Index
1. What is the trace-cmd protocol?
2. Trace-cmd Protocol V1 (Obsolete)
3. Trace-cmd Protocol V2
+4. Trace-cmd Protocol V2 in virt-server mode
1. What is the trace-cmd protocol?
@@ -117,3 +118,46 @@ or not by checking the first message from the client. If client
sends a positive number, it should be a V1 protocol client.
+4. Trace-cmd Protocol V2 in virt-server mode
+============================================
+
+In the virt-server mode, trace-cmd uses a control channel and
+trace data channels of virtio-serial to transfer trace data.
+
+Since the virtio-serial channel is just a character device
+on the guest, the server can not notice when a client attaches
+to (means opens) the channel. Thus, the server waits for the
+connection message MSG_TCONNECT from the client on the control
+channel. The protocol flow is as follows;
+
+ <server> <client>
+ Open a control channel
+ wait for MSG_TCONNECT
+ open a virtio-serial channel
+ send MSG_TCONNECT
+ receive MSG_TCONNECT <----+
+ send MSG_RCONNECT
+ +---------------> receive MSG_RCONNECT
+ check "tracecmd-V2"
+ send MSG_TINIT with cpus, pagesize and options
+ receive MSG_TINIT <-------+
+ parse the parameters
+ send MSG_RINIT with port_array
+ +----------------> receive MSG_RINIT
+ get port_array
+ send meta data(MSG_SENDMETA)
+ receive MSG_SENDMETA <----+
+ record meta data
+ (snip)
+ send a message to finish sending meta data
+ | (MSG_FINMETA)
+ receive MSG_FINMETA <-----+
+ read block
+ --- start sending trace data on child processes ---
+
+ --- When client finishes sending trace data ---
+ send MSG_CLOSE
+ receive MSG_CLOSE <-------+
+ close the virtio-serial channel
+
+
diff --git a/Documentation/trace-cmd-virt-server.1.txt b/Documentation/trace-cmd-virt-server.1.txt
new file mode 100644
index 0000000..b775745
--- /dev/null
+++ b/Documentation/trace-cmd-virt-server.1.txt
@@ -0,0 +1,89 @@
+TRACE-CMD-VIRT-SERVER(1)
+========================
+
+NAME
+----
+trace-cmd-virt-server - listen for incoming connection to record tracing of
+ guests' clients
+
+SYNOPSIS
+--------
+*trace-cmd virt-server* ['OPTIONS']
+
+DESCRIPTION
+-----------
+The trace-cmd(1) virt-server sets up UNIX domain socket I/F for communicating
+with guests' clients that run 'trace-cmd-record(1)' with the *--virt* option.
+When a connection is made, and the guest's client sends data, it will create a
+file called 'trace.DOMAIN.dat'. Where DOMAIN is the name of the guest named
+by libvirt.
+
+OPTIONS
+-------
+*-D*::
+ This option causes trace-cmd virt-server to go into a daemon mode and run in
+ the background.
+
+*-d* 'dir'::
+ This option specifies a directory to write the data files into.
+
+*-o* 'filename'::
+ This option overrides the default 'trace' in the 'trace.DOMAIN.dat' that
+ is created when guest's client connects.
+
+*-l* 'filename'::
+ This option writes the output messages to a log file instead of standard output.
+
+SETTING
+-------
+Here, an example is written as follows:
+
+1. Run virt-server on a host
+ # trace-cmd virt-server
+
+2. Make guest domain directory
+ # mkdir -p /tmp/trace-cmd/virt/<DOMAIN>
+ # chmod 710 /tmp/trace-cmd/virt/<DOMAIN>
+ # chgrp qemu /tmp/trace-cmd/virt/<DOMAIN>
+
+3. Make FIFO on the host
+ # mkfifo /tmp/trace-cmd/virt/<DOMAIN>/trace-path-cpu{0,1,...,X}.{in,out}
+
+4. Set up of virtio-serial pipe of a guest on the host
+ Add the following tags to domain XML files.
+ # virsh edit <DOMAIN>
+ <channel type='unix'>
+ <source mode='connect' path='/tmp/trace-cmd/virt/agent-ctl-path'/>
+ <target type='virtio' name='agent-ctl-path'/>
+ </channel>
+ <channel type='pipe'>
+ <source path='/tmp/trace-cmd/virt/<DOMAIN>/trace-path-cpu0'/>
+ <target type='virtio' name='trace-path-cpu0'/>
+ </channel>
+ ... (cpu1, cpu2, ...)
+
+5. Boot the guest
+ # virsh start <DOMAIN>
+
+6. Run the guest's client(see trace-cmd-record(1) with the *--virt* option)
+ # trace-cmd record -e sched* --virt
+
+SEE ALSO
+--------
+trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
+trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1),
+trace-cmd-split(1), trace-cmd-list(1)
+
+AUTHOR
+------
+Written by Masami Hiramatsu <[email protected]>
+
+RESOURCES
+---------
+git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/trace-cmd.git
+
+COPYING
+-------
+Copyright \(C) 2013,2014 Hitachi, Ltd. Free use of this software is
+granted under the terms of the GNU Public License (GPL).
+
diff --git a/trace-cmd.c b/trace-cmd.c
index 4c5b564..29a2bb8 100644
--- a/trace-cmd.c
+++ b/trace-cmd.c
@@ -425,7 +425,8 @@ int main (int argc, char **argv)
} else if (strcmp(argv[1], "mem") == 0) {
trace_mem(argc, argv);
exit(0);
- } else if (strcmp(argv[1], "listen") == 0) {
+ } else if (strcmp(argv[1], "listen") == 0 ||
+ strcmp(argv[1], "virt-server") == 0) {
trace_listen(argc, argv);
exit(0);
} else if (strcmp(argv[1], "split") == 0) {
diff --git a/trace-cmd.h b/trace-cmd.h
index 1261e23..a93920f 100644
--- a/trace-cmd.h
+++ b/trace-cmd.h
@@ -257,6 +257,7 @@ struct tracecmd_recorder *tracecmd_create_recorder_maxkb(const char *file, int c
struct tracecmd_recorder *tracecmd_create_buffer_recorder_fd(int fd, int cpu, unsigned flags, const char *buffer);
struct tracecmd_recorder *tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags, const char *buffer);
struct tracecmd_recorder *tracecmd_create_buffer_recorder_maxkb(const char *file, int cpu, unsigned flags, const char *buffer, int maxkb);
+struct tracecmd_recorder *tracecmd_create_recorder_virt(const char *file, int cpu, int trace_fd);
int tracecmd_start_recording(struct tracecmd_recorder *recorder, unsigned long sleep);
void tracecmd_stop_recording(struct tracecmd_recorder *recorder);
@@ -270,6 +271,7 @@ int tracecmd_msg_finish_sending_metadata(int fd);
void tracecmd_msg_send_close_msg(void);
/* for server */
+int tracecmd_msg_set_connection(int fd, const char *domain);
int tracecmd_msg_initial_setting(int fd, int *cpus, int *pagesize);
int tracecmd_msg_send_port_array(int fd, int total_cpus, int *ports);
int tracecmd_msg_collect_metadata(int ifd, int ofd);
diff --git a/trace-listen.c b/trace-listen.c
index 17ab184..718680f 100644
--- a/trace-listen.c
+++ b/trace-listen.c
@@ -23,9 +23,13 @@
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
+#include <grp.h>
+#include <sys/stat.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
+#include <sys/epoll.h>
+#include <sys/un.h>
#include <netdb.h>
#include <unistd.h>
#include <fcntl.h>
@@ -50,19 +54,42 @@ static int backlog = 5;
static int proto_ver;
-#define TEMP_FILE_STR "%s.%s:%s.cpu%d", output_file, host, port, cpu
-static char *get_temp_file(const char *host, const char *port, int cpu)
+enum {
+ NET = 1,
+ VIRT = 2,
+};
+
+#define TEMP_FILE_STR_NET "%s.%s:%s.cpu%d", output_file, host, port, cpu
+#define TEMP_FILE_STR_VIRT "%s.%s:%d.cpu%d", output_file, domain, virtpid, cpu
+static char *get_temp_file(const char *host, const char *port,
+ const char *domain, int virtpid, int cpu, int mode)
{
char *file = NULL;
int size;
- size = snprintf(file, 0, TEMP_FILE_STR);
- file = malloc_or_die(size + 1);
- sprintf(file, TEMP_FILE_STR);
+ if (mode == NET) {
+ size = snprintf(file, 0, TEMP_FILE_STR_NET);
+ file = malloc_or_die(size + 1);
+ sprintf(file, TEMP_FILE_STR_NET);
+ } else if (mode == VIRT) {
+ size = snprintf(file, 0, TEMP_FILE_STR_VIRT);
+ file = malloc_or_die(size + 1);
+ sprintf(file, TEMP_FILE_STR_VIRT);
+ }
return file;
}
+static char *get_temp_file_net(const char *host, const char *port, int cpu)
+{
+ return get_temp_file(host, port, NULL, 0, cpu, NET);
+}
+
+static char *get_temp_file_virt(const char *domain, int virtpid, int cpu)
+{
+ return get_temp_file(NULL, NULL, domain, virtpid, cpu, VIRT);
+}
+
static void put_temp_file(char *file)
{
free(file);
@@ -81,11 +108,15 @@ static void signal_setup(int sig, sighandler_t handle)
sigaction(sig, &action, NULL);
}
-static void delete_temp_file(const char *host, const char *port, int cpu)
+static void delete_temp_file(const char *host, const char *port,
+ const char *domain, int virtpid, int cpu, int mode)
{
char file[MAX_PATH];
- snprintf(file, MAX_PATH, TEMP_FILE_STR);
+ if (mode == NET)
+ snprintf(file, MAX_PATH, TEMP_FILE_STR_NET);
+ else if (mode == VIRT)
+ snprintf(file, MAX_PATH, TEMP_FILE_STR_VIRT);
unlink(file);
}
@@ -113,8 +144,12 @@ static int process_option(char *option)
return 0;
}
+static struct tracecmd_recorder *recorder;
+
static void finish(int sig)
{
+ if (recorder)
+ tracecmd_stop_recording(recorder);
done = 1;
}
@@ -184,7 +219,7 @@ static void process_udp_child(int sfd, const char *host, const char *port,
signal_setup(SIGUSR1, finish);
- tempfile = get_temp_file(host, port, cpu);
+ tempfile = get_temp_file_net(host, port, cpu);
fd = open(tempfile, O_WRONLY | O_TRUNC | O_CREAT, 0644);
if (fd < 0)
pdie("creating %s", tempfile);
@@ -225,6 +260,28 @@ static void process_udp_child(int sfd, const char *host, const char *port,
exit(0);
}
+#define SLEEP_DEFAULT 1000
+
+static void process_virt_child(int fd, int cpu, int pagesize,
+ const char *domain, int virtpid)
+{
+ char *tempfile;
+
+ signal_setup(SIGUSR1, finish);
+ tempfile = get_temp_file_virt(domain, virtpid, cpu);
+
+ recorder = tracecmd_create_recorder_virt(tempfile, cpu, fd);
+
+ do {
+ if (tracecmd_start_recording(recorder, SLEEP_DEFAULT) < 0)
+ break;
+ } while (!done);
+
+ tracecmd_free_recorder(recorder);
+ put_temp_file(tempfile);
+ exit(0);
+}
+
#define START_PORT_SEARCH 1500
#define MAX_PORT_SEARCH 6000
@@ -272,20 +329,37 @@ static int udp_bind_a_port(int start_port, int *sfd)
return num_port;
}
-static void fork_udp_reader(int sfd, const char *node, const char *port,
- int *pid, int cpu, int pagesize)
+static void fork_reader(int sfd, const char *node, const char *port,
+ int *pid, int cpu, int pagesize, const char *domain,
+ int virtpid, int mode)
{
*pid = fork();
if (*pid < 0)
- pdie("creating udp reader");
+ pdie("creating reader");
- if (!*pid)
- process_udp_child(sfd, node, port, cpu, pagesize);
+ if (!*pid) {
+ if (mode == NET)
+ process_udp_child(sfd, node, port, cpu, pagesize);
+ else if (mode == VIRT)
+ process_virt_child(sfd, cpu, pagesize, domain, virtpid);
+ }
close(sfd);
}
+static void fork_udp_reader(int sfd, const char *node, const char *port,
+ int *pid, int cpu, int pagesize)
+{
+ fork_reader(sfd, node, port, pid, cpu, pagesize, NULL, 0, NET);
+}
+
+static void fork_virt_reader(int sfd, int *pid, int cpu, int pagesize,
+ const char *domain, int virtpid)
+{
+ fork_reader(sfd, NULL, NULL, pid, cpu, pagesize, domain, virtpid, VIRT);
+}
+
static int open_udp(const char *node, const char *port, int *pid,
int cpu, int pagesize, int start_port)
{
@@ -305,6 +379,29 @@ static int open_udp(const char *node, const char *port, int *pid,
return num_port;
}
+#define TRACE_CMD_DIR "/tmp/trace-cmd/"
+#define VIRT_DIR TRACE_CMD_DIR "virt/"
+#define VIRT_TRACE_CTL_SOCK VIRT_DIR "agent-ctl-path"
+#define TRACE_PATH_DOMAIN_CPU VIRT_DIR "%s/trace-path-cpu%d.out"
+
+static int open_virtio_serial_pipe(int *pid, int cpu, int pagesize,
+ const char *domain, int virtpid)
+{
+ char buf[PATH_MAX];
+ int fd;
+
+ snprintf(buf, PATH_MAX, TRACE_PATH_DOMAIN_CPU, domain, cpu);
+ fd = open(buf, O_RDONLY | O_NONBLOCK);
+ if (fd < 0) {
+ warning("open %s", buf);
+ return fd;
+ }
+
+ fork_virt_reader(fd, pid, cpu, pagesize, domain, virtpid);
+
+ return fd;
+}
+
/* Setup client who is using the v1 protocol */
static int client_initial_setting(int fd, char *buf, int *cpus, int *pagesize)
{
@@ -369,7 +466,7 @@ static int client_initial_setting(int fd, char *buf, int *cpus, int *pagesize)
return 0;
}
-static int communicate_with_client(int fd, int *cpus, int *pagesize)
+static int communicate_with_client_net(int fd, int *cpus, int *pagesize)
{
char buf[BUFSIZ];
int n;
@@ -407,12 +504,32 @@ static int communicate_with_client(int fd, int *cpus, int *pagesize)
return 0;
}
-static int create_client_file(const char *node, const char *port)
+static int communicate_with_client_virt(int fd, const char *domain, int *cpus, int *pagesize)
+{
+ proto_ver = V2_PROTOCOL;
+
+ if (tracecmd_msg_set_connection(fd, domain) < 0)
+ return -1;
+
+ /* read the CPU count, the page size, and options */
+ if (tracecmd_msg_initial_setting(fd, cpus, pagesize) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int create_client_file(const char *node, const char *port,
+ const char *domain, int pid, int mode)
{
char buf[BUFSIZ];
int ofd;
- snprintf(buf, BUFSIZ, "%s.%s:%s.dat", output_file, node, port);
+ if (mode == NET)
+ snprintf(buf, BUFSIZ, "%s.%s:%s.dat", output_file, node, port);
+ else if (mode == VIRT)
+ snprintf(buf, BUFSIZ, "%s.%s:%d.dat", output_file, domain, pid);
+ else
+ plog("create_client_file: Unsupported mode %d", mode);
ofd = open(buf, O_RDWR | O_CREAT | O_TRUNC, 0644);
if (ofd < 0)
@@ -421,7 +538,8 @@ static int create_client_file(const char *node, const char *port)
}
static void destroy_all_readers(int cpus, int *pid_array, const char *node,
- const char *port)
+ const char *port, const char *domain,
+ int virtpid, int mode)
{
int cpu;
@@ -429,42 +547,50 @@ static void destroy_all_readers(int cpus, int *pid_array, const char *node,
if (pid_array[cpu] > 0) {
kill(pid_array[cpu], SIGKILL);
waitpid(pid_array[cpu], NULL, 0);
- delete_temp_file(node, port, cpu);
+ delete_temp_file(node, port, domain, virtpid, cpu, mode);
pid_array[cpu] = 0;
}
}
}
static int *create_all_readers(int cpus, const char *node, const char *port,
- int pagesize, int fd)
+ const char *domain, int virtpid, int pagesize,
+ int fd, int mode)
{
char buf[BUFSIZ];
- int *port_array;
+ int *port_array = NULL;
int *pid_array;
int start_port;
int udp_port;
int cpu;
int pid;
- port_array = malloc_or_die(sizeof(int) * cpus);
+ if (mode == NET) {
+ port_array = malloc_or_die(sizeof(int) * cpus);
+ start_port = START_PORT_SEARCH;
+ }
pid_array = malloc_or_die(sizeof(int) * cpus);
memset(pid_array, 0, sizeof(int) * cpus);
- start_port = START_PORT_SEARCH;
-
- /* Now create a UDP port for each CPU */
+ /* Now create a reader for each CPU */
for (cpu = 0; cpu < cpus; cpu++) {
- udp_port = open_udp(node, port, &pid, cpu,
- pagesize, start_port);
- if (udp_port < 0)
- goto out_free;
- port_array[cpu] = udp_port;
+ if (node) {
+ udp_port = open_udp(node, port, &pid, cpu,
+ pagesize, start_port);
+ if (udp_port < 0)
+ goto out_free;
+ port_array[cpu] = udp_port;
+ /*
+ * Due to some bugging finding ports,
+ * force search after last port
+ */
+ start_port = udp_port + 1;
+ } else {
+ if (open_virtio_serial_pipe(&pid, cpu, pagesize,
+ domain, virtpid) < 0)
+ goto out_free;
+ }
pid_array[cpu] = pid;
- /*
- * Due to some bugging finding ports,
- * force search after last port
- */
- start_port = udp_port + 1;
}
if (proto_ver == V2_PROTOCOL) {
@@ -485,7 +611,7 @@ static int *create_all_readers(int cpus, const char *node, const char *port,
return pid_array;
out_free:
- destroy_all_readers(cpus, pid_array, node, port);
+ destroy_all_readers(cpus, pid_array, node, port, domain, virtpid, mode);
return NULL;
}
@@ -527,7 +653,8 @@ static void stop_all_readers(int cpus, int *pid_array)
}
static void put_together_file(int cpus, int ofd, const char *node,
- const char *port)
+ const char *port, const char *domain, int virtpid,
+ int mode)
{
char **temp_files;
int cpu;
@@ -536,25 +663,33 @@ static void put_together_file(int cpus, int ofd, const char *node,
temp_files = malloc_or_die(sizeof(*temp_files) * cpus);
for (cpu = 0; cpu < cpus; cpu++)
- temp_files[cpu] = get_temp_file(node, port, cpu);
+ temp_files[cpu] = get_temp_file(node, port, domain,
+ virtpid, cpu, mode);
tracecmd_attach_cpu_data_fd(ofd, cpus, temp_files);
free(temp_files);
}
-static void process_client(const char *node, const char *port, int fd)
+static void process_client(int fd, const char *node, const char *port,
+ const char *domain, int virtpid, int mode)
{
int *pid_array;
int pagesize;
int cpus;
int ofd;
- if (communicate_with_client(fd, &cpus, &pagesize) < 0)
- return;
-
- ofd = create_client_file(node, port);
-
- pid_array = create_all_readers(cpus, node, port, pagesize, fd);
+ if (mode == NET) {
+ if (communicate_with_client_net(fd, &cpus, &pagesize) < 0)
+ return;
+ } else if (mode == VIRT) {
+ if (communicate_with_client_virt(fd, domain, &cpus, &pagesize) < 0)
+ return;
+ } else
+ pdie("process_client: Unsupported mode %d", mode);
+
+ ofd = create_client_file(node, port, domain, virtpid, mode);
+ pid_array = create_all_readers(cpus, node, port, domain, virtpid,
+ pagesize, fd, mode);
if (!pid_array)
return;
@@ -573,9 +708,22 @@ static void process_client(const char *node, const char *port, int fd)
/* wait a little to have the readers clean up */
sleep(1);
- put_together_file(cpus, ofd, node, port);
+ put_together_file(cpus, ofd, node, port, domain, virtpid, mode);
+
+ destroy_all_readers(cpus, pid_array, node, port, domain, virtpid, mode);
+}
+
+static void process_client_net(int fd, const char *node, const char *port)
+{
+ process_client(fd, node, port, NULL, 0, NET);
+}
- destroy_all_readers(cpus, pid_array, node, port);
+static void process_client_virt(int fd, const char *domain, int virtpid)
+{
+ /* keep connection to qemu even if clients on guests finish operation */
+ do {
+ process_client(fd, NULL, NULL, domain, virtpid, VIRT);
+ } while (!done);
}
static int do_fork(int cfd)
@@ -602,32 +750,104 @@ static int do_fork(int cfd)
return 0;
}
-static int do_connection(int cfd, struct sockaddr_storage *peer_addr,
- socklen_t peer_addr_len)
+static int get_virtpid(int cfd)
{
- char host[NI_MAXHOST], service[NI_MAXSERV];
- int s;
+ struct ucred cr;
+ socklen_t cl;
int ret;
- ret = do_fork(cfd);
- if (ret)
+ cl = sizeof(cr);
+ ret = getsockopt(cfd, SOL_SOCKET, SO_PEERCRED, &cr, &cl);
+ if (ret < 0)
return ret;
- s = getnameinfo((struct sockaddr *)peer_addr, peer_addr_len,
- host, NI_MAXHOST,
- service, NI_MAXSERV, NI_NUMERICSERV);
+ return cr.pid;
+}
- if (s == 0)
- plog("Connected with %s:%s\n",
- host, service);
- else {
- plog("Error with getnameinfo: %s\n",
- gai_strerror(s));
- close(cfd);
- return -1;
+#define LIBVIRT_DOMAIN_PATH "/var/run/libvirt/qemu/"
+
+/* We can convert pid to domain name of a guest when we use libvirt. */
+static char *get_guest_domain_from_pid(int pid)
+{
+ struct dirent *dirent;
+ char file_name[NAME_MAX];
+ char *file_name_ret, *domain;
+ char buf[BUFSIZ];
+ DIR *dir;
+ size_t doml;
+ int fd;
+
+ dir = opendir(LIBVIRT_DOMAIN_PATH);
+ if (!dir) {
+ if (errno == ENOENT)
+ warning("Only support for using libvirt");
+ return NULL;
+ }
+
+ for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) {
+ snprintf(file_name, NAME_MAX, LIBVIRT_DOMAIN_PATH"%s",
+ dirent->d_name);
+ file_name_ret = strstr(file_name, ".pid");
+ if (file_name_ret) {
+ fd = open(file_name, O_RDONLY);
+ if (fd < 0)
+ return NULL;
+ if (read(fd, buf, BUFSIZ) < 0)
+ return NULL;
+
+ if (pid == atoi(buf)) {
+ /* not include /var/run/libvirt/qemu */
+ doml = (size_t)(file_name_ret - file_name)
+ - strlen(LIBVIRT_DOMAIN_PATH);
+ domain = strndup(file_name +
+ strlen(LIBVIRT_DOMAIN_PATH),
+ doml);
+ plog("start %s:%d\n", domain, pid);
+ return domain;
+ }
+ }
}
- process_client(host, service, cfd);
+ return NULL;
+}
+
+static int do_connection(int cfd, struct sockaddr *peer_addr,
+ socklen_t peer_addr_len, int mode)
+{
+ char host[NI_MAXHOST], service[NI_MAXSERV];
+ int s, ret, virtpid;
+ char *domain = NULL;
+
+ if (mode == VIRT) {
+ virtpid = get_virtpid(cfd);
+ if (virtpid < 0)
+ return virtpid;
+
+ domain = get_guest_domain_from_pid(virtpid);
+ if (!domain)
+ return -1;
+ }
+
+ ret = do_fork(cfd);
+ if (ret)
+ return ret;
+
+ if (mode == NET) {
+ s = getnameinfo(peer_addr, peer_addr_len, host, NI_MAXHOST,
+ service, NI_MAXSERV, NI_NUMERICSERV);
+
+ if (s == 0)
+ plog("Connected with %s:%s\n",
+ host, service);
+ else {
+ plog("Error with getnameinfo: %s\n",
+ gai_strerror(s));
+ close(cfd);
+ return -1;
+ }
+ process_client_net(cfd, host, service);
+ } else if (mode == VIRT)
+ process_client_virt(cfd, domain, virtpid);
close(cfd);
@@ -681,12 +901,11 @@ static void remove_process(int pid)
static void kill_clients(void)
{
- int status;
int i;
for (i = 0; i < saved_pids; i++) {
kill(client_pids[i], SIGINT);
- waitpid(client_pids[i], &status, 0);
+ waitpid(client_pids[i], NULL, 0);
}
saved_pids = 0;
@@ -705,31 +924,38 @@ static void clean_up(int sig)
} while (ret > 0);
}
-static void do_accept_loop(int sfd)
+static void do_accept_loop(int sfd, int mode)
{
- struct sockaddr_storage peer_addr;
- socklen_t peer_addr_len;
+ struct sockaddr addr;
+ socklen_t addrlen;
int cfd, pid;
- peer_addr_len = sizeof(peer_addr);
+ if (mode == NET)
+ addrlen = sizeof(struct sockaddr_storage);
+ else if (mode == VIRT)
+ addrlen = sizeof(struct sockaddr_un);
+ else
+ pdie("do_accept_loop: Unsupported mode %d", mode);
do {
- cfd = accept(sfd, (struct sockaddr *)&peer_addr,
- &peer_addr_len);
+ cfd = accept(sfd, &addr, &addrlen);
printf("connected!\n");
if (cfd < 0 && errno == EINTR)
continue;
if (cfd < 0)
pdie("connecting");
- pid = do_connection(cfd, &peer_addr, peer_addr_len);
+ if (mode == NET)
+ pid = do_connection(cfd, &addr, addrlen, mode);
+ else if (mode == VIRT)
+ pid = do_connection(cfd, NULL, 0, mode);
if (pid > 0)
add_process(pid);
} while (!done);
}
-static void do_listen(char *port)
+static void do_listen_net(char *port)
{
struct addrinfo hints;
struct addrinfo *result, *rp;
@@ -767,8 +993,64 @@ static void do_listen(char *port)
if (listen(sfd, backlog) < 0)
pdie("listen");
- do_accept_loop(sfd);
+ do_accept_loop(sfd, NET);
+
+ kill_clients();
+}
+
+static void make_virt_if_dir(void)
+{
+ struct group *group;
+
+ if (mkdir(TRACE_CMD_DIR, 0710) < 0) {
+ if (errno != EEXIST)
+ pdie("mkdir %s", TRACE_CMD_DIR);
+ }
+ /* QEMU operates as qemu:qemu */
+ chmod(TRACE_CMD_DIR, 0710);
+ group = getgrnam("qemu");
+ if (chown(TRACE_CMD_DIR, -1, group->gr_gid) < 0)
+ pdie("chown %s", TRACE_CMD_DIR);
+
+ if (mkdir(VIRT_DIR, 0710) < 0) {
+ if (errno != EEXIST)
+ pdie("mkdir %s", VIRT_DIR);
+ }
+ chmod(VIRT_DIR, 0710);
+ if (chown(VIRT_DIR, -1, group->gr_gid) < 0)
+ pdie("chown %s", VIRT_DIR);
+}
+
+static void do_listen_virt(void)
+{
+ struct sockaddr_un un_server;
+ struct group *group;
+ socklen_t slen;
+ int sfd;
+
+ make_virt_if_dir();
+
+ slen = sizeof(un_server);
+ sfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (sfd < 0)
+ pdie("socket");
+
+ un_server.sun_family = AF_UNIX;
+ snprintf(un_server.sun_path, PATH_MAX, VIRT_TRACE_CTL_SOCK);
+
+ if (bind(sfd, (struct sockaddr *)&un_server, slen) < 0)
+ pdie("bind");
+ chmod(VIRT_TRACE_CTL_SOCK, 0660);
+ group = getgrnam("qemu");
+ if (chown(VIRT_TRACE_CTL_SOCK, -1, group->gr_gid) < 0)
+ pdie("fchown %s", VIRT_TRACE_CTL_SOCK);
+
+ if (listen(sfd, backlog) < 0)
+ pdie("listen");
+
+ do_accept_loop(sfd, VIRT);
+ unlink(VIRT_TRACE_CTL_SOCK);
kill_clients();
}
@@ -782,17 +1064,33 @@ enum {
OPT_debug = 255,
};
+static void parse_args_net(int c, char **argv, char **port)
+{
+ switch (c) {
+ case 'p':
+ *port = optarg;
+ break;
+ default:
+ usage(argv);
+ }
+}
+
void trace_listen(int argc, char **argv)
{
char *logfile = NULL;
char *port = NULL;
int daemon = 0;
+ int mode = 0;
int c;
if (argc < 2)
usage(argv);
- if (strcmp(argv[1], "listen") != 0)
+ if (strcmp(argv[1], "listen") == 0)
+ mode = NET;
+ else if (strcmp(argv[1], "virt-server") == 0)
+ mode = VIRT;
+ else
usage(argv);
for (;;) {
@@ -812,9 +1110,6 @@ void trace_listen(int argc, char **argv)
case 'h':
usage(argv);
break;
- case 'p':
- port = optarg;
- break;
case 'd':
output_dir = optarg;
break;
@@ -831,11 +1126,14 @@ void trace_listen(int argc, char **argv)
debug = 1;
break;
default:
- usage(argv);
+ if (mode == NET)
+ parse_args_net(c, argv, &port);
+ else
+ usage(argv);
}
}
- if (!port)
+ if (!port && mode == NET)
usage(argv);
if ((argc - optind) >= 2)
@@ -863,7 +1161,12 @@ void trace_listen(int argc, char **argv)
signal_setup(SIGINT, finish);
signal_setup(SIGTERM, finish);
- do_listen(port);
+ if (mode == NET)
+ do_listen_net(port);
+ else if (mode == VIRT)
+ do_listen_virt();
+ else
+ ; /* Not reached */
return;
}
diff --git a/trace-msg.c b/trace-msg.c
index e3d4f3f..717089c 100644
--- a/trace-msg.c
+++ b/trace-msg.c
@@ -59,6 +59,9 @@ typedef __be32 be32;
#define CPU_MAX 256
+/* use CONNECT_MSG as a protocol version of trace-msg */
+#define CONNECT_MSG "tracecmd-V2"
+
/* for both client and server */
bool use_tcp;
int cpu_count;
@@ -78,6 +81,10 @@ struct tracecmd_msg_str {
char *buf;
} __attribute__((packed));
+/* Body of a MSG_RCONNECT message: a single size-prefixed string. */
+struct tracecmd_msg_rconnect {
+ struct tracecmd_msg_str str;
+};
+
struct tracecmd_msg_opt {
be32 size;
be32 opt_cmd;
@@ -104,6 +111,7 @@ struct tracecmd_msg_error {
be32 size;
be32 cmd;
union {
+ struct tracecmd_msg_rconnect rconnect;
struct tracecmd_msg_tinit tinit;
struct tracecmd_msg_rinit rinit;
struct tracecmd_msg_meta meta;
@@ -111,7 +119,10 @@ struct tracecmd_msg_error {
} __attribute__((packed));
enum tracecmd_msg_cmd {
+ MSG_ERROR = 0,
MSG_CLOSE = 1,
+ MSG_TCONNECT = 2,
+ MSG_RCONNECT = 3,
MSG_TINIT = 4,
MSG_RINIT = 5,
MSG_SENDMETA = 6,
@@ -122,6 +133,7 @@ struct tracecmd_msg {
be32 size;
be32 cmd;
union {
+ struct tracecmd_msg_rconnect rconnect;
struct tracecmd_msg_tinit tinit;
struct tracecmd_msg_rinit rinit;
struct tracecmd_msg_meta meta;
@@ -159,6 +171,16 @@ static void bufcpy(void *dest, u32 offset, const void *buf, u32 buflen)
memcpy(dest+offset, buf, buflen);
}
+/*
+ * Fill in the MSG_RCONNECT body of @msg: copy @buflen bytes of @buf to
+ * the offset of the string buffer and store @buflen, in network byte
+ * order, as the string size.  Always returns 0.
+ */
+static int make_rconnect(const char *buf, int buflen, struct tracecmd_msg *msg)
+{
+	const u32 str_off = offsetof(struct tracecmd_msg, data.rconnect.str.buf);
+
+	bufcpy(msg, str_off, buf, buflen);
+	msg->data.rconnect.str.size = htonl(buflen);
+
+	return 0;
+}
+
enum msg_opt_command {
MSGOPT_USETCP = 1,
};
@@ -236,11 +258,13 @@ static int make_rinit(struct tracecmd_msg *msg)
msg->data.rinit.cpus = htonl(cpu_count);
- for (i = 0; i < cpu_count; i++) {
- /* + rrqports->cpus or rrqports->port_array[i] */
- offset += sizeof(be32);
- port = htonl(port_array[i]);
- bufcpy(msg, offset, &port, sizeof(be32) * cpu_count);
+	if (port_array) {
+		for (i = 0; i < cpu_count; i++) {
+			/* + rrqports->cpus or rrqports->port_array[i] */
+			offset += sizeof(be32);
+			port = htonl(port_array[i]);
+			/*
+			 * port holds one value, so copy exactly one be32.
+			 * Copying sizeof(be32) * cpu_count bytes would read
+			 * past &port and write past this entry's slot.
+			 */
+			bufcpy(msg, offset, &port, sizeof(be32));
+		}
}
return 0;
@@ -252,6 +276,9 @@ static u32 tracecmd_msg_get_body_length(u32 cmd)
u32 len = 0;
switch (cmd) {
+ case MSG_RCONNECT:
+ return sizeof(msg->data.rconnect.str.size)
+ + sizeof(CONNECT_MSG);
case MSG_TINIT:
len = sizeof(msg->data.tinit.cpus)
+ sizeof(msg->data.tinit.page_size)
@@ -288,6 +315,8 @@ static u32 tracecmd_msg_get_body_length(u32 cmd)
static int tracecmd_msg_make_body(u32 cmd, struct tracecmd_msg *msg)
{
switch (cmd) {
+ case MSG_RCONNECT:
+ return make_rconnect(CONNECT_MSG, sizeof(CONNECT_MSG), msg);
case MSG_TINIT:
return make_tinit(msg);
case MSG_RINIT:
@@ -423,6 +452,8 @@ static void *tracecmd_msg_buf_access(struct tracecmd_msg *msg, int offset)
static int tracecmd_msg_wait_for_msg(int fd, struct tracecmd_msg *msg)
{
+ int offset = TRACECMD_MSG_HDR_LEN;
+ char *buf;
u32 cmd;
int ret;
@@ -434,8 +465,20 @@ static int tracecmd_msg_wait_for_msg(int fd, struct tracecmd_msg *msg)
}
cmd = ntohl(msg->cmd);
- if (cmd == MSG_CLOSE)
+	switch (cmd) {
+	case MSG_RCONNECT:
+		offset += sizeof(msg->data.rconnect.str.size);
+		buf = tracecmd_msg_buf_access(msg, offset);
+		/*
+		 * Make sure the server is the tracecmd server.  The size
+		 * field comes from the peer, so it must not be used as the
+		 * compare length: 0 would wrap around and a large value
+		 * would read past CONNECT_MSG.  Require the exact size and
+		 * compare a fixed number of bytes instead.
+		 */
+		if (ntohl(msg->data.rconnect.str.size) != sizeof(CONNECT_MSG) ||
+		    memcmp(buf, CONNECT_MSG, sizeof(CONNECT_MSG) - 1) != 0) {
+			warning("server not tracecmd server");
+			return -EPROTONOSUPPORT;
+		}
+		break;
+	case MSG_CLOSE:
return -ECONNABORTED;
+	}
return 0;
}
@@ -494,7 +537,55 @@ static void error_operation_for_server(struct tracecmd_msg *msg)
cmd = ntohl(msg->cmd);
- warning("Message: cmd=%d size=%d\n", cmd, ntohl(msg->size));
+ if (cmd == MSG_ERROR)
+ plog("Receive error message: cmd=%d size=%d\n",
+ ntohl(msg->data.err.cmd), ntohl(msg->data.err.size));
+ else
+ warning("Message: cmd=%d size=%d\n", cmd, ntohl(msg->size));
+}
+
+/*
+ * Server side of the connection handshake on @fd.
+ *
+ * Waits for the first message from the client, expects MSG_TCONNECT and
+ * answers with MSG_RCONNECT.
+ *
+ * Returns 0 on success, -ECONNABORTED if the client sent MSG_CLOSE,
+ * -EINVAL for any other command, or the error from sending the reply.
+ * When receiving fails, @fd is closed and die() is called with @domain
+ * in the message if the first byte of the receive buffer is still zero;
+ * otherwise -ENOMSG is returned.
+ *
+ * NOTE(review): the first byte belongs to the big-endian size field, so
+ * it may be zero even after data has arrived -- confirm the !buf[0] test.
+ */
+int tracecmd_msg_set_connection(int fd, const char *domain)
+{
+	struct tracecmd_msg *msg;
+	char buf[TRACECMD_MSG_MAX_LEN] = {};
+	u32 cmd;
+	int ret;
+
+	msg = (struct tracecmd_msg *)buf;
+
+	/*
+	 * Wait for connection msg by a client first.
+	 * If a client uses virtio-serial, a connection message will
+	 * not be sent immediately after accept(). connect() is called
+	 * in QEMU, so the client can send the connection message
+	 * after guest boots. Therefore, the virt-server patiently
+	 * waits for the connection request of a client.
+	 */
+	ret = tracecmd_msg_recv(fd, msg);
+	if (ret < 0) {
+		if (!buf[0]) {
+			/* No data means QEMU has already died. */
+			close(fd);
+			die("Connection refused: %s", domain);
+		}
+		return -ENOMSG;
+	}
+
+	cmd = ntohl(msg->cmd);
+	if (cmd == MSG_CLOSE)
+		return -ECONNABORTED;
+	else if (cmd != MSG_TCONNECT)
+		return -EINVAL;
+
+	ret = tracecmd_msg_send(fd, MSG_RCONNECT);
+	if (ret < 0)
+		goto error;
+
+	return 0;
+
+error:
+	error_operation_for_server(msg);
+	return ret;
}
#define MAX_OPTION_SIZE 4096
diff --git a/trace-recorder.c b/trace-recorder.c
index 66cad98..ad80d82 100644
--- a/trace-recorder.c
+++ b/trace-recorder.c
@@ -155,19 +155,23 @@ tracecmd_create_buffer_recorder_fd2(int fd, int fd2, int cpu, unsigned flags,
recorder->fd1 = fd;
recorder->fd2 = fd2;
- path = malloc_or_die(strlen(buffer) + 40);
- if (!path)
- goto out_free;
+ if (buffer) {
+ path = malloc_or_die(strlen(buffer) + 40);
+ if (!path)
+ goto out_free;
- if (flags & TRACECMD_RECORD_SNAPSHOT)
- sprintf(path, "%s/per_cpu/cpu%d/snapshot_raw", buffer, cpu);
- else
- sprintf(path, "%s/per_cpu/cpu%d/trace_pipe_raw", buffer, cpu);
- recorder->trace_fd = open(path, O_RDONLY);
- if (recorder->trace_fd < 0)
- goto out_free;
+ if (flags & TRACECMD_RECORD_SNAPSHOT)
+ sprintf(path, "%s/per_cpu/cpu%d/snapshot_raw",
+ buffer, cpu);
+ else
+ sprintf(path, "%s/per_cpu/cpu%d/trace_pipe_raw",
+ buffer, cpu);
+ recorder->trace_fd = open(path, O_RDONLY);
+ if (recorder->trace_fd < 0)
+ goto out_free;
- free(path);
+ free(path);
+ }
if ((recorder->flags & TRACECMD_RECORD_NOSPLICE) == 0) {
ret = pipe(recorder->brass);
@@ -190,8 +194,9 @@ tracecmd_create_buffer_recorder_fd(int fd, int cpu, unsigned flags, const char *
return tracecmd_create_buffer_recorder_fd2(fd, -1, cpu, flags, buffer, 0);
}
-struct tracecmd_recorder *
-tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags, const char *buffer)
+static struct tracecmd_recorder *
+__tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags,
+ const char *buffer)
{
struct tracecmd_recorder *recorder;
int fd;
@@ -254,6 +259,25 @@ tracecmd_create_buffer_recorder_maxkb(const char *file, int cpu, unsigned flags,
goto out;
}
+/*
+ * Public entry point: forwards all arguments unchanged to
+ * __tracecmd_create_buffer_recorder() and returns its result.
+ */
+struct tracecmd_recorder *
+tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags,
+ const char *buffer)
+{
+ return __tracecmd_create_buffer_recorder(file, cpu, flags, buffer);
+}
+
+/*
+ * Create a recorder for @file and @cpu with no flags and no tracing
+ * buffer path, then install @trace_fd as its trace_fd.
+ * Returns the recorder, or NULL if it could not be created.
+ */
+struct tracecmd_recorder *
+tracecmd_create_recorder_virt(const char *file, int cpu, int trace_fd)
+{
+	struct tracecmd_recorder *rec;
+
+	rec = __tracecmd_create_buffer_recorder(file, cpu, 0, NULL);
+	if (!rec)
+		return NULL;
+
+	rec->trace_fd = trace_fd;
+	return rec;
+}
+
struct tracecmd_recorder *tracecmd_create_recorder_fd(int fd, int cpu, unsigned flags)
{
const char *tracing;
diff --git a/trace-usage.c b/trace-usage.c
index 520b14b..3d9b821 100644
--- a/trace-usage.c
+++ b/trace-usage.c
@@ -212,6 +212,16 @@ static struct usage_help usage_help[] = {
" -l logfile to write messages to.\n"
},
{
+ "virt-server",
+ "listen on a virtio-serial for trace clients",
+ " %s virt-server [-o file][-d dir][-l logfile]\n"
+ " Creates a socket to listen for clients.\n"
+ " -D create it in daemon mode.\n"
+ " -o file name to use for clients.\n"
+ " -d diretory to store client files.\n"
+ " -l logfile to write messages to.\n"
+ },
+ {
"list",
"list the available events, plugins or options",
" %s list [-e [regex]][-t][-o][-f [regex]]\n"
From: Yoshihiro YUNOMAE <[email protected]>
Add --virt option for record mode for a virtualization environment.
If we use this option on a guest, we can send trace data with low overhead.
This is because guests can send trace data to a host without copying the data
by using splice(2).
The format is:
trace-cmd record --virt -e sched*
<Note>
The client using virtio-serial does not wait for the connection message
"tracecmd" from the server. The client sends the connection message
MSG_TCONNECT first.
<Restriction>
This feature can be used from kernel 3.6 onward, which supports splice_read
for ftrace and splice_write for virtio-serial.
Signed-off-by: Yoshihiro YUNOMAE <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
---
Changes in V4: Rebase for current trace-cmd-v2.4
Add usage of --virt for record in trace-usage.c
Divide tracecmd_msg_connect_to_server() into two functions
(tracecmd_msg_connect_to_server() and
tracecmd_msg_send_init_data_virt(fd))
Changes in V3: Change _nw/_NW to _net/_NET
---
Documentation/trace-cmd-record.1.txt | 12 +++++
trace-cmd.h | 4 +-
trace-msg.c | 79 +++++++++++++++++++++++++++++++---
trace-msg.h | 4 ++
trace-record.c | 71 ++++++++++++++++++++++++++++---
trace-usage.c | 3 +
6 files changed, 160 insertions(+), 13 deletions(-)
diff --git a/Documentation/trace-cmd-record.1.txt b/Documentation/trace-cmd-record.1.txt
index 2a368fe..7b9981a 100644
--- a/Documentation/trace-cmd-record.1.txt
+++ b/Documentation/trace-cmd-record.1.txt
@@ -290,6 +290,14 @@ OPTIONS
Have output go to stderr instead of stdout, but the output of the command
executed will not be changed. This is useful if you want to monitor the
output of the command being executed, but not see the output from trace-cmd.
+*--virt*::
+ This option is used on a guest in a virtualization environment. If a host
+ is running "trace-cmd virt-server", this option is used to have the data
+ sent to the host with virtio-serial like *-N* option. (see also
+ trace-cmd-virt-server(1))
+
+ Note: This option is not supported with latency tracer plugins:
+ wakeup, wakeup_rt, irqsoff, preemptoff and preemptirqsoff
EXAMPLES
--------
@@ -390,7 +398,11 @@ SEE ALSO
--------
trace-cmd(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1),
trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
-trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-profile(1)
+trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-profile(1),
+trace-cmd-virt-server(1)
AUTHOR
------
diff --git a/trace-cmd.h b/trace-cmd.h
index a93920f..fbfe3bf 100644
--- a/trace-cmd.h
+++ b/trace-cmd.h
@@ -265,7 +265,9 @@ void tracecmd_stat_cpu(struct trace_seq *s, int cpu);
long tracecmd_flush_recording(struct tracecmd_recorder *recorder);
/* for clients */
-int tracecmd_msg_send_init_data(int fd);
+int tracecmd_msg_connect_to_server(int fd);
+int tracecmd_msg_send_init_data_net(int fd);
+int tracecmd_msg_send_init_data_virt(int fd);
int tracecmd_msg_metadata_send(int fd, const char *buf, int size);
int tracecmd_msg_finish_sending_metadata(int fd);
void tracecmd_msg_send_close_msg(void);
diff --git a/trace-msg.c b/trace-msg.c
index 717089c..b5173ee 100644
--- a/trace-msg.c
+++ b/trace-msg.c
@@ -30,6 +30,7 @@
#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
+#include <sys/stat.h>
#include <sys/types.h>
#include <linux/types.h>
@@ -70,6 +71,7 @@ int cpu_count;
static int psfd;
unsigned int page_size;
int *client_ports;
+int *virt_sfds;
bool send_metadata;
/* for server */
@@ -270,12 +272,20 @@ static int make_rinit(struct tracecmd_msg *msg)
return 0;
}
+/*
+ * Copy @len bytes starting at the file-level errmsg pointer into @msg
+ * at offset TRACECMD_MSG_HDR_LEN.  Always returns 0.
+ */
+static int make_error_msg(u32 len, struct tracecmd_msg *msg)
+{
+ bufcpy(msg, TRACECMD_MSG_HDR_LEN, errmsg, len);
+ return 0;
+}
+
static u32 tracecmd_msg_get_body_length(u32 cmd)
{
struct tracecmd_msg *msg;
u32 len = 0;
switch (cmd) {
+ case MSG_ERROR:
+ return ntohl(errmsg->size);
case MSG_RCONNECT:
return sizeof(msg->data.rconnect.str.size)
+ sizeof(CONNECT_MSG);
@@ -304,6 +314,7 @@ static u32 tracecmd_msg_get_body_length(u32 cmd)
+ sizeof(msg->data.rinit.port_array);
case MSG_SENDMETA:
return TRACECMD_MSG_MAX_LEN - TRACECMD_MSG_HDR_LEN;
+ case MSG_TCONNECT:
case MSG_CLOSE:
case MSG_FINMETA:
break;
@@ -312,15 +323,18 @@ static u32 tracecmd_msg_get_body_length(u32 cmd)
return 0;
}
-static int tracecmd_msg_make_body(u32 cmd, struct tracecmd_msg *msg)
+static int tracecmd_msg_make_body(u32 cmd, u32 len, struct tracecmd_msg *msg)
{
switch (cmd) {
+ case MSG_ERROR:
+ return make_error_msg(len, msg);
case MSG_RCONNECT:
return make_rconnect(CONNECT_MSG, sizeof(CONNECT_MSG), msg);
case MSG_TINIT:
return make_tinit(msg);
case MSG_RINIT:
return make_rinit(msg);
+ case MSG_TCONNECT:
case MSG_CLOSE:
case MSG_SENDMETA: /* meta data is not stored here. */
case MSG_FINMETA:
@@ -345,7 +359,7 @@ static int tracecmd_msg_create(u32 cmd, struct tracecmd_msg **msg)
if (ret < 0)
return ret;
- ret = tracecmd_msg_make_body(cmd, *msg);
+ ret = tracecmd_msg_make_body(cmd, len, *msg);
if (ret < 0)
free(*msg);
@@ -374,6 +388,12 @@ static int tracecmd_msg_send(int fd, u32 cmd)
return ret;
}
+/*
+ * Remember @msg in the file-level errmsg pointer and send a MSG_ERROR
+ * on @fd.  The result of the send is ignored.
+ */
+static void tracecmd_msg_send_error(int fd, struct tracecmd_msg *msg)
+{
+ errmsg = msg;
+ tracecmd_msg_send(fd, MSG_ERROR);
+}
+
static int tracecmd_msg_read_extra(int fd, void *buf, u32 size, int *n)
{
int r = 0;
@@ -498,9 +518,10 @@ static int tracecmd_msg_send_and_wait_for_msg(int fd, u32 cmd, struct tracecmd_m
return 0;
}
-int tracecmd_msg_send_init_data(int fd)
+static int tracecmd_msg_send_init_data(int fd, bool net)
{
char buf[TRACECMD_MSG_MAX_LEN];
+ char path[PATH_MAX];
struct tracecmd_msg *msg;
int i, cpus;
int ret;
@@ -511,9 +532,24 @@ int tracecmd_msg_send_init_data(int fd)
return ret;
cpus = ntohl(msg->data.rinit.cpus);
- client_ports = malloc_or_die(sizeof(int) * cpus);
- for (i = 0; i < cpus; i++)
- client_ports[i] = ntohl(msg->data.rinit.port_array[i]);
+	if (net) {
+		client_ports = malloc_or_die(sizeof(int) * cpus);
+		for (i = 0; i < cpus; i++)
+			client_ports[i] =
+				ntohl(msg->data.rinit.port_array[i]);
+	} else {
+		virt_sfds = malloc_or_die(sizeof(int) * cpus);
+
+		/* Open data paths of virtio-serial */
+		for (i = 0; i < cpus; i++) {
+			snprintf(path, PATH_MAX, TRACE_PATH_CPU, i);
+			virt_sfds[i] = open(path, O_WRONLY);
+			if (virt_sfds[i] < 0) {
+				/* Save errno first: warning() may change it */
+				ret = -errno;
+				/* Report the expanded path, not the template */
+				warning("Cannot open %s", path);
+				return ret;
+			}
+		}
+	}
/* Next, send meta data */
send_metadata = true;
@@ -521,6 +557,37 @@ int tracecmd_msg_send_init_data(int fd)
return 0;
}
+/* Calls tracecmd_msg_send_init_data() on @fd with net = true. */
+int tracecmd_msg_send_init_data_net(int fd)
+{
+ return tracecmd_msg_send_init_data(fd, true);
+}
+
+/* Calls tracecmd_msg_send_init_data() on @fd with net = false. */
+int tracecmd_msg_send_init_data_virt(int fd)
+{
+ return tracecmd_msg_send_init_data(fd, false);
+}
+
+/*
+ * Client side of the connection handshake: send MSG_TCONNECT on @fd and
+ * wait for the reply.  If that fails with -EPROTONOSUPPORT, an error
+ * message is sent on @fd.  Returns the result of the exchange.
+ */
+int tracecmd_msg_connect_to_server(int fd)
+{
+	char reply[TRACECMD_MSG_MAX_LEN];
+	struct tracecmd_msg *msg = (struct tracecmd_msg *)reply;
+	int ret;
+
+	/* connect to a server */
+	ret = tracecmd_msg_send_and_wait_for_msg(fd, MSG_TCONNECT, msg);
+	if (ret == -EPROTONOSUPPORT)
+		tracecmd_msg_send_error(fd, msg);
+
+	return ret;
+}
+
static bool process_option(struct tracecmd_msg_opt *opt)
{
/* currently the only option we have is to us TCP */
diff --git a/trace-msg.h b/trace-msg.h
index b23e72b..502c1bf 100644
--- a/trace-msg.h
+++ b/trace-msg.h
@@ -2,6 +2,9 @@
#define _TRACE_MSG_H_
#include <stdbool.h>
+#define VIRTIO_PORTS "/dev/virtio-ports/"
+#define AGENT_CTL_PATH VIRTIO_PORTS "agent-ctl-path"
+#define TRACE_PATH_CPU VIRTIO_PORTS "trace-path-cpu%d"
#define UDP_MAX_PACKET (65536 - 20)
#define V2_MAGIC "677768\0"
@@ -17,6 +20,7 @@ extern int cpu_count;
extern unsigned int page_size;
extern int *client_ports;
extern bool send_metadata;
+extern int *virt_sfds;
/* for server */
extern bool done;
diff --git a/trace-record.c b/trace-record.c
index 89f4883..8b8f6db 100644
--- a/trace-record.c
+++ b/trace-record.c
@@ -95,6 +95,9 @@ static struct tracecmd_output *network_handle;
/* Max size to let a per cpu file get */
static int max_kb;
+struct tracecmd_output *virt_handle;
+static bool virt;
+
static int do_ptrace;
static int filter_task;
@@ -2341,6 +2344,9 @@ static int create_recorder(struct buffer_instance *instance, int cpu,
if (client_ports) {
connect_port(cpu);
recorder = tracecmd_create_recorder_fd(client_ports[cpu], cpu, recorder_flags);
+ } else if (virt_sfds) {
+ recorder = tracecmd_create_recorder_fd(virt_sfds[cpu], cpu,
+ recorder_flags);
} else {
file = get_temp_file(instance, cpu);
recorder = create_recorder_instance(instance, file, cpu, brass);
@@ -2376,7 +2382,7 @@ static void check_first_msg_from_server(int fd)
die("server not tracecmd server");
}
-static void communicate_with_listener_v1(int fd)
+static void communicate_with_listener_v1_net(int fd)
{
char buf[BUFSIZ];
ssize_t n;
@@ -2439,9 +2445,9 @@ static void communicate_with_listener_v1(int fd)
}
}
-static void communicate_with_listener_v2(int fd)
+static void communicate_with_listener_v2_net(int fd)
{
- if (tracecmd_msg_send_init_data(fd) < 0)
+ if (tracecmd_msg_send_init_data_net(fd) < 0)
die("Cannot communicate with server");
}
@@ -2485,6 +2491,15 @@ static void check_protocol_version(int fd)
}
}
+/*
+ * Run the two client-side steps for a virt-server on @fd: the
+ * connection handshake, then the init data exchange.  die() is called
+ * if either step returns a negative value.
+ */
+static void communicate_with_listener_virt(int fd)
+{
+ if (tracecmd_msg_connect_to_server(fd) < 0)
+ die("Cannot communicate with server");
+
+ if (tracecmd_msg_send_init_data_virt(fd) < 0)
+ die("Cannot send init data");
+}
+
static void setup_network(void)
{
struct addrinfo hints;
@@ -2540,11 +2555,11 @@ again:
close(sfd);
goto again;
}
- communicate_with_listener_v2(sfd);
+ communicate_with_listener_v2_net(sfd);
}
if (proto_ver == V1_PROTOCOL)
- communicate_with_listener_v1(sfd);
+ communicate_with_listener_v1_net(sfd);
/* Now create the handle through this socket */
network_handle = tracecmd_create_init_fd_glob(sfd, listed_events);
@@ -2555,6 +2570,21 @@ again:
/* OK, we are all set, let'r rip! */
}
+/*
+ * Open the agent control port AGENT_CTL_PATH, talk to the virt-server
+ * over it, create virt_handle on the same descriptor and finish sending
+ * the metadata.  die() is called if the port cannot be opened.
+ */
+static void setup_virtio(void)
+{
+ int fd;
+
+ fd = open(AGENT_CTL_PATH, O_RDWR);
+ if (fd < 0)
+ die("Cannot open %s", AGENT_CTL_PATH);
+
+ communicate_with_listener_virt(fd);
+
+ /* Now create the handle through this virtio-serial port */
+ virt_handle = tracecmd_create_init_fd_glob(fd, listed_events);
+ tracecmd_msg_finish_sending_metadata(fd);
+}
+
static void finish_network(void)
{
if (proto_ver == V2_PROTOCOL)
@@ -2563,6 +2593,13 @@ static void finish_network(void)
free(host);
}
+/*
+ * Send the close message and release virt_handle and the virt_sfds
+ * array.
+ *
+ * NOTE(review): virt_handle is a struct tracecmd_output * released with
+ * plain free() -- confirm no dedicated close routine is needed.  The
+ * descriptors stored in virt_sfds are not closed here.
+ */
+static void finish_virt(void)
+{
+ tracecmd_msg_send_close_msg();
+ free(virt_handle);
+ free(virt_sfds);
+}
+
static void start_threads(enum trace_type type, int global)
{
int profile = (type & TRACE_TYPE_PROFILE) == TRACE_TYPE_PROFILE;
@@ -2573,6 +2610,8 @@ static void start_threads(enum trace_type type, int global)
if (host)
setup_network();
+ else if (virt)
+ setup_virtio();
/* make a thread for every CPU we have */
pids = malloc_or_die(sizeof(*pids) * cpu_count * (buffers + 1));
@@ -2707,6 +2746,9 @@ static void record_data(char *date2ts)
if (host) {
finish_network();
return;
+ } else if (virt) {
+ finish_virt();
+ return;
}
if (latency)
@@ -3774,6 +3816,7 @@ static void add_hook(struct buffer_instance *instance, const char *arg)
}
enum {
+ OPT_virt = 250,
OPT_stderr = 251,
OPT_profile = 252,
OPT_nosplice = 253,
@@ -3942,6 +3985,7 @@ void trace_record (int argc, char **argv)
{"nosplice", no_argument, NULL, OPT_nosplice},
{"profile", no_argument, NULL, OPT_profile},
{"stderr", no_argument, NULL, OPT_stderr},
+ {"virt", no_argument, NULL, OPT_virt},
{"help", no_argument, NULL, '?'},
{NULL, 0, NULL, 0}
};
@@ -4061,6 +4105,8 @@ void trace_record (int argc, char **argv)
case 'o':
if (host)
die("-o incompatible with -N");
+ if (virt)
+ die("-o incompatible with --virt");
if (start)
die("start does not take output\n"
"Did you mean 'record'?");
@@ -4120,6 +4166,8 @@ void trace_record (int argc, char **argv)
case 'N':
if (!record && !extract)
die("-N only available with record or extract");
+ if (virt)
+ die("-N incompatible with --virt");
if (output)
die("-N incompatible with -o");
host = optarg;
@@ -4135,6 +4183,8 @@ void trace_record (int argc, char **argv)
instance->cpumask = optarg;
break;
case 't':
+ if (virt)
+ die("-t incompatible with --virt");
use_tcp = 1;
break;
case 'b':
@@ -4173,6 +4223,17 @@ void trace_record (int argc, char **argv)
close(1);
dup2(2, 1);
break;
+ case OPT_virt:
+ if (!record)
+ die("--virt only available with record");
+ if (host)
+ die("--virt incompatible with -N");
+ if (output)
+ die("--virt incompatible with -o");
+ if (use_tcp)
+ die("--virt incompatible with -t");
+ virt = true;
+ break;
default:
usage(argv);
}
diff --git a/trace-usage.c b/trace-usage.c
index 3d9b821..23cb124 100644
--- a/trace-usage.c
+++ b/trace-usage.c
@@ -19,7 +19,7 @@ static struct usage_help usage_help[] = {
" %s record [-v][-e event [-f filter]][-p plugin][-F][-d][-D][-o file] \\\n"
" [-s usecs][-O option ][-l func][-g func][-n func] \\\n"
" [-P pid][-N host:port][-t][-r prio][-b size][-B buf][command ...]\n"
- " [-m max][-C clock]\n"
+ " [-m max][-C clock][--virt]\n"
" -e run command with event enabled\n"
" -f filter for previous -e event\n"
" -R trigger for previous -e event\n"
@@ -51,6 +51,7 @@ static struct usage_help usage_help[] = {
" --profile enable tracing options needed for report --profile\n"
" --func-stack perform a stack trace for function tracer\n"
" (use with caution)\n"
+ " --virt to connect to virt-server\n"
},
{
"start",
From: Yoshihiro YUNOMAE <[email protected]>
Add --dom option which makes a domain directory to virt-server. When a user
already knows domain name of a guest before running virt-server, trace-cmd
should automatically set up I/Fs of the guest. By adding --dom option,
trace-cmd creates a domain directory with 0710 and qemu group.
This patch adds additional options for --dom as follows:
-m <permission>
This option changes the permission of domain directory. If you don't use
this option, the default permission is 0710.
-g <group>
This option changes group of domain directory. If you don't use this option,
the default group is qemu.
-c <cpu>
This option creates trace data I/Fs(trace-path-cpu*.{in,out}) for each CPU
of 'domain'. If you don't use this option, those files are not created.
Examples of using this option:
- trace-cmd creates a guest1 directory with trace data I/Fs of 2 CPUs.
# trace-cmd virt-server --dom guest1 -c 2
- trace-cmd creates guest2 and guest3 directories
# trace-cmd virt-server --dom guest2 -c 3 --dom guest3 -c 1
Signed-off-by: Yoshihiro YUNOMAE <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
---
Changes in V5: Update document.
Changes in V4: Introduce parse_args_virt()
Add usage of virt-server in trace-usage.c
---
Documentation/trace-cmd-virt-server.1.txt | 58 ++++++++---
trace-listen.c | 151 ++++++++++++++++++++++++++---
trace-usage.c | 5 +
3 files changed, 179 insertions(+), 35 deletions(-)
diff --git a/Documentation/trace-cmd-virt-server.1.txt b/Documentation/trace-cmd-virt-server.1.txt
index b775745..19f2b23 100644
--- a/Documentation/trace-cmd-virt-server.1.txt
+++ b/Documentation/trace-cmd-virt-server.1.txt
@@ -34,40 +34,64 @@ OPTIONS
*-l* 'filename'::
This option writes the output messages to a log file instead of standard output.
+*--dom* 'domain'::
+ This option makes a directory for the 'domain'. You can use additional options
+ *-m*, *-g*, *-c* after this option for the 'domain'. If you don't use these
+ additional options, the directory is made as 0710 and qemu group and
+ trace data I/Fs(trace-path-cpu*.{in,out}) are not created.
+
+*-m* 'permission'::
+ This option changes the permission of 'domain' directory. If you don't use
+ this option, the default permission is 0710.
+
+*-g* 'group'::
+ This option changes group of 'domain' directory. If you don't use this option,
+ the default group is qemu.
+
+*-c* 'cpu'::
+ This option creates trace data I/Fs(trace-path-cpu*.{in,out}) for each CPU
+ of 'domain'. If you don't use this option, those files are not created.
+
SETTING
-------
Here, an example is written as follows:
-1. Run virt-server on a host
- # trace-cmd virt-server
-
-2. Make guest domain directory
- # mkdir -p /tmp/trace-cmd/virt/<DOMAIN>
- # chmod 710 /tmp/trace-cmd/virt/<DOMAIN>
- # chgrp qemu /tmp/trace-cmd/virt/<DOMAIN>
-
-3. Make FIFO on the host
- # mkfifo /tmp/trace-cmd/virt/<DOMAIN>/trace-path-cpu{0,1,...,X}.{in,out}
+1. Run virt-server with initializing guest interfaces on a host
+ # trace-cmd virt-server --dom "GUEST" -c 2
-4. Set up of virtio-serial pipe of a guest on the host
+2. Set up of virtio-serial pipe of GUEST on the host
Add the following tags to domain XML files.
- # virsh edit <guest domain>
+ # virsh edit "GUEST"
<channel type='unix'>
<source mode='connect' path='/tmp/trace-cmd/virt/agent-ctl-path'/>
<target type='virtio' name='agent-ctl-path'/>
</channel>
<channel type='pipe'>
- <source path='/tmp/trace-cmd/virt/<DOMAIN>/trace-path-cpu0'/>
+ <source path='/tmp/trace-cmd/virt/GUEST/trace-path-cpu0'/>
<target type='virtio' name='trace-path-cpu0'/>
</channel>
- ... (cpu1, cpu2, ...)
+ <channel type='pipe'>
+ <source path='/tmp/trace-cmd/virt/GUEST/trace-path-cpu1'/>
+ <target type='virtio' name='trace-path-cpu1'/>
+ </channel>
-5. Boot the guest
- # virsh start <DOMAIN>
+3. Boot the guest
+ # virsh start "GUEST"
-6. Run the guest's client(see trace-cmd-record(1) with the *--virt* option)
+4. Run the guest1's client(see trace-cmd-record(1) with the *--virt* option)
# trace-cmd record -e sched* --virt
+If you want to boot another guest that sends trace data via virtio-serial,
+you must manually create the guest domain directory and trace data I/Fs.
+
+- Make guest domain directory on the host
+ # mkdir -p /tmp/trace-cmd/virt/<DOMAIN>
+ # chmod 710 /tmp/trace-cmd/virt/<DOMAIN>
+ # chgrp qemu /tmp/trace-cmd/virt/<DOMAIN>
+
+- Make FIFO on the host
+ # mkfifo /tmp/trace-cmd/virt/<DOMAIN>/trace-path-cpu{0,1,...,X}.{in,out}
+
SEE ALSO
--------
trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1),
diff --git a/trace-listen.c b/trace-listen.c
index 718680f..f6d53d2 100644
--- a/trace-listen.c
+++ b/trace-listen.c
@@ -54,11 +54,21 @@ static int backlog = 5;
static int proto_ver;
+/* One --dom request; the requests are chained in dom_dir_list. */
+struct domain_dir {
+ struct domain_dir *next; /* next request in the list */
+ char *name; /* domain name, taken from the --dom argument */
+ char *group; /* -g argument; NULL selects the "qemu" default */
+ mode_t perms; /* -m argument; 0 selects the 0710 default */
+ int cpu; /* -c argument; 0 means no trace FIFOs are created */
+};
+
enum {
NET = 1,
VIRT = 2,
};
+struct domain_dir *dom_dir_list;
+
#define TEMP_FILE_STR_NET "%s.%s:%s.cpu%d", output_file, host, port, cpu
#define TEMP_FILE_STR_VIRT "%s.%s:%d.cpu%d", output_file, domain, virtpid, cpu
static char *get_temp_file(const char *host, const char *port,
@@ -382,7 +392,9 @@ static int open_udp(const char *node, const char *port, int *pid,
#define TRACE_CMD_DIR "/tmp/trace-cmd/"
#define VIRT_DIR TRACE_CMD_DIR "virt/"
#define VIRT_TRACE_CTL_SOCK VIRT_DIR "agent-ctl-path"
-#define TRACE_PATH_DOMAIN_CPU VIRT_DIR "%s/trace-path-cpu%d.out"
+#define VIRT_DOMAIN_DIR VIRT_DIR "%s/"
+#define TRACE_PATH_DOMAIN_CPU_O VIRT_DOMAIN_DIR "trace-path-cpu%d.out"
+#define TRACE_PATH_DOMAIN_CPU_I VIRT_DOMAIN_DIR "trace-path-cpu%d.in"
static int open_virtio_serial_pipe(int *pid, int cpu, int pagesize,
const char *domain, int virtpid)
@@ -390,7 +402,7 @@ static int open_virtio_serial_pipe(int *pid, int cpu, int pagesize,
char buf[PATH_MAX];
int fd;
- snprintf(buf, PATH_MAX, TRACE_PATH_DOMAIN_CPU, domain, cpu);
+ snprintf(buf, PATH_MAX, TRACE_PATH_DOMAIN_CPU_O, domain, cpu);
fd = open(buf, O_RDONLY | O_NONBLOCK);
if (fd < 0) {
warning("open %s", buf);
@@ -998,27 +1010,89 @@ static void do_listen_net(char *port)
kill_clients();
}
-static void make_virt_if_dir(void)
+#define for_each_domain(i) for (i = dom_dir_list; i; i = (i)->next)
+
+static void make_dir_virt(const char *path, mode_t perms, const char *gr_name)
{
struct group *group;
- if (mkdir(TRACE_CMD_DIR, 0710) < 0) {
+ if (mkdir(path, perms) < 0) {
if (errno != EEXIST)
- pdie("mkdir %s", TRACE_CMD_DIR);
+ pdie("mkdir %s", path);
}
- /* QEMU operates as qemu:qemu */
- chmod(TRACE_CMD_DIR, 0710);
- group = getgrnam("qemu");
- if (chown(TRACE_CMD_DIR, -1, group->gr_gid) < 0)
- pdie("chown %s", TRACE_CMD_DIR);
+ chmod(path, perms);
- if (mkdir(VIRT_DIR, 0710) < 0) {
- if (errno != EEXIST)
- pdie("mkdir %s", VIRT_DIR);
+ group = getgrnam(gr_name);
+ if (!group)
+ pdie("getgrnam %s", gr_name);
+ if (chown(path, -1, group->gr_gid) < 0)
+ pdie("chown %s", path);
+}
+
+static void make_traceif_in_dom_dir(const char *name, int cpu)
+{
+ char fifo_in[PATH_MAX];
+ char fifo_out[PATH_MAX];
+ int i;
+
+ for (i = 0; i < cpu; i++) {
+ snprintf(fifo_in, PATH_MAX, TRACE_PATH_DOMAIN_CPU_I, name, i);
+ snprintf(fifo_out, PATH_MAX, TRACE_PATH_DOMAIN_CPU_O, name, i);
+ if (mkfifo(fifo_in, 0644) < 0) {
+ if (errno != EEXIST)
+ pdie("mkfifo %s", fifo_in);
+ }
+ if (mkfifo(fifo_out, 0644) < 0) {
+ if (errno != EEXIST)
+ pdie("mkfifo %s", fifo_out);
+ }
}
- chmod(VIRT_DIR, 0710);
- if (chown(VIRT_DIR, -1, group->gr_gid) < 0)
- pdie("chown %s", VIRT_DIR);
+ plog("CPUS: %d\n", cpu);
+}
+
+/*
+ * Create the directory of every domain queued in dom_dir_list.
+ *
+ * Each directory gets the requested permission and group, falling back
+ * to 0710 and "qemu" when none was given.  If a CPU count was given,
+ * the per-CPU trace FIFOs are created as well.  The whole list is freed
+ * afterwards and dom_dir_list is reset to NULL.
+ */
+static void make_domain_dirs(void)
+{
+	struct domain_dir *dom_dir;
+	struct domain_dir *next;
+	const char *gr_name;
+	char buf[PATH_MAX];
+	mode_t perms;
+
+	for_each_domain(dom_dir) {
+		snprintf(buf, PATH_MAX, VIRT_DOMAIN_DIR, dom_dir->name);
+
+		perms = dom_dir->perms ? dom_dir->perms : 0710;
+		gr_name = dom_dir->group ? dom_dir->group : "qemu";
+
+		make_dir_virt(buf, perms, gr_name);
+
+		plog("---\n"
+		     "Process Directory: %s\n"
+		     "Directory permission: %o\n"
+		     "Group: %s\n", buf, perms, gr_name);
+
+		if (dom_dir->cpu)
+			make_traceif_in_dom_dir(dom_dir->name, dom_dir->cpu);
+	}
+
+	plog("---\n");
+
+	/*
+	 * Free every node, not only the head.  name and group point into
+	 * the argument strings and are not owned by the node.
+	 */
+	for (dom_dir = dom_dir_list; dom_dir; dom_dir = next) {
+		next = dom_dir->next;
+		free(dom_dir);
+	}
+	dom_dir_list = NULL;
+}
+
+/*
+ * Create TRACE_CMD_DIR and VIRT_DIR with mode 0710 and group "qemu",
+ * then create the domain directories if any were requested.
+ */
+static void make_virt_if_dir(void)
+{
+ char gr_name[5] = "qemu";
+
+ /* QEMU operates as qemu:qemu */
+ make_dir_virt(TRACE_CMD_DIR, 0710, gr_name);
+ make_dir_virt(VIRT_DIR, 0710, gr_name);
+
+ if (dom_dir_list)
+ make_domain_dirs();
}
static void do_listen_virt(void)
@@ -1060,7 +1134,14 @@ static void start_daemon(void)
die("starting daemon");
}
+/* Push @dom_dir onto the front of dom_dir_list. */
+static void add_dom_dir(struct domain_dir *dom_dir)
+{
+ dom_dir->next = dom_dir_list;
+ dom_dir_list = dom_dir;
+}
+
enum {
+ OPT_dom = 254,
OPT_debug = 255,
};
@@ -1075,6 +1156,37 @@ static void parse_args_net(int c, char **argv, char **port)
}
}
+/*
+ * Handle an option that is only accepted in VIRT (virt-server) mode.
+ *
+ * --dom starts a new domain entry; -m, -g and -c modify the entry of
+ * the most recent --dom and die() if none was given yet.  The values of
+ * -m (octal permission) and -c (CPU count) are validated so that a
+ * malformed argument is reported instead of silently becoming 0.
+ * Any other option value calls usage().
+ */
+static void parse_args_virt(int c, char **argv)
+{
+	static struct domain_dir *dom_dir;
+	char *end;
+	long val;
+
+	switch (c) {
+	case 'm':
+		if (!dom_dir)
+			die("-m needs --dom <domain>");
+		errno = 0;
+		val = strtol(optarg, &end, 8);
+		if (errno || end == optarg || *end || val < 0 || val > 07777)
+			die("-m needs an octal permission: %s", optarg);
+		dom_dir->perms = val;
+		break;
+	case 'g':
+		if (!dom_dir)
+			die("-g needs --dom <domain>");
+		dom_dir->group = optarg;
+		break;
+	case 'c':
+		if (!dom_dir)
+			die("-c needs --dom <domain>");
+		errno = 0;
+		val = strtol(optarg, &end, 10);
+		/* Reject negatives and values that do not fit in an int */
+		if (errno || end == optarg || *end || val < 0 ||
+		    (long)(int)val != val)
+			die("-c needs a number of CPUs: %s", optarg);
+		dom_dir->cpu = val;
+		break;
+	case OPT_dom:
+		dom_dir = malloc_or_die(sizeof(*dom_dir));
+		memset(dom_dir, 0, sizeof(*dom_dir));
+		dom_dir->name = optarg;
+		add_dom_dir(dom_dir);
+		break;
+	default:
+		usage(argv);
+	}
+}
+
void trace_listen(int argc, char **argv)
{
char *logfile = NULL;
@@ -1097,12 +1209,13 @@ void trace_listen(int argc, char **argv)
int option_index = 0;
static struct option long_options[] = {
{"port", required_argument, NULL, 'p'},
+ {"dom", required_argument, NULL, OPT_dom},
{"help", no_argument, NULL, '?'},
{"debug", no_argument, NULL, OPT_debug},
{NULL, 0, NULL, 0}
};
- c = getopt_long (argc-1, argv+1, "+hp:o:d:l:D",
+ c = getopt_long (argc-1, argv+1, "+hp:o:d:l:Dm:g:c:",
long_options, &option_index);
if (c == -1)
break;
@@ -1128,12 +1241,14 @@ void trace_listen(int argc, char **argv)
default:
if (mode == NET)
parse_args_net(c, argv, &port);
+ else if (mode == VIRT)
+ parse_args_virt(c, argv);
else
usage(argv);
}
}
- if (!port && mode == NET)
+ if (!port && (mode == NET))
usage(argv);
if ((argc - optind) >= 2)
diff --git a/trace-usage.c b/trace-usage.c
index 23cb124..caba1f9 100644
--- a/trace-usage.c
+++ b/trace-usage.c
@@ -216,11 +216,16 @@ static struct usage_help usage_help[] = {
"virt-server",
"listen on a virtio-serial for trace clients",
" %s virt-server [-o file][-d dir][-l logfile]\n"
+ " [--dom domain [-m permisson] [-g group] [-c cpu]]\n"
" Creates a socket to listen for clients.\n"
" -D create it in daemon mode.\n"
" -o file name to use for clients.\n"
" -d diretory to store client files.\n"
" -l logfile to write messages to.\n"
+ " --dom create domain direcroty in /tmp/trace-cmd/virt and folling directory permissions/group names and FIFO files will be changed here\n"
+ " -m changes the permission of domain directory.\n"
+ " -g changes group of domain directory.\n"
+ " -c creates trace data I/F(trace-path-cpu*.{in, out} files) in domain directory.\n"
},
{
"list",
From: Yoshihiro YUNOMAE <[email protected]>
Use the pid instead of the domain name if libvirt is not found.
This also fixes the trace FIFO lookup routine so that it finds
the FIFO via /proc/<PID>/fd.
With this change, trace-cmd virt-server can also be used with
qemu virtual machines that are run directly.
Signed-off-by: Yoshihiro YUNOMAE <[email protected]>
Signed-off-by: Masami Hiramatsu <[email protected]>
---
trace-listen.c | 47 ++++++++++++++++++++++++++++++++++-------------
1 file changed, 34 insertions(+), 13 deletions(-)
diff --git a/trace-listen.c b/trace-listen.c
index f6d53d2..f40b3a5 100644
--- a/trace-listen.c
+++ b/trace-listen.c
@@ -35,6 +35,7 @@
#include <fcntl.h>
#include <signal.h>
#include <errno.h>
+#include <fnmatch.h>
#include "trace-local.h"
#include "trace-msg.h"
@@ -396,20 +397,41 @@ static int open_udp(const char *node, const char *port, int *pid,
#define TRACE_PATH_DOMAIN_CPU_O VIRT_DOMAIN_DIR "trace-path-cpu%d.out"
#define TRACE_PATH_DOMAIN_CPU_I VIRT_DOMAIN_DIR "trace-path-cpu%d.in"
+#define TRACE_PATH_PATTERN VIRT_DIR "*/trace-path-cpu%d.out"
+
static int open_virtio_serial_pipe(int *pid, int cpu, int pagesize,
const char *domain, int virtpid)
{
+ char path[PATH_MAX];
char buf[PATH_MAX];
- int fd;
-
- snprintf(buf, PATH_MAX, TRACE_PATH_DOMAIN_CPU_O, domain, cpu);
- fd = open(buf, O_RDONLY | O_NONBLOCK);
- if (fd < 0) {
- warning("open %s", buf);
- return fd;
+ int fd = -ENOENT;
+ DIR *dir;
+ struct dirent *ent;
+ struct stat st;
+
+ snprintf(path, PATH_MAX, "/proc/%d/fd", virtpid);
+ dir = opendir(path);
+ if (!dir)
+ return -errno;
+ while ((ent = readdir(dir)) != NULL) {
+ snprintf(path, PATH_MAX, "/proc/%d/fd/%s",
+ virtpid, ent->d_name);
+ if (readlink(path, buf, PATH_MAX) < 0)
+ continue;
+ snprintf(path, PATH_MAX, TRACE_PATH_PATTERN, cpu);
+ /* Find the pipe which matchs pattern */
+ if ((fnmatch(path, buf, FNM_PATHNAME) == 0) &&
+ (stat(buf, &st) == 0) && (S_ISFIFO(st.st_mode))) {
+ fd = open(buf, O_RDONLY | O_NONBLOCK);
+ break;
+ }
}
+ closedir(dir);
- fork_virt_reader(fd, pid, cpu, pagesize, domain, virtpid);
+ if (fd < 0)
+ warning("open %s: %d", buf, fd);
+ else
+ fork_virt_reader(fd, pid, cpu, pagesize, domain, virtpid);
return fd;
}
@@ -762,6 +784,7 @@ static int do_fork(int cfd)
return 0;
}
+/* Get client (VM) pid from unix domain socket */
static int get_virtpid(int cfd)
{
struct ucred cr;
@@ -790,11 +813,8 @@ static char *get_guest_domain_from_pid(int pid)
int fd;
dir = opendir(LIBVIRT_DOMAIN_PATH);
- if (!dir) {
- if (errno == ENOENT)
- warning("Only support for using libvirt");
+ if (!dir)
return NULL;
- }
for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) {
snprintf(file_name, NAME_MAX, LIBVIRT_DOMAIN_PATH"%s",
@@ -837,7 +857,8 @@ static int do_connection(int cfd, struct sockaddr *peer_addr,
domain = get_guest_domain_from_pid(virtpid);
if (!domain)
- return -1;
+ if (asprintf(&domain, "%d", virtpid) < 0)
+ return -ENOMEM;
}
ret = do_fork(cfd);