Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id ; Sat, 19 Oct 2002 19:16:56 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id ; Sat, 19 Oct 2002 19:13:01 -0400 Received: from nameservices.net ([208.234.25.16]:25209 "EHLO opersys.com") by vger.kernel.org with ESMTP id ; Sat, 19 Oct 2002 19:07:48 -0400 Message-ID: <3DB1E86A.20570EDA@opersys.com> Date: Sat, 19 Oct 2002 19:19:06 -0400 From: Karim Yaghmour Reply-To: karim@opersys.com Organization: Opersys inc. X-Mailer: Mozilla 4.79 [en] (X11; U; Linux 2.4.19 i686) X-Accept-Language: en MIME-Version: 1.0 To: linux-kernel , LTT-Dev Subject: [PATCH] LTT for 2.5.44 3/10: Trace subsystem 2/2 Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 57074 Lines: 1859 [This is the second part of kernel/trace.c] + +/** + * init_buffer_control: - Init buffer control struct for new tracing run. + * @buf_ctrl: buffer control struct to be initialized + * @use_lockless: which tracing scheme to use, 1 for lockless + * @buffer_number_bits: number of bits in index word for buffer number + * @offset_bits: number of bits in index word to use for buffer offset + * + * Sanity of param values should be checked by caller. i.e. bufno_bits and + * offset_bits must reflect sane buffer sizes/numbers. + */ +static void init_buffer_control(struct buffer_control * buf_ctrl, + int use_lockless, + u8 buffer_number_bits, + u8 offset_bits) +{ + unsigned i, j; + int n_buffers = TRACE_MAX_BUFFER_NUMBER(buffer_number_bits); + + using_lockless = use_lockless; + buffer_switches_pending = 0; + + for(i = 0; i < num_cpus; i++) { + _buffer_id(buf_ctrl, i) = 0; + _events_lost(buf_ctrl, i) = 0; + /* Set things up to trigger per-cpu initialization */ + if(using_tsc) + atomic_set(&_waiting_for_cpu(buf_ctrl, i), + LTT_INITIALIZE_TRACE); + else + atomic_set(&_waiting_for_cpu(buf_ctrl, i), + LTT_NOTHING_TO_DO); + atomic_set(&_waiting_for_cpu_async(buf_ctrl, i), + LTT_NOTHING_TO_DO); + _trace_buffer(buf_ctrl, i) = trace_buf + (i * cpu_buf_size); + + if(using_lockless == 0) { + atomic_set(&_signal_sent(buf_ctrl, i), 0); + write_buf(i) = trace_buffer(i); + read_buf(i) = trace_buffer(i) + buf_size; + write_buf_end(i) = write_buf(i) + buf_size; + read_buf_end(i) = read_buf(i) + buf_size; + current_write_pos(i) = write_buf(i); + read_limit(i) = read_buf(i); + write_limit(i) = write_buf_end(i) + - TRACER_LAST_EVENT_SIZE; + } else { + _index(buf_ctrl, i) = start_reserve; + _bufno_bits(buf_ctrl, i) = buffer_number_bits; + _n_buffers(buf_ctrl, i) = + TRACE_MAX_BUFFER_NUMBER(buffer_number_bits); + _offset_bits(buf_ctrl, i) = offset_bits; + _offset_mask(buf_ctrl, i) = + TRACE_BUFFER_OFFSET_MASK(offset_bits); + _index_mask(buf_ctrl, i) = + (1UL << (buffer_number_bits + offset_bits)) - 1; + _buffers_produced(buf_ctrl, i) = 0; + _buffers_consumed(buf_ctrl, i) = 0; + _buffers_full(buf_ctrl, i) = 0; + + /* When a new buffer is switched to, TRACE_BUFFER_SIZE + is subtracted from its fill_count in order to + initialize it to the empty state. The reason it's + done this way is because an intervening event may + have already been written to the buffer while we + were in the process of switching and thus blindly + initializing to 0 would erase that event. 
The first + buffer is initialized to 0 and the others are + initialized to TRACE_BUFFER_SIZE because the very + first buffer we ever see won't be initialized in + that way by the switching code and since there's + never been an event, we know it should be 0 and that + it must be explicitly initialized that way before + logging begins. sStartReserve is is factored into + the end-of-buffer processing, so isn't added to the + fill counts here, except for the first. */ + atomic_set(&_fill_count(buf_ctrl, i, 0), + (int)start_reserve); + for(j = 1; j < n_buffers; j++) + atomic_set(&_fill_count(buf_ctrl, i, j), + (int)TRACE_BUFFER_SIZE(offset_bits)); + + } + } +} + +/** + * trace: - Tracing function per se. + * @event_id: ID of event as defined in linux/trace.h + * @event_struct: struct describing the event + * @cpu_id: the CPU associated with the event + * + * Returns: + * 0, if everything went OK (event got registered) + * -ENODEV, no tracing daemon opened the driver. + * -ENOMEM, no more memory to store events. + * -EBUSY, tracer not started yet. + */ +int trace(u8 event_id, + void *event_struct, + u8 cpu_id) +{ + int var_data_len = 0; /* Length of variable length data to be copied, if any */ + void *var_data_beg = NULL; /* Begining of variable length data to be copied */ + int send_signal = 0; /* Should the daemon be summoned */ + uint16_t data_size; /* Size of tracing data */ + struct siginfo daemon_sig_info; /* Signal information */ + struct timeval time_stamp; /* Event time */ + unsigned long int flags; /* CPU flags for lock */ + trace_time_delta time_delta; /* The time elapsed between now and the last event */ + struct task_struct *incoming_process = NULL; /* Pointer to incoming process */ + + /* Is there a tracing daemon */ + if (daemon_task_struct == NULL) + return -ENODEV; + + /* Execute any tasks waiting for this CPU */ + if(atomic_read(&waiting_for_cpu(cpu_id)) != 0) + do_waiting_tasks(cpu_id); + + /* Do we trace the event */ + if ((tracer_started == 1) || (event_id == TRACE_EV_START) || (event_id == TRACE_EV_BUFFER_START)) + goto TraceEvent; + + return -EBUSY; + +TraceEvent: + /* Are we monitoring this event */ + if (!ltt_test_bit(event_id, &traced_events)) + return 0; + + /* Always let the start event pass, whatever the IDs */ + if ((event_id != TRACE_EV_START) && (event_id != TRACE_EV_BUFFER_START)) { + /* Is this a scheduling change */ + if (event_id == TRACE_EV_SCHEDCHANGE) { + /* Get pointer to incoming process */ + incoming_process = (struct task_struct *) (((trace_schedchange *) event_struct)->in); + + /* Set PID information in schedchange event */ + (((trace_schedchange *) event_struct)->in) = incoming_process->pid; + } + /* Are we monitoring a particular process */ + if ((tracing_pid == 1) && (current->pid != traced_pid)) { + /* Record this event if it is the scheduling change bringing in the traced PID */ + if (incoming_process == NULL) + return 0; + else if (incoming_process->pid != traced_pid) + return 0; + } + /* Are we monitoring a particular process group */ + if ((tracing_pgrp == 1) && (current->pgrp != traced_pgrp)) { + /* Record this event if it is the scheduling change bringing in a process of the traced PGRP */ + if (incoming_process == NULL) + return 0; + else if (incoming_process->pgrp != traced_pgrp) + return 0; + } + /* Are we monitoring the processes of a given group of users */ + if ((tracing_gid == 1) && (current->egid != traced_gid)) { + /* Record this event if it is the scheduling change bringing in a process of the traced GID */ + if (incoming_process 
== NULL) + return 0; + else if (incoming_process->egid != traced_gid) + return 0; + } + /* Are we monitoring the processes of a given user */ + if ((tracing_uid == 1) && (current->euid != traced_uid)) { + /* Record this event if it is the scheduling change bringing in a process of the traced UID */ + if (incoming_process == NULL) + return 0; + else if (incoming_process->euid != traced_uid) + return 0; + } + } + + /* Compute size of tracing data */ + data_size = sizeof(event_id) + sizeof(time_delta) + sizeof(data_size); + + /* Do we log the event details */ + if (ltt_test_bit(event_id, &log_event_details_mask)) { + /* Update the size of the data entry */ + data_size += event_struct_size[event_id]; + + /* Some events have variable length */ + switch (event_id) { + /* Is there a file name in this */ + case TRACE_EV_FILE_SYSTEM: + if ((((trace_file_system *) event_struct)->event_sub_id == TRACE_EV_FILE_SYSTEM_EXEC) + || (((trace_file_system *) event_struct)->event_sub_id == TRACE_EV_FILE_SYSTEM_OPEN)) { + /* Remember the string's begining and update size variables */ + var_data_beg = ((trace_file_system *) event_struct)->file_name; + var_data_len = ((trace_file_system *) event_struct)->event_data2 + 1; + data_size += (uint16_t) var_data_len; + } + break; + + /* Logging of a custom event */ + case TRACE_EV_CUSTOM: + var_data_beg = ((trace_custom *) event_struct)->data; + var_data_len = ((trace_custom *) event_struct)->data_size; + data_size += (uint16_t) var_data_len; + break; + } + } + + /* Do we record the CPUID */ + if ((log_cpuid == 1) && (event_id != TRACE_EV_START) && (event_id != TRACE_EV_BUFFER_START)) { + /* Update the size of the data entry */ + data_size += sizeof(cpu_id); + } + + /* If we're using the lockless scheme, we preempt the default path + here - nothing after this point in this function will be executed. + Note that even if we do have cmpxchg, we still want to have a + choice between the lock-free and locking schemes at run-time, thus + the using_lockless check. This used to be implemented as a kernel + hook, and will be again when/if kernel hooks are accepted into the + kernel. */ + if(using_lockless && have_cmpxchg()) + return lockless_write_event(event_id, + event_struct, + data_size, + cpu_id, + var_data_beg, + var_data_len); + + /* Disable interrupts on this CPU */ + local_irq_save(flags); + + /* The following time calculations have to be done with interrupts disabled because + otherwise the event order could be inverted. 
*/ + + /* Get the time of the event */ + time_delta = get_time_delta(&time_stamp, cpu_id); + + /* Is there enough space left in the write buffer */ + if (current_write_pos(cpu_id) + data_size > write_limit(cpu_id)) { + /* Have we already switched buffers and informed the daemon of it */ + if (atomic_read(&signal_sent(cpu_id)) == 1) { + /* We've lost another event */ + (events_lost(cpu_id))++; + + /* Bye, bye, now */ + local_irq_restore(flags); + return -ENOMEM; + } + /* We need to inform the daemon */ + send_signal = 1; + + /* Get the time and TSC of the start/end buffer event */ + get_timestamp(&time_stamp, &time_delta); + + /* Switch buffers, pass lTimeDelta in case it's really a TSC */ + tracer_switch_buffers(time_stamp, time_delta, cpu_id); + + /* Recompute the time delta since buffer_start_time has changed because of the buffer change */ + recalc_time_delta(&time_stamp, &time_delta, cpu_id); + } + + /* Write the CPUID to the tracing buffer, if required */ + if ((log_cpuid == 1) && (event_id != TRACE_EV_START) && (event_id != TRACE_EV_BUFFER_START)) + tracer_write_to_buffer(current_write_pos(cpu_id), + &cpu_id, + sizeof(cpu_id)); + + /* Write event type to tracing buffer */ + tracer_write_to_buffer(current_write_pos(cpu_id), + &event_id, + sizeof(event_id)); + + /* Write event time delta to tracing buffer */ + tracer_write_to_buffer(current_write_pos(cpu_id), + &time_delta, + sizeof(time_delta)); + + /* Do we log event details */ + if (ltt_test_bit(event_id, &log_event_details_mask)) { + /* Write event structure */ + tracer_write_to_buffer(current_write_pos(cpu_id), + event_struct, + event_struct_size[event_id]); + + /* Write string if any */ + if (var_data_len) + tracer_write_to_buffer(current_write_pos(cpu_id), + var_data_beg, + var_data_len); + } + /* Write the length of the event description */ + tracer_write_to_buffer(current_write_pos(cpu_id), + &data_size, + sizeof(data_size)); + + /* Should the tracing daemon be notified */ + if (send_signal == 1) { + /* Remember that a signal has been sent */ + atomic_set(&signal_sent(cpu_id), 1); + + /* Atomically mark buffer-switch bit for this cpu */ + set_bit(cpu_id, &buffer_switches_pending); + + /* Restore interrupts on this CPU */ + local_irq_restore(flags); + + /* Setup signal information */ + daemon_sig_info.si_signo = SIGIO; + daemon_sig_info.si_errno = 0; + daemon_sig_info.si_code = SI_KERNEL; + + /* Signal the tracing daemon */ + send_sig_info(SIGIO, &daemon_sig_info, daemon_task_struct); + } else + /* Restore interrupts on this CPU */ + local_irq_restore(flags); + return 0; +} + +/** + * tracer_switch_buffers: - Switches between read and write buffers. + * @current_time: current time. + * @current_tsc: the TSC associated with current_time, if applicable + * @cpu_id: the CPU associated with the event + * + * Put the current write buffer to be read and reset put the old read + * buffer to be written to. Set the tracer variables in consequence. + * + * No return values. + * + * This should be called from with interrupts disabled. 
+ */ +void tracer_switch_buffers(struct timeval current_time, + trace_time_delta current_tsc, + u8 cpu_id) +{ + char *temp_buf; /* Temporary buffer pointer */ + char *temp_buf_end; /* Temporary buffer end pointer */ + char *init_write_pos; /* Initial write position */ + u8 event_id; /* Event ID of last event */ + uint16_t data_size; /* Size of tracing data */ + u32 size_lost; /* Size delta between last event and end of buffer */ + trace_time_delta time_delta; /* The time elapsed between now and the last event */ + trace_buffer_start start_buffer_event; /* Start of the new buffer event */ + trace_buffer_end end_buffer_event; /* End of buffer event */ + + /* Remember initial write position */ + init_write_pos = current_write_pos(cpu_id); + + /* Write the end event at the write of the buffer */ + end_buffer_event.time = current_time; + end_buffer_event.tsc = current_tsc; + + /* Write the CPUID to the tracing buffer, if required */ + if (log_cpuid == 1) { + tracer_write_to_buffer(current_write_pos(cpu_id), + &cpu_id, + sizeof(cpu_id)); + } + /* Write event type to tracing buffer */ + event_id = TRACE_EV_BUFFER_END; + tracer_write_to_buffer(current_write_pos(cpu_id), + &event_id, + sizeof(event_id)); + + /* Write event time delta/TSC to tracing buffer */ + time_delta = switch_time_delta(current_tsc); + tracer_write_to_buffer(current_write_pos(cpu_id), + &time_delta, + sizeof(time_delta)); + + /* Write event structure */ + tracer_write_to_buffer(current_write_pos(cpu_id), + &end_buffer_event, + sizeof(end_buffer_event)); + + /* Compute the data size */ + data_size = sizeof(event_id) + + sizeof(time_delta) + + sizeof(end_buffer_event) + + sizeof(data_size); + + /* Write the length of the event description */ + tracer_write_to_buffer(current_write_pos(cpu_id), + &data_size, + sizeof(data_size)); + + /* Get size lost */ + size_lost = write_buf_end(cpu_id) - init_write_pos; + + /* Write size lost at the end of the buffer */ + *((u32 *) (write_buf_end(cpu_id) - sizeof(size_lost))) = size_lost; + + /* Switch buffers */ + temp_buf = read_buf(cpu_id); + read_buf(cpu_id) = write_buf(cpu_id); + write_buf(cpu_id) = temp_buf; + + /* Set buffer ends */ + temp_buf_end = read_buf_end(cpu_id); + read_buf_end(cpu_id) = write_buf_end(cpu_id); + write_buf_end(cpu_id) = temp_buf_end; + + /* Set read limit */ + read_limit(cpu_id) = read_buf_end(cpu_id); + + /* Set write limit */ + write_limit(cpu_id) = write_buf_end(cpu_id) - TRACER_LAST_EVENT_SIZE; + + /* Set write position */ + current_write_pos(cpu_id) = write_buf(cpu_id); + + /* Increment buffer ID */ + (buffer_id(cpu_id))++; + + /* Set the time/TSC of beginning of this buffer */ + buffer_start_time(cpu_id) = current_time; + buffer_start_tsc(cpu_id) = current_tsc; + + /* Write the start of buffer event */ + start_buffer_event.id = buffer_id(cpu_id); + start_buffer_event.time = current_time; + start_buffer_event.tsc = current_tsc; + + /* Write event type to tracing buffer */ + event_id = TRACE_EV_BUFFER_START; + tracer_write_to_buffer(current_write_pos(cpu_id), + &event_id, + sizeof(event_id)); + + /* Write event time delta to tracing buffer */ + time_delta = switch_time_delta(current_tsc); + tracer_write_to_buffer(current_write_pos(cpu_id), + &time_delta, + sizeof(time_delta)); + + /* Write event structure */ + tracer_write_to_buffer(current_write_pos(cpu_id), + &start_buffer_event, + sizeof(start_buffer_event)); + + /* Compute the data size */ + data_size = sizeof(event_id) + + sizeof(time_delta) + + sizeof(start_buffer_event) + + sizeof(data_size); + + /* 
Write the length of the event description */ + tracer_write_to_buffer(current_write_pos(cpu_id), + &data_size, + sizeof(data_size)); +} + +/** + * update_shared_buffer_control: - prepare for GET_BUFFER_CONTROL ioctl + * @cpu_id: the CPU associated with the ioctl + * + * Copies buffer control data into a common format that can be shared + * between the tracer and the daemon, allowing alignment to be ignored. + */ +static inline void update_shared_buffer_control(u8 cpu_id) +{ + int i, n_buffers; + + shared_buffer_control.cpu_id = cpu_id; + + /* Let the caller know if there are more buffer switches to process + AFTER this one */ + shared_buffer_control.buffer_switches_pending = + buffer_switches_pending & ~(1UL << cpu_id); + shared_buffer_control.buffer_control_valid = 1; + if(using_lockless) { + shared_buffer_control.bufno_bits = bufno_bits(cpu_id); + shared_buffer_control.offset_bits = offset_bits(cpu_id); + shared_buffer_control.buffers_produced = + buffers_produced(cpu_id); + shared_buffer_control.buffers_consumed = + buffers_consumed(cpu_id); + n_buffers = TRACE_MAX_BUFFER_NUMBER(buf_no_bits); + for(i = 0; i < n_buffers; i++) { + shared_buffer_control.fill_count[i] = + atomic_read(&fill_count(cpu_id, i)); + } + } +} + +/** + * sys_trace: - Tracing system call + * + * @tracer_handle: tracing mechanism handle + * @tracer_command: command given by the caller + * @command_arg1: argument "1" to the command + * @command_arg2: argument "2" to the command + * + * Returns: + * >0, In case the caller requested the number of events lost. + * 0, Everything went OK + * -ENOSYS, no such command + * -EINVAL, tracer not properly configured + * -EBUSY, tracer can't be reconfigured while in operation + * -ENOMEM, no more memory + * -EFAULT, unable to access user space memory + * -EACCES, invalid tracer handle + */ +asmlinkage int sys_trace(unsigned int tracer_handle, + unsigned int tracer_command, + unsigned long command_arg1, + unsigned long command_arg2) +{ + int retval; /* Function return value */ + int new_user_event_id; /* ID of newly created user event */ + unsigned long mmap_start_addr; /* Start address of buffer in process space */ + unsigned long int flags; /* CPU flags for lock */ + u8 cpu_id; /* Current CPU */ + u8 i; /* Counter */ + u32 buffers_consumed; /* # buffers consumed */ + trace_custom user_event; /* The user event to be logged */ + trace_change_mask trace_mask; /* Event mask */ + trace_new_event new_user_event; /* The event to be created for the user */ + struct timeval current_time; /* The time elapsed between now and the last event */ + trace_time_delta current_tsc; /* The time elapsed between now and the last event */ + struct buffers_committed buffers_committed; /* For COMMITTED case */ + + /* Is this a handle request */ + if (tracer_command == TRACER_ALLOC_HANDLE) + return trace_alloc_handle(tracer_handle); + + /* Is the handle provided valid? 
*/ + if (!trace_valid_handle(tracer_handle)) + return -EACCES; + + /* If the tracer is started, the daemon can't modify the configuration */ + if ((tracer_handle == 0) + && (tracer_started == 1) + && (tracer_command != TRACER_STOP) + && (tracer_command != TRACER_DATA_COMITTED) + && (tracer_command != TRACER_GET_BUFFER_CONTROL)) + return -EBUSY; + + /* Only some operations are permitted to user processes trying to log events */ + if ((tracer_handle > 1) + && (tracer_command != TRACER_CREATE_USER_EVENT) + && (tracer_command != TRACER_DESTROY_USER_EVENT) + && (tracer_command != TRACER_TRACE_USER_EVENT) + && (tracer_command != TRACER_SET_EVENT_MASK) + && (tracer_command != TRACER_GET_EVENT_MASK)) + return -ENOSYS; + + + /* Depending on the command executed */ + switch (tracer_command) { + /* Start the tracer */ + case TRACER_START: + /* Start the heartbeat timer */ + init_heartbeat_timer(); + init_percpu_timers(); + + /* Initialize buffer control regardless of scheme in use */ + init_buffer_control(buffer_control, + !use_locking, /* using_lockless */ + buf_no_bits, /* bufno_bits, 2**n */ + buf_offset_bits); /* offset_bits, 2**n */ + + /* Check if the device has been properly set up */ + if (((use_syscall_eip_bounds == 1) + && (syscall_eip_depth_set == 1)) + || ((use_syscall_eip_bounds == 1) + && ((lower_eip_bound_set != 1) + || (upper_eip_bound_set != 1))) + || ((tracing_pid == 1) + && (tracing_pgrp == 1))) + return -EINVAL; + + /* Set the kernel-side trace configuration */ + if (trace_set_config(syscall_eip_depth_set, + use_syscall_eip_bounds, + syscall_eip_depth, + lower_eip_bound, + upper_eip_bound) < 0) + return -EINVAL; + + /* Always log the start event and the buffer start event */ + ltt_set_bit(TRACE_EV_BUFFER_START, &traced_events); + ltt_set_bit(TRACE_EV_BUFFER_START, &log_event_details_mask); + ltt_set_bit(TRACE_EV_START, &traced_events); + ltt_set_bit(TRACE_EV_START, &log_event_details_mask); + ltt_set_bit(TRACE_EV_CHANGE_MASK, &traced_events); + ltt_set_bit(TRACE_EV_CHANGE_MASK, &log_event_details_mask); + + /* If we're not using TSC, then we can initialize all now */ + if(using_tsc == 0) + for(i = 0; i < num_cpus; i++) + initialize_trace(i); + + /* Start tapping into Linux's syscall flow */ + syscall_entry_trace_active = ltt_test_bit(TRACE_EV_SYSCALL_ENTRY, &traced_events); + syscall_exit_trace_active = ltt_test_bit(TRACE_EV_SYSCALL_EXIT, &traced_events); + + /* We can start tracing */ + tracer_stopping = 0; + tracer_started = 1; + + /* Reregister custom trace events created earlier */ + trace_reregister_custom_events(); + + break; + + /* Stop the tracer */ + case TRACER_STOP: + /* Stop heartbeat timer if we were using it */ + if(using_tsc == 1) + del_timer(&heartbeat_timer); + + /* Stop tracing */ + /* We don't log new events, but old lockless ones can finish */ + tracer_stopping = 1; + tracer_started = 0; + + /* Stop interrupting the normal flow of system calls */ + syscall_entry_trace_active = 0; + syscall_exit_trace_active = 0; + + /* Make sure the last buffer touched is finalized */ + if(using_lockless) { + /* If we're not using TSC, we can finalize all now */ + /* Write end buffer event as last event in old buf. 
*/ + if(using_tsc == 0) { + for(i = 0; i < num_cpus; i++) + finalize_lockless_trace(i); + tracer_stopping = 0; + } else + for(i = 0; i < num_cpus; i++) + set_waiting_for_cpu_async(i, LTT_FINALIZE_TRACE); + break; + } /* Else locking scheme */ + + /* Acquire the lock to avoid the SMP case where another CPU is writing a trace + while the buffer is being switched */ + spin_lock_irqsave(&trace_spin_lock, flags); + + if(using_tsc == 0) { + /* Get the time of the event */ + get_timestamp(&current_time, &current_tsc); + + /* If we're not using TSC, we can finalize all now */ + for(i = 0; i < num_cpus; i++) { + /* Atomically mark buffer-switch bit for cpu */ + set_bit(i, &buffer_switches_pending); + + /* Switch the buffers to ensure that the end + of the buffer mark is set */ + tracer_switch_buffers(current_time, + current_tsc, i); + } + tracer_stopping = 0; + } else { + for(i = 0; i < num_cpus; i++) + set_waiting_for_cpu_async(i, LTT_FINALIZE_TRACE); + } + + /* Release lock */ + spin_unlock_irqrestore(&trace_spin_lock, flags); + break; + + /* Set the tracer to the default configuration */ + case TRACER_CONFIG_DEFAULT: + tracer_set_default_config(); + break; + + /* Set the memory buffers the daemon wants us to use */ + case TRACER_CONFIG_MEMORY_BUFFERS: + /* Is the given size "reasonable" */ + if (use_locking == 1) { + if (command_arg1 < TRACER_MIN_BUF_SIZE) + return -EINVAL; + } else { + if ((command_arg1 < TRACER_LOCKLESS_MIN_BUF_SIZE) || + (command_arg1 > TRACER_LOCKLESS_MAX_BUF_SIZE)) + return -EINVAL; + } + + /* Set the buffer's size */ + return tracer_set_buffer_size(command_arg1); + break; + + /* Set the number of memory buffers the daemon wants us to use */ + case TRACER_CONFIG_N_MEMORY_BUFFERS: + /* Is the given size "reasonable" */ + if ((use_locking == 1) || (command_arg1 < TRACER_MIN_BUFFERS) || + (command_arg1 > TRACER_MAX_BUFFERS)) + return -EINVAL; + + /* Set the number of buffers */ + return tracer_set_n_buffers(command_arg1); + break; + + /* Set the locking scheme the daemon wants us to use */ + case TRACER_CONFIG_USE_LOCKING: + /* Set the locking scheme in a global for later */ + use_locking = command_arg1; + if((use_locking == 0) && (have_cmpxchg() == 0)) + /* Lock-free scheme not supported on this platform */ + return -EINVAL; + break; + + /* Trace the given events */ + case TRACER_CONFIG_EVENTS: + if (copy_from_user(&traced_events, (void *) command_arg1, sizeof(traced_events))) + return -EFAULT; + break; + + /* Set the timestamping method */ + case TRACER_CONFIG_TIMESTAMP: + using_tsc = command_arg1; + if((using_tsc == 1) && (have_tsc() == 0)) { + using_tsc = 0; + return -EINVAL; + } + break; + + /* Record the details of the event, or not */ + case TRACER_CONFIG_DETAILS: + if (copy_from_user(&log_event_details_mask, (void *) command_arg1, sizeof(log_event_details_mask))) + return -EFAULT; + break; + + /* Record the CPUID associated with the event */ + case TRACER_CONFIG_CPUID: + log_cpuid = 1; + break; + + /* Trace only one process */ + case TRACER_CONFIG_PID: + tracing_pid = 1; + traced_pid = command_arg1; + break; + + /* Trace only the given process group */ + case TRACER_CONFIG_PGRP: + tracing_pgrp = 1; + traced_pgrp = command_arg1; + break; + + /* Trace the processes of a given group of users */ + case TRACER_CONFIG_GID: + tracing_gid = 1; + traced_gid = command_arg1; + break; + + /* Trace the processes of a given user */ + case TRACER_CONFIG_UID: + tracing_uid = 1; + traced_uid = command_arg1; + break; + + /* Set the call depth at which the EIP should be fetched on syscall */ + case
TRACER_CONFIG_SYSCALL_EIP_DEPTH: + syscall_eip_depth_set = 1; + syscall_eip_depth = command_arg1; + break; + + /* Set the lowerbound address from which EIP is recorded on syscall */ + case TRACER_CONFIG_SYSCALL_EIP_LOWER: + /* We are using bounds for fetching the EIP where syscall was made */ + use_syscall_eip_bounds = 1; + + /* Set the lower bound */ + lower_eip_bound = (void *) command_arg1; + + /* The lower bound has been set */ + lower_eip_bound_set = 1; + break; + + /* Set the upperbound address from which EIP is recorded on syscall */ + case TRACER_CONFIG_SYSCALL_EIP_UPPER: + /* We are using bounds for fetching the EIP where syscall was made */ + use_syscall_eip_bounds = 1; + + /* Set the upper bound */ + upper_eip_bound = (void *) command_arg1; + + /* The upper bound has been set */ + upper_eip_bound_set = 1; + break; + + /* The daemon has comitted the last trace */ + case TRACER_DATA_COMITTED: + /* Copy the information from user space */ + if (copy_from_user(&buffers_committed, (void *)command_arg1, + sizeof(buffers_committed))) + return -EFAULT; + + cpu_id = buffers_committed.cpu_id; + buffers_consumed = buffers_committed.buffers_consumed; + + /* Turn off the bit indicating that the cpu's buffer switch + needs servicing */ + clear_bit(cpu_id, &buffer_switches_pending); + + /* The lockless version doesn't use signal_sent. command_arg1 is + the number of buffers the daemon has told us it just + consumed. Add that to the global count. */ + if(using_lockless) { + local_irq_save(flags); + + /* We consumed some buffers, note it. */ + buffers_consumed(cpu_id) += buffers_consumed; + + /* If we were full, we no longer are */ + if(buffers_full(cpu_id) && (buffers_consumed > 0)) { + set_waiting_for_cpu(cpu_id, LTT_CONTINUE_TRACE); + } + + local_irq_restore(flags); + break; + } /* Else locking version below */ + + /* Safely set the signal sent flag to 0 */ + local_irq_save(flags); + atomic_set(&signal_sent(cpu_id), 0); + local_irq_restore(flags); + break; + + /* Get the number of events lost */ + case TRACER_GET_EVENTS_LOST: + return events_lost(command_arg1); + break; + + /* Create a user event */ + case TRACER_CREATE_USER_EVENT: + /* Copy the information from user space */ + if (copy_from_user(&new_user_event, (void *) command_arg1, sizeof(new_user_event))) + return -EFAULT; + + /* Create the event */ + new_user_event_id = trace_create_owned_event(new_user_event.type, + new_user_event.desc, + new_user_event.format_type, + new_user_event.form, + current->pid); + + /* Has the operation succeded */ + if (new_user_event_id >= 0) { + /* Set the event ID */ + new_user_event.id = new_user_event_id; + + /* Copy the event information back to user space */ + if (copy_to_user((void *) command_arg1, &new_user_event, sizeof(new_user_event))) { + /* Since we were unable to tell the user about the event, destroy it */ + trace_destroy_event(new_user_event_id); + return -EFAULT; + } + } else + /* Forward trace_create_event()'s error code */ + return new_user_event_id; + break; + + /* Destroy a user event */ + case TRACER_DESTROY_USER_EVENT: + /* Pass on the user's request */ + trace_destroy_event((int) command_arg1); + break; + + /* Trace a user event */ + case TRACER_TRACE_USER_EVENT: + /* Copy the information from user space */ + if (copy_from_user(&user_event, (void *) command_arg1, sizeof(user_event))) + return -EFAULT; + + /* Copy the user event data */ + if (copy_from_user(user_event_data, user_event.data, user_event.data_size)) + return -EFAULT; + + /* Log the raw event */ + retval = 
trace_raw_event(user_event.id, + user_event.data_size, + user_event_data); + + /* Has the operation failed */ + if (retval < 0) + /* Forward trace_raw_event()'s error code */ + return retval; + break; + + /* Set event mask */ + case TRACER_SET_EVENT_MASK: + /* Copy the information from user space */ + if (copy_from_user(&(trace_mask.mask), (void *) command_arg1, sizeof(trace_mask.mask))) + return -EFAULT; + + /* Trace the event */ + + /* Note that we log this only for whatever CPU happens to be + current - the visualizer tools need to pick this up and + correlate it with the other CPUs' events. */ + retval = trace(TRACE_EV_CHANGE_MASK, &trace_mask, + smp_processor_id()); + + /* Change the event mask. (This has to be done second or else we may lose the + information if the user decides to stop logging "change mask" events) */ + memcpy(&traced_events, &(trace_mask.mask), sizeof(trace_mask.mask)); + syscall_entry_trace_active = ltt_test_bit(TRACE_EV_SYSCALL_ENTRY, &traced_events); + syscall_exit_trace_active = ltt_test_bit(TRACE_EV_SYSCALL_EXIT, &traced_events); + + /* Always trace the buffer start, the trace start and the change mask */ + ltt_set_bit(TRACE_EV_BUFFER_START, &traced_events); + ltt_set_bit(TRACE_EV_START, &traced_events); + ltt_set_bit(TRACE_EV_CHANGE_MASK, &traced_events); + + /* Forward trace()'s error code */ + return retval; + break; + + /* Get event mask */ + case TRACER_GET_EVENT_MASK: + /* Copy the information to user space */ + if (copy_to_user((void *) command_arg1, &traced_events, sizeof(traced_events))) + return -EFAULT; + break; + + /* Get information about the CPU configuration */ + case TRACER_GET_ARCH_INFO: + ltt_arch_info.n_cpus = num_cpus; + ltt_arch_info.page_shift = PAGE_SHIFT; + if(copy_to_user((void *) command_arg1, + &ltt_arch_info, + sizeof(ltt_arch_info))) + return -EFAULT; + break; + + /* Get buffer control data */ + case TRACER_GET_BUFFER_CONTROL: + for(i = 0; i < num_cpus; i++) { + /* Return the first buffer control with a buffer switch + still needing to be serviced - the daemon will ask + for the others later. */ + if(buffer_switches_pending & (1UL << i)) { + update_shared_buffer_control(i); + /* Copy the buffer control information to user + space. We can't copy_to_user() with a lock + held (accessing user memory may cause a page + fault), so buffers_produced may actually be + larger than what the daemon sees when this + snapshot is taken. This isn't a problem + because the daemon will get a chance to + read the new buffer the next time it's + signaled. */ + if(copy_to_user((void *) command_arg1, + &shared_buffer_control, + sizeof(shared_buffer_control))) + return -EFAULT; + return 0; + } + } + + /* If we're here, there were no cpus ready - let the daemon + know that. Use cpu 0 marked as invalid for this purpose.
*/ + shared_buffer_control.cpu_id = 0; + shared_buffer_control.buffer_control_valid = 0; + if(copy_to_user((void *) command_arg1, + &shared_buffer_control, + sizeof(shared_buffer_control))) + return -EFAULT; + break; + + /* Free a handle */ + case TRACER_FREE_HANDLE: + return trace_free_handle(tracer_handle); + break; + + /* Free the daemon's handle */ + case TRACER_FREE_DAEMON_HANDLE: + return trace_free_daemon_handle(); + break; + + /* Free all handles */ + case TRACER_FREE_ALL_HANDLES: + trace_free_all_handles(current); + break; + + /* Map buffer to process space */ + case TRACER_MAP_BUFFER: + retval = trace_mmap_buffer(tracer_handle, command_arg1, &mmap_start_addr); + /* Copy the mapping information back to user space */ + if (copy_to_user((void *) command_arg2, &mmap_start_addr, sizeof(mmap_start_addr))) + retval = -EFAULT; + + return retval; + break; + + /* Unknown command */ + default: + return -ENOSYS; + } + + return 0; +} + +/** + * trace_mmap_buffer: - mmap buffer to process space + * @tracer_handle: tracing handle + * @length: length requested by daemon + * @start_addr: pointer to mapping start address + * + * This function mmaps the buffer to the daemon's address space. To unmap, + * daemon should use sys_mumap(). No need to provide an actual function to + * munmap since the kernel already provides on for that purpose. The + * value of tracer_vm_area is set to NULL when trace_free_daemon_handle() + * is called. + * Returns: + * 0 if ok + * -EAGAIN, when remap failed + * -EINVAL, invalid requested length + * -EACCES, permission denied + */ +int trace_mmap_buffer(unsigned int tracer_handle, + unsigned long length, + unsigned long *start_addr) +{ + int retval; + unsigned long actual_size; + struct mm_struct *mm = current->mm; + + /* Only the trace daemon is allowed access to mmap */ + if (current != daemon_task_struct) + return -EACCES; + + /* Is the length requested equal to the existing length */ + if (length != (unsigned long) alloc_size) + return -EINVAL; + + /* Are the buffers already mapped to user-space */ + if (tracer_vm_area != NULL) + return -EBUSY; + + down_write(&mm->mmap_sem); + + /* Allocate space of trace buffers in process' address space */ + *start_addr = do_mmap(NULL, 0, alloc_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, 0); + /* Find vma matching start_addr */ + tracer_vm_area = find_vma(mm, *start_addr); + + actual_size = tracer_vm_area->vm_end - tracer_vm_area->vm_start; + /* Make sure sizes are consistent */ + if (IS_ERR((void *)*start_addr)) { + do_munmap(mm, (unsigned long)*start_addr, alloc_size); + tracer_vm_area = NULL; + retval = -EAGAIN; + } else { + /* Remap trace buffer into the process's memory space */ + retval = tracer_mmap_region(tracer_vm_area, + (char *) *start_addr, + trace_buf, + alloc_size); + } + + up_write(&mm->mmap_sem); + + return retval; +} + +/** + * trace_valid_handle: - Validate tracer handle. 
+ * @tracer_handle: handle to be validated + * + * Returns: + * 1, if handle is valid + * 0, if handle is invalid + */ +int trace_valid_handle(unsigned int tracer_handle) +{ + int retval; + + /* Is this the daemon */ + if (tracer_handle == 0) { + if (daemon_task_struct == current) + retval = 1; + else + retval = 0; + } else { + /* Lock handle table for reading */ + read_lock(&trace_handle_table_lock); + + /* Test the handle */ + if (trace_handle_table[tracer_handle - 1].owner == current) + retval = 1; + else + retval = 0; + + /* Unlock table */ + read_unlock(&trace_handle_table_lock); + } + + return retval; +} + +/** + * trace_alloc_handle: - Allocate trace handle to caller. + * @tracer_handle: handle requested by process + * + * Returns: + * Handle ID, everything went OK + * -ENODEV, no more free handles. + * -EBUSY, daemon handle already in use. + */ +int trace_alloc_handle(unsigned int tracer_handle) +{ + int i; + int retval; + + /* Is there another process trying to get the daemon's handle */ + if ((tracer_handle == 0) && (daemon_task_struct != NULL)) + return -EBUSY; + + /* Is this a normal process trying to get a tracer handle */ + if (tracer_handle == 1) { + /* Lock the trace handle table for writing */ + write_lock(&trace_handle_table_lock); + + /* Look for a free handle */ + for (i = 0; i < TRACE_MAX_HANDLES; i++) + if (trace_handle_table[i].owner == NULL) { + trace_handle_table[i].owner = current; + break; + } + + /* Unlock the trace handle table */ + write_unlock(&trace_handle_table_lock); + + /* Were there any free handles */ + if (i == TRACE_MAX_HANDLES) + retval = -ENODEV; + else + retval = (i + 1); /* User handle "1" is entry "0" in trace_handle_table. */ + } else { + /* This is the daemon requesting his handle */ + tracer_started = 0; + tracer_stopping = 0; + + /* Fetch the task structure of the process that opened the device */ + daemon_task_struct = current; + + /* Reset the default configuration since this is the daemon and he will complete the setup */ + tracer_set_default_config(); + + /* Only daemon gets handle "0" */ + retval = 0; + } + + return retval; +} + +/** + * trace_free_handle: - Free a single handle. + * tracer_handle: handle to be freed. + * + * Returns: + * 0, everything went OK + * -ENODEV, no such handle. + * -EACCES, handle doesn't belong to caller. + */ +int trace_free_handle(unsigned int tracer_handle) +{ + int retval; + + /* Does this handle ID makes sense */ + if ((tracer_handle < 1) || (tracer_handle >= TRACE_MAX_HANDLES)) + return -ENODEV; + + /* Lock the trace handle table for writing */ + write_lock(&trace_handle_table_lock); + + /* Does this task have any handles */ + if (trace_handle_table[tracer_handle - 1].owner == current) { + /* Free the handle */ + trace_handle_table[tracer_handle - 1].owner = NULL; + retval = 0; + } else { + retval = -EACCES; + } + + /* Unlock the trace handle table */ + write_unlock(&trace_handle_table_lock); + + return retval; +} + +/** + * trace_free_daemon_handle: - Free the daemon's handle. + * + * Returns: + * 0, everything went OK + * -EACCES, handle doesn't belong to caller. + * -EBUSY, there are still event writes in progress so the buffer can't + * be released. 
+ */ +int trace_free_daemon_handle(void) +{ + int i; + int event_writes_pending; + + /* Is this requested by the daemon */ + if (daemon_task_struct != current) + return -EACCES; + + /* Did we loose any events */ + for(i = 0; i < num_cpus; i++) + if (events_lost(i) > 0) + printk(KERN_ALERT "Tracer: Lost %d events on cpu %d\n", + events_lost(i), i); + + /* Reset the daemon's structures */ + daemon_task_struct = NULL; + tracer_vm_area = NULL; + + /* Make sure no timers can fire after we free buffer */ + del_percpu_timers(); + + /* Free the current buffers, if any, but only if they're not still + in use */ + if (trace_buf != NULL) { + event_writes_pending = trace_get_pending_write_count(); + if(event_writes_pending == 0) + rvfree(trace_buf, alloc_size); + else { + printk(KERN_ERR "Tracer: Couldn't release tracer - %d event writes pending \n", + event_writes_pending); + return -EBUSY; + } + } + + /* Reset the read and write buffers */ + trace_buf = NULL; + for(i = 0; i < num_cpus; i++) { + write_buf(i) = NULL; + read_buf(i) = NULL; + write_buf_end(i) = NULL; + read_buf_end(i) = NULL; + current_write_pos(i) = NULL; + read_limit(i) = NULL; + write_limit(i) = NULL; + events_lost(i) = 0; + atomic_set(&signal_sent(i), 0); + } + + use_locking = 1; + + /* Reset the tracer's configuration */ + tracer_set_default_config(); + tracer_started = 0; + tracer_stopping = 0; + + /* Reset number of bytes recorded and number of events lost */ + buf_read_complete = 0; + size_read_incomplete = 0; + + return 0; +} + +/** + * trace_free_all_handles: - Free all handles taken. + * @task_ptr: pointer to exiting task. + */ +void trace_free_all_handles(struct task_struct* task_ptr) +{ + int i; + + /* Is this the trace daemon */ + if(daemon_task_struct == task_ptr) + trace_free_daemon_handle(); + + /* Lock the trace handle table for writing */ + write_lock(&trace_handle_table_lock); + + /* Does this task have any handles */ + for (i = 0; i < TRACE_MAX_HANDLES; i++) + if (trace_handle_table[i].owner == current) + /* Free the handle */ + trace_handle_table[i].owner = NULL; + + /* Unlock the trace handle table */ + write_unlock(&trace_handle_table_lock); +} + +/** + * tracer_set_buffer_size: - Sets the size of the buffers. + * @buffers_size: Size of buffers + * + * Returns: + * 0, Size setting went OK + * -ENOMEM, unable to get a hold of memory for tracer + * + * buf_no_bits must have already been set before this function is called. 
+ */ +int tracer_set_buffer_size(int buffers_size) +{ + int size_alloc; + int no_buffers = TRACE_MAX_BUFFER_NUMBER(buf_no_bits); + + /* We want to make sure the number of buffers allocated matches + the number of CPUs we use for the rest of the trace */ + num_cpus = num_online_cpus(); + + if(use_locking == 1) { + /* Set size to allocate (= buffers_size * 2) per CPU and fix it's + size to be on a page boundary */ + cpu_buf_size = FIX_SIZE(buffers_size << 1); + + /* Set size allocated for all CPUs */ + size_alloc = cpu_buf_size * num_cpus; + } else { + /* Calculate power-of-2 buffer size */ + if(hweight32(buffers_size) != 1) + /* Invalid if # set bits != 1 */ + return -EINVAL; + + /* Find position of one and only set bit */ + buf_offset_bits = ffs(buffers_size) - 1; + + /* Set size to allocate (= buffers_size * n buffers) per CPU and + fix it's size to be on a page boundary */ + cpu_buf_size = FIX_SIZE(buffers_size * no_buffers); + + /* Calculate total size of buffers for all CPUs*/ + size_alloc = cpu_buf_size * num_cpus; + + /* Sanity check */ + if(size_alloc > TRACER_LOCKLESS_MAX_TOTAL_BUF_SIZE) + return -EINVAL; + } + + /* Make sure no timers can fire after we free buffer */ + del_percpu_timers(); + + /* Free the current buffers, if any, but only if they're not still in use */ + if (trace_buf != NULL) { + if(trace_get_pending_write_count() == 0) + rvfree(trace_buf, alloc_size); + else + return -EBUSY; + } + + /* Allocate space for the tracing buffers */ + if ((trace_buf = (char *) rvmalloc(size_alloc)) == NULL) + return -ENOMEM; + + /* Remember the size set */ + buf_size = buffers_size; + alloc_size = size_alloc; + + return 0; +} + +/** + * tracer_set_default_config: - Sets the tracer in its default config + * + * Returns: + * 0, everything went OK + * -ENOMEM, unable to get a hold of memory for tracer + */ +int tracer_set_default_config(void) +{ + int i; + int retval = 0; + + /* Initialize the event mask */ + traced_events = 0; + + /* Initialize the event mask with all existing events with their details */ + for (i = 0; i <= TRACE_EV_MAX; i++) { + ltt_set_bit(i, &traced_events); + ltt_set_bit(i, &log_event_details_mask); + } + + /* Do not interfere with Linux's syscall flow until we actually start tracing */ + syscall_entry_trace_active = 0; + syscall_exit_trace_active = 0; + + /* Forget about the CPUID */ + log_cpuid = 0; + + /* We aren't tracing any PID or GID in particular */ + tracing_pid = 0; + tracing_pgrp = 0; + tracing_gid = 0; + tracing_uid = 0; + + /* We aren't looking for a particular call depth */ + syscall_eip_depth_set = 0; + + /* We aren't going to place bounds on syscall EIP fetching */ + use_syscall_eip_bounds = 0; + lower_eip_bound_set = 0; + upper_eip_bound_set = 0; + + /* By default, use TSC timestamping */ + using_tsc = 1; + + /* Set the kernel trace configuration to it's basics */ + trace_set_config(syscall_eip_depth_set, + use_syscall_eip_bounds, + 0, + 0, + 0); + + return retval; +} + +/** + * trace_init: - Tracing initialization function. 
+ * + * Returns: + * 0, everything went OK + * -ENONMEM, incapable of allocating necessary memory + * Forwarded error code otherwise + */ +int __init trace_init(void) +{ + int i; + int retval = 0; + + /* Initialize configuration */ + if ((retval = tracer_set_default_config()) < 0) + return retval; + + /* Initialize bytes read and events lost */ + buf_read_complete = 0; + size_read_incomplete = 0; + + /* Initialize tracing daemon structures */ + daemon_task_struct = NULL; + tracer_vm_area = NULL; + + /* Allocate memory for large data components */ + if ((user_event_data = vmalloc(CUSTOM_EVENT_MAX_SIZE)) < 0) + return -ENOMEM; + + /* Initialize spin lock */ + trace_spin_lock = SPIN_LOCK_UNLOCKED; + + /* By default, use locking scheme */ + use_locking = 1; + + /* Initialize next event ID to be used */ + next_event_id = TRACE_EV_MAX + 1; + + /* Initialize custom events list */ + custom_events = &custom_events_head; + custom_events->next = custom_events; + custom_events->prev = custom_events; + + /* Initialize tracing handle table */ + for(i = 0; i < TRACE_MAX_HANDLES; i++) + trace_handle_table[i].owner = NULL; + + return retval; +} + +/** + * trace_set_config: - Set the tracing configuration + * @do_syscall_depth: Use depth to fetch eip + * @do_syscall_bounds: Use bounds to fetch eip + * @eip_depth: Detph to fetch eip + * @eip_lower_bound: Lower bound eip address + * @eip_upper_bound: Upper bound eip address + * + * Returns: + * 0, all is OK + * -ENOMEDIUM, there isn't a registered tracer + * -ENXIO, wrong tracer + * -EINVAL, invalid configuration + */ +int trace_set_config(int do_syscall_depth, + int do_syscall_bounds, + int eip_depth, + void *eip_lower_bound, + void *eip_upper_bound) +{ + /* Is this a valid configuration */ + if ((do_syscall_depth && do_syscall_bounds) + || (eip_lower_bound > eip_upper_bound) + || (eip_depth < 0)) + return -EINVAL; + + /* Set the configuration */ + fetch_syscall_eip_use_depth = do_syscall_depth; + fetch_syscall_eip_use_bounds = do_syscall_bounds; + syscall_eip_depth = eip_depth; + syscall_lower_eip_bound = eip_lower_bound; + syscall_upper_eip_bound = eip_upper_bound; + + return 0; +} + +/** + * trace_get_config: - Get the tracing configuration + * @do_syscall_depth: Use depth to fetch eip + * @do_syscall_bounds: Use bounds to fetch eip + * @eip_depth: Detph to fetch eip + * @eip_lower_bound: Lower bound eip address + * @eip_upper_bound: Upper bound eip address + * + * Returns: + * 0, all is OK + * -ENOMEDIUM, there isn't a registered tracer + */ +int trace_get_config(int *do_syscall_depth, + int *do_syscall_bounds, + int *eip_depth, + void **eip_lower_bound, + void **eip_upper_bound) +{ + /* Get the configuration */ + *do_syscall_depth = fetch_syscall_eip_use_depth; + *do_syscall_bounds = fetch_syscall_eip_use_bounds; + *eip_depth = syscall_eip_depth; + *eip_lower_bound = syscall_lower_eip_bound; + *eip_upper_bound = syscall_upper_eip_bound; + + return 0; +} + +/** + * _trace_create_event: - Create a new traceable event type + * @event_type: string describing event type + * @event_desc: string used for standard formatting + * @format_type: type of formatting used to log event data + * @format_data: data specific to format + * @owner_pid: PID of event's owner (0 if none) + * + * Returns: + * New Event ID if all is OK + * -ENOMEM, Unable to allocate new event + */ +int _trace_create_event(char *event_type, + char *event_desc, + int format_type, + char *format_data, + pid_t owner_pid) +{ + trace_new_event *new_event; + struct custom_event_desc *new_event_desc; 
+ + /* Create event */ + if ((new_event_desc = (struct custom_event_desc *) kmalloc(sizeof(struct custom_event_desc), GFP_ATOMIC)) == NULL) + return -ENOMEM; + new_event = &(new_event_desc->event); + + /* Initialize event properties */ + new_event->type[0] = '\0'; + new_event->desc[0] = '\0'; + new_event->form[0] = '\0'; + + /* Set basic event properties */ + if (event_type != NULL) + strncpy(new_event->type, event_type, CUSTOM_EVENT_TYPE_STR_LEN); + if (event_desc != NULL) + strncpy(new_event->desc, event_desc, CUSTOM_EVENT_DESC_STR_LEN); + if (format_data != NULL) + strncpy(new_event->form, format_data, CUSTOM_EVENT_FORM_STR_LEN); + + /* Ensure that strings are bound */ + new_event->type[CUSTOM_EVENT_TYPE_STR_LEN - 1] = '\0'; + new_event->desc[CUSTOM_EVENT_DESC_STR_LEN - 1] = '\0'; + new_event->form[CUSTOM_EVENT_FORM_STR_LEN - 1] = '\0'; + + /* Set format type */ + new_event->format_type = format_type; + + /* Give the new event a unique event ID */ + new_event->id = next_event_id; + next_event_id++; + + /* Set event's owner */ + new_event_desc->owner_pid = owner_pid; + + /* Insert new event in event list */ + write_lock(&custom_list_lock); + new_event_desc->next = custom_events; + new_event_desc->prev = custom_events->prev; + custom_events->prev->next = new_event_desc; + custom_events->prev = new_event_desc; + write_unlock(&custom_list_lock); + + /* Log the event creation event */ + trace_event(TRACE_EV_NEW_EVENT, &(new_event_desc->event)); + + return new_event->id; +} +int trace_create_event(char *event_type, + char *event_desc, + int format_type, + char *format_data) +{ + return _trace_create_event(event_type, event_desc, format_type, format_data, 0); +} +int trace_create_owned_event(char *event_type, + char *event_desc, + int format_type, + char *format_data, + pid_t owner_pid) +{ + return _trace_create_event(event_type, event_desc, format_type, format_data, owner_pid); +} + +/** + * trace_destroy_event: - Destroy a created event type + * @event_id, the Id returned by trace_create_event() + * + * No return values. + */ +void trace_destroy_event(int event_id) +{ + struct custom_event_desc *event_desc; + + write_lock(&custom_list_lock); + + /* Find the event to destroy in the event description list */ + for (event_desc = custom_events->next; + event_desc != custom_events; + event_desc = event_desc->next) + if (event_desc->event.id == event_id) + break; + + /* If we found something */ + if (event_desc != custom_events) { + /* Remove the event fromt the list */ + event_desc->next->prev = event_desc->prev; + event_desc->prev->next = event_desc->next; + + /* Free the memory used by this event */ + kfree(event_desc); + } + write_unlock(&custom_list_lock); +} + +/** + * trace_destroy_owners_events: Destroy an owner's events + * @owner_pid: the PID of the owner who's events are to be deleted. + * + * No return values. 
+ */ +void trace_destroy_owners_events(pid_t owner_pid) +{ + struct custom_event_desc *temp_event; + struct custom_event_desc *event_desc; + + write_lock(&custom_list_lock); + + /* Start at the first event in the list */ + event_desc = custom_events->next; + + /* Find all events belonging to the PID */ + while (event_desc != custom_events) { + temp_event = event_desc->next; + + /* Does this event belong to the same owner */ + if (event_desc->owner_pid == owner_pid) { + /* Remove the event from the list */ + event_desc->next->prev = event_desc->prev; + event_desc->prev->next = event_desc->next; + + /* Free the memory used by this event */ + kfree(event_desc); + } + event_desc = temp_event; + } + + write_unlock(&custom_list_lock); +} + +/** + * trace_reregister_custom_events: - Relogs event creations. + * + * Relog the declarations of custom events. This is necessary to make + * sure that even though the event creation might not have taken place + * during a previous trace, that all custom events be part of all traces. + * Hence, if a custom event occurs during a new trace, we can be sure + * that its definition will also be part of the trace. + * + * No return values. + */ +void trace_reregister_custom_events(void) +{ + struct custom_event_desc *event_desc; + + read_lock(&custom_list_lock); + + /* Log an event creation for every description in the list */ + for (event_desc = custom_events->next; + event_desc != custom_events; + event_desc = event_desc->next) + trace_event(TRACE_EV_NEW_EVENT, &(event_desc->event)); + + read_unlock(&custom_list_lock); +} + +/** + * trace_std_formatted_event: - Trace a formatted event + * @event_id: the event Id provided upon creation + * @...: printf-like data that will be used to fill the event string. + * + * Returns: + * Trace fct return code if OK. + * -ENOMEDIUM, there is no registered tracer or event doesn't exist. + */ +int trace_std_formatted_event(int event_id,...) +{ + int string_size; /* Size of the string outputed by vsprintf() */ + char final_string[CUSTOM_EVENT_FINAL_STR_LEN]; /* Final formatted string */ + va_list vararg_list; /* Variable argument list */ + trace_custom custom_event; + struct custom_event_desc *event_desc; + + read_lock(&custom_list_lock); + + /* Find the event description matching this event */ + for (event_desc = custom_events->next; + event_desc != custom_events; + event_desc = event_desc->next) + if (event_desc->event.id == event_id) + break; + + /* If we haven't found anything */ + if (event_desc == custom_events) { + read_unlock(&custom_list_lock); + + return -ENOMEDIUM; + } + /* Set custom event Id */ + custom_event.id = event_id; + + /* Initialize variable argument list access */ + va_start(vararg_list, event_id); + + /* Print the description out to the temporary buffer */ + string_size = vsprintf(final_string, event_desc->event.desc, vararg_list); + + read_unlock(&custom_list_lock); + + /* Facilitate return to caller */ + va_end(vararg_list); + + /* Set the size of the event */ + custom_event.data_size = (u32) (string_size + 1); + + /* Set the pointer to the event data */ + custom_event.data = final_string; + + /* Log the custom event */ + return trace_event(TRACE_EV_CUSTOM, &custom_event); +} + +/** + * trace_raw_event: - Trace a raw event + * @event_id, the event Id provided upon creation + * @event_size, the size of the data provided + * @event_data, data buffer describing event + * + * Returns: + * Trace fct return code if OK. + * -ENOMEDIUM, there is no registered tracer or event doesn't exist. 
+ */ +int trace_raw_event(int event_id, int event_size, void *event_data) +{ + trace_custom custom_event; + struct custom_event_desc *event_desc; + + read_lock(&custom_list_lock); + + /* Find the event description matching this event */ + for (event_desc = custom_events->next; + event_desc != custom_events; + event_desc = event_desc->next) + if (event_desc->event.id == event_id) + break; + + read_unlock(&custom_list_lock); + + /* If we haven't found anything */ + if (event_desc == custom_events) + return -ENOMEDIUM; + + /* Set custom event Id */ + custom_event.id = event_id; + + /* Set the data size */ + if (event_size <= CUSTOM_EVENT_MAX_SIZE) + custom_event.data_size = (u32) event_size; + else + custom_event.data_size = (u32) CUSTOM_EVENT_MAX_SIZE; + + /* Set the pointer to the event data */ + custom_event.data = event_data; + + /* Log the custom event */ + return trace_event(TRACE_EV_CUSTOM, &custom_event); +} + +/** + * trace_event: - Trace an event + * @event_id, the event's ID (check out trace.h) + * @event_struct, the structure describing the event + * + * Returns: + * Trace fct return code if OK. + * -ENOMEDIUM, there is no registered tracer + * -ENOMEM, couldn't access ltt_info + */ +int trace_event(u8 event_id, + void *event_struct) +{ + int ret_value; + + atomic_inc(&pending_write_count); + + /* Call the tracer */ + ret_value = trace(event_id, + event_struct, + smp_processor_id()); + + atomic_dec(&pending_write_count); + + return ret_value; +} + +/** + * trace_get_pending_write_count: - Get nbr pending writes. + * + * Returns the number of trace event writes in progress. + */ +int trace_get_pending_write_count(void) +{ + return atomic_read(&pending_write_count); +} + +module_init(trace_init); + +/* Export symbols so that can be visible from outside this file */ +EXPORT_SYMBOL(trace_set_config); +EXPORT_SYMBOL(trace_get_config); +EXPORT_SYMBOL(trace_create_event); +EXPORT_SYMBOL(trace_create_owned_event); +EXPORT_SYMBOL(trace_destroy_event); +EXPORT_SYMBOL(trace_destroy_owners_events); +EXPORT_SYMBOL(trace_std_formatted_event); +EXPORT_SYMBOL(trace_raw_event); +EXPORT_SYMBOL(trace_event); + +EXPORT_SYMBOL(syscall_entry_trace_active); +EXPORT_SYMBOL(syscall_exit_trace_active);
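
To make the calling convention of sys_trace() above concrete, here is a minimal user-space sketch of how a trace daemon might drive it. It is illustrative only and not part of the patch: the syscall number (__NR_trace), the header location of the TRACER_* constants, and the example buffer size are assumptions; only the argument order (handle, command, arg1, arg2) and the command names come from the code above. A real daemon would also map the buffers with TRACER_MAP_BUFFER and, on each SIGIO, read them out and acknowledge with TRACER_DATA_COMITTED.

/*
 * Illustrative sketch only -- not part of this patch.
 * __NR_trace is a placeholder, and <linux/trace.h> is assumed to
 * provide the TRACER_* command constants.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/trace.h>

#ifndef __NR_trace
#define __NR_trace 0	/* placeholder; the real number is arch-specific */
#endif

static long ltt_ctl(unsigned int handle, unsigned int cmd,
		    unsigned long arg1, unsigned long arg2)
{
	/* Argument order matches sys_trace(handle, command, arg1, arg2) */
	return syscall(__NR_trace, handle, cmd, arg1, arg2);
}

int main(void)
{
	/* Handle 0 is reserved for the trace daemon */
	if (ltt_ctl(0, TRACER_ALLOC_HANDLE, 0, 0) < 0) {
		perror("TRACER_ALLOC_HANDLE");
		return 1;
	}

	/* Per-buffer size in bytes; must satisfy TRACER_MIN_BUF_SIZE */
	ltt_ctl(0, TRACER_CONFIG_MEMORY_BUFFERS, 50000, 0);
	ltt_ctl(0, TRACER_START, 0, 0);

	sleep(10);	/* the kernel logs events in the meantime */

	ltt_ctl(0, TRACER_STOP, 0, 0);
	ltt_ctl(0, TRACER_FREE_DAEMON_HANDLE, 0, 0);
	return 0;
}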