Commit d594d8f4 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull printk updates from Petr Mladek:
 "The big new thing is the fully lockless ringbuffer implementation,
  including the support for continuous lines. It will allow to store and
  read messages in any situation wihtout the risk of deadlocks and
  without the need of temporary per-CPU buffers.

  The access is still serialized by logbuf_lock. It synchronizes few
  more operations, for example, temporary buffer for formatting the
  message, syslog and kmsg_dump operations. The lock removal is being
  discussed and should be ready for the next release.

  The continuous lines are handled exactly the same way as before to
  avoid regressions in user space. It means that they are appended to
  the last message when the caller is the same. Only the last message
  can be extended.

  The data ring includes plain text of the messages. Except for an
  integer at the beginning of each message that points back to the
  descriptor ring with other metadata.

  The dictionary has to stay. journalctl uses it to filter the log. It
  allows to show messages related to a given device. The dictionary
  values are stored in the descriptor ring with the other metadata.

  This is the first part of the printk rework as discussed at Plumbers
  2019, see https://lore.kernel.org/r/87k1acz5rx.fsf@linutronix.de. The
  next big step will be handling consoles by kthreads during the normal
  system operation. It will require special handling of situations when
  the kthreads could not get scheduled, for example, early boot,
  suspend, panic.

  Other changes:

   - Add John Ogness as a reviewer for printk subsystem. He is author of
     the rework and is familiar with the code and history.

   - Fix locking in serial8250_do_startup() to prevent lockdep report.

   - Few code cleanups"

* tag 'printk-for-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux: (27 commits)
  printk: Use fallthrough pseudo-keyword
  printk: reduce setup_text_buf size to LOG_LINE_MAX
  printk: avoid and/or handle record truncation
  printk: remove dict ring
  printk: move dictionary keys to dev_printk_info
  printk: move printk_info into separate array
  printk: reimplement log_cont using record extension
  printk: ringbuffer: add finalization/extension support
  printk: ringbuffer: change representation of states
  printk: ringbuffer: clear initial reserved fields
  printk: ringbuffer: add BLK_DATALESS() macro
  printk: ringbuffer: relocate get_data()
  printk: ringbuffer: avoid memcpy() on state_var
  printk: ringbuffer: fix setting state in desc_read()
  kernel.h: Move oops_in_progress to printk.h
  scripts/gdb: update for lockless printk ringbuffer
  scripts/gdb: add utils.read_ulong()
  docs: vmcoreinfo: add lockless printk ringbuffer vmcoreinfo
  printk: reduce LOG_BUF_SHIFT range for H8300
  printk: ringbuffer: support dataless records
  ...
parents 029f56db 70333f4f
Loading
Loading
Loading
Loading
+109 −50
Original line number Diff line number Diff line
@@ -170,57 +170,103 @@ document trapinfo
	address the kernel panicked.
end

define dump_log_idx
	set $idx = $arg0
	if ($argc > 1)
		set $prev_flags = $arg1
define dump_record
	set var $desc = $arg0
	set var $info = $arg1
	if ($argc > 2)
		set var $prev_flags = $arg2
	else
		set $prev_flags = 0
		set var $prev_flags = 0
	end
	set $msg = ((struct printk_log *) (log_buf + $idx))
	set $prefix = 1
	set $newline = 1
	set $log = log_buf + $idx + sizeof(*$msg)

	# prev & LOG_CONT && !(msg->flags & LOG_PREIX)
	if (($prev_flags & 8) && !($msg->flags & 4))
		set $prefix = 0
	set var $prefix = 1
	set var $newline = 1

	set var $begin = $desc->text_blk_lpos.begin % (1U << prb->text_data_ring.size_bits)
	set var $next = $desc->text_blk_lpos.next % (1U << prb->text_data_ring.size_bits)

	# handle data-less record
	if ($begin & 1)
		set var $text_len = 0
		set var $log = ""
	else
		# handle wrapping data block
		if ($begin > $next)
			set var $begin = 0
		end

		# skip over descriptor id
		set var $begin = $begin + sizeof(long)

		# handle truncated message
		if ($next - $begin < $info->text_len)
			set var $text_len = $next - $begin
		else
			set var $text_len = $info->text_len
		end

		set var $log = &prb->text_data_ring.data[$begin]
	end

	# prev & LOG_CONT && !(info->flags & LOG_PREIX)
	if (($prev_flags & 8) && !($info->flags & 4))
		set var $prefix = 0
	end

	# msg->flags & LOG_CONT
	if ($msg->flags & 8)
	# info->flags & LOG_CONT
	if ($info->flags & 8)
		# (prev & LOG_CONT && !(prev & LOG_NEWLINE))
		if (($prev_flags & 8) && !($prev_flags & 2))
			set $prefix = 0
			set var $prefix = 0
		end
		# (!(msg->flags & LOG_NEWLINE))
		if (!($msg->flags & 2))
			set $newline = 0
		# (!(info->flags & LOG_NEWLINE))
		if (!($info->flags & 2))
			set var $newline = 0
		end
	end

	if ($prefix)
		printf "[%5lu.%06lu] ", $msg->ts_nsec / 1000000000, $msg->ts_nsec % 1000000000
		printf "[%5lu.%06lu] ", $info->ts_nsec / 1000000000, $info->ts_nsec % 1000000000
	end
	if ($msg->text_len != 0)
		eval "printf \"%%%d.%ds\", $log", $msg->text_len, $msg->text_len
	if ($text_len)
		eval "printf \"%%%d.%ds\", $log", $text_len, $text_len
	end
	if ($newline)
		printf "\n"
	end
	if ($msg->dict_len > 0)
		set $dict = $log + $msg->text_len
		set $idx = 0
		set $line = 1
		while ($idx < $msg->dict_len)
			if ($line)
				printf " "
				set $line = 0
			end
			set $c = $dict[$idx]

	# handle dictionary data

	set var $dict = &$info->dev_info.subsystem[0]
	set var $dict_len = sizeof($info->dev_info.subsystem)
	if ($dict[0] != '\0')
		printf " SUBSYSTEM="
		set var $idx = 0
		while ($idx < $dict_len)
			set var $c = $dict[$idx]
			if ($c == '\0')
				loop_break
			else
				if ($c < ' ' || $c >= 127 || $c == '\\')
					printf "\\x%02x", $c
				else
					printf "%c", $c
				end
			end
			set var $idx = $idx + 1
		end
		printf "\n"
				set $line = 1
	end

	set var $dict = &$info->dev_info.device[0]
	set var $dict_len = sizeof($info->dev_info.device)
	if ($dict[0] != '\0')
		printf " DEVICE="
		set var $idx = 0
		while ($idx < $dict_len)
			set var $c = $dict[$idx]
			if ($c == '\0')
				loop_break
			else
				if ($c < ' ' || $c >= 127 || $c == '\\')
					printf "\\x%02x", $c
@@ -228,33 +274,46 @@ define dump_log_idx
					printf "%c", $c
				end
			end
			set $idx = $idx + 1
			set var $idx = $idx + 1
		end
		printf "\n"
	end
end
document dump_log_idx
	Dump a single log given its index in the log buffer.  The first
	parameter is the index into log_buf, the second is optional and
	specified the previous log buffer's flags, used for properly
	formatting continued lines.
document dump_record
	Dump a single record. The first parameter is the descriptor,
	the second parameter is the info, the third parameter is
	optional and specifies the previous record's flags, used for
	properly formatting continued lines.
end

define dmesg
	set $i = log_first_idx
	set $end_idx = log_first_idx
	set $prev_flags = 0
	# definitions from kernel/printk/printk_ringbuffer.h
	set var $desc_committed = 1
	set var $desc_finalized = 2
	set var $desc_sv_bits = sizeof(long) * 8
	set var $desc_flags_shift = $desc_sv_bits - 2
	set var $desc_flags_mask = 3 << $desc_flags_shift
	set var $id_mask = ~$desc_flags_mask

	set var $desc_count = 1U << prb->desc_ring.count_bits
	set var $prev_flags = 0

	set var $id = prb->desc_ring.tail_id.counter
	set var $end_id = prb->desc_ring.head_id.counter

	while (1)
		set $msg = ((struct printk_log *) (log_buf + $i))
		if ($msg->len == 0)
			set $i = 0
		else
			dump_log_idx $i $prev_flags
			set $i = $i + $msg->len
			set $prev_flags = $msg->flags
		set var $desc = &prb->desc_ring.descs[$id % $desc_count]
		set var $info = &prb->desc_ring.infos[$id % $desc_count]

		# skip non-committed record
		set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift)
		if ($state == $desc_committed || $state == $desc_finalized)
			dump_record $desc $info $prev_flags
			set var $prev_flags = $info->flags
		end
		if ($i == $end_idx)

		set var $id = ($id + 1) & $id_mask
		if ($id == $end_id)
			loop_break
		end
	end
+102 −29
Original line number Diff line number Diff line
@@ -189,50 +189,123 @@ from this.
Free areas descriptor. User-space tools use this value to iterate the
free_area ranges. MAX_ORDER is used by the zone buddy allocator.

log_first_idx
prb
---

A pointer to the printk ringbuffer (struct printk_ringbuffer). This
may be pointing to the static boot ringbuffer or the dynamically
allocated ringbuffer, depending on when the the core dump occurred.
Used by user-space tools to read the active kernel log buffer.

printk_rb_static
----------------

A pointer to the static boot printk ringbuffer. If @prb has a
different value, this is useful for viewing the initial boot messages,
which may have been overwritten in the dynamically allocated
ringbuffer.

clear_seq
---------

The sequence number of the printk() record after the last clear
command. It indicates the first record after the last
SYSLOG_ACTION_CLEAR, like issued by 'dmesg -c'. Used by user-space
tools to dump a subset of the dmesg log.

printk_ringbuffer
-----------------

The size of a printk_ringbuffer structure. This structure contains all
information required for accessing the various components of the
kernel log buffer.

(printk_ringbuffer, desc_ring|text_data_ring|dict_data_ring|fail)
-----------------------------------------------------------------

Offsets for the various components of the printk ringbuffer. Used by
user-space tools to view the kernel log buffer without requiring the
declaration of the structure.

prb_desc_ring
-------------

Index of the first record stored in the buffer log_buf. Used by
user-space tools to read the strings in the log_buf.
The size of the prb_desc_ring structure. This structure contains
information about the set of record descriptors.

log_buf
-------
(prb_desc_ring, count_bits|descs|head_id|tail_id)
-------------------------------------------------

Offsets for the fields describing the set of record descriptors. Used
by user-space tools to be able to traverse the descriptors without
requiring the declaration of the structure.

prb_desc
--------

The size of the prb_desc structure. This structure contains
information about a single record descriptor.

(prb_desc, info|state_var|text_blk_lpos|dict_blk_lpos)
------------------------------------------------------

Offsets for the fields describing a record descriptors. Used by
user-space tools to be able to read descriptors without requiring
the declaration of the structure.

prb_data_blk_lpos
-----------------

The size of the prb_data_blk_lpos structure. This structure contains
information about where the text or dictionary data (data block) is
located within the respective data ring.

(prb_data_blk_lpos, begin|next)
-------------------------------

Console output is written to the ring buffer log_buf at index
log_first_idx. Used to get the kernel log.
Offsets for the fields describing the location of a data block. Used
by user-space tools to be able to locate data blocks without
requiring the declaration of the structure.

log_buf_len
printk_info
-----------

log_buf's length.
The size of the printk_info structure. This structure contains all
the meta-data for a record.

clear_idx
---------
(printk_info, seq|ts_nsec|text_len|dict_len|caller_id)
------------------------------------------------------

The index that the next printk() record to read after the last clear
command. It indicates the first record after the last SYSLOG_ACTION
_CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump
the dmesg log.
Offsets for the fields providing the meta-data for a record. Used by
user-space tools to be able to read the information without requiring
the declaration of the structure.

log_next_idx
------------
prb_data_ring
-------------

The index of the next record to store in the buffer log_buf. Used to
compute the index of the current buffer position.
The size of the prb_data_ring structure. This structure contains
information about a set of data blocks.

printk_log
----------
(prb_data_ring, size_bits|data|head_lpos|tail_lpos)
---------------------------------------------------

The size of a structure printk_log. Used to compute the size of
messages, and extract dmesg log. It encapsulates header information for
log_buf, such as timestamp, syslog level, etc.
Offsets for the fields describing a set of data blocks. Used by
user-space tools to be able to access the data blocks without
requiring the declaration of the structure.

(printk_log, ts_nsec|len|text_len|dict_len)
-------------------------------------------
atomic_long_t
-------------

The size of the atomic_long_t structure. Used by user-space tools to
be able to copy the full structure, regardless of its
architecture-specific implementation.

(atomic_long_t, counter)
------------------------

It represents field offsets in struct printk_log. User space tools
parse it and check whether the values of printk_log's members have been
changed.
Offset for the long value of an atomic_long_t variable. Used by
user-space tools to access the long value without requiring the
architecture-specific declaration.

(free_area.free_list, MIGRATE_TYPES)
------------------------------------
+1 −0
Original line number Diff line number Diff line
@@ -13970,6 +13970,7 @@ PRINTK
M:	Petr Mladek <pmladek@suse.com>
M:	Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
R:	Steven Rostedt <rostedt@goodmis.org>
R:	John Ogness <john.ogness@linutronix.de>
S:	Maintained
F:	include/linux/printk.h
F:	kernel/printk/
+16 −30
Original line number Diff line number Diff line
@@ -4061,22 +4061,21 @@ void device_shutdown(void)
 */

#ifdef CONFIG_PRINTK
static int
create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen)
static void
set_dev_info(const struct device *dev, struct dev_printk_info *dev_info)
{
	const char *subsys;
	size_t pos = 0;

	memset(dev_info, 0, sizeof(*dev_info));

	if (dev->class)
		subsys = dev->class->name;
	else if (dev->bus)
		subsys = dev->bus->name;
	else
		return 0;
		return;

	pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys);
	if (pos >= hdrlen)
		goto overflow;
	strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem));

	/*
	 * Add device identifier DEVICE=:
@@ -4092,41 +4091,28 @@ create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen)
			c = 'b';
		else
			c = 'c';
		pos++;
		pos += snprintf(hdr + pos, hdrlen - pos,
				"DEVICE=%c%u:%u",
				c, MAJOR(dev->devt), MINOR(dev->devt));

		snprintf(dev_info->device, sizeof(dev_info->device),
			 "%c%u:%u", c, MAJOR(dev->devt), MINOR(dev->devt));
	} else if (strcmp(subsys, "net") == 0) {
		struct net_device *net = to_net_dev(dev);

		pos++;
		pos += snprintf(hdr + pos, hdrlen - pos,
				"DEVICE=n%u", net->ifindex);
		snprintf(dev_info->device, sizeof(dev_info->device),
			 "n%u", net->ifindex);
	} else {
		pos++;
		pos += snprintf(hdr + pos, hdrlen - pos,
				"DEVICE=+%s:%s", subsys, dev_name(dev));
		snprintf(dev_info->device, sizeof(dev_info->device),
			 "+%s:%s", subsys, dev_name(dev));
	}

	if (pos >= hdrlen)
		goto overflow;

	return pos;

overflow:
	dev_WARN(dev, "device/subsystem name too long");
	return 0;
}

int dev_vprintk_emit(int level, const struct device *dev,
		     const char *fmt, va_list args)
{
	char hdr[128];
	size_t hdrlen;
	struct dev_printk_info dev_info;

	hdrlen = create_syslog_header(dev, hdr, sizeof(hdr));
	set_dev_info(dev, &dev_info);

	return vprintk_emit(0, level, hdrlen ? hdr : NULL, hdrlen, fmt, args);
	return vprintk_emit(0, level, &dev_info, fmt, args);
}
EXPORT_SYMBOL(dev_vprintk_emit);

+3 −0
Original line number Diff line number Diff line
@@ -55,6 +55,9 @@ phys_addr_t paddr_vmcoreinfo_note(void);
#define VMCOREINFO_OFFSET(name, field) \
	vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
			      (unsigned long)offsetof(struct name, field))
#define VMCOREINFO_TYPE_OFFSET(name, field) \
	vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
			      (unsigned long)offsetof(name, field))
#define VMCOREINFO_LENGTH(name, value) \
	vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value)
#define VMCOREINFO_NUMBER(name) \
Loading