// SPDX-License-Identifier: GPL-2.0
/*
 * Provide a pstore intermediate backend, organized into kernel memory
 * allocated zones that are then mapped and flushed into a single
 * contiguous region on a storage backend of some kind (block, mtd, etc).
 */
/**
 * struct psz_buffer - header of zone to flush to storage
 *
 * @sig: signature to indicate header (PSZ_SIG xor PSZONE-type value)
 * @datalen: length of data in @data
 * @start: offset into @data where the beginning of the stored bytes begin
 * @data: zone data.
 */
struct psz_buffer {
#define PSZ_SIG (0x43474244) /* DBGC */
	uint32_t sig;
	atomic_t datalen;
	atomic_t start;
	uint8_t data[];
};
/**
 * struct psz_kmsg_header - kmsg dump-specific header to flush to storage
 *
 * @magic: magic num for kmsg dump header
 * @time: kmsg dump trigger time
 * @compressed: whether compressed
 * @counter: kmsg dump counter
 * @reason: the kmsg dump reason (e.g. oops, panic, etc)
 * @data: pointer to log data
 *
 * This is a sub-header for a kmsg dump, trailing after &psz_buffer.
 */
struct psz_kmsg_header {
#define PSTORE_KMSG_HEADER_MAGIC 0x4dfc3ae5 /* Just a random number */
	uint32_t magic;
	struct timespec64 time;
	bool compressed;
	uint32_t counter;
	enum kmsg_dump_reason reason;
	uint8_t data[];
};
/** * struct pstore_zone - single stored buffer * * @off: zone offset of storage * @type: front-end type for this zone * @name: front-end name for this zone * @buffer: pointer to data buffer managed by this zone * @oldbuf: pointer to old data buffer * @buffer_size: bytes in @buffer->data * @should_recover: whether this zone should recover from storage * @dirty: whether the data in @buffer dirty * * zone structure in memory.
*/ struct pstore_zone {
loff_t off; constchar *name; enum pstore_type_id type;
/**
 * struct psz_context - all about running state of pstore/zone
 *
 * @kpszs: kmsg dump storage zones
 * @ppsz: pmsg storage zone
 * @cpsz: console storage zone
 * @fpszs: ftrace storage zones
 * @kmsg_max_cnt: max count of @kpszs
 * @kmsg_read_cnt: counter of total read kmsg dumps
 * @kmsg_write_cnt: counter of total kmsg dump writes
 * @pmsg_read_cnt: counter of total read pmsg zone
 * @console_read_cnt: counter of total read console zone
 * @ftrace_max_cnt: max count of @fpszs
 * @ftrace_read_cnt: counter of max read ftrace zone
 * @oops_counter: counter of oops dumps
 * @panic_counter: counter of panic dumps
 * @recovered: whether finished recovering data from storage
 * @on_panic: whether panic is happening
 * @pstore_zone_info_lock: lock to @pstore_zone_info
 * @pstore_zone_info: information from backend
 * @pstore: structure for pstore
 */
struct psz_context {
	struct pstore_zone **kpszs;
	struct pstore_zone *ppsz;
	struct pstore_zone *cpsz;
	struct pstore_zone **fpszs;
	unsigned int kmsg_max_cnt;
	unsigned int kmsg_read_cnt;
	unsigned int kmsg_write_cnt;
	unsigned int pmsg_read_cnt;
	unsigned int console_read_cnt;
	unsigned int ftrace_max_cnt;
	unsigned int ftrace_read_cnt;
	/*
	 * These counters should be calculated during recovery.
	 * It records the oops/panic times after crashes rather than boots.
	 */
	unsigned int oops_counter;
	unsigned int panic_counter;
	atomic_t recovered;
	atomic_t on_panic;
	/*
	 * pstore_zone_info_lock protects this entire structure during calls
	 * to register_pstore_zone()/unregister_pstore_zone().
	 */
	struct mutex pstore_zone_info_lock;
	struct pstore_zone_info *pstore_zone_info;
	struct pstore_info pstore;
};

static struct psz_context pstore_zone_cxt;
/**
 * enum psz_flush_mode - flush mode for psz_zone_write()
 *
 * @FLUSH_NONE: do not flush to storage but update data on memory
 * @FLUSH_PART: just flush part of data including meta data to storage
 * @FLUSH_META: just flush meta data of zone to storage
 * @FLUSH_ALL: flush all of zone
 */
enum psz_flush_mode {
	FLUSH_NONE = 0,
	FLUSH_PART,
	FLUSH_META,
	FLUSH_ALL,
};
switch (flush_mode) { case FLUSH_NONE: if (unlikely(buf && wlen)) goto dirty; return 0; case FLUSH_PART:
wcnt = writeop((constchar *)zone->buffer->data + off, wlen,
zone->off + sizeof(*zone->buffer) + off); if (wcnt != wlen) goto dirty;
fallthrough; case FLUSH_META:
wlen = sizeof(struct psz_buffer);
wcnt = writeop((constchar *)zone->buffer, wlen, zone->off); if (wcnt != wlen) goto dirty; break; case FLUSH_ALL:
wlen = zone->buffer_size + sizeof(*zone->buffer);
wcnt = writeop((constchar *)zone->buffer, wlen, zone->off); if (wcnt != wlen) goto dirty; break;
}
return 0;
dirty: /* no need to mark dirty if going to try next zone */ if (wcnt == -ENOMSG) return -ENOMSG;
atomic_set(&zone->dirty, true); /* flush dirty zones nicely */ if (wcnt == -EBUSY && !is_on_panic())
schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(500)); return -EBUSY;
}
/**
 * psz_flush_dirty_zone() - flush one dirty zone back to storage
 * @zone: zone to flush; NULL is rejected with -EINVAL
 *
 * Refuses to write anything until recovery has finished (the on-storage
 * data would otherwise be clobbered before it was read back).  The dirty
 * flag is claimed atomically with atomic_xchg() so two flushers cannot
 * both write the same zone; on write failure the flag is re-set so a
 * later retry can pick the zone up again.
 *
 * Return: 0 on success or if the zone was not dirty, negative errno
 * otherwise.
 */
static int psz_flush_dirty_zone(struct pstore_zone *zone)
{
	int ret;

	if (unlikely(!zone))
		return -EINVAL;

	/* Nothing may be flushed before recovery completes. */
	if (unlikely(!atomic_read(&pstore_zone_cxt.recovered)))
		return -EBUSY;

	/* Claim the dirty flag; if it was already clear there is no work. */
	if (!atomic_xchg(&zone->dirty, false))
		return 0;

	ret = psz_zone_write(zone, FLUSH_ALL, NULL, 0, 0);
	if (ret)
		atomic_set(&zone->dirty, true);
	return ret;
}
staticint psz_flush_dirty_zones(struct pstore_zone **zones, unsignedint cnt)
{ int i, ret; struct pstore_zone *zone;
if (!zones) return -EINVAL;
for (i = 0; i < cnt; i++) {
zone = zones[i]; if (!zone) return -EINVAL;
ret = psz_flush_dirty_zone(zone); if (ret) return ret;
} return 0;
}
/**
 * psz_flush_all_dirty_zones() - deferred-work handler flushing every zone
 * @work: work item (unused; state lives in the global pstore_zone_cxt)
 *
 * Tries all front-end zones (pmsg, console, kmsg, ftrace).  Errors are
 * OR-ed together only as a "did anything fail" flag; if so, and a backend
 * is still registered, the cleaner work is re-armed to retry in 1s.
 */
static void psz_flush_all_dirty_zones(struct work_struct *work)
{
	struct psz_context *cxt = &pstore_zone_cxt;
	int ret = 0;

	if (cxt->ppsz)
		ret |= psz_flush_dirty_zone(cxt->ppsz);
	if (cxt->cpsz)
		ret |= psz_flush_dirty_zone(cxt->cpsz);
	if (cxt->kpszs)
		ret |= psz_flush_dirty_zones(cxt->kpszs, cxt->kmsg_max_cnt);
	if (cxt->fpszs)
		ret |= psz_flush_dirty_zones(cxt->fpszs, cxt->ftrace_max_cnt);

	/* Something failed and the backend is still alive: retry later. */
	if (ret && cxt->pstore_zone_info)
		schedule_delayed_work(&psz_cleaner, msecs_to_jiffies(1000));
}
for (i = 0; i < cxt->kmsg_max_cnt; i++) {
zone = cxt->kpszs[i]; if (unlikely(!zone)) return -EINVAL; if (atomic_read(&zone->dirty)) { unsignedint wcnt = cxt->kmsg_write_cnt; struct pstore_zone *new = cxt->kpszs[wcnt]; int ret;
ret = psz_move_zone(zone, new); if (ret) {
pr_err("move zone from %lu to %d failed\n",
i, wcnt); return ret;
}
cxt->kmsg_write_cnt = (wcnt + 1) % cxt->kmsg_max_cnt;
} if (!zone->should_recover) continue;
buf = zone->buffer;
rcnt = info->read((char *)buf, zone->buffer_size + sizeof(*buf),
zone->off); if (rcnt != zone->buffer_size + sizeof(*buf)) return rcnt < 0 ? rcnt : -EIO;
} return 0;
}
staticint psz_kmsg_recover_meta(struct psz_context *cxt)
{ struct pstore_zone_info *info = cxt->pstore_zone_info; struct pstore_zone *zone;
ssize_t rcnt, len; struct psz_buffer *buf; struct psz_kmsg_header *hdr; struct timespec64 time = { }; unsignedlong i; /* * Recover may on panic, we can't allocate any memory by kmalloc. * So, we use local array instead.
*/ char buffer_header[sizeof(*buf) + sizeof(*hdr)] = {0};
if (!info->read) return -EINVAL;
len = sizeof(*buf) + sizeof(*hdr);
buf = (struct psz_buffer *)buffer_header; for (i = 0; i < cxt->kmsg_max_cnt; i++) {
zone = cxt->kpszs[i]; if (unlikely(!zone)) return -EINVAL;
rcnt = info->read((char *)buf, len, zone->off); if (rcnt == -ENOMSG) {
pr_debug("%s with id %lu may be broken, skip\n",
zone->name, i); continue;
} elseif (rcnt != len) {
pr_err("read %s with id %lu failed\n", zone->name, i); return rcnt < 0 ? rcnt : -EIO;
}
if (buf->sig != zone->buffer->sig) {
pr_debug("no valid data in kmsg dump zone %lu\n", i); continue;
}
if (zone->buffer_size < atomic_read(&buf->datalen)) {
pr_info("found overtop zone: %s: id %lu, off %lld, size %zu\n",
zone->name, i, zone->off,
zone->buffer_size); continue;
}
hdr = (struct psz_kmsg_header *)buf->data; if (hdr->magic != PSTORE_KMSG_HEADER_MAGIC) {
pr_info("found invalid zone: %s: id %lu, off %lld, size %zu\n",
zone->name, i, zone->off,
zone->buffer_size); continue;
}
/* * we get the newest zone, and the next one must be the oldest * or unused zone, because we do write one by one like a circle.
*/ if (hdr->time.tv_sec >= time.tv_sec) {
time.tv_sec = hdr->time.tv_sec;
cxt->kmsg_write_cnt = (i + 1) % cxt->kmsg_max_cnt;
}
len = atomic_read(&tmpbuf.datalen) + sizeof(*oldbuf);
oldbuf = kzalloc(len, GFP_KERNEL); if (!oldbuf) return -ENOMEM;
memcpy(oldbuf, &tmpbuf, sizeof(*oldbuf));
buf = (char *)oldbuf + sizeof(*oldbuf);
len = atomic_read(&oldbuf->datalen);
start = atomic_read(&oldbuf->start);
off = zone->off + sizeof(*oldbuf);
/* get part of data */
rcnt = info->read(buf, len - start, off + start); if (rcnt != len - start) {
pr_err("read zone %s failed\n", zone->name);
ret = rcnt < 0 ? rcnt : -EIO; goto free_oldbuf;
}
/* get the rest of data */
rcnt = info->read(buf + len - start, start, off); if (rcnt != start) {
pr_err("read zone %s failed\n", zone->name);
ret = rcnt < 0 ? rcnt : -EIO; goto free_oldbuf;
}
/** * psz_recovery() - recover data from storage * @cxt: the context of pstore/zone * * recovery means reading data back from storage after rebooting * * Return: 0 on success, others on failure.
*/ staticinlineint psz_recovery(struct psz_context *cxt)
{ int ret;
if (atomic_read(&cxt->recovered)) return 0;
ret = psz_kmsg_recover(cxt); if (ret) goto out;
ret = psz_recover_zone(cxt, cxt->ppsz); if (ret) goto out;
ret = psz_recover_zone(cxt, cxt->cpsz); if (ret) goto out;
ret = psz_recover_zones(cxt, cxt->fpszs, cxt->ftrace_max_cnt);
kfree(zone->oldbuf);
zone->oldbuf = NULL; /* * if there are new data in zone buffer, that means the old data * are already invalid. It is no need to flush 0 (erase) to * block device.
*/ if (!buffer_datalen(zone)) return psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
psz_flush_dirty_zone(zone); return 0;
}
/* * In case zone is broken, which may occur to MTD device, we try each zones, * start at cxt->kmsg_write_cnt.
*/ staticinlineint notrace psz_kmsg_write_record(struct psz_context *cxt, struct pstore_record *record)
{
size_t size, hlen; struct pstore_zone *zone; unsignedint i;
for (i = 0; i < cxt->kmsg_max_cnt; i++) { unsignedint zonenum, len; int ret;
zonenum = (cxt->kmsg_write_cnt + i) % cxt->kmsg_max_cnt;
zone = cxt->kpszs[zonenum]; if (unlikely(!zone)) return -ENOSPC;
/* avoid destroying old data, allocate a new one */
len = zone->buffer_size + sizeof(*zone->buffer);
zone->oldbuf = zone->buffer;
zone->buffer = kzalloc(len, GFP_ATOMIC); if (!zone->buffer) {
zone->buffer = zone->oldbuf; return -ENOMEM;
}
zone->buffer->sig = zone->oldbuf->sig;
pr_debug("write %s to zone id %d\n", zone->name, zonenum);
psz_write_kmsg_hdr(zone, record);
hlen = sizeof(struct psz_kmsg_header);
size = min_t(size_t, record->size, zone->buffer_size - hlen);
ret = psz_zone_write(zone, FLUSH_ALL, record->buf, size, hlen); if (likely(!ret || ret != -ENOMSG)) {
cxt->kmsg_write_cnt = zonenum + 1;
cxt->kmsg_write_cnt %= cxt->kmsg_max_cnt; /* no need to try next zone, free last zone buffer */
kfree(zone->oldbuf);
zone->oldbuf = NULL; return ret;
}
pr_debug("zone %u may be broken, try next dmesg zone\n",
zonenum);
kfree(zone->buffer);
zone->buffer = zone->oldbuf;
zone->oldbuf = NULL;
}
/* * Explicitly only take the first part of any new crash. * If our buffer is larger than kmsg_bytes, this can never happen, * and if our buffer is smaller than kmsg_bytes, we don't want the * report split across multiple records.
*/ if (record->part != 1) return -ENOSPC;
if (!cxt->kpszs) return -ENOSPC;
ret = psz_kmsg_write_record(cxt, record); if (!ret && is_on_panic()) { /* ensure all data are flushed to storage when panic */
pr_debug("try to flush other dirty zones\n");
psz_flush_all_dirty_zones(NULL);
}
/* always return 0 as we had handled it on buffer */ return 0;
}
/** * psz_zone_write will set datalen as start + cnt. * It work if actual data length lesser than buffer size. * If data length greater than buffer size, pmsg will rewrite to * beginning of zone, which make buffer->datalen wrongly. * So we should reset datalen as buffer size once actual data length * greater than buffer size.
*/ if (is_full_data) {
atomic_set(&zone->buffer->datalen, zone->buffer_size);
psz_zone_write(zone, FLUSH_META, NULL, 0, 0);
} return 0;
}
if (record->type == PSTORE_TYPE_DMESG &&
record->reason == KMSG_DUMP_PANIC)
atomic_set(&cxt->on_panic, 1);
/* * if on panic, do not write except panic records * Fix case that panic_write prints log which wakes up console backend.
*/ if (is_on_panic() && record->type != PSTORE_TYPE_DMESG) return -EBUSY;
switch (record->type) { case PSTORE_TYPE_DMESG: return psz_kmsg_write(cxt, record); case PSTORE_TYPE_CONSOLE: return psz_record_write(cxt->cpsz, record); case PSTORE_TYPE_PMSG: return psz_record_write(cxt->ppsz, record); case PSTORE_TYPE_FTRACE: { int zonenum = smp_processor_id();
while (cxt->kmsg_read_cnt < cxt->kmsg_max_cnt) {
zone = cxt->kpszs[cxt->kmsg_read_cnt++]; if (psz_ok(zone)) return zone;
}
if (cxt->ftrace_read_cnt < cxt->ftrace_max_cnt) /* * No need psz_old_ok(). Let psz_ftrace_read() do so for * combination. psz_ftrace_read() should traverse over * all zones in case of some zone without data.
*/ return cxt->fpszs[cxt->ftrace_read_cnt++];
if (cxt->pmsg_read_cnt == 0) {
cxt->pmsg_read_cnt++;
zone = cxt->ppsz; if (psz_old_ok(zone)) return zone;
}
if (cxt->console_read_cnt == 0) {
cxt->console_read_cnt++;
zone = cxt->cpsz; if (psz_old_ok(zone)) return zone;
}
size = buffer_datalen(zone); /* Clear and skip this kmsg dump record if it has no valid header */ if (psz_kmsg_read_hdr(zone, record)) {
atomic_set(&zone->buffer->datalen, 0);
atomic_set(&zone->dirty, 0); return -ENOMSG;
}
size -= sizeof(struct psz_kmsg_header);
if (!record->compressed) { char *buf = kasprintf(GFP_KERNEL, "%s: Total %d times\n",
kmsg_dump_reason_str(record->reason),
record->count); if (!buf) return -ENOMEM;
hlen = strlen(buf);
record->buf = krealloc(buf, hlen + size, GFP_KERNEL); if (!record->buf) {
kfree(buf); return -ENOMEM;
}
} else {
record->buf = kmalloc(size, GFP_KERNEL); if (!record->buf) return -ENOMEM;
}
/**
 * psz_free_all_zones() - release every zone owned by the context
 * @cxt: the pstore/zone running context
 *
 * Frees the kmsg and ftrace zone arrays (and their counts) plus the
 * single pmsg and console zones.  The helpers take pointer-to-pointer
 * arguments so the context fields are NULLed as they are freed, making
 * this safe against a second teardown.
 */
static void psz_free_all_zones(struct psz_context *cxt)
{
	if (cxt->kpszs)
		psz_free_zones(&cxt->kpszs, &cxt->kmsg_max_cnt);
	if (cxt->ppsz)
		psz_free_zone(&cxt->ppsz);
	if (cxt->cpsz)
		psz_free_zone(&cxt->cpsz);
	if (cxt->fpszs)
		psz_free_zones(&cxt->fpszs, &cxt->ftrace_max_cnt);
}
/** * register_pstore_zone() - register to pstore/zone * * @info: back-end driver information. See &struct pstore_zone_info. * * Only one back-end at one time. * * Return: 0 on success, others on failure.
*/ int register_pstore_zone(struct pstore_zone_info *info)
{ int err = -EINVAL; struct psz_context *cxt = &pstore_zone_cxt;
if (info->total_size < 4096) {
pr_warn("total_size must be >= 4096\n"); return -EINVAL;
} if (info->total_size > SZ_128M) {
pr_warn("capping size to 128MiB\n");
info->total_size = SZ_128M;
}
if (!info->kmsg_size && !info->pmsg_size && !info->console_size &&
!info->ftrace_size) {
pr_warn("at least one record size must be non-zero\n"); return -EINVAL;
}
if (!info->name || !info->name[0]) return -EINVAL;
#define check_size(name, size) { \ if (info->name > 0 && info->name < (size)) { \
pr_err(#name" must be over %d\n", (size)); \ return -EINVAL; \
} \ if (info->name & (size - 1)) { \
pr_err(#name" must be a multiple of %d\n", \
(size)); \ return -EINVAL; \
} \
}
/* * the @read and @write must be applied. * if no @read, pstore may mount failed. * if no @write, pstore do not support to remove record file.
*/ if (!info->read || !info->write) {
pr_err("no valid general read/write interface\n"); return -EINVAL;
}
/*
 * NOTE(review): the following German text appears to be pasted-in website
 * boilerplate unrelated to this file; it is preserved here (translated) as
 * a comment so the file remains compilable:
 * "The information on this web page was compiled carefully and to the best
 * of our knowledge. However, neither completeness, nor correctness, nor
 * quality of the provided information is guaranteed.
 * Note: the colored syntax highlighting and the measurement are still
 * experimental."
 */