/*
 * decode.c - program to decode SAL error records
 *
 * Copyright (c) 2003 Hewlett-Packard Co
 *	Bjorn Helgaas <bjorn.helgaas@hp.com>
 * 2003-11-05 Add options.
 *	      Handle SAL records as well as standalone raw records.
 *	      Keith Owens <kaos@sgi.com>
 * 2003-11-16 Break out oem data decoder so each platform can handle the
 *	      oem data as it likes.
 *	      Keith Owens <kaos@sgi.com>
 * 2004-10-04 Handle kernels that clear the bit themselves when there is no data.
 *	      Keith Owens <kaos@sgi.com>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/wait.h>

#include "mca.h"

extern int debug;

static sal_log_record_header_t *
salinfo_buffer(int fd, int *bufsize)
{
	int nbytes, size, alloc;
	sal_log_record_header_t *buffer;

	lseek(fd, 0, 0);
	buffer = NULL;
	alloc = 16 * 1024;	// total buffer size
	size = 0;		// amount of buffer used so far
	do {
		buffer = realloc(buffer, alloc);
		if (!buffer) {
			fprintf(stderr, "%s: Can't alloc %d bytes\n", __FUNCTION__, alloc);
			exit(1);
		}

		nbytes = read(fd, buffer + size, alloc - size);
		if (nbytes < 0) {
			perror("salinfo_buffer read");
			exit(1);
		}

		if (nbytes == alloc - size)
			alloc *= 2;

		size += nbytes;
	} while (nbytes);

	if (size) {
		if (bufsize)
			*bufsize = size;
		return buffer;
	}

	free(buffer);
	return NULL;
}

/* Print the command line synopsis and the meaning of each option to stderr. */
static void
usage (void)
{
	fputs("Usage:\n"
		" salinfo_decode [-d] -t type -D directory\n"
		" salinfo_decode [-d] filename\n"
		"  -d              Increment debug level\n"
		"  -t type         Type of record to wait for (cmc, cpe, mca or init)\n"
		"  -D directory    Directory to store the raw and decoded records\n"
		"  filename        Decode a saved raw record, without involving SAL\n"
		, stderr);
}

/* Ben Woodard of Red Hat changed the kernel salinfo code around 2.6.9-rc3 to
 * clear the cpu state bit if there is no data.  He did not add any indication
 * to user space of this change, which means that user space must deduce if it
 * is running on a kernel with or without Ben Woodard's change :(.
 *
 * Start off by assuming we are running on a changed kernel, and do not write
 * 'clear' to the kernel when there is no data.  When running on an old kernel,
 * user space will then be invoked repeatedly with no data.  Detect this loop
 * for an old kernel and turn on do_clear.
 */

/*
 * Write "clear <cpu>" to the data file once a record has been handled.
 *
 * fd_data       - descriptor the command is written to
 * cpu           - cpu number placed in the command
 * data_filename - name of fd_data, used only in error messages
 * have_data     - non-zero if a record was read for this event
 *
 * When there was no data the command is only written once do_clear has been
 * turned on, which happens the second time a no-progress event (cpu not
 * greater than the previous cpu) is counted without data in between.  The
 * detection state is kept in static variables, so it persists across calls.
 *
 * Returns 0 on success or when nothing had to be written, 1 if the write
 * failed.
 */
static int
clear_cpu(int fd_data, int cpu, const char *data_filename, int have_data)
{
	char text[400];
	int l, saved_errno;
	ssize_t written;
	static int prev_cpu = -1, loop = 0, do_clear = 0;

	if (have_data)
		loop = 0;
	if (!do_clear) {
		if (cpu <= prev_cpu) {
			++loop;
			if (loop == 2)
				do_clear = 1;
		}
		prev_cpu = cpu;
	}
	if (!have_data && !do_clear)
		return 0;

	/* snprintf already reports the length, no need to measure it again */
	l = snprintf(text, sizeof(text), "clear %d\n", cpu);
	written = write(fd_data, text, l);
	if (written != l) {
		/* fprintf may change errno, keep the value that write() set */
		saved_errno = errno;
		fprintf(stderr, "%s: Error writing '%s' to %s\n",
			__FUNCTION__, text, data_filename);
		if (written < 0) {
			errno = saved_errno;
			perror(data_filename);
		} else {
			/* a short write does not set errno */
			fprintf(stderr, "%s: short write\n", data_filename);
		}
		return 1;
	}
	return 0;
}

/* Pipe ends used to talk to the oem decoder: [0] is read, [1] is written.
 * [1] is set to -1 once the child has died.
 */
static int oemdata_fd[2];
static volatile sig_atomic_t child_died;	/* lock free flag to detect child death */

/*
 * SIGCHLD handler.  Records that the child has died, marks the write side
 * of the oem pipe as unusable and reaps the child.
 */
static void
sig_chld (int sig)
{
	/* wait() may set errno (e.g. ECHILD); the interrupted code must not
	 * see that, it may be about to test errno itself.
	 */
	int saved_errno = errno;

	(void)sig;
	child_died = 1;
	oemdata_fd[1] = -1;
	wait(NULL);
	errno = saved_errno;
}

/*
 * See if this platform has supplied a program to decode oem data.
 *
 * Creates two pipes, installs the SIGCHLD handler and forks a child that
 * runs "salinfo_decode_oem" (found via PATH) with its stdin and stdout
 * connected to the pipes.
 *
 * Returns oemdata_fd: [0] is read by the parent, [1] is written by the
 * parent.  [1] is -1 if the child has already died.  Failure of pipe() or
 * fork() terminates the program.
 */
static int *
fork_oemdata(void)
{
	static const char pgm[] = "salinfo_decode_oem";
	int pid;
	int pp[2], pc[2];		/* parent writes to pp[1], child writes to pc[1] */
	if (pipe(pp) || pipe(pc)) {
		fprintf(stderr, "%s: pipe failed (%m), giving up\n", __FUNCTION__);
		exit(1);
	}
	signal(SIGCHLD, sig_chld);
	if ((pid = fork()) == 0) {
		/* child reads fd 0, writes fd 1 */
		if (dup2(pp[0], 0) < 0 || dup2(pc[1], 1) < 0) {
			fprintf(stderr, "%s: dup2 failed (%m), giving up\n", __FUNCTION__);
			/* _exit: do not flush stdio buffers copied from the parent */
			_exit(1);
		}
		close(pp[0]);
		close(pp[1]);
		close(pc[0]);
		close(pc[1]);
		execlp(pgm, pgm, NULL);
		/* Only reached if exec failed.  Leave with _exit() so that the
		 * child neither runs the parent's atexit handlers nor writes
		 * inherited stdout data into the pipe that the parent reads.
		 */
		_exit(127);
	} else if (pid >= 0) {
		/* parent writes oemdata_fd[1], reads oemdata_fd[0] */
		close(pp[0]);
		close(pc[1]);
		oemdata_fd[0] = pc[0];	/* mca.c read from pgm */
		oemdata_fd[1] = pp[1];	/* mca.c write to pgm */
		__asm__ __volatile__ ("" ::: "memory");	/* lock free barrier */
		/* the handler may have run before the assignment above */
		if (child_died)
			oemdata_fd[1] = -1;
		return oemdata_fd;
	} else {
		fprintf(stderr, "%s: fork failed (%m), giving up\n", __FUNCTION__);
		exit(1);
	}
}

/*
 * Format the path of the raw or decoded file for one record:
 * directory/subdir/<timestamp>-cpu<cpu>-<type>.<suffix>, where the timestamp
 * fields come from the record header and are printed in hex.
 */
static void
salinfo_record_filename(char *filename, size_t size, const char *directory,
			const char *subdir,
			const sal_log_record_header_t *buffer,
			int cpu, const char *type, int suffix)
{
	snprintf(filename, size,
		"%s/%s/%02x%02x-%02x-%02x-%02x:%02x:%02x-cpu%d-%s.%d",
		directory,
		subdir,
		buffer->timestamp.slh_century,
		buffer->timestamp.slh_year,
		buffer->timestamp.slh_month,
		buffer->timestamp.slh_day,
		buffer->timestamp.slh_hour,
		buffer->timestamp.slh_minute,
		buffer->timestamp.slh_second,
		cpu,
		type,
		suffix);
}

/* Talk to /proc/sal/type/{event,data} to extract, save, decode and clear SAL
 * records.
 *
 * type      - record type, used as the /proc/sal subdirectory name
 * directory - must contain writable "raw" and "decoded" subdirectories; one
 *             file per record is created in each
 *
 * For every event read from the event file the record is fetched through
 * the data file, saved under raw/, decoded into decoded/ (stdout and stderr
 * are redirected to that file) and then cleared.  Runs until an error occurs
 * or the event read is interrupted.
 *
 * Returns 0 if the event read was interrupted by a signal, 1 on any error.
 */
static int
talk_to_sal (const char *type, const char *directory)
{
	sal_log_record_header_t *buffer = NULL;
	char event_filename[PATH_MAX], data_filename[PATH_MAX], text[200];
	int fd_event = -1, fd_data = -1, fd = -1, i, cpu, ret = 1;
	int *oemdata_fd = NULL;
	static const char *rd[] = { "raw", "decoded" };

	/* Check up front that both output directories are writable */
	for (i = 0; i < 2; ++i) {
		char filename[PATH_MAX];
		snprintf(filename, sizeof(filename), "%s/%s/.check", directory, rd[i]);
		/* O_CREAT requires the mode argument */
		if ((fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR)) < 0) {
			perror(filename);
			goto out;
		}
		close(fd);
		fd = -1;
		unlink(filename);
	}

	snprintf(event_filename, sizeof(event_filename), "/proc/sal/%s/event", type);
	if ((fd_event = open(event_filename, O_RDONLY)) < 0) {
		perror(event_filename);
		goto out;
	}
	snprintf(data_filename, sizeof(data_filename), "/proc/sal/%s/data", type);
	if ((fd_data = open(data_filename, O_RDWR)) < 0) {
		perror(data_filename);
		goto out;
	}

	oemdata_fd = fork_oemdata();

	/* Run until we are killed */
	while (1) {
		int l, written, bufsize, suffix;
		ssize_t nread;
		char filename[PATH_MAX];
		FILE *decoded;

		/* leave room for the terminator, text is parsed as a string */
		nread = read(fd_event, text, sizeof(text) - 1);
		if (nread <= 0) {
			/* errno is only meaningful when read() failed */
			if (nread < 0 && errno == EINTR)
				ret = 0;
			else if (nread < 0)
				perror(event_filename);
			else
				fprintf(stderr, "%s: Unexpected end of file on %s\n",
					__FUNCTION__, event_filename);
			goto out;
		}
		text[nread] = '\0';
		if (sscanf(text, "read %d\n", &cpu) != 1) {
			fprintf(stderr, "%s: Unknown text '%s' from %s\n",
				__FUNCTION__, text, event_filename);
			goto out;
		}
		/* echo the event text to the data file to select the record */
		l = strlen(text);
		if (write(fd_data, text, l) != l) {
			fprintf(stderr, "%s: Error writing '%s' to %s\n",
				__FUNCTION__, text, data_filename);
			perror(data_filename);
			goto out;
		}
		if (!(buffer = salinfo_buffer(fd_data, &bufsize))) {
			if (clear_cpu(fd_data, cpu, data_filename, 0))
				goto out;
			continue;	/* event but no data is normal at boot */
		}

		/* O_EXCL: bump the suffix until the name is unused */
		for (suffix = 0; ; ++suffix) {
			salinfo_record_filename(filename, sizeof(filename),
				directory, rd[0], buffer, cpu, type, suffix);
			if ((fd = open(filename, O_WRONLY|O_CREAT|O_EXCL, S_IRUSR|S_IWUSR)) >= 0)
				break;
			if (errno != EEXIST) {
				perror(filename);
				goto out;
			}
		}

		for (l = 0; l < bufsize; ) {
			written = write(fd, (char *)buffer + l, bufsize - l);
			if (written <= 0) {
				perror(filename);
				goto out;
			}
			l += written;
		}
		close(fd);
		fd = -1;

		/* The decoded file uses the same suffix as the raw file */
		salinfo_record_filename(filename, sizeof(filename),
			directory, rd[1], buffer, cpu, type, suffix);

		/* Redirect stdout and stderr into the decoded file.  Each new
		 * stream is opened before the old one is closed, so stdout and
		 * stderr never hold a closed or NULL stream and perror() always
		 * has somewhere to write.
		 */
		if (!(decoded = fopen(filename, "a"))) {
			perror(filename);
			goto out;
		}
		fclose(stdout);
		stdout = decoded;
		if (!(decoded = fopen(filename, "a"))) {
			perror(filename);
			goto out;
		}
		fclose(stderr);
		stderr = decoded;

		printf("BEGIN HARDWARE ERROR STATE from %s on cpu %d\n", type, cpu);
		platform_info_print(buffer, 1, fd_data, cpu, oemdata_fd);
		printf("END HARDWARE ERROR STATE from %s on cpu %d\n", type, cpu);
		free(buffer);
		buffer = NULL;
		/* Flush, do not close: the stream is closed when the next record
		 * replaces it.  Closing here as well would close it twice.
		 */
		fflush(stdout);
		if (clear_cpu(fd_data, cpu, data_filename, 1))
			goto out;

	}

out:
	free(buffer);
	if (fd >= 0)
		close(fd);
	if (oemdata_fd && oemdata_fd[1] >= 0) {
		close(oemdata_fd[0]);
		close(oemdata_fd[1]);
		wait(NULL);
	}
	if (fd_event >= 0)
		close(fd_event);
	if (fd_data >= 0)
		close(fd_data);
	return ret;
}

/*
 * Decode a raw record that was saved in a file earlier.
 *
 * filename - path of the raw record
 *
 * The decoded text is written to stdout between BEGIN/END marker lines.  A
 * file from which no data could be read produces no output.
 *
 * Returns 1 if the file cannot be opened, otherwise 0.
 */
static int
decode_a_file (const char *filename)
{
	int *oem_pipe = NULL;
	sal_log_record_header_t *record;
	int raw_fd = open(filename, O_RDONLY);

	if (raw_fd < 0) {
		perror(filename);
		return 1;
	}

	record = salinfo_buffer(raw_fd, NULL);
	if (record != NULL) {
		/* the oem decoder is only started when there is something to decode */
		oem_pipe = fork_oemdata();
		printf("BEGIN HARDWARE ERROR STATE from %s\n", filename);
		platform_info_print(record, 0, raw_fd, -1, oem_pipe);
		printf("END HARDWARE ERROR STATE from %s\n", filename);
	}

	/* shut down the pipes to the oem decoder and reap it */
	if (oem_pipe != NULL && oem_pipe[1] > 0) {
		close(oem_pipe[0]);
		close(oem_pipe[1]);
		wait(NULL);
	}

	close(raw_fd);
	free(record);
	return 0;
}

/*
 * Parse the command line and run in one of two modes:
 *   -t type -D directory   wait for SAL records of that type and store them
 *   filename               decode one saved raw record
 * Any other combination prints the usage text and returns 1.  -h prints the
 * usage text and returns 0.
 */
int main(int argc, char **argv)
{
	char *type = NULL, *directory = NULL, *filename = NULL;
	int o;

	/* 'h' must be in the option string or its case below is never reached;
	 * getopt() returns -1 when the options are exhausted.
	 */
	while ((o = getopt(argc, argv, "dht:D:")) != -1) {
		switch (o) {
		case 'd':
			++debug;
			break;
		case 'h':
			usage();
			return 0;
		case 't':
			type = optarg;
			break;
		case 'D':
			directory = optarg;
			break;
		default:
			usage();
			return 1;
		}
	}

	/* exactly one trailing argument is taken as the file to decode */
	if (optind == argc-1)
		filename = argv[optind++];
	if (optind != argc ||
		argc == 1 ||
		(type && !directory) ||
		(!type && directory) ||
		(filename && type)  ||
		!(filename || type)) {
		usage();
		return 1;
	}

	if (type)
		return talk_to_sal(type, directory);
	else
		return decode_a_file(filename);
}
