/* * Copyright (c) 1991-2001 by Sun Microsystems, Inc. * All rights reserved. */ /* * Copyright (c) 1994-2001 by Fujitsu, Ltd. * All rights reserved. */ #ifndef _SYS_ASYNC_H #define _SYS_ASYNC_H #pragma ident "@(#)async.h 1.30 01/08/28 SMI" #include #ifdef STINGRAY #include #endif STINGRAY #ifdef __cplusplus extern "C" { #endif #ifndef _ASM #include /* * The async_flt structure is used to record all pertinent information about * an asynchronous CPU or bus-related memory error. Typically, the structure * is initialized by a high-level interrupt or trap handler, and then enqueued * for later processing. Separate queues are maintained for correctable and * uncorrectable errors. The current CPU module determines the size of the * queue elements, so that it may declare a CPU-specific fault structure * which contains a struct async_flt as its first member. Each async_flt also * contains a callback function (flt_func) that is invoked by the processing * code in order to actually log messages when the event is dequeued. This * function may be called from a softint, from trap() as part of AST handling * before the victim thread returns to userland, or as part of panic(). As * such, the flt_func should basically only be calling cmn_err (but NOT with * the CE_PANIC flag). It must not call panic(), acquire locks, or block. * The owner of the event is responsible for determining whether the event is * fatal; if so, the owner should set flt_panic and panic() after enqueuing * the event. The event will then be dequeued and logged as part of panic * processing. If flt_panic is not set, the queue function will schedule a * soft interrupt to process the event. 
*/ struct async_flt; typedef void (*async_func_t)(struct async_flt *, char *); struct async_flt { uint64_t flt_id; /* gethrtime() at time of fault */ uint64_t flt_stat; /* async fault status register */ uint64_t flt_addr; /* async fault address register */ caddr_t flt_pc; /* program counter from error trap */ async_func_t flt_func; /* logging function */ uint_t flt_bus_id; /* hardware bus id# of cpu/sbus/pci */ uint_t flt_inst; /* software instance of cpu/sbus/pci */ ushort_t flt_status; /* error information */ ushort_t flt_synd; /* ECC syndrome */ uchar_t flt_in_memory; /* fault occurred in memory if != 0 */ uchar_t flt_class; /* fault class (cpu or bus) */ uchar_t flt_prot; /* type of fault protection (if any) */ uchar_t flt_priv; /* fault occurred in kernel if != 0 */ uchar_t flt_panic; /* fault caused owner to panic() */ uchar_t flt_tl; /* fault occurred at TL > 0 */ uchar_t flt_core; /* fault occurred during core() dump */ uchar_t flt_pad; /* reserved for future use */ }; #ifdef STINGRAY /* * FJSV,SPARC64 asyn. registers. */ struct eregs { uint64_t mask; uint64_t icu_addr; uint64_t tdu_addr; uint64_t icu_data; uint64_t tdu_data; uint64_t dc_data; }; /* * holding U2-Cache Tag Parity Error count */ struct u2ts_counter { ulong_t cnt; }; #endif STINGRAY #ifdef _KAISER /* * Kaiser may need struct ecc_flt to handle PCI UE. */ struct ecc_flt { uint64_t flt_stat; /* async. fault stat. reg. */ uint64_t flt_addr; /* async. fault addr. reg. 
*/ u_short flt_in_proc; /* fault being handled */ u_short flt_synd; /* ECC syndrome (CE only) */ u_short flt_size; /* size of failed transfer */ u_short flt_offset; /* offset of fault failed transfer */ u_short flt_bus_id; /* bus id# of cpu/sbus/pci */ u_short flt_inst; /* instance of cpu/sbus/pci */ async_func_t flt_func; /* logging func for fault */ }; #endif _KAISER #if defined(_KAISER) || defined(_KILAUEA_EMUL) /* * Error Analysis structure */ struct ecc_err_data { uint64_t errid; uint64_t synd; }; #endif /* _KAISER || _KILAUEA_EMUL */ /* * Bus nexus drivers can use the bus_func_register() interface to register * callback functions for error handling and panic handling. The handler * functions should be registered and unregistered from driver attach and * detach context, where it is safe to perform a sleeping allocation. The * callbacks themselves can be invoked from panic, or from the CPU module's * asynchronous trap handler at high PIL. As such, these routines may only * test for errors and enqueue async_flt events. They may not grab adaptive * locks, call panic(), or invoke bus_func_register() or bus_func_unregister(). * Each callback function should return one of the BF_* return status values * below. The bus_func_invoke() function calls all the registered handlers of * the specified type, and returns the maximum of their return values (e.g. * BF_FATAL if any callback returned BF_FATAL). If any callback returns * BF_FATAL, the system will panic at the end of callback processing. 
*/ typedef uint_t (*busfunc_t)(void *); #define BF_TYPE_UE 1 /* check for uncorrectable errors */ #define BF_TYPE_ERRDIS 2 /* disable error detection */ #define BF_NONE 0 /* no errors were detected */ #define BF_NONFATAL 1 /* one or more non-fatal errors found */ #define BF_FATAL 2 /* one or more fatal errors found */ typedef struct bus_func_desc { int bf_type; /* type of function (see above) */ busfunc_t bf_func; /* function to call */ void *bf_arg; /* function argument */ struct bus_func_desc *bf_next; /* pointer to next registered desc */ } bus_func_desc_t; extern void bus_func_register(int, busfunc_t, void *); extern void bus_func_unregister(int, busfunc_t, void *); extern uint_t bus_func_invoke(int); #ifdef STINGRAY #ifdef _COLUMBUS extern void ce_count_unum(struct async_flt *ecc, int len, char *unum, int bnum); #else extern void ce_count_unum(struct async_flt *ecc, int len, char *unum); #endif /* _COLUMBUS */ #else /* Sun Ultra */ extern void ce_count_unum(int status, int len, char *unum); #endif /* STINGRAY */ extern void ce_scrub(struct async_flt *); extern void error_init(void); #ifdef STINGRAY /* * FJSV,SPARC64 specific functions. 
*/ extern void fj_err_init(void); extern uint_t fj_scrub_memory(uint64_t, uint64_t); extern uint_t fj_scrub_cache(uint64_t, uint64_t); extern uint64_t rdscr(void); extern void wrscr(uint64_t); extern uint64_t rdhwm(void); extern void wrhwm(uint64_t); extern int ecache_size; extern int ecache_linesize; extern uint64_t fj_errlog; extern struct ce_info *mem_ce_simm; extern struct ce_info *u2_ce_slot; extern kmutex_t simm_mutex, ceslot_mutex, ce_lock; extern uint_t ce_pil; extern uint_t set_error_disable_tl1(uint64_t neer, uint64_t dummy); extern void fj_ecc_init(uint64_t *, uint64_t *); extern void fjsv_disable_ecc(void); extern int fjsv_disable_uecheck(void); extern void fjsv_enable_uecheck(int); extern void fjsv_clr_eregs(void); extern void fjsv_rdclr_eregs_tl1(struct eregs *regs); extern int ue_check_bus_func(void); extern uint_t fj_reset_errcnt_tl1(uint64_t, uint64_t); extern void fj_errlog_clear(void); extern void prom_isolate_failure(void *); extern void prom_get_simminfo(); extern void fj_isolate_cpu(int); extern void set_error_enable_tl1(uint64_t neer, uint64_t dummy); #ifdef _KAISER extern void fjsv_isolate_cpu(int); extern void fjsv_isolate_memory(uint64_t); #else extern void fj_isolate_cpu(int); extern void fj_isolate_memory(int); #endif /* _KAISER */ #if defined(_KAISER) || defined(_KILAUEA_EMUL) extern void fjsv_get_errid(uint64_t *pa, uint64_t *edata); #endif /* _KAISER || _KILAUEA_EMUL */ #endif STINGRAY extern int ce_verbose; extern int ce_show_data; extern int ce_debug; extern int ue_debug; extern int aft_verbose; extern int aft_reboot; extern int aft_panic; extern int aft_testfatal; extern struct async_flt panic_aflt; extern errorq_t *ce_queue; extern errorq_t *ue_queue; #endif /* !_ASM */ /* * ECC or parity error status for async_flt.flt_status. 
*/
/* Each flag below occupies a distinct bit of the 16-bit status word. */
#define	ECC_C_TRAP		0x0001	/* Trap 0x63 Corrected ECC Error */
#define	ECC_I_TRAP		0x0002	/* Trap 0x0A Instr Access Error */
#define	ECC_ECACHE		0x0004	/* Ecache ECC Error */
#define	ECC_IOBUS		0x0008	/* Pci or sysio ECC Error */
#define	ECC_INTERMITTENT	0x0010	/* Intermittent ECC Error */
#define	ECC_PERSISTENT		0x0020	/* Persistent ECC Error */
#define	ECC_STICKY		0x0040	/* Sticky ECC Error */
#define	ECC_D_TRAP		0x0080	/* Trap 0x32 Data Access Error */
#define	ECC_F_TRAP		0x0100	/* Cheetah Trap 0x70 Fast ECC Error */
#define	ECC_DP_TRAP		0x0200	/* Cheetah+ Trap 0x71 D$ Parity Error */
#define	ECC_IP_TRAP		0x0400	/* Cheetah+ Trap 0x72 I$ Parity Error */

/*
 * Fault classes for async_flt.flt_class.
 */
#define	BUS_FAULT		0	/* originating from bus drivers */
#define	CPU_FAULT		1	/* originating from CPUs */

/*
 * Invalid or unknown physical address for async_flt.flt_addr.
 * Expands to an unsigned long long with every bit set.
 */
#define	AFLT_INV_ADDR		(-1ULL)

/*
 * Fault protection values for async_flt.flt_prot.  The async error handling
 * code may be able to recover from errors when kernel code has explicitly
 * protected itself using one of the mechanisms specified here.
 */
#define	AFLT_PROT_NONE		0	/* no protection active */
#define	AFLT_PROT_ACCESS	1	/* on_trap OT_DATA_ACCESS protection */
#define	AFLT_PROT_EC		2	/* on_trap OT_DATA_EC protection */
#define	AFLT_PROT_COPY		3	/* t_lofault protection (ucopy, etc.) */

/*
 * Maximum length of unum string returned from the prom.
 */
#ifdef STINGRAY
#define	UNUM_NAMLEN		256
#else
#define	UNUM_NAMLEN		60
#endif /* STINGRAY */

#ifdef STINGRAY
/*
 * For Taiho/Kaiser, the size of UPA/U2$ CE error logs depends on
 * these MACROs.  The Kaiser values are derived from NCPU, which must be
 * defined by the including code; the Taiho values are fixed.
 */
#ifdef _KAISER
#define	FJSV_MAX_SLOT		(32 * (NCPU/4))
#define	FJSV_MAX_CPU		NCPU
#else /* TAIHO */
#define	FJSV_MAX_SLOT		32
#define	FJSV_MAX_CPU		8
#endif /* _KAISER */

/*
 * Uncorrectable error logging return values.
 */
#define	UE_USER_FATAL		0x0	/* NonPriv. UnCorrectable ECC Error */
#define	UE_FATAL		0x1	/* Priv. UnCorrectable ECC Error */
#define	UE_DEBUG		0x2	/* Debugging loophole */
#define	UE_USER			0x3	/* NonPriv. UE Non Fatal Error */

/*
 * Alignment macros.  ALIGN_N(i) clears the low-order bits of i, rounding it
 * down to a multiple of N.  The argument is evaluated exactly once.
 */
#define	ALIGN_64(i)		((i) & ~0x3F)
#define	ALIGN_32(i)		((i) & ~0x1F)
#define	ALIGN_16(i)		((i) & ~0xF)
#define	ALIGN_8(i)		((i) & ~0x7)
#endif /* STINGRAY */

/* Closes the extern "C" block opened near the top of the header. */
#ifdef __cplusplus
}
#endif

#endif /* _SYS_ASYNC_H */