/** * @file error_handling.cpp * * @date Apr 1, 2014 * @author Andrey Belomutskiy, (c) 2012-2020 */ #include "pch.h" #include "rusefi/efistringutil.h" #include "os_util.h" #include "backup_ram.h" #include "error_handling_led.h" #include "log_hard_fault.h" #include "rusefi/critical_error.h" /** * Executes the BKPT instruction that causes the debugger to stop. */ #define bkpt() __asm volatile("BKPT #0\n") // see strncpy man page // this implementation helps avoiding following gcc error/warning: // error: 'strncpy' output may be truncated copying xxx bytes from a string of length xxx char *strlncpy(char *dest, const char *src, size_t size) { size_t i; for (i = 0; (i < (size - 1)) && (src[i] != '\0'); i++) dest[i] = src[i]; for ( ; i < size; i++) dest[i] = '\0'; return dest; } static critical_msg_t warningBuffer; static critical_msg_t criticalErrorMessageBuffer; static critical_msg_t configErrorMessageBuffer; // recoverable configuration error, non-critical bool hasCriticalFirmwareErrorFlag = false; static bool hasConfigErrorFlag = false; static bool hasReportFile = false; // todo: revisit very questionable code! // todo: reuse hasCriticalFirmwareErrorFlag? something? bool isInHardFaultHandler = false; const char *dbg_panic_file; int dbg_panic_line; // todo: need vararg version of 'firmwareError' to make this method vararg? void efiCriticalError(const char *message) { criticalError(message); } const char* getCriticalErrorMessage() { return criticalErrorMessageBuffer; } bool hasConfigError() { return hasConfigErrorFlag; } void clearConfigErrorMessage() { hasConfigErrorFlag = false; } bool hasErrorReportFile() { return hasReportFile; } const char* getConfigErrorMessageBuffer() { return configErrorMessageBuffer; } #if EFI_PROD_CODE #if EFI_BACKUP_SRAM static backupErrorState lastBootError; static uint32_t bootCount = 0; #endif // EFI_BACKUP_SRAM void errorHandlerInit() { #if EFI_BACKUP_SRAM /* copy error state from backup RAM and clear it in backup RAM. * so few users can access previous error state and we should not care about who sohuld clear backup ram. */ auto sramState = getBackupSram(); memcpy(&lastBootError, &sramState->err, sizeof(backupErrorState)); memset(&sramState->err, 0x00, sizeof(sramState->err)); // Reset cookie so we don't report it again. sramState->err.Cookie = ErrorCookie::None; // Cookie can be some random value at first power on // reset to None to avoid generating 'Unknown' fail report if ((lastBootError.Cookie != ErrorCookie::FirmwareError) && (lastBootError.Cookie != ErrorCookie::HardFault) && (lastBootError.Cookie != ErrorCookie::ChibiOsPanic)) { lastBootError.Cookie = ErrorCookie::None; } //bootcount if (sramState->BootCountCookie != 0xdeadbeef) { sramState->BootCountCookie = 0xdeadbeef; sramState->BootCount = 0; } // save current bootcounter bootCount = sramState->BootCount; sramState->BootCount++; if (engineConfiguration->rethrowHardFault) { backupErrorState *err = &lastBootError; if (err->Cookie == ErrorCookie::HardFault) { criticalError("Last boot had hard fault type: %lx addr: %lx CSFR: %lx", err->FaultType, err->FaultAddress, err->Csfr); } } Reset_Cause_t cause = getMCUResetCause(); // if reset by watchdog, signal a fatal error if ((cause == Reset_Cause_IWatchdog) || (cause == Reset_Cause_WWatchdog)) { firmwareError(ObdCode::OBD_PCM_Processor_Fault, "Watchdog Reset detected! Check SD card for report file."); } #endif // EFI_PROD_CODE // see https://github.com/rusefi/rusefi/wiki/Resilience addConsoleAction("chibi_fault", [](){ chDbgCheck(0); } ); addConsoleAction("soft_fault", [](){ firmwareError(ObdCode::RUNTIME_CRITICAL_TEST_ERROR, "firmwareError: %d", getRusEfiVersion()); }); addConsoleAction("hard_fault", [](){ causeHardFault(); } ); } bool errorHandlerIsStartFromError() { #if EFI_BACKUP_SRAM return (lastBootError.Cookie != ErrorCookie::None); #else return 0; #endif } const char *errorCookieToName(ErrorCookie cookie) { switch (cookie) { case ErrorCookie::None: return "No error"; case ErrorCookie::FirmwareError: return "firmware"; case ErrorCookie::HardFault: return "HardFault"; case ErrorCookie::ChibiOsPanic: return "ChibiOS panic"; } return "Unknown"; } #define printResetReason() \ PRINT("Reset Cause: %s", getMCUResetCause(getMCUResetCause())) #define printErrorState() \ do { \ PRINT("Power cycle count: %lu", bootCount); \ \ if (cookie == ErrorCookie::None) { \ break; \ } \ \ PRINT("Last error type %s", errorCookieToName(err->Cookie)); \ \ switch (cookie) { \ case ErrorCookie::FirmwareError: \ { \ PRINT("%s", err->msg); \ } \ break; \ case ErrorCookie::HardFault: \ { \ PRINT("type: 0x%08lx addr: 0x%08lx CSFR: 0x%08lx", \ err->FaultType, err->FaultAddress, err->Csfr); \ \ auto ctx = &err->FaultCtx; \ PRINT("r0 0x%08lx", ctx->r0); \ PRINT("r1 0x%08lx", ctx->r1); \ PRINT("r2 0x%08lx", ctx->r2); \ PRINT("r3 0x%08lx", ctx->r3); \ PRINT("r12 0x%08lx", ctx->r12); \ PRINT("lr (thread) 0x%08lx", ctx->lr_thd); \ PRINT("pc 0x%08lx", ctx->pc); \ PRINT("xpsr 0x%08lx", ctx->xpsr); \ \ /* FPU registers - not very useful for debug */ \ if (0) { \ /* Print rest the context as a sequence of uintptr */ \ uintptr_t* data = reinterpret_cast(&err->FaultCtx); \ for (size_t i = 8; i < sizeof(port_extctx) / sizeof(uintptr_t); i++) { \ PRINT("Fault ctx %d: 0x%08x", i, data[i]); \ } \ } \ } \ break; \ case ErrorCookie::ChibiOsPanic: \ { \ PRINT("msg %s", err->msg); \ PRINT("file %s", err->file); \ PRINT("line %d", err->line); \ } \ break; \ default: \ /* No cookie stored or invalid cookie (ie, backup RAM contains random garbage) */ \ break; \ } \ } while(0) #define printErrorStack() \ do { \ PRINT("SP 0x%08lx", err->sp); \ for (size_t i = 0; i < ERROR_STACK_DEPTH; i++) { \ PRINT(" 0x%08lx: 0x%08lx", err->sp - i * 4, err->stack[i]); \ } \ } while(0) // TODO: reuse this code for writing crash report file void errorHandlerShowBootReasonAndErrors() { //this is console print #define PRINT(...) efiPrintf(__VA_ARGS__) printResetReason(); #if EFI_BACKUP_SRAM backupErrorState *err = &lastBootError; ErrorCookie cookie = err->Cookie; printErrorState(); printErrorStack(); #endif // EFI_BACKUP_SRAM #undef PRINT } #if EFI_FILE_LOGGING #include "ff.h" #define FAIL_REPORT_PREFIX "fail" PUBLIC_API_WEAK void onBoardWriteErrorFile(FIL *) { } static const char *errorHandlerGetErrorName(ErrorCookie cookie) { switch (cookie) { case ErrorCookie::None: return "none"; case ErrorCookie::FirmwareError: return "FWerror"; case ErrorCookie::HardFault: return "HardFault"; case ErrorCookie::ChibiOsPanic: return "OSpanic"; } return "unknown"; } bool needErrorReportFile = false; void errorHandlerWriteReportFile(FIL *fd) { #if EFI_BACKUP_SRAM backupErrorState *err = &lastBootError; ErrorCookie cookie = err->Cookie; #else ErrorCookie cookie = ErrorCookie::None; #endif if (cookie != ErrorCookie::None) { needErrorReportFile = true; } auto cause = getMCUResetCause(); // TODO: should we also report Unknown? if ((cause != Reset_Cause_NRST_Pin) && (cause != Reset_Cause_BOR) && (cause != Reset_Cause_POR) && (cause != Reset_Cause_Unknown)) { // not an expected cause needErrorReportFile = true; } if (needErrorReportFile) { char fileName[_MAX_FILLER + 20]; memset(fd, 0, sizeof(FIL)); // clear the memory //TODO: use date + time for file name? #if EFI_BACKUP_SRAM sprintf(fileName, "%05ld_%s_%s.txt", bootCount, FAIL_REPORT_PREFIX, errorHandlerGetErrorName(cookie)); #else sprintf(fileName, "last_%s_%s.txt", FAIL_REPORT_PREFIX, errorHandlerGetErrorName(cookie)); #endif FRESULT ret = f_open(fd, fileName, FA_CREATE_ALWAYS | FA_WRITE); if (ret == FR_OK) { //this is file print #define PRINT(format, ...) f_printf(fd, format "\r\n", __VA_ARGS__) printResetReason(); #if EFI_BACKUP_SRAM printErrorState(); printErrorStack(); #endif // EFI_BACKUP_SRAM f_printf(fd, "rusEFI v%d@%u", getRusEfiVersion(), /*do we have a working way to print 64 bit values?!*/(int)SIGNATURE_HASH); // additional board-specific data onBoardWriteErrorFile(fd); // todo: figure out what else would be useful f_close(fd); enginePins.warningLedPin.setValue(1); } } } static int errorHandlerIsReportExist(ErrorCookie cookie) { bool exist = false; FRESULT fr; /* Return value */ DIR dj; /* Directory object */ FILINFO fno; /* File information */ TCHAR pattern[32]; sprintf(pattern, "*%s*", errorHandlerGetErrorName(cookie)); fr = f_findfirst(&dj, &fno, "", pattern); exist = ((fr == FR_OK) && (fno.fname[0])); f_closedir(&dj); return exist; } int errorHandlerCheckReportFiles() { hasReportFile = (errorHandlerIsReportExist(ErrorCookie::FirmwareError) > 0) || (errorHandlerIsReportExist(ErrorCookie::HardFault) > 0) || (errorHandlerIsReportExist(ErrorCookie::ChibiOsPanic) > 0); return hasReportFile; } static void errorHandlerDeleteTypedReport(ErrorCookie cookie) { bool failed = false; FRESULT fr; /* Return value */ DIR dj; /* Directory object */ FILINFO fno; /* File information */ TCHAR pattern[32]; sprintf(pattern, "*%s*", errorHandlerGetErrorName(cookie)); do { fr = f_findfirst(&dj, &fno, "", pattern); f_closedir(&dj); if ((fr == FR_OK) && (fno.fname[0])) { efiPrintf("deleting %s", fno.fname); FRESULT ret = f_unlink(fno.fname); if (ret != FR_OK) { efiPrintf("Faield to delete %s: %d", fno.fname, ret); failed = true; } else { efiPrintf("%s removed", fno.fname); } } } while ((!failed) && (fr == FR_OK) && (fno.fname[0])); } void errorHandlerDeleteReports() { errorHandlerDeleteTypedReport(ErrorCookie::FirmwareError); errorHandlerDeleteTypedReport(ErrorCookie::HardFault); errorHandlerDeleteTypedReport(ErrorCookie::ChibiOsPanic); // update errorHandlerCheckReportFiles(); } #endif #if EFI_BACKUP_SRAM static void errorHandlerSaveStack(backupErrorState *err, uint32_t *sp) { err->sp = (uint32_t)sp; for (size_t i = 0; i < ERROR_STACK_DEPTH; i++) { // avoid optimizatio and usage of __builtin_memcpy // to avoid "error: '__builtin_memcpy' reading 128 bytes from a region of size 4" err->stack[i] = *(volatile uint32_t *)sp; sp++; } } #endif // EFI_BACKUP_SRAM void logHardFault(uint32_t type, uintptr_t faultAddress, void* sp, port_extctx* ctx, uint32_t csfr) { // todo: reuse hasCriticalFirmwareErrorFlag? something? isInHardFaultHandler = true; // Evidence first! #if EFI_BACKUP_SRAM auto bkpram = getBackupSram(); auto err = &bkpram->err; if (err->Cookie == ErrorCookie::None) { err->FaultType = type; err->FaultAddress = faultAddress; err->Csfr = csfr; memcpy(&err->FaultCtx, ctx, sizeof(port_extctx)); err->Cookie = ErrorCookie::HardFault; // copy stack last as it can be corrupted and cause another exeption errorHandlerSaveStack(err, (uint32_t *)sp); } #endif // EFI_BACKUP_SRAM // criticalShutdown() shutdown can cause cascaded fault. // So we first save some valuable evidence and only after try to gracefully shutdown HW criticalShutdown(); } #endif /* EFI_PROD_CODE */ #if EFI_SIMULATOR || EFI_PROD_CODE void chDbgPanic3(const char *msg, const char * file, int line) { #if EFI_PROD_CODE #if EFI_BACKUP_SRAM // following is allocated on stack // add some marker uint32_t tmp = 0xfffffa11; #endif // Attempt to break in to the debugger, if attached if (CoreDebug->DHCSR & CoreDebug_DHCSR_C_DEBUGEN_Msk) { bkpt(); } #endif // EFI_PROD_CODE #if EFI_BACKUP_SRAM auto bkpram = getBackupSram(); auto err = &bkpram->err; if (err->Cookie == ErrorCookie::None) { strlncpy(err->file, file, efi::size(err->file)); err->line = line; strlncpy(err->msg, msg, efi::size(err->msg)); err->Cookie = ErrorCookie::ChibiOsPanic; // copy stack last as it can be corrupted and cause another exeption uint32_t *sp = &tmp; errorHandlerSaveStack(err, sp); } #endif // EFI_BACKUP_SRAM if (hasOsPanicError()) return; dbg_panic_file = file; dbg_panic_line = line; #if CH_DBG_SYSTEM_STATE_CHECK ch0.dbg.panic_msg = msg; #endif /* CH_DBG_SYSTEM_STATE_CHECK */ #if !EFI_PROD_CODE printf("chDbgPanic3 %s %s%d", msg, file, line); exit(-1); #else // EFI_PROD_CODE criticalError("assert fail %s %s:%d", msg, file, line); // If on the main thread, longjmp back to the init process so we can keep USB alive if (chThdGetSelfX()->threadId == 0) { // Force unlock, since we may be throwing-under-lock chSysUnconditionalUnlock(); // there was a port_disable in chSysHalt, reenable interrupts so USB works port_enable(); __NO_RETURN void onAssertionFailure(); onAssertionFailure(); } else { // Not the main thread. // All hope is now lost. // Reboot! NVIC_SystemReset(); } #endif // EFI_PROD_CODE } #endif /* EFI_SIMULATOR || EFI_PROD_CODE */ /** * ObdCode::OBD_PCM_Processor_Fault is the general error code for now * * @returns TRUE in case there were warnings recently */ bool warning(ObdCode code, const char *fmt, ...) { if (hasCriticalFirmwareErrorFlag) { return true; } bool known = engine->engineState.warnings.isWarningNow(code); // if known - just reset timer engine->engineState.warnings.addWarningCode(code); #if EFI_SIMULATOR || EFI_PROD_CODE // we just had this same warning, let's not spam if (known) { return true; } // print Pxxxx (for standard OBD) or Cxxxx (for custom) prefix size_t size = snprintf(warningBuffer, sizeof(warningBuffer), "%s%04d: ", code < ObdCode::CUSTOM_NAN_ENGINE_LOAD ? "P" : "C", (int) code); va_list ap; va_start(ap, fmt); chvsnprintf(warningBuffer + size, sizeof(warningBuffer) - size, fmt, ap); va_end(ap); efiPrintf("WARNING: %s", warningBuffer); #else printf("unit_test_warning: "); va_list ap; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("\r\n"); #endif /* EFI_SIMULATOR || EFI_PROD_CODE */ return false; } #if EFI_CLOCK_LOCKS uint32_t lastLockTime; /** * Maximum time before requesting lock and releasing lock at the end of critical section */ uint32_t maxLockedDuration = 0; /** * this depends on chdebug.h patch #if CH_DBG_SYSTEM_STATE_CHECK == TRUE -#define _dbg_enter_lock() (ch.dbg.lock_cnt = (cnt_t)1) -#define _dbg_leave_lock() (ch.dbg.lock_cnt = (cnt_t)0) +#define _dbg_enter_lock() {(ch.dbg.lock_cnt = (cnt_t)1); ON_LOCK_HOOK;} +#define _dbg_leave_lock() {ON_UNLOCK_HOOK;(ch.dbg.lock_cnt = (cnt_t)0);} #endif */ #endif /* EFI_CLOCK_LOCKS */ void onLockHook(void) { #if ENABLE_PERF_TRACE perfEventInstantGlobal(PE::GlobalLock); #endif /* ENABLE_PERF_TRACE */ #if EFI_CLOCK_LOCKS lastLockTime = getTimeNowLowerNt(); #endif /* EFI_CLOCK_LOCKS */ } void onUnlockHook(void) { #if EFI_CLOCK_LOCKS uint32_t lockedDuration = getTimeNowLowerNt() - lastLockTime; if (lockedDuration > maxLockedDuration) { maxLockedDuration = lockedDuration; } // if (lockedDuration > 2800) { // // un-comment this if you want a nice stop for a breakpoint // maxLockedDuration = lockedDuration + 1; // } #endif /* EFI_CLOCK_LOCKS */ #if ENABLE_PERF_TRACE perfEventInstantGlobal(PE::GlobalUnlock); #endif /* ENABLE_PERF_TRACE */ } #if EFI_SIMULATOR || EFI_UNIT_TEST #include #endif void configError(const char *fmt, ...) { va_list ap; va_start(ap, fmt); chvsnprintf(configErrorMessageBuffer, sizeof(configErrorMessageBuffer), fmt, ap); va_end(ap); hasConfigErrorFlag = true; } const char* getConfigErrorMessage() { return configErrorMessageBuffer; } static void firmwareErrorV(ObdCode code, const char *fmt, va_list ap) { #if EFI_PROD_CODE #if EFI_BACKUP_SRAM // following is allocated on stack // add some marker uint32_t tmp = 0xfaaaaa11; #endif if (hasCriticalFirmwareErrorFlag) return; hasCriticalFirmwareErrorFlag = true; // construct error message if (indexOf(fmt, '%') == -1) { /** * in case of simple error message let's reduce stack usage * chvsnprintf could cause an overflow if we're already low */ strlncpy((char*) criticalErrorMessageBuffer, fmt, sizeof(criticalErrorMessageBuffer)); criticalErrorMessageBuffer[sizeof(criticalErrorMessageBuffer) - 1] = 0; // just to be sure } else { chvsnprintf(criticalErrorMessageBuffer, sizeof(criticalErrorMessageBuffer), fmt, ap); } int errorMessageSize = strlen((char*)criticalErrorMessageBuffer); static char versionBuffer[32]; chsnprintf(versionBuffer, sizeof(versionBuffer), " %d@%s", getRusEfiVersion(), FIRMWARE_ID); if (errorMessageSize + strlen(versionBuffer) < sizeof(criticalErrorMessageBuffer)) { strcpy((char*)(criticalErrorMessageBuffer) + errorMessageSize, versionBuffer); } // Evidence first! #if EFI_BACKUP_SRAM auto bkpram = getBackupSram(); auto err = &bkpram->err; if (err->Cookie == ErrorCookie::None) { strlncpy(err->msg, criticalErrorMessageBuffer, sizeof(err->msg)); err->Cookie = ErrorCookie::FirmwareError; // copy stack last as it can be corrupted and cause another exeption uint32_t *sp = &tmp; errorHandlerSaveStack(err, sp); } #endif // EFI_BACKUP_SRAM #if EFI_ENGINE_CONTROL getLimpManager()->fatalError(); #endif // EFI_ENGINE_CONTROL engine->engineState.warnings.addWarningCode(code); // criticalShutdown() shutdown can cause cascaded fault. // So we first save some valuable evidence and only after try to gracefully shutdown HW criticalShutdown(); enginePins.communicationLedPin.setValue(1, /*force*/true); #else // EFI_PROD_CODE // large buffer on stack is risky we better use normal memory static char errorBuffer[200]; vsnprintf(errorBuffer, sizeof(errorBuffer), fmt, ap); printf("\x1B[31m>>>>>>>>>> firmwareError [%s]\r\n\x1B[0m\r\n", errorBuffer); throw std::logic_error(errorBuffer); #endif // EFI_PROD_CODE } void firmwareError(ObdCode code, const char *fmt, ...) { va_list ap; va_start(ap, fmt); firmwareErrorV(code, fmt, ap); va_end(ap); } // This is critical error for plain C use. // From plain C file we cannot use somethings like this: // #define criticalError(...) firmwareError(ObdCode::OBD_PCM_Processor_Fault, __VA_ARGS__) // because of ObdCode:: // Also we can not write some wrapper that will pass variable length argument list to another variable length argument list function void criticalErrorC(const char *fmt, ...) { va_list ap; va_start(ap, fmt); firmwareErrorV(ObdCode::OBD_PCM_Processor_Fault, fmt, ap); va_end(ap); }