Write hard faults to backup ram, print on next boot (#4324)

* use backup ram, hard fault handler improvement

* wire it up

* upload elf

* s

* I guess I'll just use strncpy

* s

* what is it even complaining about?

* neat, we don't need a cast

* cypress and kinetis

* cleanup

* ci

* guard for tests
This commit is contained in:
Matthew Kennedy 2022-07-07 12:14:31 -07:00 committed by GitHub
parent 18ad4383af
commit 0cc1f729c4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 251 additions and 91 deletions

View File

@ -346,6 +346,13 @@ jobs:
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' && env.skip != 'true' }}
run: bash misc/jenkins/compile_other_versions/prepare_bundle.sh ${{matrix.build-target}} ${{matrix.ini-file}}
- name: Upload build elf
if: ${{ github.event_name != 'push' || github.ref != 'refs/heads/master' && env.skip != 'true' }}
uses: actions/upload-artifact@v2
with:
name: rusefi_${{matrix.build-target}}.elf
path: ./firmware/build/rusefi.elf
- name: Upload build bin
if: ${{ github.event_name != 'push' || github.ref != 'refs/heads/master' && env.skip != 'true' }}
uses: actions/upload-artifact@v2

View File

@ -266,7 +266,8 @@ TCPPSRC =
# List ASM source files here
ASMXSRC = $(ALLXASMSRC) \
$(RUSEFIASM)
$(RUSEFIASM) \
main_hardfault_asm.S
#
# WARNING! order of variables is important here - for instance cypress own folders should go before default folders

View File

@ -7,6 +7,7 @@
#include "pch.h"
#include "os_access.h"
#include "backup_ram.h"
static critical_msg_t warningBuffer;
static critical_msg_t criticalErrorMessageBuffer;
@ -27,6 +28,48 @@ const char* getCriticalErrorMessage(void) {
}
#if EFI_PROD_CODE
/**
 * Report what, if anything, went wrong on the previous boot, then update the boot counter.
 *
 * Reads the record kept in backup SRAM, prints either the stored firmware error message or the
 * stored hard fault details, clears the record so it is reported only once, and finally prints
 * and increments the boot counter.
 *
 * Does nothing when getBackupSram() returns nullptr (no backup SRAM available).
 */
void checkLastBootError() {
	auto sramState = getBackupSram();

	// No backing storage on this target: nothing to report and nowhere to count boots.
	if (!sramState) {
		return;
	}

	switch (sramState->Cookie) {
	case ErrorCookie::FirmwareError:
		// The writer uses strncpy with the full buffer size, which does not guarantee a
		// terminator - force one so "%s" can never run past the end of the buffer.
		sramState->ErrorString[sizeof(sramState->ErrorString) - 1] = '\0';
		efiPrintf("Last boot had firmware error: %s", sramState->ErrorString);
		break;
	case ErrorCookie::HardFault: {
		efiPrintf("Last boot had hard fault type: %x addr: %x CSFR: %x", sramState->FaultType, sramState->FaultAddress, sramState->Csfr);

		// Print out the context as a sequence of uintptr
		const uintptr_t* data = reinterpret_cast<const uintptr_t*>(&sramState->FaultCtx);
		for (size_t i = 0; i < sizeof(port_extctx) / sizeof(uintptr_t); i++) {
			efiPrintf("Fault ctx %d: %x", static_cast<int>(i), data[i]);
		}

		break;
	}
	default:
		// No cookie stored or invalid cookie (ie, backup RAM contains random garbage)
		break;
	}

	// Reset cookie so we don't print it again.
	sramState->Cookie = ErrorCookie::None;

	// The counter is only trusted while its own cookie is intact; otherwise start from zero.
	if (sramState->BootCountCookie != 0xdeadbeef) {
		sramState->BootCountCookie = 0xdeadbeef;
		sramState->BootCount = 0;
	}

	efiPrintf("Power cycle count: %d", sramState->BootCount);
	sramState->BootCount++;
}
/**
 * Store a hard fault report in backup SRAM so it can be printed on the next boot.
 *
 * @param type          fault type as determined by the fault handler
 * @param faultAddress  address associated with the fault (0 when the handler has none)
 * @param ctx           exception context captured by the fault handler; copied by value
 * @param csfr          fault status register value supplied by the fault handler
 *
 * Does nothing when getBackupSram() returns nullptr (no backup SRAM available).
 */
void logHardFault(uint32_t type, uintptr_t faultAddress, port_extctx* ctx, uint32_t csfr) {
	auto sramState = getBackupSram();

	// We are already inside a fault handler: never dereference a missing buffer.
	if (!sramState) {
		return;
	}

	sramState->Cookie = ErrorCookie::HardFault;
	// The fault type is printed on the next boot, so it has to be stored with the rest.
	sramState->FaultType = type;
	sramState->FaultAddress = faultAddress;
	sramState->Csfr = csfr;
	memcpy(&sramState->FaultCtx, ctx, sizeof(port_extctx));
}
extern ioportid_t criticalErrorLedPort;
extern ioportmask_t criticalErrorLedPin;
@ -226,6 +269,9 @@ void firmwareError(obd_code_e code, const char *fmt, ...) {
strcpy((char*)(criticalErrorMessageBuffer) + errorMessageSize, versionBuffer);
}
auto sramState = getBackupSram();
strncpy(sramState->ErrorString, criticalErrorMessageBuffer, efi::size(sramState->ErrorString));
sramState->Cookie = ErrorCookie::FirmwareError;
#else
char errorBuffer[200];

View File

@ -8,6 +8,7 @@
#pragma once
#include "obd_error_codes.h"
#include <cstdint>
#ifdef __cplusplus
extern "C"
@ -22,7 +23,7 @@ extern "C"
*/
bool warning(obd_code_e code, const char *fmt, ...);
typedef char critical_msg_t[ERROR_BUFFER_SIZE];
using critical_msg_t = char[ERROR_BUFFER_SIZE];
/**
* Something really bad had happened - firmware cannot function, we cannot run the engine
@ -51,6 +52,14 @@ int getRusEfiVersion(void);
#define efiAssertVoid(code, condition, message) { }
#endif /* EFI_ENABLE_ASSERTS */
#if EFI_PROD_CODE
#include <hal.h>
// If there was an error on the last boot, print out information about it now and reset state.
void checkLastBootError();
void logHardFault(uint32_t type, uintptr_t faultAddress, port_extctx* ctx, uint32_t csfr);
#endif // EFI_PROD_CODE
#ifdef __cplusplus
}
#endif /* __cplusplus */

View File

@ -10,6 +10,8 @@
#include "global.h"
#include "efi_gpio.h"
#include "error_handling.h"
typedef enum {
/**
* IAC Stepper motor position, 16-bit (stored in BKP0R 0..15)
@ -47,3 +49,26 @@ void backupRamSave(backup_ram_e idx, uint32_t value);
// make sure that all changes are saved before we shutdown the MCU
void backupRamFlush(void);
/**
 * Tag describing which kind of error record, if any, is held in backup SRAM.
 *
 * The non-zero tags are deliberately unusual bit patterns so that uninitialised (random)
 * memory is not mistaken for a real record.
 */
enum class ErrorCookie : uint32_t {
	// No record stored (also the value written back once a record has been reported)
	None          = 0x00000000,
	// A firmware error message is stored
	FirmwareError = 0xCAFEBABE,
	// A hard fault report is stored
	HardFault     = 0xDEADBEEF
};
#if EFI_PROD_CODE
// Layout of the error/boot record kept in backup SRAM (see getBackupSram()).
// The memory is not initialised by the firmware on startup, so every group of fields
// is only meaningful while its cookie holds the expected value.
struct BackupSramData {
// Which kind of error record (if any) the fields below currently hold
ErrorCookie Cookie;
// Firmware error message text; meaningful when Cookie == ErrorCookie::FirmwareError
critical_msg_t ErrorString;
// Copy of the exception context handed to logHardFault(); meaningful when Cookie == ErrorCookie::HardFault
port_extctx FaultCtx;
// Fault type reported with a hard fault record
uint32_t FaultType;
// Fault address reported with a hard fault record
uint32_t FaultAddress;
// Fault status register value reported with a hard fault record
uint32_t Csfr;
// Number of boots counted so far; only trusted while BootCountCookie == 0xdeadbeef
uint32_t BootCount;
// Validity marker for BootCount
uint32_t BootCountCookie;
};
BackupSramData* getBackupSram();
#endif // EFI_PROD_CODE

View File

@ -78,3 +78,8 @@ void backupRamFlush(void) {
// but if there is, at least try to reinitialize...
wasLoaded = false;
}
// TODO: implement me!
// Placeholder: no backup SRAM buffer is provided here yet, so this always returns nullptr.
// Callers must be prepared for a null result.
BackupSramData* getBackupSram() {
return nullptr;
}

View File

@ -14,3 +14,8 @@ uint32_t backupRamLoad(backup_ram_e idx) {
void backupRamSave(backup_ram_e idx, uint32_t value) {
}
// TODO: implement me!
// Placeholder: no backup SRAM buffer is provided here yet, so this always returns nullptr.
// Callers must be prepared for a null result.
BackupSramData* getBackupSram() {
return nullptr;
}

View File

@ -58,3 +58,12 @@ void backupRamSave(backup_ram_e idx, uint32_t value) {
void backupRamFlush(void) {
// nothing to do here, in STM32 all data is saved instantaneously
}
// STM32 only has 4k bytes of backup SRAM
// Fail the build if the record ever outgrows that region.
static_assert(sizeof(BackupSramData) <= 4096);
// Not a regular variable: the symbol's address is assigned by the linker script
// (__backup_sram_addr__ = ORIGIN(<backup SRAM region>)), so the contents are never initialised.
extern BackupSramData __backup_sram_addr__;
// Returns a pointer to the record overlaid on the start of the backup SRAM region.
BackupSramData* getBackupSram() {
return &__backup_sram_addr__;
}

View File

@ -50,6 +50,8 @@ MEMORY
ram7 : org = 0xD0000000, len = SDRAM_SIZE /* SDRAM */
}
__backup_sram_addr__ = ORIGIN(ram5);
/* For each data/text section two region are defined, a virtual region
and a load region (_LMA suffix).*/

View File

@ -76,7 +76,7 @@
#define STM32_PLLI2SR_VALUE 5
#define STM32_PVD_ENABLE FALSE
#define STM32_PLS STM32_PLS_LEV0
#define STM32_BKPRAM_ENABLE FALSE
#define STM32_BKPRAM_ENABLE TRUE
/*
* GPT driver system settings.

View File

@ -51,6 +51,8 @@ MEMORY
ram7 (wx) : org = 0x00000000, len = 0
}
__backup_sram_addr__ = ORIGIN(ram5);
/* For each data/text section two region are defined, a virtual region
and a load region (_LMA suffix).*/

View File

@ -65,7 +65,7 @@
#define STM32_NO_INIT FALSE
#define STM32_PVD_ENABLE FALSE
#define STM32_PLS STM32_PLS_LEV0
#define STM32_BKPRAM_ENABLE FALSE
#define STM32_BKPRAM_ENABLE TRUE
#define STM32_HSI_ENABLED TRUE
#define STM32_LSI_ENABLED TRUE
#define STM32_HSE_ENABLED TRUE

View File

@ -45,6 +45,8 @@ MEMORY
ram7 (wx) : org = 0x38800000, len = 4k /* BCKP SRAM */
}
__backup_sram_addr__ = ORIGIN(ram7);
/* For each data/text section two region are defined, a virtual region
and a load region (_LMA suffix).*/

View File

@ -17,102 +17,110 @@
#define bkpt() __asm volatile("BKPT #0\n")
// Non-maskable interrupt handler: there is nothing to recover here, so restart the MCU.
// (A spin loop in front of the reset call would make the reset unreachable.)
void NMI_Handler(void) {
	NVIC_SystemReset();
}
//See http://infocenter.arm.com/help/topic/com.arm.doc.dui0552a/BABBGBEC.html
// Exception numbers as read from IPSR by the fault handlers below.
// Each enumerator is declared exactly once; repeating them is a redefinition error.
typedef enum {
	Reset = 1,
	NMI = 2,
	HardFault = 3,
	MemManage = 4,
	BusFault = 5,
	UsageFault = 6,
} FaultType;
void HardFault_Handler(void) {
//Copy to local variables (not pointers) to allow GDB "i loc" to directly show the info
//Get thread context. Contains main registers including PC and LR
struct port_extctx ctx;
memcpy(&ctx, (void*)__get_PSP(), sizeof(struct port_extctx));
(void)ctx;
//Interrupt status register: Which interrupt have we encountered, e.g. HardFault?
FaultType faultType = (FaultType)__get_IPSR();
(void)faultType;
//For HardFault/BusFault this is the address that was accessed causing the error
uint32_t faultAddress = SCB->BFAR;
(void)faultAddress;
//Flags about hardfault / busfault
//See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0552a/Cihdjcfc.html for reference
bool isFaultPrecise = ((SCB->CFSR >> SCB_CFSR_BUSFAULTSR_Pos) & (1 << 1) ? true : false);
bool isFaultImprecise = ((SCB->CFSR >> SCB_CFSR_BUSFAULTSR_Pos) & (1 << 2) ? true : false);
bool isFaultOnUnstacking = ((SCB->CFSR >> SCB_CFSR_BUSFAULTSR_Pos) & (1 << 3) ? true : false);
bool isFaultOnStacking = ((SCB->CFSR >> SCB_CFSR_BUSFAULTSR_Pos) & (1 << 4) ? true : false);
bool isFaultAddressValid = ((SCB->CFSR >> SCB_CFSR_BUSFAULTSR_Pos) & (1 << 7) ? true : false);
(void)isFaultPrecise;
(void)isFaultImprecise;
(void)isFaultOnUnstacking;
(void)isFaultOnStacking;
(void)isFaultAddressValid;
//Cause debugger to stop. Ignored if no debugger is attached
bkpt();
NVIC_SystemReset();
void logHardFault(uint32_t type, uintptr_t faultAddress, struct port_extctx* ctx, uint32_t csfr);
// C half of the HardFault/BusFault handler.
// `sp` is supplied by the assembly trampoline: the stack pointer (MSP or PSP) it selected
// before branching here. The fault details are recorded through logHardFault() and the MCU
// is then reset; this function does not return.
void HardFault_Handler_C(void* sp) {
//Copy to local variables (not pointers) to allow GDB "i loc" to directly show the info
//Get the stacked context from `sp`. Contains main registers including PC and LR
struct port_extctx ctx;
memcpy(&ctx, sp, sizeof(struct port_extctx));
//Interrupt status register: Which interrupt have we encountered, e.g. HardFault?
FaultType faultType = (FaultType)__get_IPSR();
(void)faultType;
//For HardFault/BusFault this is the address that was accessed causing the error
//(see isFaultAddressValid below for whether the register content is valid)
uint32_t faultAddress = SCB->BFAR;
//Flags about hardfault / busfault, decoded from the bus fault part of CFSR
//See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0552a/Cihdjcfc.html for reference
bool isFaultPrecise = ((SCB->CFSR >> SCB_CFSR_BUSFAULTSR_Pos) & (1 << 1) ? true : false);
bool isFaultImprecise = ((SCB->CFSR >> SCB_CFSR_BUSFAULTSR_Pos) & (1 << 2) ? true : false);
bool isFaultOnUnstacking = ((SCB->CFSR >> SCB_CFSR_BUSFAULTSR_Pos) & (1 << 3) ? true : false);
bool isFaultOnStacking = ((SCB->CFSR >> SCB_CFSR_BUSFAULTSR_Pos) & (1 << 4) ? true : false);
bool isFaultAddressValid = ((SCB->CFSR >> SCB_CFSR_BUSFAULTSR_Pos) & (1 << 7) ? true : false);
//The flags are only here to be inspected from a debugger; silence unused-variable warnings
(void)isFaultPrecise;
(void)isFaultImprecise;
(void)isFaultOnUnstacking;
(void)isFaultOnStacking;
(void)isFaultAddressValid;
//Persist the report before resetting. CFSR is passed shifted down by SCB_CFSR_BUSFAULTSR_Pos,
//i.e. without the bits below the bus fault status.
//NOTE(review): UsageFault_Handler_C and MemManage_Handler_C pass the unshifted CFSR - confirm which form the report should use
logHardFault(faultType, faultAddress, &ctx, SCB->CFSR >> SCB_CFSR_BUSFAULTSR_Pos);
//Cause debugger to stop. Ignored if no debugger is attached
//NOTE(review): confirm that a BKPT executed here without a debugger cannot itself fault and prevent the reset below
bkpt();
NVIC_SystemReset();
}
void BusFault_Handler(void) __attribute__((alias("HardFault_Handler")));
void UsageFault_Handler_C(void* sp) {
//Copy to local variables (not pointers) to allow GDB "i loc" to directly show the info
//Get thread context. Contains main registers including PC and LR
struct port_extctx ctx;
memcpy(&ctx, sp, sizeof(struct port_extctx));
void UsageFault_Handler(void) {
//Copy to local variables (not pointers) to allow GDB "i loc" to directly show the info
//Get thread context. Contains main registers including PC and LR
struct port_extctx ctx;
memcpy(&ctx, (void*)__get_PSP(), sizeof(struct port_extctx));
(void)ctx;
//Interrupt status register: Which interrupt have we encountered, e.g. HardFault?
FaultType faultType = (FaultType)__get_IPSR();
(void)faultType;
//Flags about hardfault / busfault
//See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0552a/Cihdjcfc.html for reference
bool isUndefinedInstructionFault = ((SCB->CFSR >> SCB_CFSR_USGFAULTSR_Pos) & (1 << 0) ? true : false);
bool isEPSRUsageFault = ((SCB->CFSR >> SCB_CFSR_USGFAULTSR_Pos) & (1 << 1) ? true : false);
bool isInvalidPCFault = ((SCB->CFSR >> SCB_CFSR_USGFAULTSR_Pos) & (1 << 2) ? true : false);
bool isNoCoprocessorFault = ((SCB->CFSR >> SCB_CFSR_USGFAULTSR_Pos) & (1 << 3) ? true : false);
bool isUnalignedAccessFault = ((SCB->CFSR >> SCB_CFSR_USGFAULTSR_Pos) & (1 << 8) ? true : false);
bool isDivideByZeroFault = ((SCB->CFSR >> SCB_CFSR_USGFAULTSR_Pos) & (1 << 9) ? true : false);
(void)isUndefinedInstructionFault;
(void)isEPSRUsageFault;
(void)isInvalidPCFault;
(void)isNoCoprocessorFault;
(void)isUnalignedAccessFault;
(void)isDivideByZeroFault;
bkpt();
NVIC_SystemReset();
//Interrupt status register: Which interrupt have we encountered, e.g. HardFault?
FaultType faultType = (FaultType)__get_IPSR();
(void)faultType;
//Flags about hardfault / busfault
//See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0552a/Cihdjcfc.html for reference
bool isUndefinedInstructionFault = ((SCB->CFSR >> SCB_CFSR_USGFAULTSR_Pos) & (1 << 0) ? true : false);
bool isEPSRUsageFault = ((SCB->CFSR >> SCB_CFSR_USGFAULTSR_Pos) & (1 << 1) ? true : false);
bool isInvalidPCFault = ((SCB->CFSR >> SCB_CFSR_USGFAULTSR_Pos) & (1 << 2) ? true : false);
bool isNoCoprocessorFault = ((SCB->CFSR >> SCB_CFSR_USGFAULTSR_Pos) & (1 << 3) ? true : false);
bool isUnalignedAccessFault = ((SCB->CFSR >> SCB_CFSR_USGFAULTSR_Pos) & (1 << 8) ? true : false);
bool isDivideByZeroFault = ((SCB->CFSR >> SCB_CFSR_USGFAULTSR_Pos) & (1 << 9) ? true : false);
(void)isUndefinedInstructionFault;
(void)isEPSRUsageFault;
(void)isInvalidPCFault;
(void)isNoCoprocessorFault;
(void)isUnalignedAccessFault;
(void)isDivideByZeroFault;
logHardFault(faultType, 0, &ctx, SCB->CFSR);
bkpt();
NVIC_SystemReset();
}
void MemManage_Handler(void) {
//Copy to local variables (not pointers) to allow GDB "i loc" to directly show the info
//Get thread context. Contains main registers including PC and LR
struct port_extctx ctx;
memcpy(&ctx, (void*)__get_PSP(), sizeof(struct port_extctx));
(void)ctx;
//Interrupt status register: Which interrupt have we encountered, e.g. HardFault?
FaultType faultType = (FaultType)__get_IPSR();
(void)faultType;
//For HardFault/BusFault this is the address that was accessed causing the error
uint32_t faultAddress = SCB->MMFAR;
(void)faultAddress;
//Flags about hardfault / busfault
//See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0552a/Cihdjcfc.html for reference
bool isInstructionAccessViolation = ((SCB->CFSR >> SCB_CFSR_MEMFAULTSR_Pos) & (1 << 0) ? true : false);
bool isDataAccessViolation = ((SCB->CFSR >> SCB_CFSR_MEMFAULTSR_Pos) & (1 << 1) ? true : false);
bool isExceptionUnstackingFault = ((SCB->CFSR >> SCB_CFSR_MEMFAULTSR_Pos) & (1 << 3) ? true : false);
bool isExceptionStackingFault = ((SCB->CFSR >> SCB_CFSR_MEMFAULTSR_Pos) & (1 << 4) ? true : false);
bool isFaultAddressValid = ((SCB->CFSR >> SCB_CFSR_MEMFAULTSR_Pos) & (1 << 7) ? true : false);
(void)isInstructionAccessViolation;
(void)isDataAccessViolation;
(void)isExceptionUnstackingFault;
(void)isExceptionStackingFault;
(void)isFaultAddressValid;
bkpt();
NVIC_SystemReset();
// C half of the MemManage fault handler.
// `sp` is supplied by the assembly trampoline: the stack pointer (MSP or PSP) it selected
// before branching here. The fault details are recorded through logHardFault() and the MCU
// is then reset; this function does not return.
void MemManage_Handler_C(void* sp) {
//Copy to local variables (not pointers) to allow GDB "i loc" to directly show the info
//Get the stacked context from `sp`. Contains main registers including PC and LR
struct port_extctx ctx;
memcpy(&ctx, sp, sizeof(struct port_extctx));
//Interrupt status register: Which interrupt have we encountered, e.g. HardFault?
FaultType faultType = (FaultType)__get_IPSR();
(void)faultType;
//For MemManage faults this is the address that was accessed causing the error
//(see isFaultAddressValid below for whether the register content is valid)
uint32_t faultAddress = SCB->MMFAR;
//Flags about the memory management fault, decoded from the MemManage part of CFSR
//See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0552a/Cihdjcfc.html for reference
bool isInstructionAccessViolation = ((SCB->CFSR >> SCB_CFSR_MEMFAULTSR_Pos) & (1 << 0) ? true : false);
bool isDataAccessViolation = ((SCB->CFSR >> SCB_CFSR_MEMFAULTSR_Pos) & (1 << 1) ? true : false);
bool isExceptionUnstackingFault = ((SCB->CFSR >> SCB_CFSR_MEMFAULTSR_Pos) & (1 << 3) ? true : false);
bool isExceptionStackingFault = ((SCB->CFSR >> SCB_CFSR_MEMFAULTSR_Pos) & (1 << 4) ? true : false);
bool isFaultAddressValid = ((SCB->CFSR >> SCB_CFSR_MEMFAULTSR_Pos) & (1 << 7) ? true : false);
//The flags are only here to be inspected from a debugger; silence unused-variable warnings
(void)isInstructionAccessViolation;
(void)isDataAccessViolation;
(void)isExceptionUnstackingFault;
(void)isExceptionStackingFault;
(void)isFaultAddressValid;
//Persist the report (with the full, unshifted CFSR) before resetting
logHardFault(faultType, faultAddress, &ctx, SCB->CFSR);
//Cause debugger to stop.
//NOTE(review): confirm that a BKPT executed here without a debugger cannot itself fault and prevent the reset below
bkpt();
NVIC_SystemReset();
}

View File

@ -0,0 +1,35 @@
/*
 * Fault handler entry points (trampolines into the C handlers).
 *
 * Every label that ends up in the vector table must be typed as a function so that the
 * toolchain treats it as Thumb code; BusFault_Handler shares its address with
 * HardFault_Handler but is a separate symbol and needs its own .type.
 */
.syntax unified
.cpu cortex-m3
.thumb
.align 2
.thumb_func
.type HardFault_Handler, %function
.type BusFault_Handler, %function
.type UsageFault_Handler, %function
.type MemManage_Handler, %function
/*
 * HardFault and BusFault share one entry point.
 * Picks the stack pointer selected by bit 2 of LR (on exception entry LR holds the
 * exception return code) and hands it to HardFault_Handler_C in R0.
 */
.global HardFault_Handler
.global BusFault_Handler
HardFault_Handler:
BusFault_Handler:
/* test bit 2 of LR */
tst LR, #4
ite EQ
/* bit clear: use the main stack pointer */
mrseq R0, MSP
/* bit set: use the process stack pointer */
mrsne R0, PSP
/* plain branch, LR is left untouched: R0 becomes the C handler's `sp` argument */
b HardFault_Handler_C
/*
 * UsageFault entry point.
 * Picks the stack pointer selected by bit 2 of LR (on exception entry LR holds the
 * exception return code) and hands it to UsageFault_Handler_C in R0.
 */
.global UsageFault_Handler
UsageFault_Handler:
/* test bit 2 of LR */
tst LR, #4
ite EQ
/* bit clear: use the main stack pointer */
mrseq R0, MSP
/* bit set: use the process stack pointer */
mrsne R0, PSP
/* plain branch, LR is left untouched: R0 becomes the C handler's `sp` argument */
b UsageFault_Handler_C
/*
 * MemManage fault entry point.
 * Picks the stack pointer selected by bit 2 of LR (on exception entry LR holds the
 * exception return code) and hands it to MemManage_Handler_C in R0.
 */
.global MemManage_Handler
MemManage_Handler:
/* test bit 2 of LR */
tst LR, #4
ite EQ
/* bit clear: use the main stack pointer */
mrseq R0, MSP
/* bit set: use the process stack pointer */
mrsne R0, PSP
/* plain branch, LR is left untouched: R0 becomes the C handler's `sp` argument */
b MemManage_Handler_C

View File

@ -181,6 +181,10 @@ void runRusEfi() {
startLoggingProcessor();
#endif
#if EFI_PROD_CODE
checkLastBootError();
#endif
#ifdef STM32F7
void sys_dual_bank(void);
addConsoleAction("dual_bank", sys_dual_bank);