From c75af2ac75a0547782a011d0deeabad16420a95e Mon Sep 17 00:00:00 2001 From: Matthew Kennedy Date: Mon, 15 Feb 2021 14:39:11 -0800 Subject: [PATCH] properly put buffers in no-cache regions (#2357) * configure for no cache * this doesn't need a setting * now we don't need invalidate * reorder and comment * mmc * sw knock --- firmware/config/boards/proteus/adc_hack.cpp | 5 +-- firmware/config/stm32f4ems/efifeatures.h | 4 -- firmware/config/stm32f7ems/efifeatures.h | 3 -- firmware/console/status_loop.cpp | 2 +- .../controllers/sensors/software_knock.cpp | 2 +- firmware/global.h | 42 ++++++++----------- firmware/hw_layer/adc/AdcConfiguration.h | 1 - firmware/hw_layer/adc/adc_inputs.cpp | 24 +---------- firmware/hw_layer/hardware.cpp | 4 -- firmware/hw_layer/mmc_card.cpp | 7 ++-- .../ports/stm32/stm32f7/cfg/mcuconf.h | 2 +- 11 files changed, 27 insertions(+), 69 deletions(-) diff --git a/firmware/config/boards/proteus/adc_hack.cpp b/firmware/config/boards/proteus/adc_hack.cpp index 5749b89c98..35162ad18c 100644 --- a/firmware/config/boards/proteus/adc_hack.cpp +++ b/firmware/config/boards/proteus/adc_hack.cpp @@ -28,7 +28,7 @@ static ADCConversionGroup adcConvGroup = { FALSE, 1, nullptr, nullptr, ADC_SQR3_SQ1_N(ADC_CHANNEL_IN9) // sqr3 - vbatt is on pf3 = adc9 }; -__ALIGNED(32) adcsample_t samples[8]; +static NO_CACHE adcsample_t samples[8]; // we use this as a hook to run near the rest of ADC init... void setAdcChannelOverrides(void) { @@ -42,9 +42,6 @@ adcsample_t vbattSampleProteus = 0; void proteusAdcHack() { adcConvert(&ADCD3, &adcConvGroup, samples, 8); -#if defined(STM32F7XX) - SCB_InvalidateDCache_by_Addr(reinterpret_cast(samples), sizeof(samples)); -#endif /* STM32F7XX */ uint32_t sum = 0; diff --git a/firmware/config/stm32f4ems/efifeatures.h b/firmware/config/stm32f4ems/efifeatures.h index 39f4c350e3..045d96af34 100644 --- a/firmware/config/stm32f4ems/efifeatures.h +++ b/firmware/config/stm32f4ems/efifeatures.h @@ -44,10 +44,6 @@ #define EFI_ENABLE_CRITICAL_ENGINE_STOP TRUE #define EFI_ENABLE_ENGINE_WARNING TRUE -#if !defined(EFI_ENABLE_ASSERTS) - #define EFI_USE_CCM TRUE -#endif - #ifndef SC_BUFFER_SIZE #define SC_BUFFER_SIZE 4000 #endif diff --git a/firmware/config/stm32f7ems/efifeatures.h b/firmware/config/stm32f7ems/efifeatures.h index 251e01b864..c7d4041239 100644 --- a/firmware/config/stm32f7ems/efifeatures.h +++ b/firmware/config/stm32f7ems/efifeatures.h @@ -14,9 +14,6 @@ #pragma once -#undef EFI_USE_CCM -#define EFI_USE_CCM TRUE - #undef EFI_POTENTIOMETER #define EFI_POTENTIOMETER FALSE diff --git a/firmware/console/status_loop.cpp b/firmware/console/status_loop.cpp index 890ba3ece0..1167161425 100644 --- a/firmware/console/status_loop.cpp +++ b/firmware/console/status_loop.cpp @@ -131,7 +131,7 @@ static void setWarningEnabled(int value) { #if EFI_FILE_LOGGING // this one needs to be in main ram so that SD card SPI DMA works fine -static char sdLogBuffer[100] MAIN_RAM; +static NO_CACHE char sdLogBuffer[100]; static uint64_t binaryLogCount = 0; #endif /* EFI_FILE_LOGGING */ diff --git a/firmware/controllers/sensors/software_knock.cpp b/firmware/controllers/sensors/software_knock.cpp index aa854be3dc..034e7ea186 100644 --- a/firmware/controllers/sensors/software_knock.cpp +++ b/firmware/controllers/sensors/software_knock.cpp @@ -12,7 +12,7 @@ EXTERN_ENGINE; #include "knock_config.h" -adcsample_t sampleBuffer[2000]; +NO_CACHE adcsample_t sampleBuffer[2000]; int8_t currentCylinderIndex = 0; Biquad knockFilter; diff --git a/firmware/global.h b/firmware/global.h index 525cc852d3..fa829a078e 100644 --- a/firmware/global.h +++ b/firmware/global.h @@ -61,15 +61,6 @@ typedef unsigned int time_t; #define EFI_ERROR_CODE 0xffffffff -#if EFI_USE_CCM && defined __GNUC__ -#define MAIN_RAM __attribute__((section(".ram0"))) -#elif defined __GNUC__ -#define MAIN_RAM -#else -#define MAIN_RAM @ ".ram0" -#endif - - /** * rusEfi is placing some of data structures into CCM memory simply * in order to use that memory - no magic about which RAM is faster etc. @@ -78,22 +69,23 @@ typedef unsigned int time_t; * * Please note that DMA does not work with CCM memory */ -#if defined(STM32F7XX) -#define CCM_RAM ".ram3" -#define NO_CACHE CCM_OPTIONAL -#else /* defined(STM32F4XX) */ -#define CCM_RAM ".ram4" -#define NO_CACHE -#endif /* defined(STM32F4XX) */ - -#if EFI_USE_CCM -#if defined __GNUC__ -#define CCM_OPTIONAL __attribute__((section(CCM_RAM))) -#else // non-gcc -#define CCM_OPTIONAL @ CCM_RAM -#endif -#else /* !EFI_USE_CCM */ +#if defined(STM32F4XX) +// CCM memory is 64k +#define CCM_OPTIONAL __attribute__((section(".ram4"))) +#define NO_CACHE // F4 has no cache, do nothing +#elif defined(STM32F7XX) +// DTCM memory is 128k +#define CCM_OPTIONAL __attribute__((section(".ram3"))) +// SRAM2 is 16k and set to disable dcache +#define NO_CACHE __attribute__((section(".ram2"))) +#elif defined(STM32H7XX) +// DTCM memory is 128k +#define CCM_OPTIONAL __attribute__((section(".ram5"))) +// SRAM3 is 32k and set to disable dcache +#define NO_CACHE __attribute__((section(".ram3"))) +#else /* this MCU doesn't need these */ #define CCM_OPTIONAL -#endif /* EFI_USE_CCM */ +#define NO_CACHE +#endif #define UNIT_TEST_BUSY_WAIT_CALLBACK() {} diff --git a/firmware/hw_layer/adc/AdcConfiguration.h b/firmware/hw_layer/adc/AdcConfiguration.h index 7f3695bde2..77f9ad1961 100644 --- a/firmware/hw_layer/adc/AdcConfiguration.h +++ b/firmware/hw_layer/adc/AdcConfiguration.h @@ -31,7 +31,6 @@ public: uint32_t conversionCount = 0; uint32_t errorsCount = 0; int getAdcValueByIndex(int internalIndex) const; - void invalidateSamplesCache(); adcsample_t *samples; size_t buf_len; diff --git a/firmware/hw_layer/adc/adc_inputs.cpp b/firmware/hw_layer/adc/adc_inputs.cpp index ea23b9d082..60e79c120c 100644 --- a/firmware/hw_layer/adc/adc_inputs.cpp +++ b/firmware/hw_layer/adc/adc_inputs.cpp @@ -37,15 +37,8 @@ #include "maf.h" #include "perf_trace.h" -// on F7 this must be aligned on a 32-byte boundary, and be a multiple of 32 bytes long. -// When we invalidate the cache line(s) for ADC samples, we don't want to nuke any -// adjacent data. -// F4 does not care -static __ALIGNED(32) adcsample_t slowAdcSampleBuf[ADC_BUF_DEPTH_SLOW * ADC_MAX_CHANNELS_COUNT]; -static __ALIGNED(32) adcsample_t fastAdcSampleBuf[ADC_BUF_DEPTH_FAST * ADC_MAX_CHANNELS_COUNT]; - -static_assert(sizeof(slowAdcSampleBuf) % 32 == 0, "Slow ADC sample buffer size must be a multiple of 32 bytes"); -static_assert(sizeof(fastAdcSampleBuf) % 32 == 0, "Fast ADC sample buffer size must be a multiple of 32 bytes"); +static NO_CACHE adcsample_t slowAdcSampleBuf[ADC_BUF_DEPTH_SLOW * ADC_MAX_CHANNELS_COUNT]; +static NO_CACHE adcsample_t fastAdcSampleBuf[ADC_BUF_DEPTH_FAST * ADC_MAX_CHANNELS_COUNT]; static adc_channel_mode_e adcHwChannelEnabled[HW_MAX_ADC_INDEX]; @@ -313,17 +306,6 @@ int AdcDevice::getAdcValueByIndex(int internalIndex) const { return values.adc_data[internalIndex]; } -void AdcDevice::invalidateSamplesCache() { -#if defined(STM32F7XX) - // The STM32F7xx has a data cache - // DMA operations DO NOT invalidate cache lines, since the ARM m7 doesn't have - // anything like a CCI that maintains coherency across multiple bus masters. - // As a result, we have to manually invalidate the D-cache any time we (the CPU) - // would like to read something that somebody else wrote (ADC via DMA, in this case) - SCB_InvalidateDCache_by_Addr(reinterpret_cast(samples), sizeof(*samples) * buf_len); -#endif /* STM32F7XX */ -} - void AdcDevice::init(void) { hwConfig->num_channels = size(); /* driver does this internally */ @@ -483,8 +465,6 @@ public: { ScopePerf perf(PE::AdcProcessSlow); - slowAdc.invalidateSamplesCache(); - /* Calculates the average values from the ADC samples.*/ for (int i = 0; i < slowAdc.size(); i++) { adcsample_t value = getAvgAdcValue(i, slowAdc.samples, ADC_BUF_DEPTH_SLOW, slowAdc.size()); diff --git a/firmware/hw_layer/hardware.cpp b/firmware/hw_layer/hardware.cpp index 74ce271a5e..bffbfa0b0a 100644 --- a/firmware/hw_layer/hardware.cpp +++ b/firmware/hw_layer/hardware.cpp @@ -167,8 +167,6 @@ void adc_callback_fast(ADCDriver *adcp) { //size_t n = adcp->depth; if (adcp->state == ADC_COMPLETE) { - fastAdc.invalidateSamplesCache(); - #if HAL_TRIGGER_USE_ADC // we need to call this ASAP, because trigger processing is time-critical if (triggerSampleIndex >= 0) @@ -223,8 +221,6 @@ void adc_callback_fast(ADCDriver *adcp) { if (adcp->state == ADC_COMPLETE) { ScopePerf perf(PE::AdcCallbackFastComplete); - fastAdc.invalidateSamplesCache(); - /** * this callback is executed 10 000 times a second, it needs to be as fast as possible */ diff --git a/firmware/hw_layer/mmc_card.cpp b/firmware/hw_layer/mmc_card.cpp index 956a1fcbf8..8ebc9ead88 100644 --- a/firmware/hw_layer/mmc_card.cpp +++ b/firmware/hw_layer/mmc_card.cpp @@ -77,7 +77,8 @@ spi_device_e mmcSpiDevice = SPI_NONE; extern const USBConfig msdusbcfg; #endif /* HAL_USE_USB_MSD */ -static THD_WORKING_AREA(mmcThreadStack,3 * UTILITY_THREAD_STACK_SIZE); // MMC monitor thread +// TODO: this is NO_CACHE because of https://github.com/rusefi/rusefi/issues/2356 +static NO_CACHE THD_WORKING_AREA(mmcThreadStack,3 * UTILITY_THREAD_STACK_SIZE); // MMC monitor thread /** * MMC driver instance. @@ -342,7 +343,7 @@ static void mmcUnMount(void) { } #if HAL_USE_USB_MSD -static uint8_t blkbuf[MMCSD_BLOCK_SIZE]; +static NO_CACHE uint8_t blkbuf[MMCSD_BLOCK_SIZE]; static const scsi_inquiry_response_t scsi_inquiry_response = { 0x00, /* direct access block device */ @@ -488,7 +489,7 @@ struct SdLogBufferWriter final : public BufferedWriter<512> { } }; -static SdLogBufferWriter logBuffer MAIN_RAM; +static NO_CACHE SdLogBufferWriter logBuffer; static THD_FUNCTION(MMCmonThread, arg) { (void)arg; diff --git a/firmware/hw_layer/ports/stm32/stm32f7/cfg/mcuconf.h b/firmware/hw_layer/ports/stm32/stm32f7/cfg/mcuconf.h index 4f451b3c0e..61af416235 100644 --- a/firmware/hw_layer/ports/stm32/stm32f7/cfg/mcuconf.h +++ b/firmware/hw_layer/ports/stm32/stm32f7/cfg/mcuconf.h @@ -146,7 +146,7 @@ #define STM32_CECSEL STM32_CECSEL_LSE #define STM32_CK48MSEL STM32_CK48MSEL_PLL #define STM32_SDMMCSEL STM32_SDMMCSEL_PLL48CLK -#define STM32_SRAM2_NOCACHE FALSE +#define STM32_SRAM2_NOCACHE TRUE /* * ADC driver system settings.