/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <variant/inc/variant.h>

#include <plat/inc/cmsis.h>
#include <plat/inc/gpio.h>
#include <plat/inc/pwr.h>
#include <plat/inc/bl.h>

#include <nanohub/sha2.h>
#include <nanohub/aes.h>
#include <nanohub/rsa.h>
#include <nanohub/nanohub.h>

#include <printf.h>
#include <string.h>
#include <alloca.h>
#include <gpio.h>

static uint32_t blVerifyOsImage(const uint8_t *addr, struct OsUpdateHdr **start, uint32_t *size);

/*
 * Memory-mapped peripheral register layouts. Each struct is overlaid on the
 * peripheral's base address (e.g. FLASH_BASE, RCC_BASE); the "unusedN"
 * members are padding that keeps later registers at their offsets.
 */
struct StmCrc
{
    volatile uint32_t DR;
    volatile uint32_t IDR;
    volatile uint32_t CR;
};

struct StmFlash
{
    volatile uint32_t ACR;
    volatile uint32_t KEYR;
    volatile uint32_t OPTKEYR;
    volatile uint32_t SR;
    volatile uint32_t CR;
    volatile uint32_t OPTCR;
};

struct StmRcc {
    volatile uint32_t CR;
    volatile uint32_t PLLCFGR;
    volatile uint32_t CFGR;
    volatile uint32_t CIR;
    volatile uint32_t AHB1RSTR;
    volatile uint32_t AHB2RSTR;
    volatile uint32_t AHB3RSTR;
    uint8_t unused0[4];
    volatile uint32_t APB1RSTR;
    volatile uint32_t APB2RSTR;
    uint8_t unused1[8];
    volatile uint32_t AHB1ENR;
    volatile uint32_t AHB2ENR;
    volatile uint32_t AHB3ENR;
    uint8_t unused2[4];
    volatile uint32_t APB1ENR;
    volatile uint32_t APB2ENR;
    uint8_t unused3[8];
    volatile uint32_t AHB1LPENR;
    volatile uint32_t AHB2LPENR;
    volatile uint32_t AHB3LPENR;
    uint8_t unused4[4];
    volatile uint32_t APB1LPENR;
    volatile uint32_t APB2LPENR;
    uint8_t unused5[8];
    volatile uint32_t BDCR;
    volatile uint32_t CSR;
    uint8_t unused6[8];
    volatile uint32_t SSCGR;
    volatile uint32_t PLLI2SCFGR;
};

struct StmUdid
{
    volatile uint32_t U_ID[3];
};

struct StmSpi {
    volatile uint32_t CR1;
    volatile uint32_t CR2;
    volatile uint32_t SR;
    volatile uint32_t DR;
    volatile uint32_t CRCPR;
    volatile uint32_t RXCRCR;
    volatile uint32_t TXCRCR;
    volatile uint32_t I2SCFGR;
    volatile uint32_t I2SPR;
};

struct StmGpio {
    volatile uint32_t MODER;
    volatile uint32_t OTYPER;
    volatile uint32_t OSPEEDR;
    volatile uint32_t PUPDR;
    volatile uint32_t IDR;
    volatile uint32_t ODR;
    volatile uint32_t BSRR;
    volatile uint32_t LCKR;
    volatile uint32_t AFR[2];
};

//stm defines
#define BL_MAX_FLASH_CODE   1024   /* upper bound on the RAM-resident flash routines, in bytes */

#define FLASH_ACR_LAT(x)    ((x) & FLASH_ACR_LAT_MASK)
#define FLASH_ACR_LAT_MASK  0x0F
#define FLASH_ACR_PRFTEN    0x00000100
#define FLASH_ACR_ICEN      0x00000200
#define FLASH_ACR_DCEN      0x00000400
#define FLASH_ACR_ICRST     0x00000800
#define FLASH_ACR_DCRST     0x00001000

#define FLASH_SR_EOP        0x00000001
#define FLASH_SR_OPERR      0x00000002
#define FLASH_SR_WRPERR     0x00000010
#define FLASH_SR_PGAERR     0x00000020
#define FLASH_SR_PGPERR     0x00000040
#define FLASH_SR_PGSERR     0x00000080
#define FLASH_SR_RDERR      0x00000100
#define FLASH_SR_BSY        0x00010000

#define FLASH_CR_PG         0x00000001
#define FLASH_CR_SER        0x00000002
#define FLASH_CR_MER        0x00000004
#define FLASH_CR_SNB(x)     (((x) << FLASH_CR_SNB_SHIFT) & FLASH_CR_SNB_MASK)
#define FLASH_CR_SNB_MASK   0x00000078
#define FLASH_CR_SNB_SHIFT  3
#define FLASH_CR_PSIZE(x)   (((x) << FLASH_CR_PSIZE_SHIFT) & FLASH_CR_PSIZE_MASK)
#define FLASH_CR_PSIZE_MASK 0x00000300
#define FLASH_CR_PSIZE_SHIFT 8
#define FLASH_CR_PSIZE_8    0x0
#define FLASH_CR_PSIZE_16   0x1
#define FLASH_CR_PSIZE_32   0x2
#define FLASH_CR_PSIZE_64   0x3
#define FLASH_CR_STRT       0x00010000
#define FLASH_CR_EOPIE      0x01000000
#define FLASH_CR_ERRIE      0x02000000
#define FLASH_CR_LOCK       0x80000000

//for comms protocol
#define BL_SYNC_IN                      0x5A
#define BL_ACK                          0x79
#define BL_NAK                          0x1F
#define BL_SYNC_OUT                     0xA5

#define BL_CMD_GET                      0x00
#define BL_CMD_READ_MEM                 0x11
#define BL_CMD_WRITE_MEM                0x31
#define BL_CMD_ERASE                    0x44
#define BL_CMD_GET_SIZES                0xEE /* our own command. reports: {u32 osSz, u32 sharedSz, u32 eeSz} all in big endian */
#define BL_CMD_UPDATE_FINISHED          0xEF /* our own command. attempts to verify the update -> ACK/NAK. MUST be called after upload to mark it as completed */

#define BL_SHARED_AREA_FAKE_ERASE_BLK   0xFFF0
#define BL_SHARED_AREA_FAKE_ADDR        0x50000000

// signatures of the RAM-resident routines produced by blGetFlashEraseCode() / blGetFlashWriteCode()
typedef void (*FlashEraseF)(volatile uint32_t *, uint32_t, volatile uint32_t *);
typedef void (*FlashWriteF)(volatile uint8_t *, uint8_t, volatile uint32_t *);

//linker provides these
extern uint32_t __pubkeys_start[];
extern uint32_t __pubkeys_end[];
extern uint8_t __stack_top[];
extern uint8_t __ram_start[];
extern uint8_t __ram_end[];
extern uint8_t __eedata_start[];
extern uint8_t __eedata_end[];
extern uint8_t __code_start[];
extern uint8_t __code_end[];
extern uint8_t __shared_start[];
extern uint8_t __shared_end[];
extern void __VECTORS();

//make GCC happy
void __blEntry(void);

enum BlFlashType
{
    BL_FLASH_BL,
    BL_FLASH_EEDATA,
    BL_FLASH_KERNEL,
    BL_FLASH_SHARED
};

/*
 * One entry per flash sector. The index of an entry is used as the sector
 * number written to FLASH_CR.SNB when erasing (see blEraseSectors()).
 */
static const struct blFlashTable   // For erase code, we need to know which page a given memory address is in
{
    uint8_t *address;
    uint32_t length;
    uint32_t type;
} mBlFlashTable[] =
#ifndef BL_FLASH_TABLE
{
    { (uint8_t *)(&BL),                      0x04000, BL_FLASH_BL     },
    { (uint8_t *)(__eedata_start),           0x04000, BL_FLASH_EEDATA },
    { (uint8_t *)(__eedata_start + 0x04000), 0x04000, BL_FLASH_EEDATA },
    { (uint8_t *)(__code_start),             0x04000, BL_FLASH_KERNEL },
    { (uint8_t *)(__code_start + 0x04000),   0x10000, BL_FLASH_KERNEL },
    { (uint8_t *)(__shared_start),           0x20000, BL_FLASH_SHARED },
    { (uint8_t *)(__shared_start + 0x20000), 0x20000, BL_FLASH_SHARED },
    { (uint8_t *)(__shared_start + 0x40000), 0x20000, BL_FLASH_SHARED },
};
#else
BL_FLASH_TABLE;
#endif

static const char mOsUpdateMagic[] = OS_UPDT_MAGIC;

//BL stack
uint64_t __attribute__ ((section (".stack"))) _STACK[BL_STACK_SIZE / sizeof(uint64_t)];

#ifdef DEBUG_UART_PIN

// cvprintf() output callback: emits one character over the bit-banged UART, expanding '\n' to "\r\n"
static bool blLogPutcharF(void *userData, char ch)
{
    if (ch == '\n')
        gpioBitbangedUartOut('\r');

    gpioBitbangedUartOut(ch);

    return true;
}

// printf-style logging; compiled to nothing unless DEBUG_UART_PIN is defined
void blLog(const char *str, ...)
{
    va_list vl;

    va_start(vl, str);
    cvprintf(blLogPutcharF, NULL, str, vl);
    va_end(vl);
}

#else

#define blLog(...)

#endif

// Masks interrupts (cpsid i) and returns the PRIMASK value read just before doing so
static inline uint32_t blDisableInts(void)
{
    uint32_t state;

    asm volatile (
        "mrs %0, PRIMASK \n"
        "cpsid i \n"
        :"=r"(state)
    );

    return state;
}

// Writes a value previously returned by blDisableInts() back to PRIMASK
static inline void blRestoreInts(uint32_t state)
{
    asm volatile(
        "msr PRIMASK, %0 \n"
        ::"r"((uint32_t)state)
    );
}

static uint32_t blExtApiGetVersion(void)
{
    return BL_VERSION_CUR;
}

// Requests a reset through SCB->AIRCR and spins; does not return
static void blExtApiReboot(void)
{
    SCB->AIRCR = 0x05FA0004;
    //we never get here
    while(1);
}

/*
 * Copies up to 3 words of the device unique ID into snum.
 * length is a count of 32-bit words and is clamped to 3.
 */
static void blExtApiGetSnum(uint32_t *snum, uint32_t length)
{
    struct StmUdid *reg = (struct StmUdid *)UDID_BASE;
    uint32_t i;

    if (length > 3)
        length = 3;

    for (i = 0; i < length; i++)
        snum[i] = reg->U_ID[i];
}

/*
 * Return the address of the erase code and the length of the code
 *
 * This code needs to run out of ram and not flash since accessing flash
 * while erasing is undefined (best case the processor stalls, worst case
 * it starts executing garbage)
 *
 * This function is used to get a pointer to the actual code that does the
 * erase and polls for completion (so we can copy it to ram) as well as the
 * length of the code (so we know how much space to allocate for it)
 *
 * void FlashEraseF(volatile uint32_t *addr, uint32_t value, volatile uint32_t *status)
 * {
 *     *addr = value;
 *     while (*status & FLASH_SR_BSY) ;
 * }
 */
static void __attribute__((naked)) blGetFlashEraseCode(uint16_t **addr, uint32_t *size)
{
    asm volatile (
        " push {lr} \n"
        " bl 9f \n"
        " str r1, [r0, #0] \n" // *addr = value
        "1: \n"
        " ldr r3, [r2, #0] \n" // r3 = *status
        " lsls r3, #15 \n" // r3 <<= 15
        " bmi 1b \n" // if (r3 < 0) goto 1
        " bx lr \n" // return
        "9: \n"
        " bic lr, #0x1 \n"
        " adr r3, 9b \n"
        " sub r3, lr \n"
        " str lr, [r0] \n"
        " str r3, [r1] \n"
        " pop {pc} \n"
    );
}

/*
 * Return the address of the write code and the length of the code
 *
 * This code needs to run out of ram and not flash since accessing flash
 * while writing to flash is undefined (best case the processor stalls, worst
 * case it starts executing garbage)
 *
 * This function is used to get a pointer to the actual code that does the
 * write and polls for completion (so we can copy it to ram) as well as the
 * length of the code (so we know how much space to allocate for it)
 *
 * void FlashWriteF(volatile uint8_t *addr, uint8_t value, volatile uint32_t *status)
 * {
 *     *addr = value;
 *     while (*status & FLASH_SR_BSY) ;
 * }
 */
static void __attribute__((naked)) blGetFlashWriteCode(uint16_t **addr, uint32_t *size)
{
    asm volatile (
        " push {lr} \n"
        " bl 9f \n"
        " strb r1, [r0, #0] \n" // *addr = value
        "1: \n"
        " ldr r3, [r2, #0] \n" // r3 = *status
        " lsls r3, #15 \n" // r3 <<= 15
        " bmi 1b \n" // if (r3 < 0) goto 1
        " bx lr \n" // return
        "9: \n"
        " bic lr, #0x1 \n"
        " adr r3, 9b \n"
        " sub r3, lr \n"
        " str lr, [r0] \n"
        " str r3, [r1] \n"
        " pop {pc} \n"
    );
}

/*
 * Erases every sector i (0 <= i < sector_cnt) for which erase_mask[i] is
 * non-zero. The erase routine is copied onto the stack and run from there.
 * The caller is expected to have unlocked the flash and masked interrupts.
 * Does nothing if the erase routine is BL_MAX_FLASH_CODE bytes or larger.
 */
static void blEraseSectors(uint32_t sector_cnt, uint8_t *erase_mask)
{
    struct StmFlash *flash = (struct StmFlash *)FLASH_BASE;
    uint16_t *code_src, *code;
    uint32_t i, code_length;
    FlashEraseF func;

    blGetFlashEraseCode(&code_src, &code_length);

    if (code_length < BL_MAX_FLASH_CODE) {
        // halfword-align the stack copy; the +1 on the function pointer sets the Thumb bit
        code = (uint16_t *)(((uint32_t)alloca(code_length + 1) + 1) & ~0x1);
        func = (FlashEraseF)((uint8_t *)code+1);

        for (i = 0; i < code_length / sizeof(uint16_t); i++)
            code[i] = code_src[i];

        for (i = 0; i < sector_cnt; i++) {
            if (erase_mask[i]) {
                flash->CR = (flash->CR & ~(FLASH_CR_SNB_MASK)) |
                    FLASH_CR_SNB(i) | FLASH_CR_SER;
                // the RAM routine writes CR with STRT set, then polls SR until BSY clears
                func(&flash->CR, flash->CR | FLASH_CR_STRT, &flash->SR);
                flash->CR &= ~(FLASH_CR_SNB_MASK | FLASH_CR_SER);
            }
        }
    }
}

/*
 * Programs length bytes from src to flash at dst, one byte at a time, using
 * a write routine copied onto the stack. Bytes that already hold the wanted
 * value are skipped. The caller is expected to have unlocked the flash and
 * masked interrupts. Does nothing if the write routine is BL_MAX_FLASH_CODE
 * bytes or larger.
 */
static void blWriteBytes(uint8_t *dst, const uint8_t *src, uint32_t length)
{
    struct StmFlash *flash = (struct StmFlash *)FLASH_BASE;
    uint16_t *code_src, *code;
    uint32_t i, code_length;
    FlashWriteF func;

    blGetFlashWriteCode(&code_src, &code_length);

    if (code_length < BL_MAX_FLASH_CODE) {
        // halfword-align the stack copy; the +1 on the function pointer sets the Thumb bit
        code = (uint16_t *)(((uint32_t)alloca(code_length+1) + 1) & ~0x1);
        func = (FlashWriteF)((uint8_t *)code+1);

        for (i = 0; i < code_length / sizeof(uint16_t); i++)
            code[i] = code_src[i];

        flash->CR |= FLASH_CR_PG;

        for (i = 0; i < length; i++) {
            if (dst[i] != src[i])
                func(&dst[i], src[i], &flash->SR);
        }

        flash->CR &= ~FLASH_CR_PG;
    }
}

/*
 * Programs length bytes from src into flash at dst.
 *
 * Returns false without touching flash when: length is 0, dst + length wraps
 * or lies outside the range covered by mBlFlashTable, dst is not inside any
 * table entry, or the write would need a 0 -> 1 bit transition (i.e. an
 * erase). Also returns false if the flash cannot be unlocked with key1/key2.
 * Otherwise returns true iff the flash contents equal src afterwards.
 */
static bool blProgramFlash(uint8_t *dst, const uint8_t *src, uint32_t length, uint32_t key1, uint32_t key2)
{
    struct StmFlash *flash = (struct StmFlash *)FLASH_BASE;
    const uint32_t sector_cnt = sizeof(mBlFlashTable) / sizeof(struct blFlashTable);
    uint32_t acr_cache, cr_cache, offset, i, j = 0, int_state = 0;
    uint8_t *ptr;

    if (((length == 0)) ||
        ((0xFFFFFFFF - (uint32_t)dst) < (length - 1)) ||
        ((dst < mBlFlashTable[0].address)) ||
        ((dst + length) > (mBlFlashTable[sector_cnt-1].address +
                           mBlFlashTable[sector_cnt-1].length))) {
        return false;
    }

    // compute which flash block we are starting from
    for (i = 0; i < sector_cnt; i++) {
        if (dst >= mBlFlashTable[i].address &&
            dst < (mBlFlashTable[i].address + mBlFlashTable[i].length)) {
            break;
        }
    }

    // dst falls in a gap between table entries: never index past the table
    if (i == sector_cnt)
        return false;

    // now loop through all the flash blocks and see if we have to do any
    // 0 -> 1 transitions of a bit. If so, return false
    // 1 -> 0 transitions of a bit do not require an erase
    offset = (uint32_t)(dst - mBlFlashTable[i].address);
    ptr = mBlFlashTable[i].address;
    while (j < length && i < sector_cnt) {
        if (offset == mBlFlashTable[i].length) {
            i++;
            // ran off the end of the table with bytes still to check
            if (i == sector_cnt)
                return false;
            offset = 0;
            ptr = mBlFlashTable[i].address;
        }

        if ((ptr[offset] & src[j]) != src[j]) {
            return false;
        } else {
            j++;
            offset++;
        }
    }

    // disable interrupts
    // otherwise an interrupt during flash write will stall the processor
    // until the write completes
    int_state = blDisableInts();

    // wait for flash to not be busy (should never be set at this point)
    while (flash->SR & FLASH_SR_BSY);

    cr_cache = flash->CR;

    if (flash->CR & FLASH_CR_LOCK) {
        // unlock flash
        flash->KEYR = key1;
        flash->KEYR = key2;
    }

    if (flash->CR & FLASH_CR_LOCK) {
        // unlock failed, restore interrupts
        blRestoreInts(int_state);

        return false;
    }

    flash->CR = FLASH_CR_PSIZE(FLASH_CR_PSIZE_8);

    acr_cache = flash->ACR;

    // disable and flush data and instruction caches
    flash->ACR &= ~(FLASH_ACR_DCEN | FLASH_ACR_ICEN);
    flash->ACR |= (FLASH_ACR_DCRST | FLASH_ACR_ICRST);

    blWriteBytes(dst, src, length);

    flash->ACR = acr_cache;
    flash->CR = cr_cache;

    blRestoreInts(int_state);

    return !memcmp(dst, src, length);
}

/*
 * Like blProgramFlash(), but first requires every table entry that
 * [dst, dst + length) starts in or overlaps to be of the given type.
 * Returns false if any such entry has a different type.
 */
static bool blProgramTypedArea(uint8_t *dst, const uint8_t *src, uint32_t length, uint32_t type, uint32_t key1, uint32_t key2)
{
    const uint32_t sector_cnt = sizeof(mBlFlashTable) / sizeof(struct blFlashTable);
    uint32_t i;

    for (i = 0; i < sector_cnt; i++) {

        if ((dst >= mBlFlashTable[i].address &&
             dst < (mBlFlashTable[i].address + mBlFlashTable[i].length)) ||
            (dst < mBlFlashTable[i].address &&
             (dst + length > mBlFlashTable[i].address))) {
            if (mBlFlashTable[i].type != type)
                return false;
        }
    }

    return blProgramFlash(dst, src, length, key1, key2);
}

static bool blExtApiProgramSharedArea(uint8_t *dst, const uint8_t *src, uint32_t length, uint32_t key1, uint32_t key2)
{
    return blProgramTypedArea(dst, src, length, BL_FLASH_SHARED, key1, key2);
}

static bool blExtApiProgramEe(uint8_t *dst, const uint8_t *src, uint32_t length, uint32_t key1, uint32_t key2)
{
    return blProgramTypedArea(dst, src, length, BL_FLASH_EEDATA, key1, key2);
}

/*
 * Erases every flash sector whose table entry has the given type.
 * Returns false only if the flash could not be unlocked with key1/key2;
 * the erase itself is not verified.
 */
static bool blEraseTypedArea(uint32_t type, uint32_t key1, uint32_t key2)
{
    struct StmFlash *flash = (struct StmFlash *)FLASH_BASE;
    const uint32_t sector_cnt = sizeof(mBlFlashTable) / sizeof(struct blFlashTable);
    uint32_t i, acr_cache, cr_cache, erase_cnt = 0, int_state = 0;
    uint8_t erase_mask[sector_cnt];

    for (i = 0; i < sector_cnt; i++) {
        if (mBlFlashTable[i].type == type) {
            erase_mask[i] = 1;
            erase_cnt++;
        } else {
            erase_mask[i] = 0;
        }
    }

    // disable interrupts
    // otherwise an interrupt during flash write/erase will stall the processor
    // until the write/erase completes
    int_state = blDisableInts();

    // wait for flash to not be busy (should never be set at this point)
    while (flash->SR & FLASH_SR_BSY);

    cr_cache = flash->CR;

    if (flash->CR & FLASH_CR_LOCK) {
        // unlock flash
        flash->KEYR = key1;
        flash->KEYR = key2;
    }

    if (flash->CR & FLASH_CR_LOCK) {
        // unlock failed, restore interrupts
        blRestoreInts(int_state);

        return false;
    }

    flash->CR = FLASH_CR_PSIZE(FLASH_CR_PSIZE_8);

    acr_cache = flash->ACR;

    // disable and flush data and instruction caches
    flash->ACR &= ~(FLASH_ACR_DCEN | FLASH_ACR_ICEN);
    flash->ACR |= (FLASH_ACR_DCRST | FLASH_ACR_ICRST);

    if (erase_cnt)
        blEraseSectors(sector_cnt, erase_mask);

    flash->ACR = acr_cache;
    flash->CR = cr_cache;

    // restore interrupts
    blRestoreInts(int_state);

    return true; //we assume erase worked
}

static bool blExtApiEraseSharedArea(uint32_t key1, uint32_t key2)
{
    return blEraseTypedArea(BL_FLASH_SHARED, key1, key2);
}

/*
 * Scans the first BL_SCAN_OFFSET bytes of the shared area in 4-byte steps
 * and runs blVerifyOsImage() at each offset, stopping at the first offset
 * whose result is anything other than OS_UPDT_HDR_CHECK_FAILED.
 * Returns that result (OS_UPDT_HDR_CHECK_FAILED if no offset got further).
 */
static uint32_t blVerifyOsUpdate(struct OsUpdateHdr **start, uint32_t *size)
{
    // initialised so the return value is defined even if the loop never runs
    uint32_t ret = OS_UPDT_HDR_CHECK_FAILED;
    int i;

    for (i = 0; i < BL_SCAN_OFFSET; i += 4) {
        ret = blVerifyOsImage(__shared_start + i, start, size);
        if (ret != OS_UPDT_HDR_CHECK_FAILED)
            break;
    }

    return ret;
}

static uint32_t blExtApiVerifyOsUpdate(void)
{
    return blVerifyOsUpdate(NULL, NULL);
}

// Installed for NMI and the fault vectors in __BL_VECTORS: any of them reboots
static void blSupirousIntHandler(void)
{
    //BAD!
    blExtApiReboot();
}

/*
 * Returns a pointer to the linker-provided public key table and stores the
 * number of keys in *numKeys. Returns NULL (leaving *numKeys untouched) if
 * the table size is not a whole multiple of RSA_WORDS.
 */
static const uint32_t *blExtApiGetRsaKeyInfo(uint32_t *numKeys)
{
    uint32_t numWords = __pubkeys_end - __pubkeys_start;

    if (numWords % RSA_WORDS) // something is wrong
        return NULL;

    *numKeys = numWords / RSA_WORDS;
    return __pubkeys_start;
}

/*
 * Checks the padding words of an RSA public-op result (words
 * SHA2_HASH_WORDS .. RSA_WORDS - 1). Returns rsaResult, whose first
 * SHA2_HASH_WORDS words the caller then compares against its own hash,
 * or NULL if the padding is malformed.
 */
static const uint32_t* blExtApiSigPaddingVerify(const uint32_t *rsaResult)
{
    uint32_t i;

    //all but first and last word of padding MUST have no zero bytes
    for (i = SHA2_HASH_WORDS + 1; i < RSA_WORDS - 1; i++) {
        if (!(uint8_t)(rsaResult[i] >>  0))
            return NULL;
        if (!(uint8_t)(rsaResult[i] >>  8))
            return NULL;
        if (!(uint8_t)(rsaResult[i] >> 16))
            return NULL;
        if (!(uint8_t)(rsaResult[i] >> 24))
            return NULL;
    }

    //first padding word must have all nonzero bytes except low byte
    if ((rsaResult[SHA2_HASH_WORDS] & 0xff) || !(rsaResult[SHA2_HASH_WORDS] & 0xff00) || !(rsaResult[SHA2_HASH_WORDS] & 0xff0000) || !(rsaResult[SHA2_HASH_WORDS] & 0xff000000))
        return NULL;

    //last padding word must have 0x0002 in top 16 bits and nonzero random bytes in lower bytes
    if ((rsaResult[RSA_WORDS - 1] >> 16) != 2)
        return NULL;
    if (!(rsaResult[RSA_WORDS - 1] & 0xff00) || !(rsaResult[RSA_WORDS - 1] & 0xff))
        return NULL;

    return rsaResult;
}

// Bootloader vector table: core exception vectors followed by the API entry points exported by the bootloader
const struct BlVecTable __attribute__((section(".blvec"))) __BL_VECTORS =
{
    /* cortex */
    .blStackTop = (uint32_t)&__stack_top,
    .blEntry = &__blEntry,
    .blNmiHandler = &blSupirousIntHandler,
    .blMmuFaultHandler = &blSupirousIntHandler,
    .blBusFaultHandler = &blSupirousIntHandler,
    .blUsageFaultHandler = &blSupirousIntHandler,

    /* api */
    .blGetVersion = &blExtApiGetVersion,
    .blReboot = &blExtApiReboot,
    .blGetSnum = &blExtApiGetSnum,
    .blProgramShared = &blExtApiProgramSharedArea,
    .blEraseShared = &blExtApiEraseSharedArea,
    .blProgramEe = &blExtApiProgramEe,
    .blGetPubKeysInfo = &blExtApiGetRsaKeyInfo,
    .blRsaPubOpIterative = &rsaPubOpIterative,
    .blSha2init = &sha2init,
    .blSha2processBytes = &sha2processBytes,
    .blSha2finish = &sha2finish,
    .blAesInitForEncr = &aesInitForEncr,
    .blAesInitForDecr = &aesInitForDecr,
    .blAesEncr = &aesEncr,
    .blAesDecr = &aesDecr,
    .blAesCbcInitForEncr = &aesCbcInitForEncr,
    .blAesCbcInitForDecr = &aesCbcInitForDecr,
    .blAesCbcEncr = &aesCbcEncr,
    .blAesCbcDecr = &aesCbcDecr,
    .blSigPaddingVerify = &blExtApiSigPaddingVerify,
    .blVerifyOsUpdate = &blExtApiVerifyOsUpdate,
};

static void blApplyVerifiedUpdate(const struct OsUpdateHdr *os) //only called if an update has been found to exist and be valid, signed, etc!
{
    //copy shared to code, and if successful, erase shared area
    if (blEraseTypedArea(BL_FLASH_KERNEL, BL_FLASH_KEY1, BL_FLASH_KEY2))
        if (blProgramTypedArea(__code_start, (const uint8_t*)(os + 1), os->size, BL_FLASH_KERNEL, BL_FLASH_KEY1, BL_FLASH_KEY2))
            (void)blExtApiEraseSharedArea(BL_FLASH_KEY1, BL_FLASH_KEY2);
}

// Programs the low byte of mark into hdr->marker (best effort; the result is ignored)
static void blWriteMark(struct OsUpdateHdr *hdr, uint32_t mark)
{
    uint8_t dstVal = mark;

    (void)blExtApiProgramSharedArea(&hdr->marker, &dstVal, sizeof(hdr->marker), BL_FLASH_KEY1, BL_FLASH_KEY2);
}

// Changes the marker of the header at the start of the shared area from old to new; no-op if it is not currently old
static void blUpdateMark(uint32_t old, uint32_t new)
{
    struct OsUpdateHdr *hdr = (struct OsUpdateHdr *)__shared_start;

    if (hdr->marker != old)
        return;

    blWriteMark(hdr, new);
}

/*
 * Verifies the OS update image whose header is at addr inside the shared area.
 *
 * Layout checked: header, hdr->size bytes of image, RSA_BYTES of signature,
 * RSA_BYTES of public key.
 *
 * Returns OS_UPDT_HDR_CHECK_FAILED if the header/image does not fit, is
 * misaligned, or has the wrong magic; OS_UPDT_HDR_MARKER_INVALID if the
 * marker is INVALID or INPROGRESS. Past that point the marker is rewritten
 * to VERIFIED or INVALID and the result is OS_UPDT_SUCCESS,
 * OS_UPDT_UNKNOWN_PUBKEY, OS_UPDT_INVALID_SIGNATURE or
 * OS_UPDT_INVALID_SIGNATURE_HASH.
 *
 * On success, *start and *size (when non-NULL) receive the header pointer
 * and the image size.
 */
static uint32_t blVerifyOsImage(const uint8_t *addr, struct OsUpdateHdr **start, uint32_t *size)
{
    const uint32_t *rsaKey, *osSigHash, *osSigPubkey, *ourHash, *rsaResult, *expectedHash = NULL;
    struct OsUpdateHdr *hdr = (struct OsUpdateHdr*)addr;
    struct OsUpdateHdr cpy;
    uint32_t i, numRsaKeys = 0, rsaStateVar1, rsaStateVar2, rsaStep = 0;
    const uint8_t *updateBinaryData;
    bool isValid = false;
    struct Sha2state sha;
    struct RsaState rsa;
    uint32_t ret = OS_UPDT_HDR_CHECK_FAILED;
    // bytes that accompany the image: the header plus signature and public key
    // (RSA_BYTES each). This must be a byte count: using RSA_WORDS here made
    // the bounds checks below too lax and allowed reads past __shared_end.
    const uint32_t overhead = sizeof(*hdr) + 2 * RSA_BYTES;

    // header does not fit or is not aligned
    if (addr < __shared_start || addr > (__shared_end - overhead) || ((uintptr_t)addr & 3))
        return OS_UPDT_HDR_CHECK_FAILED;

    // image does not fit
    if (hdr->size > (__shared_end - addr - overhead))
        return OS_UPDT_HDR_CHECK_FAILED;

    // OS magic does not match
    if (memcmp(hdr->magic, mOsUpdateMagic, sizeof(hdr->magic)) != 0)
        return OS_UPDT_HDR_CHECK_FAILED;

    // we don't allow shortcuts on success path, but we want to fail quickly
    if (hdr->marker == OS_UPDT_MARKER_INVALID)
        return OS_UPDT_HDR_MARKER_INVALID;

    // download did not finish
    if (hdr->marker == OS_UPDT_MARKER_INPROGRESS)
        return OS_UPDT_HDR_MARKER_INVALID;

    //get pointers
    updateBinaryData = (const uint8_t*)(hdr + 1);
    osSigHash = (const uint32_t*)(updateBinaryData + hdr->size);
    osSigPubkey = osSigHash + RSA_WORDS;

    //make sure the pub key is known
    for (i = 0, rsaKey = blExtApiGetRsaKeyInfo(&numRsaKeys); i < numRsaKeys; i++, rsaKey += RSA_WORDS) {
        if (memcmp(rsaKey, osSigPubkey, RSA_BYTES) == 0)
            break;
    }

    if (i == numRsaKeys) {
        ret = OS_UPDT_UNKNOWN_PUBKEY;
        //signed with an unknown key -> fail
        goto fail;
    }

    //decode sig using pubkey
    do {
        rsaResult = rsaPubOpIterative(&rsa, osSigHash, osSigPubkey, &rsaStateVar1, &rsaStateVar2, &rsaStep);
    } while (rsaStep);

    if (!rsaResult) {
        //decode fails -> invalid sig
        ret = OS_UPDT_INVALID_SIGNATURE;
        goto fail;
    }

    //verify padding
    expectedHash = blExtApiSigPaddingVerify(rsaResult);

    if (!expectedHash) {
        //padding check fails -> invalid sig
        ret = OS_UPDT_INVALID_SIGNATURE_HASH;
        goto fail;
    }

    //hash the update; the header is hashed with its marker forced to INPROGRESS
    sha2init(&sha);

    memcpy(&cpy, hdr, sizeof(cpy));
    cpy.marker = OS_UPDT_MARKER_INPROGRESS;
    sha2processBytes(&sha, &cpy, sizeof(cpy));
    sha2processBytes(&sha, (uint8_t*)(hdr + 1), hdr->size);
    ourHash = sha2finish(&sha);

    //verify hash match
    if (memcmp(expectedHash, ourHash, SHA2_HASH_SIZE) != 0) {
        //hash does not match -> data tampered with
        ret = OS_UPDT_INVALID_SIGNATURE_HASH; // same error; do not disclose nature of hash problem
        goto fail;
    }

    //it is valid
    isValid = true;
    ret = OS_UPDT_SUCCESS;
    if (start)
        *start = hdr;
    if (size)
        *size = hdr->size;

fail:
    //mark it appropriately
    blWriteMark(hdr, isValid ? OS_UPDT_MARKER_VERIFIED : OS_UPDT_MARKER_INVALID);
    return ret;
}

// Verifies the image at the very start of the shared area (no offset scan)
static inline bool blUpdateVerify()
{
    return blVerifyOsImage(__shared_start, NULL, NULL) == OS_UPDT_SUCCESS;
}

// Discards one byte from DR, waits for SR bit 0 to be set, then discards another
static void blSpiLoaderDrainRxFifo(struct StmSpi *spi)
{
    (void)spi->DR;
    while (!(spi->SR & 1));
    (void)spi->DR;
}

/*
 * Full-duplex single byte exchange: waits for SR bit 1, writes val to DR,
 * waits for SR bit 0 and returns the byte read from DR.
 * (presumably TXE and RXNE respectively - confirm against the SPI register map)
 */
static uint8_t blSpiLoaderTxRxByte(struct StmSpi *spi, uint32_t val)
{
    while (!(spi->SR & 2));
    spi->DR = val;
    while (!(spi->SR & 1));
    return spi->DR;
}

// Sends a length prefix of (len - 1) followed by len bytes of data
static void blSpiLoaderTxBytes(struct StmSpi *spi, const void *data, uint32_t len)
{
    const uint8_t *buf = (const uint8_t*)data;

    blSpiLoaderTxRxByte(spi, len - 1);
    while (len--)
        blSpiLoaderTxRxByte(spi, *buf++);
}

// Sends BL_SYNC_OUT; true iff the byte received during that transfer is BL_SYNC_IN
static bool blSpiLoaderSendSyncOut(struct StmSpi *spi)
{
    return blSpiLoaderTxRxByte(spi, BL_SYNC_OUT) == BL_SYNC_IN;
}

// Sends 0, then BL_ACK or BL_NAK, then 0; true iff the byte received during the last transfer is BL_ACK
static bool blSpiLoaderSendAck(struct StmSpi *spi, bool ack)
{
    blSpiLoaderTxRxByte(spi, 0);
    blSpiLoaderTxRxByte(spi, ack ? BL_ACK : BL_NAK);
    return blSpiLoaderTxRxByte(spi, 0) == BL_ACK;
}

/*
 * SPI command loader.
 *
 * Enables and configures SPI1 and GPIOA, then - if the SH_INT_WAKEUP pin
 * reads low, or force is set - waits a bounded number of polls for a
 * BL_SYNC_IN byte. Once synced it serves commands until the peer fails to
 * acknowledge one of our ACK/NAK replies. On exit SPI1 and GPIOA are reset
 * and their clock-enable bits restored.
 *
 * Reads and writes only address the shared area (through the fake address
 * window at BL_SHARED_AREA_FAKE_ADDR) and are refused until an erase has
 * been performed in this session. Writes must be sequential from offset 0
 * and the first one must carry a valid in-progress OS update header.
 */
static void blSpiLoader(bool force)
{
    const uint32_t intInPin = SH_INT_WAKEUP - GPIO_PA(0);
    struct StmGpio *gpioa = (struct StmGpio*)GPIOA_BASE;
    struct StmSpi *spi = (struct StmSpi*)SPI1_BASE;
    struct StmRcc *rcc = (struct StmRcc*)RCC_BASE;
    uint32_t oldApb2State, oldAhb1State, nRetries;
    bool seenErase = false;
    uint32_t nextAddr = 0;
    uint32_t expectedSize = 0;

    if (SH_INT_WAKEUP < GPIO_PA(0) || SH_INT_WAKEUP > GPIO_PA(15)) {

        //link time assert :)
        extern void ThisIsAnError_BlIntPinNotInGpioA(void);
        ThisIsAnError_BlIntPinNotInGpioA();
    }

    //SPI & GPIOA on
    oldApb2State = rcc->APB2ENR;
    oldAhb1State = rcc->AHB1ENR;
    rcc->APB2ENR |= PERIPH_APB2_SPI1;
    rcc->AHB1ENR |= PERIPH_AHB1_GPIOA;

    //reset units
    rcc->APB2RSTR |= PERIPH_APB2_SPI1;
    rcc->AHB1RSTR |= PERIPH_AHB1_GPIOA;
    rcc->APB2RSTR &=~ PERIPH_APB2_SPI1;
    rcc->AHB1RSTR &=~ PERIPH_AHB1_GPIOA;

    //configure GPIOA for SPI A4..A7 for SPI use (function 5), int pin as not func, high speed, no pullups, not open drain, proper directions
    gpioa->AFR[0] = (gpioa->AFR[0] & 0x0000ffff & ~(0x0f << (intInPin * 4))) | 0x55550000;
    gpioa->OSPEEDR |= 0x0000ff00 | (3 << (intInPin * 2));
    gpioa->PUPDR &=~ (0x0000ff00 | (3 << (intInPin * 2)));
    gpioa->OTYPER &=~ (0x00f0 | (1 << intInPin));
    gpioa->MODER = (gpioa->MODER & 0xffff00ff & ~(0x03 << (intInPin * 2))) | 0x0000aa00;

    //if int pin is not low, do not bother any further
    if (!(gpioa->IDR & (1 << intInPin)) || force) {

        //config SPI
        spi->CR1 = 0x00000040; //spi is on, configured same as bootloader would
        spi->CR2 = 0x00000000; //spi is on, configured same as bootloader would

        //wait for sync
        for (nRetries = 10000; nRetries; nRetries--) {
            if (spi->SR & 1) {
                if (spi->DR == BL_SYNC_IN)
                    break;
                (void)spi->SR; //re-read to clear overflow condition (if any)
            }
        }

        //if we saw a sync, do the bootloader thing
        if (nRetries) {
            static const uint8_t supportedCmds[] = {BL_CMD_GET, BL_CMD_READ_MEM, BL_CMD_WRITE_MEM, BL_CMD_ERASE, BL_CMD_GET_SIZES, BL_CMD_UPDATE_FINISHED};
            uint32_t allSizes[] = {__builtin_bswap32(__code_end - __code_start), __builtin_bswap32(__shared_end - __shared_start), __builtin_bswap32(__eedata_end - __eedata_start)};
            bool ack = true;  //we ack the sync

            ack = blSpiLoaderSendSyncOut(spi);

            //loop forever listening to commands
            while (1) {
                uint32_t sync, cmd, cmdNot, addr = 0, len, checksum = 0, i;
                uint8_t data[256];

                //send ack or NAK for last thing
                if (!blSpiLoaderSendAck(spi, ack))
                    goto out;

                while ((sync = blSpiLoaderTxRxByte(spi, 0)) != BL_SYNC_IN);
                cmd = blSpiLoaderTxRxByte(spi, 0);
                cmdNot = blSpiLoaderTxRxByte(spi, BL_ACK);

                //every command defaults to NAK; a case must set ack to report success
                ack = false;
                if (sync == BL_SYNC_IN && (cmd ^ cmdNot) == 0xff) switch (cmd) {
                case BL_CMD_GET:

                    //ACK the command
                    (void)blSpiLoaderSendAck(spi, true);

                    blSpiLoaderTxBytes(spi, supportedCmds, sizeof(supportedCmds));
                    ack = true;
                    break;

                case BL_CMD_READ_MEM:
                    if (!seenErase)  //no reading till we erase the shared area (this way we do not leak encrypted apps' plaintexts)
                        break;

                    //ACK the command
                    (void)blSpiLoaderSendAck(spi, true);

                    //get address
                    for (i = 0; i < 4; i++) {
                        uint32_t byte = blSpiLoaderTxRxByte(spi, 0);
                        checksum ^= byte;
                        addr = (addr << 8) + byte;
                    }

                    //reject addresses outside of our fake area or on invalid checksum
                    if (blSpiLoaderTxRxByte(spi, 0) != checksum || addr < BL_SHARED_AREA_FAKE_ADDR || addr - BL_SHARED_AREA_FAKE_ADDR > __shared_end - __shared_start)
                        break;

                    //ack the address
                    (void)blSpiLoaderSendAck(spi, true);

                    //get the length (transmitted as length - 1)
                    len = blSpiLoaderTxRxByte(spi, 0);

                    //reject invalid checksum
                    if (blSpiLoaderTxRxByte(spi, 0) != (uint8_t)~len || addr + len - BL_SHARED_AREA_FAKE_ADDR > __shared_end - __shared_start)
                        break;

                    len++;

                    //reject reads past the end of the shared area
                    if (addr + len - BL_SHARED_AREA_FAKE_ADDR > __shared_end - __shared_start)
                        break;

                    //ack the length
                    (void)blSpiLoaderSendAck(spi, true);

                    //read the data & send it
                    blSpiLoaderTxBytes(spi, __shared_start + addr - BL_SHARED_AREA_FAKE_ADDR, len);
                    ack = true;
                    break;

                case BL_CMD_WRITE_MEM:
                    if (!seenErase)  //no writing till we erase the shared area (this way we do not purposefully modify encrypted apps' plaintexts in a nefarious fashion)
                        break;

                    //ACK the command
                    (void)blSpiLoaderSendAck(spi, true);

                    //get address
                    for (i = 0; i < 4; i++) {
                        uint32_t byte = blSpiLoaderTxRxByte(spi, 0);
                        checksum ^= byte;
                        addr = (addr << 8) + byte;
                    }

                    //reject addresses outside of our fake area or on invalid checksum
                    if (blSpiLoaderTxRxByte(spi, 0) != checksum ||
                        addr < BL_SHARED_AREA_FAKE_ADDR ||
                        addr - BL_SHARED_AREA_FAKE_ADDR > __shared_end - __shared_start)
                        break;

                    //writes must be sequential and contiguous, starting at offset 0
                    addr -= BL_SHARED_AREA_FAKE_ADDR;
                    if (addr != nextAddr)
                        break;

                    //ack the address
                    (void)blSpiLoaderSendAck(spi, true);

                    //get the length (transmitted as length - 1, so 1..256 bytes fit in data[])
                    checksum = len = blSpiLoaderTxRxByte(spi, 0);
                    len++;

                    //get bytes
                    for (i = 0; i < len; i++) {
                        uint32_t byte = blSpiLoaderTxRxByte(spi, 0);
                        checksum ^= byte;
                        data[i] = byte;
                    }

                    //reject writes that take us outside of shared area or invalid checksums
                    if (blSpiLoaderTxRxByte(spi, 0) != checksum || addr + len > __shared_end - __shared_start)
                        break;

                    // The former "no writes starting inside the header" check is
                    // superseded by the sequential contiguous write requirement above.

                    //a write starting at zero must be big enough to contain a full OS update header
                    if (!addr) {
                        const struct OsUpdateHdr *hdr = (const struct OsUpdateHdr*)data;

                        //verify it is at least as big as the header
                        if (len < sizeof(struct OsUpdateHdr))
                            break;

                        //check for magic
                        for (i = 0; i < sizeof(hdr->magic) && hdr->magic[i] == mOsUpdateMagic[i]; i++);

                        //verify magic check passed & marker is properly set to inprogress
                        if (i != sizeof(hdr->magic) || hdr->marker != OS_UPDT_MARKER_INPROGRESS)
                            break;

                        expectedSize = sizeof(*hdr) + hdr->size + 2 * RSA_BYTES;
                    }

                    //never accept more than the header announced
                    if (addr + len > expectedSize)
                        break;

                    //do it
                    ack = blExtApiProgramSharedArea(__shared_start + addr, data, len, BL_FLASH_KEY1, BL_FLASH_KEY2);
                    blSpiLoaderDrainRxFifo(spi);
                    nextAddr += len;
                    break;

                case BL_CMD_ERASE:

                    //ACK the command
                    (void)blSpiLoaderSendAck(spi, true);

                    //get address
                    for (i = 0; i < 2; i++) {
                        uint32_t byte = blSpiLoaderTxRxByte(spi, 0);
                        checksum ^= byte;
                        addr = (addr << 8) + byte;
                    }

                    //reject addresses that are not our magic address or on invalid checksum
                    if (blSpiLoaderTxRxByte(spi, 0) != checksum || addr != BL_SHARED_AREA_FAKE_ERASE_BLK)
                        break;

                    //do it
                    ack = blExtApiEraseSharedArea(BL_FLASH_KEY1, BL_FLASH_KEY2);
                    if (ack) {
                        seenErase = true;
                        nextAddr = 0;
                        expectedSize = 0;
                    }
                    blSpiLoaderDrainRxFifo(spi);
                    break;

                case BL_CMD_GET_SIZES:

                    //ACK the command
                    (void)blSpiLoaderSendAck(spi, true);

                    blSpiLoaderTxBytes(spi, allSizes, sizeof(allSizes));
                    // NOTE(review): unlike BL_CMD_GET, ack is left false here, so the
                    // next loop iteration sends a NAK after the sizes - confirm
                    // whether the host side relies on that.
                    break;

                case BL_CMD_UPDATE_FINISHED:

                    blUpdateMark(OS_UPDT_MARKER_INPROGRESS, OS_UPDT_MARKER_DOWNLOADED);
                    ack = blUpdateVerify();
                    break;

                default:
                    //unknown command: leave ack false so it is NAKed
                    break;
                }
            }
        }
    }

out:

    //reset units & return APB2 & AHB1 to initial state
    rcc->APB2RSTR |= PERIPH_APB2_SPI1;
    rcc->AHB1RSTR |= PERIPH_AHB1_GPIOA;
    rcc->APB2RSTR &=~ PERIPH_APB2_SPI1;
    rcc->AHB1RSTR &=~ PERIPH_AHB1_GPIOA;
    rcc->APB2ENR = oldApb2State;
    rcc->AHB1ENR = oldAhb1State;
}

/*
 * Bootloader entry point.
 *
 * Masks interrupts, points VTOR at the bootloader's vectors, clears .bss and
 * copies .data, then repeatedly: runs the SPI loader, verifies any update in
 * the shared area and applies it (or erases the shared area when something
 * is there but fails verification). The loop repeats, forcing the SPI
 * loader, for as long as the first word of the application area reads as
 * erased flash. Finally loads SP and PC from the application's vector
 * table; never returns.
 */
void __blEntry(void)
{
    extern char __bss_end[], __bss_start[], __data_end[], __data_start[], __data_data[];
    uint32_t appBase = ((uint32_t)&__code_start) & ~1;
    bool forceLoad = false;

    //make sure we're the vector table and no ints happen (BL does not use them)
    blDisableInts();
    SCB->VTOR = (uint32_t)&BL;

    //init things a little for the higher levels
    memset(__bss_start, 0, __bss_end - __bss_start);
    memcpy(__data_start, __data_data, __data_end - __data_start);

    //say hello
    blLog("NanohubOS bootloader up @ %p\n", &__blEntry);

    //enter SPI loader if requested
    do {
        uint32_t res;
        struct OsUpdateHdr *os;

        blSpiLoader(forceLoad);
        res = blVerifyOsUpdate(&os, NULL);
        if (res == OS_UPDT_SUCCESS)
            blApplyVerifiedUpdate(os);
        else if (res != OS_UPDT_HDR_CHECK_FAILED)
            blExtApiEraseSharedArea(BL_FLASH_KEY1, BL_FLASH_KEY2);

        forceLoad = true;
    } while (*(volatile uint32_t*)appBase == 0xFFFFFFFF);

    //call main app with ints off
    blDisableInts();
    SCB->VTOR = appBase;
    asm volatile(
        "LDR SP, [%0, #0] \n"
        "LDR PC, [%0, #4] \n"
        :
        :"r"(appBase)
        :"memory", "cc"
    );

    //we should never return here
    while(1);
}