From: Tomas Vanek Date: Tue, 5 Feb 2019 08:39:11 +0000 (+0100) Subject: target/cortex_m: faster reading of all CPU registers X-Git-Tag: v0.12.0-rc1~431 X-Git-Url: https://review.openocd.org/gitweb?p=openocd.git;a=commitdiff_plain;h=88f429ead019fd6df96ec15f0d897385f3cef0d0 target/cortex_m: faster reading of all CPU registers Without the change cortex_m_debug_entry() reads all registers calling cortex_m_load_core_reg_u32() for each register with a poor usage of JTAG/SWD queue. It is time consuming, especially on an USB FS based adapter. Moreover if target_request debugmsgs are enabled, DCB_DCRDR is saved and restored on each register read. This change introduces cortex_m_fast_read_all_regs() which queues all register reads and a single dap_run() transaction does all work. cortex_m_fast_read_all_regs() reads all registers unconditionally regardless register cache is valid or not. This is a difference from the original cortex_m_debug_entry() code. cortex_m_debug_entry times from -d3 log, Cortex-M4F and CMSIS-DAP (Kinetis K28F-FRDM kit) target_request | time [ms] debugmsgs | without the change | with the change ---------------+--------------------+----------------- disable | 186 | 27 enable | 232 | 29 Added checking of DHCSR.S_REGRDY flag. If "not ready" is seen, cortex_m->slow_register_read is set and fallback to the old register read method cortex_m_slow_read_all_regs() is used instead of cortex_m_fast_read_all_regs(). Change-Id: I0665d94b97ede217394640871dc451ec93410254 Signed-off-by: Tomas Vanek Reviewed-on: https://review.openocd.org/c/openocd/+/5321 Tested-by: jenkins Reviewed-by: Andreas Fritiofson Reviewed-by: Antonio Borneo --- diff --git a/src/target/armv7m.c b/src/target/armv7m.c index ffc8ca875d..0a51ad4d60 100644 --- a/src/target/armv7m.c +++ b/src/target/armv7m.c @@ -251,7 +251,7 @@ static int armv7m_set_core_reg(struct reg *reg, uint8_t *buf) return ERROR_OK; } -static uint32_t armv7m_map_id_to_regsel(unsigned int arm_reg_id) +uint32_t armv7m_map_id_to_regsel(unsigned int arm_reg_id) { switch (arm_reg_id) { case ARMV7M_R0 ... ARMV7M_R14: @@ -289,7 +289,7 @@ static uint32_t armv7m_map_id_to_regsel(unsigned int arm_reg_id) } } -static bool armv7m_map_reg_packing(unsigned int arm_reg_id, +bool armv7m_map_reg_packing(unsigned int arm_reg_id, unsigned int *reg32_id, uint32_t *offset) { diff --git a/src/target/armv7m.h b/src/target/armv7m.h index 2816a91452..d33e574928 100644 --- a/src/target/armv7m.h +++ b/src/target/armv7m.h @@ -309,6 +309,11 @@ int armv7m_invalidate_core_regs(struct target *target); int armv7m_restore_context(struct target *target); +uint32_t armv7m_map_id_to_regsel(unsigned int arm_reg_id); + +bool armv7m_map_reg_packing(unsigned int arm_reg_id, + unsigned int *reg32_id, uint32_t *offset); + int armv7m_checksum_memory(struct target *target, target_addr_t address, uint32_t count, uint32_t *checksum); int armv7m_blank_check_memory(struct target *target, diff --git a/src/target/cortex_m.c b/src/target/cortex_m.c index ba7bc17b55..82d5eff800 100644 --- a/src/target/cortex_m.c +++ b/src/target/cortex_m.c @@ -202,6 +202,163 @@ static int cortex_m_load_core_reg_u32(struct target *target, return retval; } +static int cortex_m_slow_read_all_regs(struct target *target) +{ + struct armv7m_common *armv7m = target_to_armv7m(target); + const unsigned int num_regs = armv7m->arm.core_cache->num_regs; + + for (unsigned int reg_id = 0; reg_id < num_regs; reg_id++) { + struct reg *r = &armv7m->arm.core_cache->reg_list[reg_id]; + if (r->exist) { + int retval = armv7m->arm.read_core_reg(target, r, reg_id, ARM_MODE_ANY); + if (retval != ERROR_OK) + return retval; + } + } + return ERROR_OK; +} + +static int cortex_m_queue_reg_read(struct target *target, uint32_t regsel, + uint32_t *reg_value, uint32_t *dhcsr) +{ + struct armv7m_common *armv7m = target_to_armv7m(target); + int retval; + + retval = mem_ap_write_u32(armv7m->debug_ap, DCB_DCRSR, regsel); + if (retval != ERROR_OK) + return retval; + + retval = mem_ap_read_u32(armv7m->debug_ap, DCB_DHCSR, dhcsr); + if (retval != ERROR_OK) + return retval; + + return mem_ap_read_u32(armv7m->debug_ap, DCB_DCRDR, reg_value); +} + +static int cortex_m_fast_read_all_regs(struct target *target) +{ + struct cortex_m_common *cortex_m = target_to_cm(target); + struct armv7m_common *armv7m = target_to_armv7m(target); + int retval; + uint32_t dcrdr; + + /* because the DCB_DCRDR is used for the emulated dcc channel + * we have to save/restore the DCB_DCRDR when used */ + if (target->dbg_msg_enabled) { + retval = mem_ap_read_u32(armv7m->debug_ap, DCB_DCRDR, &dcrdr); + if (retval != ERROR_OK) + return retval; + } + + const unsigned int num_regs = armv7m->arm.core_cache->num_regs; + const unsigned int n_r32 = ARMV7M_LAST_REG - ARMV7M_CORE_FIRST_REG + 1 + + ARMV7M_FPU_LAST_REG - ARMV7M_FPU_FIRST_REG + 1; + /* we need one 32-bit word for each register except FP D0..D15, which + * need two words */ + uint32_t r_vals[n_r32]; + uint32_t dhcsr[n_r32]; + + unsigned int wi = 0; /* write index to r_vals and dhcsr arrays */ + unsigned int reg_id; /* register index in the reg_list, ARMV7M_R0... */ + for (reg_id = 0; reg_id < num_regs; reg_id++) { + struct reg *r = &armv7m->arm.core_cache->reg_list[reg_id]; + if (!r->exist) + continue; /* skip non existent registers */ + + if (r->size <= 8) { + /* Any 8-bit or shorter register is unpacked from a 32-bit + * container register. Skip it now. */ + continue; + } + + uint32_t regsel = armv7m_map_id_to_regsel(reg_id); + retval = cortex_m_queue_reg_read(target, regsel, &r_vals[wi], + &dhcsr[wi]); + if (retval != ERROR_OK) + return retval; + wi++; + + assert(r->size == 32 || r->size == 64); + if (r->size == 32) + continue; /* done with 32-bit register */ + + assert(reg_id >= ARMV7M_FPU_FIRST_REG && reg_id <= ARMV7M_FPU_LAST_REG); + /* the odd part of FP register (S1, S3...) */ + retval = cortex_m_queue_reg_read(target, regsel + 1, &r_vals[wi], + &dhcsr[wi]); + if (retval != ERROR_OK) + return retval; + wi++; + } + + assert(wi <= n_r32); + + retval = dap_run(armv7m->debug_ap->dap); + if (retval != ERROR_OK) + return retval; + + if (target->dbg_msg_enabled) { + /* restore DCB_DCRDR - this needs to be in a separate + * transaction otherwise the emulated DCC channel breaks */ + retval = mem_ap_write_atomic_u32(armv7m->debug_ap, DCB_DCRDR, dcrdr); + if (retval != ERROR_OK) + return retval; + } + + bool not_ready = false; + for (unsigned int i = 0; i < wi; i++) { + if ((dhcsr[i] & S_REGRDY) == 0) { + not_ready = true; + LOG_DEBUG("Register %u was not ready during fast read", i); + } + cortex_m_cumulate_dhcsr_sticky(cortex_m, dhcsr[i]); + } + + if (not_ready) { + /* Any register was not ready, + * fall back to slow read with S_REGRDY polling */ + return ERROR_TIMEOUT_REACHED; + } + + LOG_DEBUG("read %u 32-bit registers", wi); + + unsigned int ri = 0; /* read index from r_vals array */ + for (reg_id = 0; reg_id < num_regs; reg_id++) { + struct reg *r = &armv7m->arm.core_cache->reg_list[reg_id]; + if (!r->exist) + continue; /* skip non existent registers */ + + r->dirty = false; + + unsigned int reg32_id; + uint32_t offset; + if (armv7m_map_reg_packing(reg_id, ®32_id, &offset)) { + /* Unpack a partial register from 32-bit container register */ + struct reg *r32 = &armv7m->arm.core_cache->reg_list[reg32_id]; + + /* The container register ought to precede all regs unpacked + * from it in the reg_list. So the value should be ready + * to unpack */ + assert(r32->valid); + buf_cpy(r32->value + offset, r->value, r->size); + + } else { + assert(r->size == 32 || r->size == 64); + buf_set_u32(r->value, 0, 32, r_vals[ri++]); + + if (r->size == 64) { + assert(reg_id >= ARMV7M_FPU_FIRST_REG && reg_id <= ARMV7M_FPU_LAST_REG); + /* the odd part of FP register (S1, S3...) */ + buf_set_u32(r->value + 4, 0, 32, r_vals[ri++]); + } + } + r->valid = true; + } + assert(ri == wi); + + return retval; +} + static int cortex_m_store_core_reg_u32(struct target *target, uint32_t regsel, uint32_t value) { @@ -610,7 +767,6 @@ static int cortex_m_examine_exception_reason(struct target *target) static int cortex_m_debug_entry(struct target *target) { - int i; uint32_t xPSR; int retval; struct cortex_m_common *cortex_m = target_to_cm(target); @@ -646,16 +802,21 @@ static int cortex_m_debug_entry(struct target *target) secure_state = (dscsr & DSCSR_CDS) == DSCSR_CDS; } - /* Examine target state and mode - * First load register accessible through core debug port */ - int num_regs = arm->core_cache->num_regs; - - for (i = 0; i < num_regs; i++) { - r = &armv7m->arm.core_cache->reg_list[i]; - if (r->exist && !r->valid) - arm->read_core_reg(target, r, i, ARM_MODE_ANY); + /* Load all registers to arm.core_cache */ + if (!cortex_m->slow_register_read) { + retval = cortex_m_fast_read_all_regs(target); + if (retval == ERROR_TIMEOUT_REACHED) { + cortex_m->slow_register_read = true; + LOG_DEBUG("Switched to slow register read"); + } } + if (cortex_m->slow_register_read) + retval = cortex_m_slow_read_all_regs(target); + + if (retval != ERROR_OK) + return retval; + r = arm->cpsr; xPSR = buf_get_u32(r->value, 0, 32); diff --git a/src/target/cortex_m.h b/src/target/cortex_m.h index a5dfbf856a..57ef1e7e84 100644 --- a/src/target/cortex_m.h +++ b/src/target/cortex_m.h @@ -238,6 +238,8 @@ struct cortex_m_common { const struct cortex_m_part_info *core_info; struct armv7m_common armv7m; + bool slow_register_read; /* A register has not been ready, poll S_REGRDY */ + int apsel; /* Whether this target has the erratum that makes C_MASKINTS not apply to