smp: move sub-command "smp_gdb" in file smp.c
[openocd.git] / src / target / cortex_a.c
index c91da3bc7d731ec68d073ba5b661e0e668c8052b..6eb6aa933836fd6bf510ccc4c422bc4f78d46366 100644 (file)
  *   michel.jaouen@stericsson.com : smp minimum support                    *
  *                                                                         *
  *   Copyright (C) Broadcom 2012                                           *
- *   ehunter@broadcom.com : Cortex R4 support                              *
+ *   ehunter@broadcom.com : Cortex-R4 support                              *
+ *                                                                         *
+ *   Copyright (C) 2013 Kamal Dasu                                         *
+ *   kdasu.kdev@gmail.com                                                  *
  *                                                                         *
  *   This program is free software; you can redistribute it and/or modify  *
  *   it under the terms of the GNU General Public License as published by  *
  *   GNU General Public License for more details.                          *
  *                                                                         *
  *   You should have received a copy of the GNU General Public License     *
- *   along with this program; if not, write to the                         *
- *   Free Software Foundation, Inc.,                                       *
- *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.           *
+ *   along with this program.  If not, see <http://www.gnu.org/licenses/>. *
  *                                                                         *
  *   Cortex-A8(tm) TRM, ARM DDI 0344H                                      *
  *   Cortex-A9(tm) TRM, ARM DDI 0407F                                      *
  *   Cortex-A4(tm) TRM, ARM DDI 0363E                                      *
+ *   Cortex-A15(tm)TRM, ARM DDI 0438C                                      *
  *                                                                         *
  ***************************************************************************/
 
 #include "breakpoints.h"
 #include "cortex_a.h"
 #include "register.h"
+#include "armv7a_mmu.h"
 #include "target_request.h"
 #include "target_type.h"
 #include "arm_opcodes.h"
+#include "arm_semihosting.h"
+#include "transport/transport.h"
+#include "smp.h"
 #include <helper/time_support.h>
 
-static int cortex_a8_poll(struct target *target);
-static int cortex_a8_debug_entry(struct target *target);
-static int cortex_a8_restore_context(struct target *target, bool bpwp);
-static int cortex_a8_set_breakpoint(struct target *target,
+static int cortex_a_poll(struct target *target);
+static int cortex_a_debug_entry(struct target *target);
+static int cortex_a_restore_context(struct target *target, bool bpwp);
+static int cortex_a_set_breakpoint(struct target *target,
        struct breakpoint *breakpoint, uint8_t matchmode);
-static int cortex_a8_set_context_breakpoint(struct target *target,
+static int cortex_a_set_context_breakpoint(struct target *target,
        struct breakpoint *breakpoint, uint8_t matchmode);
-static int cortex_a8_set_hybrid_breakpoint(struct target *target,
+static int cortex_a_set_hybrid_breakpoint(struct target *target,
        struct breakpoint *breakpoint);
-static int cortex_a8_unset_breakpoint(struct target *target,
+static int cortex_a_unset_breakpoint(struct target *target,
        struct breakpoint *breakpoint);
-static int cortex_a8_dap_read_coreregister_u32(struct target *target,
-       uint32_t *value, int regnum);
-static int cortex_a8_dap_write_coreregister_u32(struct target *target,
-       uint32_t value, int regnum);
-static int cortex_a8_mmu(struct target *target, int *enabled);
-static int cortex_a8_virt2phys(struct target *target,
-       uint32_t virt, uint32_t *phys);
-static int cortex_a8_read_apb_ab_memory(struct target *target,
+static int cortex_a_mmu(struct target *target, int *enabled);
+static int cortex_a_mmu_modify(struct target *target, int enable);
+static int cortex_a_virt2phys(struct target *target,
+       target_addr_t virt, target_addr_t *phys);
+static int cortex_a_read_cpu_memory(struct target *target,
        uint32_t address, uint32_t size, uint32_t count, uint8_t *buffer);
 
 
 /*  restore cp15_control_reg at resume */
-static int cortex_a8_restore_cp15_control_reg(struct target *target)
+static int cortex_a_restore_cp15_control_reg(struct target *target)
 {
        int retval = ERROR_OK;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
        struct armv7a_common *armv7a = target_to_armv7a(target);
 
-       if (cortex_a8->cp15_control_reg != cortex_a8->cp15_control_reg_curr) {
-               cortex_a8->cp15_control_reg_curr = cortex_a8->cp15_control_reg;
-               /* LOG_INFO("cp15_control_reg: %8.8" PRIx32, cortex_a8->cp15_control_reg); */
+       if (cortex_a->cp15_control_reg != cortex_a->cp15_control_reg_curr) {
+               cortex_a->cp15_control_reg_curr = cortex_a->cp15_control_reg;
+               /* LOG_INFO("cp15_control_reg: %8.8" PRIx32, cortex_a->cp15_control_reg); */
                retval = armv7a->arm.mcr(target, 15,
                                0, 0,   /* op1, op2 */
                                1, 0,   /* CRn, CRm */
-                               cortex_a8->cp15_control_reg);
+                               cortex_a->cp15_control_reg);
        }
        return retval;
 }
 
-/*  check address before cortex_a8_apb read write access with mmu on
- *  remove apb predictible data abort */
-static int cortex_a8_check_address(struct target *target, uint32_t address)
+/*
+ * Set up ARM core for memory access.
+ * If !phys_access, switch to SVC mode and make sure MMU is on
+ * If phys_access, switch off mmu
+ */
+static int cortex_a_prep_memaccess(struct target *target, int phys_access)
 {
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       uint32_t os_border = armv7a->armv7a_mmu.os_border;
-       if ((address < os_border) &&
-               (armv7a->arm.core_mode == ARM_MODE_SVC)) {
-               LOG_ERROR("%x access in userspace and target in supervisor", address);
-               return ERROR_FAIL;
-       }
-       if ((address >= os_border) &&
-               (cortex_a8->curr_mode != ARM_MODE_SVC)) {
-               dpm_modeswitch(&armv7a->dpm, ARM_MODE_SVC);
-               cortex_a8->curr_mode = ARM_MODE_SVC;
-               LOG_INFO("%x access in kernel space and target not in supervisor",
-                       address);
-               return ERROR_OK;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       int mmu_enabled = 0;
+
+       if (phys_access == 0) {
+               arm_dpm_modeswitch(&armv7a->dpm, ARM_MODE_SVC);
+               cortex_a_mmu(target, &mmu_enabled);
+               if (mmu_enabled)
+                       cortex_a_mmu_modify(target, 1);
+               if (cortex_a->dacrfixup_mode == CORTEX_A_DACRFIXUP_ON) {
+                       /* overwrite DACR to all-manager */
+                       armv7a->arm.mcr(target, 15,
+                                       0, 0, 3, 0,
+                                       0xFFFFFFFF);
+               }
+       } else {
+               cortex_a_mmu(target, &mmu_enabled);
+               if (mmu_enabled)
+                       cortex_a_mmu_modify(target, 0);
        }
-       if ((address < os_border) &&
-               (cortex_a8->curr_mode == ARM_MODE_SVC)) {
-               dpm_modeswitch(&armv7a->dpm, ARM_MODE_ANY);
-               cortex_a8->curr_mode = ARM_MODE_ANY;
+       return ERROR_OK;
+}
+
+/*
+ * Restore ARM core after memory access.
+ * If !phys_access, switch to previous mode
+ * If phys_access, restore MMU setting
+ */
+static int cortex_a_post_memaccess(struct target *target, int phys_access)
+{
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+
+       if (phys_access == 0) {
+               if (cortex_a->dacrfixup_mode == CORTEX_A_DACRFIXUP_ON) {
+                       /* restore */
+                       armv7a->arm.mcr(target, 15,
+                                       0, 0, 3, 0,
+                                       cortex_a->cp15_dacr_reg);
+               }
+               arm_dpm_modeswitch(&armv7a->dpm, ARM_MODE_ANY);
+       } else {
+               int mmu_enabled = 0;
+               cortex_a_mmu(target, &mmu_enabled);
+               if (mmu_enabled)
+                       cortex_a_mmu_modify(target, 1);
        }
        return ERROR_OK;
 }
+
+
 /*  modify cp15_control_reg in order to enable or disable mmu for :
  *  - virt2phys address conversion
  *  - read or write memory in phys or virt address */
-static int cortex_a8_mmu_modify(struct target *target, int enable)
+static int cortex_a_mmu_modify(struct target *target, int enable)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
        struct armv7a_common *armv7a = target_to_armv7a(target);
        int retval = ERROR_OK;
+       int need_write = 0;
+
        if (enable) {
                /*  if mmu enabled at target stop and mmu not enable */
-               if (!(cortex_a8->cp15_control_reg & 0x1U)) {
+               if (!(cortex_a->cp15_control_reg & 0x1U)) {
                        LOG_ERROR("trying to enable mmu on target stopped with mmu disable");
                        return ERROR_FAIL;
                }
-               if (!(cortex_a8->cp15_control_reg_curr & 0x1U)) {
-                       cortex_a8->cp15_control_reg_curr |= 0x1U;
-                       retval = armv7a->arm.mcr(target, 15,
-                                       0, 0,   /* op1, op2 */
-                                       1, 0,   /* CRn, CRm */
-                                       cortex_a8->cp15_control_reg_curr);
+               if ((cortex_a->cp15_control_reg_curr & 0x1U) == 0) {
+                       cortex_a->cp15_control_reg_curr |= 0x1U;
+                       need_write = 1;
                }
        } else {
-               if (cortex_a8->cp15_control_reg_curr & 0x4U) {
-                       /*  data cache is active */
-                       cortex_a8->cp15_control_reg_curr &= ~0x4U;
-                       /* flush data cache armv7 function to be called */
-                       if (armv7a->armv7a_mmu.armv7a_cache.flush_all_data_cache)
-                               armv7a->armv7a_mmu.armv7a_cache.flush_all_data_cache(target);
-               }
-               if ((cortex_a8->cp15_control_reg_curr & 0x1U)) {
-                       cortex_a8->cp15_control_reg_curr &= ~0x1U;
-                       retval = armv7a->arm.mcr(target, 15,
-                                       0, 0,   /* op1, op2 */
-                                       1, 0,   /* CRn, CRm */
-                                       cortex_a8->cp15_control_reg_curr);
+               if ((cortex_a->cp15_control_reg_curr & 0x1U) == 0x1U) {
+                       cortex_a->cp15_control_reg_curr &= ~0x1U;
+                       need_write = 1;
                }
        }
+
+       if (need_write) {
+               LOG_DEBUG("%s, writing cp15 ctrl: %" PRIx32,
+                       enable ? "enable mmu" : "disable mmu",
+                       cortex_a->cp15_control_reg_curr);
+
+               retval = armv7a->arm.mcr(target, 15,
+                               0, 0,   /* op1, op2 */
+                               1, 0,   /* CRn, CRm */
+                               cortex_a->cp15_control_reg_curr);
+       }
        return retval;
 }
 
 /*
- * Cortex-A8 Basic debug access, very low level assumes state is saved
+ * Cortex-A Basic debug access, very low level assumes state is saved
  */
-static int cortex_a8_init_debug_access(struct target *target)
+static int cortex_a_init_debug_access(struct target *target)
 {
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
+       uint32_t dscr;
        int retval;
-       uint32_t dummy;
 
-       LOG_DEBUG(" ");
+       /* lock memory-mapped access to debug registers to prevent
+        * software interference */
+       retval = mem_ap_write_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_LOCKACCESS, 0);
+       if (retval != ERROR_OK)
+               return retval;
 
-       /* Unlocking the debug registers for modification
-        * The debugport might be uninitialised so try twice */
-       retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_LOCKACCESS, 0xC5ACCE55);
-       if (retval != ERROR_OK) {
-               /* try again */
-               retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_LOCKACCESS, 0xC5ACCE55);
-               if (retval == ERROR_OK)
-                       LOG_USER(
-                               "Locking debug access failed on first, but succeeded on second try.");
-       }
+       /* Disable cacheline fills and force cache write-through in debug state */
+       retval = mem_ap_write_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCCR, 0);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Disable TLB lookup and refill/eviction in debug state */
+       retval = mem_ap_write_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSMCR, 0);
        if (retval != ERROR_OK)
                return retval;
-       /* Clear Sticky Power Down status Bit in PRSR to enable access to
-          the registers in the Core Power Domain */
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_PRSR, &dummy);
+
+       retval = dap_run(armv7a->debug_ap->dap);
        if (retval != ERROR_OK)
                return retval;
 
@@ -197,8 +230,41 @@ static int cortex_a8_init_debug_access(struct target *target)
 
        /* Resync breakpoint registers */
 
+       /* Enable halt for breakpoint, watchpoint and vector catch */
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       if (retval != ERROR_OK)
+               return retval;
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, dscr | DSCR_HALT_DBG_MODE);
+       if (retval != ERROR_OK)
+               return retval;
+
        /* Since this is likely called from init or reset, update target state information*/
-       return cortex_a8_poll(target);
+       return cortex_a_poll(target);
+}
+
+static int cortex_a_wait_instrcmpl(struct target *target, uint32_t *dscr, bool force)
+{
+       /* Waits until InstrCmpl_l becomes 1, indicating instruction is done.
+        * Writes final value of DSCR into *dscr. Pass force to force always
+        * reading DSCR at least once. */
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       int64_t then = timeval_ms();
+       while ((*dscr & DSCR_INSTR_COMP) == 0 || force) {
+               force = false;
+               int retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DSCR, dscr);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("Could not read DSCR register");
+                       return retval;
+               }
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("Timeout waiting for InstrCompl=1");
+                       return ERROR_FAIL;
+               }
+       }
+       return ERROR_OK;
 }
 
 /* To reduce needless round-trips, pass in a pointer to the current
@@ -206,48 +272,37 @@ static int cortex_a8_init_debug_access(struct target *target)
  * value on return from this function; or DSCR_INSTR_COMP if you
  * happen to know that no instruction is pending.
  */
-static int cortex_a8_exec_opcode(struct target *target,
+static int cortex_a_exec_opcode(struct target *target,
        uint32_t opcode, uint32_t *dscr_p)
 {
        uint32_t dscr;
        int retval;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
 
        dscr = dscr_p ? *dscr_p : 0;
 
        LOG_DEBUG("exec opcode 0x%08" PRIx32, opcode);
 
        /* Wait for InstrCompl bit to be set */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_INSTR_COMP) == 0) {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
-               if (retval != ERROR_OK) {
-                       LOG_ERROR("Could not read DSCR register, opcode = 0x%08" PRIx32, opcode);
-                       return retval;
-               }
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for cortex_a8_exec_opcode");
-                       return ERROR_FAIL;
-               }
-       }
+       retval = cortex_a_wait_instrcmpl(target, dscr_p, false);
+       if (retval != ERROR_OK)
+               return retval;
 
-       retval = mem_ap_sel_write_u32(swjdp, armv7a->debug_ap,
+       retval = mem_ap_write_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_ITR, opcode);
        if (retval != ERROR_OK)
                return retval;
 
-       then = timeval_ms();
+       int64_t then = timeval_ms();
        do {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                                armv7a->debug_base + CPUDBG_DSCR, &dscr);
                if (retval != ERROR_OK) {
                        LOG_ERROR("Could not read DSCR register");
                        return retval;
                }
                if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for cortex_a8_exec_opcode");
+                       LOG_ERROR("Timeout waiting for cortex_a_exec_opcode");
                        return ERROR_FAIL;
                }
        } while ((dscr & DSCR_INSTR_COMP) == 0);        /* Wait for InstrCompl bit to be set */
@@ -258,191 +313,21 @@ static int cortex_a8_exec_opcode(struct target *target,
        return retval;
 }
 
-/**************************************************************************
-Read core register with very few exec_opcode, fast but needs work_area.
-This can cause problems with MMU active.
-**************************************************************************/
-static int cortex_a8_read_regs_through_mem(struct target *target, uint32_t address,
-       uint32_t *regfile)
-{
-       int retval = ERROR_OK;
-       struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
-
-       retval = cortex_a8_dap_read_coreregister_u32(target, regfile, 0);
-       if (retval != ERROR_OK)
-               return retval;
-       retval = cortex_a8_dap_write_coreregister_u32(target, address, 0);
-       if (retval != ERROR_OK)
-               return retval;
-       retval = cortex_a8_exec_opcode(target, ARMV4_5_STMIA(0, 0xFFFE, 0, 0), NULL);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_sel_read_buf_u32(swjdp, armv7a->memory_ap,
-                       (uint8_t *)(&regfile[1]), 4*15, address);
-
-       return retval;
-}
-
-static int cortex_a8_dap_read_coreregister_u32(struct target *target,
-       uint32_t *value, int regnum)
-{
-       int retval = ERROR_OK;
-       uint8_t reg = regnum&0xFF;
-       uint32_t dscr = 0;
-       struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
-
-       if (reg > 17)
-               return retval;
-
-       if (reg < 15) {
-               /* Rn to DCCTX, "MCR p14, 0, Rn, c0, c5, 0"  0xEE00nE15 */
-               retval = cortex_a8_exec_opcode(target,
-                               ARMV4_5_MCR(14, 0, reg, 0, 5, 0),
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-       } else if (reg == 15) {
-               /* "MOV r0, r15"; then move r0 to DCCTX */
-               retval = cortex_a8_exec_opcode(target, 0xE1A0000F, &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               retval = cortex_a8_exec_opcode(target,
-                               ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-       } else {
-               /* "MRS r0, CPSR" or "MRS r0, SPSR"
-                * then move r0 to DCCTX
-                */
-               retval = cortex_a8_exec_opcode(target, ARMV4_5_MRS(0, reg & 1), &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               retval = cortex_a8_exec_opcode(target,
-                               ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-       }
-
-       /* Wait for DTRRXfull then read DTRRTX */
-       long long then = timeval_ms();
-       while ((dscr & DSCR_DTR_TX_FULL) == 0) {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               if (timeval_ms() > then + 1000) {
-                       LOG_ERROR("Timeout waiting for cortex_a8_exec_opcode");
-                       return ERROR_FAIL;
-               }
-       }
-
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_DTRTX, value);
-       LOG_DEBUG("read DCC 0x%08" PRIx32, *value);
-
-       return retval;
-}
-
-static int cortex_a8_dap_write_coreregister_u32(struct target *target,
-       uint32_t value, int regnum)
-{
-       int retval = ERROR_OK;
-       uint8_t Rd = regnum&0xFF;
-       uint32_t dscr;
-       struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
-
-       LOG_DEBUG("register %i, value 0x%08" PRIx32, regnum, value);
-
-       /* Check that DCCRX is not full */
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-       if (dscr & DSCR_DTR_RX_FULL) {
-               LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
-               /* Clear DCCRX with MRC(p14, 0, Rd, c0, c5, 0), opcode  0xEE100E15 */
-               retval = cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-       }
-
-       if (Rd > 17)
-               return retval;
-
-       /* Write DTRRX ... sets DSCR.DTRRXfull but exec_opcode() won't care */
-       LOG_DEBUG("write DCC 0x%08" PRIx32, value);
-       retval = mem_ap_sel_write_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_DTRRX, value);
-       if (retval != ERROR_OK)
-               return retval;
-
-       if (Rd < 15) {
-               /* DCCRX to Rn, "MRC p14, 0, Rn, c0, c5, 0", 0xEE10nE15 */
-               retval = cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, Rd, 0, 5, 0),
-                               &dscr);
-
-               if (retval != ERROR_OK)
-                       return retval;
-       } else if (Rd == 15) {
-               /* DCCRX to R0, "MRC p14, 0, R0, c0, c5, 0", 0xEE100E15
-                * then "mov r15, r0"
-                */
-               retval = cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               retval = cortex_a8_exec_opcode(target, 0xE1A0F000, &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-       } else {
-               /* DCCRX to R0, "MRC p14, 0, R0, c0, c5, 0", 0xEE100E15
-                * then "MSR CPSR_cxsf, r0" or "MSR SPSR_cxsf, r0" (all fields)
-                */
-               retval = cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-               retval = cortex_a8_exec_opcode(target, ARMV4_5_MSR_GP(0, 0xF, Rd & 1),
-                               &dscr);
-               if (retval != ERROR_OK)
-                       return retval;
-
-               /* "Prefetch flush" after modifying execution status in CPSR */
-               if (Rd == 16) {
-                       retval = cortex_a8_exec_opcode(target,
-                                       ARMV4_5_MCR(15, 0, 0, 7, 5, 4),
-                                       &dscr);
-                       if (retval != ERROR_OK)
-                               return retval;
-               }
-       }
-
-       return retval;
-}
-
 /* Write to memory mapped registers directly with no cache or mmu handling */
-static int cortex_a8_dap_write_memap_register_u32(struct target *target,
+static int cortex_a_dap_write_memap_register_u32(struct target *target,
        uint32_t address,
        uint32_t value)
 {
        int retval;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
 
-       retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, address, value);
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap, address, value);
 
        return retval;
 }
 
 /*
- * Cortex-A8 implementation of Debug Programmer's Model
+ * Cortex-A implementation of Debug Programmer's Model
  *
  * NOTE the invariant:  these routines return with DSCR_INSTR_COMP set,
  * so there's no need to poll for it before executing an instruction.
@@ -452,22 +337,21 @@ static int cortex_a8_dap_write_memap_register_u32(struct target *target,
  * be the places to enable/disable that mode.
  */
 
-static inline struct cortex_a8_common *dpm_to_a8(struct arm_dpm *dpm)
+static inline struct cortex_a_common *dpm_to_a(struct arm_dpm *dpm)
 {
-       return container_of(dpm, struct cortex_a8_common, armv7a_common.dpm);
+       return container_of(dpm, struct cortex_a_common, armv7a_common.dpm);
 }
 
-static int cortex_a8_write_dcc(struct cortex_a8_common *a8, uint32_t data)
+static int cortex_a_write_dcc(struct cortex_a_common *a, uint32_t data)
 {
        LOG_DEBUG("write DCC 0x%08" PRIx32, data);
-       return mem_ap_sel_write_u32(a8->armv7a_common.arm.dap,
-               a8->armv7a_common.debug_ap, a8->armv7a_common.debug_base + CPUDBG_DTRRX, data);
+       return mem_ap_write_u32(a->armv7a_common.debug_ap,
+                       a->armv7a_common.debug_base + CPUDBG_DTRRX, data);
 }
 
-static int cortex_a8_read_dcc(struct cortex_a8_common *a8, uint32_t *data,
+static int cortex_a_read_dcc(struct cortex_a_common *a, uint32_t *data,
        uint32_t *dscr_p)
 {
-       struct adiv5_dap *swjdp = a8->armv7a_common.arm.dap;
        uint32_t dscr = DSCR_INSTR_COMP;
        int retval;
 
@@ -475,10 +359,10 @@ static int cortex_a8_read_dcc(struct cortex_a8_common *a8, uint32_t *data,
                dscr = *dscr_p;
 
        /* Wait for DTRRXfull */
-       long long then = timeval_ms();
+       int64_t then = timeval_ms();
        while ((dscr & DSCR_DTR_TX_FULL) == 0) {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, a8->armv7a_common.debug_ap,
-                               a8->armv7a_common.debug_base + CPUDBG_DSCR,
+               retval = mem_ap_read_atomic_u32(a->armv7a_common.debug_ap,
+                               a->armv7a_common.debug_base + CPUDBG_DSCR,
                                &dscr);
                if (retval != ERROR_OK)
                        return retval;
@@ -488,8 +372,8 @@ static int cortex_a8_read_dcc(struct cortex_a8_common *a8, uint32_t *data,
                }
        }
 
-       retval = mem_ap_sel_read_atomic_u32(swjdp, a8->armv7a_common.debug_ap,
-                       a8->armv7a_common.debug_base + CPUDBG_DTRTX, data);
+       retval = mem_ap_read_atomic_u32(a->armv7a_common.debug_ap,
+                       a->armv7a_common.debug_base + CPUDBG_DTRTX, data);
        if (retval != ERROR_OK)
                return retval;
        /* LOG_DEBUG("read DCC 0x%08" PRIx32, *data); */
@@ -500,18 +384,17 @@ static int cortex_a8_read_dcc(struct cortex_a8_common *a8, uint32_t *data,
        return retval;
 }
 
-static int cortex_a8_dpm_prepare(struct arm_dpm *dpm)
+static int cortex_a_dpm_prepare(struct arm_dpm *dpm)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
-       struct adiv5_dap *swjdp = a8->armv7a_common.arm.dap;
+       struct cortex_a_common *a = dpm_to_a(dpm);
        uint32_t dscr;
        int retval;
 
        /* set up invariant:  INSTR_COMP is set after ever DPM operation */
-       long long then = timeval_ms();
+       int64_t then = timeval_ms();
        for (;; ) {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, a8->armv7a_common.debug_ap,
-                               a8->armv7a_common.debug_base + CPUDBG_DSCR,
+               retval = mem_ap_read_atomic_u32(a->armv7a_common.debug_ap,
+                               a->armv7a_common.debug_base + CPUDBG_DSCR,
                                &dscr);
                if (retval != ERROR_OK)
                        return retval;
@@ -527,8 +410,8 @@ static int cortex_a8_dpm_prepare(struct arm_dpm *dpm)
        if (dscr & DSCR_DTR_RX_FULL) {
                LOG_ERROR("DSCR_DTR_RX_FULL, dscr 0x%08" PRIx32, dscr);
                /* Clear DCCRX */
-               retval = cortex_a8_exec_opcode(
-                               a8->armv7a_common.arm.target,
+               retval = cortex_a_exec_opcode(
+                               a->armv7a_common.arm.target,
                                ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
                                &dscr);
                if (retval != ERROR_OK)
@@ -538,119 +421,119 @@ static int cortex_a8_dpm_prepare(struct arm_dpm *dpm)
        return retval;
 }
 
-static int cortex_a8_dpm_finish(struct arm_dpm *dpm)
+static int cortex_a_dpm_finish(struct arm_dpm *dpm)
 {
        /* REVISIT what could be done here? */
        return ERROR_OK;
 }
 
-static int cortex_a8_instr_write_data_dcc(struct arm_dpm *dpm,
+static int cortex_a_instr_write_data_dcc(struct arm_dpm *dpm,
        uint32_t opcode, uint32_t data)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+       struct cortex_a_common *a = dpm_to_a(dpm);
        int retval;
        uint32_t dscr = DSCR_INSTR_COMP;
 
-       retval = cortex_a8_write_dcc(a8, data);
+       retval = cortex_a_write_dcc(a, data);
        if (retval != ERROR_OK)
                return retval;
 
-       return cortex_a8_exec_opcode(
-                       a8->armv7a_common.arm.target,
+       return cortex_a_exec_opcode(
+                       a->armv7a_common.arm.target,
                        opcode,
                        &dscr);
 }
 
-static int cortex_a8_instr_write_data_r0(struct arm_dpm *dpm,
+static int cortex_a_instr_write_data_r0(struct arm_dpm *dpm,
        uint32_t opcode, uint32_t data)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+       struct cortex_a_common *a = dpm_to_a(dpm);
        uint32_t dscr = DSCR_INSTR_COMP;
        int retval;
 
-       retval = cortex_a8_write_dcc(a8, data);
+       retval = cortex_a_write_dcc(a, data);
        if (retval != ERROR_OK)
                return retval;
 
        /* DCCRX to R0, "MCR p14, 0, R0, c0, c5, 0", 0xEE000E15 */
-       retval = cortex_a8_exec_opcode(
-                       a8->armv7a_common.arm.target,
+       retval = cortex_a_exec_opcode(
+                       a->armv7a_common.arm.target,
                        ARMV4_5_MRC(14, 0, 0, 0, 5, 0),
                        &dscr);
        if (retval != ERROR_OK)
                return retval;
 
        /* then the opcode, taking data from R0 */
-       retval = cortex_a8_exec_opcode(
-                       a8->armv7a_common.arm.target,
+       retval = cortex_a_exec_opcode(
+                       a->armv7a_common.arm.target,
                        opcode,
                        &dscr);
 
        return retval;
 }
 
-static int cortex_a8_instr_cpsr_sync(struct arm_dpm *dpm)
+static int cortex_a_instr_cpsr_sync(struct arm_dpm *dpm)
 {
        struct target *target = dpm->arm->target;
        uint32_t dscr = DSCR_INSTR_COMP;
 
        /* "Prefetch flush" after modifying execution status in CPSR */
-       return cortex_a8_exec_opcode(target,
+       return cortex_a_exec_opcode(target,
                        ARMV4_5_MCR(15, 0, 0, 7, 5, 4),
                        &dscr);
 }
 
-static int cortex_a8_instr_read_data_dcc(struct arm_dpm *dpm,
+static int cortex_a_instr_read_data_dcc(struct arm_dpm *dpm,
        uint32_t opcode, uint32_t *data)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+       struct cortex_a_common *a = dpm_to_a(dpm);
        int retval;
        uint32_t dscr = DSCR_INSTR_COMP;
 
        /* the opcode, writing data to DCC */
-       retval = cortex_a8_exec_opcode(
-                       a8->armv7a_common.arm.target,
+       retval = cortex_a_exec_opcode(
+                       a->armv7a_common.arm.target,
                        opcode,
                        &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       return cortex_a8_read_dcc(a8, data, &dscr);
+       return cortex_a_read_dcc(a, data, &dscr);
 }
 
 
-static int cortex_a8_instr_read_data_r0(struct arm_dpm *dpm,
+static int cortex_a_instr_read_data_r0(struct arm_dpm *dpm,
        uint32_t opcode, uint32_t *data)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+       struct cortex_a_common *a = dpm_to_a(dpm);
        uint32_t dscr = DSCR_INSTR_COMP;
        int retval;
 
        /* the opcode, writing data to R0 */
-       retval = cortex_a8_exec_opcode(
-                       a8->armv7a_common.arm.target,
+       retval = cortex_a_exec_opcode(
+                       a->armv7a_common.arm.target,
                        opcode,
                        &dscr);
        if (retval != ERROR_OK)
                return retval;
 
        /* write R0 to DCC */
-       retval = cortex_a8_exec_opcode(
-                       a8->armv7a_common.arm.target,
+       retval = cortex_a_exec_opcode(
+                       a->armv7a_common.arm.target,
                        ARMV4_5_MCR(14, 0, 0, 0, 5, 0),
                        &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       return cortex_a8_read_dcc(a8, data, &dscr);
+       return cortex_a_read_dcc(a, data, &dscr);
 }
 
-static int cortex_a8_bpwp_enable(struct arm_dpm *dpm, unsigned index_t,
+static int cortex_a_bpwp_enable(struct arm_dpm *dpm, unsigned index_t,
        uint32_t addr, uint32_t control)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
-       uint32_t vr = a8->armv7a_common.debug_base;
-       uint32_t cr = a8->armv7a_common.debug_base;
+       struct cortex_a_common *a = dpm_to_a(dpm);
+       uint32_t vr = a->armv7a_common.debug_base;
+       uint32_t cr = a->armv7a_common.debug_base;
        int retval;
 
        switch (index_t) {
@@ -669,29 +552,29 @@ static int cortex_a8_bpwp_enable(struct arm_dpm *dpm, unsigned index_t,
        vr += 4 * index_t;
        cr += 4 * index_t;
 
-       LOG_DEBUG("A8: bpwp enable, vr %08x cr %08x",
+       LOG_DEBUG("A: bpwp enable, vr %08x cr %08x",
                (unsigned) vr, (unsigned) cr);
 
-       retval = cortex_a8_dap_write_memap_register_u32(dpm->arm->target,
+       retval = cortex_a_dap_write_memap_register_u32(dpm->arm->target,
                        vr, addr);
        if (retval != ERROR_OK)
                return retval;
-       retval = cortex_a8_dap_write_memap_register_u32(dpm->arm->target,
+       retval = cortex_a_dap_write_memap_register_u32(dpm->arm->target,
                        cr, control);
        return retval;
 }
 
-static int cortex_a8_bpwp_disable(struct arm_dpm *dpm, unsigned index_t)
+static int cortex_a_bpwp_disable(struct arm_dpm *dpm, unsigned index_t)
 {
-       struct cortex_a8_common *a8 = dpm_to_a8(dpm);
+       struct cortex_a_common *a = dpm_to_a(dpm);
        uint32_t cr;
 
        switch (index_t) {
                case 0 ... 15:
-                       cr = a8->armv7a_common.debug_base + CPUDBG_BCR_BASE;
+                       cr = a->armv7a_common.debug_base + CPUDBG_BCR_BASE;
                        break;
                case 16 ... 31:
-                       cr = a8->armv7a_common.debug_base + CPUDBG_WCR_BASE;
+                       cr = a->armv7a_common.debug_base + CPUDBG_WCR_BASE;
                        index_t -= 16;
                        break;
                default:
@@ -699,32 +582,32 @@ static int cortex_a8_bpwp_disable(struct arm_dpm *dpm, unsigned index_t)
        }
        cr += 4 * index_t;
 
-       LOG_DEBUG("A8: bpwp disable, cr %08x", (unsigned) cr);
+       LOG_DEBUG("A: bpwp disable, cr %08x", (unsigned) cr);
 
        /* clear control register */
-       return cortex_a8_dap_write_memap_register_u32(dpm->arm->target, cr, 0);
+       return cortex_a_dap_write_memap_register_u32(dpm->arm->target, cr, 0);
 }
 
-static int cortex_a8_dpm_setup(struct cortex_a8_common *a8, uint32_t didr)
+static int cortex_a_dpm_setup(struct cortex_a_common *a, uint32_t didr)
 {
-       struct arm_dpm *dpm = &a8->armv7a_common.dpm;
+       struct arm_dpm *dpm = &a->armv7a_common.dpm;
        int retval;
 
-       dpm->arm = &a8->armv7a_common.arm;
+       dpm->arm = &a->armv7a_common.arm;
        dpm->didr = didr;
 
-       dpm->prepare = cortex_a8_dpm_prepare;
-       dpm->finish = cortex_a8_dpm_finish;
+       dpm->prepare = cortex_a_dpm_prepare;
+       dpm->finish = cortex_a_dpm_finish;
 
-       dpm->instr_write_data_dcc = cortex_a8_instr_write_data_dcc;
-       dpm->instr_write_data_r0 = cortex_a8_instr_write_data_r0;
-       dpm->instr_cpsr_sync = cortex_a8_instr_cpsr_sync;
+       dpm->instr_write_data_dcc = cortex_a_instr_write_data_dcc;
+       dpm->instr_write_data_r0 = cortex_a_instr_write_data_r0;
+       dpm->instr_cpsr_sync = cortex_a_instr_cpsr_sync;
 
-       dpm->instr_read_data_dcc = cortex_a8_instr_read_data_dcc;
-       dpm->instr_read_data_r0 = cortex_a8_instr_read_data_r0;
+       dpm->instr_read_data_dcc = cortex_a_instr_read_data_dcc;
+       dpm->instr_read_data_r0 = cortex_a_instr_read_data_r0;
 
-       dpm->bpwp_enable = cortex_a8_bpwp_enable;
-       dpm->bpwp_disable = cortex_a8_bpwp_disable;
+       dpm->bpwp_enable = cortex_a_bpwp_enable;
+       dpm->bpwp_disable = cortex_a_bpwp_disable;
 
        retval = arm_dpm_setup(dpm);
        if (retval == ERROR_OK)
@@ -732,7 +615,7 @@ static int cortex_a8_dpm_setup(struct cortex_a8_common *a8, uint32_t didr)
 
        return retval;
 }
-static struct target *get_cortex_a8(struct target *target, int32_t coreid)
+static struct target *get_cortex_a(struct target *target, int32_t coreid)
 {
        struct target_list *head;
        struct target *curr;
@@ -746,9 +629,9 @@ static struct target *get_cortex_a8(struct target *target, int32_t coreid)
        }
        return target;
 }
-static int cortex_a8_halt(struct target *target);
+static int cortex_a_halt(struct target *target);
 
-static int cortex_a8_halt_smp(struct target *target)
+static int cortex_a_halt_smp(struct target *target)
 {
        int retval = 0;
        struct target_list *head;
@@ -756,8 +639,9 @@ static int cortex_a8_halt_smp(struct target *target)
        head = target->head;
        while (head != (struct target_list *)NULL) {
                curr = head->target;
-               if ((curr != target) && (curr->state != TARGET_HALTED))
-                       retval += cortex_a8_halt(curr);
+               if ((curr != target) && (curr->state != TARGET_HALTED)
+                       && target_was_examined(curr))
+                       retval += cortex_a_halt(curr);
                head = head->next;
        }
        return retval;
@@ -765,26 +649,56 @@ static int cortex_a8_halt_smp(struct target *target)
 
 static int update_halt_gdb(struct target *target)
 {
+       struct target *gdb_target = NULL;
+       struct target_list *head;
+       struct target *curr;
        int retval = 0;
-       if (target->gdb_service->core[0] == -1) {
+
+       if (target->gdb_service && target->gdb_service->core[0] == -1) {
                target->gdb_service->target = target;
                target->gdb_service->core[0] = target->coreid;
-               retval += cortex_a8_halt_smp(target);
+               retval += cortex_a_halt_smp(target);
        }
+
+       if (target->gdb_service)
+               gdb_target = target->gdb_service->target;
+
+       foreach_smp_target(head, target->head) {
+               curr = head->target;
+               /* skip calling context */
+               if (curr == target)
+                       continue;
+               if (!target_was_examined(curr))
+                       continue;
+               /* skip targets that were already halted */
+               if (curr->state == TARGET_HALTED)
+                       continue;
+               /* Skip gdb_target; it alerts GDB so has to be polled as last one */
+               if (curr == gdb_target)
+                       continue;
+
+               /* avoid recursion in cortex_a_poll() */
+               curr->smp = 0;
+               cortex_a_poll(curr);
+               curr->smp = 1;
+       }
+
+       /* after all targets were updated, poll the gdb serving target */
+       if (gdb_target != NULL && gdb_target != target)
+               cortex_a_poll(gdb_target);
        return retval;
 }
 
 /*
- * Cortex-A8 Run control
+ * Cortex-A Run control
  */
 
-static int cortex_a8_poll(struct target *target)
+static int cortex_a_poll(struct target *target)
 {
        int retval = ERROR_OK;
        uint32_t dscr;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
        enum target_state prev_target_state = target->state;
        /*  toggle to another core is done by gdb as follow */
        /*  maint packet J core_id */
@@ -794,93 +708,66 @@ static int cortex_a8_poll(struct target *target)
                (target->gdb_service) &&
                (target->gdb_service->target == NULL)) {
                target->gdb_service->target =
-                       get_cortex_a8(target, target->gdb_service->core[1]);
+                       get_cortex_a(target, target->gdb_service->core[1]);
                target_call_event_callbacks(target, TARGET_EVENT_HALTED);
                return retval;
        }
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
-       cortex_a8->cpudbg_dscr = dscr;
+       cortex_a->cpudbg_dscr = dscr;
 
        if (DSCR_RUN_MODE(dscr) == (DSCR_CORE_HALTED | DSCR_CORE_RESTARTED)) {
                if (prev_target_state != TARGET_HALTED) {
                        /* We have a halting debug event */
                        LOG_DEBUG("Target halted");
                        target->state = TARGET_HALTED;
-                       if ((prev_target_state == TARGET_RUNNING)
-                               || (prev_target_state == TARGET_UNKNOWN)
-                               || (prev_target_state == TARGET_RESET)) {
-                               retval = cortex_a8_debug_entry(target);
+
+                       retval = cortex_a_debug_entry(target);
+                       if (retval != ERROR_OK)
+                               return retval;
+
+                       if (target->smp) {
+                               retval = update_halt_gdb(target);
                                if (retval != ERROR_OK)
                                        return retval;
-                               if (target->smp) {
-                                       retval = update_halt_gdb(target);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               }
-                               target_call_event_callbacks(target,
-                                       TARGET_EVENT_HALTED);
                        }
-                       if (prev_target_state == TARGET_DEBUG_RUNNING) {
-                               LOG_DEBUG(" ");
 
-                               retval = cortex_a8_debug_entry(target);
-                               if (retval != ERROR_OK)
+                       if (prev_target_state == TARGET_DEBUG_RUNNING) {
+                               target_call_event_callbacks(target, TARGET_EVENT_DEBUG_HALTED);
+                       } else { /* prev_target_state is RUNNING, UNKNOWN or RESET */
+                               if (arm_semihosting(target, &retval) != 0)
                                        return retval;
-                               if (target->smp) {
-                                       retval = update_halt_gdb(target);
-                                       if (retval != ERROR_OK)
-                                               return retval;
-                               }
 
                                target_call_event_callbacks(target,
-                                       TARGET_EVENT_DEBUG_HALTED);
+                                       TARGET_EVENT_HALTED);
                        }
                }
-       } else if (DSCR_RUN_MODE(dscr) == DSCR_CORE_RESTARTED)
+       } else
                target->state = TARGET_RUNNING;
-       else {
-               LOG_DEBUG("Unknown target state dscr = 0x%08" PRIx32, dscr);
-               target->state = TARGET_UNKNOWN;
-       }
 
        return retval;
 }
 
-static int cortex_a8_halt(struct target *target)
+static int cortex_a_halt(struct target *target)
 {
        int retval = ERROR_OK;
        uint32_t dscr;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
 
        /*
         * Tell the core to be halted by writing DRCR with 0x1
         * and then wait for the core to be halted.
         */
-       retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DRCR, DRCR_HALT);
        if (retval != ERROR_OK)
                return retval;
 
-       /*
-        * enter halting debug mode
-        */
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
-       if (retval != ERROR_OK)
-               return retval;
-
-       retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_DSCR, dscr | DSCR_HALT_DBG_MODE);
-       if (retval != ERROR_OK)
-               return retval;
-
-       long long then = timeval_ms();
+       int64_t then = timeval_ms();
        for (;; ) {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                                armv7a->debug_base + CPUDBG_DSCR, &dscr);
                if (retval != ERROR_OK)
                        return retval;
@@ -897,8 +784,8 @@ static int cortex_a8_halt(struct target *target)
        return ERROR_OK;
 }
 
-static int cortex_a8_internal_restore(struct target *target, int current,
-       uint32_t *address, int handle_breakpoints, int debug_execution)
+static int cortex_a_internal_restore(struct target *target, int current,
+       target_addr_t *address, int handle_breakpoints, int debug_execution)
 {
        struct armv7a_common *armv7a = target_to_armv7a(target);
        struct arm *arm = &armv7a->arm;
@@ -917,15 +804,15 @@ static int cortex_a8_internal_restore(struct target *target, int current,
                 * C_MASKINTS in parallel with disabled interrupts can cause
                 * local faults to not be taken. */
                buf_set_u32(armv7m->core_cache->reg_list[ARMV7M_PRIMASK].value, 0, 32, 1);
-               armv7m->core_cache->reg_list[ARMV7M_PRIMASK].dirty = 1;
-               armv7m->core_cache->reg_list[ARMV7M_PRIMASK].valid = 1;
+               armv7m->core_cache->reg_list[ARMV7M_PRIMASK].dirty = true;
+               armv7m->core_cache->reg_list[ARMV7M_PRIMASK].valid = true;
 
                /* Make sure we are in Thumb mode */
                buf_set_u32(armv7m->core_cache->reg_list[ARMV7M_xPSR].value, 0, 32,
                        buf_get_u32(armv7m->core_cache->reg_list[ARMV7M_xPSR].value, 0,
                        32) | (1 << 24));
-               armv7m->core_cache->reg_list[ARMV7M_xPSR].dirty = 1;
-               armv7m->core_cache->reg_list[ARMV7M_xPSR].valid = 1;
+               armv7m->core_cache->reg_list[ARMV7M_xPSR].dirty = true;
+               armv7m->core_cache->reg_list[ARMV7M_xPSR].valid = true;
        }
 #endif
 
@@ -953,19 +840,23 @@ static int cortex_a8_internal_restore(struct target *target, int current,
                case ARM_STATE_JAZELLE:
                        LOG_ERROR("How do I resume into Jazelle state??");
                        return ERROR_FAIL;
+               case ARM_STATE_AARCH64:
+                       LOG_ERROR("Shoudn't be in AARCH64 state");
+                       return ERROR_FAIL;
        }
        LOG_DEBUG("resume pc = 0x%08" PRIx32, resume_pc);
        buf_set_u32(arm->pc->value, 0, 32, resume_pc);
-       arm->pc->dirty = 1;
-       arm->pc->valid = 1;
+       arm->pc->dirty = true;
+       arm->pc->valid = true;
+
        /* restore dpm_mode at system halt */
-       dpm_modeswitch(&armv7a->dpm, ARM_MODE_ANY);
+       arm_dpm_modeswitch(&armv7a->dpm, ARM_MODE_ANY);
        /* called it now before restoring context because it uses cpu
         * register r0 for restoring cp15 control register */
-       retval = cortex_a8_restore_cp15_control_reg(target);
+       retval = cortex_a_restore_cp15_control_reg(target);
        if (retval != ERROR_OK)
                return retval;
-       retval = cortex_a8_restore_context(target, handle_breakpoints);
+       retval = cortex_a_restore_context(target, handle_breakpoints);
        if (retval != ERROR_OK)
                return retval;
        target->debug_reason = DBG_REASON_NOTHALTED;
@@ -991,11 +882,10 @@ static int cortex_a8_internal_restore(struct target *target, int current,
        return retval;
 }
 
-static int cortex_a8_internal_restart(struct target *target)
+static int cortex_a_internal_restart(struct target *target)
 {
        struct armv7a_common *armv7a = target_to_armv7a(target);
        struct arm *arm = &armv7a->arm;
-       struct adiv5_dap *swjdp = arm->dap;
        int retval;
        uint32_t dscr;
        /*
@@ -1006,7 +896,7 @@ static int cortex_a8_internal_restart(struct target *target)
         * disable IRQs by default, with optional override...
         */
 
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
@@ -1014,20 +904,20 @@ static int cortex_a8_internal_restart(struct target *target)
        if ((dscr & DSCR_INSTR_COMP) == 0)
                LOG_ERROR("DSCR InstrCompl must be set before leaving debug!");
 
-       retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DSCR, dscr & ~DSCR_ITR_EN);
        if (retval != ERROR_OK)
                return retval;
 
-       retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DRCR, DRCR_RESTART |
                        DRCR_CLEAR_EXCEPTIONS);
        if (retval != ERROR_OK)
                return retval;
 
-       long long then = timeval_ms();
+       int64_t then = timeval_ms();
        for (;; ) {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                                armv7a->debug_base + CPUDBG_DSCR, &dscr);
                if (retval != ERROR_OK)
                        return retval;
@@ -1048,20 +938,21 @@ static int cortex_a8_internal_restart(struct target *target)
        return ERROR_OK;
 }
 
-static int cortex_a8_restore_smp(struct target *target, int handle_breakpoints)
+static int cortex_a_restore_smp(struct target *target, int handle_breakpoints)
 {
        int retval = 0;
        struct target_list *head;
        struct target *curr;
-       uint32_t address;
+       target_addr_t address;
        head = target->head;
        while (head != (struct target_list *)NULL) {
                curr = head->target;
-               if ((curr != target) && (curr->state != TARGET_RUNNING)) {
+               if ((curr != target) && (curr->state != TARGET_RUNNING)
+                       && target_was_examined(curr)) {
                        /*  resume current address , not in step mode */
-                       retval += cortex_a8_internal_restore(curr, 1, &address,
+                       retval += cortex_a_internal_restore(curr, 1, &address,
                                        handle_breakpoints, 0);
-                       retval += cortex_a8_internal_restart(curr);
+                       retval += cortex_a_internal_restart(curr);
                }
                head = head->next;
 
@@ -1069,8 +960,8 @@ static int cortex_a8_restore_smp(struct target *target, int handle_breakpoints)
        return retval;
 }
 
-static int cortex_a8_resume(struct target *target, int current,
-       uint32_t address, int handle_breakpoints, int debug_execution)
+static int cortex_a_resume(struct target *target, int current,
+       target_addr_t address, int handle_breakpoints, int debug_execution)
 {
        int retval = 0;
        /* dummy resume for smp toggle in order to reduce gdb impact  */
@@ -1082,68 +973,64 @@ static int cortex_a8_resume(struct target *target, int current,
                target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
                return 0;
        }
-       cortex_a8_internal_restore(target, current, &address, handle_breakpoints, debug_execution);
+       cortex_a_internal_restore(target, current, &address, handle_breakpoints, debug_execution);
        if (target->smp) {
                target->gdb_service->core[0] = -1;
-               retval = cortex_a8_restore_smp(target, handle_breakpoints);
+               retval = cortex_a_restore_smp(target, handle_breakpoints);
                if (retval != ERROR_OK)
                        return retval;
        }
-       cortex_a8_internal_restart(target);
+       cortex_a_internal_restart(target);
 
        if (!debug_execution) {
                target->state = TARGET_RUNNING;
                target_call_event_callbacks(target, TARGET_EVENT_RESUMED);
-               LOG_DEBUG("target resumed at 0x%" PRIx32, address);
+               LOG_DEBUG("target resumed at " TARGET_ADDR_FMT, address);
        } else {
                target->state = TARGET_DEBUG_RUNNING;
                target_call_event_callbacks(target, TARGET_EVENT_DEBUG_RESUMED);
-               LOG_DEBUG("target debug resumed at 0x%" PRIx32, address);
+               LOG_DEBUG("target debug resumed at " TARGET_ADDR_FMT, address);
        }
 
        return ERROR_OK;
 }
 
-static int cortex_a8_debug_entry(struct target *target)
+static int cortex_a_debug_entry(struct target *target)
 {
-       int i;
-       uint32_t regfile[16], cpsr, dscr;
+       uint32_t dscr;
        int retval = ERROR_OK;
-       struct working_area *regfile_working_area = NULL;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
        struct armv7a_common *armv7a = target_to_armv7a(target);
        struct arm *arm = &armv7a->arm;
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
-       struct reg *reg;
 
-       LOG_DEBUG("dscr = 0x%08" PRIx32, cortex_a8->cpudbg_dscr);
+       LOG_DEBUG("dscr = 0x%08" PRIx32, cortex_a->cpudbg_dscr);
 
        /* REVISIT surely we should not re-read DSCR !! */
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DSCR, &dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       /* REVISIT see A8 TRM 12.11.4 steps 2..3 -- make sure that any
+       /* REVISIT see A TRM 12.11.4 steps 2..3 -- make sure that any
         * imprecise data aborts get discarded by issuing a Data
         * Synchronization Barrier:  ARMV4_5_MCR(15, 0, 0, 7, 10, 4).
         */
 
        /* Enable the ITR execution once we are in debug mode */
        dscr |= DSCR_ITR_EN;
-       retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_DSCR, dscr);
        if (retval != ERROR_OK)
                return retval;
 
        /* Examine debug reason */
-       arm_dpm_report_dscr(&armv7a->dpm, cortex_a8->cpudbg_dscr);
+       arm_dpm_report_dscr(&armv7a->dpm, cortex_a->cpudbg_dscr);
 
        /* save address of instruction that triggered the watchpoint? */
        if (target->debug_reason == DBG_REASON_WATCHPOINT) {
                uint32_t wfar;
 
-               retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                                armv7a->debug_base + CPUDBG_WFAR,
                                &wfar);
                if (retval != ERROR_OK)
@@ -1151,66 +1038,28 @@ static int cortex_a8_debug_entry(struct target *target)
                arm_dpm_report_wfar(&armv7a->dpm, wfar);
        }
 
-       /* REVISIT fast_reg_read is never set ... */
-
-       /* Examine target state and mode */
-       if (cortex_a8->fast_reg_read)
-               target_alloc_working_area(target, 64, &regfile_working_area);
-
-       /* First load register acessible through core debug port*/
-       if (!regfile_working_area)
-               retval = arm_dpm_read_current_registers(&armv7a->dpm);
-       else {
-               retval = cortex_a8_read_regs_through_mem(target,
-                               regfile_working_area->address, regfile);
-
-               target_free_working_area(target, regfile_working_area);
-               if (retval != ERROR_OK)
-                       return retval;
+       /* First load register accessible through core debug port */
+       retval = arm_dpm_read_current_registers(&armv7a->dpm);
+       if (retval != ERROR_OK)
+               return retval;
 
-               /* read Current PSR */
-               retval = cortex_a8_dap_read_coreregister_u32(target, &cpsr, 16);
-               /*  store current cpsr */
+       if (arm->spsr) {
+               /* read SPSR */
+               retval = arm_dpm_read_reg(&armv7a->dpm, arm->spsr, 17);
                if (retval != ERROR_OK)
                        return retval;
-
-               LOG_DEBUG("cpsr: %8.8" PRIx32, cpsr);
-
-               arm_set_cpsr(arm, cpsr);
-
-               /* update cache */
-               for (i = 0; i <= ARM_PC; i++) {
-                       reg = arm_reg_current(arm, i);
-
-                       buf_set_u32(reg->value, 0, 32, regfile[i]);
-                       reg->valid = 1;
-                       reg->dirty = 0;
-               }
-
-               /* Fixup PC Resume Address */
-               if (cpsr & (1 << 5)) {
-                       /* T bit set for Thumb or ThumbEE state */
-                       regfile[ARM_PC] -= 4;
-               } else {
-                       /* ARM state */
-                       regfile[ARM_PC] -= 8;
-               }
-
-               reg = arm->pc;
-               buf_set_u32(reg->value, 0, 32, regfile[ARM_PC]);
-               reg->dirty = reg->valid;
        }
 
 #if 0
 /* TODO, Move this */
        uint32_t cp15_control_register, cp15_cacr, cp15_nacr;
-       cortex_a8_read_cp(target, &cp15_control_register, 15, 0, 1, 0, 0);
+       cortex_a_read_cp(target, &cp15_control_register, 15, 0, 1, 0, 0);
        LOG_DEBUG("cp15_control_register = 0x%08x", cp15_control_register);
 
-       cortex_a8_read_cp(target, &cp15_cacr, 15, 0, 1, 0, 2);
+       cortex_a_read_cp(target, &cp15_cacr, 15, 0, 1, 0, 2);
        LOG_DEBUG("cp15 Coprocessor Access Control Register = 0x%08x", cp15_cacr);
 
-       cortex_a8_read_cp(target, &cp15_nacr, 15, 0, 1, 1, 2);
+       cortex_a_read_cp(target, &cp15_nacr, 15, 0, 1, 1, 2);
        LOG_DEBUG("cp15 Nonsecure Access Control Register = 0x%08x", cp15_nacr);
 #endif
 
@@ -1225,43 +1074,79 @@ static int cortex_a8_debug_entry(struct target *target)
        return retval;
 }
 
-static int cortex_a8_post_debug_entry(struct target *target)
+static int cortex_a_post_debug_entry(struct target *target)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
        int retval;
 
        /* MRC p15,0,<Rt>,c1,c0,0 ; Read CP15 System Control Register */
        retval = armv7a->arm.mrc(target, 15,
                        0, 0,   /* op1, op2 */
                        1, 0,   /* CRn, CRm */
-                       &cortex_a8->cp15_control_reg);
+                       &cortex_a->cp15_control_reg);
        if (retval != ERROR_OK)
                return retval;
-       LOG_DEBUG("cp15_control_reg: %8.8" PRIx32, cortex_a8->cp15_control_reg);
-       cortex_a8->cp15_control_reg_curr = cortex_a8->cp15_control_reg;
+       LOG_DEBUG("cp15_control_reg: %8.8" PRIx32, cortex_a->cp15_control_reg);
+       cortex_a->cp15_control_reg_curr = cortex_a->cp15_control_reg;
+
+       if (!armv7a->is_armv7r)
+               armv7a_read_ttbcr(target);
 
-       if (armv7a->armv7a_mmu.armv7a_cache.ctype == -1)
+       if (armv7a->armv7a_mmu.armv7a_cache.info == -1)
                armv7a_identify_cache(target);
 
        if (armv7a->is_armv7r) {
                armv7a->armv7a_mmu.mmu_enabled = 0;
        } else {
                armv7a->armv7a_mmu.mmu_enabled =
-                       (cortex_a8->cp15_control_reg & 0x1U) ? 1 : 0;
+                       (cortex_a->cp15_control_reg & 0x1U) ? 1 : 0;
        }
        armv7a->armv7a_mmu.armv7a_cache.d_u_cache_enabled =
-               (cortex_a8->cp15_control_reg & 0x4U) ? 1 : 0;
+               (cortex_a->cp15_control_reg & 0x4U) ? 1 : 0;
        armv7a->armv7a_mmu.armv7a_cache.i_cache_enabled =
-               (cortex_a8->cp15_control_reg & 0x1000U) ? 1 : 0;
-       cortex_a8->curr_mode = armv7a->arm.core_mode;
+               (cortex_a->cp15_control_reg & 0x1000U) ? 1 : 0;
+       cortex_a->curr_mode = armv7a->arm.core_mode;
+
+       /* switch to SVC mode to read DACR */
+       arm_dpm_modeswitch(&armv7a->dpm, ARM_MODE_SVC);
+       armv7a->arm.mrc(target, 15,
+                       0, 0, 3, 0,
+                       &cortex_a->cp15_dacr_reg);
+
+       LOG_DEBUG("cp15_dacr_reg: %8.8" PRIx32,
+                       cortex_a->cp15_dacr_reg);
 
+       arm_dpm_modeswitch(&armv7a->dpm, ARM_MODE_ANY);
        return ERROR_OK;
 }
 
-static int cortex_a8_step(struct target *target, int current, uint32_t address,
+int cortex_a_set_dscr_bits(struct target *target, unsigned long bit_mask, unsigned long value)
+{
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       uint32_t dscr;
+
+       /* Read DSCR */
+       int retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       if (ERROR_OK != retval)
+               return retval;
+
+       /* clear bitfield */
+       dscr &= ~bit_mask;
+       /* put new value */
+       dscr |= value & bit_mask;
+
+       /* write new DSCR */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, dscr);
+       return retval;
+}
+
+static int cortex_a_step(struct target *target, int current, target_addr_t address,
        int handle_breakpoints)
 {
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
        struct armv7a_common *armv7a = target_to_armv7a(target);
        struct arm *arm = &armv7a->arm;
        struct breakpoint *breakpoint = NULL;
@@ -1282,35 +1167,43 @@ static int cortex_a8_step(struct target *target, int current, uint32_t address,
                address = buf_get_u32(r->value, 0, 32);
 
        /* The front-end may request us not to handle breakpoints.
-        * But since Cortex-A8 uses breakpoint for single step,
+        * But since Cortex-A uses breakpoint for single step,
         * we MUST handle breakpoints.
         */
        handle_breakpoints = 1;
        if (handle_breakpoints) {
                breakpoint = breakpoint_find(target, address);
                if (breakpoint)
-                       cortex_a8_unset_breakpoint(target, breakpoint);
+                       cortex_a_unset_breakpoint(target, breakpoint);
        }
 
        /* Setup single step breakpoint */
        stepbreakpoint.address = address;
+       stepbreakpoint.asid = 0;
        stepbreakpoint.length = (arm->core_state == ARM_STATE_THUMB)
                ? 2 : 4;
        stepbreakpoint.type = BKPT_HARD;
        stepbreakpoint.set = 0;
 
+       /* Disable interrupts during single step if requested */
+       if (cortex_a->isrmasking_mode == CORTEX_A_ISRMASK_ON) {
+               retval = cortex_a_set_dscr_bits(target, DSCR_INT_DIS, DSCR_INT_DIS);
+               if (ERROR_OK != retval)
+                       return retval;
+       }
+
        /* Break on IVA mismatch */
-       cortex_a8_set_breakpoint(target, &stepbreakpoint, 0x04);
+       cortex_a_set_breakpoint(target, &stepbreakpoint, 0x04);
 
        target->debug_reason = DBG_REASON_SINGLESTEP;
 
-       retval = cortex_a8_resume(target, 1, address, 0, 0);
+       retval = cortex_a_resume(target, 1, address, 0, 0);
        if (retval != ERROR_OK)
                return retval;
 
-       long long then = timeval_ms();
+       int64_t then = timeval_ms();
        while (target->state != TARGET_HALTED) {
-               retval = cortex_a8_poll(target);
+               retval = cortex_a_poll(target);
                if (retval != ERROR_OK)
                        return retval;
                if (timeval_ms() > then + 1000) {
@@ -1319,12 +1212,20 @@ static int cortex_a8_step(struct target *target, int current, uint32_t address,
                }
        }
 
-       cortex_a8_unset_breakpoint(target, &stepbreakpoint);
+       cortex_a_unset_breakpoint(target, &stepbreakpoint);
+
+       /* Re-enable interrupts if they were disabled */
+       if (cortex_a->isrmasking_mode == CORTEX_A_ISRMASK_ON) {
+               retval = cortex_a_set_dscr_bits(target, DSCR_INT_DIS, 0);
+               if (ERROR_OK != retval)
+                       return retval;
+       }
+
 
        target->debug_reason = DBG_REASON_BREAKPOINT;
 
        if (breakpoint)
-               cortex_a8_set_breakpoint(target, breakpoint, 0);
+               cortex_a_set_breakpoint(target, breakpoint, 0);
 
        if (target->state != TARGET_HALTED)
                LOG_DEBUG("target stepped");
@@ -1332,7 +1233,7 @@ static int cortex_a8_step(struct target *target, int current, uint32_t address,
        return ERROR_OK;
 }
 
-static int cortex_a8_restore_context(struct target *target, bool bpwp)
+static int cortex_a_restore_context(struct target *target, bool bpwp)
 {
        struct armv7a_common *armv7a = target_to_armv7a(target);
 
@@ -1345,20 +1246,20 @@ static int cortex_a8_restore_context(struct target *target, bool bpwp)
 }
 
 /*
- * Cortex-A8 Breakpoint and watchpoint functions
+ * Cortex-A Breakpoint and watchpoint functions
  */
 
 /* Setup hardware Breakpoint Register Pair */
-static int cortex_a8_set_breakpoint(struct target *target,
+static int cortex_a_set_breakpoint(struct target *target,
        struct breakpoint *breakpoint, uint8_t matchmode)
 {
        int retval;
        int brp_i = 0;
        uint32_t control;
        uint8_t byte_addr_select = 0x0F;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
-       struct cortex_a8_brp *brp_list = cortex_a8->brp_list;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
+       struct cortex_a_brp *brp_list = cortex_a->brp_list;
 
        if (breakpoint->set) {
                LOG_WARNING("breakpoint already set");
@@ -1366,9 +1267,9 @@ static int cortex_a8_set_breakpoint(struct target *target,
        }
 
        if (breakpoint->type == BKPT_HARD) {
-               while (brp_list[brp_i].used && (brp_i < cortex_a8->brp_num))
+               while (brp_list[brp_i].used && (brp_i < cortex_a->brp_num))
                        brp_i++;
-               if (brp_i >= cortex_a8->brp_num) {
+               if (brp_i >= cortex_a->brp_num) {
                        LOG_ERROR("ERROR Can not find free Breakpoint Register Pair");
                        return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
                }
@@ -1381,12 +1282,12 @@ static int cortex_a8_set_breakpoint(struct target *target,
                brp_list[brp_i].used = 1;
                brp_list[brp_i].value = (breakpoint->address & 0xFFFFFFFC);
                brp_list[brp_i].control = control;
-               retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+               retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                + CPUDBG_BVR_BASE + 4 * brp_list[brp_i].BRPn,
                                brp_list[brp_i].value);
                if (retval != ERROR_OK)
                        return retval;
-               retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+               retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                + CPUDBG_BCR_BASE + 4 * brp_list[brp_i].BRPn,
                                brp_list[brp_i].control);
                if (retval != ERROR_OK)
@@ -1396,37 +1297,63 @@ static int cortex_a8_set_breakpoint(struct target *target,
                        brp_list[brp_i].value);
        } else if (breakpoint->type == BKPT_SOFT) {
                uint8_t code[4];
+               /* length == 2: Thumb breakpoint */
                if (breakpoint->length == 2)
                        buf_set_u32(code, 0, 32, ARMV5_T_BKPT(0x11));
                else
+               /* length == 3: Thumb-2 breakpoint, actual encoding is
+                * a regular Thumb BKPT instruction but we replace a
+                * 32bit Thumb-2 instruction, so fix-up the breakpoint
+                * length
+                */
+               if (breakpoint->length == 3) {
+                       buf_set_u32(code, 0, 32, ARMV5_T_BKPT(0x11));
+                       breakpoint->length = 4;
+               } else
+                       /* length == 4, normal ARM breakpoint */
                        buf_set_u32(code, 0, 32, ARMV5_BKPT(0x11));
+
                retval = target_read_memory(target,
                                breakpoint->address & 0xFFFFFFFE,
                                breakpoint->length, 1,
                                breakpoint->orig_instr);
                if (retval != ERROR_OK)
                        return retval;
+
+               /* make sure data cache is cleaned & invalidated down to PoC */
+               if (!armv7a->armv7a_mmu.armv7a_cache.auto_cache_enabled) {
+                       armv7a_cache_flush_virt(target, breakpoint->address,
+                                               breakpoint->length);
+               }
+
                retval = target_write_memory(target,
                                breakpoint->address & 0xFFFFFFFE,
                                breakpoint->length, 1, code);
                if (retval != ERROR_OK)
                        return retval;
+
+               /* update i-cache at breakpoint location */
+               armv7a_l1_d_cache_inval_virt(target, breakpoint->address,
+                                       breakpoint->length);
+               armv7a_l1_i_cache_inval_virt(target, breakpoint->address,
+                                                breakpoint->length);
+
                breakpoint->set = 0x11; /* Any nice value but 0 */
        }
 
        return ERROR_OK;
 }
 
-static int cortex_a8_set_context_breakpoint(struct target *target,
+static int cortex_a_set_context_breakpoint(struct target *target,
        struct breakpoint *breakpoint, uint8_t matchmode)
 {
        int retval = ERROR_FAIL;
        int brp_i = 0;
        uint32_t control;
        uint8_t byte_addr_select = 0x0F;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
-       struct cortex_a8_brp *brp_list = cortex_a8->brp_list;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
+       struct cortex_a_brp *brp_list = cortex_a->brp_list;
 
        if (breakpoint->set) {
                LOG_WARNING("breakpoint already set");
@@ -1434,10 +1361,10 @@ static int cortex_a8_set_context_breakpoint(struct target *target,
        }
        /*check available context BRPs*/
        while ((brp_list[brp_i].used ||
-               (brp_list[brp_i].type != BRP_CONTEXT)) && (brp_i < cortex_a8->brp_num))
+               (brp_list[brp_i].type != BRP_CONTEXT)) && (brp_i < cortex_a->brp_num))
                brp_i++;
 
-       if (brp_i >= cortex_a8->brp_num) {
+       if (brp_i >= cortex_a->brp_num) {
                LOG_ERROR("ERROR Can not find free Breakpoint Register Pair");
                return ERROR_FAIL;
        }
@@ -1449,12 +1376,12 @@ static int cortex_a8_set_context_breakpoint(struct target *target,
        brp_list[brp_i].used = 1;
        brp_list[brp_i].value = (breakpoint->asid);
        brp_list[brp_i].control = control;
-       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                        + CPUDBG_BVR_BASE + 4 * brp_list[brp_i].BRPn,
                        brp_list[brp_i].value);
        if (retval != ERROR_OK)
                return retval;
-       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                        + CPUDBG_BCR_BASE + 4 * brp_list[brp_i].BRPn,
                        brp_list[brp_i].control);
        if (retval != ERROR_OK)
@@ -1466,7 +1393,7 @@ static int cortex_a8_set_context_breakpoint(struct target *target,
 
 }
 
-static int cortex_a8_set_hybrid_breakpoint(struct target *target, struct breakpoint *breakpoint)
+static int cortex_a_set_hybrid_breakpoint(struct target *target, struct breakpoint *breakpoint)
 {
        int retval = ERROR_FAIL;
        int brp_1 = 0;  /* holds the contextID pair */
@@ -1476,9 +1403,9 @@ static int cortex_a8_set_hybrid_breakpoint(struct target *target, struct breakpo
        uint8_t IVA_byte_addr_select = 0x0F;
        uint8_t CTX_machmode = 0x03;
        uint8_t IVA_machmode = 0x01;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
-       struct cortex_a8_brp *brp_list = cortex_a8->brp_list;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
+       struct cortex_a_brp *brp_list = cortex_a->brp_list;
 
        if (breakpoint->set) {
                LOG_WARNING("breakpoint already set");
@@ -1486,21 +1413,21 @@ static int cortex_a8_set_hybrid_breakpoint(struct target *target, struct breakpo
        }
        /*check available context BRPs*/
        while ((brp_list[brp_1].used ||
-               (brp_list[brp_1].type != BRP_CONTEXT)) && (brp_1 < cortex_a8->brp_num))
+               (brp_list[brp_1].type != BRP_CONTEXT)) && (brp_1 < cortex_a->brp_num))
                brp_1++;
 
        printf("brp(CTX) found num: %d\n", brp_1);
-       if (brp_1 >= cortex_a8->brp_num) {
+       if (brp_1 >= cortex_a->brp_num) {
                LOG_ERROR("ERROR Can not find free Breakpoint Register Pair");
                return ERROR_FAIL;
        }
 
        while ((brp_list[brp_2].used ||
-               (brp_list[brp_2].type != BRP_NORMAL)) && (brp_2 < cortex_a8->brp_num))
+               (brp_list[brp_2].type != BRP_NORMAL)) && (brp_2 < cortex_a->brp_num))
                brp_2++;
 
        printf("brp(IVA) found num: %d\n", brp_2);
-       if (brp_2 >= cortex_a8->brp_num) {
+       if (brp_2 >= cortex_a->brp_num) {
                LOG_ERROR("ERROR Can not find free Breakpoint Register Pair");
                return ERROR_FAIL;
        }
@@ -1515,12 +1442,12 @@ static int cortex_a8_set_hybrid_breakpoint(struct target *target, struct breakpo
        brp_list[brp_1].used = 1;
        brp_list[brp_1].value = (breakpoint->asid);
        brp_list[brp_1].control = control_CTX;
-       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                        + CPUDBG_BVR_BASE + 4 * brp_list[brp_1].BRPn,
                        brp_list[brp_1].value);
        if (retval != ERROR_OK)
                return retval;
-       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                        + CPUDBG_BCR_BASE + 4 * brp_list[brp_1].BRPn,
                        brp_list[brp_1].control);
        if (retval != ERROR_OK)
@@ -1533,12 +1460,12 @@ static int cortex_a8_set_hybrid_breakpoint(struct target *target, struct breakpo
        brp_list[brp_2].used = 1;
        brp_list[brp_2].value = (breakpoint->address & 0xFFFFFFFC);
        brp_list[brp_2].control = control_IVA;
-       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                        + CPUDBG_BVR_BASE + 4 * brp_list[brp_2].BRPn,
                        brp_list[brp_2].value);
        if (retval != ERROR_OK)
                return retval;
-       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                        + CPUDBG_BCR_BASE + 4 * brp_list[brp_2].BRPn,
                        brp_list[brp_2].control);
        if (retval != ERROR_OK)
@@ -1547,12 +1474,12 @@ static int cortex_a8_set_hybrid_breakpoint(struct target *target, struct breakpo
        return ERROR_OK;
 }
 
-static int cortex_a8_unset_breakpoint(struct target *target, struct breakpoint *breakpoint)
+static int cortex_a_unset_breakpoint(struct target *target, struct breakpoint *breakpoint)
 {
        int retval;
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
-       struct cortex_a8_brp *brp_list = cortex_a8->brp_list;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
+       struct cortex_a_brp *brp_list = cortex_a->brp_list;
 
        if (!breakpoint->set) {
                LOG_WARNING("breakpoint not set");
@@ -1563,7 +1490,7 @@ static int cortex_a8_unset_breakpoint(struct target *target, struct breakpoint *
                if ((breakpoint->address != 0) && (breakpoint->asid != 0)) {
                        int brp_i = breakpoint->set - 1;
                        int brp_j = breakpoint->linked_BRP;
-                       if ((brp_i < 0) || (brp_i >= cortex_a8->brp_num)) {
+                       if ((brp_i < 0) || (brp_i >= cortex_a->brp_num)) {
                                LOG_DEBUG("Invalid BRP number in breakpoint");
                                return ERROR_OK;
                        }
@@ -1572,17 +1499,17 @@ static int cortex_a8_unset_breakpoint(struct target *target, struct breakpoint *
                        brp_list[brp_i].used = 0;
                        brp_list[brp_i].value = 0;
                        brp_list[brp_i].control = 0;
-                       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+                       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                        + CPUDBG_BCR_BASE + 4 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].control);
                        if (retval != ERROR_OK)
                                return retval;
-                       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+                       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                        + CPUDBG_BVR_BASE + 4 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].value);
                        if (retval != ERROR_OK)
                                return retval;
-                       if ((brp_j < 0) || (brp_j >= cortex_a8->brp_num)) {
+                       if ((brp_j < 0) || (brp_j >= cortex_a->brp_num)) {
                                LOG_DEBUG("Invalid BRP number in breakpoint");
                                return ERROR_OK;
                        }
@@ -1591,12 +1518,12 @@ static int cortex_a8_unset_breakpoint(struct target *target, struct breakpoint *
                        brp_list[brp_j].used = 0;
                        brp_list[brp_j].value = 0;
                        brp_list[brp_j].control = 0;
-                       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+                       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                        + CPUDBG_BCR_BASE + 4 * brp_list[brp_j].BRPn,
                                        brp_list[brp_j].control);
                        if (retval != ERROR_OK)
                                return retval;
-                       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+                       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                        + CPUDBG_BVR_BASE + 4 * brp_list[brp_j].BRPn,
                                        brp_list[brp_j].value);
                        if (retval != ERROR_OK)
@@ -1607,7 +1534,7 @@ static int cortex_a8_unset_breakpoint(struct target *target, struct breakpoint *
 
                } else {
                        int brp_i = breakpoint->set - 1;
-                       if ((brp_i < 0) || (brp_i >= cortex_a8->brp_num)) {
+                       if ((brp_i < 0) || (brp_i >= cortex_a->brp_num)) {
                                LOG_DEBUG("Invalid BRP number in breakpoint");
                                return ERROR_OK;
                        }
@@ -1616,12 +1543,12 @@ static int cortex_a8_unset_breakpoint(struct target *target, struct breakpoint *
                        brp_list[brp_i].used = 0;
                        brp_list[brp_i].value = 0;
                        brp_list[brp_i].control = 0;
-                       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+                       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                        + CPUDBG_BCR_BASE + 4 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].control);
                        if (retval != ERROR_OK)
                                return retval;
-                       retval = cortex_a8_dap_write_memap_register_u32(target, armv7a->debug_base
+                       retval = cortex_a_dap_write_memap_register_u32(target, armv7a->debug_base
                                        + CPUDBG_BVR_BASE + 4 * brp_list[brp_i].BRPn,
                                        brp_list[brp_i].value);
                        if (retval != ERROR_OK)
@@ -1630,6 +1557,13 @@ static int cortex_a8_unset_breakpoint(struct target *target, struct breakpoint *
                        return ERROR_OK;
                }
        } else {
+
+               /* make sure data cache is cleaned & invalidated down to PoC */
+               if (!armv7a->armv7a_mmu.armv7a_cache.auto_cache_enabled) {
+                       armv7a_cache_flush_virt(target, breakpoint->address,
+                                               breakpoint->length);
+               }
+
                /* restore original instruction (kept in target endianness) */
                if (breakpoint->length == 4) {
                        retval = target_write_memory(target,
@@ -1644,64 +1578,70 @@ static int cortex_a8_unset_breakpoint(struct target *target, struct breakpoint *
                        if (retval != ERROR_OK)
                                return retval;
                }
+
+               /* update i-cache at breakpoint location */
+               armv7a_l1_d_cache_inval_virt(target, breakpoint->address,
+                                                breakpoint->length);
+               armv7a_l1_i_cache_inval_virt(target, breakpoint->address,
+                                                breakpoint->length);
        }
        breakpoint->set = 0;
 
        return ERROR_OK;
 }
 
-static int cortex_a8_add_breakpoint(struct target *target,
+static int cortex_a_add_breakpoint(struct target *target,
        struct breakpoint *breakpoint)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
 
-       if ((breakpoint->type == BKPT_HARD) && (cortex_a8->brp_num_available < 1)) {
+       if ((breakpoint->type == BKPT_HARD) && (cortex_a->brp_num_available < 1)) {
                LOG_INFO("no hardware breakpoint available");
                return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
        }
 
        if (breakpoint->type == BKPT_HARD)
-               cortex_a8->brp_num_available--;
+               cortex_a->brp_num_available--;
 
-       return cortex_a8_set_breakpoint(target, breakpoint, 0x00);      /* Exact match */
+       return cortex_a_set_breakpoint(target, breakpoint, 0x00);       /* Exact match */
 }
 
-static int cortex_a8_add_context_breakpoint(struct target *target,
+static int cortex_a_add_context_breakpoint(struct target *target,
        struct breakpoint *breakpoint)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
 
-       if ((breakpoint->type == BKPT_HARD) && (cortex_a8->brp_num_available < 1)) {
+       if ((breakpoint->type == BKPT_HARD) && (cortex_a->brp_num_available < 1)) {
                LOG_INFO("no hardware breakpoint available");
                return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
        }
 
        if (breakpoint->type == BKPT_HARD)
-               cortex_a8->brp_num_available--;
+               cortex_a->brp_num_available--;
 
-       return cortex_a8_set_context_breakpoint(target, breakpoint, 0x02);      /* asid match */
+       return cortex_a_set_context_breakpoint(target, breakpoint, 0x02);       /* asid match */
 }
 
-static int cortex_a8_add_hybrid_breakpoint(struct target *target,
+static int cortex_a_add_hybrid_breakpoint(struct target *target,
        struct breakpoint *breakpoint)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
 
-       if ((breakpoint->type == BKPT_HARD) && (cortex_a8->brp_num_available < 1)) {
+       if ((breakpoint->type == BKPT_HARD) && (cortex_a->brp_num_available < 1)) {
                LOG_INFO("no hardware breakpoint available");
                return ERROR_TARGET_RESOURCE_NOT_AVAILABLE;
        }
 
        if (breakpoint->type == BKPT_HARD)
-               cortex_a8->brp_num_available--;
+               cortex_a->brp_num_available--;
 
-       return cortex_a8_set_hybrid_breakpoint(target, breakpoint);     /* ??? */
+       return cortex_a_set_hybrid_breakpoint(target, breakpoint);      /* ??? */
 }
 
 
-static int cortex_a8_remove_breakpoint(struct target *target, struct breakpoint *breakpoint)
+static int cortex_a_remove_breakpoint(struct target *target, struct breakpoint *breakpoint)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
 
 #if 0
 /* It is perfectly possible to remove breakpoints while the target is running */
@@ -1712,9 +1652,9 @@ static int cortex_a8_remove_breakpoint(struct target *target, struct breakpoint
 #endif
 
        if (breakpoint->set) {
-               cortex_a8_unset_breakpoint(target, breakpoint);
+               cortex_a_unset_breakpoint(target, breakpoint);
                if (breakpoint->type == BKPT_HARD)
-                       cortex_a8->brp_num_available++;
+                       cortex_a->brp_num_available++;
        }
 
 
@@ -1722,10 +1662,10 @@ static int cortex_a8_remove_breakpoint(struct target *target, struct breakpoint
 }
 
 /*
- * Cortex-A8 Reset functions
+ * Cortex-A Reset functions
  */
 
-static int cortex_a8_assert_reset(struct target *target)
+static int cortex_a_assert_reset(struct target *target)
 {
        struct armv7a_common *armv7a = target_to_armv7a(target);
 
@@ -1733,6 +1673,8 @@ static int cortex_a8_assert_reset(struct target *target)
 
        /* FIXME when halt is requested, make it work somehow... */
 
+       /* This function can be called in "target not examined" state */
+
        /* Issue some kind of warm reset. */
        if (target_has_event_action(target, TARGET_EVENT_RESET_ASSERT))
                target_handle_event(target, TARGET_EVENT_RESET_ASSERT);
@@ -1740,21 +1682,30 @@ static int cortex_a8_assert_reset(struct target *target)
                /* REVISIT handle "pulls" cases, if there's
                 * hardware that needs them to work.
                 */
-               jtag_add_reset(0, 1);
+
+               /*
+                * FIXME: fix reset when transport is SWD. This is a temporary
+                * work-around for release v0.10 that is not intended to stay!
+                */
+               if (transport_is_swd() ||
+                               (target->reset_halt && (jtag_get_reset_config() & RESET_SRST_NO_GATING)))
+                       jtag_add_reset(0, 1);
+
        } else {
                LOG_ERROR("%s: how to reset?", target_name(target));
                return ERROR_FAIL;
        }
 
        /* registers are now invalid */
-       register_cache_invalidate(armv7a->arm.core_cache);
+       if (target_was_examined(target))
+               register_cache_invalidate(armv7a->arm.core_cache);
 
        target->state = TARGET_RESET;
 
        return ERROR_OK;
 }
 
-static int cortex_a8_deassert_reset(struct target *target)
+static int cortex_a_deassert_reset(struct target *target)
 {
        int retval;
 
@@ -1763,590 +1714,888 @@ static int cortex_a8_deassert_reset(struct target *target)
        /* be certain SRST is off */
        jtag_add_reset(0, 0);
 
-       retval = cortex_a8_poll(target);
-       if (retval != ERROR_OK)
-               return retval;
+       if (target_was_examined(target)) {
+               retval = cortex_a_poll(target);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
 
        if (target->reset_halt) {
                if (target->state != TARGET_HALTED) {
                        LOG_WARNING("%s: ran after reset and before halt ...",
                                target_name(target));
-                       retval = target_halt(target);
-                       if (retval != ERROR_OK)
-                               return retval;
+                       if (target_was_examined(target)) {
+                               retval = target_halt(target);
+                               if (retval != ERROR_OK)
+                                       return retval;
+                       } else
+                               target->state = TARGET_UNKNOWN;
                }
        }
 
        return ERROR_OK;
 }
 
-static int cortex_a8_write_apb_ab_memory(struct target *target,
-       uint32_t address, uint32_t size,
-       uint32_t count, const uint8_t *buffer)
+static int cortex_a_set_dcc_mode(struct target *target, uint32_t mode, uint32_t *dscr)
 {
-       /* write memory through APB-AP */
+       /* Changes the mode of the DCC between non-blocking, stall, and fast mode.
+        * New desired mode must be in mode. Current value of DSCR must be in
+        * *dscr, which is updated with new value.
+        *
+        * This function elides actually sending the mode-change over the debug
+        * interface if the mode is already set as desired.
+        */
+       uint32_t new_dscr = (*dscr & ~DSCR_EXT_DCC_MASK) | mode;
+       if (new_dscr != *dscr) {
+               struct armv7a_common *armv7a = target_to_armv7a(target);
+               int retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DSCR, new_dscr);
+               if (retval == ERROR_OK)
+                       *dscr = new_dscr;
+               return retval;
+       } else {
+               return ERROR_OK;
+       }
+}
 
-       int retval = ERROR_COMMAND_SYNTAX_ERROR;
+static int cortex_a_wait_dscr_bits(struct target *target, uint32_t mask,
+       uint32_t value, uint32_t *dscr)
+{
+       /* Waits until the specified bit(s) of DSCR take on a specified value. */
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct arm *arm = &armv7a->arm;
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
-       int total_bytes = count * size;
-       int total_u32;
-       int start_byte = address & 0x3;
-       int end_byte   = (address + total_bytes) & 0x3;
-       struct reg *reg;
-       uint32_t dscr;
-       uint8_t *tmp_buff = NULL;
+       int64_t then = timeval_ms();
+       int retval;
 
-       if (target->state != TARGET_HALTED) {
-               LOG_WARNING("target not halted");
-               return ERROR_TARGET_NOT_HALTED;
+       while ((*dscr & mask) != value) {
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DSCR, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+               if (timeval_ms() > then + 1000) {
+                       LOG_ERROR("timeout waiting for DSCR bit change");
+                       return ERROR_FAIL;
+               }
        }
+       return ERROR_OK;
+}
 
-       total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4);
+static int cortex_a_read_copro(struct target *target, uint32_t opcode,
+       uint32_t *data, uint32_t *dscr)
+{
+       int retval;
+       struct armv7a_common *armv7a = target_to_armv7a(target);
 
-       /* Mark register R0 as dirty, as it will be used
-        * for transferring the data.
-        * It will be restored automatically when exiting
-        * debug mode
-        */
-       reg = arm_reg_current(arm, 0);
-       reg->dirty = true;
+       /* Move from coprocessor to R0. */
+       retval = cortex_a_exec_opcode(target, opcode, dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
-       /*  clear any abort  */
-       retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, armv7a->debug_base + CPUDBG_DRCR, 1<<2);
+       /* Move from R0 to DTRTX. */
+       retval = cortex_a_exec_opcode(target, ARMV4_5_MCR(14, 0, 0, 0, 5, 0), dscr);
        if (retval != ERROR_OK)
                return retval;
 
-       /* This algorithm comes from either :
-        * Cortex-A8 TRM Example 12-25
-        * Cortex-R4 TRM Example 11-26
-        * (slight differences)
-        */
+       /* Wait until DTRTX is full (according to ARMv7-A/-R architecture
+        * manual section C8.4.3, checking InstrCmpl_l is not sufficient; one
+        * must also check TXfull_l). Most of the time this will be free
+        * because TXfull_l will be set immediately and cached in dscr. */
+       retval = cortex_a_wait_dscr_bits(target, DSCR_DTRTX_FULL_LATCHED,
+                       DSCR_DTRTX_FULL_LATCHED, dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
-       /* The algorithm only copies 32 bit words, so the buffer
-        * should be expanded to include the words at either end.
-        * The first and last words will be read first to avoid
-        * corruption if needed.
-        */
-       tmp_buff = (uint8_t *) malloc(total_u32 << 2);
+       /* Read the value transferred to DTRTX. */
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DTRTX, data);
+       if (retval != ERROR_OK)
+               return retval;
+
+       return ERROR_OK;
+}
 
+static int cortex_a_read_dfar_dfsr(struct target *target, uint32_t *dfar,
+       uint32_t *dfsr, uint32_t *dscr)
+{
+       int retval;
 
-       if ((start_byte != 0) && (total_u32 > 1)) {
-               /* First bytes not aligned - read the 32 bit word to avoid corrupting
-                * the other bytes in the word.
-                */
-               retval = cortex_a8_read_apb_ab_memory(target, (address & ~0x3), 4, 1, tmp_buff);
+       if (dfar) {
+               retval = cortex_a_read_copro(target, ARMV4_5_MRC(15, 0, 0, 6, 0, 0), dfar, dscr);
                if (retval != ERROR_OK)
-                       goto error_free_buff_w;
+                       return retval;
        }
 
-       /* If end of write is not aligned, or the write is less than 4 bytes */
-       if ((end_byte != 0) ||
-               ((total_u32 == 1) && (total_bytes != 4))) {
-
-               /* Read the last word to avoid corruption during 32 bit write */
-               int mem_offset = (total_u32-1) << 4;
-               retval = cortex_a8_read_apb_ab_memory(target, (address & ~0x3) + mem_offset, 4, 1, &tmp_buff[mem_offset]);
+       if (dfsr) {
+               retval = cortex_a_read_copro(target, ARMV4_5_MRC(15, 0, 0, 5, 0, 0), dfsr, dscr);
                if (retval != ERROR_OK)
-                       goto error_free_buff_w;
+                       return retval;
        }
 
-       /* Copy the write buffer over the top of the temporary buffer */
-       memcpy(&tmp_buff[start_byte], buffer, total_bytes);
+       return ERROR_OK;
+}
 
-       /* We now have a 32 bit aligned buffer that can be written */
+static int cortex_a_write_copro(struct target *target, uint32_t opcode,
+       uint32_t data, uint32_t *dscr)
+{
+       int retval;
+       struct armv7a_common *armv7a = target_to_armv7a(target);
 
-       /* Read DSCR */
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       /* Write the value into DTRRX. */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DTRRX, data);
        if (retval != ERROR_OK)
-               goto error_free_buff_w;
+               return retval;
 
-       /* Set DTR mode to Fast (2) */
-       dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_FAST_MODE;
-       retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_DSCR, dscr);
+       /* Move from DTRRX to R0. */
+       retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0), dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_w;
+               return retval;
 
-       /* Copy the destination address into R0 */
-       /*  - pend an instruction  MRC p14, 0, R0, c5, c0 */
-       retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_ITR, ARMV4_5_MRC(14, 0, 0, 0, 5, 0));
+       /* Move from R0 to coprocessor. */
+       retval = cortex_a_exec_opcode(target, opcode, dscr);
        if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
-       /* Write address into DTRRX, which triggers previous instruction */
-       retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_DTRRX, address & (~0x3));
+               return retval;
+
+       /* Wait until DTRRX is empty (according to ARMv7-A/-R architecture manual
+        * section C8.4.3, checking InstrCmpl_l is not sufficient; one must also
+        * check RXfull_l). Most of the time this will be free because RXfull_l
+        * will be cleared immediately and cached in dscr. */
+       retval = cortex_a_wait_dscr_bits(target, DSCR_DTRRX_FULL_LATCHED, 0, dscr);
        if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+               return retval;
 
-       /* Write the data transfer instruction into the ITR
-        * (STC p14, c5, [R0], 4)
-        */
-       retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_ITR, ARMV4_5_STC(0, 1, 0, 1, 14, 5, 0, 4));
+       return ERROR_OK;
+}
+
+static int cortex_a_write_dfar_dfsr(struct target *target, uint32_t dfar,
+       uint32_t dfsr, uint32_t *dscr)
+{
+       int retval;
+
+       retval = cortex_a_write_copro(target, ARMV4_5_MCR(15, 0, 0, 6, 0, 0), dfar, dscr);
        if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+               return retval;
 
-       /* Do the write */
-       retval = mem_ap_sel_write_buf_u32_noincr(swjdp, armv7a->debug_ap,
-                                       tmp_buff, (total_u32)<<2, armv7a->debug_base + CPUDBG_DTRRX);
+       retval = cortex_a_write_copro(target, ARMV4_5_MCR(15, 0, 0, 5, 0, 0), dfsr, dscr);
        if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+               return retval;
 
+       return ERROR_OK;
+}
 
-       /* Switch DTR mode back to non-blocking (0) */
-       dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_NON_BLOCKING;
-       retval = mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_DSCR, dscr);
-       if (retval != ERROR_OK)
-               goto error_unset_dtr_w;
+static int cortex_a_dfsr_to_error_code(uint32_t dfsr)
+{
+       uint32_t status, upper4;
 
-    /* Check for sticky abort flags in the DSCR */
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       if (dfsr & (1 << 9)) {
+               /* LPAE format. */
+               status = dfsr & 0x3f;
+               upper4 = status >> 2;
+               if (upper4 == 1 || upper4 == 2 || upper4 == 3 || upper4 == 15)
+                       return ERROR_TARGET_TRANSLATION_FAULT;
+               else if (status == 33)
+                       return ERROR_TARGET_UNALIGNED_ACCESS;
+               else
+                       return ERROR_TARGET_DATA_ABORT;
+       } else {
+               /* Normal format. */
+               status = ((dfsr >> 6) & 0x10) | (dfsr & 0xf);
+               if (status == 1)
+                       return ERROR_TARGET_UNALIGNED_ACCESS;
+               else if (status == 5 || status == 7 || status == 3 || status == 6 ||
+                               status == 9 || status == 11 || status == 13 || status == 15)
+                       return ERROR_TARGET_TRANSLATION_FAULT;
+               else
+                       return ERROR_TARGET_DATA_ABORT;
+       }
+}
+
+static int cortex_a_write_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, const uint8_t *buffer, uint32_t *dscr)
+{
+       /* Writes count objects of size size from *buffer. Old value of DSCR must
+        * be in *dscr; updated to new value. This is slow because it works for
+        * non-word-sized objects and (maybe) unaligned accesses. If size == 4 and
+        * the address is aligned, cortex_a_write_cpu_memory_fast should be
+        * preferred.
+        * Preconditions:
+        * - Address is in R0.
+        * - R0 is marked dirty.
+        */
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       struct arm *arm = &armv7a->arm;
+       int retval;
+
+       /* Mark register R1 as dirty, to use for transferring data. */
+       arm_reg_current(arm, 1)->dirty = true;
+
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_w;
-       if (dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE)) {
-               /* Abort occurred - clear it and exit */
-               LOG_ERROR("abort occurred - dscr = 0x%08x", dscr);
-               mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                                       armv7a->debug_base + CPUDBG_DRCR, 1<<2);
-               goto error_free_buff_w;
+               return retval;
+
+       /* Go through the objects. */
+       while (count) {
+               /* Write the value to store into DTRRX. */
+               uint32_t data, opcode;
+               if (size == 1)
+                       data = *buffer;
+               else if (size == 2)
+                       data = target_buffer_get_u16(target, buffer);
+               else
+                       data = target_buffer_get_u32(target, buffer);
+               retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DTRRX, data);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Transfer the value from DTRRX to R1. */
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 1, 0, 5, 0), dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Write the value transferred to R1 into memory. */
+               if (size == 1)
+                       opcode = ARMV4_5_STRB_IP(1, 0);
+               else if (size == 2)
+                       opcode = ARMV4_5_STRH_IP(1, 0);
+               else
+                       opcode = ARMV4_5_STRW_IP(1, 0);
+               retval = cortex_a_exec_opcode(target, opcode, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Check for faults and return early. */
+               if (*dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE))
+                       return ERROR_OK; /* A data fault is not considered a system failure. */
+
+               /* Wait until DTRRX is empty (according to ARMv7-A/-R architecture
+                * manual section C8.4.3, checking InstrCmpl_l is not sufficient; one
+                * must also check RXfull_l). Most of the time this will be free
+                * because RXfull_l will be cleared immediately and cached in dscr. */
+               retval = cortex_a_wait_dscr_bits(target, DSCR_DTRRX_FULL_LATCHED, 0, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Advance. */
+               buffer += size;
+               --count;
        }
 
-       /* Done */
-       free(tmp_buff);
        return ERROR_OK;
+}
 
-error_unset_dtr_w:
-       /* Unset DTR mode */
-       mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
-       dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_NON_BLOCKING;
-       mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_DSCR, dscr);
-error_free_buff_w:
-       LOG_ERROR("error");
-       free(tmp_buff);
-       return ERROR_FAIL;
+static int cortex_a_write_cpu_memory_fast(struct target *target,
+       uint32_t count, const uint8_t *buffer, uint32_t *dscr)
+{
+       /* Writes count objects of size 4 from *buffer. Old value of DSCR must be
+        * in *dscr; updated to new value. This is fast but only works for
+        * word-sized objects at aligned addresses.
+        * Preconditions:
+        * - Address is in R0 and must be a multiple of 4.
+        * - R0 is marked dirty.
+        */
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       int retval;
+
+       /* Switch to fast mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_FAST_MODE, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Latch STC instruction. */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_ITR, ARMV4_5_STC(0, 1, 0, 1, 14, 5, 0, 4));
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Transfer all the data and issue all the instructions. */
+       return mem_ap_write_buf_noincr(armv7a->debug_ap, buffer,
+                       4, count, armv7a->debug_base + CPUDBG_DTRRX);
 }
 
-static int cortex_a8_read_apb_ab_memory(struct target *target,
+static int cortex_a_write_cpu_memory(struct target *target,
        uint32_t address, uint32_t size,
-       uint32_t count, uint8_t *buffer)
+       uint32_t count, const uint8_t *buffer)
 {
-       /* read memory through APB-AP */
-
-       int retval = ERROR_COMMAND_SYNTAX_ERROR;
+       /* Write memory through the CPU. */
+       int retval, final_retval;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
        struct arm *arm = &armv7a->arm;
-       int total_bytes = count * size;
-       int total_u32;
-       int start_byte = address & 0x3;
-       struct reg *reg;
-       uint32_t dscr;
-       uint32_t *tmp_buff;
-       uint32_t buff32[2];
+       uint32_t dscr, orig_dfar, orig_dfsr, fault_dscr, fault_dfar, fault_dfsr;
+
+       LOG_DEBUG("Writing CPU memory address 0x%" PRIx32 " size %"  PRIu32 " count %"  PRIu32,
+                         address, size, count);
        if (target->state != TARGET_HALTED) {
                LOG_WARNING("target not halted");
                return ERROR_TARGET_NOT_HALTED;
        }
 
-       total_u32 = DIV_ROUND_UP((address & 3) + total_bytes, 4);
+       if (!count)
+               return ERROR_OK;
 
-       /* Due to offset word alignment, the  buffer may not have space
-        * to read the full first and last int32 words,
-        * hence, malloc space to read into, then copy and align into the buffer.
-        */
-       tmp_buff = malloc(total_u32 * 4);
-       if (tmp_buff == NULL)
-               return ERROR_FAIL;
+       /* Clear any abort. */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DRCR, DRCR_CLEAR_EXCEPTIONS);
+       if (retval != ERROR_OK)
+               return retval;
 
-       /* Mark register R0 as dirty, as it will be used
-        * for transferring the data.
-        * It will be restored automatically when exiting
-        * debug mode
-        */
-       reg = arm_reg_current(arm, 0);
-       reg->dirty = true;
+       /* Read DSCR. */
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
-       /*  clear any abort  */
-       retval =
-               mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap, armv7a->debug_base + CPUDBG_DRCR, 1<<2);
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, &dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_r;
+               goto out;
 
-       /* Read DSCR */
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       /* Mark R0 as dirty. */
+       arm_reg_current(arm, 0)->dirty = true;
 
-       /* This algorithm comes from either :
-        * Cortex-A8 TRM Example 12-24
-        * Cortex-R4 TRM Example 11-25
-        * (slight differences)
-        */
+       /* Read DFAR and DFSR, as they will be modified in the event of a fault. */
+       retval = cortex_a_read_dfar_dfsr(target, &orig_dfar, &orig_dfsr, &dscr);
+       if (retval != ERROR_OK)
+               goto out;
 
-       /* Set DTR access mode to stall mode b01  */
-       dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_STALL_MODE;
-       retval +=  mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_DSCR, dscr);
+       /* Get the memory address into R0. */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DTRRX, address);
+       if (retval != ERROR_OK)
+               goto out;
+       retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0), &dscr);
+       if (retval != ERROR_OK)
+               goto out;
+
+       if (size == 4 && (address % 4) == 0) {
+               /* We are doing a word-aligned transfer, so use fast mode. */
+               retval = cortex_a_write_cpu_memory_fast(target, count, buffer, &dscr);
+       } else {
+               /* Use slow path. */
+               retval = cortex_a_write_cpu_memory_slow(target, size, count, buffer, &dscr);
+       }
+
+out:
+       final_retval = retval;
+
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, &dscr);
+       if (final_retval == ERROR_OK)
+               final_retval = retval;
+
+       /* Wait for last issued instruction to complete. */
+       retval = cortex_a_wait_instrcmpl(target, &dscr, true);
+       if (final_retval == ERROR_OK)
+               final_retval = retval;
+
+       /* Wait until DTRRX is empty (according to ARMv7-A/-R architecture manual
+        * section C8.4.3, checking InstrCmpl_l is not sufficient; one must also
+        * check RXfull_l). Most of the time this will be free because RXfull_l
+        * will be cleared immediately and cached in dscr. However, don't do this
+        * if there is fault, because then the instruction might not have completed
+        * successfully. */
+       if (!(dscr & DSCR_STICKY_ABORT_PRECISE)) {
+               retval = cortex_a_wait_dscr_bits(target, DSCR_DTRRX_FULL_LATCHED, 0, &dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
+
+       /* If there were any sticky abort flags, clear them. */
+       if (dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE)) {
+               fault_dscr = dscr;
+               mem_ap_write_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DRCR, DRCR_CLEAR_EXCEPTIONS);
+               dscr &= ~(DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE);
+       } else {
+               fault_dscr = 0;
+       }
 
-    /* Write R0 with value 'address' using write procedure for stall mode */
-       /*   - Write the address for read access into DTRRX */
-       retval += mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_DTRRX, address & ~0x3);
-       /*  - Copy value from DTRRX to R0 using instruction mrc p14, 0, r0, c5, c0 */
-       cortex_a8_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0), &dscr);
+       /* Handle synchronous data faults. */
+       if (fault_dscr & DSCR_STICKY_ABORT_PRECISE) {
+               if (final_retval == ERROR_OK) {
+                       /* Final return value will reflect cause of fault. */
+                       retval = cortex_a_read_dfar_dfsr(target, &fault_dfar, &fault_dfsr, &dscr);
+                       if (retval == ERROR_OK) {
+                               LOG_ERROR("data abort at 0x%08" PRIx32 ", dfsr = 0x%08" PRIx32, fault_dfar, fault_dfsr);
+                               final_retval = cortex_a_dfsr_to_error_code(fault_dfsr);
+                       } else
+                               final_retval = retval;
+               }
+               /* Fault destroyed DFAR/DFSR; restore them. */
+               retval = cortex_a_write_dfar_dfsr(target, orig_dfar, orig_dfsr, &dscr);
+               if (retval != ERROR_OK)
+                       LOG_ERROR("error restoring dfar/dfsr - dscr = 0x%08" PRIx32, dscr);
+       }
+
+       /* Handle asynchronous data faults. */
+       if (fault_dscr & DSCR_STICKY_ABORT_IMPRECISE) {
+               if (final_retval == ERROR_OK)
+                       /* No other error has been recorded so far, so keep this one. */
+                       final_retval = ERROR_TARGET_DATA_ABORT;
+       }
+
+       /* If the DCC is nonempty, clear it. */
+       if (dscr & DSCR_DTRTX_FULL_LATCHED) {
+               uint32_t dummy;
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DTRTX, &dummy);
+               if (final_retval == ERROR_OK)
+                       final_retval = retval;
+       }
+       if (dscr & DSCR_DTRRX_FULL_LATCHED) {
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 1, 0, 5, 0), &dscr);
+               if (final_retval == ERROR_OK)
+                       final_retval = retval;
+       }
 
+       /* Done. */
+       return final_retval;
+}
 
-       /* Write the data transfer instruction (ldc p14, c5, [r0],4)
-        * and the DTR mode setting to fast mode
-        * in one combined write (since they are adjacent registers)
+static int cortex_a_read_cpu_memory_slow(struct target *target,
+       uint32_t size, uint32_t count, uint8_t *buffer, uint32_t *dscr)
+{
+       /* Reads count objects of size size into *buffer. Old value of DSCR must be
+        * in *dscr; updated to new value. This is slow because it works for
+        * non-word-sized objects and (maybe) unaligned accesses. If size == 4 and
+        * the address is aligned, cortex_a_read_cpu_memory_fast should be
+        * preferred.
+        * Preconditions:
+        * - Address is in R0.
+        * - R0 is marked dirty.
         */
-       buff32[0] = ARMV4_5_LDC(0, 1, 0, 1, 14, 5, 0, 4);
-       dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_FAST_MODE;
-       buff32[1] = dscr;
-       /*  group the 2 access CPUDBG_ITR 0x84 and CPUDBG_DSCR 0x88 */
-       retval += mem_ap_sel_write_buf_u32(swjdp, armv7a->debug_ap, (uint8_t *)buff32, 8,
-                       armv7a->debug_base + CPUDBG_ITR);
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       struct arm *arm = &armv7a->arm;
+       int retval;
+
+       /* Mark register R1 as dirty, to use for transferring data. */
+       arm_reg_current(arm, 1)->dirty = true;
+
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, dscr);
        if (retval != ERROR_OK)
-               goto error_unset_dtr_r;
+               return retval;
+
+       /* Go through the objects. */
+       while (count) {
+               /* Issue a load of the appropriate size to R1. */
+               uint32_t opcode, data;
+               if (size == 1)
+                       opcode = ARMV4_5_LDRB_IP(1, 0);
+               else if (size == 2)
+                       opcode = ARMV4_5_LDRH_IP(1, 0);
+               else
+                       opcode = ARMV4_5_LDRW_IP(1, 0);
+               retval = cortex_a_exec_opcode(target, opcode, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Issue a write of R1 to DTRTX. */
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MCR(14, 0, 1, 0, 5, 0), dscr);
+               if (retval != ERROR_OK)
+                       return retval;
 
+               /* Check for faults and return early. */
+               if (*dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE))
+                       return ERROR_OK; /* A data fault is not considered a system failure. */
 
-       /* The last word needs to be handled separately - read all other words in one go.
+               /* Wait until DTRTX is full (according to ARMv7-A/-R architecture
+                * manual section C8.4.3, checking InstrCmpl_l is not sufficient; one
+                * must also check TXfull_l). Most of the time this will be free
+                * because TXfull_l will be set immediately and cached in dscr. */
+               retval = cortex_a_wait_dscr_bits(target, DSCR_DTRTX_FULL_LATCHED,
+                               DSCR_DTRTX_FULL_LATCHED, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Read the value transferred to DTRTX into the buffer. */
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DTRTX, &data);
+               if (retval != ERROR_OK)
+                       return retval;
+               if (size == 1)
+                       *buffer = (uint8_t) data;
+               else if (size == 2)
+                       target_buffer_set_u16(target, buffer, (uint16_t) data);
+               else
+                       target_buffer_set_u32(target, buffer, data);
+
+               /* Advance. */
+               buffer += size;
+               --count;
+       }
+
+       return ERROR_OK;
+}
+
+static int cortex_a_read_cpu_memory_fast(struct target *target,
+       uint32_t count, uint8_t *buffer, uint32_t *dscr)
+{
+       /* Reads count objects of size 4 into *buffer. Old value of DSCR must be in
+        * *dscr; updated to new value. This is fast but only works for word-sized
+        * objects at aligned addresses.
+        * Preconditions:
+        * - Address is in R0 and must be a multiple of 4.
+        * - R0 is marked dirty.
         */
-       if (total_u32 > 1) {
-               /* Read the data - Each read of the DTRTX register causes the instruction to be reissued
-                * Abort flags are sticky, so can be read at end of transactions
-                *
-                * This data is read in aligned to 32 bit boundary, hence may need shifting later.
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       uint32_t u32;
+       int retval;
+
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       /* Issue the LDC instruction via a write to ITR. */
+       retval = cortex_a_exec_opcode(target, ARMV4_5_LDC(0, 1, 0, 1, 14, 5, 0, 4), dscr);
+       if (retval != ERROR_OK)
+               return retval;
+
+       count--;
+
+       if (count > 0) {
+               /* Switch to fast mode if not already in that mode. */
+               retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_FAST_MODE, dscr);
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Latch LDC instruction. */
+               retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_ITR, ARMV4_5_LDC(0, 1, 0, 1, 14, 5, 0, 4));
+               if (retval != ERROR_OK)
+                       return retval;
+
+               /* Read the value transferred to DTRTX into the buffer. Due to fast
+                * mode rules, this blocks until the instruction finishes executing and
+                * then reissues the read instruction to read the next word from
+                * memory. The last read of DTRTX in this call reads the second-to-last
+                * word from memory and issues the read instruction for the last word.
                 */
-               retval = mem_ap_sel_read_buf_u32_noincr(swjdp, armv7a->debug_ap, (uint8_t *)tmp_buff, (total_u32-1) * 4,
-                                                                       armv7a->debug_base + CPUDBG_DTRTX);
+               retval = mem_ap_read_buf_noincr(armv7a->debug_ap, buffer,
+                               4, count, armv7a->debug_base + CPUDBG_DTRTX);
                if (retval != ERROR_OK)
-                       goto error_unset_dtr_r;
+                       return retval;
+
+               /* Advance. */
+               buffer += count * 4;
        }
 
-       /* set DTR access mode back to non blocking b00  */
-       dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_NON_BLOCKING;
-       retval =  mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                                       armv7a->debug_base + CPUDBG_DSCR, dscr);
+       /* Wait for last issued instruction to complete. */
+       retval = cortex_a_wait_instrcmpl(target, dscr, false);
        if (retval != ERROR_OK)
-               goto error_free_buff_r;
+               return retval;
 
-       /* Wait for the final read instruction to finish */
-       do {
-               retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
-               if (retval != ERROR_OK)
-                       goto error_free_buff_r;
-       } while ((dscr & DSCR_INSTR_COMP) == 0);
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
+       /* Check for faults and return early. */
+       if (*dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE))
+               return ERROR_OK; /* A data fault is not considered a system failure. */
 
-       /* Check for sticky abort flags in the DSCR */
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       /* Wait until DTRTX is full (according to ARMv7-A/-R architecture manual
+        * section C8.4.3, checking InstrCmpl_l is not sufficient; one must also
+        * check TXfull_l). Most of the time this will be free because TXfull_l
+        * will be set immediately and cached in dscr. */
+       retval = cortex_a_wait_dscr_bits(target, DSCR_DTRTX_FULL_LATCHED,
+                       DSCR_DTRTX_FULL_LATCHED, dscr);
        if (retval != ERROR_OK)
-               goto error_free_buff_r;
-       if (dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE)) {
-               /* Abort occurred - clear it and exit */
-               LOG_ERROR("abort occurred - dscr = 0x%08x", dscr);
-               mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                                       armv7a->debug_base + CPUDBG_DRCR, 1<<2);
-               goto error_free_buff_r;
+               return retval;
+
+       /* Read the value transferred to DTRTX into the buffer. This is the last
+        * word. */
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DTRTX, &u32);
+       if (retval != ERROR_OK)
+               return retval;
+       target_buffer_set_u32(target, buffer, u32);
+
+       return ERROR_OK;
+}
+
+static int cortex_a_read_cpu_memory(struct target *target,
+       uint32_t address, uint32_t size,
+       uint32_t count, uint8_t *buffer)
+{
+       /* Read memory through the CPU. */
+       int retval, final_retval;
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+       struct arm *arm = &armv7a->arm;
+       uint32_t dscr, orig_dfar, orig_dfsr, fault_dscr, fault_dfar, fault_dfsr;
+
+       LOG_DEBUG("Reading CPU memory address 0x%" PRIx32 " size %"  PRIu32 " count %"  PRIu32,
+                         address, size, count);
+       if (target->state != TARGET_HALTED) {
+               LOG_WARNING("target not halted");
+               return ERROR_TARGET_NOT_HALTED;
        }
 
-       /* Read the last word */
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_DTRTX, &tmp_buff[total_u32 - 1]);
+       if (!count)
+               return ERROR_OK;
+
+       /* Clear any abort. */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DRCR, DRCR_CLEAR_EXCEPTIONS);
        if (retval != ERROR_OK)
-               goto error_free_buff_r;
+               return retval;
 
-       /* Copy and align the data into the output buffer */
-       memcpy(buffer, (uint8_t *)tmp_buff + start_byte, total_bytes);
+       /* Read DSCR */
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DSCR, &dscr);
+       if (retval != ERROR_OK)
+               return retval;
 
-       free(tmp_buff);
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, &dscr);
+       if (retval != ERROR_OK)
+               goto out;
 
-       /* Done */
-       return ERROR_OK;
+       /* Mark R0 as dirty. */
+       arm_reg_current(arm, 0)->dirty = true;
 
+       /* Read DFAR and DFSR, as they will be modified in the event of a fault. */
+       retval = cortex_a_read_dfar_dfsr(target, &orig_dfar, &orig_dfsr, &dscr);
+       if (retval != ERROR_OK)
+               goto out;
 
-error_unset_dtr_r:
-       /* Unset DTR mode */
-       mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
-       dscr = (dscr & ~DSCR_EXT_DCC_MASK) | DSCR_EXT_DCC_NON_BLOCKING;
-       mem_ap_sel_write_atomic_u32(swjdp, armv7a->debug_ap,
-                               armv7a->debug_base + CPUDBG_DSCR, dscr);
-error_free_buff_r:
-       LOG_ERROR("error");
-       free(tmp_buff);
-       return ERROR_FAIL;
+       /* Get the memory address into R0. */
+       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DTRRX, address);
+       if (retval != ERROR_OK)
+               goto out;
+       retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 0, 0, 5, 0), &dscr);
+       if (retval != ERROR_OK)
+               goto out;
+
+       if (size == 4 && (address % 4) == 0) {
+               /* We are doing a word-aligned transfer, so use fast mode. */
+               retval = cortex_a_read_cpu_memory_fast(target, count, buffer, &dscr);
+       } else {
+               /* Use slow path. */
+               retval = cortex_a_read_cpu_memory_slow(target, size, count, buffer, &dscr);
+       }
+
+out:
+       final_retval = retval;
+
+       /* Switch to non-blocking mode if not already in that mode. */
+       retval = cortex_a_set_dcc_mode(target, DSCR_EXT_DCC_NON_BLOCKING, &dscr);
+       if (final_retval == ERROR_OK)
+               final_retval = retval;
+
+       /* Wait for last issued instruction to complete. */
+       retval = cortex_a_wait_instrcmpl(target, &dscr, true);
+       if (final_retval == ERROR_OK)
+               final_retval = retval;
+
+       /* If there were any sticky abort flags, clear them. */
+       if (dscr & (DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE)) {
+               fault_dscr = dscr;
+               mem_ap_write_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DRCR, DRCR_CLEAR_EXCEPTIONS);
+               dscr &= ~(DSCR_STICKY_ABORT_PRECISE | DSCR_STICKY_ABORT_IMPRECISE);
+       } else {
+               fault_dscr = 0;
+       }
+
+       /* Handle synchronous data faults. */
+       if (fault_dscr & DSCR_STICKY_ABORT_PRECISE) {
+               if (final_retval == ERROR_OK) {
+                       /* Final return value will reflect cause of fault. */
+                       retval = cortex_a_read_dfar_dfsr(target, &fault_dfar, &fault_dfsr, &dscr);
+                       if (retval == ERROR_OK) {
+                               LOG_ERROR("data abort at 0x%08" PRIx32 ", dfsr = 0x%08" PRIx32, fault_dfar, fault_dfsr);
+                               final_retval = cortex_a_dfsr_to_error_code(fault_dfsr);
+                       } else
+                               final_retval = retval;
+               }
+               /* Fault destroyed DFAR/DFSR; restore them. */
+               retval = cortex_a_write_dfar_dfsr(target, orig_dfar, orig_dfsr, &dscr);
+               if (retval != ERROR_OK)
+                       LOG_ERROR("error restoring dfar/dfsr - dscr = 0x%08" PRIx32, dscr);
+       }
+
+       /* Handle asynchronous data faults. */
+       if (fault_dscr & DSCR_STICKY_ABORT_IMPRECISE) {
+               if (final_retval == ERROR_OK)
+                       /* No other error has been recorded so far, so keep this one. */
+                       final_retval = ERROR_TARGET_DATA_ABORT;
+       }
+
+       /* If the DCC is nonempty, clear it. */
+       if (dscr & DSCR_DTRTX_FULL_LATCHED) {
+               uint32_t dummy;
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DTRTX, &dummy);
+               if (final_retval == ERROR_OK)
+                       final_retval = retval;
+       }
+       if (dscr & DSCR_DTRRX_FULL_LATCHED) {
+               retval = cortex_a_exec_opcode(target, ARMV4_5_MRC(14, 0, 1, 0, 5, 0), &dscr);
+               if (final_retval == ERROR_OK)
+                       final_retval = retval;
+       }
+
+       /* Done. */
+       return final_retval;
 }
 
 
 /*
- * Cortex-A8 Memory access
+ * Cortex-A Memory access
  *
- * This is same Cortex M3 but we must also use the correct
+ * This is same Cortex-M3 but we must also use the correct
  * ap number for every access.
  */
 
-static int cortex_a8_read_phys_memory(struct target *target,
-       uint32_t address, uint32_t size,
+static int cortex_a_read_phys_memory(struct target *target,
+       target_addr_t address, uint32_t size,
        uint32_t count, uint8_t *buffer)
 {
-       struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
-       int retval = ERROR_COMMAND_SYNTAX_ERROR;
-       uint8_t apsel = swjdp->apsel;
-       LOG_DEBUG("Reading memory at real address 0x%x; size %d; count %d",
+       int retval;
+
+       if (!count || !buffer)
+               return ERROR_COMMAND_SYNTAX_ERROR;
+
+       LOG_DEBUG("Reading memory at real address " TARGET_ADDR_FMT "; size %" PRId32 "; count %" PRId32,
                address, size, count);
 
-       if (count && buffer) {
-
-               if (armv7a->memory_ap_available && (apsel == armv7a->memory_ap)) {
-
-                       /* read memory through AHB-AP */
-
-                       switch (size) {
-                               case 4:
-                                       retval = mem_ap_sel_read_buf_u32(swjdp, armv7a->memory_ap,
-                                               buffer, 4 * count, address);
-                                       break;
-                               case 2:
-                                       retval = mem_ap_sel_read_buf_u16(swjdp, armv7a->memory_ap,
-                                               buffer, 2 * count, address);
-                                       break;
-                               case 1:
-                                       retval = mem_ap_sel_read_buf_u8(swjdp, armv7a->memory_ap,
-                                               buffer, count, address);
-                                       break;
-                       }
-               } else {
+       /* read memory through the CPU */
+       cortex_a_prep_memaccess(target, 1);
+       retval = cortex_a_read_cpu_memory(target, address, size, count, buffer);
+       cortex_a_post_memaccess(target, 1);
 
-                       /* read memory through APB-AP */
-                       if (!armv7a->is_armv7r) {
-                               /*  disable mmu */
-                               retval = cortex_a8_mmu_modify(target, 0);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-                       retval = cortex_a8_read_apb_ab_memory(target, address, size, count, buffer);
-               }
-       }
        return retval;
 }
 
-static int cortex_a8_read_memory(struct target *target, uint32_t address,
+static int cortex_a_read_memory(struct target *target, target_addr_t address,
        uint32_t size, uint32_t count, uint8_t *buffer)
 {
-       int enabled = 0;
-       uint32_t virt, phys;
        int retval;
-       struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
-       uint8_t apsel = swjdp->apsel;
-
-       /* cortex_a8 handles unaligned memory access */
-       LOG_DEBUG("Reading memory at address 0x%x; size %d; count %d", address,
-               size, count);
-       if (armv7a->memory_ap_available && (apsel == armv7a->memory_ap)) {
-               if (!armv7a->is_armv7r) {
-                       retval = cortex_a8_mmu(target, &enabled);
-                       if (retval != ERROR_OK)
-                               return retval;
 
+       /* cortex_a handles unaligned memory access */
+       LOG_DEBUG("Reading memory at address " TARGET_ADDR_FMT "; size %" PRId32 "; count %" PRId32,
+               address, size, count);
 
-                       if (enabled) {
-                               virt = address;
-                               retval = cortex_a8_virt2phys(target, virt, &phys);
-                               if (retval != ERROR_OK)
-                                       return retval;
+       cortex_a_prep_memaccess(target, 0);
+       retval = cortex_a_read_cpu_memory(target, address, size, count, buffer);
+       cortex_a_post_memaccess(target, 0);
 
-                               LOG_DEBUG("Reading at virtual address. Translating v:0x%x to r:0x%x",
-                                       virt, phys);
-                               address = phys;
-                       }
-               }
-               retval = cortex_a8_read_phys_memory(target, address, size, count, buffer);
-       } else {
-               if (!armv7a->is_armv7r) {
-                       retval = cortex_a8_check_address(target, address);
-                       if (retval != ERROR_OK)
-                               return retval;
-                       /*  enable mmu */
-                       retval = cortex_a8_mmu_modify(target, 1);
-                       if (retval != ERROR_OK)
-                               return retval;
-               }
-               retval = cortex_a8_read_apb_ab_memory(target, address, size, count, buffer);
-       }
        return retval;
 }
 
-static int cortex_a8_write_phys_memory(struct target *target,
-       uint32_t address, uint32_t size,
+static int cortex_a_write_phys_memory(struct target *target,
+       target_addr_t address, uint32_t size,
        uint32_t count, const uint8_t *buffer)
 {
-       struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
-       int retval = ERROR_COMMAND_SYNTAX_ERROR;
-       uint8_t apsel = swjdp->apsel;
-
-       LOG_DEBUG("Writing memory to real address 0x%x; size %d; count %d", address,
-               size, count);
-
-       if (count && buffer) {
-
-               if (armv7a->memory_ap_available && (apsel == armv7a->memory_ap)) {
-
-                       /* write memory through AHB-AP */
-
-                       switch (size) {
-                               case 4:
-                                       retval = mem_ap_sel_write_buf_u32(swjdp, armv7a->memory_ap,
-                                               buffer, 4 * count, address);
-                                       break;
-                               case 2:
-                                       retval = mem_ap_sel_write_buf_u16(swjdp, armv7a->memory_ap,
-                                               buffer, 2 * count, address);
-                                       break;
-                               case 1:
-                                       retval = mem_ap_sel_write_buf_u8(swjdp, armv7a->memory_ap,
-                                               buffer, count, address);
-                                       break;
-                       }
-
-               } else {
-
-                       /* write memory through APB-AP */
-                       if (!armv7a->is_armv7r) {
-                               retval = cortex_a8_mmu_modify(target, 0);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-                       return cortex_a8_write_apb_ab_memory(target, address, size, count, buffer);
-               }
-       }
+       int retval;
 
+       if (!count || !buffer)
+               return ERROR_COMMAND_SYNTAX_ERROR;
 
-       /* REVISIT this op is generic ARMv7-A/R stuff */
-       if (retval == ERROR_OK && target->state == TARGET_HALTED) {
-               struct arm_dpm *dpm = armv7a->arm.dpm;
+       LOG_DEBUG("Writing memory to real address " TARGET_ADDR_FMT "; size %" PRId32 "; count %" PRId32,
+               address, size, count);
 
-               retval = dpm->prepare(dpm);
-               if (retval != ERROR_OK)
-                       return retval;
+       /* write memory through the CPU */
+       cortex_a_prep_memaccess(target, 1);
+       retval = cortex_a_write_cpu_memory(target, address, size, count, buffer);
+       cortex_a_post_memaccess(target, 1);
 
-               /* The Cache handling will NOT work with MMU active, the
-                * wrong addresses will be invalidated!
-                *
-                * For both ICache and DCache, walk all cache lines in the
-                * address range. Cortex-A8 has fixed 64 byte line length.
-                *
-                * REVISIT per ARMv7, these may trigger watchpoints ...
-                */
+       return retval;
+}
 
-               /* invalidate I-Cache */
-               if (armv7a->armv7a_mmu.armv7a_cache.i_cache_enabled) {
-                       /* ICIMVAU - Invalidate Cache single entry
-                        * with MVA to PoU
-                        *      MCR p15, 0, r0, c7, c5, 1
-                        */
-                       for (uint32_t cacheline = address;
-                               cacheline < address + size * count;
-                               cacheline += 64) {
-                               retval = dpm->instr_write_data_r0(dpm,
-                                               ARMV4_5_MCR(15, 0, 0, 7, 5, 1),
-                                               cacheline);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-               }
+static int cortex_a_write_memory(struct target *target, target_addr_t address,
+       uint32_t size, uint32_t count, const uint8_t *buffer)
+{
+       int retval;
 
-               /* invalidate D-Cache */
-               if (armv7a->armv7a_mmu.armv7a_cache.d_u_cache_enabled) {
-                       /* DCIMVAC - Invalidate data Cache line
-                        * with MVA to PoC
-                        *      MCR p15, 0, r0, c7, c6, 1
-                        */
-                       for (uint32_t cacheline = address;
-                               cacheline < address + size * count;
-                               cacheline += 64) {
-                               retval = dpm->instr_write_data_r0(dpm,
-                                               ARMV4_5_MCR(15, 0, 0, 7, 6, 1),
-                                               cacheline);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                       }
-               }
+       /* cortex_a handles unaligned memory access */
+       LOG_DEBUG("Writing memory at address " TARGET_ADDR_FMT "; size %" PRId32 "; count %" PRId32,
+               address, size, count);
 
-               /* (void) */ dpm->finish(dpm);
-       }
+       /* memory writes bypass the caches, must flush before writing */
+       armv7a_cache_auto_flush_on_write(target, address, size * count);
 
+       cortex_a_prep_memaccess(target, 0);
+       retval = cortex_a_write_cpu_memory(target, address, size, count, buffer);
+       cortex_a_post_memaccess(target, 0);
        return retval;
 }
 
-static int cortex_a8_write_memory(struct target *target, uint32_t address,
-       uint32_t size, uint32_t count, const uint8_t *buffer)
+static int cortex_a_read_buffer(struct target *target, target_addr_t address,
+                               uint32_t count, uint8_t *buffer)
 {
-       int enabled = 0;
-       uint32_t virt, phys;
-       int retval;
-       struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
-       uint8_t apsel = swjdp->apsel;
-       /* cortex_a8 handles unaligned memory access */
-       LOG_DEBUG("Writing memory at address 0x%x; size %d; count %d", address,
-               size, count);
-       if (armv7a->memory_ap_available && (apsel == armv7a->memory_ap)) {
-
-               LOG_DEBUG("Writing memory to address 0x%x; size %d; count %d", address, size,
-                       count);
-               if (!armv7a->is_armv7r) {
-                       retval = cortex_a8_mmu(target, &enabled);
+       uint32_t size;
+
+       /* Align up to maximum 4 bytes. The loop condition makes sure the next pass
+        * will have something to do with the size we leave to it. */
+       for (size = 1; size < 4 && count >= size * 2 + (address & size); size *= 2) {
+               if (address & size) {
+                       int retval = target_read_memory(target, address, size, 1, buffer);
                        if (retval != ERROR_OK)
                                return retval;
+                       address += size;
+                       count -= size;
+                       buffer += size;
+               }
+       }
 
-                       if (enabled) {
-                               virt = address;
-                               retval = cortex_a8_virt2phys(target, virt, &phys);
-                               if (retval != ERROR_OK)
-                                       return retval;
-                               LOG_DEBUG("Writing to virtual address. Translating v:0x%x to r:0x%x",
-                                       virt,
-                                       phys);
-                               address = phys;
-                       }
+       /* Read the data with as large access size as possible. */
+       for (; size > 0; size /= 2) {
+               uint32_t aligned = count - count % size;
+               if (aligned > 0) {
+                       int retval = target_read_memory(target, address, size, aligned / size, buffer);
+                       if (retval != ERROR_OK)
+                               return retval;
+                       address += aligned;
+                       count -= aligned;
+                       buffer += aligned;
                }
+       }
 
-               retval = cortex_a8_write_phys_memory(target, address, size,
-                               count, buffer);
-       } else {
-               if (!armv7a->is_armv7r) {
-                       retval = cortex_a8_check_address(target, address);
+       return ERROR_OK;
+}
+
+static int cortex_a_write_buffer(struct target *target, target_addr_t address,
+                                uint32_t count, const uint8_t *buffer)
+{
+       uint32_t size;
+
+       /* Align up to maximum 4 bytes. The loop condition makes sure the next pass
+        * will have something to do with the size we leave to it. */
+       for (size = 1; size < 4 && count >= size * 2 + (address & size); size *= 2) {
+               if (address & size) {
+                       int retval = target_write_memory(target, address, size, 1, buffer);
                        if (retval != ERROR_OK)
                                return retval;
-                       /*  enable mmu  */
-                       retval = cortex_a8_mmu_modify(target, 1);
+                       address += size;
+                       count -= size;
+                       buffer += size;
+               }
+       }
+
+       /* Write the data with as large access size as possible. */
+       for (; size > 0; size /= 2) {
+               uint32_t aligned = count - count % size;
+               if (aligned > 0) {
+                       int retval = target_write_memory(target, address, size, aligned / size, buffer);
                        if (retval != ERROR_OK)
                                return retval;
+                       address += aligned;
+                       count -= aligned;
+                       buffer += aligned;
                }
-               retval = cortex_a8_write_apb_ab_memory(target, address, size, count, buffer);
        }
-       return retval;
+
+       return ERROR_OK;
 }
 
-static int cortex_a8_handle_target_request(void *priv)
+static int cortex_a_handle_target_request(void *priv)
 {
        struct target *target = priv;
        struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
        int retval;
 
        if (!target_was_examined(target))
@@ -2357,18 +2606,23 @@ static int cortex_a8_handle_target_request(void *priv)
        if (target->state == TARGET_RUNNING) {
                uint32_t request;
                uint32_t dscr;
-               retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                                armv7a->debug_base + CPUDBG_DSCR, &dscr);
 
                /* check if we have data */
+               int64_t then = timeval_ms();
                while ((dscr & DSCR_DTR_TX_FULL) && (retval == ERROR_OK)) {
-                       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
+                       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                                        armv7a->debug_base + CPUDBG_DTRTX, &request);
                        if (retval == ERROR_OK) {
                                target_request(target, request);
-                               retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
+                               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                                                armv7a->debug_base + CPUDBG_DSCR, &dscr);
                        }
+                       if (timeval_ms() > then + 1000) {
+                               LOG_ERROR("Timeout waiting for dtr tx full");
+                               return ERROR_FAIL;
+                       }
                }
        }
 
@@ -2376,255 +2630,324 @@ static int cortex_a8_handle_target_request(void *priv)
 }
 
 /*
- * Cortex-A8 target information and configuration
+ * Cortex-A target information and configuration
  */
 
-static int cortex_a8_examine_first(struct target *target)
+static int cortex_a_examine_first(struct target *target)
 {
-       struct cortex_a8_common *cortex_a8 = target_to_cortex_a8(target);
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
        struct adiv5_dap *swjdp = armv7a->arm.dap;
+
        int i;
        int retval = ERROR_OK;
-       uint32_t didr, ctypr, ttypr, cpuid;
+       uint32_t didr, cpuid, dbg_osreg;
 
-       /* We do one extra read to ensure DAP is configured,
-        * we call ahbap_debugport_init(swjdp) instead
-        */
-       retval = ahbap_debugport_init(swjdp);
-       if (retval != ERROR_OK)
-               return retval;
-
-       /* Search for the APB-AB - it is needed for access to debug registers */
+       /* Search for the APB-AP - it is needed for access to debug registers */
        retval = dap_find_ap(swjdp, AP_TYPE_APB_AP, &armv7a->debug_ap);
        if (retval != ERROR_OK) {
                LOG_ERROR("Could not find APB-AP for debug access");
                return retval;
        }
-       /* Search for the AHB-AB */
-       retval = dap_find_ap(swjdp, AP_TYPE_AHB_AP, &armv7a->memory_ap);
+
+       retval = mem_ap_init(armv7a->debug_ap);
        if (retval != ERROR_OK) {
-               /* AHB-AP not found - use APB-AP */
-               LOG_DEBUG("Could not find AHB-AP - using APB-AP for memory access");
-               armv7a->memory_ap_available = false;
-       } else {
-               armv7a->memory_ap_available = true;
+               LOG_ERROR("Could not initialize the APB-AP");
+               return retval;
        }
 
+       armv7a->debug_ap->memaccess_tck = 80;
 
        if (!target->dbgbase_set) {
                uint32_t dbgbase;
                /* Get ROM Table base */
                uint32_t apid;
-               retval = dap_get_debugbase(swjdp, 1, &dbgbase, &apid);
+               int32_t coreidx = target->coreid;
+               LOG_DEBUG("%s's dbgbase is not set, trying to detect using the ROM table",
+                         target->cmd_name);
+               retval = dap_get_debugbase(armv7a->debug_ap, &dbgbase, &apid);
                if (retval != ERROR_OK)
                        return retval;
                /* Lookup 0x15 -- Processor DAP */
-               retval = dap_lookup_cs_component(swjdp, 1, dbgbase, 0x15,
-                               &armv7a->debug_base);
-               if (retval != ERROR_OK)
+               retval = dap_lookup_cs_component(armv7a->debug_ap, dbgbase, 0x15,
+                               &armv7a->debug_base, &coreidx);
+               if (retval != ERROR_OK) {
+                       LOG_ERROR("Can't detect %s's dbgbase from the ROM table; you need to specify it explicitly.",
+                                 target->cmd_name);
                        return retval;
+               }
+               LOG_DEBUG("Detected core %" PRId32 " dbgbase: %08" PRIx32,
+                         target->coreid, armv7a->debug_base);
        } else
                armv7a->debug_base = target->dbgbase;
 
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_CPUID, &cpuid);
-       if (retval != ERROR_OK)
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                       armv7a->debug_base + CPUDBG_DIDR, &didr);
+       if (retval != ERROR_OK) {
+               LOG_DEBUG("Examine %s failed", "DIDR");
                return retval;
+       }
 
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
                        armv7a->debug_base + CPUDBG_CPUID, &cpuid);
        if (retval != ERROR_OK) {
                LOG_DEBUG("Examine %s failed", "CPUID");
                return retval;
        }
 
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_CTYPR, &ctypr);
-       if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "CTYPR");
-               return retval;
-       }
+       LOG_DEBUG("didr = 0x%08" PRIx32, didr);
+       LOG_DEBUG("cpuid = 0x%08" PRIx32, cpuid);
 
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_TTYPR, &ttypr);
-       if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "TTYPR");
-               return retval;
-       }
+       cortex_a->didr = didr;
+       cortex_a->cpuid = cpuid;
 
-       retval = mem_ap_sel_read_atomic_u32(swjdp, armv7a->debug_ap,
-                       armv7a->debug_base + CPUDBG_DIDR, &didr);
-       if (retval != ERROR_OK) {
-               LOG_DEBUG("Examine %s failed", "DIDR");
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                                   armv7a->debug_base + CPUDBG_PRSR, &dbg_osreg);
+       if (retval != ERROR_OK)
                return retval;
+       LOG_DEBUG("target->coreid %" PRId32 " DBGPRSR  0x%" PRIx32, target->coreid, dbg_osreg);
+
+       if ((dbg_osreg & PRSR_POWERUP_STATUS) == 0) {
+               LOG_ERROR("target->coreid %" PRId32 " powered down!", target->coreid);
+               target->state = TARGET_UNKNOWN; /* TARGET_NO_POWER? */
+               return ERROR_TARGET_INIT_FAILED;
        }
 
-       LOG_DEBUG("cpuid = 0x%08" PRIx32, cpuid);
-       LOG_DEBUG("ctypr = 0x%08" PRIx32, ctypr);
-       LOG_DEBUG("ttypr = 0x%08" PRIx32, ttypr);
-       LOG_DEBUG("didr = 0x%08" PRIx32, didr);
+       if (dbg_osreg & PRSR_STICKY_RESET_STATUS)
+               LOG_DEBUG("target->coreid %" PRId32 " was reset!", target->coreid);
 
-       armv7a->arm.core_type = ARM_MODE_MON;
-       retval = cortex_a8_dpm_setup(cortex_a8, didr);
+       /* Read DBGOSLSR and check if OSLK is implemented */
+       retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_OSLSR, &dbg_osreg);
        if (retval != ERROR_OK)
                return retval;
+       LOG_DEBUG("target->coreid %" PRId32 " DBGOSLSR 0x%" PRIx32, target->coreid, dbg_osreg);
+
+       /* check if OS Lock is implemented */
+       if ((dbg_osreg & OSLSR_OSLM) == OSLSR_OSLM0 || (dbg_osreg & OSLSR_OSLM) == OSLSR_OSLM1) {
+               /* check if OS Lock is set */
+               if (dbg_osreg & OSLSR_OSLK) {
+                       LOG_DEBUG("target->coreid %" PRId32 " OSLock set! Trying to unlock", target->coreid);
+
+                       retval = mem_ap_write_atomic_u32(armv7a->debug_ap,
+                                                       armv7a->debug_base + CPUDBG_OSLAR,
+                                                       0);
+                       if (retval == ERROR_OK)
+                               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                                                       armv7a->debug_base + CPUDBG_OSLSR, &dbg_osreg);
+
+                       /* if we fail to access the register or cannot reset the OSLK bit, bail out */
+                       if (retval != ERROR_OK || (dbg_osreg & OSLSR_OSLK) != 0) {
+                               LOG_ERROR("target->coreid %" PRId32 " OSLock sticky, core not powered?",
+                                               target->coreid);
+                               target->state = TARGET_UNKNOWN; /* TARGET_NO_POWER? */
+                               return ERROR_TARGET_INIT_FAILED;
+                       }
+               }
+       }
+
+       armv7a->arm.core_type = ARM_MODE_MON;
+
+       /* Avoid recreating the registers cache */
+       if (!target_was_examined(target)) {
+               retval = cortex_a_dpm_setup(cortex_a, didr);
+               if (retval != ERROR_OK)
+                       return retval;
+       }
 
        /* Setup Breakpoint Register Pairs */
-       cortex_a8->brp_num = ((didr >> 24) & 0x0F) + 1;
-       cortex_a8->brp_num_context = ((didr >> 20) & 0x0F) + 1;
-       cortex_a8->brp_num_available = cortex_a8->brp_num;
-       cortex_a8->brp_list = calloc(cortex_a8->brp_num, sizeof(struct cortex_a8_brp));
-/*     cortex_a8->brb_enabled = ????; */
-       for (i = 0; i < cortex_a8->brp_num; i++) {
-               cortex_a8->brp_list[i].used = 0;
-               if (i < (cortex_a8->brp_num-cortex_a8->brp_num_context))
-                       cortex_a8->brp_list[i].type = BRP_NORMAL;
+       cortex_a->brp_num = ((didr >> 24) & 0x0F) + 1;
+       cortex_a->brp_num_context = ((didr >> 20) & 0x0F) + 1;
+       cortex_a->brp_num_available = cortex_a->brp_num;
+       free(cortex_a->brp_list);
+       cortex_a->brp_list = calloc(cortex_a->brp_num, sizeof(struct cortex_a_brp));
+/*     cortex_a->brb_enabled = ????; */
+       for (i = 0; i < cortex_a->brp_num; i++) {
+               cortex_a->brp_list[i].used = 0;
+               if (i < (cortex_a->brp_num-cortex_a->brp_num_context))
+                       cortex_a->brp_list[i].type = BRP_NORMAL;
                else
-                       cortex_a8->brp_list[i].type = BRP_CONTEXT;
-               cortex_a8->brp_list[i].value = 0;
-               cortex_a8->brp_list[i].control = 0;
-               cortex_a8->brp_list[i].BRPn = i;
+                       cortex_a->brp_list[i].type = BRP_CONTEXT;
+               cortex_a->brp_list[i].value = 0;
+               cortex_a->brp_list[i].control = 0;
+               cortex_a->brp_list[i].BRPn = i;
        }
 
-       LOG_DEBUG("Configured %i hw breakpoints", cortex_a8->brp_num);
+       LOG_DEBUG("Configured %i hw breakpoints", cortex_a->brp_num);
+
+       /* select debug_ap as default */
+       swjdp->apsel = armv7a->debug_ap->ap_num;
 
        target_set_examined(target);
        return ERROR_OK;
 }
 
-static int cortex_a8_examine(struct target *target)
+static int cortex_a_examine(struct target *target)
 {
        int retval = ERROR_OK;
 
-       /* don't re-probe hardware after each reset */
-       if (!target_was_examined(target))
-               retval = cortex_a8_examine_first(target);
+       /* Reestablish communication after target reset */
+       retval = cortex_a_examine_first(target);
 
        /* Configure core debug access */
        if (retval == ERROR_OK)
-               retval = cortex_a8_init_debug_access(target);
+               retval = cortex_a_init_debug_access(target);
 
        return retval;
 }
 
 /*
- *     Cortex-A8 target creation and initialization
+ *     Cortex-A target creation and initialization
  */
 
-static int cortex_a8_init_target(struct command_context *cmd_ctx,
+static int cortex_a_init_target(struct command_context *cmd_ctx,
        struct target *target)
 {
        /* examine_first() does a bunch of this */
+       arm_semihosting_init(target);
        return ERROR_OK;
 }
 
-static int cortex_a8_init_arch_info(struct target *target,
-       struct cortex_a8_common *cortex_a8, struct jtag_tap *tap)
+static int cortex_a_init_arch_info(struct target *target,
+       struct cortex_a_common *cortex_a, struct adiv5_dap *dap)
 {
-       struct armv7a_common *armv7a = &cortex_a8->armv7a_common;
-       struct adiv5_dap *dap = &armv7a->dap;
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
 
+       /* Setup struct cortex_a_common */
+       cortex_a->common_magic = CORTEX_A_COMMON_MAGIC;
        armv7a->arm.dap = dap;
 
-       /* Setup struct cortex_a8_common */
-       cortex_a8->common_magic = CORTEX_A8_COMMON_MAGIC;
-       /*  tap has no dap initialized */
-       if (!tap->dap) {
-               armv7a->arm.dap = dap;
-               /* Setup struct cortex_a8_common */
-
-               /* prepare JTAG information for the new target */
-               cortex_a8->jtag_info.tap = tap;
-               cortex_a8->jtag_info.scann_size = 4;
-
-               /* Leave (only) generic DAP stuff for debugport_init() */
-               dap->jtag_info = &cortex_a8->jtag_info;
-
-               /* Number of bits for tar autoincrement, impl. dep. at least 10 */
-               dap->tar_autoincr_block = (1 << 10);
-               dap->memaccess_tck = 80;
-               tap->dap = dap;
-       } else
-               armv7a->arm.dap = tap->dap;
-
-       cortex_a8->fast_reg_read = 0;
-
        /* register arch-specific functions */
        armv7a->examine_debug_reason = NULL;
 
-       armv7a->post_debug_entry = cortex_a8_post_debug_entry;
+       armv7a->post_debug_entry = cortex_a_post_debug_entry;
 
        armv7a->pre_restore_context = NULL;
 
-       armv7a->armv7a_mmu.read_physical_memory = cortex_a8_read_phys_memory;
+       armv7a->armv7a_mmu.read_physical_memory = cortex_a_read_phys_memory;
 
 
-/*     arm7_9->handle_target_request = cortex_a8_handle_target_request; */
+/*     arm7_9->handle_target_request = cortex_a_handle_target_request; */
 
        /* REVISIT v7a setup should be in a v7a-specific routine */
        armv7a_init_arch_info(target, armv7a);
-       target_register_timer_callback(cortex_a8_handle_target_request, 1, 1, target);
+       target_register_timer_callback(cortex_a_handle_target_request, 1,
+               TARGET_TIMER_TYPE_PERIODIC, target);
 
        return ERROR_OK;
 }
 
-static int cortex_a8_target_create(struct target *target, Jim_Interp *interp)
+static int cortex_a_target_create(struct target *target, Jim_Interp *interp)
 {
-       struct cortex_a8_common *cortex_a8 = calloc(1, sizeof(struct cortex_a8_common));
+       struct cortex_a_common *cortex_a;
+       struct adiv5_private_config *pc;
+
+       if (target->private_config == NULL)
+               return ERROR_FAIL;
+
+       pc = (struct adiv5_private_config *)target->private_config;
 
-       cortex_a8->armv7a_common.is_armv7r = false;
+       cortex_a = calloc(1, sizeof(struct cortex_a_common));
+       if (cortex_a == NULL) {
+               LOG_ERROR("Out of memory");
+               return ERROR_FAIL;
+       }
+       cortex_a->common_magic = CORTEX_A_COMMON_MAGIC;
+       cortex_a->armv7a_common.is_armv7r = false;
+       cortex_a->armv7a_common.arm.arm_vfp_version = ARM_VFP_V3;
 
-       return cortex_a8_init_arch_info(target, cortex_a8, target->tap);
+       return cortex_a_init_arch_info(target, cortex_a, pc->dap);
 }
 
 static int cortex_r4_target_create(struct target *target, Jim_Interp *interp)
 {
-       struct cortex_a8_common *cortex_a8 = calloc(1, sizeof(struct cortex_a8_common));
+       struct cortex_a_common *cortex_a;
+       struct adiv5_private_config *pc;
+
+       pc = (struct adiv5_private_config *)target->private_config;
+       if (adiv5_verify_config(pc) != ERROR_OK)
+               return ERROR_FAIL;
 
-       cortex_a8->armv7a_common.is_armv7r = true;
+       cortex_a = calloc(1, sizeof(struct cortex_a_common));
+       if (cortex_a == NULL) {
+               LOG_ERROR("Out of memory");
+               return ERROR_FAIL;
+       }
+       cortex_a->common_magic = CORTEX_A_COMMON_MAGIC;
+       cortex_a->armv7a_common.is_armv7r = true;
 
-       return cortex_a8_init_arch_info(target, cortex_a8, target->tap);
+       return cortex_a_init_arch_info(target, cortex_a, pc->dap);
 }
 
+static void cortex_a_deinit_target(struct target *target)
+{
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+       struct armv7a_common *armv7a = &cortex_a->armv7a_common;
+       struct arm_dpm *dpm = &armv7a->dpm;
+       uint32_t dscr;
+       int retval;
+
+       if (target_was_examined(target)) {
+               /* Disable halt for breakpoint, watchpoint and vector catch */
+               retval = mem_ap_read_atomic_u32(armv7a->debug_ap,
+                               armv7a->debug_base + CPUDBG_DSCR, &dscr);
+               if (retval == ERROR_OK)
+                       mem_ap_write_atomic_u32(armv7a->debug_ap,
+                                       armv7a->debug_base + CPUDBG_DSCR,
+                                       dscr & ~DSCR_HALT_DBG_MODE);
+       }
+
+       free(cortex_a->brp_list);
+       free(dpm->dbp);
+       free(dpm->dwp);
+       free(target->private_config);
+       free(cortex_a);
+}
 
-static int cortex_a8_mmu(struct target *target, int *enabled)
+static int cortex_a_mmu(struct target *target, int *enabled)
 {
+       struct armv7a_common *armv7a = target_to_armv7a(target);
+
        if (target->state != TARGET_HALTED) {
                LOG_ERROR("%s: target not halted", __func__);
                return ERROR_TARGET_INVALID;
        }
 
-       *enabled = target_to_cortex_a8(target)->armv7a_common.armv7a_mmu.mmu_enabled;
+       if (armv7a->is_armv7r)
+               *enabled = 0;
+       else
+               *enabled = target_to_cortex_a(target)->armv7a_common.armv7a_mmu.mmu_enabled;
+
        return ERROR_OK;
 }
 
-static int cortex_a8_virt2phys(struct target *target,
-       uint32_t virt, uint32_t *phys)
+static int cortex_a_virt2phys(struct target *target,
+       target_addr_t virt, target_addr_t *phys)
 {
-       int retval = ERROR_FAIL;
-       struct armv7a_common *armv7a = target_to_armv7a(target);
-       struct adiv5_dap *swjdp = armv7a->arm.dap;
-       uint8_t apsel = swjdp->apsel;
-       if (armv7a->memory_ap_available && (apsel == armv7a->memory_ap)) {
-               uint32_t ret;
-               retval = armv7a_mmu_translate_va(target,
-                               virt, &ret);
-               if (retval != ERROR_OK)
-                       goto done;
-               *phys = ret;
-       } else {/*  use this method if armv7a->memory_ap not selected
-                *  mmu must be enable in order to get a correct translation */
-               retval = cortex_a8_mmu_modify(target, 1);
-               if (retval != ERROR_OK)
-                       goto done;
-               retval = armv7a_mmu_translate_va_pa(target, virt,  phys, 1);
+       int retval;
+       int mmu_enabled = 0;
+
+       /*
+        * If the MMU was not enabled at debug entry, there is no
+        * way of knowing if there was ever a valid configuration
+        * for it and thus it's not safe to enable it. In this case,
+        * just return the virtual address as physical.
+        */
+       cortex_a_mmu(target, &mmu_enabled);
+       if (!mmu_enabled) {
+               *phys = virt;
+               return ERROR_OK;
        }
-done:
-       return retval;
+
+       /* mmu must be enable in order to get a correct translation */
+       retval = cortex_a_mmu_modify(target, 1);
+       if (retval != ERROR_OK)
+               return retval;
+       return armv7a_mmu_translate_va_pa(target, (uint32_t)virt,
+                                                   (uint32_t *)phys, 1);
 }
 
-COMMAND_HANDLER(cortex_a8_handle_cache_info_command)
+COMMAND_HANDLER(cortex_a_handle_cache_info_command)
 {
        struct target *target = get_current_target(CMD_CTX);
        struct armv7a_common *armv7a = target_to_armv7a(target);
@@ -2634,7 +2957,7 @@ COMMAND_HANDLER(cortex_a8_handle_cache_info_command)
 }
 
 
-COMMAND_HANDLER(cortex_a8_handle_dbginit_command)
+COMMAND_HANDLER(cortex_a_handle_dbginit_command)
 {
        struct target *target = get_current_target(CMD_CTX);
        if (!target_was_examined(target)) {
@@ -2642,105 +2965,103 @@ COMMAND_HANDLER(cortex_a8_handle_dbginit_command)
                return ERROR_FAIL;
        }
 
-       return cortex_a8_init_debug_access(target);
-}
-COMMAND_HANDLER(cortex_a8_handle_smp_off_command)
-{
-       struct target *target = get_current_target(CMD_CTX);
-       /* check target is an smp target */
-       struct target_list *head;
-       struct target *curr;
-       head = target->head;
-       target->smp = 0;
-       if (head != (struct target_list *)NULL) {
-               while (head != (struct target_list *)NULL) {
-                       curr = head->target;
-                       curr->smp = 0;
-                       head = head->next;
-               }
-               /*  fixes the target display to the debugger */
-               target->gdb_service->target = target;
-       }
-       return ERROR_OK;
+       return cortex_a_init_debug_access(target);
 }
 
-COMMAND_HANDLER(cortex_a8_handle_smp_on_command)
+COMMAND_HANDLER(handle_cortex_a_mask_interrupts_command)
 {
        struct target *target = get_current_target(CMD_CTX);
-       struct target_list *head;
-       struct target *curr;
-       head = target->head;
-       if (head != (struct target_list *)NULL) {
-               target->smp = 1;
-               while (head != (struct target_list *)NULL) {
-                       curr = head->target;
-                       curr->smp = 1;
-                       head = head->next;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+
+       static const Jim_Nvp nvp_maskisr_modes[] = {
+               { .name = "off", .value = CORTEX_A_ISRMASK_OFF },
+               { .name = "on", .value = CORTEX_A_ISRMASK_ON },
+               { .name = NULL, .value = -1 },
+       };
+       const Jim_Nvp *n;
+
+       if (CMD_ARGC > 0) {
+               n = Jim_Nvp_name2value_simple(nvp_maskisr_modes, CMD_ARGV[0]);
+               if (n->name == NULL) {
+                       LOG_ERROR("Unknown parameter: %s - should be off or on", CMD_ARGV[0]);
+                       return ERROR_COMMAND_SYNTAX_ERROR;
                }
+
+               cortex_a->isrmasking_mode = n->value;
        }
+
+       n = Jim_Nvp_value2name_simple(nvp_maskisr_modes, cortex_a->isrmasking_mode);
+       command_print(CMD_CTX, "cortex_a interrupt mask %s", n->name);
+
        return ERROR_OK;
 }
 
-COMMAND_HANDLER(cortex_a8_handle_smp_gdb_command)
+COMMAND_HANDLER(handle_cortex_a_dacrfixup_command)
 {
        struct target *target = get_current_target(CMD_CTX);
-       int retval = ERROR_OK;
-       struct target_list *head;
-       head = target->head;
-       if (head != (struct target_list *)NULL) {
-               if (CMD_ARGC == 1) {
-                       int coreid = 0;
-                       COMMAND_PARSE_NUMBER(int, CMD_ARGV[0], coreid);
-                       if (ERROR_OK != retval)
-                               return retval;
-                       target->gdb_service->core[1] = coreid;
+       struct cortex_a_common *cortex_a = target_to_cortex_a(target);
+
+       static const Jim_Nvp nvp_dacrfixup_modes[] = {
+               { .name = "off", .value = CORTEX_A_DACRFIXUP_OFF },
+               { .name = "on", .value = CORTEX_A_DACRFIXUP_ON },
+               { .name = NULL, .value = -1 },
+       };
+       const Jim_Nvp *n;
+
+       if (CMD_ARGC > 0) {
+               n = Jim_Nvp_name2value_simple(nvp_dacrfixup_modes, CMD_ARGV[0]);
+               if (n->name == NULL)
+                       return ERROR_COMMAND_SYNTAX_ERROR;
+               cortex_a->dacrfixup_mode = n->value;
 
-               }
-               command_print(CMD_CTX, "gdb coreid  %d -> %d", target->gdb_service->core[0]
-                       , target->gdb_service->core[1]);
        }
+
+       n = Jim_Nvp_value2name_simple(nvp_dacrfixup_modes, cortex_a->dacrfixup_mode);
+       command_print(CMD_CTX, "cortex_a domain access control fixup %s", n->name);
+
        return ERROR_OK;
 }
 
-static const struct command_registration cortex_a8_exec_command_handlers[] = {
+static const struct command_registration cortex_a_exec_command_handlers[] = {
        {
                .name = "cache_info",
-               .handler = cortex_a8_handle_cache_info_command,
+               .handler = cortex_a_handle_cache_info_command,
                .mode = COMMAND_EXEC,
                .help = "display information about target caches",
                .usage = "",
        },
        {
                .name = "dbginit",
-               .handler = cortex_a8_handle_dbginit_command,
+               .handler = cortex_a_handle_dbginit_command,
                .mode = COMMAND_EXEC,
                .help = "Initialize core debug",
                .usage = "",
        },
-       {   .name = "smp_off",
-           .handler = cortex_a8_handle_smp_off_command,
-           .mode = COMMAND_EXEC,
-           .help = "Stop smp handling",
-           .usage = "",},
        {
-               .name = "smp_on",
-               .handler = cortex_a8_handle_smp_on_command,
-               .mode = COMMAND_EXEC,
-               .help = "Restart smp handling",
-               .usage = "",
+               .name = "maskisr",
+               .handler = handle_cortex_a_mask_interrupts_command,
+               .mode = COMMAND_ANY,
+               .help = "mask cortex_a interrupts",
+               .usage = "['on'|'off']",
        },
        {
-               .name = "smp_gdb",
-               .handler = cortex_a8_handle_smp_gdb_command,
-               .mode = COMMAND_EXEC,
-               .help = "display/fix current core played to gdb",
-               .usage = "",
+               .name = "dacrfixup",
+               .handler = handle_cortex_a_dacrfixup_command,
+               .mode = COMMAND_ANY,
+               .help = "set domain access control (DACR) to all-manager "
+                       "on memory access",
+               .usage = "['on'|'off']",
+       },
+       {
+               .chain = armv7a_mmu_command_handlers,
+       },
+       {
+               .chain = smp_command_handlers,
        },
-
 
        COMMAND_REGISTRATION_DONE
 };
-static const struct command_registration cortex_a8_command_handlers[] = {
+static const struct command_registration cortex_a_command_handlers[] = {
        {
                .chain = arm_command_handlers,
        },
@@ -2752,70 +3073,74 @@ static const struct command_registration cortex_a8_command_handlers[] = {
                .mode = COMMAND_ANY,
                .help = "Cortex-A command group",
                .usage = "",
-               .chain = cortex_a8_exec_command_handlers,
+               .chain = cortex_a_exec_command_handlers,
        },
        COMMAND_REGISTRATION_DONE
 };
 
-struct target_type cortexa8_target = {
+struct target_type cortexa_target = {
        .name = "cortex_a",
        .deprecated_name = "cortex_a8",
 
-       .poll = cortex_a8_poll,
+       .poll = cortex_a_poll,
        .arch_state = armv7a_arch_state,
 
-       .target_request_data = NULL,
-
-       .halt = cortex_a8_halt,
-       .resume = cortex_a8_resume,
-       .step = cortex_a8_step,
+       .halt = cortex_a_halt,
+       .resume = cortex_a_resume,
+       .step = cortex_a_step,
 
-       .assert_reset = cortex_a8_assert_reset,
-       .deassert_reset = cortex_a8_deassert_reset,
+       .assert_reset = cortex_a_assert_reset,
+       .deassert_reset = cortex_a_deassert_reset,
 
        /* REVISIT allow exporting VFP3 registers ... */
+       .get_gdb_arch = arm_get_gdb_arch,
        .get_gdb_reg_list = arm_get_gdb_reg_list,
 
-       .read_memory = cortex_a8_read_memory,
-       .write_memory = cortex_a8_write_memory,
+       .read_memory = cortex_a_read_memory,
+       .write_memory = cortex_a_write_memory,
+
+       .read_buffer = cortex_a_read_buffer,
+       .write_buffer = cortex_a_write_buffer,
 
        .checksum_memory = arm_checksum_memory,
        .blank_check_memory = arm_blank_check_memory,
 
        .run_algorithm = armv4_5_run_algorithm,
 
-       .add_breakpoint = cortex_a8_add_breakpoint,
-       .add_context_breakpoint = cortex_a8_add_context_breakpoint,
-       .add_hybrid_breakpoint = cortex_a8_add_hybrid_breakpoint,
-       .remove_breakpoint = cortex_a8_remove_breakpoint,
+       .add_breakpoint = cortex_a_add_breakpoint,
+       .add_context_breakpoint = cortex_a_add_context_breakpoint,
+       .add_hybrid_breakpoint = cortex_a_add_hybrid_breakpoint,
+       .remove_breakpoint = cortex_a_remove_breakpoint,
        .add_watchpoint = NULL,
        .remove_watchpoint = NULL,
 
-       .commands = cortex_a8_command_handlers,
-       .target_create = cortex_a8_target_create,
-       .init_target = cortex_a8_init_target,
-       .examine = cortex_a8_examine,
-
-       .read_phys_memory = cortex_a8_read_phys_memory,
-       .write_phys_memory = cortex_a8_write_phys_memory,
-       .mmu = cortex_a8_mmu,
-       .virt2phys = cortex_a8_virt2phys,
+       .commands = cortex_a_command_handlers,
+       .target_create = cortex_a_target_create,
+       .target_jim_configure = adiv5_jim_configure,
+       .init_target = cortex_a_init_target,
+       .examine = cortex_a_examine,
+       .deinit_target = cortex_a_deinit_target,
+
+       .read_phys_memory = cortex_a_read_phys_memory,
+       .write_phys_memory = cortex_a_write_phys_memory,
+       .mmu = cortex_a_mmu,
+       .virt2phys = cortex_a_virt2phys,
 };
 
 static const struct command_registration cortex_r4_exec_command_handlers[] = {
        {
-               .name = "cache_info",
-               .handler = cortex_a8_handle_cache_info_command,
+               .name = "dbginit",
+               .handler = cortex_a_handle_dbginit_command,
                .mode = COMMAND_EXEC,
-               .help = "display information about target caches",
+               .help = "Initialize core debug",
                .usage = "",
        },
        {
-               .name = "dbginit",
-               .handler = cortex_a8_handle_dbginit_command,
+               .name = "maskisr",
+               .handler = handle_cortex_a_mask_interrupts_command,
                .mode = COMMAND_EXEC,
-               .help = "Initialize core debug",
-               .usage = "",
+               .help = "mask cortex_r4 interrupts",
+               .usage = "['on'|'off']",
        },
 
        COMMAND_REGISTRATION_DONE
@@ -2824,9 +3149,6 @@ static const struct command_registration cortex_r4_command_handlers[] = {
        {
                .chain = arm_command_handlers,
        },
-       {
-               .chain = armv7a_command_handlers,
-       },
        {
                .name = "cortex_r4",
                .mode = COMMAND_ANY,
@@ -2840,38 +3162,39 @@ static const struct command_registration cortex_r4_command_handlers[] = {
 struct target_type cortexr4_target = {
        .name = "cortex_r4",
 
-       .poll = cortex_a8_poll,
+       .poll = cortex_a_poll,
        .arch_state = armv7a_arch_state,
 
-       .target_request_data = NULL,
-
-       .halt = cortex_a8_halt,
-       .resume = cortex_a8_resume,
-       .step = cortex_a8_step,
+       .halt = cortex_a_halt,
+       .resume = cortex_a_resume,
+       .step = cortex_a_step,
 
-       .assert_reset = cortex_a8_assert_reset,
-       .deassert_reset = cortex_a8_deassert_reset,
+       .assert_reset = cortex_a_assert_reset,
+       .deassert_reset = cortex_a_deassert_reset,
 
        /* REVISIT allow exporting VFP3 registers ... */
+       .get_gdb_arch = arm_get_gdb_arch,
        .get_gdb_reg_list = arm_get_gdb_reg_list,
 
-       .read_memory = cortex_a8_read_memory,
-       .write_memory = cortex_a8_write_memory,
+       .read_memory = cortex_a_read_phys_memory,
+       .write_memory = cortex_a_write_phys_memory,
 
        .checksum_memory = arm_checksum_memory,
        .blank_check_memory = arm_blank_check_memory,
 
        .run_algorithm = armv4_5_run_algorithm,
 
-       .add_breakpoint = cortex_a8_add_breakpoint,
-       .add_context_breakpoint = cortex_a8_add_context_breakpoint,
-       .add_hybrid_breakpoint = cortex_a8_add_hybrid_breakpoint,
-       .remove_breakpoint = cortex_a8_remove_breakpoint,
+       .add_breakpoint = cortex_a_add_breakpoint,
+       .add_context_breakpoint = cortex_a_add_context_breakpoint,
+       .add_hybrid_breakpoint = cortex_a_add_hybrid_breakpoint,
+       .remove_breakpoint = cortex_a_remove_breakpoint,
        .add_watchpoint = NULL,
        .remove_watchpoint = NULL,
 
        .commands = cortex_r4_command_handlers,
        .target_create = cortex_r4_target_create,
-       .init_target = cortex_a8_init_target,
-       .examine = cortex_a8_examine,
+       .target_jim_configure = adiv5_jim_configure,
+       .init_target = cortex_a_init_target,
+       .examine = cortex_a_examine,
+       .deinit_target = cortex_a_deinit_target,
 };

Linking to existing account procedure

If you already have an account and want to add another login method you MUST first sign in with your existing account and then change URL to read https://review.openocd.org/login/?link to get to this page again but this time it'll work for linking. Thank you.

SSH host keys fingerprints

1024 SHA256:YKx8b7u5ZWdcbp7/4AeXNaqElP49m6QrwfXaqQGJAOk gerrit-code-review@openocd.zylin.com (DSA)
384 SHA256:jHIbSQa4REvwCFG4cq5LBlBLxmxSqelQPem/EXIrxjk gerrit-code-review@openocd.org (ECDSA)
521 SHA256:UAOPYkU9Fjtcao0Ul/Rrlnj/OsQvt+pgdYSZ4jOYdgs gerrit-code-review@openocd.org (ECDSA)
256 SHA256:A13M5QlnozFOvTllybRZH6vm7iSt0XLxbA48yfc2yfY gerrit-code-review@openocd.org (ECDSA)
256 SHA256:spYMBqEYoAOtK7yZBrcwE8ZpYt6b68Cfh9yEVetvbXg gerrit-code-review@openocd.org (ED25519)
+--[ED25519 256]--+
|=..              |
|+o..   .         |
|*.o   . .        |
|+B . . .         |
|Bo. = o S        |
|Oo.+ + =         |
|oB=.* = . o      |
| =+=.+   + E     |
|. .=o   . o      |
+----[SHA256]-----+
2048 SHA256:0Onrb7/PHjpo6iVZ7xQX2riKN83FJ3KGU0TvI0TaFG4 gerrit-code-review@openocd.zylin.com (RSA)