startup_ARMv8x1_GCC

来源：互联网发布：Windows7远程端口设置编辑：程序博客网时间：2024/06/03 02:22

startup.S

// ------------------------------------------------------------
// start64 - start-up code executed at EL3
//
// Programs the vector base registers, initialises the EL3/EL2/EL1
// system control registers, performs the Secure-side GIC setup and
// then drops to EL1 with an exception return.
//
// x19 is loaded with MPIDR_EL1.Aff0 (the CPU number) early on and is
// relied upon by all of the code below.
// ------------------------------------------------------------
start64:

    // gshen: configure the exception (interrupt) vectors for each ELx
    //
    // program the VBARs
    //
    ldr x1, =el1_vectors
    msr VBAR_EL1, x1

    ldr x1, =el2_vectors
    msr VBAR_EL2, x1

    ldr x1, =el3_vectors
    msr VBAR_EL3, x1

    // SCR_EL3, Secure Configuration Register
    msr SCR_EL3, xzr  // Ensure NS bit is initially clear, so secure copy of ICC_SRE_EL1 can be configured
    isb

    mov x0, #15
    msr ICC_SRE_EL3, x0
    isb
    msr ICC_SRE_EL1, x0 // Secure copy of ICC_SRE_EL1

    //
    // set lower exception levels as non-secure, with no access
    // back to EL2 or EL3, and are AArch64 capable
    //
    mov x3, #(SCR_EL3_RW  | \
              SCR_EL3_SMD | \
              SCR_EL3_NS)      // Set NS bit, to access Non-secure registers
    msr SCR_EL3, x3
    isb

    mov x0, #15
    msr ICC_SRE_EL2, x0
    isb
    msr ICC_SRE_EL1, x0 // Non-secure copy of ICC_SRE_EL1

    //
    // no traps or VM modifications from the Hypervisor, EL1 is AArch64
    //
    mov x2, #HCR_EL2_RW
    msr HCR_EL2, x2

    //
    // VMID is still significant, even when virtualisation is not
    // being used, so ensure VTTBR_EL2 is properly initialised
    //
    msr VTTBR_EL2, xzr

    //
    // VMPIDR_EL2 holds the value of the Virtualization Multiprocessor ID. This is the value returned by Non-secure EL1 reads of MPIDR_EL1.
    // VPIDR_EL2 holds the value of the Virtualization Processor ID. This is the value returned by Non-secure EL1 reads of MIDR_EL1.
    // Both of these registers are architecturally UNKNOWN at reset, and so they must be set to the correct value
    // (even if EL2/virtualization is not being used), otherwise non-secure EL1 reads of MPIDR_EL1/MIDR_EL1 will return garbage values.
    // This guarantees that any future reads of MPIDR_EL1 and MIDR_EL1 from Non-secure EL1 will return the correct value.
    //
    // keep MPIDR_EL1.Aff0 (i.e. the CPU no. on Cortex-A cores) in
    // x19 (defined by the AAPCS as callee-saved), so we can re-use
    // the number later
    //
    mrs x0, MPIDR_EL1
    ubfx x19, x0, #MPIDR_EL1_AFF0_LSB, #MPIDR_EL1_AFF_WIDTH
    msr VMPIDR_EL2, x0

    mrs x0, MIDR_EL1
    msr VPIDR_EL2, x0

    //
    // neither EL3 nor EL2 trap floating point or accesses to CPACR
    //
    msr CPTR_EL3, xzr
    msr CPTR_EL2, xzr

    //
    // SCTLR_ELx may come out of reset with UNKNOWN values so we will
    // set the fields to 0 except, possibly, the endianness field(s).
    // Note that setting SCTLR_EL2 or the EL0 related fields of SCTLR_EL1
    // is not strictly needed, since we're never in EL2 or EL0
    //
#ifdef __ARM_BIG_ENDIAN
    mov x0, #(SCTLR_ELx_EE | SCTLR_EL1_E0E)
#else
    mov x0, #0
#endif
    msr SCTLR_EL3, x0
    msr SCTLR_EL2, x0
    msr SCTLR_EL1, x0

#ifdef CORTEXA
    //
    // Configure ACTLR_EL[23]
    // ----------------------
    //
    // These bits are IMPLEMENTATION DEFINED, so are different for
    // different processors
    //
    // For Cortex-A57, the controls we set are:
    //
    //  Enable lower level access to CPUACTLR_EL1
    //  Enable lower level access to CPUECTLR_EL1
    //  Enable lower level access to L2CTLR_EL1
    //  Enable lower level access to L2ECTLR_EL1
    //  Enable lower level access to L2ACTLR_EL1
    //
    mov x0, #((1 << 0) | \
              (1 << 1) | \
              (1 << 4) | \
              (1 << 5) | \
              (1 << 6))

    msr ACTLR_EL3, x0
    msr ACTLR_EL2, x0

    //
    // configure CPUECTLR_EL1
    //
    // These bits are IMP DEF, so need to be different for different
    // processors
    //
    // SMPEN - bit 6 - Enables the processor to receive cache
    //                 and TLB maintenance operations
    //
    // Note: For Cortex-A57/53 SMPEN should be set before enabling
    //       the caches and MMU, or performing any cache and TLB
    //       maintenance operations.
    //
    //       This register has a defined reset value, so we use a
    //       read-modify-write sequence to set SMPEN
    //
    mrs x0, S3_1_c15_c2_1  // Read EL1 CPU Extended Control Register
    orr x0, x0, #(1 << 6)  // Set the SMPEN bit
    msr S3_1_c15_c2_1, x0  // Write EL1 CPU Extended Control Register

    isb
#endif

    //
    // That's the last of the control settings for now
    //
    // Note: no ISB after all these changes, as registers won't be
    // accessed until after an exception return, which is itself a
    // context synchronisation event
    //

    //
    // Setup some EL3 stack space, ready for calling some subroutines, below.
    //
    // Stack space allocation is CPU-specific, so use CPU
    // number already held in x19
    //
    // 2^12 bytes per CPU for the EL3 stacks
    //
    ldr x0, =__el3_stack
    sub x0, x0, x19, lsl #12
    mov sp, x0

    //
    // we need to configure the GIC while still in secure mode, specifically
    // all PPIs and SPIs have to be programmed as Group1 interrupts
    //

    //
    // Before the GIC can be reliably programmed, we need to
    // enable Affinity Routing, as this affects where the configuration
    // registers are (with Affinity Routing enabled, some registers are
    // in the Redistributor, whereas those same registers are in the
    // Distributor with Affinity Routing disabled (i.e. when in GICv2
    // compatibility mode).
    //
    mov x0, #(1 << 4) | (1 << 5) // gicdctlr_ARE_S | gicdctlr_ARE_NS
    mov x1, x19
    bl  SyncAREinGICD

    //
    // The Redistributor comes out of reset assuming the processor is
    // asleep - correct that assumption
    //
    mov w0, w19
    bl  WakeupGICR

    //
    // Now we're ready to set security and other initialisations
    //
    // This is a per-CPU configuration for these interrupts
    //
    // for the first cluster, CPU number is the redistributor index
    //
    mov w0, w19
    mov w1, #1    // gicigroupr_G1NS
    bl  SetPrivateIntSecurityBlock

    //
    // While we're in the Secure World, set the priority mask low enough
    // for it to be writable in the Non-Secure World
    //
    //mov x0, #16 << 3    // 5 bits of priority in the Secure world
    mov x0, #0xFF  // for Non-Secure interrupts
    msr ICC_PMR_EL1, x0

    //
    // there's more GIC setup to do, but only for the primary CPU
    //
    cbnz x19, drop_to_el1

    //
    // There's more to do to the GIC - call the utility routine to set
    // all SPIs to Group1
    //
    mov w0, #1    // gicigroupr_G1NS
    bl  SetSPISecurityAll

    //
    // Set up EL1 entry point and "dummy" exception return information,
    // then perform exception return to enter EL1
    //
    .global drop_to_el1
drop_to_el1:
    adr x1, el1_entry_aarch64
    msr ELR_EL3, x1
    mov x1, #(AARCH64_SPSR_EL1h | \
              AARCH64_SPSR_F  | \
              AARCH64_SPSR_I  | \
              AARCH64_SPSR_A)
    msr SPSR_EL3, x1
    // gshen: jump to EL1 via eret
    eret


// ------------------------------------------------------------
// EL1 - Common start-up code
// ------------------------------------------------------------

    .global el1_entry_aarch64
    .type el1_entry_aarch64, "function"
el1_entry_aarch64:

    //
    // Now we're in EL1, setup the application stack
    // the scatter file allocates 2^14 bytes per app stack
    //
    ldr x0, =__stack
    sub x0, x0, x19, lsl #14
    mov sp, x0

    //
    // Enable floating point
    //
    mov x0, #CPACR_EL1_FPEN
    msr CPACR_EL1, x0

    //
    // Invalidate caches and TLBs for all stage 1
    // translations used at EL1
    //
    // Cortex-A processors automatically invalidate their caches on reset
    // (unless suppressed with the DBGL1RSTDISABLE or L2RSTDISABLE pins).
    // It is therefore not necessary for software to invalidate the caches
    // on startup, however, this is done here in case of a warm reset.
    bl  InvalidateUDCaches
    tlbi VMALLE1

    //
    // Set TTBR0 Base address
    //
    // The CPUs share one set of translation tables that are
    // generated by CPU0 at run-time
    //
    // TTBR1_EL1 is not used in this example
    //
    ldr x1, =__ttb0_l1
    msr TTBR0_EL1, x1

    //
    // Set up memory attributes
    //
    // These equate to:
    //
    // 0 -> 0b01000100 = 0x00000044 = Normal, Inner/Outer Non-Cacheable
    // 1 -> 0b11111111 = 0x0000ff00 = Normal, Inner/Outer WriteBack Read/Write Allocate
    // 2 -> 0b00000100 = 0x00040000 = Device-nGnRE
    //
    mov  x1, #0xff44
    movk x1, #4, LSL #16    // equiv to: movk x1, #0x0000000000040000
    msr MAIR_EL1, x1

    //
    // Set up TCR_EL1
    //
    // We're using only TTBR0 (EPD1 = 1), and the page table entries:
    //  - are using an 8-bit ASID from TTBR0
    //  - have a 4K granularity (TG0 = 0b00)
    //  - are outer-shareable (SH0 = 0b10)
    //  - are using Inner & Outer WBWA Normal memory ([IO]RGN0 = 0b01)
    //  - map
    //      + 32 bits of VA space (T0SZ = 0x20)
    //      + into a 32-bit PA space (IPS = 0b000)
    //
    //     36   32   28   24   20   16   12    8    4    0
    //  -----+----+----+----+----+----+----+----+----+----+
    //       |    |    |OOII|    |    |    |OOII|    |    |
    //    TT |    |    |RRRR|E T |   T|    |RRRR|E T |   T|
    //    BB | I I|TTSS|GGGG|P 1 |   1|TTSS|GGGG|P 0 |   0|
    //    IIA| P P|GGHH|NNNN|DAS |   S|GGHH|NNNN|D S |   S|
    //    10S| S-S|1111|1111|11Z-|---Z|0000|0000|0 Z-|---Z|
    //
    //    000 0000 0000 0000 1000 0000 0010 0101 0010 0000
    //
    //                    0x    8    0    2    5    2    0
    //
    // Note: the ISB is needed to ensure the changes to system
    //       context are before the write of SCTLR_EL1.M to enable
    //       the MMU. It is likely on a "real" implementation that
    //       this setup would work without an ISB, due to the
    //       amount of code that gets executed before enabling the
    //       MMU, but that would not be architecturally correct.
    //
    ldr x1, =0x0000000000802520
    msr TCR_EL1, x1
    isb

    //
    // x19 already contains the CPU number, so branch to secondary
    // code if we're not on CPU0
    //
    cbnz x19, el1_secondary

    //
    // Fall through to primary code
    //


//
// ------------------------------------------------------------
//
// EL1 - primary CPU init code
//
// This code is run on CPU0, while the other CPUs are in the
// holding pen
//

    .global el1_primary
    .type el1_primary, "function"
el1_primary:

    //
    // We're now on the primary processor in the NS world: turn on
    // the banked GIC distributor enable, ready for individual CPU
    // enables later
    //
    mov w0, #(1 << 1)  // gicdctlr_EnableGrp1A
    bl  EnableGICD

    //
    // Generate TTBR0 L1
    //
    // at 4KB granularity, 32-bit VA space, table lookup starts at
    // L1, with 1GB regions
    //
    // we are going to create entries pointing to L2 tables for a
    // couple of these 1GB regions, the first of which is the
    // RAM on the VE board model - get the table addresses and
    // start by emptying out the L1 page tables (4 entries at L1
    // for a 4K granularity)
    //
    // x21 = address of L1 tables
    //
    ldr x21, =__ttb0_l1
    mov x0, x21
    mov x1, #(4 << 3)
    bl  ZeroBlock

    //
    // time to start mapping the RAM regions - clear out the
    // L2 tables and point to them from the L1 tables
    //
    // x22 = address of L2 tables, needs to be remembered in case
    //       we want to re-use the tables for mapping peripherals
    //
    ldr x22, =__ttb0_l2_ram
    mov x1, #(512 << 3)
    mov x0, x22
    bl  ZeroBlock

    //
    // Get the start address of RAM (the EXEC region) into x4
    // and calculate the offset into the L1 table (1GB per region,
    // max 4GB)
    //
    // x23 = L1 table offset, saved for later comparison against
    //       peripheral offset
    //
    ldr x4, =__code_start
    ubfx x23, x4, #30, #2

    orr x1, x22, #TT_S1_ATTR_PAGE
    str x1, [x21, x23, lsl #3]

    //
    // we've already used the RAM start address in x4 - we now need
    // to get this in terms of an offset into the L2 page tables,
    // where each entry covers 2MB
    //
    ubfx x2, x4, #21, #9

    //
    // TOP_OF_RAM in the scatter file marks the end of the
    // Execute region in RAM: convert the end of this region to an
    // offset too, being careful to round up, then calculate the
    // number of entries to write
    //
    ldr x5, =__top_of_ram
    sub  x3, x5, #1
    ubfx x3, x3, #21, #9
    add  x3, x3, #1
    sub  x3, x3, x2

    //
    // set x1 to the required page table attributes, then orr
    // in the start address (modulo 2MB)
    //
    // L2 tables in our configuration cover 2MB per entry - map
    // memory as Shared, Normal WBWA (MAIR[1]) with a flat
    // VA->PA translation
    //
    bic x4, x4, #((1 << 21) - 1)
    mov x1, #(TT_S1_ATTR_BLOCK | \
             (1 << TT_S1_ATTR_MATTR_LSB) | \
              TT_S1_ATTR_NS | \
              TT_S1_ATTR_AP_RW_PL1 | \
              TT_S1_ATTR_SH_INNER | \
              TT_S1_ATTR_AF | \
              TT_S1_ATTR_nG)
    orr x1, x1, x4

    //
    // factor the offset into the page table address and then write
    // the entries
    //
    add x0, x22, x2, lsl #3

loop1:
    subs x3, x3, #1
    str x1, [x0], #8
    add x1, x1, #0x200, LSL #12    // equiv to add x1, x1, #(1 << 21)  // 2MB per entry
    bne loop1

    //
    // now mapping the Peripheral regions - clear out the
    // L2 tables and point to them from the L1 tables
    //
    // The assumption here is that all peripherals live within
    // a common 1GB region (i.e. that there's a single set of
    // L2 pages for all the peripherals). We only use a UART
    // and the GIC in this example, so the assumption is sound
    //
    // x24 = address of L2 peripheral tables
    //
    ldr x24, =__ttb0_l2_periph

    //
    // get the GICD address into x4 and calculate
    // the offset into the L1 table
    //
    // x25 = L1 table offset
    //
    ldr x4, =gicd
    ubfx x25, x4, #30, #2

    //
    // here's the tricky bit: it's possible that the peripherals are
    // in the same 1GB region as the RAM, in which case we don't need
    // to prime a separate set of L2 page tables, nor add them to the
    // L1 tables
    //
    // if we're going to re-use the TTB0_L2_RAM tables, get their
    // address into x24, which is used later on to write the PTEs
    //
    cmp x25, x23
    csel x24, x22, x24, EQ
    b.eq nol2setup

    //
    // Peripherals are in a separate 1GB region, and so have their own
    // set of L2 tables - clean out the tables and add them to the L1
    // table
    //
    mov x0, x24
    mov x1, #512 << 3
    bl  ZeroBlock

    orr x1, x24, #TT_S1_ATTR_PAGE
    str x1, [x21, x25, lsl #3]

    //
    // there's only going to be a single 2MB region for GICD (in
    // x4) - get this in terms of an offset into the L2 page tables
    //
    // with larger systems, it is possible that the GIC redistributor
    // registers require extra 2MB pages, in which case extra code
    // would be required here
    //
nol2setup:
    ubfx x2, x4, #21, #9

    //
    // set x1 to the required page table attributes, then orr
    // in the start address (modulo 2MB)
    //
    // L2 tables in our configuration cover 2MB per entry - map
    // memory as NS Device-nGnRE (MAIR[2]) with a flat VA->PA
    // translation
    //
    bic x4, x4, #((1 << 21) - 1)  // start address mod 2MB
    mov x1, #(TT_S1_ATTR_BLOCK | \
             (2 << TT_S1_ATTR_MATTR_LSB) | \
              TT_S1_ATTR_NS | \
              TT_S1_ATTR_AP_RW_PL1 | \
              TT_S1_ATTR_AF | \
              TT_S1_ATTR_nG)
    orr x1, x1, x4

    //
    // only a single L2 entry for this, so no loop as we have for RAM, above
    //
    str x1, [x24, x2, lsl #3]

    //
    // we have CS3_PERIPHERALS that include the UART controller
    //
    // Again, the code is making assumptions - this time that the CS3_PERIPHERALS
    // region uses the same 1GB portion of the address space as the GICD,
    // and thus shares the same set of L2 page tables
    //
    // Get CS3_PERIPHERALS address into x4 and calculate the offset into the
    // L2 tables
    //
    ldr x4, =__cs3_peripherals
    ubfx x2, x4, #21, #9

    //
    // set x1 to the required page table attributes, then orr
    // in the start address (modulo 2MB)
    //
    // L2 tables in our configuration cover 2MB per entry - map
    // memory as NS Device-nGnRE (MAIR[2]) with a flat VA->PA
    // translation
    //
    bic x4, x4, #((1 << 21) - 1)  // start address mod 2MB
    mov x1, #(TT_S1_ATTR_BLOCK | \
             (2 << TT_S1_ATTR_MATTR_LSB) | \
              TT_S1_ATTR_NS | \
              TT_S1_ATTR_AP_RW_PL1 | \
              TT_S1_ATTR_AF | \
              TT_S1_ATTR_nG)
    orr x1, x1, x4

    //
    // only a single L2 entry again - write it
    //
    str x1, [x24, x2, lsl #3]

    //
    // issue a barrier to ensure all table entry writes are complete
    //
    dsb ish

    //
    // Enable the MMU.  Caches will be enabled later, after scatterloading.
    //
    mrs x1, SCTLR_EL1
    orr x1, x1, #SCTLR_ELx_M
    bic x1, x1, #SCTLR_ELx_A // Disable alignment fault checking.  To enable, change bic to orr
    msr SCTLR_EL1, x1
    isb

    //
    // The ARM Architecture Reference Manual for ARMv8-A states:
    //
    //     Instruction accesses to Non-cacheable Normal memory can be held in instruction caches.
    //     Correspondingly, the sequence for ensuring that modifications to instructions are available
    //     for execution must include invalidation of the modified locations from the instruction cache,
    //     even if the instructions are held in Normal Non-cacheable memory.
    //     This includes cases where the instruction cache is disabled.
    //
    dsb ish     // ensure all previous stores have completed before invalidating
    ic  ialluis // I cache invalidate all inner shareable to PoU (which includes secondary cores)
    dsb ish     // ensure completion on inner shareable domain   (which includes secondary cores)
    isb

    // Scatter-loading is complete, so enable the caches here, so that the C-library's mutex initialization later will work
    mrs x1, SCTLR_EL1
    orr x1, x1, #SCTLR_ELx_C
    orr x1, x1, #SCTLR_ELx_I
    msr SCTLR_EL1, x1
    isb

    // Zero the bss
    ldr x0, =__bss_start__ // Start of block
    mov x1, #0             // Fill value
    ldr x2, =__bss_end__   // End of block
    sub x2, x2, x0         // Length of block
    bl  memset

    // Set up the standard file handles
    bl  initialise_monitor_handles

    // Set up _fini and fini_array to be called at exit
    ldr x0, =__libc_fini_array
    bl  atexit

    // Call preinit_array, _init and init_array
    bl  __libc_init_array

    // Set argc = 1, argv[0] = "" and then call main
    .pushsection .data
    .align 3
argv:
    .dword arg0
    .dword 0
arg0:
    .byte 0
    .popsection

    mov x0, #1
    ldr x1, =argv
    bl main

    b exit // Will not return


// ------------------------------------------------------------
// EL1 - secondary CPU init code
//
// This code is run on CPUs 1, 2, 3 etc....
//
// The secondary CPUs are simply parked here: each one sleeps in
// WFI and goes straight back to sleep whenever it is woken.
// ------------------------------------------------------------

    .global el1_secondary
    .type el1_secondary, "function"
el1_secondary:

loop_wfi:
    dsb SY      // Clear all pending data accesses
    wfi         // Go to sleep
    b   loop_wfi // WFI is only a hint and may complete at any time (e.g. on a
                 // pending interrupt), so loop instead of running off the end

timer_interrupts.c

/* Bare-metal example for ARMv8 Foundation Platform model *//* Timer and interrupts *//* Copyright (C) ARM Limited, 2016. All rights reserved. */#include <stdio.h>#include "GICv3.h"#include "GICv3_gicc.h"#include "sp804_timer.h"// LED Base address#define LED_BASE (volatile unsigned int *)0x1C010008void nudge_leds(void) // Move LEDs along{    static int state = 1;    static int value = 1;    if (state)    {        int max = (1 << 7);        value <<= 1;        if (value == max)            state = 0;    }    else    {        value >>= 1;        if (value == 1)            state = 1;    }    *LED_BASE = value;  // Update LEDs hardware}// Initialize Timer 0 and Interrupt Controllervoid init_timer(void){    // Enable interrupts    __asm("MSR DAIFClr, #0xF");    setICC_IGRPEN1_EL1(igrpEnable);    // Configure the SP804 timer to generate an interrupt    setTimerBaseAddress(0x1C110000);    initTimer(0x8000, SP804_AUTORELOAD, SP804_GENERATE_IRQ);    startTimer();    // The SP804 timer generates SPI INTID 34.  Enable    // this ID, and route it to core 0.0.0.0 (this one!)    
SetSPIRoute(34, 0, gicdirouter_ModeSpecific);    // Route INTID 34 to 0.0.0.0 (this core)    SetSPIPriority(34, 0);                           // Set INTID 34 to priority to 0    ConfigureSPI(34, gicdicfgr_Level);               // Set INTID 34 as level-sensitive    EnableSPI(34);                                   // Enable INTID 34}// --------------------------------------------------------void irqHandler(void){  unsigned int ID;  ID = getICC_IAR1(); // readIntAck();  // Check for reserved IDs  if ((1020 <= ID) && (ID <= 1023))  {      printf("irqHandler() - Reserved INTID %d\n\n", ID);      return;  }  switch(ID)  {    case 34:      // Dual-Timer 0 (SP804)      printf("irqHandler() - External timer interrupt\n\n");      nudge_leds();      clearTimerIrq();      break;    default:      // Unexpected ID value      printf("irqHandler() - Unexpected INTID %d\n\n", ID);      break;  }  // Write the End of Interrupt register to tell the GIC  // we've finished handling the interrupt  setICC_EOIR1(ID); // writeAliasedEOI(ID);}// --------------------------------------------------------// Not actually used in this example, but provided for completenessvoid fiqHandler(void){  unsigned int ID;  unsigned int aliased = 0;  ID = getICC_IAR0(); // readIntAck();  printf("fiqHandler() - Read %d from IAR0\n", ID);  // Check for reserved IDs  if ((1020 <= ID) && (ID <= 1023))  {    printf("fiqHandler() - Reserved INTID %d\n\n", ID);    ID = getICC_IAR1(); // readAliasedIntAck();    printf("fiqHandler() - Read %d from AIAR\n", ID);    aliased = 1;    // If still spurious then simply return    if ((1020 <= ID) && (ID <= 1023))        return;  }  switch(ID)  {    case 34:      // Dual-Timer 0 (SP804)      printf("fiqHandler() - External timer interrupt\n\n");      clearTimerIrq();      break;    default:      // Unexpected ID value      printf("fiqHandler() - Unexpected INTID %d\n\n", ID);      break;  }  // Write the End of Interrupt register to tell the GIC  // we've finished handling 
the interrupt  // NOTE: If the ID was read from the Aliased IAR, then  // the aliased EOI register must be used  if (aliased == 0)    setICC_EOIR0(ID); // writeEOI(ID);  else    setICC_EOIR1(ID); // writeAliasedEOI(ID);}

0 0