/*
 * OpenRISC head.S
 *
 * Linux architectural port borrowing liberally from similar works of
 * others.  All original copyrights apply as per the original source
 * declaration.
 *
 * Modifications for the OpenRISC architecture:
 * Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
 * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <linux/threads.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/serial_reg.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/pgtable.h>
#include <asm/cache.h>
#include <asm/spr_defs.h>
#include <asm/asm-offsets.h>
#include <linux/of_fdt.h>

/*
 * tophys(rd,rs): rd = rs + (hi(-KERNELBASE) << 16).
 * Used throughout this file to turn a kernel virtual address into the
 * address that has to be used while the MMU is off.
 * rd is written before rs is read, so rd and rs must be different
 * registers.
 * NOTE(review): only the high half of -KERNELBASE is applied; this
 * presumably relies on KERNELBASE being 64KiB aligned - confirm.
 */
#define tophys(rd,rs)				\
	l.movhi	rd,hi(-KERNELBASE)		;\
	l.add	rd,rd,rs

/*
 * CLEAR_GPR(gpr): gpr = r0 | r0. This zeroes gpr given that r0 holds
 * zero, which the rest of this file assumes as well.
 */
#define CLEAR_GPR(gpr)				\
	l.or    gpr,r0,r0

/*
 * LOAD_SYMBOL_2_GPR(gpr,symbol): load a full 32-bit constant or symbol
 * address into gpr as a high-half / low-half instruction pair.
 */
#define LOAD_SYMBOL_2_GPR(gpr,symbol)		\
	l.movhi gpr,hi(symbol)			;\
	l.ori   gpr,gpr,lo(symbol)


/* Not referenced in the visible part of this file. */
#define UART_BASE_ADD      0x90000000

/*
 * Status register values.
 * EXCEPTION_SR is what EXCEPTION_HANDLE and UNHANDLED_EXCEPTION write to
 * ESR before their final l.rfe: both MMUs on (DME, IME), both caches on
 * (DCE, ICE) and supervisor mode (SM).
 * SYSCALL_SR is the same value with IEE and TEE added (presumably the
 * external interrupt and tick timer enables - confirm in spr_defs.h).
 */
#define EXCEPTION_SR  (SPR_SR_DME | SPR_SR_IME | SPR_SR_DCE | SPR_SR_ICE | SPR_SR_SM)
#define SYSCALL_SR  (SPR_SR_DME | SPR_SR_IME | SPR_SR_DCE | SPR_SR_ICE | SPR_SR_IEE | SPR_SR_TEE | SPR_SR_SM)

/* ============================================[ tmp store locations ]=== */

/*
 * emergency_print scratch area
 *
 * One word per register, addressed as a fixed offset from r0. The slot
 * offsets are named once; every register then gets a STORE macro
 * (register -> slot) and a LOAD macro (slot -> register).
 */
#define EMERGENCY_PRINT_SLOT_GPR4	0x20
#define EMERGENCY_PRINT_SLOT_GPR5	0x24
#define EMERGENCY_PRINT_SLOT_GPR6	0x28
#define EMERGENCY_PRINT_SLOT_GPR7	0x2c
#define EMERGENCY_PRINT_SLOT_GPR8	0x30
#define EMERGENCY_PRINT_SLOT_GPR9	0x34

#define EMERGENCY_PRINT_STORE_GPR4	l.sw    EMERGENCY_PRINT_SLOT_GPR4(r0),r4
#define EMERGENCY_PRINT_STORE_GPR5	l.sw    EMERGENCY_PRINT_SLOT_GPR5(r0),r5
#define EMERGENCY_PRINT_STORE_GPR6	l.sw    EMERGENCY_PRINT_SLOT_GPR6(r0),r6
#define EMERGENCY_PRINT_STORE_GPR7	l.sw    EMERGENCY_PRINT_SLOT_GPR7(r0),r7
#define EMERGENCY_PRINT_STORE_GPR8	l.sw    EMERGENCY_PRINT_SLOT_GPR8(r0),r8
#define EMERGENCY_PRINT_STORE_GPR9	l.sw    EMERGENCY_PRINT_SLOT_GPR9(r0),r9

#define EMERGENCY_PRINT_LOAD_GPR4	l.lwz   r4,EMERGENCY_PRINT_SLOT_GPR4(r0)
#define EMERGENCY_PRINT_LOAD_GPR5	l.lwz   r5,EMERGENCY_PRINT_SLOT_GPR5(r0)
#define EMERGENCY_PRINT_LOAD_GPR6	l.lwz   r6,EMERGENCY_PRINT_SLOT_GPR6(r0)
#define EMERGENCY_PRINT_LOAD_GPR7	l.lwz   r7,EMERGENCY_PRINT_SLOT_GPR7(r0)
#define EMERGENCY_PRINT_LOAD_GPR8	l.lwz   r8,EMERGENCY_PRINT_SLOT_GPR8(r0)
#define EMERGENCY_PRINT_LOAD_GPR9	l.lwz   r9,EMERGENCY_PRINT_SLOT_GPR9(r0)


/*
 * TLB miss handlers' scratch area
 *
 * One word per register, addressed as a fixed offset from r0. The slot
 * offsets are named once; STORE copies the register into its slot and
 * LOAD brings it back.
 */
#define EXCEPTION_SLOT_GPR9		0x10
#define EXCEPTION_SLOT_GPR2		0x64
#define EXCEPTION_SLOT_GPR3		0x68
#define EXCEPTION_SLOT_GPR4		0x6c
#define EXCEPTION_SLOT_GPR5		0x70
#define EXCEPTION_SLOT_GPR6		0x74

#define EXCEPTION_STORE_GPR9		l.sw    EXCEPTION_SLOT_GPR9(r0),r9
#define EXCEPTION_STORE_GPR2		l.sw    EXCEPTION_SLOT_GPR2(r0),r2
#define EXCEPTION_STORE_GPR3		l.sw    EXCEPTION_SLOT_GPR3(r0),r3
#define EXCEPTION_STORE_GPR4		l.sw    EXCEPTION_SLOT_GPR4(r0),r4
#define EXCEPTION_STORE_GPR5		l.sw    EXCEPTION_SLOT_GPR5(r0),r5
#define EXCEPTION_STORE_GPR6		l.sw    EXCEPTION_SLOT_GPR6(r0),r6

#define EXCEPTION_LOAD_GPR9		l.lwz   r9,EXCEPTION_SLOT_GPR9(r0)
#define EXCEPTION_LOAD_GPR2		l.lwz   r2,EXCEPTION_SLOT_GPR2(r0)
#define EXCEPTION_LOAD_GPR3		l.lwz   r3,EXCEPTION_SLOT_GPR3(r0)
#define EXCEPTION_LOAD_GPR4		l.lwz   r4,EXCEPTION_SLOT_GPR4(r0)
#define EXCEPTION_LOAD_GPR5		l.lwz   r5,EXCEPTION_SLOT_GPR5(r0)
#define EXCEPTION_LOAD_GPR6		l.lwz   r6,EXCEPTION_SLOT_GPR6(r0)


/*
 * Scratch area for EXCEPTION_HANDLE and UNHANDLED_EXCEPTION
 *
 * One word per saved value, addressed as a fixed offset from r0.
 * STORE always saves the named register; LOAD takes the destination
 * register as its argument, so the value can be fetched into a temporary.
 */
#define EXCEPTION_T_SLOT_GPR30		0x78
#define EXCEPTION_T_SLOT_GPR10		0x7c
#define EXCEPTION_T_SLOT_SP		0x80
/* only used by UNHANDLED_EXCEPTION */
#define EXCEPTION_T_SLOT_GPR31		0x84

#define EXCEPTION_T_STORE_GPR30		l.sw    EXCEPTION_T_SLOT_GPR30(r0),r30
#define EXCEPTION_T_STORE_GPR10		l.sw    EXCEPTION_T_SLOT_GPR10(r0),r10
#define EXCEPTION_T_STORE_SP		l.sw	EXCEPTION_T_SLOT_SP(r0),r1
#define EXCEPTION_T_STORE_GPR31		l.sw    EXCEPTION_T_SLOT_GPR31(r0),r31

#define EXCEPTION_T_LOAD_GPR30(reg)	l.lwz   reg,EXCEPTION_T_SLOT_GPR30(r0)
#define EXCEPTION_T_LOAD_GPR10(reg)	l.lwz   reg,EXCEPTION_T_SLOT_GPR10(r0)
#define EXCEPTION_T_LOAD_SP(reg)	l.lwz   reg,EXCEPTION_T_SLOT_SP(r0)
#define EXCEPTION_T_LOAD_GPR31(reg)	l.lwz   reg,EXCEPTION_T_SLOT_GPR31(r0)

/* =========================================================[ macros ]=== */


/*
 * GET_CURRENT_PGD(reg,t1): reg = value stored in the variable current_pgd.
 * The address of current_pgd is converted with tophys() before the load,
 * so this works with the MMU off. t1 is clobbered (it ends up holding the
 * converted address of current_pgd); reg and t1 must be different
 * registers.
 */
#define GET_CURRENT_PGD(reg,t1)					\
	LOAD_SYMBOL_2_GPR(reg,current_pgd)			;\
	tophys  (t1,reg)					;\
	l.lwz   reg,0(t1)


/*
 * DSCR: this is a common hook for handling exceptions. It saves the
 *       registers it needs, sets up the kernel stack and the pointer to
 *       current, then jumps to the handler while enabling the MMU.
 *
 * PRMS: handler	- address to continue at. It has to save the
 *			remaining registers to the kernel stack, call the
 *			appropriate arch-independent exception handler
 *			and finally jump to ret_from_except
 *
 * PREQ: unchanged state from the time the exception happened
 *
 * POST: the original values of r12, r30, r10, SP (r1) and r4, together
 *	 with EPCR (as PT_PC) and ESR (as PT_SR), are SAVED in the newly
 *	 created exception frame pointed to by r1
 *
 *	 r1  - ksp	pointing to the new (exception) frame
 *	 r4  - EEAR     exception EA
 *	 r10 - current	pointing to current_thread_info struct (reloaded
 *			from current_thread_info_set only when the
 *			exception came from user mode)
 *	 r12 - syscall  0, since we didn't come from syscall
 *	 r30 - handler	address of the handler we jump to
 *
 *	 handler has to save the remaining registers to the exception
 *	 ksp frame *before* tainting them!
 *
 *	 The jump itself is done with l.rfe after loading ESR with
 *	 EXCEPTION_SR and EPCR with the handler address.
 *
 * NOTE: this function is not reentrant per se: it parks r30, r10 and r1
 *	 in the fixed EXCEPTION_T_* scratch words. Reentrancy is guaranteed
 *       by the processor disabling all exceptions/interrupts when an
 *	 exception occurs.
 *
 * OPTM: no need to make it so wasteful to extract ksp when in user mode
 */

#define EXCEPTION_HANDLE(handler)				\
	EXCEPTION_T_STORE_GPR30					;\
	l.mfspr r30,r0,SPR_ESR_BASE				;\
	l.andi  r30,r30,SPR_SR_SM				;\
	l.sfeqi r30,0						;\
	EXCEPTION_T_STORE_GPR10					;\
	l.bnf   2f                            /* kernel_mode */	;\
	 EXCEPTION_T_STORE_SP                 /* delay slot */	;\
1: /* user_mode:   */						;\
	LOAD_SYMBOL_2_GPR(r1,current_thread_info_set)		;\
	tophys  (r30,r1)					;\
	/* r10: current_thread_info  */				;\
	l.lwz   r10,0(r30)					;\
	tophys  (r30,r10)					;\
	l.lwz   r1,(TI_KSP)(r30)				;\
	/* fall through */					;\
2: /* kernel_mode: */						;\
	/* create new stack frame, save only needed gprs */	;\
	/* r1: KSP, r10: current, r4: EEAR, r30: __pa(KSP) */	;\
	/* r12:	temp, syscall indicator */			;\
	l.addi  r1,r1,-(INT_FRAME_SIZE)				;\
	/* r1 is KSP, r30 is __pa(KSP) */			;\
	tophys  (r30,r1)					;\
	l.sw    PT_GPR12(r30),r12				;\
	l.mfspr r12,r0,SPR_EPCR_BASE				;\
	l.sw    PT_PC(r30),r12					;\
	l.mfspr r12,r0,SPR_ESR_BASE				;\
	l.sw    PT_SR(r30),r12					;\
	/* save r30 as it was prior to the exception */		;\
	EXCEPTION_T_LOAD_GPR30(r12)				;\
	l.sw	PT_GPR30(r30),r12				;\
	/* save r10 as was prior to exception */		;\
	EXCEPTION_T_LOAD_GPR10(r12)				;\
	l.sw	PT_GPR10(r30),r12				;\
	/* save PT_SP as was prior to exception */		;\
	EXCEPTION_T_LOAD_SP(r12)				;\
	l.sw	PT_SP(r30),r12					;\
	/* save exception r4, set r4 = EA */			;\
	l.sw	PT_GPR4(r30),r4					;\
	l.mfspr r4,r0,SPR_EEAR_BASE				;\
	/* r12 = 0: we did not come from a syscall */		;\
	CLEAR_GPR(r12)						;\
	/* ----- turn on MMU (takes effect at l.rfe) ----- */	;\
	l.ori	r30,r0,(EXCEPTION_SR)				;\
	l.mtspr	r0,r30,SPR_ESR_BASE				;\
	/* r30:	EA address of handler */			;\
	LOAD_SYMBOL_2_GPR(r30,handler)				;\
	l.mtspr r0,r30,SPR_EPCR_BASE				;\
	l.rfe

/*
 * this doesn't work
 *
 *
 * #ifdef CONFIG_JUMP_UPON_UNHANDLED_EXCEPTION
 * #define UNHANDLED_EXCEPTION(handler)				\
 *	l.ori   r3,r0,0x1					;\
 *	l.mtspr r0,r3,SPR_SR					;\
 *      l.movhi r3,hi(0xf0000100)				;\
 *      l.ori   r3,r3,lo(0xf0000100)				;\
 *	l.jr	r3						;\
 *	l.nop	1
 *
 * #endif
 */

/* DSCR: this is the same as EXCEPTION_HANDLE(), we are just
 *       a bit more careful (in case PT_SP or the current pointer are
 *       corrupted): the stack is always set to _unhandled_stack_top and
 *       current to init_thread_union, regardless of the mode the
 *       exception came from.
 *
 *       Before building the frame it prints, through _emergency_print
 *       and _emergency_print_nr, the unhandled-exception banner, the
 *       value of SPR_NPC masked with 0x1f00, the EPC prefix string,
 *       EPCR and a newline.
 *
 *       r31 is the working register here (r30 in EXCEPTION_HANDLE) and
 *       r13 is saved to the frame in addition.
 *
 * NOTE: every instruction that directly follows an l.jal below sits in
 *       its delay slot and sets up r3 for the routine being called.
 */
#define UNHANDLED_EXCEPTION(handler)				\
	EXCEPTION_T_STORE_GPR31					;\
	EXCEPTION_T_STORE_GPR10					;\
	EXCEPTION_T_STORE_SP					;\
	/* temporary store r3, r9 into r1, r10 */		;\
	l.addi	r1,r3,0x0					;\
	l.addi	r10,r9,0x0					;\
	/* the string referenced by r3 must be low enough */	;\
	l.jal	_emergency_print				;\
	l.ori	r3,r0,lo(_string_unhandled_exception)		;\
	l.mfspr	r3,r0,SPR_NPC					;\
	l.jal	_emergency_print_nr				;\
	l.andi	r3,r3,0x1f00					;\
	/* the string referenced by r3 must be low enough */	;\
	l.jal	_emergency_print				;\
	l.ori	r3,r0,lo(_string_epc_prefix)			;\
	l.jal	_emergency_print_nr				;\
	l.mfspr	r3,r0,SPR_EPCR_BASE				;\
	l.jal	_emergency_print				;\
	l.ori	r3,r0,lo(_string_nl)				;\
	/* end of printing: restore r3, r9 from r1, r10 */	;\
	l.addi	r3,r1,0x0					;\
	l.addi	r9,r10,0x0					;\
	/* fixed ksp and current, independent of old state */	;\
	LOAD_SYMBOL_2_GPR(r1,_unhandled_stack_top)		;\
	LOAD_SYMBOL_2_GPR(r10,init_thread_union)		;\
	/* create new stack frame, save only needed gprs */	;\
	/* r1: KSP, r10: current, r31: __pa(KSP) */		;\
	/* r12:	temp, syscall indicator, r13 temp */		;\
	l.addi  r1,r1,-(INT_FRAME_SIZE)				;\
	/* r1 is KSP, r31 is __pa(KSP) */			;\
	tophys  (r31,r1)					;\
	l.sw    PT_GPR12(r31),r12					;\
	l.mfspr r12,r0,SPR_EPCR_BASE				;\
	l.sw    PT_PC(r31),r12					;\
	l.mfspr r12,r0,SPR_ESR_BASE				;\
	l.sw    PT_SR(r31),r12					;\
	/* save r31 as it was prior to the exception */		;\
	EXCEPTION_T_LOAD_GPR31(r12)				;\
	l.sw	PT_GPR31(r31),r12					;\
	/* save r10 as was prior to exception */		;\
	EXCEPTION_T_LOAD_GPR10(r12)				;\
	l.sw	PT_GPR10(r31),r12					;\
	/* save PT_SP as was prior to exception */			;\
	EXCEPTION_T_LOAD_SP(r12)				;\
	l.sw	PT_SP(r31),r12					;\
	l.sw    PT_GPR13(r31),r13					;\
	/* --> */						;\
	/* save exception r4, set r4 = EA */			;\
	l.sw	PT_GPR4(r31),r4					;\
	l.mfspr r4,r0,SPR_EEAR_BASE				;\
	/* r12 = 0: we did not come from a syscall */		;\
	CLEAR_GPR(r12)						;\
	/* ----- play a MMU trick: rfe into the handler ----- */	;\
	l.ori	r31,r0,(EXCEPTION_SR)				;\
	l.mtspr	r0,r31,SPR_ESR_BASE				;\
	/* r31:	EA address of handler */			;\
	LOAD_SYMBOL_2_GPR(r31,handler)				;\
	l.mtspr r0,r31,SPR_EPCR_BASE				;\
	l.rfe

/* =====================================================[ exceptions] === */

/* ---[ 0x100: RESET exception ]----------------------------------------- */
    .org 0x100
	/* Jump to .init code at _start which lives in the .head section
	 * and will be discarded after boot.
	 *
	 * The address of _start is converted with tophys() because the
	 * MMU is off at this point. r15 and r13 are clobbered.
	 */
	LOAD_SYMBOL_2_GPR(r15, _start)
	tophys	(r13,r15)			/* MMU disabled */
	l.jr	r13
	 l.nop					/* delay slot */

/*
 * Vectors 0x200 - 0xe00. Each vector sits at a fixed .org offset, 0x100
 * bytes after the previous one, so whatever a vector expands to has to
 * fit in 0x100 bytes.
 */

/* ---[ 0x200: BUS exception ]------------------------------------------- */
    .org 0x200
/* also the target of the exit_with_no_{d,i}translation paths below */
_dispatch_bus_fault:
	EXCEPTION_HANDLE(_bus_fault_handler)

/* ---[ 0x300: Data Page Fault exception ]------------------------------- */
    .org 0x300
_dispatch_do_dpage_fault:
//      totally disable timer interrupt
// 	l.mtspr	r0,r0,SPR_TTMR
//	DEBUG_TLB_PROBE(0x300)
//	EXCEPTION_DEBUG_VALUE_ER_ENABLED(0x300)
	EXCEPTION_HANDLE(_data_page_fault_handler)

/* ---[ 0x400: Insn Page Fault exception ]------------------------------- */
    .org 0x400
_dispatch_do_ipage_fault:
//      totally disable timer interrupt
//	l.mtspr	r0,r0,SPR_TTMR
//	DEBUG_TLB_PROBE(0x400)
//	EXCEPTION_DEBUG_VALUE_ER_ENABLED(0x400)
	EXCEPTION_HANDLE(_insn_page_fault_handler)

/* ---[ 0x500: Timer exception ]----------------------------------------- */
    .org 0x500
	EXCEPTION_HANDLE(_timer_handler)

/* ---[ 0x600: Alignment exception ]------------------------------------- */
    .org 0x600
	EXCEPTION_HANDLE(_alignment_handler)

/* ---[ 0x700: Illegal insn exception ]---------------------------------- */
    .org 0x700
	EXCEPTION_HANDLE(_illegal_instruction_handler)

/* ---[ 0x800: External interrupt exception ]---------------------------- */
    .org 0x800
	EXCEPTION_HANDLE(_external_irq_handler)

/* ---[ 0x900: DTLB miss exception ]------------------------------------- */
    .org 0x900
	/* goes to the boot-time handler, not to dtlb_miss_handler */
	l.j	boot_dtlb_miss_handler
	l.nop

/* ---[ 0xa00: ITLB miss exception ]------------------------------------- */
    .org 0xa00
	/* goes to the boot-time handler, not to itlb_miss_handler */
	l.j	boot_itlb_miss_handler
	l.nop

/* ---[ 0xb00: Range exception ]----------------------------------------- */
    .org 0xb00
	UNHANDLED_EXCEPTION(_vector_0xb00)

/* ---[ 0xc00: Syscall exception ]--------------------------------------- */
    .org 0xc00
	EXCEPTION_HANDLE(_sys_call_handler)

/* ---[ 0xd00: Floating point exception ]-------------------------------- */
    .org 0xd00
	UNHANDLED_EXCEPTION(_vector_0xd00)

/* ---[ 0xe00: Trap exception ]------------------------------------------ */
    .org 0xe00
//	UNHANDLED_EXCEPTION(_vector_0xe00)
	EXCEPTION_HANDLE(_trap_handler)

/*
 * Vectors 0xf00 - 0x1f00: all of them go through UNHANDLED_EXCEPTION
 * with a per-vector _vector_0x* symbol as the continuation address.
 * The trailing .org 0x2000 ends the vector area.
 */

/* ---[ 0xf00: Reserved exception ]-------------------------------------- */
    .org 0xf00
	UNHANDLED_EXCEPTION(_vector_0xf00)

/* ---[ 0x1000: Reserved exception ]------------------------------------- */
    .org 0x1000
	UNHANDLED_EXCEPTION(_vector_0x1000)

/* ---[ 0x1100: Reserved exception ]------------------------------------- */
    .org 0x1100
	UNHANDLED_EXCEPTION(_vector_0x1100)

/* ---[ 0x1200: Reserved exception ]------------------------------------- */
    .org 0x1200
	UNHANDLED_EXCEPTION(_vector_0x1200)

/* ---[ 0x1300: Reserved exception ]------------------------------------- */
    .org 0x1300
	UNHANDLED_EXCEPTION(_vector_0x1300)

/* ---[ 0x1400: Reserved exception ]------------------------------------- */
    .org 0x1400
	UNHANDLED_EXCEPTION(_vector_0x1400)

/* ---[ 0x1500: Reserved exception ]------------------------------------- */
    .org 0x1500
	UNHANDLED_EXCEPTION(_vector_0x1500)

/* ---[ 0x1600: Reserved exception ]------------------------------------- */
    .org 0x1600
	UNHANDLED_EXCEPTION(_vector_0x1600)

/* ---[ 0x1700: Reserved exception ]------------------------------------- */
    .org 0x1700
	UNHANDLED_EXCEPTION(_vector_0x1700)

/* ---[ 0x1800: Reserved exception ]------------------------------------- */
    .org 0x1800
	UNHANDLED_EXCEPTION(_vector_0x1800)

/* ---[ 0x1900: Reserved exception ]------------------------------------- */
    .org 0x1900
	UNHANDLED_EXCEPTION(_vector_0x1900)

/* ---[ 0x1a00: Reserved exception ]------------------------------------- */
    .org 0x1a00
	UNHANDLED_EXCEPTION(_vector_0x1a00)

/* ---[ 0x1b00: Reserved exception ]------------------------------------- */
    .org 0x1b00
	UNHANDLED_EXCEPTION(_vector_0x1b00)

/* ---[ 0x1c00: Reserved exception ]------------------------------------- */
    .org 0x1c00
	UNHANDLED_EXCEPTION(_vector_0x1c00)

/* ---[ 0x1d00: Reserved exception ]------------------------------------- */
    .org 0x1d00
	UNHANDLED_EXCEPTION(_vector_0x1d00)

/* ---[ 0x1e00: Reserved exception ]------------------------------------- */
    .org 0x1e00
	UNHANDLED_EXCEPTION(_vector_0x1e00)

/* ---[ 0x1f00: Reserved exception ]------------------------------------- */
    .org 0x1f00
	UNHANDLED_EXCEPTION(_vector_0x1f00)

    .org 0x2000
/* ===================================================[ kernel start ]=== */

/*    .text*/

/* This early stuff belongs in HEAD, but some of the functions below definitely
 * don't... */

	__HEAD
	.global _start
/*
 * _start: kernel entry, reached from the reset vector with the MMU off.
 *
 * In:  r3 = pointer to the flattened device tree (kept in r25)
 *
 * Sets SR to 0x1, zeroes every GPR except r25, then sets up r1 (initial
 * kernel stack pointer) and r10 (current) and records the stack pointer
 * in the TI_KSP field of init_thread_union.
 */
_start:
	/* save kernel parameters */
	l.or	r25,r0,r3	/* pointer to fdt */

	/*
	 * ensure a deterministic start
	 */

	/* SR = 0x1 (presumably SM only - confirm against spr_defs.h) */
	l.ori	r3,r0,0x1
	l.mtspr	r0,r3,SPR_SR

	/* r25 is deliberately left out: it holds the fdt pointer */
	CLEAR_GPR(r1)
	CLEAR_GPR(r2)
	CLEAR_GPR(r3)
	CLEAR_GPR(r4)
	CLEAR_GPR(r5)
	CLEAR_GPR(r6)
	CLEAR_GPR(r7)
	CLEAR_GPR(r8)
	CLEAR_GPR(r9)
	CLEAR_GPR(r10)
	CLEAR_GPR(r11)
	CLEAR_GPR(r12)
	CLEAR_GPR(r13)
	CLEAR_GPR(r14)
	CLEAR_GPR(r15)
	CLEAR_GPR(r16)
	CLEAR_GPR(r17)
	CLEAR_GPR(r18)
	CLEAR_GPR(r19)
	CLEAR_GPR(r20)
	CLEAR_GPR(r21)
	CLEAR_GPR(r22)
	CLEAR_GPR(r23)
	CLEAR_GPR(r24)
	CLEAR_GPR(r26)
	CLEAR_GPR(r27)
	CLEAR_GPR(r28)
	CLEAR_GPR(r29)
	CLEAR_GPR(r30)
	CLEAR_GPR(r31)

	/*
	 * set up initial ksp and current
	 *
	 * The stack top is init_thread_union + 0x2000 (hard-coded;
	 * presumably THREAD_SIZE - confirm). The store to TI_KSP goes
	 * through the tophys() address since the MMU is still off.
	 */
	LOAD_SYMBOL_2_GPR(r1,init_thread_union+0x2000)	// setup kernel stack
	LOAD_SYMBOL_2_GPR(r10,init_thread_union)	// setup current
	tophys	(r31,r10)
	l.sw	TI_KSP(r31), r1

	/* r4 = 0 (it was already cleared above) */
	l.ori	r4,r0,0x0


	/*
	 * .data contains initialized data,
	 * .bss contains uninitialized data - clear it up
	 *
	 * Zeroes every word in [__bss_start, _end) through the tophys()
	 * addresses, since the MMU is still off.
	 *
	 * The end test is done *before* each store: the loop neither
	 * touches the word located at _end nor writes anything when .bss
	 * is empty. (Testing after the store, with the increment in the
	 * branch delay slot, cleared one word too many.)
	 *
	 * Clobbers: r24, r26 (left zeroed), r28 (== r30 on exit when .bss
	 * is a whole number of words), r30 (tophys(_end)), flag.
	 */
clear_bss:
	LOAD_SYMBOL_2_GPR(r24, __bss_start)
	LOAD_SYMBOL_2_GPR(r26, _end)
	tophys(r28,r24)
	tophys(r30,r26)
	CLEAR_GPR(r24)
	CLEAR_GPR(r26)
1:
	l.sfgeu r28,r30			// cursor reached the end of .bss?
	l.bf    2f			// yes: nothing (more) to clear
	 l.nop				// delay slot
	l.sw    (0)(r28),r0
	l.j     1b
	 l.addi r28,r28,4		// delay slot: advance the cursor
2:

/*
 * Enable the instruction cache, then the data cache. Both helpers
 * return through r9, which l.jal sets; each l.nop fills the delay slot.
 */
enable_ic:
	l.jal	_ic_enable
	 l.nop

enable_dc:
	l.jal	_dc_enable
	 l.nop

flush_tlb:
	/*
	 *  I N V A L I D A T E   T L B   e n t r i e s
	 *
	 * Writes 0 to consecutive SPRs starting at SPR_DTLBMR_BASE(0) and
	 * SPR_ITLBMR_BASE(0). r5/r6 are the running SPR addresses, r7 is
	 * the countdown.
	 *
	 * NOTE(review): r7 is compared with 0 before it is decremented
	 * (the decrement sits in the delay slot), so the body runs for
	 * r7 = 128 down to 0, i.e. 129 times, and the last pass writes the
	 * SPR at offset 128 from each base - confirm that this is
	 * harmless/intended.
	 */
	LOAD_SYMBOL_2_GPR(r5,SPR_DTLBMR_BASE(0))
	LOAD_SYMBOL_2_GPR(r6,SPR_ITLBMR_BASE(0))
	l.addi	r7,r0,128 /* Maximum number of sets */
1:
	l.mtspr	r5,r0,0x0
	l.mtspr	r6,r0,0x0

	l.addi	r5,r5,1
	l.addi	r6,r6,1
	l.sfeq	r7,r0
	l.bnf	1b
	 l.addi	r7,r7,-1


/* The MMU needs to be enabled before or32_early_setup is called */

enable_mmu:
	/*
	 * enable dmmu & immu
	 * The DME and IME bits are OR-ed into the current SR value, i.e.
	 * both bits are set to 1; all other SR bits are left as they are.
	 * The mtspr is followed by 16 l.nop instructions.
	 */
	l.mfspr	r30,r0,SPR_SR
	l.movhi	r28,hi(SPR_SR_DME | SPR_SR_IME)
	l.ori	r28,r28,lo(SPR_SR_DME | SPR_SR_IME)
	l.or	r30,r30,r28
	l.mtspr	r0,r30,SPR_SR
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop

	// reset the simulation counters
	l.nop 5

	/*
	 * check fdt header magic word
	 * r25 still holds the fdt pointer saved at _start. If the first
	 * word it points to is not OF_DT_HEADER the pointer is replaced
	 * by NULL before it is handed on.
	 */
	l.lwz	r3,0(r25)	/* load magic from fdt into r3 */
	l.movhi	r4,hi(OF_DT_HEADER)
	l.ori	r4,r4,lo(OF_DT_HEADER)
	l.sfeq	r3,r4
	l.bf	_fdt_found
	 l.nop
	/* magic number mismatch, set fdt pointer to null */
	l.or	r25,r0,r0
_fdt_found:
	/* pass fdt pointer to or32_early_setup in r3 */
	l.or	r3,r0,r25
	LOAD_SYMBOL_2_GPR(r24, or32_early_setup)
	l.jalr r24
	 l.nop

clear_regs:
	/*
	 * clear all GPRS to increase determinism
	 *
	 * r1 and r10 are not in the list: _start loaded them with the
	 * initial kernel stack pointer and with init_thread_union
	 * (current). r25, which carried the fdt pointer, is cleared here.
	 */
	CLEAR_GPR(r2)
	CLEAR_GPR(r3)
	CLEAR_GPR(r4)
	CLEAR_GPR(r5)
	CLEAR_GPR(r6)
	CLEAR_GPR(r7)
	CLEAR_GPR(r8)
	CLEAR_GPR(r9)
	CLEAR_GPR(r11)
	CLEAR_GPR(r12)
	CLEAR_GPR(r13)
	CLEAR_GPR(r14)
	CLEAR_GPR(r15)
	CLEAR_GPR(r16)
	CLEAR_GPR(r17)
	CLEAR_GPR(r18)
	CLEAR_GPR(r19)
	CLEAR_GPR(r20)
	CLEAR_GPR(r21)
	CLEAR_GPR(r22)
	CLEAR_GPR(r23)
	CLEAR_GPR(r24)
	CLEAR_GPR(r25)
	CLEAR_GPR(r26)
	CLEAR_GPR(r27)
	CLEAR_GPR(r28)
	CLEAR_GPR(r29)
	CLEAR_GPR(r30)
	CLEAR_GPR(r31)

jump_start_kernel:
	/*
	 * jump to kernel entry (start_kernel)
	 *
	 * Plain l.jr, not l.jalr: no return address is set up, and r30 is
	 * left holding the address of start_kernel.
	 */
	LOAD_SYMBOL_2_GPR(r30, start_kernel)
	l.jr    r30
	 l.nop

/* ========================================[ cache ]=== */

	/* alignment here so we don't change memory offsets with
	 * memory controller defined
	 */
	.align 0x2000

/*
 * _ic_enable: invalidate and enable the instruction cache, if present.
 *
 * In:    r9 = return address
 * Clobb: r5, r6, r14, r16, r24, r26, r28, r30, flag
 *
 * Returns immediately, without touching SR, when the ICP bit of SPR_UPR
 * is clear.
 */
_ic_enable:
	/* Check if IC present and skip enabling otherwise */
	l.mfspr r24,r0,SPR_UPR
	l.andi  r26,r24,SPR_UPR_ICP
	l.sfeq  r26,r0
	l.bf	9f
	l.nop

	/* Disable IC: SR &= ~SPR_SR_ICE (mask built as -1 ^ ICE) */
	l.mfspr r6,r0,SPR_SR
	l.addi  r5,r0,-1
	l.xori  r5,r5,SPR_SR_ICE
	l.and   r5,r6,r5
	l.mtspr r0,r5,SPR_SR

	/* Establish cache block size
	   If BS=0, 16;
	   If BS=1, 32;
	   r14 contain block size
	   (r14 = 16 << ((ICCFGR & CBS) >> 7))
	*/
	l.mfspr r24,r0,SPR_ICCFGR
	l.andi	r26,r24,SPR_ICCFGR_CBS
	l.srli	r28,r26,7
	l.ori	r30,r0,16
	l.sll	r14,r30,r28

	/* Establish number of cache sets
	   r16 contains number of cache sets
	   r28 contains log(# of cache sets)
	   (r16 is computed but not read again in this routine)
	*/
	l.andi  r26,r24,SPR_ICCFGR_NCS
	l.srli 	r28,r26,3
	l.ori   r30,r0,1
	l.sll   r16,r30,r28

	/* Invalidate IC
	   r6 walks from 0 in steps of the block size (r14) and is written
	   to SPR_ICBIR each time; r5 = block size << log(sets) is the end
	   value. The compare runs before the increment in the delay slot,
	   so the value r5 itself is written as well.
	*/
	l.addi  r6,r0,0
	l.sll   r5,r14,r28
//        l.mul   r5,r14,r16
//	l.trap  1
//	l.addi  r5,r0,IC_SIZE
1:
	l.mtspr r0,r6,SPR_ICBIR
	l.sfne  r6,r5
	l.bf    1b
	l.add   r6,r6,r14
 //       l.addi   r6,r6,IC_LINE

	/* Enable IC: SR |= SPR_SR_ICE, followed by 10 l.nop */
	l.mfspr r6,r0,SPR_SR
	l.ori   r6,r6,SPR_SR_ICE
	l.mtspr r0,r6,SPR_SR
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
	l.nop
9:
	l.jr    r9
	l.nop

/*
 * _dc_enable: invalidate and enable the data cache, if present.
 * Same structure as _ic_enable, using the DC registers and bits.
 *
 * In:    r9 = return address
 * Clobb: r5, r6, r14, r16, r24, r26, r28, r30, flag
 *
 * Returns immediately, without touching SR, when the DCP bit of SPR_UPR
 * is clear. Unlike _ic_enable there are no l.nop instructions after the
 * final write to SR.
 */
_dc_enable:
	/* Check if DC present and skip enabling otherwise */
	l.mfspr r24,r0,SPR_UPR
	l.andi  r26,r24,SPR_UPR_DCP
	l.sfeq  r26,r0
	l.bf	9f
	l.nop

	/* Disable DC: SR &= ~SPR_SR_DCE (mask built as -1 ^ DCE) */
	l.mfspr r6,r0,SPR_SR
	l.addi  r5,r0,-1
	l.xori  r5,r5,SPR_SR_DCE
	l.and   r5,r6,r5
	l.mtspr r0,r5,SPR_SR

	/* Establish cache block size
	   If BS=0, 16;
	   If BS=1, 32;
	   r14 contain block size
	   (r14 = 16 << ((DCCFGR & CBS) >> 7))
	*/
	l.mfspr r24,r0,SPR_DCCFGR
	l.andi	r26,r24,SPR_DCCFGR_CBS
	l.srli	r28,r26,7
	l.ori	r30,r0,16
	l.sll	r14,r30,r28

	/* Establish number of cache sets
	   r16 contains number of cache sets
	   r28 contains log(# of cache sets)
	   (r16 is computed but not read again in this routine)
	*/
	l.andi  r26,r24,SPR_DCCFGR_NCS
	l.srli 	r28,r26,3
	l.ori   r30,r0,1
	l.sll   r16,r30,r28

	/* Invalidate DC
	   r6 walks from 0 in steps of the block size (r14) and is written
	   to SPR_DCBIR each time; r5 = block size << log(sets) is the end
	   value. The compare runs before the increment in the delay slot,
	   so the value r5 itself is written as well.
	*/
	l.addi  r6,r0,0
	l.sll   r5,r14,r28
1:
	l.mtspr r0,r6,SPR_DCBIR
	l.sfne  r6,r5
	l.bf    1b
	l.add   r6,r6,r14

	/* Enable DC: SR |= SPR_SR_DCE */
	l.mfspr r6,r0,SPR_SR
	l.ori   r6,r6,SPR_SR_DCE
	l.mtspr r0,r6,SPR_SR
9:
	l.jr    r9
	l.nop

/* ===============================================[ page table masks ]=== */

/* bit 4 is used in hardware as write back cache bit. we never use this bit
 * explicitly, so we can reuse it as _PAGE_FILE bit and mask it out when
 * writing into hardware pte's
 *
 * NOTE(review): as written, 0x3fa keeps bits 1 and 3-9 and 0x3a keeps
 * bits 1 and 3-5, so bit 4 is kept by both masks, not masked out -
 * confirm which of the comment or the values is current.
 * The TLB miss handlers further down do not use these macros; they
 * spell the combined values out as literals (0xffffe3fa, 0xffffe03a).
 */

#define DTLB_UP_CONVERT_MASK  0x3fa
#define ITLB_UP_CONVERT_MASK  0x3a

/* for SMP we'd have (this is a bit subtle, CC must be always set
 * for SMP, but since we have _PAGE_PRESENT bit always defined
 * we can just modify the mask)
 *
 * i.e. the UP masks with bit 0 added.
 */
#define DTLB_SMP_CONVERT_MASK  0x3fb
#define ITLB_SMP_CONVERT_MASK  0x3b

/* ---[ boot dtlb miss handler ]----------------------------------------- */

/*
 * Reached from the 0x900 vector. Builds a DTLB entry for the faulting EA
 * without consulting any page table:
 *   - EA <= 0xbfffffff : identity mapping (PA = EA)
 *   - EA >  0xbfffffff : PA = tophys(EA)
 * and returns with l.rfe.
 *
 * r2-r6 are parked in the EXCEPTION_* scratch words and restored before
 * the return, so no register is changed from the point of view of the
 * interrupted code.
 *
 * Working registers: r4 = EA, r2 = set index, r3 = PA, r5/r6 = temps.
 */
boot_dtlb_miss_handler:

/* mask for DTLB_MR register: - (0) sets V (valid) bit,
 *                            - (31-12) sets bits belonging to VPN (31-12)
 */
#define DTLB_MR_MASK 0xfffff001

/* mask for DTLB_TR register: - (1) sets CI (cache inhibit) bit,
 *			      - (4) sets A (access) bit,
 *                            - (5) sets D (dirty) bit,
 *                            - (8) sets SRE (superuser read) bit
 *                            - (9) sets SWE (superuser write) bit
 *                            - (31-12) sets bits belonging to PPN (31-12)
 */
#define DTLB_TR_MASK 0xfffff332

/* These are for masking out the VPN/PPN value from the MR/TR registers...
 * it's not the same as the PFN */
#define VPN_MASK 0xfffff000
#define PPN_MASK 0xfffff000


	EXCEPTION_STORE_GPR6

#if 0
	l.mfspr r6,r0,SPR_ESR_BASE	   //
	l.andi  r6,r6,SPR_SR_SM            // are we in kernel mode ?
	l.sfeqi r6,0                       // r6 == 0x1 --> SM
	l.bf    exit_with_no_dtranslation  //
	l.nop
#endif

	/* this could be optimized by moving storing of
	 * non r6 registers here, and jumping r6 restore
	 * if not in supervisor mode
	 */

	EXCEPTION_STORE_GPR2
	EXCEPTION_STORE_GPR3
	EXCEPTION_STORE_GPR4
	EXCEPTION_STORE_GPR5

	l.mfspr r4,r0,SPR_EEAR_BASE        // get the offending EA

immediate_translation:
	CLEAR_GPR(r6)

	l.srli	r3,r4,0xd                  // r3 <- r4 / 8192 (sets are relative to page size (8Kb) NOT VPN size (4Kb)

	l.mfspr r6, r0, SPR_DMMUCFGR
	l.andi	r6, r6, SPR_DMMUCFGR_NTS
	l.srli	r6, r6, SPR_DMMUCFGR_NTS_OFF
	l.ori	r5, r0, 0x1
	l.sll	r5, r5, r6 	// r5 = number DMMU sets
	l.addi	r6, r5, -1  	// r6 = nsets mask
	l.and	r2, r3, r6	// r2 <- r3 & nsets mask: the set index

	l.or    r6,r6,r4                   // r6 <- EA | nsets mask; the extra low bits are dropped by the l.and below
	l.ori   r6,r6,~(VPN_MASK)          // r6 <- VPN :VPN .xfff - set lo(r6) to 0x**** *fff
	l.movhi r5,hi(DTLB_MR_MASK)        // r5 <- ffff:0000.x000
	l.ori   r5,r5,lo(DTLB_MR_MASK)     // r5 <- ffff:1111.x001 - apply DTLB_MR_MASK
	l.and   r5,r5,r6                   // r5 <- VPN :VPN .x001 - we have DTLBMR entry
	l.mtspr r2,r5,SPR_DTLBMR_BASE(0)   // set DTLBMR

	/* set up DTLB with no translation for EA <= 0xbfffffff */
	LOAD_SYMBOL_2_GPR(r6,0xbfffffff)
	l.sfgeu  r6,r4                     // flag if r6 >= r4 (if 0xbfffffff >= EA)
	l.bf     1f                        // goto out
	l.and    r3,r4,r4                  // delay slot :: r3 <- r4 (always executed)

	tophys(r3,r4)                      // r3 <- PA
1:
	l.ori   r3,r3,~(PPN_MASK)          // r3 <- PPN :PPN .xfff - set lo(r3) to 0x**** *fff
	l.movhi r5,hi(DTLB_TR_MASK)        // r5 <- ffff:0000.x000
	l.ori   r5,r5,lo(DTLB_TR_MASK)     // r5 <- ffff:1111.x332 - apply DTLB_TR_MASK
	l.and   r5,r5,r3                   // r5 <- PPN :PPN .x332 - we have DTLBTR entry
	l.mtspr r2,r5,SPR_DTLBTR_BASE(0)   // set DTLBTR

	EXCEPTION_LOAD_GPR6
	EXCEPTION_LOAD_GPR5
	EXCEPTION_LOAD_GPR4
	EXCEPTION_LOAD_GPR3
	EXCEPTION_LOAD_GPR2

	l.rfe                              // SR <- ESR, PC <- EPC

/*
 * Only referenced from the "#if 0" block above, i.e. currently not
 * reachable.
 */
exit_with_no_dtranslation:
	/* EA out of memory or not in supervisor mode */
	EXCEPTION_LOAD_GPR6
	EXCEPTION_LOAD_GPR4
	l.j	_dispatch_bus_fault
	l.nop				   // delay slot: keeps the first store of boot_itlb_miss_handler out of it

/* ---[ boot itlb miss handler ]----------------------------------------- */

/*
 * Reached from the 0xa00 vector. Builds an ITLB entry for the faulting EA
 * without consulting any page table:
 *   - EA <= 0x0fffffff : identity mapping (PA = EA)
 *   - EA >  0x0fffffff : PA = tophys(EA)
 * and returns with l.rfe.
 *
 * r2-r6 are parked in the EXCEPTION_* scratch words and restored before
 * the return. VPN_MASK and PPN_MASK are the ones defined in
 * boot_dtlb_miss_handler (0xfffff000).
 *
 * Working registers: r4 = EA, r2 = set index, r3 = PA, r5/r6 = temps.
 */
boot_itlb_miss_handler:

/* mask for ITLB_MR register: - sets V (valid) bit,
 *                            - sets bits belonging to VPN (31-12)
 */
#define ITLB_MR_MASK 0xfffff001

/* mask for ITLB_TR register: - sets A (access) bit,
 *                            - sets SXE (superuser execute) bit
 *                            - sets bits belonging to PPN (31-12)
 */
#define ITLB_TR_MASK 0xfffff050

/*
#define VPN_MASK 0xffffe000
#define PPN_MASK 0xffffe000
*/



	EXCEPTION_STORE_GPR2
	EXCEPTION_STORE_GPR3
	EXCEPTION_STORE_GPR4
	EXCEPTION_STORE_GPR5
	EXCEPTION_STORE_GPR6

#if 0
	l.mfspr r6,r0,SPR_ESR_BASE         //
	l.andi  r6,r6,SPR_SR_SM            // are we in kernel mode ?
	l.sfeqi r6,0                       // r6 == 0x1 --> SM
	l.bf    exit_with_no_itranslation
	l.nop
#endif


	l.mfspr r4,r0,SPR_EEAR_BASE        // get the offending EA

earlyearly:
	CLEAR_GPR(r6)

	l.srli  r3,r4,0xd                  // r3 <- r4 / 8192 (sets are relative to page size (8Kb) NOT VPN size (4Kb)

	l.mfspr r6, r0, SPR_IMMUCFGR
	l.andi	r6, r6, SPR_IMMUCFGR_NTS
	l.srli	r6, r6, SPR_IMMUCFGR_NTS_OFF
	l.ori	r5, r0, 0x1
	l.sll	r5, r5, r6 	// r5 = number IMMU sets from IMMUCFGR
	l.addi	r6, r5, -1  	// r6 = nsets mask
	l.and	r2, r3, r6	// r2 <- r3 & nsets mask: the set index

	l.or    r6,r6,r4                   // r6 <- EA | nsets mask; the extra low bits are dropped by the l.and below
	l.ori   r6,r6,~(VPN_MASK)          // r6 <- VPN :VPN .xfff - set lo(r6) to 0x**** *fff
	l.movhi r5,hi(ITLB_MR_MASK)        // r5 <- ffff:0000.x000
	l.ori   r5,r5,lo(ITLB_MR_MASK)     // r5 <- ffff:1111.x001 - apply ITLB_MR_MASK
	l.and   r5,r5,r6                   // r5 <- VPN :VPN .x001 - we have ITLBMR entry
	l.mtspr r2,r5,SPR_ITLBMR_BASE(0)   // set ITLBMR

	/*
	 * set up ITLB with no translation for EA <= 0x0fffffff
	 *
	 * we need this for head.S mapping (EA = PA). if we move all functions
	 * which run with mmu enabled into entry.S, we might be able to eliminate this.
	 *
	 */
	LOAD_SYMBOL_2_GPR(r6,0x0fffffff)
	l.sfgeu  r6,r4                     // flag if r6 >= r4 (if 0x0fffffff >= EA)
	l.bf     1f                        // goto out
	l.and    r3,r4,r4                  // delay slot :: r3 <- r4 (always executed)

	tophys(r3,r4)                      // r3 <- PA
1:
	l.ori   r3,r3,~(PPN_MASK)          // r3 <- PPN :PPN .xfff - set lo(r3) to 0x**** *fff
	l.movhi r5,hi(ITLB_TR_MASK)        // r5 <- ffff:0000.x000
	l.ori   r5,r5,lo(ITLB_TR_MASK)     // r5 <- ffff:1111.x050 - apply ITLB_TR_MASK
	l.and   r5,r5,r3                   // r5 <- PPN :PPN .x050 - we have ITLBTR entry
	l.mtspr r2,r5,SPR_ITLBTR_BASE(0)   // set ITLBTR

	EXCEPTION_LOAD_GPR6
	EXCEPTION_LOAD_GPR5
	EXCEPTION_LOAD_GPR4
	EXCEPTION_LOAD_GPR3
	EXCEPTION_LOAD_GPR2

	l.rfe                              // SR <- ESR, PC <- EPC

/*
 * Only referenced from the "#if 0" block above, i.e. currently not
 * reachable.
 */
exit_with_no_itranslation:
	EXCEPTION_LOAD_GPR4
	EXCEPTION_LOAD_GPR6
	l.j    _dispatch_bus_fault
	l.nop

/* ====================================================================== */
/*
 * Stuff below here shouldn't go into .head section... maybe this stuff
 * can be moved to entry.S ???
 */

/* ==============================================[ DTLB miss handler ]=== */

/*
 * Comments:
 *   Exception handlers are entered with MMU off so the following handler
 *   needs to use physical addressing
 *
 * Walks the two-level page table rooted at current_pgd for the faulting
 * EA and, when a present pte is found, writes the DTLB TR/MR pair of the
 * matching set and returns with l.rfe. When the pmd entry is zero or the
 * pte is not present it restores r2-r6 and continues into
 * EXCEPTION_HANDLE(_dtlb_miss_page_fault_handler).
 *
 * r2-r6 are parked in the EXCEPTION_* scratch words for the duration.
 * Not referenced by the vector table in this file (0x900 jumps to
 * boot_dtlb_miss_handler).
 */

	.text
ENTRY(dtlb_miss_handler)
	EXCEPTION_STORE_GPR2
	EXCEPTION_STORE_GPR3
	EXCEPTION_STORE_GPR4
	EXCEPTION_STORE_GPR5
	EXCEPTION_STORE_GPR6
	/*
	 * get EA of the miss
	 */
	l.mfspr	r2,r0,SPR_EEAR_BASE
	/*
	 * pmd = (pmd_t *)(current_pgd + pgd_index(daddr));
	 */
	GET_CURRENT_PGD(r3,r5)		// r3 is current_pgd, r5 is temp
	l.srli	r4,r2,0x18		// >> PAGE_SHIFT + (PAGE_SHIFT - 2)
	l.slli	r4,r4,0x2		// to get address << 2
	l.add	r5,r4,r3		// r4 is pgd_index(daddr) * 4, r5 = &pgd[index]
	/*
	 * if (pmd_none(*pmd))
	 *   goto pmd_none:
	 */
	tophys	(r4,r5)
	l.lwz	r3,0x0(r4)		// get *pmd value
	l.sfne	r3,r0
	l.bnf	d_pmd_none
	 l.andi	r3,r3,~PAGE_MASK //0x1fff		// delay slot; result is overwritten just below
	/*
	 * if (pmd_bad(*pmd))
	 *   pmd_clear(pmd)
	 *   goto pmd_bad:
	 *
	 * The check is commented out; only the load of PAGE_MASK is left.
	 */
//	l.sfeq	r3,r0			// check *pmd value
//	l.bf	d_pmd_good
	l.addi	r3,r0,0xffffe000	// PAGE_MASK
//	l.j	d_pmd_bad
//	l.sw	0x0(r4),r0		// clear pmd
d_pmd_good:
	/*
	 * pte = *pte_offset(pmd, daddr);
	 */
	l.lwz	r4,0x0(r4)		// get **pmd value
	l.and	r4,r4,r3		// & PAGE_MASK
	l.srli	r5,r2,0xd		// >> PAGE_SHIFT, r2 == EEAR
	l.andi	r3,r5,0x7ff		// (1UL << PAGE_SHIFT - 2) - 1
	l.slli	r3,r3,0x2		// to get address << 2
	l.add	r3,r3,r4
	l.lwz	r2,0x0(r3)		// this is pte at last
	/*
	 * if (!pte_present(pte))
	 */
	l.andi	r4,r2,0x1
	l.sfne	r4,r0			// is pte present
	l.bnf	d_pte_not_present
	l.addi	r3,r0,0xffffe3fa	// delay slot: PAGE_MASK | DTLB_UP_CONVERT_MASK
	/*
	 * fill DTLB TR register
	 */
	l.and	r4,r2,r3		// apply the mask
	// Determine number of DMMU sets
	l.mfspr r6, r0, SPR_DMMUCFGR
	l.andi	r6, r6, SPR_DMMUCFGR_NTS
	l.srli	r6, r6, SPR_DMMUCFGR_NTS_OFF
	l.ori	r3, r0, 0x1
	l.sll	r3, r3, r6 	// r3 = number DMMU sets DMMUCFGR
	l.addi	r6, r3, -1  	// r6 = nsets mask
	l.and	r5, r5, r6	// calc offset:	 & (NUM_TLB_ENTRIES-1)
	                                                   //NUM_TLB_ENTRIES
	l.mtspr	r5,r4,SPR_DTLBTR_BASE(0)
	/*
	 * fill DTLB MR register
	 */
	l.mfspr	r2,r0,SPR_EEAR_BASE
	l.addi	r3,r0,0xffffe000	// PAGE_MASK
	l.and	r4,r2,r3		// apply PAGE_MASK to EA (__PHX__ do we really need this?)
	l.ori	r4,r4,0x1		// set hardware valid bit: DTLB_MR entry
	l.mtspr	r5,r4,SPR_DTLBMR_BASE(0)

	EXCEPTION_LOAD_GPR2
	EXCEPTION_LOAD_GPR3
	EXCEPTION_LOAD_GPR4
	EXCEPTION_LOAD_GPR5
	EXCEPTION_LOAD_GPR6
	l.rfe
/* not reachable: the only branches to d_pmd_bad are commented out above */
d_pmd_bad:
	l.nop	1
	EXCEPTION_LOAD_GPR2
	EXCEPTION_LOAD_GPR3
	EXCEPTION_LOAD_GPR4
	EXCEPTION_LOAD_GPR5
	EXCEPTION_LOAD_GPR6
	l.rfe
d_pmd_none:
d_pte_not_present:
	/* restore the scratch registers, then take the regular exception path */
	EXCEPTION_LOAD_GPR2
	EXCEPTION_LOAD_GPR3
	EXCEPTION_LOAD_GPR4
	EXCEPTION_LOAD_GPR5
	EXCEPTION_LOAD_GPR6
	EXCEPTION_HANDLE(_dtlb_miss_page_fault_handler)

/* ==============================================[ ITLB miss handler ]=== */
/*
 * Walks the two-level page table rooted at current_pgd for the faulting
 * EA and, when a present pte is found, writes the ITLB TR/MR pair of the
 * matching set and returns with l.rfe. When the pmd entry is zero or the
 * pte is not present it restores r2-r6 and continues into
 * EXCEPTION_HANDLE(_itlb_miss_page_fault_handler).
 *
 * The handler runs with the MMU off (see the note above
 * dtlb_miss_handler), hence the tophys() on the page table pointer.
 * r2-r6 are parked in the EXCEPTION_* scratch words for the duration.
 *
 * The set index in r5 is reduced with the IMMU set mask *before* the
 * permission test branches to itlb_tr_fill, so that both ways of
 * reaching the l.mtspr instructions use a valid set index. (With the
 * masking placed after the branch, only its first instruction ran - in
 * the delay slot - when the branch was taken, and the unmasked EA >> 13
 * was used as SPR offset.)
 */
ENTRY(itlb_miss_handler)
	EXCEPTION_STORE_GPR2
	EXCEPTION_STORE_GPR3
	EXCEPTION_STORE_GPR4
	EXCEPTION_STORE_GPR5
	EXCEPTION_STORE_GPR6
	/*
	 * get EA of the miss
	 */
	l.mfspr	r2,r0,SPR_EEAR_BASE

	/*
	 * pmd = (pmd_t *)(current_pgd + pgd_index(daddr));
	 *
	 */
	GET_CURRENT_PGD(r3,r5)		// r3 is current_pgd, r5 is temp
	l.srli	r4,r2,0x18		// >> PAGE_SHIFT + (PAGE_SHIFT - 2)
	l.slli	r4,r4,0x2		// to get address << 2
	l.add	r5,r4,r3		// r4 is pgd_index(daddr) * 4, r5 = &pgd[index]
	/*
	 * if (pmd_none(*pmd))
	 *   goto pmd_none:
	 */
	tophys	(r4,r5)
	l.lwz	r3,0x0(r4)		// get *pmd value
	l.sfne	r3,r0
	l.bnf	i_pmd_none
	l.andi	r3,r3,0x1fff		// delay slot: ~PAGE_MASK; result is overwritten just below
	/*
	 * if (pmd_bad(*pmd))
	 *   pmd_clear(pmd)
	 *   goto pmd_bad:
	 *
	 * The check is commented out; only the load of PAGE_MASK is left.
	 */

//	l.sfeq	r3,r0			// check *pmd value
//	l.bf	i_pmd_good
	l.addi	r3,r0,0xffffe000	// PAGE_MASK
//	l.j	i_pmd_bad
//	l.sw	0x0(r4),r0		// clear pmd

i_pmd_good:
	/*
	 * pte = *pte_offset(pmd, iaddr);
	 *
	 */
	l.lwz	r4,0x0(r4)		// get **pmd value
	l.and	r4,r4,r3		// & PAGE_MASK
	l.srli	r5,r2,0xd		// >> PAGE_SHIFT, r2 == EEAR
	l.andi	r3,r5,0x7ff		// (1UL << PAGE_SHIFT - 2) - 1
	l.slli	r3,r3,0x2		// to get address << 2
	l.add	r3,r3,r4
	l.lwz	r2,0x0(r3)		// this is pte at last
	/*
	 * if (!pte_present(pte))
	 *
	 */
	l.andi	r4,r2,0x1
	l.sfne	r4,r0			// is pte present
	l.bnf	i_pte_not_present
	l.addi	r3,r0,0xffffe03a	// delay slot: PAGE_MASK | ITLB_UP_CONVERT_MASK
	/*
	 * fill ITLB TR register
	 */
	l.and	r4,r2,r3		// apply the mask
	// Determine number of IMMU sets and reduce r5 to the set index.
	// This must happen before the branch below: both of its outcomes
	// use r5 as the SPR offset.
	l.mfspr r6, r0, SPR_IMMUCFGR
	l.andi	r6, r6, SPR_IMMUCFGR_NTS
	l.srli	r6, r6, SPR_IMMUCFGR_NTS_OFF
	l.ori	r3, r0, 0x1
	l.sll	r3, r3, r6 	// r3 = number IMMU sets IMMUCFGR
	l.addi	r6, r3, -1  	// r6 = nsets mask
	l.and	r5, r5, r6	// calc offset:	 & (NUM_TLB_ENTRIES-1)
	l.andi	r3,r2,0x7c0		// _PAGE_EXEC | _PAGE_SRE | _PAGE_SWE |  _PAGE_URE | _PAGE_UWE
//	l.andi	r3,r2,0x400		// _PAGE_EXEC
	l.sfeq	r3,r0
	l.bf	itlb_tr_fill //_workaround
	 l.nop				// delay slot

/*
 * __PHX__ :: fixme
 * we should not just blindly set executable flags,
 * but it does help with ping. the clean way would be to find out
 * (and fix it) why stack doesn't have execution permissions
 */

itlb_tr_fill_workaround:
	l.ori	r4,r4,0xc0		// | (SPR_ITLBTR_UXE | ITLBTR_SXE)
itlb_tr_fill:
	l.mtspr	r5,r4,SPR_ITLBTR_BASE(0)
	/*
	 * fill ITLB MR register
	 */
	l.mfspr	r2,r0,SPR_EEAR_BASE
	l.addi	r3,r0,0xffffe000	// PAGE_MASK
	l.and	r4,r2,r3		// apply PAGE_MASK to EA (__PHX__ do we really need this?)
	l.ori	r4,r4,0x1		// set hardware valid bit: ITLB_MR entry
	l.mtspr	r5,r4,SPR_ITLBMR_BASE(0)

	EXCEPTION_LOAD_GPR2
	EXCEPTION_LOAD_GPR3
	EXCEPTION_LOAD_GPR4
	EXCEPTION_LOAD_GPR5
	EXCEPTION_LOAD_GPR6
	l.rfe

/* not reachable: the only branches to i_pmd_bad are commented out above */
i_pmd_bad:
	l.nop	1
	EXCEPTION_LOAD_GPR2
	EXCEPTION_LOAD_GPR3
	EXCEPTION_LOAD_GPR4
	EXCEPTION_LOAD_GPR5
	EXCEPTION_LOAD_GPR6
	l.rfe
i_pmd_none:
i_pte_not_present:
	/* restore the scratch registers, then take the regular exception path */
	EXCEPTION_LOAD_GPR2
	EXCEPTION_LOAD_GPR3
	EXCEPTION_LOAD_GPR4
	EXCEPTION_LOAD_GPR5
	EXCEPTION_LOAD_GPR6
	EXCEPTION_HANDLE(_itlb_miss_page_fault_handler)

/* ==============================================[ boot tlb handlers ]=== */


/* =================================================[ debugging aids ]=== */

	/*
	 * 64-byte, 64-byte aligned buffer that _immu_trampoline_workaround
	 * fills with instruction words at run time.
	 */
	.align 64
_immu_trampoline:
	.space 64
_immu_trampoline_top:

/*
 * Byte offsets of the six 32-bit words used inside _immu_trampoline;
 * TRAMP_FRAME_SIZE (0x18) is the size of those six words.
 */
#define TRAMP_SLOT_0		(0x0)
#define TRAMP_SLOT_1		(0x4)
#define TRAMP_SLOT_2		(0x8)
#define TRAMP_SLOT_3		(0xc)
#define TRAMP_SLOT_4		(0x10)
#define TRAMP_SLOT_5		(0x14)
#define TRAMP_FRAME_SIZE	(0x18)

/*
 * DSCR: copies the jump/branch found at EEA - 4 and the instruction
 *	 found at EEA into _immu_trampoline, re-encodes the jump for its
 *	 new location and sets EPCR to _immu_trampoline, so that the
 *	 pair is executed from the trampoline.
 *
 * PRMS: r2	- EEA
 *	 r9	- return address
 *
 * Trampoline contents on return:
 *	slot 0/1 - 0x15000000 (l.nop encoding) each, or the pair
 *		   "l.movhi r9 / l.ori r9" loading EEA + 4 when the
 *		   original instruction was l.jal or l.jalr
 *	slot 2	 - the instruction from EEA - 4: offset recomputed for
 *		   l.j/l.jal/l.bf/l.bnf (l.jal is stored as l.j),
 *		   l.jalr rewritten to l.jr, l.jr copied unchanged
 *	slot 3	 - unmodified copy of the instruction at EEA
 *	slot 4	 - 0x15000000, or for l.bf/l.bnf a word meant to be an
 *		   l.j back to the original code (see NOTE(review) there)
 *	slot 5	 - 0x15000000
 *
 * Any other opcode at EEA - 4 ends in an endless loop at label 99.
 *
 * POST: r3, r4, r5, r6, r14, r21 and r23 are overwritten and the flag
 *	 is changed; EPCR points to _immu_trampoline.
 *	 NOTE(review): all stores go through tophys addresses, so this
 *	 presumably runs with data translation off, and nothing visible
 *	 here saves r14/r21/r23 - confirm both against the caller.
 */
ENTRY(_immu_trampoline_workaround)
	// r2 EEA
	// r6 is physical EEA
	tophys(r6,r2)

	LOAD_SYMBOL_2_GPR(r5,_immu_trampoline)
	tophys	(r3,r5)			// r3 is trampoline (physical)

	// pre-fill slots 0, 1, 4 and 5 with 0x15000000 (l.nop encoding)
	LOAD_SYMBOL_2_GPR(r4,0x15000000)
	l.sw	TRAMP_SLOT_0(r3),r4
	l.sw	TRAMP_SLOT_1(r3),r4
	l.sw	TRAMP_SLOT_4(r3),r4
	l.sw	TRAMP_SLOT_5(r3),r4

					// EPC = EEA - 0x4
	l.lwz	r4,0x0(r6)		// load op @ EEA + 0x0 (fc address)
	l.sw	TRAMP_SLOT_3(r3),r4	// store it to trampoline slot 3
	l.lwz	r4,-0x4(r6)		// load op @ EEA - 0x4 (f8 address)
	l.sw	TRAMP_SLOT_2(r3),r4	// store it to trampoline slot 2

	// dispatch on the major opcode (top 6 bits) of the op @ EEA - 0x4;
	// each l.sfeqi below sits in the delay slot of the preceding l.bf
	l.srli  r5,r4,26                // r5 = opcode of the jump/branch
	l.sfeqi r5,0                    // l.j
	l.bf    0f
	l.sfeqi r5,0x11                 // l.jr
	l.bf    1f
	l.sfeqi r5,1                    // l.jal
	l.bf    2f
	l.sfeqi r5,0x12                 // l.jalr
	l.bf    3f
	l.sfeqi r5,3                    // l.bnf
	l.bf    4f
	l.sfeqi r5,4                    // l.bf
	l.bf    5f
99:
	// none of the six opcodes matched: spin here forever
	l.nop
	l.j	99b			// should never happen
	l.nop	1

	// register roles for the handlers below:
	// r2 is EEA
	// r3 is trampoline address (physical)
	// r4 is instruction (the op @ EEA - 0x4)
	// r6 is physical(EEA)
	//
	// r5 is scratch

2:	// l.jal

	/* 19 20 aa aa	l.movhi r9,0xaaaa
	 * a9 29 bb bb  l.ori	r9,0xbbbb
	 *
	 * where 0xaaaabbbb is EEA + 0x4; the code below stores the two
	 * halves of that value as they are, without any shift
	 */

	l.addi	r6,r2,0x4		// this is 0xaaaabbbb

					// l.movhi r9,0xaaaa
	l.ori	r5,r0,0x1920		// 0x1920 == l.movhi r9
	l.sh	(TRAMP_SLOT_0+0x0)(r3),r5
	l.srli	r5,r6,16		// upper 16 bits of EEA + 0x4
	l.sh	(TRAMP_SLOT_0+0x2)(r3),r5

					// l.ori   r9,0xbbbb
	l.ori	r5,r0,0xa929		// 0xa929 == l.ori r9
	l.sh	(TRAMP_SLOT_1+0x0)(r3),r5
	l.andi	r5,r6,0xffff		// lower 16 bits of EEA + 0x4
	l.sh	(TRAMP_SLOT_1+0x2)(r3),r5

	/* fallthrough, need to set up new jump offset */


0:	// l.j
	// All three terms below are kept in the same scale (byte value
	// shifted left 4); the final shift right 6 turns the sum back
	// into a word offset and leaves the six opcode bits zero.
	// NOTE(review): r2 is a virtual address while r3 is the tophys
	// address of the trampoline; the shift left 4 discards the top
	// four bits of both - presumably that is where they differ,
	// confirm against KERNELBASE.
	l.slli	r6,r4,6			// original offset shifted left 6 - 2
//	l.srli	r6,r6,6			// original offset shifted right 2

	l.slli	r4,r2,4			// old jump position: EEA shifted left 4
//	l.srli	r4,r4,6			// old jump position: shifted right 2

	l.addi	r5,r3,0xc		// new jump position (physical)
	l.slli	r5,r5,4			// new jump position: shifted left 4

	// calculate new jump offset
	// new_off = old_off + (old_jump - new_jump)

	l.sub	r5,r4,r5		// old_jump - new_jump
	l.add	r5,r6,r5		// orig_off + (old_jump - new_jump)
	l.srli	r5,r5,6			// new offset shifted right 2

	// r5 is new jump offset
					// l.j has opcode 0x0...
	l.sw	TRAMP_SLOT_2(r3),r5	// write it back

	l.j	trampoline_out
	l.nop

/* ----------------------------- */

3:	// l.jalr

	/* 19 20 aa aa	l.movhi r9,0xaaaa
	 * a9 29 bb bb  l.ori	r9,0xbbbb
	 *
	 * where 0xaaaabbbb is EEA + 0x4; the code below stores the two
	 * halves of that value as they are, without any shift
	 */

	l.addi	r6,r2,0x4		// this is 0xaaaabbbb

					// l.movhi r9,0xaaaa
	l.ori	r5,r0,0x1920		// 0x1920 == l.movhi r9
	l.sh	(TRAMP_SLOT_0+0x0)(r3),r5
	l.srli	r5,r6,16		// upper 16 bits of EEA + 0x4
	l.sh	(TRAMP_SLOT_0+0x2)(r3),r5

					// l.ori   r9,0xbbbb
	l.ori	r5,r0,0xa929		// 0xa929 == l.ori r9
	l.sh	(TRAMP_SLOT_1+0x0)(r3),r5
	l.andi	r5,r6,0xffff		// lower 16 bits of EEA + 0x4
	l.sh	(TRAMP_SLOT_1+0x2)(r3),r5

	// only the upper halfword of slot 2 is patched; the lower
	// halfword of the copied instruction stays as it was
	l.lhz	r5,(TRAMP_SLOT_2+0x0)(r3)	// load hi part of jump instruction
	l.andi	r5,r5,0x3ff		// clear out opcode part
	l.ori	r5,r5,0x4400		// opcode changed from l.jalr -> l.jr
	l.sh	(TRAMP_SLOT_2+0x0)(r3),r5 // write it back

	/* fallthrough */

1:	// l.jr
	// nothing to re-encode: the copy already in slot 2 is used as is
	l.j	trampoline_out
	l.nop

/* ----------------------------- */

4:	// l.bnf
5:	// l.bf
	// same offset arithmetic as for l.j above
	l.slli	r6,r4,6			// original offset shifted left 6 - 2
//	l.srli	r6,r6,6			// original offset shifted right 2

	l.slli	r4,r2,4			// old jump position: EEA shifted left 4
//	l.srli	r4,r4,6			// old jump position: shifted right 2

	l.addi	r5,r3,0xc		// new jump position (physical)
	l.slli	r5,r5,4			// new jump position: shifted left 4

	// calculate new jump offset
	// new_off = old_off + (old_jump - new_jump)

	l.add	r6,r6,r4		// (orig_off + old_jump)
	l.sub	r6,r6,r5		// (orig_off + old_jump) - new_jump
	l.srli	r6,r6,6			// new offset shifted right 2

	// r6 is new jump offset
	l.lwz	r4,(TRAMP_SLOT_2+0x0)(r3)	// load jump instruction
	l.srli	r4,r4,16
	l.andi	r4,r4,0xfc00		// get opcode part
	l.slli	r4,r4,16		// r4 = original opcode in bits 31..26
	l.or	r6,r4,r6		// l.b(n)f new offset
	l.sw	TRAMP_SLOT_2(r3),r6	// write it back

	/* we need to add l.j to EEA + 0x8 */
	// NOTE(review): r5 still holds (trampoline + 0xc) shifted left 4
	// from above, while r4 is the unshifted tophys value of EEA, so
	// the l.sub below subtracts values of different scale before
	// the common shift left 4.  The word is also stored at slot 4
	// (trampoline + 0x10) although the comments speak of a jump
	// position of r5 + 0x8.  Confirm the intended offset.
	tophys	(r4,r2)			// may not be needed (due to shifts down)
	l.addi	r4,r4,(0x8 - 0x8)	// jump target = r2 + 0x8 (compensate for 0x8)
					// jump position = r5 + 0x8 (0x8 compensated)
	l.sub	r4,r4,r5		// jump offset = target - new_position + 0x8

	l.slli	r4,r4,4			// the amount of info in immediate of jump
	l.srli	r4,r4,6			// jump instruction with offset
	l.sw	TRAMP_SLOT_4(r3),r4	// write it to 4th slot

	/* fallthrough */

trampoline_out:
	// set up new EPC to point to our trampoline code
	LOAD_SYMBOL_2_GPR(r5,_immu_trampoline)
	l.mtspr	r0,r5,SPR_EPCR_BASE

	// immu_trampoline is (4x) CACHE_LINE aligned
	// and only 6 instructions long,
	// so we need to invalidate only 2 lines

	/* Establish cache block size
	   If BS=0, 16;
	   If BS=1, 32;
	   r14 contains the block size
	*/
	l.mfspr r21,r0,SPR_ICCFGR
	l.andi	r21,r21,SPR_ICCFGR_CBS
	l.srli	r21,r21,7		// r21 = block size selector (0 or 1)
	l.ori	r23,r0,16
	l.sll	r14,r23,r21		// r14 = 16 << selector

	// invalidate the instruction cache block at _immu_trampoline
	// and the block following it
	l.mtspr	r0,r5,SPR_ICBIR
	l.add	r5,r5,r14
	l.mtspr	r0,r5,SPR_ICBIR

	l.jr	r9
	l.nop


/*
 * DSCR: writes a NUL terminated string to the UART at UART_BASE_ADD,
 *	 one byte at a time, polling the line status register.
 *
 * PRMS: r3     	- address of the first character of the null
 *			terminated string to be printed
 *	 r9		- return address
 *
 * PREQ: UART at UART_BASE_ADD has to be initialized
 *
 * POST: r3 is left pointing at the terminating NUL and r9 is used
 *	 for the return, so a caller must not rely on either.  The
 *	 flag is changed.  r4-r7 are scratch, saved on entry and
 *	 reloaded on exit through the EMERGENCY_PRINT_* macros.
 */
ENTRY(_emergency_print)
	EMERGENCY_PRINT_STORE_GPR4
	EMERGENCY_PRINT_STORE_GPR5
	EMERGENCY_PRINT_STORE_GPR6
	EMERGENCY_PRINT_STORE_GPR7

.Lemerg_print_next:
	l.lbz	r7,0(r3)			// r7 = current character
	l.sfeq	r7,r0
	l.bf	.Lemerg_print_done		// NUL: end of string
	l.nop

	/* putc: r4 = UART base, r7 = byte to send */
	l.movhi	r4,hi(UART_BASE_ADD)

	/* spin until the transmit holding register is empty */
	l.addi	r6,r0,UART_LSR_THRE
.Lemerg_print_wait_thre:
	l.lbz	r5,UART_LSR(r4)
	l.andi	r5,r5,UART_LSR_THRE
	l.sfeq	r5,r6
	l.bnf	.Lemerg_print_wait_thre
	l.nop

	l.sb	UART_TX(r4),r7			// hand the byte to the UART

	/* spin until both "holding empty" and "transmitter empty" are set */
	l.addi	r6,r0,(UART_LSR_TEMT | UART_LSR_THRE)
.Lemerg_print_wait_idle:
	l.lbz	r5,UART_LSR(r4)
	l.andi	r5,r5,(UART_LSR_TEMT | UART_LSR_THRE)
	l.sfeq	r5,r6
	l.bnf	.Lemerg_print_wait_idle
	l.nop

	/* next character; the pointer increment rides in the delay slot */
	l.j	.Lemerg_print_next
	 l.addi	r3,r3,0x1

.Lemerg_print_done:
	EMERGENCY_PRINT_LOAD_GPR7
	EMERGENCY_PRINT_LOAD_GPR6
	EMERGENCY_PRINT_LOAD_GPR5
	EMERGENCY_PRINT_LOAD_GPR4
	l.jr	r9
	l.nop

/*
 * DSCR: prints the value of r3 in lower case hexadecimal on the UART
 *	 at UART_BASE_ADD.  Zero nibbles above the low byte are
 *	 skipped, so between two and eight digits are written.
 *
 * PRMS: r3		- value to print
 *	 r9		- return address
 *
 * PREQ: UART at UART_BASE_ADD has to be initialized
 *
 * POST: r3 is not modified; the flag is changed.  r4-r8 are scratch,
 *	 saved on entry and reloaded on exit through the
 *	 EMERGENCY_PRINT_* macros.
 */
ENTRY(_emergency_print_nr)
	EMERGENCY_PRINT_STORE_GPR4
	EMERGENCY_PRINT_STORE_GPR5
	EMERGENCY_PRINT_STORE_GPR6
	EMERGENCY_PRINT_STORE_GPR7
	EMERGENCY_PRINT_STORE_GPR8

	l.addi	r8,r0,32			// r8 = shift amount, stepped by -4

	/* skip leading zero nibbles, most significant first */
.Lemerg_nr_skip_zero:
	l.addi	r8,r8,-0x4
	l.srl	r7,r3,r8
	l.andi	r7,r7,0xf			// r7 = nibble at bit position r8

	/* stop skipping at shift 4, so the low byte is always printed */
	l.sfeqi	r8,0x4
	l.bf	.Lemerg_nr_digit
	l.nop

	l.sfeq	r7,r0
	l.bf	.Lemerg_nr_skip_zero
	l.nop

	/* print the nibble at shift r8; finished once r8 went negative */
.Lemerg_nr_digit:
	l.srl	r7,r3,r8
	l.andi	r7,r7,0xf
	l.sflts	r8,r0
	l.bf	.Lemerg_nr_done
	 l.sfgtui r7,0x9			// in the delay slot of the l.bf above;
						// its flag only matters when not taken
	l.bnf	.Lemerg_nr_to_ascii
	l.nop
	l.addi	r7,r7,0x27			// 10..15: together with 0x30 gives a..f

.Lemerg_nr_to_ascii:
	l.addi	r7,r7,0x30			// r7 = ASCII digit

	/* putc: r4 = UART base, r7 = byte to send */
	l.movhi	r4,hi(UART_BASE_ADD)

	/* spin until the transmit holding register is empty */
	l.addi	r6,r0,UART_LSR_THRE
.Lemerg_nr_wait_thre:
	l.lbz	r5,UART_LSR(r4)
	l.andi	r5,r5,UART_LSR_THRE
	l.sfeq	r5,r6
	l.bnf	.Lemerg_nr_wait_thre
	l.nop

	l.sb	UART_TX(r4),r7			// hand the byte to the UART

	/* spin until both "holding empty" and "transmitter empty" are set */
	l.addi	r6,r0,(UART_LSR_TEMT | UART_LSR_THRE)
.Lemerg_nr_wait_idle:
	l.lbz	r5,UART_LSR(r4)
	l.andi	r5,r5,(UART_LSR_TEMT | UART_LSR_THRE)
	l.sfeq	r5,r6
	l.bnf	.Lemerg_nr_wait_idle
	l.nop

	/* next digit; the shift decrement rides in the delay slot */
	l.j	.Lemerg_nr_digit
	 l.addi	r8,r8,-0x4

.Lemerg_nr_done:
	EMERGENCY_PRINT_LOAD_GPR8
	EMERGENCY_PRINT_LOAD_GPR7
	EMERGENCY_PRINT_LOAD_GPR6
	EMERGENCY_PRINT_LOAD_GPR5
	EMERGENCY_PRINT_LOAD_GPR4
	l.jr	r9
	l.nop


/*
 * This should be used for debugging only.
 * It messes up the Linux early serial output
 * somehow, so use it sparingly and essentially
 * only if you need to debug something that goes wrong
 * before Linux gets the early serial going.
 *
 * Furthermore, you'll have to make sure you set the
 * UART_DIVISOR correctly according to the system
 * clock rate (SYS_CLK below).
 *
 * DSCR: programs the UART at UART_BASE_ADD for polled output:
 *	 FIFOs enabled and cleared, all UART interrupts masked,
 *	 8 data bits / no parity / 1 stop bit, divisor latch loaded
 *	 with UART_DIVISOR.
 *
 * PRMS: r9	- return address
 *
 * POST: r3, r4 and r5 are overwritten.
 */

#define SYS_CLK            20000000
//#define SYS_CLK            1843200
#define OR32_CONSOLE_BAUD  115200
/*
 * Divisor latch value, SYS_CLK / (16 * baud) rounded to the nearest
 * integer.  Plain truncation turns 20000000 / 1843200 = 10.85 into 10,
 * i.e. about 125000 baud (8.5% fast); rounding selects 11, i.e. about
 * 113636 baud (1.4% slow).  The body is parenthesized so that it can
 * be used safely inside larger expressions such as UART_DIVISOR>>8.
 */
#define UART_DIVISOR       ((SYS_CLK + 8*OR32_CONSOLE_BAUD)/(16*OR32_CONSOLE_BAUD))

ENTRY(_early_uart_init)
	l.movhi	r3,hi(UART_BASE_ADD)	// r3 = UART base

	/* FIFO control (0x7): enable the FIFOs and clear both of them */
	l.addi	r4,r0,(UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT)
	l.sb	UART_FCR(r3),r4

	/* interrupt enable: everything off, output is polled */
	l.addi	r4,r0,0x0
	l.sb	UART_IER(r3),r4

	/* line control (0x3): 8 data bits, no parity, 1 stop bit */
	l.addi	r4,r0,UART_LCR_WLEN8
	l.sb	UART_LCR(r3),r4

	/* open the divisor latch, load it, then restore line control */
	l.lbz	r5,UART_LCR(r3)		// r5 = line control value to restore
	l.ori	r4,r5,UART_LCR_DLAB
	l.sb	UART_LCR(r3),r4
	l.addi	r4,r0,((UART_DIVISOR>>8) & 0x000000ff)
	l.sb	UART_DLM(r3),r4
	l.addi  r4,r0,((UART_DIVISOR) & 0x000000ff)
	l.sb	UART_DLL(r3),r4
	l.sb	UART_LCR(r3),r5

	l.jr	r9
	l.nop

/*
 * Message texts, each reachable through its label.
 *
 * The .string directive already appends a terminating NUL, so the
 * explicit \0 at the end of every literal stores a second zero byte.
 * It is harmless and is kept so that the emitted bytes stay the same.
 * Line ends are written as "\n\r".
 */
_string_copying_linux:
	.string "\n\n\n\n\n\rCopying Linux... \0"

_string_ok_booting:
	.string "Ok, booting the kernel.\n\r\0"

/* prefix; ends in "0x" so a hexadecimal number is meant to follow */
_string_unhandled_exception:
	.string "\n\rRunarunaround: Unhandled exception 0x\0"

/* prefix; ends in "0x" so a hexadecimal number is meant to follow */
_string_epc_prefix:
	.string ": EPC=0x\0"

_string_nl:
	.string "\n\r\0"

/* the only message exported with .global; the text points at entry.S */
	.global	_string_esr_irq_bug
_string_esr_irq_bug:
	.string "\n\rESR external interrupt bug, for details look into entry.S\n\r\0"



/* ========================================[ page aligned structures ]=== */

/*
 * .data section should be page aligned
 *	(look into arch/or32/kernel/vmlinux.lds)
 *
 * Three zero-filled objects of 8192 bytes each are laid out back to
 * back after an alignment request of 8192, so each one starts on an
 * 8192 boundary.  8192 matches the PAGE_MASK value 0xffffe000 used by
 * the TLB miss handlers in this file.
 */
	.section .data,"aw"
	.align	8192
	/* one page reserved under the exported name empty_zero_page */
	.global  empty_zero_page
empty_zero_page:
	.space  8192

	/* one page reserved under the exported name swapper_pg_dir */
	.global  swapper_pg_dir
swapper_pg_dir:
	.space  8192

	/*
	 * 8192 byte area; _unhandled_stack_top labels the first byte
	 * past its end.  Only _unhandled_stack is made global.
	 */
	.global	_unhandled_stack
_unhandled_stack:
	.space	8192
_unhandled_stack_top:

/* ============================================================[ EOF ]=== */