cpwcr("cpcr8", 0x02000000);
 }
 
-static inline void setup_pgd(pgd_t *pgd)
+static inline void setup_pgd(pgd_t *pgd, int asid) /* install the page-table base and the ASID together */
 {
        cpwcr("cpcr29", __pa(pgd) | BIT(0));
+       write_mmu_entryhi(asid); /* ASID write that callers used to issue themselves right after setup_pgd() */
 }
 
 static inline pgd_t *get_pgd(void)
 
 static inline void tlb_invalid_all(void)
 {
 #ifdef CONFIG_CPU_HAS_TLBI
-       asm volatile("tlbi.alls\n":::"memory");
        sync_is();
+       asm volatile(
+               "tlbi.alls      \n"
+               "sync.i         \n"
+               :
+               :
+               : "memory");
 #else
        mtcr("cr<8, 15>", 0x04000000);
 #endif
 static inline void local_tlb_invalid_all(void)
 {
 #ifdef CONFIG_CPU_HAS_TLBI
-       asm volatile("tlbi.all\n":::"memory");
        sync_is();
+       asm volatile(
+               "tlbi.all       \n"
+               "sync.i         \n"
+               :
+               :
+               : "memory");
 #else
        tlb_invalid_all();
 #endif
        mtcr("cr<8, 15>", 0x02000000);
 }
 
-static inline void setup_pgd(pgd_t *pgd)
+#define NOP32 ".long 0x4820c400\n" /* raw instruction word; presumably a 32-bit nop encoding - TODO confirm against the ISA manual */
+
+static inline void setup_pgd(pgd_t *pgd, int asid) /* write the page-table base and the ASID in one asm sequence */
 {
 #ifdef CONFIG_CPU_HAS_TLBI
-       mtcr("cr<28, 15>", __pa(pgd) | BIT(0));
+       sync_is(); /* used in place of mb() when CONFIG_CPU_HAS_TLBI is set */
+#else
+       mb(); /* memory barrier before the control-register writes below */
+#endif
+       asm volatile(
+#ifdef CONFIG_CPU_HAS_TLBI
+               "mtcr %1, cr<28, 15>    \n" /* pgd value; this register is only written on CONFIG_CPU_HAS_TLBI builds */
 #endif
-       mtcr("cr<29, 15>", __pa(pgd) | BIT(0));
+               "mtcr %1, cr<29, 15>    \n" /* pgd value; written on every configuration */
+               "mtcr %0, cr< 4, 15>    \n" /* asid; NOTE(review): presumably the register write_mmu_entryhi() targets - confirm */
+               ".rept 64               \n" /* assembler repeats NOP32 64 times; NOTE(review): presumably padding so the writes settle - confirm */
+               NOP32
+               ".endr                  \n"
+               :
+               :"r"(asid), "r"(__pa(pgd) | BIT(0)) /* %0 = asid, %1 = __pa(pgd) with bit 0 set */
+               :"memory"); /* keep the compiler from moving memory accesses across this sequence */
 }
 
 static inline pgd_t *get_pgd(void)
 
        if (prev != next)
                check_and_switch_context(next, cpu);
 
-       setup_pgd(next->pgd);
-       write_mmu_entryhi(next->context.asid.counter);
+       setup_pgd(next->pgd, next->context.asid.counter);
 
        flush_icache_deferred(next);
 }
 
        /* Setup page mask to 4k */
        write_mmu_pagemask(0);
 
-       setup_pgd(swapper_pg_dir);
+       setup_pgd(swapper_pg_dir, 0);
 }
 
 void __init fixrange_init(unsigned long start, unsigned long end,
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
 #ifdef CONFIG_CPU_HAS_TLBI
-       asm volatile("tlbi.asids %0"::"r"(cpu_asid(mm)));
+       sync_is();
+       asm volatile(
+               "tlbi.asids %0  \n"
+               "sync.i         \n"
+               :
+               : "r" (cpu_asid(mm))
+               : "memory");
 #else
        tlb_invalid_all();
 #endif
        end   &= TLB_ENTRY_SIZE_MASK;
 
 #ifdef CONFIG_CPU_HAS_TLBI
+       sync_is();
        while (start < end) {
-               asm volatile("tlbi.vas %0"::"r"(start | newpid));
+               asm volatile(
+                       "tlbi.vas %0    \n"
+                       :
+                       : "r" (start | newpid)
+                       : "memory");
+
                start += 2*PAGE_SIZE;
        }
-       sync_is();
+       asm volatile("sync.i\n");
 #else
        {
        unsigned long flags, oldpid;
        end   &= TLB_ENTRY_SIZE_MASK;
 
 #ifdef CONFIG_CPU_HAS_TLBI
+       sync_is();
        while (start < end) {
-               asm volatile("tlbi.vaas %0"::"r"(start));
+               asm volatile(
+                       "tlbi.vaas %0   \n"
+                       :
+                       : "r" (start)
+                       : "memory");
+
                start += 2*PAGE_SIZE;
        }
-       sync_is();
+       asm volatile("sync.i\n");
 #else
        {
        unsigned long flags, oldpid;
        addr &= TLB_ENTRY_SIZE_MASK;
 
 #ifdef CONFIG_CPU_HAS_TLBI
-       asm volatile("tlbi.vas %0"::"r"(addr | newpid));
        sync_is();
+       asm volatile(
+               "tlbi.vas %0    \n"
+               "sync.i         \n"
+               :
+               : "r" (addr | newpid)
+               : "memory");
 #else
        {
        int oldpid, idx;
        addr &= TLB_ENTRY_SIZE_MASK;
 
 #ifdef CONFIG_CPU_HAS_TLBI
-       asm volatile("tlbi.vaas %0"::"r"(addr));
        sync_is();
+       asm volatile(
+               "tlbi.vaas %0   \n"
+               "sync.i         \n"
+               :
+               : "r" (addr)
+               : "memory");
 #else
        {
        int oldpid, idx;