以SIGSEGV为例详解信号处理(与栈回溯) 信号是内核提供的向用户态进程发送信息的机制, 常见的有使用SIGUSR1唤醒用户进程执行子程序或发生段错误时使用SIGSEGV
保存用户错误现场. 本文以SIGSEGV为例, 详细分析信号使用方法, 内核信号的发送与接收机制.
信号处理例程 以下是一个SIGSEGV
处理例程, 主程序注册一个信号处理函数并创建一个线程, 线程中故意访问空指针, 引发段错误. 在信号回调中会回溯堆栈, 保存出错的地址. 回溯堆栈的原理在分析完整个信号处理流程后再分析, 首先我们先来分析如何使用信号.
sigaction
()用于向内核注册一个信号(参数1), 使用参数2(如果非空)作为注册信号的回调, 内核会将之前的信号回调返回在参数3中(如果非空). 如果父进程或程序之前阻塞了该信号则需先调用sigprocmask
()取消阻塞. 在回调处理结束时需手动退出进程(exit
()), 否则回调返回后会重新执行引发异常的指令, 再次触发该信号(反复崩溃). 如果没有注册回调, 内核对SIGSEGV的默认动作是终止进程(并可生成core dump), 所以默认情况下进程也会退出.
/*
 * SIGSEGV demo for 32-bit ARM (ARM-mode code, not Thumb).
 *
 * A worker thread dereferences NULL. The SA_SIGINFO handler reads the saved
 * sp/lr/pc from its third argument, walks a few stack frames by decoding
 * function prologues, prints the recovered pc/sp pairs and exits.
 *
 * The unwinder treats addresses as 32-bit words, so the program is only
 * meaningful when built for 32-bit ARM; build with -rdynamic to get symbol
 * names from backtrace_symbols().
 */
#define _GNU_SOURCE

#include <execinfo.h>
#include <pthread.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Frames recorded by the handler: the faulting frame plus three callers. */
enum { BACKTRACE_DEPTH = 4 };

/* Word offsets of the saved registers inside the handler's third argument. */
enum {
    UCTX_WORD_SP = 21,
    UCTX_WORD_LR = 22,
    UCTX_WORD_PC = 23
};

/* Count the set bits of a 32-bit value (parallel bit-summing). */
static unsigned int popcount32(unsigned int data)
{
    data = (data & 0x55555555u) + ((data >> 1) & 0x55555555u);
    data = (data & 0x33333333u) + ((data >> 2) & 0x33333333u);
    data = (data & 0x0F0F0F0Fu) + ((data >> 4) & 0x0F0F0F0Fu);
    data = (data & 0x00FF00FFu) + ((data >> 8) & 0x00FF00FFu);
    data = (data & 0x0000FFFFu) + ((data >> 16) & 0x0000FFFFu);
    return data;
}

/*
 * Decode the immediate operand of an ARM data-processing instruction:
 * the 8-bit value in bits [7:0] rotated right by twice the 4-bit field in
 * bits [11:8].
 */
static unsigned int arm_imm_value(unsigned int ins)
{
    unsigned int imm8 = ins & 0xFFu;
    unsigned int rot = 2u * ((ins >> 8) & 0xFu);

    if (rot == 0) {
        return imm8;
    }
    return (imm8 >> rot) | (imm8 << (32u - rot));
}

/*
 * Unwind one stack frame.
 *
 * Scans backwards from *pppc, one instruction at a time, looking for the
 * prologue of the current function, and undoes its effect on the stack
 * pointer:
 *   - "sub sp, sp, #imm"    : add the immediate back and keep scanning;
 *   - "push {..., lr}"      : pop all registers, the caller's return address
 *                             is the last (highest) word; done;
 *   - "str lr, [sp, #-4]!"  : pop one word, which is the return address; done;
 *   - "str fp, [sp, #-4]!"  : pop one word; lr was never saved, so *pppc is
 *                             left unchanged and the caller must use lr.
 *
 * pppc: in/out, address inside the current function -> return address.
 * ppsp: in/out, current stack pointer -> stack pointer of the caller.
 *
 * The scan has no lower bound: it relies on a recognised prologue existing
 * before *pppc.
 */
void backtrace_stack(unsigned int **pppc, unsigned int **ppsp)
{
    enum { INS_SUB_IMM = 0, INS_STM1, INS_STR_LR, INS_STR_FP, INS_BUTT };
    static const struct {
        unsigned int mask;
        unsigned int ins;
    } map[INS_BUTT] = {
        [INS_SUB_IMM] = {0xFFEFF000u, 0xE24DD000u}, /* sub sp, sp, #imm     */
        [INS_STM1]    = {0xFFFF4000u, 0xE92D4000u}, /* push {..., lr}       */
        [INS_STR_LR]  = {0xFFFFFFFFu, 0xE52DE004u}, /* str lr, [sp, #-4]!   */
        [INS_STR_FP]  = {0xFFFFFFFFu, 0xE52DB004u}, /* str fp, [sp, #-4]!   */
    };
    unsigned int *pc = *pppc;
    unsigned int *sp = *ppsp;

    for (;;) {
        unsigned int ins;
        int kind;

        pc--;
        ins = *pc;
        for (kind = 0; kind < INS_BUTT; kind++) {
            if ((ins & map[kind].mask) == map[kind].ins) {
                break;
            }
        }

        switch (kind) {
        case INS_SUB_IMM:
            /* Locals were reserved after the push; release them, keep going. */
            sp += arm_imm_value(ins) / sizeof(unsigned int);
            break;
        case INS_STM1:
            /* One word per register in the 16-bit register list. */
            sp += popcount32(ins & 0xFFFFu);
            *pppc = (unsigned int *)(uintptr_t)sp[-1];
            *ppsp = sp;
            return;
        case INS_STR_LR:
            sp += 1;
            *pppc = (unsigned int *)(uintptr_t)sp[-1];
            *ppsp = sp;
            return;
        case INS_STR_FP:
            sp += 1;
            *ppsp = sp;
            return;
        default:
            /* Not a prologue instruction: keep scanning backwards. */
            break;
        }
    }
}

/*
 * SIGSEGV handler (SA_SIGINFO form).
 *
 * sig:     signal number (unused, siginfo carries it too).
 * siginfo: fault description; si_addr is the faulting address.
 * data:    saved user context; sp, lr and pc are read at fixed word offsets.
 *
 * Prints BACKTRACE_DEPTH frames and terminates the process. printf() and
 * exit() are not async-signal-safe; that is tolerated here because the
 * process is about to die anyway.
 */
void sighandle(int sig, siginfo_t *siginfo, void *data)
{
    const unsigned int *regs = data;
    unsigned int pc_val[BACKTRACE_DEPTH] = {0};
    unsigned int sp_val[BACKTRACE_DEPTH] = {0};
    void *frames[BACKTRACE_DEPTH];
    char **ppstr;
    int i;

    (void)sig;
    printf("get signal %d addr %p\n", siginfo->si_signo, siginfo->si_addr);

    pc_val[0] = regs[UCTX_WORD_PC];
    sp_val[0] = regs[UCTX_WORD_SP];
    for (i = 1; i < BACKTRACE_DEPTH; i++) {
        unsigned int *pc = (unsigned int *)(uintptr_t)pc_val[i - 1];
        unsigned int *sp = (unsigned int *)(uintptr_t)sp_val[i - 1];

        backtrace_stack(&pc, &sp);
        pc_val[i] = (unsigned int)(uintptr_t)pc;
        sp_val[i] = (unsigned int)(uintptr_t)sp;

        /* pc unchanged: the function never saved lr, so lr still holds it. */
        if (pc_val[i] == pc_val[i - 1]) {
            pc_val[i] = regs[UCTX_WORD_LR];
        }
        /* Step back from the return address to the call instruction. */
        pc_val[i] -= 4;
    }

    for (i = 0; i < BACKTRACE_DEPTH; i++) {
        frames[i] = (void *)(uintptr_t)pc_val[i];
    }
    ppstr = backtrace_symbols(frames, BACKTRACE_DEPTH);
    for (i = 0; i < BACKTRACE_DEPTH; i++) {
        printf("%d: pc[0x%08x] sp[0x%08x] %s\n", i, pc_val[i], sp_val[i],
               ppstr != NULL ? ppstr[i] : "?");
    }

    /* Must not return: the faulting instruction would simply run again. */
    exit(1);
}

/* Innermost frame: writes through a NULL pointer to raise SIGSEGV. */
void fault_func3(void)
{
    volatile int *p = NULL;

    *p = 1;
}

/* Intermediate frame; the local only exists to give the frame some stack. */
void fault_func2(void)
{
    volatile int a = 0x5678;

    (void)a;
    fault_func3();
}

/* Thread entry point; the local only exists to give the frame some stack. */
void *fault_func1(void *pvoid)
{
    volatile int a = 0x1234;

    (void)a;
    (void)pvoid;
    fault_func2();
    return NULL;
}

/*
 * Installs the SIGSEGV handler, waits for a key press on stdin, then starts
 * the faulting thread and waits for it (the handler ends the process).
 */
int main(int argc, char *argv[])
{
    struct sigaction sigact;
    pthread_t thread;

    (void)argc;
    (void)argv;

    memset(&sigact, 0, sizeof sigact);
    sigact.sa_sigaction = sighandle;
    sigact.sa_flags = SA_SIGINFO | SA_RESTART;
    if (sigaction(SIGSEGV, &sigact, NULL) != 0) {
        perror("sigaction");
        return 1;
    }

    getc(stdin);

    if (pthread_create(&thread, NULL, fault_func1, NULL) != 0) {
        fprintf(stderr, "pthread_create failed\n");
        return 1;
    }
    pthread_join(thread, NULL);
    return 0;
}
内核信号数据结构与系统调用 虽然用户调用的sig*接口都是glibc的接口, 但实际上glibc还是通过系统调用实现的. 与信号相关的数据结构有:task_struct
(负责保存信号处理句柄, 阻塞与挂起的信号队列)sighand_struct
(每个信号处理 handler句柄, 保护信号的自旋锁)signal_struct
(信号结构, 大部分参数都在该结构中)sigpending
(挂起队列, 用于索引挂起的信号) 作为一种信息传递机制, 信号代码本身并不复杂, 即使是信号发送接口__send_signal()(分析见下).
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 struct task_struct { ...... struct signal_struct *signal ; struct sighand_struct *sighand ; sigset_t blocked, real_blocked; sigset_t saved_sigmask; struct sigpending pending ; ...... }; struct sighand_struct { atomic_t count; struct k_sigaction action [_NSIG ]; spinlock_t siglock; wait_queue_head_t signalfd_wqh; }; struct signal_struct { ...... struct sigpending shared_pending ; ...... }; struct sigpending { struct list_head list ; sigset_t signal; }; struct sigqueue { struct list_head list ; int flags; siginfo_t info; struct user_struct *user ; }; typedef struct siginfo { int si_signo; int si_errno; int si_code; ...... } __ARCH_SI_ATTRIBUTES siginfo_t ;
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 SYSCALL_DEFINE3(sigprocmask, int , how, \ old_sigset_t __user *, nset, \ old_sigset_t __user *, oset); SYSCALL_DEFINE3(sigaction, int , sig, \ const struct old_sigaction __user *, act, \ struct old_sigaction __user *, oact); int do_send_sig_info (int sig, struct siginfo *info, \ struct task_struct *p, bool group) ;int __group_send_sig_info(int sig, \ struct siginfo *info, struct task_struct *p);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 static int __send_signal(int sig, struct siginfo *info, \ struct task_struct *t, int group, int from_ancestor_ns) { assert_spin_locked(&t->sighand->siglock); if (!prepare_signal(sig, t, from_ancestor_ns || (info == SEND_SIG_FORCED))) goto ret; pending = group &t->signal->shared_pending : &t->pending; if (legacy_queue(pending, sig)) goto ret; if (info == SEND_SIG_FORCED) goto out_set; if (sig < SIGRTMIN) override_rlimit = (is_si_special(info) || info->si_code >= 0 ); else override_rlimit = 0 ; q = __sigqueue_alloc(sig, t, \ GFP_ATOMIC | __GFP_NOTRACK_FALSE_POSITIVE, override_rlimit); if (q) { list_add_tail(&q->list , &pending->list ); switch ((unsigned long ) info) { case (unsigned long ) SEND_SIG_NOINFO: q->info.si_signo = sig; q->info.si_errno = 0 ; q->info.si_code = SI_USER; q->info.si_pid = task_tgid_nr_ns(current, task_active_pid_ns(t)); q->info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); break ; case (unsigned long ) SEND_SIG_PRIV: q->info.si_signo = sig; q->info.si_errno = 0 ; q->info.si_code = SI_KERNEL; q->info.si_pid = 0 ; q->info.si_uid = 0 ; break ; default : copy_siginfo(&q->info, info); if (from_ancestor_ns) q->info.si_pid = 0 ; break ; } userns_fixup_signal_uid(&q->info, t); } else if (!is_si_special(info)) { if (sig >= SIGRTMIN && info->si_code != SI_USER) { result = TRACE_SIGNAL_OVERFLOW_FAIL; ret = -EAGAIN; goto ret; } else { result = TRACE_SIGNAL_LOSE_INFO; } } out_set: signalfd_notify(t, sig); sigaddset(&pending->signal, sig); complete_signal(sig, t, group); ret: trace_signal_generate(sig, info, t, group, result); return ret; } static 
void complete_signal (int sig, struct task_struct *p, int group) { if (wants_signal(sig, p)) t = p; else if (!group || thread_group_empty(p)) return ; else { t = signal->curr_target; while (!wants_signal(sig, t)) { t = next_thread(t); if (t == signal->curr_target) return ; } signal->curr_target = t; } if (sig_fatal(p, sig) && !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) && !sigismember(&t->real_blocked, sig) && (sig == SIGKILL || !t->ptrace)) { if (!sig_kernel_coredump(sig)) { signal->flags = SIGNAL_GROUP_EXIT; signal->group_exit_code = sig; signal->group_stop_count = 0 ; t = p; do { task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK); sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1 ); } while_each_thread(p, t); return ; } } signal_wake_up(t, sig == SIGKILL); }
信号处理流程 信号处理涉及内核最底层代码, 需了解芯片架构在内各类知识, 相对晦涩难懂. 一般对现代芯片而言当进程访问一个非法地址后MMU
会修改寄存器引起内核进入异常, 在异常处理时内核会分辨非法地址产生的原因(是真的非法地址还是没有映射页表)并作出不同处理. 对于处理失败的情况内核在异常处理结束时会向引起异常的task发送SIGSEGV
, 在异常结束后执行调度时会首先判断该task是否有挂起信号, 如果存在则执行信号处理. 信号处理的复杂之处主要在于内核需要调用用户态程序并在程序结束后恢复内核现场. 接下来我们以Hi3536(ARMv7)平台具体分析信号处理流程(使用3.10内核).
arm一共有7种异常处理模式, reset, und, swi, pabt, dabt, irq, fiq(reference manual A2-13). 其中与内存访问相关的有两种prefetch abort与data abort, 前者为取指令异常, 后者为数据异常
. 异常向量表定义在arch/arm/kernel/entry-armv.S, __stubs_start到__stubs_end即整个异常向量表. 在内核初始化时调用early_trap_init拷贝向量表(低地址空间是用户态, 所以需搬移到0xFFFF0000). 向量表中每类异常的起始地址都是vector_stub宏, 后面跟着不同异常向量处理函数. 以dabt为例, 先看下该宏:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 .macro vector_stub, name, mode, correction=0 .align 5 vector_\name: .if \correction sub lr, lr, #\correction .endif @ @ Save r0, lr_<exception> (parent PC) and spsr_<exception> @ (parent CPSR) @ stmia sp, {r0, lr} @ save r0, lr mrs lr, spsr str lr, [sp, #8] @ save spsr @ @ Prepare for SVC32 mode. IRQs remain disabled. @ mrs r0, cpsr eor r0, r0, #(\mode ^ SVC_MODE | PSR_ISETSTATE) msr spsr_cxsf, r0 @ @ the branch table must immediately follow this code @ and lr, lr, #0x0f THUMB(adr r0, 1f) THUMB(ldr lr, [r0, lr, lsl #2]) mov r0, sp ARM( ldr lr, [pc, lr, lsl #2]) movs pc, lr @ branch to handler in SVC mode ENDPROC(vector_\name)
进入异常后第一件事是保存异常模式下寄存器(如果发生嵌套异常又不保存寄存器则无法恢复异常环境). 即保存 lr_exception
与 spsr_exception
, 由于使用 r0传递 sp 还需保存 r0, 并将 spsr 设置为 svc 模式(最后的movs pc, lr会把spsr拷贝到cpsr, 从而切换到svc模式). 保存现场后第二件事是跳转到对应的异常处理函数, 由于未定义THUMB2_KERNEL, 内核全部使用ARM指令. 通过读spsr寄存器低4位得知(通过mrs读取到lr中再位与0xF)进入异常前的运行模式. 异常向量表是连续的4字节数组, 紧跟在该代码后, 通过pc + mode * 4得到异常向量地址. 仍以dabt为例, 用户访问空指针引起abort异常, 用户模式mode bits为0, 此时即ldr lr, [pc]. 由于arm架构三级流水线, pc领先实际执行两个指令, 即lr为__dabt_usr, 最后跳转到__dabt_usr执行. 如果内核访问空指针引起abort异常, 内核模式mode bits为3, 即跳转到__dabt_svc
1 2 3 4 5 vector_stub dabt, ABT_MODE, 8 .long __dabt_usr @ 0 (USR_26 / USR_32) .long __dabt_invalid @ 1 (FIQ_26 / FIQ_32) .long __dabt_invalid @ 2 (IRQ_26 / IRQ_32) .long __dabt_svc @ 3 (SVC_26 / SVC_32)
接下来进入具体异常处理函数, 我们以__dabt_usr为例具体分析.
1 2 3 4 5 6 7 8 __dabt_usr: usr_entry kuser_cmpxchg_check mov r2, sp dabt_helper b ret_from_exception UNWIND(.fnend) ENDPROC(__dabt_usr)
进入异常处理函数后第一件事是保存现场, 之前已保存了部分寄存器, usr_entry用来保存全部寄存器.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 .macro usr_entry UNWIND(.fnstart) UNWIND(.cantunwind) @ don't unwind the user space sub sp, sp, #S_FRAME_SIZE ARM( stmib sp, {r1 - r12}) THUMB( stmia sp, {r0 - r12}) ldmia r0, {r3 - r5} add r0, sp, #S_PC @ here for interlock avoidance mov r6, #-1 str r3, [sp] @ save the "real" r0 copied @ from the exception stack @ @ We are now ready to fill in the remaining blanks on the stack: @ @ r4 - lr_<exception>, already fixed up for correct return/restart @ r5 - spsr_<exception> @ r6 - orig_r0 (see pt_regs definition in ptrace.h) @ @ Also, separately save sp_usr and lr_usr @ stmia r0, {r4 - r6} ARM( stmdb r0, {sp, lr}^) THUMB( store_user_sp_lr r0, r1, S_SP - S_PC) @ @ Enable the alignment trap while in kernel mode @ alignment_trap r0 @ @ Clear FP to mark the first stack frame @ zero_fp #ifdef CONFIG_IRQSOFF_TRACER bl trace_hardirqs_off #endif ct_user_exit save = 0 .endm
首先将r1-r12压栈, 注意此处没有使用push而是sp先减少再使用stmib反向压栈. 原因是这些寄存器后面将以pt_regs形式访问, 数组排列是从低到高, 与栈增长相反. 另外r0, pc, cpsr, orig_r0是压栈传入的, 原因分别如下. r0需作为栈地址参数传入异常处理函数, 其原始值被修改, 所以通过栈传入. 由于pt_regs是指用户异常现场, pc与cpsr应保存异常发生时值, 但进入异常时使用影子寄存器. 所以使用压栈的 lr_exception
与 spsr_exception
(reference manual A2-13). 最后是orig_r0: 系统调用的返回值会覆盖r0, 内核用orig_r0保留r0的原始值(第一个参数), 供系统调用重启时恢复; 对于此处这种非系统调用的入口, 则填入-1(即上面的mov r6, #-1).
保存完用户现场后开始真正异常处理, dabt_helper的注释是调用指定的abort handler.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 .macro dabt_helper @ @ Call the processor-specific abort handler: @ @ r2 - pt_regs @ r4 - aborted context pc @ r5 - aborted context psr @ @ The abort handler must return the aborted address in r0, and @ the fault status register in r1. r9 must be preserved. @ #ifdef MULTI_DABORT ldr ip, .LCprocfns mov lr, pc ldr pc, [ip, #PROCESSOR_DABT_FUNC] #else bl CPU_DABORT_HANDLER #endif .endm #ifdef MULTI_DABORT .LCprocfns: .word processor #endif
其中pt_regs保存在r2中, abort时的pc指针保存在r4中, abort时的cpsr保存在r5中. handler返回时abort地址保存在r0中, 错误状态寄存器(fsr)保存在r1中, r9保留. 宏MULTI_DABORT定义见arch/arm/include/asm/glue-df.h, 由不同架构决定, ARMv7架构定义了该宏. 对于定义MULTI_DABORT宏的架构, ldr pc, [ip, #PROCESSOR_DABT_FUNC
]是跳转的关键. .LCprocfns段存放的是全局变量processor, 其定义在arch/arm/include/asm/proc-fns.h. PROCESSOR_DABT_FUNC定义见arch/arm/kernel/asm-offsets.c, 即指向processor._data_abort.
全局变量processor是如何初始化的? 答案见setup_processor(defined in arch/arm/kernel/setup.c). 在setup_processor中会调用lookup_processor_type(defined in arch/arm/kernel/head-common.S):
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 ENTRY(lookup_processor_type) stmfd sp!, {r4 - r6, r9, lr} mov r9, r0 bl __lookup_processor_type mov r0, r5 ldmfd sp!, {r4 - r6, r9, pc} ENDPROC(lookup_processor_type) __lookup_processor_type: adr r3, __lookup_processor_type_data ldmia r3, {r4 - r6} sub r3, r3, r4 @ get offset between virt&phys add r5, r5, r3 @ convert virt addresses to add r6, r6, r3 @ physical address space 1: ldmia r5, {r3, r4} @ value, mask and r4, r4, r9 @ mask wanted bits teq r3, r4 beq 2f add r5, r5, #PROC_INFO_SZ @ sizeof(proc_info_list) cmp r5, r6 blo 1b mov r5, #0 @ unknown processor 2: mov pc, lr ENDPROC(__lookup_processor_type)
__lookup_processor_type的注释解释了代码意图: 从CP15读取处理器id并从链接时建立的数组中查找. 由于此时未开启MMU因此无法使用绝对地址索引proc_info, 需根据偏移来计算. lookup_processor_type首先将cpuid保存在r9, 然后获取程序装载地址的偏移. __lookup_processor_type_data是数据段对象, 其中保存了__proc_info_begin与__proc_info_end(ldmia一次读出三个字到r4 - r6). 通过arch/arm/kernel/vmlinux.lds.S可以得知该地址区间保存.proc.info.init数据. r3由adr指令得到, 是__lookup_processor_type_data运行时的实际地址; r4是从该对象读出的链接时(编译时)地址. r3减r4即无MMU时程序加载地址相对链接地址的偏移. r5与r6分别为__proc_info_begin与__proc_info_end, 即.proc.info.init数组的起始地址与结束地址, 加上偏移后转换为实际地址. 将r5地址前两个成员(cpu_val与cpu_mask)保存在r3与r4, 将cpuid与cpu_mask位与后与cpu_val比较, 如果符合则跳出循环. 如果不符合则取r5下一个元素地址与r6比较, 溢出说明数组越界r5设为0, 否则重复上一步比较.
在分析了processor的初始化后, 我们再来看下.proc.info.init数组是如何定义的. 此处代码与架构强相关, 每个芯片都有差异, 仅以基于ARMv7架构为例:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 .macro __v7_proc initfunc, mm_mmuflags = 0, io_mmuflags = 0, hwcaps = 0, proc_fns = v7_processor_functions ALT_SMP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ PMD_SECT_AF | PMD_FLAGS_SMP | \mm_mmuflags) ALT_UP(.long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | \ PMD_SECT_AF | PMD_FLAGS_UP | \mm_mmuflags) .long PMD_TYPE_SECT | PMD_SECT_AP_WRITE | \ PMD_SECT_AP_READ | PMD_SECT_AF | \io_mmuflags W(b) \initfunc .long cpu_arch_nam .long cpu_elf_name .long HWCAP_SWP | HWCAP_HALF | HWCAP_THUMB | HWCAP_FAST_MULT | \ HWCAP_EDSP | HWCAP_TLS | \hwcaps .long cpu_v7_name .long \proc_fns .long v7wbi_tlb_fns .long v6_user_fns .long v7_cache_fns .endm
宏__v7_proc(defined in arch/arm/mm/proc-v7.S)作用是生成一个struct proc_info_list实例. 在arch/arm/mm/proc-v7.S中有多个用该宏定义的实例, 这些实例都放在.proc.info.init段中. 每个实例对应一类芯片, __v7_proc_info是大部分ARMv7处理器对应的struct proc_info_list的实例. __v7_proc_info的processor成员是v7_processor_functions, 再来看看该成员. 直接搜索该名字找不到定义的, 因为它是通过宏定义的生成的(烦不烦- -!).
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 .macro define_processor_functions name:req, dabort:req, pabort:req, nommu=0, suspend=0 .type \name\()_processor_functions, #object .align 2 ENTRY(\name\()_processor_functions) .word \dabort .word \pabort .word cpu_\name\()_proc_init .word cpu_\name\()_proc_fin .word cpu_\name\()_reset .word cpu_\name\()_do_idle .word cpu_\name\()_dcache_clean_area .word cpu_\name\()_switch_mm .if \nommu .word 0 .else .word cpu_\name\()_set_pte_ext .endif .if \suspend .word cpu_\name\()_suspend_size #ifdef CONFIG_PM_SLEEP .word cpu_\name\()_do_suspend .word cpu_\name\()_do_resume #else .word 0 .word 0 #endif .else .word 0 .word 0 .word 0 .endif .size \name\()_processor_functions, . - \name\()_processor_functions .endm define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1
宏define_processor_functions(defined in arch/arm/mm/proc-macro.S). 该宏作用是生成一个struct processor实例, 联系对该宏的调用终于可以摸索出我们想要的回调了. 在lookup_processor_type返回后r0保存着proc_info_list地址, 对ARMv7架构而言. 返回的proc_info_list为__v7_proc_info(defined in arch/arm/mm/proc-v7.S). 其processor成员为v7_processor_functions, 它是由宏展开的, 其_data_abort成员为v7_early_abort.
再来看v7_early_abort(defined in arch/arm/mm/abort-ev7.S):
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 ENTRY(v7_early_abort) /* * The effect of data aborts on on the exclusive access monitor are * UNPREDICTABLE. Do a CLREX to clear the state */ clrex mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR /* * V6 code adjusts the returned DFSR. * New designs should not need to patch up faults. */ #if defined(CONFIG_VERIFY_PERMISSION_FAULT) /* * Detect erroneous permission failures and fix */ ldr r3, =0x40d @ On permission fault and r3, r1, r3 cmp r3, #0x0d bne do_DataAbort mcr p15, 0, r0, c7, c8, 0 @ Retranslate FAR isb mrc p15, 0, ip, c7, c4, 0 @ Read the PAR and r3, ip, #0x7b @ On translation fault cmp r3, #0x0b bne do_DataAbort bic r1, r1, #0xf @ Fix up FSR FS[5:0] and ip, ip, #0x7e orr r1, r1, ip, LSR #1 #endif b do_DataAbort ENDPROC(v7_early_abort)
v7_early_abort很简单, 先对FSR与FAR的处理(reference manual B3-18), 然后调用do_DataAbort. 使用r0保存FAR(fault address register), 使用r1保存FSR(fault status register), 后面会用到.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 asmlinkage void __exception do_DataAbort (unsigned long addr, unsigned int fsr, struct pt_regs *regs) { const struct fsr_info *inf = fsr_info + fsr_fs(fsr); struct siginfo info ; if (!inf->fn(addr, fsr & ~FSR_LNX_PF, regs)) return ; printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n" , inf->name, fsr, addr); info.si_signo = inf->sig; info.si_errno = 0 ; info.si_code = inf->code; info.si_addr = (void __user *)addr; arm_notify_die("" , regs, &info, fsr, 0 ); } struct fsr_info { int (*fn)(unsigned long addr, unsigned int fsr, struct pt_regs *regs); int sig; int code; const char *name; }; #ifdef CONFIG_ARM_LPAE #include "fsr-3level.c" #else #include "fsr-2level.c" #endif
do_DataAbort也很简单, 调用fsr_info数组某个元素的回调, 返回后根据结果向进程发送信号. 由于未开启ARM_LPAE(Large Physical Address Extension, 大物理地址扩展), 此处使用fsr-2level.c的数组(太大了不拷贝). 以page fault为例, 调用do_page_fault, 当找不到页表时会调用__do_user_fault向用户进程发送信号. 回到__dabt_usr, 在abort handler返回后调用ret_from_exception退出异常.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 ENTRY(ret_from_exception) UNWIND(.fnstart) UNWIND(.cantunwind) get_thread_info tsk mov why, #0 b ret_to_user UNWIND(.fnend) ENDPROC(__pabt_usr) ENDPROC(ret_from_exception) ENTRY(ret_to_user) ret_slow_syscall: disable_irq @ disable interrupts ENTRY(ret_to_user_from_irq) ldr r1, [tsk, #TI_FLAGS] tst r1, #_TIF_WORK_MASK bne work_pending no_work_pending: asm_trace_hardirqs_on /* perform architecture specific actions before user return */ arch_ret_to_user r1, lr ct_user_enter save = 0 restore_user_regs fast = 0, offset = 0 ENDPROC(ret_to_user_from_irq) ENDPROC(ret_to_user)
ret_to_user首先会关中断, 检查thread_info->flags. 如发现需要调度的标记执行work_pending(defined in arch/arm/kernel/entry-common.S).
1 2 3 4 5 6 7 8 9 work_pending: mov r0, sp @ 'regs' mov r2, why @ 'syscall' bl do_work_pending cmp r0, #0 beq no_work_pending movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE) ldmia sp, {r0 - r6} @ have to reload r0 - r6 b local_restart @ ... and off we go
do_work_pending(defined in arch/arm/kernel/signal.c)的作用是判断是否需要调度或信号处理:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 asmlinkage int do_work_pending (struct pt_regs *regs, \ unsigned int thread_flags, int syscall) ;{ do { if (likely(thread_flags & _TIF_NEED_RESCHED)) { schedule(); } else { if (unlikely(!user_mode(regs))) return 0 ; local_irq_enable(); if (thread_flags & _TIF_SIGPENDING) { int restart = do_signal(regs, syscall); if (unlikely(restart)) { return restart; } syscall = 0 ; } else { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); } } local_irq_disable(); thread_flags = current_thread_info()->flags; } while (thread_flags & _TIF_WORK_MASK); return 0 ; }
do_signal作用是处理挂起信号, 保存内核寄存器状态, 为内核执行用户态回调做准备. 保存数据的原因: 内核态与用户态共用一套寄存器. 当用户回调返回时内核寄存器状态已被破坏, 因此需要在用户态保存内核寄存器状态.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 static int do_signal (struct pt_regs *regs, int syscall) { ...... if (get_signal(&ksig)) { handle_signal(&ksig, regs); } ...... }
回到work_pending, 当do_work_pending返回时会检查函数返回值(r0). 如果返回成功则跳转到no_work_pending标签, 此时开始准备进入用户态. 其中arch_ret_to_user宏是架构相关宏, ARM上无定义; ct_user_enter是跟踪上下文宏, 忽略. 重点在restore_user_regs(defined in arch/arm/kernel/entry-header.S).
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 .macro restore_user_regs, fast = 0, offset = 0 clrex @ clear the exclusive monitor mov r2, sp load_user_sp_lr r2, r3, \offset + S_SP @ calling sp, lr ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC] @ get pc add sp, sp, #\offset + S_SP msr spsr_cxsf, r1 @ save in spsr_svc .if \fast ldmdb sp, {r1 - r12} @ get calling r1 - r12 .else ldmdb sp, {r0 - r12} @ get calling r0 - r12 .endif add sp, sp, #S_FRAME_SIZE - S_SP movs pc, lr @ return & move spsr_svc into cpsr .endm .macro load_user_sp_lr, rd, rtemp, offset = 0 mrs \rtemp, cpsr eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) msr cpsr_c, \rtemp @ switch to the SYS mode ldr sp, [\rd, #\offset] @ load sp_usr ldr lr, [\rd, #\offset + 4] @ load lr_usr eor \rtemp, \rtemp, #(SVC_MODE ^ SYSTEM_MODE) msr cpsr_c, \rtemp @ switch back to the SVC mode .endm
clrex用于清除本地cpu独占访问某块内存区域的标记. S_SP定义见arch/arm/kernel/asm-offsets.c, 是ARM_sp在pt_regs的偏移. 对sp与lr的保存需额外切换到系统模式后处理, 是因为SVC模式下使用sp_svc与lr_svc. 而系统模式与用户模式使用同一套寄存器, 仅权限不同. 再根据是否为fast_path恢复用户寄存器, 同时恢复sp(此处sp为SVC模式的sp). 最后将lr拷贝给pc, 此指令会自动恢复cpsr, 不要问我为什么reference manual就是这么写的. 至此开始用户子程的执行.
用户进程回溯堆栈 回到第一部分, 如何在信号回调中回溯堆栈? 回顾之前的流程, 当用户进程访问非法地址时立即触发异常, 程序跳转到异常向量, 处理器模式进入异常模式使用异常模式下sp与lr, 当执行完异常处理后cpu恢复到特权模式处理, 此时使用特权模式下sp与lr, 为保证程序在执行完信号回调后能正常恢复特权模式现场, 需要在用户态保存现场, 即do_signal中的sigframe(在用户态即信号回调的参数3), 回到用户态进程还需要入栈一个siginfo结构, 因此用户进程栈结构为: 栈顶 … 异常发生时栈地址 sigframe siginfo 信号回调地址 通过sigframe我们可以获取异常发生时寄存器列表, 即获取异常时sp, pc, lr, 进一步回溯整个堆栈.