KSCrash 源码笔记 - 捕获篇

在上一篇章中,介绍了 KSCrash 的注册流程。其中在 kscm_setActiveMonitors 方法里遍历 g_monitors 数组,对各种类型的崩溃进行一一注册操作。这一篇章就来挨个分析各种类型的崩溃是如何添加监控,以及如何处理崩溃的。

我们就主要分析以下几种典型崩溃类型的处理:

  • Mach kernel exceptions
  • Fatal signals
  • C++ exceptions
  • Objective-C exceptions
  • Main thread deadlock (experimental)

几乎都是相同的处理思路:

第一步调用 getAPI 方法,
第二步调用 setEnabled 方法,
第三步调用 install 方法添加监控,
第四步调用 handle 方法处理捕获到的崩溃事件。

接下来,我们一一查看具体的代码内容。

Mach kernel exceptions

1、getAPI

进入 kscm_machexception_getAPI 方法,该方法的具体内容:

1
2
3
4
5
6
7
8
9
10
11
12
/**
 * Returns the callback table for the Mach kernel exception monitor.
 *
 * The table is a function-local static, so every call returns the same
 * pointer. When KSCRASH_HAS_MACH is not set, no callbacks are assigned.
 */
KSCrashMonitorAPI* kscm_machexception_getAPI()
{
#if KSCRASH_HAS_MACH
    static KSCrashMonitorAPI machMonitorAPI =
    {
        .setEnabled = setEnabled,
        .isEnabled = isEnabled,
        .addContextualInfoToEvent = addContextualInfoToEvent,
    };
#else
    static KSCrashMonitorAPI machMonitorAPI =
    {
    };
#endif
    return &machMonitorAPI;
}

2、setEnabled

在 setEnabled 方法里会对当前崩溃类型添加监控,该方法的具体内容:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/**
 * Enables or disables Mach exception monitoring.
 *
 * Does nothing when the requested state already matches g_isEnabled.
 * When enabling, g_primaryEventID and g_secondaryEventID are filled in by
 * ksid_generate() before the exception handler is installed.
 *
 * @param isEnabled true to install the Mach exception handler,
 *                  false to uninstall it.
 */
static void setEnabled(bool isEnabled)
{
    if(isEnabled == g_isEnabled)
    {
        return;
    }

    // The flag is updated before installing because handleExceptions()
    // checks g_isEnabled as soon as a message arrives.
    g_isEnabled = isEnabled;
    if(isEnabled)
    {
        ksid_generate(g_primaryEventID);
        ksid_generate(g_secondaryEventID);
        if(!installExceptionHandler())
        {
            // Installation failed, so roll the flag back. Otherwise the
            // monitor would report itself as enabled while nothing is
            // installed, and a later setEnabled(true) would be ignored.
            g_isEnabled = false;
        }
    }
    else
    {
        uninstallExceptionHandler();
    }
}

3、install

在上述方法里会根据 isEnabled 值来进行 install or uninstall。主要来看下 installExceptionHandler 方法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/**
 * Installs the Mach exception handler for the current task.
 *
 * Steps, in order:
 *  1. Save the task's current exception ports into g_previousExceptionPorts.
 *  2. Allocate g_exceptionPort (receive right + send right) if it does not
 *     exist yet.
 *  3. Register g_exceptionPort as the task's exception port for the masks
 *     listed below.
 *  4. Start two detached threads running handleExceptions(): a secondary
 *     one and a primary one.
 *
 * @return true on success. On any failure the partially installed state is
 *         torn down via uninstallExceptionHandler() and false is returned.
 */
static bool installExceptionHandler()
{
    KSLOG_DEBUG("Installing mach exception handler.");

    bool attributes_created = false;
    pthread_attr_t attr;

    kern_return_t kr;
    int error;

    // Task port of the current process.
    const task_t thisTask = mach_task_self();
    exception_mask_t mask = EXC_MASK_BAD_ACCESS |
                            EXC_MASK_BAD_INSTRUCTION |
                            EXC_MASK_ARITHMETIC |
                            EXC_MASK_SOFTWARE |
                            EXC_MASK_BREAKPOINT;

    KSLOG_DEBUG("Backing up original exception ports.");
    // Back up the currently registered exception ports into
    // g_previousExceptionPorts.
    kr = task_get_exception_ports(thisTask,
                                  mask,
                                  g_previousExceptionPorts.masks,
                                  &g_previousExceptionPorts.count,
                                  g_previousExceptionPorts.ports,
                                  g_previousExceptionPorts.behaviors,
                                  g_previousExceptionPorts.flavors);
    if(kr != KERN_SUCCESS)
    {
        KSLOG_ERROR("task_get_exception_ports: %s", mach_error_string(kr));
        goto failed;
    }

    // Only create the port if it has not been created before.
    if(g_exceptionPort == MACH_PORT_NULL)
    {
        KSLOG_DEBUG("Allocating new port with receive rights.");
        // Allocate a new port with a receive right.
        kr = mach_port_allocate(thisTask,
                                MACH_PORT_RIGHT_RECEIVE,
                                &g_exceptionPort);
        if(kr != KERN_SUCCESS)
        {
            KSLOG_ERROR("mach_port_allocate: %s", mach_error_string(kr));
            goto failed;
        }

        KSLOG_DEBUG("Adding send rights to port.");
        // Add a send right for the same port to this task.
        kr = mach_port_insert_right(thisTask,
                                    g_exceptionPort,
                                    g_exceptionPort,
                                    MACH_MSG_TYPE_MAKE_SEND);
        if(kr != KERN_SUCCESS)
        {
            KSLOG_ERROR("mach_port_insert_right: %s", mach_error_string(kr));
            goto failed;
        }
    }

    KSLOG_DEBUG("Installing port as exception handler.");
    // Make g_exceptionPort the task's exception port for `mask`.
    kr = task_set_exception_ports(thisTask,
                                  mask,
                                  g_exceptionPort,
                                  (int)(EXCEPTION_DEFAULT | MACH_EXCEPTION_CODES),
                                  THREAD_STATE_NONE);
    if(kr != KERN_SUCCESS)
    {
        KSLOG_ERROR("task_set_exception_ports: %s", mach_error_string(kr));
        goto failed;
    }

    // Create the two threads that listen on the exception port; both run
    // handleExceptions().
    // 1. Secondary handler thread. It is created with plain pthread_create()
    //    and suspends itself at the top of handleExceptions().
    KSLOG_DEBUG("Creating secondary exception thread (suspended).");
    pthread_attr_init(&attr);
    attributes_created = true;
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
    error = pthread_create(&g_secondaryPThread,
                           &attr,
                           &handleExceptions,
                           (void*)kThreadSecondary);
    if(error != 0)
    {
        // The failing call is pthread_create(), so name it correctly.
        KSLOG_ERROR("pthread_create: %s", strerror(error));
        goto failed;
    }
    g_secondaryMachThread = pthread_mach_thread_np(g_secondaryPThread);
    ksmc_addReservedThread(g_secondaryMachThread);

    // 2. Primary handler thread.
    KSLOG_DEBUG("Creating primary exception thread.");
    error = pthread_create(&g_primaryPThread,
                           &attr,
                           &handleExceptions,
                           (void*)kThreadPrimary);
    if(error != 0)
    {
        KSLOG_ERROR("pthread_create: %s", strerror(error));
        goto failed;
    }
    pthread_attr_destroy(&attr);
    g_primaryMachThread = pthread_mach_thread_np(g_primaryPThread);
    ksmc_addReservedThread(g_primaryMachThread);

    KSLOG_DEBUG("Mach exception handler installed.");
    return true;


failed:
    KSLOG_DEBUG("Failed to install mach exception handler.");
    if(attributes_created)
    {
        pthread_attr_destroy(&attr);
    }
    uninstallExceptionHandler();
    return false;
}

4、handle

然后来看异常处理函数 handleExceptions:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/**
 * Thread entry point shared by the primary and secondary Mach exception
 * handler threads.
 *
 * The thread blocks in mach_msg() on g_exceptionPort until an exception
 * message is received. If monitoring is enabled it fills g_monitorContext
 * from the faulting thread's state and passes it to kscm_handleException().
 * It always finishes by sending a KERN_FAILURE reply ("not handled").
 *
 * The secondary thread suspends itself on entry; the primary thread resumes
 * it when it starts handling an exception.
 *
 * @param userData Thread name constant; compared against kThreadSecondary.
 * @return Always NULL.
 */
static void* handleExceptions(void* const userData)
{
MachExceptionMessage exceptionMessage = {{0}};
MachReplyMessage replyMessage = {{0}};
char* eventID = g_primaryEventID;

const char* threadName = (const char*) userData;
pthread_setname_np(threadName);
if(threadName == kThreadSecondary)
{
KSLOG_DEBUG("This is the secondary thread. Suspending.");
thread_suspend((thread_t)ksthread_self());
eventID = g_secondaryEventID;
}

for(;;)
{
KSLOG_DEBUG("Waiting for mach exception");

// Keep calling mach_msg to read an exceptionMessage from the exception port
// until a read succeeds.
// Wait for a message.
kern_return_t kr = mach_msg(&exceptionMessage.header,
MACH_RCV_MSG,
0,
sizeof(exceptionMessage),
g_exceptionPort,
MACH_MSG_TIMEOUT_NONE,
MACH_PORT_NULL);
if(kr == KERN_SUCCESS)
{
break;
}

// Loop and try again on failure.
KSLOG_ERROR("mach_msg: %s", mach_error_string(kr));
}

KSLOG_DEBUG("Trapped mach exception code 0x%llx, subcode 0x%llx",
exceptionMessage.code[0], exceptionMessage.code[1]);
if(g_isEnabled)
{
thread_act_array_t threads = NULL;
mach_msg_type_number_t numThreads = 0;
// Suspend the other threads. NOTE(review): presumably skips the current thread
// and threads registered via ksmc_addReservedThread — confirm in ksmc_suspendEnvironment.
ksmc_suspendEnvironment(&threads, &numThreads);
g_isHandlingCrash = true;
// Notify the monitor core that a fatal exception was captured.
// NOTE(review): the meaning of the `true` argument is not visible here — confirm.
kscm_notifyFatalExceptionCaptured(true);

KSLOG_DEBUG("Exception handler is installed. Continuing exception handling.");


// Switch to the secondary thread if necessary, or uninstall the handler
// to avoid a death loop.
if(ksthread_self() == g_primaryMachThread)
{
KSLOG_DEBUG("This is the primary exception thread. Activating secondary thread.");
// TODO: This was put here to avoid a freeze. Does secondary thread ever fire?
restoreExceptionPorts();
if(thread_resume(g_secondaryMachThread) != KERN_SUCCESS)
{
KSLOG_DEBUG("Could not activate secondary thread. Restoring original exception ports.");
}
}
else
{
KSLOG_DEBUG("This is the secondary exception thread.");// Restoring original exception ports.");
// restoreExceptionPorts();
}

// Fill out crash information
KSLOG_DEBUG("Fetching machine state.");
// Create a new machineContext to hold the state of the faulting thread.
KSMC_NEW_CONTEXT(machineContext);
KSCrash_MonitorContext* crashContext = &g_monitorContext;
crashContext->offendingMachineContext = machineContext;
// Initialise the stack cursor empty first; it is re-initialised from the
// machine context below if the thread state can be read.
kssc_initCursor(&g_stackCursor, NULL, NULL);
if(ksmc_getContextForThread(exceptionMessage.thread.name, machineContext, true))
{
kssc_initWithMachineContext(&g_stackCursor, KSSC_MAX_STACK_DEPTH, machineContext);
KSLOG_TRACE("Fault address %p, instruction address %p",
kscpu_faultAddress(machineContext), kscpu_instructionAddress(machineContext));
// For EXC_BAD_ACCESS report the faulting memory address; for every other
// exception type report the instruction address.
if(exceptionMessage.exception == EXC_BAD_ACCESS)
{
crashContext->faultAddress = kscpu_faultAddress(machineContext);
}
else
{
crashContext->faultAddress = kscpu_instructionAddress(machineContext);
}
}

KSLOG_DEBUG("Filling out context.");
crashContext->crashType = KSCrashMonitorTypeMachException;
crashContext->eventID = eventID;
crashContext->registersAreValid = true;
crashContext->mach.type = exceptionMessage.exception;
crashContext->mach.code = exceptionMessage.code[0] & (int64_t)MACH_ERROR_CODE_MASK;
crashContext->mach.subcode = exceptionMessage.code[1] & (int64_t)MACH_ERROR_CODE_MASK;
if(crashContext->mach.code == KERN_PROTECTION_FAILURE && crashContext->isStackOverflow)
{
// A stack overflow should return KERN_INVALID_ADDRESS, but
// when a stack blasts through the guard pages at the top of the stack,
// it generates KERN_PROTECTION_FAILURE. Correct for this.
crashContext->mach.code = KERN_INVALID_ADDRESS;
}
// Map the Mach exception type/code to the equivalent signal number.
crashContext->signal.signum = signalForMachException(crashContext->mach.type, crashContext->mach.code);
crashContext->stackCursor = &g_stackCursor;

// Hand the filled-in context to the common exception handling path.
kscm_handleException(crashContext);

KSLOG_DEBUG("Crash handling complete. Restoring original handlers.");
g_isHandlingCrash = false;
// Resume the threads that were suspended above.
ksmc_resumeEnvironment(threads, numThreads);
}

KSLOG_DEBUG("Replying to mach exception message.");
// Send a reply saying "I didn't handle this exception".
replyMessage.header = exceptionMessage.header;
replyMessage.NDR = exceptionMessage.NDR;
replyMessage.returnCode = KERN_FAILURE;

mach_msg(&replyMessage.header,
MACH_SEND_MSG,
sizeof(replyMessage),
0,
MACH_PORT_NULL,
MACH_MSG_TIMEOUT_NONE,
MACH_PORT_NULL);

return NULL;
}

其中进入 kscm_handleException 函数,该方法的具体内容:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
void kscm_handleException(struct KSCrash_MonitorContext* context)
{
context->requiresAsyncSafety = g_requiresAsyncSafety;
if(g_crashedDuringExceptionHandling)
{
context->crashedDuringCrashHandling = true;
}
for(int i = 0; i < g_monitorsCount; i++)
{
Monitor* monitor = &g_monitors[i];
if(isMonitorEnabled(monitor))
{
addContextualInfoToEvent(monitor, context);
}
}

g_onExceptionEvent(context);

if (context->currentSnapshotUserReported) {
g_handlingFatalException = false;
} else {
if(g_handlingFatalException && !g_crashedDuringExceptionHandling) {
KSLOG_DEBUG("Exception is fatal. Restoring original handlers.");
kscm_setActiveMonitors(KSCrashMonitorTypeNone);
}
}
}

我们看到调用了 g_onExceptionEvent 函数,该函数会调用 onCrash 方法。onCrash 方法的具体内容,我们会在后续篇章分析。现在,我们进入下一崩溃类型的监控分析。

Fatal signals

1、getAPI

进入 kscm_signal_getAPI 方法,该方法的具体内容:

1
2
3
4
5
6
7
8
9
10
11
12
/**
 * Returns the callback table for the fatal signal monitor.
 *
 * The table is a function-local static, so every call returns the same
 * pointer. When KSCRASH_HAS_SIGNAL is not set, no callbacks are assigned.
 */
KSCrashMonitorAPI* kscm_signal_getAPI()
{
#if KSCRASH_HAS_SIGNAL
    static KSCrashMonitorAPI signalMonitorAPI =
    {
        .setEnabled = setEnabled,
        .isEnabled = isEnabled,
        .addContextualInfoToEvent = addContextualInfoToEvent,
    };
#else
    static KSCrashMonitorAPI signalMonitorAPI =
    {
    };
#endif
    return &signalMonitorAPI;
}

2、setEnabled

在 setEnabled 方法里会对当前崩溃类型添加监控,该方法的具体内容:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/**
 * Enables or disables fatal signal monitoring.
 *
 * Does nothing when the requested state already matches g_isEnabled.
 * When enabling, g_eventID is filled in by ksid_generate() before the
 * signal handlers are installed.
 *
 * @param isEnabled true to install the signal handlers,
 *                  false to uninstall them.
 */
static void setEnabled(bool isEnabled)
{
    if(isEnabled == g_isEnabled)
    {
        return;
    }

    // The flag is updated before installing because handleSignal() checks
    // g_isEnabled as soon as a signal is delivered.
    g_isEnabled = isEnabled;
    if(isEnabled)
    {
        ksid_generate(g_eventID);
        if(!installSignalHandler())
        {
            // Installation failed, so roll the flag back. Otherwise the
            // monitor would report itself as enabled while no handler is
            // installed, and a later setEnabled(true) would be ignored.
            g_isEnabled = false;
        }
    }
    else
    {
        uninstallSignalHandler();
    }
}

3、install

在这个方法里会根据 isEnabled 值来进行 install or uninstall。主要来看下 installSignalHandler 方法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/**
 * Installs handleSignal() as the handler for every fatal signal.
 *
 * When KSCRASH_HAS_SIGNAL_STACK is set, an alternate signal stack is
 * allocated (once) and registered first. The previously installed
 * sigaction for each signal is saved in g_previousSignalHandlers, at the
 * same index as the signal has in the fatal-signal list.
 *
 * @return true if all handlers were installed. false if an allocation,
 *         sigaltstack() or any sigaction() call failed; in the sigaction
 *         case the handlers installed so far are restored first.
 */
static bool installSignalHandler()
{
    KSLOG_DEBUG("Installing signal handler.");

#if KSCRASH_HAS_SIGNAL_STACK

    if(g_signalStack.ss_size == 0)
    {
        KSLOG_DEBUG("Allocating signal stack area.");
        // Publish the size only after the allocation succeeded, so a failed
        // attempt can be retried by a later call.
        void* stackMemory = malloc(SIGSTKSZ);
        if(stackMemory == NULL)
        {
            KSLOG_ERROR("malloc (signal stack): %s", strerror(errno));
            goto failed;
        }
        g_signalStack.ss_sp = stackMemory;
        g_signalStack.ss_size = SIGSTKSZ;
    }

    KSLOG_DEBUG("Setting signal stack area.");
    if(sigaltstack(&g_signalStack, NULL) != 0)
    {
        KSLOG_ERROR("sigaltstack: %s", strerror(errno));
        goto failed;
    }
#endif

    // The list of signals to monitor.
    const int* fatalSignals = kssignal_fatalSignals();
    // Number of entries in that list.
    int fatalSignalsCount = kssignal_numFatalSignals();

    // Storage for the handlers that were installed before ours.
    if(g_previousSignalHandlers == NULL)
    {
        KSLOG_DEBUG("Allocating memory to store previous signal handlers.");
        g_previousSignalHandlers = malloc(sizeof(*g_previousSignalHandlers)
                                          * (unsigned)fatalSignalsCount);
        if(g_previousSignalHandlers == NULL)
        {
            // Without this check the loop below would hand sigaction()
            // pointers computed from a NULL base.
            KSLOG_ERROR("malloc (previous signal handlers): %s", strerror(errno));
            goto failed;
        }
    }

    struct sigaction action = {{0}};
    action.sa_flags = SA_SIGINFO | SA_ONSTACK;
#if KSCRASH_HOST_APPLE && defined(__LP64__)
    action.sa_flags |= SA_64REGSET;
#endif
    sigemptyset(&action.sa_mask);
    // The function that will handle the signals.
    action.sa_sigaction = &handleSignal;

    // Register the handler for each signal in turn.
    for(int i = 0; i < fatalSignalsCount; i++)
    {
        KSLOG_DEBUG("Assigning handler for signal %d", fatalSignals[i]);
        // Install our sigaction and save the previous one in
        // g_previousSignalHandlers[i].
        if(sigaction(fatalSignals[i], &action, &g_previousSignalHandlers[i]) != 0)
        {
            char sigNameBuff[30];
            const char* sigName = kssignal_signalName(fatalSignals[i]);
            if(sigName == NULL)
            {
                snprintf(sigNameBuff, sizeof(sigNameBuff), "%d", fatalSignals[i]);
                sigName = sigNameBuff;
            }
            KSLOG_ERROR("sigaction (%s): %s", sigName, strerror(errno));
            // Try to reverse the damage
            for(i--;i >= 0; i--)
            {
                sigaction(fatalSignals[i], &g_previousSignalHandlers[i], NULL);
            }
            goto failed;
        }
    }
    KSLOG_DEBUG("Signal handlers installed.");
    return true;

failed:
    KSLOG_DEBUG("Failed to install signal handlers.");
    return false;
}

其中遍历的数据,即所支持的所有 signal 类型为:

1
2
3
4
5
6
7
8
9
10
11
/**
 * Signal numbers treated as fatal.
 * NOTE(review): presumably this is the list that kssignal_fatalSignals()
 * returns and installSignalHandler() iterates over — confirm. If so, the
 * order matters, because the saved previous handlers are stored at the
 * same index as the signal.
 */
static const int g_fatalSignals[] =
{
SIGABRT,
SIGBUS,
SIGFPE,
SIGILL,
SIGPIPE,
SIGSEGV,
SIGSYS,
SIGTRAP,
};

4、handle

然后来看异常处理函数 handleSignal:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/**
 * Signal handler installed for the fatal signals.
 *
 * When monitoring is enabled it fills g_monitorContext from the signal
 * information and the user context, then passes it to
 * kscm_handleException(). Whether or not monitoring is enabled, the signal
 * is re-raised at the end.
 *
 * @param sigNum      The signal number that was delivered.
 * @param signalInfo  Signal details; si_addr, si_signo and si_code are read.
 * @param userContext The context pointer passed to the handler, forwarded to
 *                    ksmc_getContextForSignal() and stored in the crash context.
 */
static void handleSignal(int sigNum, siginfo_t* signalInfo, void* userContext)
{
KSLOG_DEBUG("Trapped signal %d", sigNum);
if(g_isEnabled)
{
thread_act_array_t threads = NULL;
mach_msg_type_number_t numThreads = 0;
// Suspend the other threads. NOTE(review): presumably skips the current thread
// and reserved threads — confirm in ksmc_suspendEnvironment.
ksmc_suspendEnvironment(&threads, &numThreads);
// Notify the monitor core that a fatal exception was captured.
// NOTE(review): the meaning of the `false` argument is not visible here — confirm.
kscm_notifyFatalExceptionCaptured(false);

KSLOG_DEBUG("Filling out context.");
// Create a new machineContext and fill it from the signal's user context.
KSMC_NEW_CONTEXT(machineContext);
ksmc_getContextForSignal(userContext, machineContext);
// Initialise the stack cursor from that machine context so the call stack can be walked.
kssc_initWithMachineContext(&g_stackCursor, KSSC_MAX_STACK_DEPTH, machineContext);

KSCrash_MonitorContext* crashContext = &g_monitorContext;
memset(crashContext, 0, sizeof(*crashContext));
crashContext->crashType = KSCrashMonitorTypeSignal;
crashContext->eventID = g_eventID;
crashContext->offendingMachineContext = machineContext;
crashContext->registersAreValid = true;
crashContext->faultAddress = (uintptr_t)signalInfo->si_addr;
crashContext->signal.userContext = userContext;
crashContext->signal.signum = signalInfo->si_signo;
crashContext->signal.sigcode = signalInfo->si_code;
crashContext->stackCursor = &g_stackCursor;

// Hand the filled-in context to the common exception handling path.
kscm_handleException(crashContext);
// Resume the threads that were suspended above.
ksmc_resumeEnvironment(threads, numThreads);
}

KSLOG_DEBUG("Re-raising signal for regular handlers to catch.");
// This is technically not allowed, but it works in OSX and iOS.
raise(sigNum);
}

其中,能看到同样调用了 kscm_handleException 函数。

C++ exceptions

1、getAPI

进入 kscm_cppexception_getAPI 方法,该方法的具体内容:

1
2
3
4
5
6
7
8
9
/**
 * Returns the callback table for the C++ exception monitor.
 *
 * Exported with C linkage. The table is a function-local static, so every
 * call returns the same pointer.
 */
extern "C" KSCrashMonitorAPI* kscm_cppexception_getAPI()
{
    static KSCrashMonitorAPI cppMonitorAPI =
    {
        .setEnabled = setEnabled,
        .isEnabled = isEnabled,
    };
    return &cppMonitorAPI;
}

2、setEnabled

在 setEnabled 方法里会对当前崩溃类型添加监控,该方法的具体内容:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/**
 * Enables or disables C++ exception monitoring.
 *
 * Does nothing when the requested state already matches g_isEnabled.
 * Enabling registers CPPExceptionTerminate via std::set_terminate and
 * remembers the handler it replaced; disabling puts that handler back.
 * g_captureNextStackTrace is set to the new state in both cases.
 *
 * @param isEnabled The requested monitoring state.
 */
static void setEnabled(bool isEnabled)
{
    if(isEnabled == g_isEnabled)
    {
        return;
    }

    g_isEnabled = isEnabled;
    if(!isEnabled)
    {
        std::set_terminate(g_originalTerminateHandler);
    }
    else
    {
        initialize();

        ksid_generate(g_eventID);
        g_originalTerminateHandler = std::set_terminate(CPPExceptionTerminate);
    }
    g_captureNextStackTrace = isEnabled;
}

3、CPPExceptionTerminate

在 setEnabled 方法里会根据 isEnabled 值来决定是否添加监控:开启时通过 std::set_terminate 将 CPPExceptionTerminate 注册为 terminate 处理函数。下面来看 CPPExceptionTerminate 方法:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
/**
 * Terminate handler registered through std::set_terminate.
 *
 * Looks up the type name of the current exception. Unless that name is
 * "NSException", it rethrows the exception to build a textual description,
 * fills g_monitorContext and passes it to kscm_handleException().
 * In every case it finishes by calling the original terminate handler.
 */
static void CPPExceptionTerminate(void)
{
    thread_act_array_t threads = NULL;
    mach_msg_type_number_t numThreads = 0;
    // Suspend the other threads. NOTE(review): presumably skips the current
    // thread and reserved threads — confirm in ksmc_suspendEnvironment.
    ksmc_suspendEnvironment(&threads, &numThreads);
    KSLOG_DEBUG("Trapped c++ exception");
    const char* name = NULL;
    std::type_info* tinfo = __cxxabiv1::__cxa_current_exception_type();
    if(tinfo != NULL)
    {
        name = tinfo->name();
    }

    if(name == NULL || strcmp(name, "NSException") != 0)
    {
        kscm_notifyFatalExceptionCaptured(false);
        KSCrash_MonitorContext* crashContext = &g_monitorContext;
        memset(crashContext, 0, sizeof(*crashContext));

        char descriptionBuff[DESCRIPTION_BUFFER_LENGTH];
        const char* description = descriptionBuff;
        descriptionBuff[0] = 0;

        KSLOG_DEBUG("Discovering what kind of exception was thrown.");
        // Turn off stack trace capture while the exception is rethrown to
        // discover its type; it is switched back to g_isEnabled afterwards.
        g_captureNextStackTrace = false;
        try
        {
            throw;
        }
        catch(std::exception& exc)
        {
            // strncpy does not NUL-terminate when the source fills the
            // buffer, so copy one byte less and terminate explicitly.
            strncpy(descriptionBuff, exc.what(), sizeof(descriptionBuff) - 1);
            descriptionBuff[sizeof(descriptionBuff) - 1] = '\0';
        }
#define CATCH_VALUE(TYPE, PRINTFTYPE) \
catch(TYPE value)\
{ \
    snprintf(descriptionBuff, sizeof(descriptionBuff), "%" #PRINTFTYPE, value); \
}
        CATCH_VALUE(char,                 d)
        CATCH_VALUE(short,                d)
        CATCH_VALUE(int,                  d)
        CATCH_VALUE(long,                ld)
        CATCH_VALUE(long long,          lld)
        CATCH_VALUE(unsigned char,        u)
        CATCH_VALUE(unsigned short,       u)
        CATCH_VALUE(unsigned int,         u)
        CATCH_VALUE(unsigned long,       lu)
        CATCH_VALUE(unsigned long long, llu)
        CATCH_VALUE(float,                f)
        CATCH_VALUE(double,               f)
        CATCH_VALUE(long double,         Lf)
        CATCH_VALUE(char*,                s)
        catch(...)
        {
            // Unknown exception type: no description available.
            description = NULL;
        }
        g_captureNextStackTrace = g_isEnabled;

        // TODO: Should this be done here? Maybe better in the exception handler?
        // Create a new machineContext for the current thread.
        KSMC_NEW_CONTEXT(machineContext);
        ksmc_getContextForThread(ksthread_self(), machineContext, true);

        KSLOG_DEBUG("Filling out context.");
        crashContext->crashType = KSCrashMonitorTypeCPPException;
        crashContext->eventID = g_eventID;
        crashContext->registersAreValid = false;
        // NOTE(review): g_stackCursor is not initialised in this function;
        // presumably it is captured when the exception is thrown — confirm.
        crashContext->stackCursor = &g_stackCursor;
        crashContext->CPPException.name = name;
        crashContext->exceptionName = name;
        crashContext->crashReason = description;
        crashContext->offendingMachineContext = machineContext;

        // Hand the filled-in context to the common exception handling path.
        kscm_handleException(crashContext);
    }
    else
    {
        KSLOG_DEBUG("Detected NSException. Letting the current NSException handler deal with it.");
    }
    // Resume the threads that were suspended above.
    ksmc_resumeEnvironment(threads, numThreads);

    KSLOG_DEBUG("Calling original terminate handler.");
    g_originalTerminateHandler();
}

其中,能看到同样调用了 kscm_handleException 函数。

Objective-C exceptions

1、getAPI

进入 kscm_nsexception_getAPI 方法,该方法的具体内容:

1
2
3
4
5
6
7
8
9
/**
 * Returns the callback table for the Objective-C (NSException) monitor.
 *
 * The table is a function-local static, so every call returns the same
 * pointer.
 */
KSCrashMonitorAPI* kscm_nsexception_getAPI()
{
    static KSCrashMonitorAPI nsExceptionMonitorAPI =
    {
        .setEnabled = setEnabled,
        .isEnabled = isEnabled,
    };
    return &nsExceptionMonitorAPI;
}

2、setEnabled

在 setEnabled 方法里会对当前崩溃类型添加监控,该方法的具体内容:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/**
 * Enables or disables NSException monitoring.
 *
 * Does nothing when the requested state already matches g_isEnabled.
 * Enabling saves the current uncaught exception handler, installs
 * handleUncaughtException in its place, and publishes the two handler
 * functions on the shared KSCrash instance. Disabling reinstalls the
 * saved handler.
 *
 * @param isEnabled The requested monitoring state.
 */
static void setEnabled(bool isEnabled)
{
    if(isEnabled == g_isEnabled)
    {
        return;
    }
    g_isEnabled = isEnabled;

    if(!isEnabled)
    {
        KSLOG_DEBUG(@"Restoring original handler.");
        // Put back the handler that was saved when monitoring was enabled.
        NSSetUncaughtExceptionHandler(g_previousUncaughtExceptionHandler);
        return;
    }

    KSLOG_DEBUG(@"Backing up original handler.");
    // Remember the existing handler so it can be chained to and restored.
    g_previousUncaughtExceptionHandler = NSGetUncaughtExceptionHandler();

    KSLOG_DEBUG(@"Setting new handler.");
    // Install handleUncaughtException as the new uncaught exception handler.
    NSSetUncaughtExceptionHandler(&handleUncaughtException);
    [KSCrash sharedInstance].uncaughtExceptionHandler = &handleUncaughtException;
    [KSCrash sharedInstance].currentSnapshotUserReportedExceptionHandler = &handleCurrentSnapshotUserReportedException;
}

3、handle

在这个方法里会根据 isEnabled 值来执行 NSSetUncaughtExceptionHandler,设置异常处理函数 handleUncaughtException:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
/**
 * Uncaught exception handler installed via NSSetUncaughtExceptionHandler.
 * Forwards to handleException() with the user-reported flag cleared.
 *
 * @param exception The uncaught exception.
 */
static void handleUncaughtException(NSException* exception)
{
    handleException(exception, NO);
}

/**
 * Records an NSException as a crash event.
 *
 * When monitoring is enabled, this copies the exception's call stack
 * return addresses into a C array, fills g_monitorContext with the
 * exception's name, reason and userInfo, and passes it to
 * kscm_handleException(). The suspended threads are resumed only for a
 * user-reported snapshot. Finally the previously installed uncaught
 * exception handler is called, if there was one.
 *
 * @param exception                   The exception to report.
 * @param currentSnapshotUserReported YES when the exception is a
 *                                    user-reported snapshot rather than a
 *                                    real uncaught exception.
 */
static void handleException(NSException* exception, BOOL currentSnapshotUserReported) {
    KSLOG_DEBUG(@"Trapped exception %@", exception);
    if(g_isEnabled)
    {
        thread_act_array_t threads = NULL;
        mach_msg_type_number_t numThreads = 0;
        // Suspend the other threads. NOTE(review): presumably skips the
        // current thread and reserved threads — confirm in ksmc_suspendEnvironment.
        ksmc_suspendEnvironment(&threads, &numThreads);
        // Notify the monitor core that a fatal exception was captured.
        kscm_notifyFatalExceptionCaptured(false);

        KSLOG_DEBUG(@"Filling out context.");
        NSArray* addresses = [exception callStackReturnAddresses];
        NSUInteger numFrames = addresses.count;
        uintptr_t* callstack = malloc(numFrames * sizeof(*callstack));
        if(callstack == NULL)
        {
            // Allocation failed (or there were no frames): report an empty
            // backtrace instead of writing through a NULL pointer below.
            numFrames = 0;
        }
        for(NSUInteger i = 0; i < numFrames; i++)
        {
            callstack[i] = (uintptr_t)[addresses[i] unsignedLongLongValue];
        }

        char eventID[37];
        ksid_generate(eventID);
        // Create a new machineContext for the current thread.
        KSMC_NEW_CONTEXT(machineContext);
        ksmc_getContextForThread(ksthread_self(), machineContext, true);
        // Build a stack cursor over the copied return addresses.
        KSStackCursor cursor;
        kssc_initWithBacktrace(&cursor, callstack, (int)numFrames, 0);

        KSCrash_MonitorContext* crashContext = &g_monitorContext;
        memset(crashContext, 0, sizeof(*crashContext));
        crashContext->crashType = KSCrashMonitorTypeNSException;
        crashContext->eventID = eventID;
        crashContext->offendingMachineContext = machineContext;
        crashContext->registersAreValid = false;
        crashContext->NSException.name = [[exception name] UTF8String];
        crashContext->NSException.userInfo = [[NSString stringWithFormat:@"%@", exception.userInfo] UTF8String];
        crashContext->exceptionName = crashContext->NSException.name;
        crashContext->crashReason = [[exception reason] UTF8String];
        crashContext->stackCursor = &cursor;
        crashContext->currentSnapshotUserReported = currentSnapshotUserReported;

        KSLOG_DEBUG(@"Calling main crash handler.");
        // Hand the filled-in context to the common exception handling path.
        kscm_handleException(crashContext);

        free(callstack);
        if (currentSnapshotUserReported) {
            // Only a user-reported snapshot resumes the suspended threads.
            ksmc_resumeEnvironment(threads, numThreads);
        }
        if (g_previousUncaughtExceptionHandler != NULL)
        {
            KSLOG_DEBUG(@"Calling original exception handler.");
            g_previousUncaughtExceptionHandler(exception);
        }
    }
}

其中,能看到同样调用了 kscm_handleException 函数。

Main thread deadlock (experimental)

1、getAPI

进入 kscm_deadlock_getAPI 方法,该方法的具体内容:

1
2
3
4
5
6
7
8
9
/**
 * Returns the callback table for the main-thread deadlock monitor.
 *
 * The table is a function-local static, so every call returns the same
 * pointer.
 */
KSCrashMonitorAPI* kscm_deadlock_getAPI()
{
    static KSCrashMonitorAPI deadlockMonitorAPI =
    {
        .setEnabled = setEnabled,
        .isEnabled = isEnabled,
    };
    return &deadlockMonitorAPI;
}

2、setEnabled

在 setEnabled 方法里会对当前崩溃类型添加监控,该方法的具体内容:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/**
 * Enables or disables main-thread deadlock monitoring.
 *
 * Does nothing when the requested state already matches g_isEnabled.
 * Enabling creates a new KSCrashDeadlockMonitor and stores it in
 * g_monitor; disabling cancels that monitor and clears g_monitor.
 *
 * @param isEnabled The requested monitoring state.
 */
static void setEnabled(bool isEnabled)
{
    if(isEnabled == g_isEnabled)
    {
        return;
    }
    g_isEnabled = isEnabled;

    if(!isEnabled)
    {
        KSLOG_DEBUG(@"Stopping deadlock monitor.");
        [g_monitor cancel];
        g_monitor = nil;
        return;
    }

    KSLOG_DEBUG(@"Creating new deadlock monitor.");
    initialize();
    g_monitor = [[KSCrashDeadlockMonitor alloc] init];
}

3、KSCrashDeadlockMonitor

具体的处理都在 KSCrashDeadlockMonitor 这个类里。
逻辑就是:开启一个监控线程,每隔 g_watchdogInterval 秒将 BOOL 属性 awaitingResponse 设为 YES,并向主队列异步派发一个任务把 awaitingResponse 再设置为 NO。若下一次检查时 awaitingResponse 仍为 YES,说明主线程在该间隔内没有响应,即判定为主线程死锁。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
/**
 * Creates the monitor and immediately starts its watchdog thread, which
 * runs -runMonitor.
 */
- (id) init
{
    self = [super init];
    if(self == nil)
    {
        return nil;
    }

    // target (self) is retained until selector (runMonitor) exits.
    self.monitorThread = [[NSThread alloc] initWithTarget:self selector:@selector(runMonitor) object:nil];
    NSThread* watchdogThread = self.monitorThread;
    watchdogThread.name = @"KSCrash Deadlock Detection Thread";
    [watchdogThread start];
    return self;
}

/**
 * Body of the watchdog thread. Loops until the monitor thread is cancelled.
 *
 * Each pass sleeps for g_watchdogInterval seconds (or kIdleInterval when
 * the interval is not positive). After waking, if the watchdog is active
 * and the thread was not cancelled, it either reports a deadlock (the
 * previous pulse is still unanswered) or sends a new pulse.
 */
- (void) runMonitor
{
    BOOL threadCancelled = NO;
    do
    {
        @autoreleasepool
        {
            // A non-positive interval means "watchdog off": just idle until
            // the user changes it.
            const NSTimeInterval configuredInterval = g_watchdogInterval;
            const BOOL watchdogActive = configuredInterval > 0;
            [NSThread sleepForTimeInterval:(watchdogActive ? configuredInterval : kIdleInterval)];

            threadCancelled = self.monitorThread.isCancelled;
            if(watchdogActive && !threadCancelled)
            {
                if(!self.awaitingResponse)
                {
                    [self watchdogPulse];
                }
                else
                {
                    [self handleDeadlock];
                }
            }
        }
    } while(!threadCancelled);
}


/**
 * Sends one watchdog pulse: marks the monitor as awaiting a response and
 * asynchronously asks the main queue to run -watchdogAnswer.
 */
- (void) watchdogPulse
{
    self.awaitingResponse = YES;

    __block id monitor = self;
    dispatch_async(dispatch_get_main_queue(), ^
                   {
                       [monitor watchdogAnswer];
                   });
}

/**
 * Answers a watchdog pulse by clearing the awaiting-response flag.
 * Dispatched onto the main queue by -watchdogPulse.
 */
- (void) watchdogAnswer
{
    [self setAwaitingResponse:NO];
}

4、handle

其中,处理异常的函数为 handleDeadlock:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
/**
 * Reports a main-thread deadlock and then aborts the process.
 *
 * Fills g_monitorContext with the machine context and stack of
 * g_mainQueueThread, passes it to kscm_handleException(), resumes the
 * suspended threads and calls abort().
 */
- (void) handleDeadlock
{
thread_act_array_t threads = NULL;
mach_msg_type_number_t numThreads = 0;
// Suspend the other threads. NOTE(review): presumably skips the current thread
// and reserved threads — confirm in ksmc_suspendEnvironment.
ksmc_suspendEnvironment(&threads, &numThreads);
// Notify the monitor core that a fatal exception was captured.
kscm_notifyFatalExceptionCaptured(false);

// Create a new machineContext and fill it from the main queue's thread.
KSMC_NEW_CONTEXT(machineContext);
ksmc_getContextForThread(g_mainQueueThread, machineContext, false);
// Initialise a stack cursor from that machine context so the call stack can be walked.
KSStackCursor stackCursor;
kssc_initWithMachineContext(&stackCursor, KSSC_MAX_STACK_DEPTH, machineContext);
char eventID[37];
ksid_generate(eventID);

KSLOG_DEBUG(@"Filling out context.");
KSCrash_MonitorContext* crashContext = &g_monitorContext;
memset(crashContext, 0, sizeof(*crashContext));
crashContext->crashType = KSCrashMonitorTypeMainThreadDeadlock;
crashContext->eventID = eventID;
crashContext->registersAreValid = false;
crashContext->offendingMachineContext = machineContext;
crashContext->stackCursor = &stackCursor;

// Hand the filled-in context to the common exception handling path.
kscm_handleException(crashContext);
// Resume the threads that were suspended above.
ksmc_resumeEnvironment(threads, numThreads);

KSLOG_DEBUG(@"Calling abort()");
abort();
}

其中,能看到同样调用了 kscm_handleException 函数。

小结

KSCrash 主要支持的崩溃类型就是以上这些。g_monitors 数组里的其他崩溃类型大家可以自行了解。后续篇章中我们进入 kscm_handleException 流程,分析捕获到崩溃之后收集堆栈信息的操作。