崩溃日志收集库 xCrash 浅析
xCrash 是爱奇艺团队开源的一款崩溃日志收集库,可以收集 java crash、native crash 和 ANR 日志
日志格式为专用格式,内容还算丰富:机器信息、崩溃线程和其他线程的方法栈、logcat、打开的 fd 等等 …
默认配置为:
- java crash、native crash 和 ANR 都会被捕获
- 日志目录在 /data/data/[pkg]/files/tombstones
- java crash 日志文件为 tombstone_[加载 xCrash 的时间,单位为微秒的时间戳(毫秒时间戳 * 1000),宽度为 20]_[app version]__[process name].java.xcrash
- native crash 日志文件为 tombstone_[加载 xCrash 的时间,单位为微秒的时间戳(毫秒时间戳 * 1000),宽度为 20]_[app version]__[process name].native.xcrash
- ANR 日志文件为 tombstone_[加载 xCrash 的时间,单位为微秒的时间戳(毫秒时间戳 * 1000),宽度为 20]_[app version]__[process name].trace.xcrash
Java Crash
捕获 Java Crash 用的是 DefaultUncaughtExceptionHandler
,相关的基础知识参考 Uncaught Exception Handling
/**
 * Uncaught-exception handler that writes a Java crash log file and then either
 * forwards the exception to the previously installed default handler or
 * terminates the process.
 */
class JavaCrashHandler implements UncaughtExceptionHandler {
/**
 * Handles an uncaught exception: restores the previous default handler,
 * records the crash, then rethrows or kills the process depending on
 * {@code rethrow}.
 *
 * @param thread    the thread on which the exception was not caught
 * @param throwable the uncaught exception
 */
@Override
public void uncaughtException(Thread thread, Throwable throwable) {
// Re-install the previous default handler before doing any other work.
if (defaultHandler != null) {
Thread.setDefaultUncaughtExceptionHandler(defaultHandler);
}
// Log collection is best effort: a failure here is only logged, and the
// rethrow/kill step below still runs.
try {
handleException(thread, throwable);
} catch (Exception e) {
XCrash.getLogger().e(Util.TAG, "JavaCrashHandler handleException failed", e);
}
// Either rethrow to the previous handler, or kill the app.
if (this.rethrow) {
if (defaultHandler != null) {
defaultHandler.uncaughtException(thread, throwable);
}
} else {
ActivityMonitor.getInstance().finishAllActivities();
Process.killProcess(this.pid);
System.exit(10);
}
}
/**
 * Collects the crash information, writes it to a log file and finally
 * invokes the user callback (if any).
 *
 * @param thread    the thread on which the exception was not caught
 * @param throwable the uncaught exception
 */
private void handleException(Thread thread, Throwable throwable) {
Date crashTime = new Date();
//notify the java crash
NativeHandler.getInstance().notifyJavaCrashed();
AnrHandler.getInstance().notifyJavaCrashed();
//create log file
File logFile = null;
try {
// The file name embeds startTime.getTime() * 1000 (milliseconds scaled by 1000),
// zero-padded to 20 digits, followed by app version and process name.
String logPath = String.format(Locale.US, "%s/%s_%020d_%s__%s%s", logDir, Util.logPrefix, startTime.getTime() * 1000, appVersion, processName, Util.javaLogSuffix);
logFile = FileManager.getInstance().createLogFile(logPath);
} catch (Exception e) {
XCrash.getLogger().e(Util.TAG, "JavaCrashHandler createLogFile failed", e);
}
//get emergency
String emergency = null;
try {
emergency = getEmergency(crashTime, thread, throwable);
} catch (Exception e) {
XCrash.getLogger().e(Util.TAG, "JavaCrashHandler getEmergency failed", e);
}
//write info to log file
if (logFile != null) {
RandomAccessFile raf = null;
try {
raf = new RandomAccessFile(logFile, "rws");
//write emergency info
if (emergency != null) {
raf.write(emergency.getBytes("UTF-8"));
}
//If we wrote the emergency info successfully, we don't need to return it from callback again.
emergency = null;
//write logcat
if (logcatMainLines > 0 || logcatSystemLines > 0 || logcatEventsLines > 0) {
raf.write(Util.getLogcat(logcatMainLines, logcatSystemLines, logcatEventsLines).getBytes("UTF-8"));
}
//write fds
if (dumpFds) {
raf.write(Util.getFds().getBytes("UTF-8"));
}
//write network info
if (dumpNetworkInfo) {
raf.write(Util.getNetworkInfo().getBytes("UTF-8"));
}
//write memory info
raf.write(Util.getMemoryInfo().getBytes("UTF-8"));
//write background / foreground
raf.write(("foreground:\n" + (ActivityMonitor.getInstance().isApplicationForeground() ? "yes" : "no") + "\n\n").getBytes("UTF-8"));
//write other threads info
if (dumpAllThreads) {
raf.write(getOtherThreadsInfo(thread).getBytes("UTF-8"));
}
} catch (Exception e) {
XCrash.getLogger().e(Util.TAG, "JavaCrashHandler write log file failed", e);
} finally {
if (raf != null) {
try {
raf.close();
} catch (Exception ignored) {
}
}
}
}
//callback
// The callback receives the log path (null if no file could be created) and the
// emergency text, which is still non-null only when it was not written to the file.
if (callback != null) {
try {
callback.onCrash(logFile == null ? null : logFile.getAbsolutePath(), emergency);
} catch (Exception ignored) {
}
}
}
}
Header
/**
 * Builds the leading section of a Java crash log: the common log header, a
 * line identifying the process and thread, the stack trace of the throwable
 * and the build-id section returned by {@code getBuildId}.
 *
 * @param crashTime the time at which the crash was observed
 * @param thread    the thread whose name is reported
 * @param throwable the exception whose stack trace is reported
 * @return the assembled text
 */
private String getEmergency(Date crashTime, Thread thread, Throwable throwable) {
    // stack trace, rendered exactly as Throwable.printStackTrace would print it
    StringWriter traceBuffer = new StringWriter();
    throwable.printStackTrace(new PrintWriter(traceBuffer));
    String trace = traceBuffer.toString();

    StringBuilder report = new StringBuilder();
    report.append(Util.getLogHeader(startTime, crashTime, Util.javaCrashType, appId, appVersion));
    report.append("pid: ").append(pid)
            .append(", tid: ").append(Process.myTid())
            .append(", name: ").append(thread.getName())
            .append(" >>> ").append(processName).append(" <<<\n");
    report.append("\n");
    report.append("java stacktrace:\n");
    report.append(trace);
    report.append("\n");
    report.append(getBuildId(trace));
    return report.toString();
}
/**
 * Builds the header of a crash log: a separator line followed by one
 * {@code Key: 'value'} line each for the library version, crash type, start
 * and crash time, app id and version, root state, and device/OS build facts.
 *
 * @param startTime  the start time printed in the "Start time" line
 * @param crashTime  the time printed in the "Crash time" line
 * @param crashType  the value printed in the "Crash type" line
 * @param appId      the value printed in the "App ID" line
 * @param appVersion the value printed in the "App version" line
 * @return the header text, each line terminated by a newline
 */
static String getLogHeader(Date startTime, Date crashTime, String crashType, String appId, String appVersion) {
    DateFormat fmt = new SimpleDateFormat(Util.timeFormatterStr, Locale.US);

    StringBuilder header = new StringBuilder();
    header.append(Util.sepHead).append("\n");
    header.append("Tombstone maker: '").append(Version.fullVersion).append("'\n");
    header.append("Crash type: '").append(crashType).append("'\n");
    header.append("Start time: '").append(fmt.format(startTime)).append("'\n");
    header.append("Crash time: '").append(fmt.format(crashTime)).append("'\n");
    header.append("App ID: '").append(appId).append("'\n");
    header.append("App version: '").append(appVersion).append("'\n");
    header.append("Rooted: '").append(Util.isRoot() ? "Yes" : "No").append("'\n");
    header.append("API level: '").append(Build.VERSION.SDK_INT).append("'\n");
    header.append("OS version: '").append(Build.VERSION.RELEASE).append("'\n");
    header.append("ABI list: '").append(Util.getAbiList()).append("'\n");
    header.append("Manufacturer: '").append(Build.MANUFACTURER).append("'\n");
    header.append("Brand: '").append(Build.BRAND).append("'\n");
    header.append("Model: '").append(Util.getMobileModel()).append("'\n");
    header.append("Build fingerprint: '").append(Build.FINGERPRINT).append("'\n");
    return header.toString();
}
/**
 * Builds the "build id" section for a crash caused by an
 * {@code UnsatisfiedLinkError}.
 *
 * <p>Every double-quoted token of the stack trace that ends in {@code .so} is
 * treated as a library path. For each one, the path itself plus the same file
 * name under the app's native lib dir, {@code /vendor/lib[64]} and
 * {@code /system/lib[64]} are handed to {@code getLibInfo}.
 *
 * @param stktrace the printed stack trace of the crash
 * @return the section text, or an empty string when the trace is not an
 *         {@code UnsatisfiedLinkError} or names no {@code .so} file
 */
private String getBuildId(String stktrace) {
    if (!stktrace.contains("UnsatisfiedLinkError")) {
        return "";
    }

    List<String> libPathList = new ArrayList<String>();
    for (String libPathStr : stktrace.split("\"")) { // " is the delimiter
        if (libPathStr.isEmpty() || !libPathStr.endsWith(".so")) {
            continue;
        }
        libPathList.add(libPathStr);

        String libName = libPathStr.substring(libPathStr.lastIndexOf('/') + 1);
        libPathList.add(XCrash.nativeLibDir + "/" + libName);
        libPathList.add("/vendor/lib/" + libName);
        libPathList.add("/vendor/lib64/" + libName);
        libPathList.add("/system/lib/" + libName);
        libPathList.add("/system/lib64/" + libName);
    }

    // No library path found in the message: emit nothing rather than a
    // section whose body is the literal text "null".
    if (libPathList.isEmpty()) {
        return "";
    }

    // Resolved once for the complete list; previously this ran on every loop
    // iteration and only the last result (same list) was kept.
    // NOTE(review): assumes getLibInfo has no side effects that rely on the repeated calls - confirm.
    String libInfo = getLibInfo(libPathList);

    return "build id:"
            + "\n"
            + libInfo
            + "\n";
}
输出的日志内容如下:
*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
Tombstone maker: 'xCrash 2.4.6' // xCrash 把日志叫做 tombstone,这里指的是生成 tombstone 的 xCrash 的版本
Crash type: 'java' // 指明 crash 类型,此日志包含的是 java crash(此外还有 native crash 和 ANR)
Start time: '2019-10-12T03:23:19.580+0800' // 初始化 xCrash 的时间,也就是调用 XCrash.init 的时间
Crash time: '2019-10-12T03:23:25.533+0800' // 发生崩溃的时间
App ID: 'xcrash.sample' // 发生崩溃的 APP 的包名
App version: '1.2.3-beta456-patch789' // APP version name
Rooted: 'No'
API level: '29'
OS version: '10'
ABI list: 'arm64-v8a,armeabi-v7a,armeabi'
Manufacturer: 'Google'
Brand: 'google'
Model: 'Pixel'
Build fingerprint: 'google/sailfish/sailfish:10/QP1A.190711.020/5800535:user/release-keys'
pid: 21356, tid: 21356, name: main >>> xcrash.sample <<<
java stacktrace: // 崩溃线程的调用栈
java.lang.IllegalStateException: Could not execute method for android:onClick
at androidx.appcompat.app.AppCompatViewInflater$DeclaredOnClickListener.onClick(AppCompatViewInflater.java:402)
at android.view.View.performClick(View.java:7140)
at android.view.View.performClickInternal(View.java:7117)
at android.view.View.access$3500(View.java:801)
at android.view.View$PerformClick.run(View.java:27351)
at android.os.Handler.handleCallback(Handler.java:883)
at android.os.Handler.dispatchMessage(Handler.java:100)
at android.os.Looper.loop(Looper.java:214)
at android.app.ActivityThread.main(ActivityThread.java:7356)
at java.lang.reflect.Method.invoke(Native Method)
at com.android.internal.os.RuntimeInit$MethodAndArgsCaller.run(RuntimeInit.java:492)
at com.android.internal.os.ZygoteInit.main(ZygoteInit.java:930)
Caused by: java.lang.reflect.InvocationTargetException
at java.lang.reflect.Method.invoke(Native Method)
at androidx.appcompat.app.AppCompatViewInflater$DeclaredOnClickListener.onClick(AppCompatViewInflater.java:397)
... 11 more
Caused by: java.lang.RuntimeException: test java exception
at xcrash.XCrash.testJavaCrash(XCrash.java:847)
at xcrash.sample.MainActivity.testJavaCrashInMainThread_onClick(MainActivity.java:67)
... 13 more
logcat
其实就是调用 logcat 命令获取崩溃时的 main、system 和 events 三个 buffer 的日志,如:/system/bin/logcat -b main -d -v threadtime -t 200 --pid 21356 *:D
/**
 * Collects the tail of the "main", "system" and "events" logcat buffers for
 * the current process.
 *
 * <p>A buffer is skipped when its line count is not positive. The minimum
 * priorities are D for main, W for system and I for events.
 *
 * @param logcatMainLines   number of lines requested from the main buffer
 * @param logcatSystemLines number of lines requested from the system buffer
 * @param logcatEventsLines number of lines requested from the events buffer
 * @return the section text, starting with {@code "logcat:\n"} and ending with a blank line
 */
static String getLogcat(int logcatMainLines, int logcatSystemLines, int logcatEventsLines) {
    int pid = android.os.Process.myPid();

    StringBuilder sb = new StringBuilder();
    sb.append("logcat:\n");

    if (logcatMainLines > 0) {
        getLogcatByBufferName(pid, sb, "main", logcatMainLines, 'D');
    }
    if (logcatSystemLines > 0) {
        getLogcatByBufferName(pid, sb, "system", logcatSystemLines, 'W');
    }
    if (logcatEventsLines > 0) {
        // Use the events line count here (this used to pass logcatSystemLines).
        getLogcatByBufferName(pid, sb, "events", logcatEventsLines, 'I');
    }

    sb.append("\n");
    return sb.toString();
}
/**
 * Runs {@code /system/bin/logcat} for one buffer and appends the command
 * line plus its output to {@code sb}.
 *
 * <p>On API level 24 and above the {@code --pid} option is passed. Below
 * that, 20% more lines are requested and only lines containing
 * {@code " <pid> "} are kept. Failures are logged and otherwise ignored.
 *
 * @param pid        the process id whose log lines are wanted
 * @param sb         the builder the output is appended to
 * @param bufferName the logcat buffer name passed to {@code -b}
 * @param lines      the number of lines requested via {@code -t}
 * @param priority   the minimum priority letter used in the {@code *:X} filter
 */
private static void getLogcatByBufferName(int pid, StringBuilder sb, String bufferName, int lines, char priority) {
    boolean withPid = (android.os.Build.VERSION.SDK_INT >= 24);
    String pidString = Integer.toString(pid);
    String pidLabel = " " + pidString + " ";

    //command for ProcessBuilder
    List<String> command = new ArrayList<String>();
    command.add("/system/bin/logcat");
    command.add("-b");
    command.add(bufferName);
    command.add("-d");
    command.add("-v");
    command.add("threadtime");
    command.add("-t");
    // Without --pid the output is filtered below, so ask for some extra lines.
    command.add(Integer.toString(withPid ? lines : (int) (lines * 1.2)));
    if (withPid) {
        command.add("--pid");
        command.add(pidString);
    }
    command.add("*:" + priority);

    //append the command line
    Object[] commandArray = command.toArray();
    sb.append("--------- tail end of log ").append(bufferName);
    sb.append(" (").append(android.text.TextUtils.join(" ", commandArray)).append(")\n");

    //append logs
    Process process = null;
    BufferedReader br = null;
    String line;
    try {
        process = new ProcessBuilder().command(command).start();
        br = new BufferedReader(new InputStreamReader(process.getInputStream()));
        while ((line = br.readLine()) != null) {
            if (withPid || line.contains(pidLabel)) {
                sb.append(line).append("\n");
            }
        }
    } catch (Exception e) {
        XCrash.getLogger().w(Util.TAG, "Util run logcat command failed", e);
    } finally {
        if (br != null) {
            try {
                br.close();
            } catch (IOException ignored) {
            }
        }
        // Closing the reader only closes stdout; destroy the child so its
        // remaining streams are released too.
        if (process != null) {
            process.destroy();
        }
    }
}
输出如下:
logcat:
--------- tail end of log main (/system/bin/logcat -b main -d -v threadtime -t 200 --pid 21356 *:D)
10-12 03:23:19.356 21356 21356 I xcrash.sample: Late-enabling -Xcheck:jni
10-12 03:23:19.398 21356 21356 E xcrash.sample: Unknown bits set in runtime_flags: 0x8000
10-12 03:23:19.571 21356 21356 D xcrash_sample: xCrash SDK init: start
10-12 03:23:19.586 21356 21356 D xcrash_sample: xCrash SDK init: end
10-12 03:23:19.757 21356 21356 W xcrash.sample: Accessing hidden method Landroid/view/View;->computeFitSystemWindows(Landroid/graphics/Rect;Landroid/graphics/Rect;)Z (greylist, reflection, allowed)
10-12 03:23:19.758 21356 21356 W xcrash.sample: Accessing hidden method Landroid/view/ViewGroup;->makeOptionalFitsSystemWindows()V (greylist, reflection, allowed)
10-12 03:23:19.829 21356 21356 I WebViewFactory: Loading com.google.android.webview version 77.0.3865.92 (code 386509238)
10-12 03:23:19.874 21356 21356 I cr_LibraryLoader: Time to load native libraries: 4 ms (timestamps 1922-1926)
10-12 03:23:19.920 21356 21356 I chromium: [INFO:library_loader_hooks.cc(51)] Chromium logging enabled: level = 0, default verbosity = 0
10-12 03:23:19.921 21356 21356 I cr_LibraryLoader: Expected native library version number "77.0.3865.92", actual native library version number "77.0.3865.92"
10-12 03:23:19.926 21356 21402 W cr_ChildProcLH: Create a new ChildConnectionAllocator with package name = com.google.android.webview, sandboxed = true
10-12 03:23:19.930 21356 21402 W xcrash.sample: Accessing hidden method Landroid/content/Context;->bindServiceAsUser(Landroid/content/Intent;Landroid/content/ServiceConnection;ILandroid/os/Handler;Landroid/os/UserHandle;)Z (greylist, reflection, allowed)
10-12 03:23:19.934 21356 21356 I cr_BrowserStartup: Initializing chromium process, singleProcess=false
10-12 03:23:19.979 21356 21430 W chromium: [WARNING:dns_config_service_posix.cc(339)] Failed to read DnsConfig.
10-12 03:23:20.031 21356 21356 W xcrash.sample: Accessing hidden method Landroid/view/textclassifier/logging/SmartSelectionEventTracker;-><init>(Landroid/content/Context;I)V (greylist, reflection, allowed)
10-12 03:23:20.031 21356 21356 W xcrash.sample: Accessing hidden method Landroid/view/textclassifier/logging/SmartSelectionEventTracker;->logEvent(Landroid/view/textclassifier/logging/SmartSelectionEventTracker$SelectionEvent;)V (greylist, reflection, allowed)
10-12 03:23:20.032 21356 21356 W xcrash.sample: Accessing hidden method Landroid/view/textclassifier/logging/SmartSelectionEventTracker$SelectionEvent;->selectionStarted(I)Landroid/view/textclassifier/logging/SmartSelectionEventTracker$SelectionEvent; (greylist, reflection, allowed)
10-12 03:23:20.032 21356 21356 W xcrash.sample: Accessing hidden method Landroid/view/textclassifier/logging/SmartSelectionEventTracker$SelectionEvent;->selectionModified(II)Landroid/view/textclassifier/logging/SmartSelectionEventTracker$SelectionEvent; (greylist, reflection, allowed)
10-12 03:23:20.032 21356 21356 W xcrash.sample: Accessing hidden method Landroid/view/textclassifier/logging/SmartSelectionEventTracker$SelectionEvent;->selectionModified(IILandroid/view/textclassifier/TextClassification;)Landroid/view/textclassifier/logging/SmartSelectionEventTracker$SelectionEvent; (greylist, reflection, allowed)
10-12 03:23:20.032 21356 21356 W xcrash.sample: Accessing hidden method Landroid/view/textclassifier/logging/SmartSelectionEventTracker$SelectionEvent;->selectionModified(IILandroid/view/textclassifier/TextSelection;)Landroid/view/textclassifier/logging/SmartSelectionEventTracker$SelectionEvent; (greylist, reflection, allowed)
10-12 03:23:20.032 21356 21356 W xcrash.sample: Accessing hidden method Landroid/view/textclassifier/logging/SmartSelectionEventTracker$SelectionEvent;->selectionAction(III)Landroid/view/textclassifier/logging/SmartSelectionEventTracker$SelectionEvent; (greylist, reflection, allowed)
10-12 03:23:20.032 21356 21356 W xcrash.sample: Accessing hidden method Landroid/view/textclassifier/logging/SmartSelectionEventTracker$SelectionEvent;->selectionAction(IIILandroid/view/textclassifier/TextClassification;)Landroid/view/textclassifier/logging/SmartSelectionEventTracker$SelectionEvent; (greylist, reflection, allowed)
10-12 03:23:20.143 21356 21395 I Adreno : QUALCOMM build : 4a00b69, I4e7e888065
10-12 03:23:20.143 21356 21395 I Adreno : Build Date : 04/09/19
10-12 03:23:20.143 21356 21395 I Adreno : OpenGL ES Shader Compiler Version: EV031.26.06.00
10-12 03:23:20.143 21356 21395 I Adreno : Local Branch : mybranche95ae4c8-d77f-f18d-a9ef-1458d0b52ae8
10-12 03:23:20.143 21356 21395 I Adreno : Remote Branch : quic/gfx-adreno.lnx.1.0
10-12 03:23:20.143 21356 21395 I Adreno : Remote Branch : NONE
10-12 03:23:20.143 21356 21395 I Adreno : Reconstruct Branch : NOTHING
10-12 03:23:20.143 21356 21395 I Adreno : Build Config : S L 8.0.5 AArch64
10-12 03:23:20.146 21356 21395 I Adreno : PFP: 0x005ff110, ME: 0x005ff066
10-12 03:23:20.198 21356 21395 W Gralloc3: mapper 3.x is not supported
10-12 03:23:25.531 21356 21356 D AndroidRuntime: Shutting down VM
--------- tail end of log system (/system/bin/logcat -b system -d -v threadtime -t 50 --pid 21356 *:W)
--------- tail end of log events (/system/bin/logcat -b events -d -v threadtime -t 50 --pid 21356 *:I)
10-12 03:23:20.046 21356 21356 I am_on_create_called: [0,xcrash.sample.MainActivity,performCreate]
10-12 03:23:20.053 21356 21356 I am_on_start_called: [0,xcrash.sample.MainActivity,handleStartActivity]
10-12 03:23:20.056 21356 21356 I am_on_resume_called: [0,xcrash.sample.MainActivity,RESUME_ACTIVITY]
10-12 03:23:20.083 21356 21356 I am_on_top_resumed_gained_called: [0,xcrash.sample.MainActivity,topStateChangedWhenResumed]
Opened FD
打印已打开的 FD 及其路径,已打开的 FD 在目录 /proc/self/fd
下
/**
 * Lists the file descriptors currently open in this process by reading
 * {@code /proc/self/fd} and resolving each entry to its target path.
 *
 * <p>At most 1024 entries are listed. When there are more, a
 * {@code "......"} line marks the truncation. The total count is always
 * printed. Any failure is swallowed, so the section may be incomplete.
 *
 * @return the section text, starting with {@code "open files:\n"} and ending with a blank line
 */
static String getFds() {
    final int maxListedFds = 1024;
    StringBuilder sb = new StringBuilder("open files:\n");

    try {
        File dir = new File("/proc/self/fd");
        File[] fds = dir.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return TextUtils.isDigitsOnly(name);
            }
        });

        if (fds != null) {
            int count = 0;
            for (File fd : fds) {
                // Stop at exactly maxListedFds entries so the listing agrees with
                // the truncation marker below (the old check let 1025 through).
                if (count >= maxListedFds) {
                    break;
                }

                String path = null;
                try {
                    // Os.readlink is only called on API level 21 and above.
                    if (Build.VERSION.SDK_INT >= 21) {
                        path = Os.readlink(fd.getAbsolutePath());
                    } else {
                        path = fd.getCanonicalPath();
                    }
                } catch (Exception ignored) {
                    // Best effort: an unresolvable fd is printed as "???".
                }
                sb.append(" fd ").append(fd.getName()).append(": ")
                        .append(TextUtils.isEmpty(path) ? "???" : path.trim()).append('\n');
                count++;
            }

            if (fds.length > maxListedFds) {
                sb.append(" ......\n");
            }
            sb.append(" (number of FDs: ").append(fds.length).append(")\n");
        }
    } catch (Exception ignored) {
        // Best effort: whatever was collected so far is still returned.
    }

    sb.append('\n');
    return sb.toString();
}
输出如下:
open files:
fd 0: /dev/null
fd 1: /dev/null
fd 2: /dev/null
fd 3: /proc/21356/fd/3
fd 4: /proc/21356/fd/4
fd 5: /proc/21356/fd/5
fd 6: /dev/null
fd 7: /dev/null
fd 8: /dev/null
fd 9: /apex/com.android.runtime/javalib/core-oj.jar
fd 10: /apex/com.android.runtime/javalib/core-libart.jar
fd 11: /apex/com.android.runtime/javalib/okhttp.jar
fd 12: /apex/com.android.runtime/javalib/bouncycastle.jar
fd 13: /apex/com.android.runtime/javalib/apache-xml.jar
fd 14: /system/framework/framework.jar
fd 15: /system/framework/ext.jar
fd 16: /system/framework/telephony-common.jar
fd 17: /system/framework/voip-common.jar
fd 18: /system/framework/ims-common.jar
fd 19: /dev/null
fd 20: /dev/null
fd 21: /system/framework/android.test.base.jar
fd 22: /apex/com.android.conscrypt/javalib/conscrypt.jar
fd 23: /apex/com.android.media/javalib/updatable-media.jar
fd 24: /system/framework/framework-res.apk
fd 25: /system/product/overlay/GoogleConfigOverlay.apk
fd 26: /system/product/overlay/GoogleWebViewOverlay.apk
fd 27: /vendor/overlay/framework-res__auto_generated_rro_vendor.apk
fd 28: /system/product/overlay/PixelConfigOverlayCommon.apk
fd 29: /system/product/overlay/framework-res__auto_generated_rro_product.apk
fd 30: /dev/null
fd 31: /dev/binder
fd 32: /proc/21356/fd/32
fd 33: /proc/21356/fd/33
fd 34: /proc/21356/fd/34
fd 35: /proc/21356/fd/35
fd 36: /proc/21356/fd/36
fd 37: /data/app/xcrash.sample-WeCpVYjROKKgYtuzbHflHg==/base.apk
fd 38: /proc/21356/fd/38
fd 39: /proc/21356/fd/39
fd 40: /system/product/overlay/NavigationBarModeGestural/NavigationBarModeGesturalOverlay.apk
fd 41: /dev/null
fd 42: /dev/null
fd 43: /dev/null
fd 44: /dev/null
fd 45: /proc/21356/fd/45
fd 46: /proc/21356/fd/46
fd 47: /proc/21356/fd/47
fd 48: /proc/21356/fd/48
fd 49: /dev/ashmem
fd 50: /proc/21356/fd/50
fd 51: /proc/21356/fd/51
fd 52: /data/app/com.google.android.trichromelibrary_386509238-C5vGqz1rgNqceBgeyyw2Aw==/base.apk
fd 53: /proc/21356/fd/53
fd 54: /data/data/xcrash.sample/files/tombstones/tombstone_00001570821799580000_1.2.3-beta456-patch789__xcrash.sample.java.xcrash
fd 55: /data/app/com.google.android.webview-wtyVrSKc9Gzy-ujvyvTNjw==/base.apk
fd 56: /data/app/com.google.android.trichromelibrary_386509238-C5vGqz1rgNqceBgeyyw2Aw==/base.apk
fd 57: /data/data/xcrash.sample/app_webview/webview_data.lock
fd 58: /data/app/com.google.android.webview-wtyVrSKc9Gzy-ujvyvTNjw==/base.apk
fd 59: /system/product/overlay/NavigationBarModeGestural/NavigationBarModeGesturalOverlay.apk
fd 60: /proc/21356/fd/60
fd 61: /proc/21356/fd/61
fd 62: /data/app/com.google.android.trichromelibrary_386509238-C5vGqz1rgNqceBgeyyw2Aw==/base.apk
fd 63: /data/app/com.google.android.trichromelibrary_386509238-C5vGqz1rgNqceBgeyyw2Aw==/base.apk
fd 64: /data/app/com.google.android.webview-wtyVrSKc9Gzy-ujvyvTNjw==/base.apk
fd 65: /data/app/com.google.android.trichromelibrary_386509238-C5vGqz1rgNqceBgeyyw2Aw==/base.apk
fd 66: /dev/urandom
fd 67: /proc/21356/fd/67
fd 68: /proc/21356/fd/68
fd 69: /data/app/com.google.android.webview-wtyVrSKc9Gzy-ujvyvTNjw==/base.apk
fd 70: /proc/21356/fd/70
fd 71: /proc/21356/fd/71
fd 72: /data/app/com.google.android.webview-wtyVrSKc9Gzy-ujvyvTNjw==/base.apk
fd 73: /data/app/com.google.android.webview-wtyVrSKc9Gzy-ujvyvTNjw==/base.apk
fd 74: /proc/21356/fd/74
fd 75: /proc/21356/fd/75
fd 76: /proc/21356/fd/76
fd 77: /proc/21356/fd/77
fd 78: /proc/21356/fd/78
fd 79: /proc/21356/fd/79
fd 80: /proc/21356/fd/80
fd 81: /proc/21356/fd/81
fd 82: /proc/21356/fd/82
fd 83: /proc/21356/fd/83
fd 84: /proc/21356/fd/84
fd 85: /proc/21356/fd/85
fd 86: /proc/21356/fd/86
fd 87: /proc/21356/fd/87
fd 88: /proc/21356/fd/88
fd 89: /proc/21356/fd/89
fd 90: /proc/21356/fd/90
fd 91: /dev/ashmem
fd 92: /dev/ashmem
fd 93: /dev/ashmem
fd 94: /data/data/xcrash.sample/app_webview/Web Data
fd 95: /proc/21356/fd/95
fd 96: /proc/21356/fd/96
fd 97: /dev/ashmem
fd 98: /dev/ion
fd 99: /proc/21356/fd/99
fd 100: /proc/21356/fd/100
fd 101: /proc/21356/fd/101
fd 102: /dev/ashmem
fd 103: /dev/kgsl-3d0
fd 104: /dev/ion
fd 105: /dev/hwbinder
fd 106: /proc/21356/fd/106
fd 107: /proc/21356/fd/107
fd 110: /proc/21356/fd/110
fd 111: /proc/21356/fd/111
fd 113: /proc/21356/fd/113
fd 114: /proc/21356/fd/114
fd 115: /proc/21356/fd/115
fd 116: /proc/21356/fd/116
fd 117: /proc/21356/fd/117
(number of FDs: 115)
System Memory Summary
System Summary (From: /proc/meminfo)
MemTotal: 3855796 kB
MemFree: 90124 kB
MemAvailable: 1452636 kB
Buffers: 77420 kB
Cached: 1461900 kB
SwapCached: 10232 kB
Active: 1771504 kB
Inactive: 1014432 kB
Active(anon): 1046604 kB
Inactive(anon): 368348 kB
Active(file): 724900 kB
Inactive(file): 646084 kB
Unevictable: 151672 kB
Mlocked: 151672 kB
SwapTotal: 524284 kB
SwapFree: 271320 kB
Dirty: 136 kB
Writeback: 0 kB
AnonPages: 1391280 kB
Mapped: 620988 kB
Shmem: 16660 kB
Slab: 231556 kB
SReclaimable: 92700 kB
SUnreclaim: 138856 kB
KernelStack: 44448 kB
PageTables: 57544 kB
NFS_Unstable: 0 kB
Bounce: 0 kB
WritebackTmp: 0 kB
CommitLimit: 2452180 kB
Committed_AS: 67847232 kB
VmallocTotal: 258998208 kB
VmallocUsed: 223632 kB
VmallocChunk: 258675172 kB
APP Process Summary
Process Status (From: /proc/PID/status)
Name: xcrash.sample
State: R (running)
Tgid: 21356
Pid: 21356
PPid: 626
TracerPid: 0
Uid: 10180 10180 10180 10180
Gid: 10180 10180 10180 10180
Ngid: 0
FDSize: 128
Groups: 9997 20180 50180
VmPeak: 5659228 kB
VmSize: 5542192 kB
VmLck: 0 kB
VmPin: 0 kB
VmHWM: 94624 kB
VmRSS: 94396 kB
VmData: 5051840 kB
VmStk: 8192 kB
VmExe: 28 kB
VmLib: 166580 kB
VmPTE: 1068 kB
VmSwap: 6476 kB
Threads: 37
SigQ: 0/13891
SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000080001200
SigIgn: 0000000000000001
SigCgt: 0000000e400084fc
CapInh: 0000000000000000
CapPrm: 0000000000000000
CapEff: 0000000000000000
CapBnd: 0000000000000000
CapAmb: 0000000000000000
Seccomp: 2
Cpus_allowed: f
Cpus_allowed_list: 0-3
Mems_allowed: 1
Mems_allowed_list: 0
voluntary_ctxt_switches: 343
nonvoluntary_ctxt_switches: 301
APP Process Limits
Process Limits (From: /proc/PID/limits)
Limit Soft Limit Hard Limit Units
Max cpu time unlimited unlimited seconds
Max file size unlimited unlimited bytes
Max data size unlimited unlimited bytes
Max stack size 8388608 unlimited bytes
Max core file size 0 unlimited bytes
Max resident set unlimited unlimited bytes
Max processes 13891 13891 processes
Max open files 32768 32768 files
Max locked memory 65536 65536 bytes
Max address space unlimited unlimited bytes
Max file locks unlimited unlimited locks
Max pending signals 13891 13891 signals
Max msgqueue size 819200 819200 bytes
Max nice priority 40 40
Max realtime priority 0 0
Max realtime timeout unlimited unlimited us
APP Memory Summary
Process Summary (From: android.os.Debug.MemoryInfo)
Pss(KB)
------
Java Heap: 7632
Native Heap: 10932
Code: 19064
Stack: 56
Graphics: 1104
Private Other: 3448
System: 4414
TOTAL: 46650 TOTAL SWAP: 6460
Other StackTraces
/**
 * Renders the stack traces of all threads other than the crashed one,
 * followed by summary counters.
 *
 * <p>Because there may be many threads, the output can be restricted by a
 * whitelist of thread-name regexes ({@code dumpAllThreadsWhiteList}) and by a
 * maximum number of dumped threads ({@code dumpAllThreadsCountMax}).
 *
 * @param crashedThread the thread that crashed; it is excluded from the output
 * @return the rendered section, possibly empty
 */
private String getOtherThreadsInfo(Thread crashedThread) {
    int thdMatchedRegex = 0;
    int thdIgnoredByLimit = 0;
    int thdDumped = 0;

    //build whitelist regex list
    ArrayList<Pattern> whiteList = null;
    if (dumpAllThreadsWhiteList != null) {
        whiteList = new ArrayList<Pattern>();
        for (String s : dumpAllThreadsWhiteList) {
            try {
                whiteList.add(Pattern.compile(s));
            } catch (Exception e) {
                XCrash.getLogger().w(Util.TAG, "JavaCrashHandler pattern compile failed", e);
            }
        }
    }

    //dump trace
    StringBuilder sb = new StringBuilder();
    Map<Thread, StackTraceElement[]> map = Thread.getAllStackTraces();
    for (Map.Entry<Thread, StackTraceElement[]> entry : map.entrySet()) {
        Thread thd = entry.getKey();
        StackTraceElement[] stacktrace = entry.getValue();

        // Skip the crashed thread itself. Compare by identity, not by name:
        // thread names are not unique, and a name match would also drop every
        // other thread sharing the name and skew the "total" counter below.
        if (thd == crashedThread) continue;

        //check regex for thread name
        if (whiteList != null && !matchThreadName(whiteList, thd.getName())) continue;
        thdMatchedRegex++;

        //check dump count limit
        if (dumpAllThreadsCountMax > 0 && thdDumped >= dumpAllThreadsCountMax) {
            thdIgnoredByLimit++;
            continue;
        }

        sb.append(Util.sepOtherThreads + "\n");
        // Note: the "tid" printed here is Thread.getId(), i.e. the Java thread id.
        sb.append("pid: ").append(pid).append(", tid: ").append(thd.getId()).append(", name: ").append(thd.getName()).append(" >>> ").append(processName).append(" <<<\n");
        sb.append("\n");
        sb.append("java stacktrace:\n");
        for (StackTraceElement element : stacktrace) {
            sb.append(" at ").append(element.toString()).append("\n");
        }
        sb.append("\n");
        thdDumped++;
    }

    //statistics
    if (map.size() > 1) {
        if (thdDumped == 0) {
            sb.append(Util.sepOtherThreads + "\n");
        }

        sb.append("total JVM threads (exclude the crashed thread): ").append(map.size() - 1).append("\n");
        if (whiteList != null) {
            sb.append("JVM threads matched whitelist: ").append(thdMatchedRegex).append("\n");
        }
        if (dumpAllThreadsCountMax > 0) {
            sb.append("JVM threads ignored by max count limit: ").append(thdIgnoredByLimit).append("\n");
        }
        sb.append("dumped JVM threads:").append(thdDumped).append("\n");
        sb.append(Util.sepOtherThreadsEnding + "\n");
    }

    return sb.toString();
}
输出如下:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4364, name: RenderThread >>> xcrash.sample <<<
java stacktrace:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4349, name: Jit thread pool worker thread 0 >>> xcrash.sample <<<
java stacktrace:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4357, name: Binder:21356_2 >>> xcrash.sample <<<
java stacktrace:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4374, name: NetworkService >>> xcrash.sample <<<
java stacktrace:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4353, name: ReferenceQueueDaemon >>> xcrash.sample <<<
java stacktrace:
at java.lang.Object.wait(Native Method)
at java.lang.Object.wait(Object.java:442)
at java.lang.Object.wait(Object.java:568)
at java.lang.Daemons$ReferenceQueueDaemon.runInternal(Daemons.java:215)
at java.lang.Daemons$Daemon.run(Daemons.java:137)
at java.lang.Thread.run(Thread.java:919)
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4359, name: Profile Saver >>> xcrash.sample <<<
java stacktrace:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4371, name: GoogleApiHandler >>> xcrash.sample <<<
java stacktrace:
at android.os.MessageQueue.nativePollOnce(Native Method)
at android.os.MessageQueue.next(MessageQueue.java:336)
at android.os.Looper.loop(Looper.java:174)
at android.os.HandlerThread.run(HandlerThread.java:67)
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4362, name: xcrash_trace_dp >>> xcrash.sample <<<
java stacktrace:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4361, name: xcrash_crash_cb >>> xcrash.sample <<<
java stacktrace:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4352, name: HeapTaskDaemon >>> xcrash.sample <<<
java stacktrace:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4354, name: FinalizerDaemon >>> xcrash.sample <<<
java stacktrace:
at java.lang.Object.wait(Native Method)
at java.lang.Object.wait(Object.java:442)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:190)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:211)
at java.lang.Daemons$FinalizerDaemon.runInternal(Daemons.java:271)
at java.lang.Daemons$Daemon.run(Daemons.java:137)
at java.lang.Thread.run(Thread.java:919)
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4372, name: Chrome_IOThread >>> xcrash.sample <<<
java stacktrace:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4370, name: CrAsyncTask #2 >>> xcrash.sample <<<
java stacktrace:
at sun.misc.Unsafe.park(Native Method)
at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:230)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2109)
at java.util.concurrent.ArrayBlockingQueue.poll(ArrayBlockingQueue.java:402)
at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1091)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1152)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:641)
at Js.run(PG:2)
at java.lang.Thread.run(Thread.java:919)
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4375, name: PlatformServiceBridgeHandlerThread >>> xcrash.sample <<<
java stacktrace:
at android.os.MessageQueue.nativePollOnce(Native Method)
at android.os.MessageQueue.next(MessageQueue.java:336)
at android.os.Looper.loop(Looper.java:174)
at android.os.HandlerThread.run(HandlerThread.java:67)
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4355, name: FinalizerWatchdogDaemon >>> xcrash.sample <<<
java stacktrace:
at java.lang.Thread.sleep(Native Method)
at java.lang.Thread.sleep(Thread.java:440)
at java.lang.Thread.sleep(Thread.java:356)
at java.lang.Daemons$FinalizerWatchdogDaemon.sleepForMillis(Daemons.java:383)
at java.lang.Daemons$FinalizerWatchdogDaemon.waitForFinalization(Daemons.java:411)
at java.lang.Daemons$FinalizerWatchdogDaemon.runInternal(Daemons.java:323)
at java.lang.Daemons$Daemon.run(Daemons.java:137)
at java.lang.Thread.run(Thread.java:919)
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4378, name: process reaper >>> xcrash.sample <<<
java stacktrace:
at sun.misc.Unsafe.park(Native Method)
at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:230)
at java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:461)
at java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:362)
at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:937)
at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1091)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1152)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:641)
at java.lang.Thread.run(Thread.java:919)
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4377, name: CleanupReference >>> xcrash.sample <<<
java stacktrace:
at java.lang.Object.wait(Native Method)
at java.lang.Object.wait(Object.java:442)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:190)
at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:211)
at Po.run(PG:2)
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4350, name: Signal Catcher >>> xcrash.sample <<<
java stacktrace:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4373, name: ThreadPoolForeg >>> xcrash.sample <<<
java stacktrace:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4369, name: Chrome_ProcessLauncherThread >>> xcrash.sample <<<
java stacktrace:
at android.os.MessageQueue.nativePollOnce(Native Method)
at android.os.MessageQueue.next(MessageQueue.java:336)
at android.os.Looper.loop(Looper.java:174)
at android.os.HandlerThread.run(HandlerThread.java:67)
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4358, name: Binder:21356_3 >>> xcrash.sample <<<
java stacktrace:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4366, name: CrAsyncTask #1 >>> xcrash.sample <<<
java stacktrace:
at sun.misc.Unsafe.park(Native Method)
at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:230)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2109)
at java.util.concurrent.ArrayBlockingQueue.poll(ArrayBlockingQueue.java:402)
at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1091)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1152)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:641)
at Js.run(PG:2)
at java.lang.Thread.run(Thread.java:919)
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4376, name: ThreadPoolForeg >>> xcrash.sample <<<
java stacktrace:
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
pid: 21356, tid: 4356, name: Binder:21356_1 >>> xcrash.sample <<<
java stacktrace:
total JVM threads (exclude the crashed thread): 24
dumped JVM threads:24
Native Crash
捕获 Native Crash 靠的是信号处理器(sigaction
),比如说访问非法地址时,APP 进程会收到 SIGSEGV
,对应的信号处理器就可以在这个时间点收集堆栈信息
sigaction
signal 产生后会处于几种状态中:
1. blocked,让内核先持有信号不要分发(deliver),在 unblocked 之前都不会被分发出去;被 blocked 的信号集合叫做 signal mask,每个线程都有自己的 signal mask
2. pending,内核正在分发信号给指定的进程/线程(但还没分发出去)
signal 可以是进程范围的,比如内核产生的信号、kill 和 sigqueue;也可以是线程范围的,比如因执行机器指令而导致的硬件异常(SIGSEGV、SIGFPE)、通过 tgkill 或者 pthread_kill 指定目标线程
进程范围的信号会随机选择一个 signal unblocked 的线程来消费(deliver)
sigaction
用来注册信号处理器,是升级版的 signal
// If act is non-NULL it is installed as the new handler for signum; if oldact is non-NULL the previous handler is saved there.
int sigaction(int signum, const struct sigaction *act, struct sigaction *oldact);
struct sigaction {
void (*sa_handler)(int); // handler that receives only the signal number
void (*sa_sigaction)(int, siginfo_t *, void *); // used instead of sa_handler when SA_SIGINFO is set (receives three arguments)
sigset_t sa_mask; // signals temporarily blocked while the handler runs (they are added to the thread's signal mask)
int sa_flags;
void (*sa_restorer)(void); // not for app
};
// sa_flags:
// SA_SIGINFO use sa_sigaction as the handler
// SA_RESTART decides what happens to a system call / library function that was blocking the thread when the signal arrived, once the handler returns;
// by default the call returns a failure code; with this flag it is restarted instead
// SA_ONSTACK run the handler on an alternate call stack
void sa_sigaction(int sig, siginfo_t *info, void *ucontext)
siginfo_t {
int si_signo; /* Signal number */
int si_errno; /* An errno value */
int si_code; /* Signal code */
int si_trapno; /* Trap number that caused
hardware-generated signal
(unused on most architectures) */
pid_t si_pid; /* Sending process ID */
uid_t si_uid; /* Real user ID of sending process */
int si_status; /* Exit value or signal */
clock_t si_utime; /* User time consumed */
clock_t si_stime; /* System time consumed */
sigval_t si_value; /* Signal value */
int si_int; /* POSIX.1b signal */
void *si_ptr; /* POSIX.1b signal */
int si_overrun; /* Timer overrun count;
POSIX.1b timers */
int si_timerid; /* Timer ID; POSIX.1b timers */
void *si_addr; /* Memory location which caused fault */
long si_band; /* Band event (was int in
glibc 2.3.2 and earlier) */
int si_fd; /* File descriptor */
short si_addr_lsb; /* Least significant bit of address
(since Linux 2.6.32) */
void *si_lower; /* Lower bound when address violation
occurred (since Linux 3.19) */
void *si_upper; /* Upper bound when address violation
occurred (since Linux 3.19) */
int si_pkey; /* Protection key on PTE that caused
fault (since Linux 4.6) */
void *si_call_addr; /* Address of system call instruction
(since Linux 3.5) */
int si_syscall; /* Number of attempted system call
(since Linux 3.5) */
unsigned int si_arch; /* Architecture of attempted system call
(since Linux 3.5) */
}
sigaltstack
/**
* Sets a new (and/or fetches the previous) alternate signal stack for the calling thread;
* in other words, it pre-allocates a block of memory that signal handlers use as their call stack.
* The most common usage of an alternate signal stack is to handle the SIGSEGV signal
* that is generated if the space available for the normal process stack is exhausted:
* in this case, a signal handler for SIGSEGV cannot be invoked on the process stack;
* if we wish to handle it, we must use an alternate signal stack
*/
int sigaltstack(const stack_t *ss, stack_t *old_ss);
typedef struct {
void *ss_sp; /* Base address of stack */
int ss_flags; /* Flags */
size_t ss_size; /* Number of bytes in stack */
} stack_t;
xc_crash_signal_handler
// XCrash.init
// NativeHandler.initialize
// NativeHandler.nativeInit
// xc_jni_init
// xc_crash_init
// Signals that are captured as native crashes.
// xcc_signal_crash_register passes each entry's signum to sigaction() and
// stores the previously installed handler in the same entry's oldact field.
static xcc_signal_crash_info_t xcc_signal_crash_info[] =
{
{.signum = SIGABRT},
{.signum = SIGBUS},
{.signum = SIGFPE},
{.signum = SIGILL},
{.signum = SIGSEGV},
{.signum = SIGTRAP},
{.signum = SIGSYS},
{.signum = SIGSTKFLT}
};
/*
 * Installs `handler` for every signal listed in xcc_signal_crash_info.
 *
 * An alternate signal stack of XCC_SIGNAL_CRASH_STACK_SIZE bytes is allocated
 * and registered first, and the handler is installed with SA_ONSTACK so that
 * it runs on that stack. The previous action for each signal is saved in the
 * matching xcc_signal_crash_info[i].oldact.
 *
 * Returns 0 on success, XCC_ERRNO_NOMEM if the stack cannot be allocated, or
 * XCC_ERRNO_SYS if sigaltstack()/sigaction() fails.
 */
int xcc_signal_crash_register(void (*handler)(int, siginfo_t *, void *))
{
    // Pre-allocate the memory the handler will use as its call stack.
    stack_t ss;
    if(NULL == (ss.ss_sp = calloc(1, XCC_SIGNAL_CRASH_STACK_SIZE))) return XCC_ERRNO_NOMEM;
    ss.ss_size  = XCC_SIGNAL_CRASH_STACK_SIZE;
    ss.ss_flags = 0;
    if(0 != sigaltstack(&ss, NULL))
    {
        // The buffer was never registered as a signal stack, so release it
        // here instead of leaking it.
        free(ss.ss_sp);
        return XCC_ERRNO_SYS;
    }

    struct sigaction act;
    memset(&act, 0, sizeof(act));
    sigfillset(&act.sa_mask);   // block every signal while the handler runs
    act.sa_sigaction = handler;
    act.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK;

    // Register the handler for each signal in the table, remembering the old action.
    size_t i;
    for(i = 0; i < sizeof(xcc_signal_crash_info) / sizeof(xcc_signal_crash_info[0]); i++)
        if(0 != sigaction(xcc_signal_crash_info[i].signum, &act, &(xcc_signal_crash_info[i].oldact)))
            return XCC_ERRNO_SYS;

    return 0;
}
// 信号处理器,跟上面的 JavaCrashHandler 一样,主要是收集各种信息,写入 tombstone 文件
// 比较复杂,下一章节进行分析
static void xc_crash_signal_handler(int sig, siginfo_t *si, void *uc)
核心步骤
- 信号处理器(
xc_crash_signal_handler
,在 APP 进程)收集相关的信息到xc_crash_spot
fork
出子进程 dumper,子进程继承了父进程的内存布局,也就捕获到了 APP 进程 crash 时刻的内存布局- dumper 进程的入口点是
xc_crash_exec_dumper
,signal handler 线程通过waitpid
阻塞直到 dumper 进程完成工作 - dumper 将 signal 和调用堆栈等信息写入管道,然后加载程序
libxcrash_dumper.so
替换当前的内存空间(旧的内存空间的所有信息将被清空) xcd_core.c
里的 main 函数从管道里读取xc_crash_spot
并写入 tombstone 日志文件,退出- signal handler 线程从阻塞中恢复,退出 APP 进程
// Runs in the app process on the signal handler thread; this is where the dump starts.
// NOTE(review): this is an abridged excerpt - `status` is not declared here and
// the final exit step is only indicated by a comment.
static void xc_crash_signal_handler(int sig, siginfo_t *si, void *uc)
{
// set crash spot info
xc_crash_spot.crash_time = xc_crash_time;
xc_crash_spot.crash_tid = xc_crash_tid;
// copy the signal details and the context passed to the handler into the crash spot
memcpy(&(xc_crash_spot.siginfo), si, sizeof(siginfo_t));
memcpy(&(xc_crash_spot.ucontext), uc, sizeof(ucontext_t));
xc_crash_spot.log_pathname_len = strlen(xc_crash_log_pathname);
// spawn crash dumper process
pid_t dumper_pid = xc_crash_fork(xc_crash_exec_dumper);
// wait the crash dumper process terminated
int wait_r = XCC_UTIL_TEMP_FAILURE_RETRY(waitpid(dumper_pid, &status, __WALL));
// exit
}
// Entry point of the dumper process.
// It writes xc_crash_spot plus the variable-length strings to a pipe, makes the
// pipe's read end its stdin, and then exec()s the dumper program.
// Returns only if execl() fails, with 100 + errno.
static int xc_crash_exec_dumper(void *arg)
{
// Create a pipe: pipefd[0] is the read end, pipefd[1] is the write end
int pipefd[2];
// NOTE(review): the statement guarded by this `if` appears to be elided in this excerpt
if(0 != pipe2(pipefd, O_CLOEXEC))
// Write xc_crash_spot (followed by the strings whose lengths it records) into the pipe
struct iovec iovs[12] = {
{.iov_base = &xc_crash_spot, .iov_len = sizeof(xcc_spot_t)},
{.iov_base = xc_crash_log_pathname, .iov_len = xc_crash_spot.log_pathname_len},
{.iov_base = xc_common_os_version, .iov_len = xc_crash_spot.os_version_len},
{.iov_base = xc_common_kernel_version, .iov_len = xc_crash_spot.kernel_version_len},
{.iov_base = xc_common_abi_list, .iov_len = xc_crash_spot.abi_list_len},
{.iov_base = xc_common_manufacturer, .iov_len = xc_crash_spot.manufacturer_len},
{.iov_base = xc_common_brand, .iov_len = xc_crash_spot.brand_len},
{.iov_base = xc_common_model, .iov_len = xc_crash_spot.model_len},
{.iov_base = xc_common_build_fingerprint, .iov_len = xc_crash_spot.build_fingerprint_len},
{.iov_base = xc_common_app_id, .iov_len = xc_crash_spot.app_id_len},
{.iov_base = xc_common_app_version, .iov_len = xc_crash_spot.app_version_len},
{.iov_base = xc_crash_dump_all_threads_whitelist, .iov_len = xc_crash_spot.dump_all_threads_whitelist_len}
};
// The whitelist is the last entry; leave it out when it is empty
int iovs_cnt = (0 == xc_crash_spot.dump_all_threads_whitelist_len ? 11 : 12);
ssize_t ret = XCC_UTIL_TEMP_FAILURE_RETRY(writev(pipefd[1], iovs, iovs_cnt));
// Point stdin (fd 0) at the read end of the pipe
XCC_UTIL_TEMP_FAILURE_RETRY(dup2(pipefd[0], STDIN_FILENO));
// The original pipe descriptors are no longer needed once stdin has been redirected
syscall(SYS_close, pipefd[0]);
syscall(SYS_close, pipefd[1]);
// Load the libxcrash_dumper.so program, replacing the current memory image
execl(xc_crash_dumper_pathname, XCC_UTIL_XCRASH_DUMPER_FILENAME, NULL);
// Reached only when execl() failed
return 100 + errno;
}
// Entry point of libxcrash_dumper.so, located in xcd_core.c.
// Each step that fails terminates the dumper with its own exit code (1..6).
// NOTE(review): abridged excerpt - the final exit step is only indicated by a comment.
int main(int argc, char** argv)
{
// Read xc_crash_spot from stdin
if(0 != xcd_core_read_args()) exit(1);
//open log file
if(0 > (xcd_core_log_fd = XCC_UTIL_TEMP_FAILURE_RETRY(open(xcd_core_log_pathname, O_WRONLY | O_CLOEXEC)))) exit(2);
//create process object
if(0 != xcd_process_create(&xcd_core_proc,
xcd_core_spot.crash_pid,
xcd_core_spot.crash_tid,
&(xcd_core_spot.siginfo),
&(xcd_core_spot.ucontext))) exit(3);
//suspend all threads in the process
xcd_process_suspend_threads(xcd_core_proc);
//load process info
if(0 != xcd_process_load_info(xcd_core_proc)) exit(4);
//record system info
if(0 != xcd_sys_record(xcd_core_log_fd,
xcd_core_spot.time_zone,
xcd_core_spot.start_time,
xcd_core_spot.crash_time,
xcd_core_app_id,
xcd_core_app_version,
xcd_core_spot.api_level,
xcd_core_os_version,
xcd_core_kernel_version,
xcd_core_abi_list,
xcd_core_manufacturer,
xcd_core_brand,
xcd_core_model,
xcd_core_build_fingerprint)) exit(5);
//record process info
if(0 != xcd_process_record(xcd_core_proc,
xcd_core_log_fd,
xcd_core_spot.logcat_system_lines,
xcd_core_spot.logcat_events_lines,
xcd_core_spot.logcat_main_lines,
xcd_core_spot.dump_elf_hash,
xcd_core_spot.dump_map,
xcd_core_spot.dump_fds,
xcd_core_spot.dump_network_info,
xcd_core_spot.dump_all_threads,
xcd_core_spot.dump_all_threads_count_max,
xcd_core_dump_all_threads_whitelist,
xcd_core_spot.api_level)) exit(6);
//resume all threads in the process
xcd_process_resume_threads(xcd_core_proc);
// exit
}
Signal Info
打印导致 Native Crash 的 Signal 的基本信息,可以从 sigaction
的信号处理器(xc_crash_signal_handler
)的参数列表里拿到(siginfo_t
)
- 信号码(
siginfo_t.si_signo
),比如:SIGKILL(9)、SIGSEGV(11),更多参考man signal.7
- 信号错误码(
siginfo_t.si_code
),描述此信号的更详细的信息,比如对于 SIGSEGV 有以下错误码:- SEGV_MAPERR Address not mapped to object.
- SEGV_ACCERR Invalid permissions for mapped object.
- SEGV_BNDERR (since Linux 3.19) Failed address bound checks.
- SEGV_PKUERR (since Linux 4.6) Access was denied by memory protection keys. See pkeys(7). The protection key which applied to this access is available via si_pkey.
- SIGSEGV 会将错误地址写入
siginfo_t.si_addr
- 有关
siginfo_t
的更详细信息请参考man sigaction.2
/*
 * Writes the one-line signal summary ("signal N (NAME), code N (NAME...),
 * fault addr ...") for the crashed process to log_fd.
 *
 * Returns whatever xcc_util_write_format returns.
 */
static int xcd_process_record_signal_info(xcd_process_t *self, int log_fd)
{
    char fault_addr[64];
    char sender[64] = "";

    //fault addr
    if(!xcc_util_signal_has_si_addr(self->si))
    {
        // this signal carries no fault address
        snprintf(fault_addr, sizeof(fault_addr), "--------");
    }
    else if(SIGILL != self->si->si_signo)
    {
        snprintf(fault_addr, sizeof(fault_addr), "%p", self->si->si_addr);
    }
    else
    {
        // for SIGILL also show the instruction word found at the fault address
        void *pc = self->si->si_addr;
        uint32_t opcode = 0;
        xcd_util_ptrace_read(self->pid, (uintptr_t)pc, &opcode, sizeof(opcode));
        snprintf(fault_addr, sizeof(fault_addr), "%p (*pc=%#08x)", pc, opcode);
    }

    //from
    if(xcc_util_signal_has_sender(self->si, self->pid))
    {
        snprintf(sender, sizeof(sender), " from pid %d, uid %d", self->si->si_pid, self->si->si_uid);
    }

    return xcc_util_write_format(log_fd, "signal %d (%s), code %d (%s%s), fault addr %s\n",
                                 self->si->si_signo, xcc_util_get_signame(self->si),
                                 self->si->si_code, xcc_util_get_sigcodename(self->si),
                                 sender, fault_addr);
}
输出如下:
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0x0
Registers Info
打印寄存器(Register
)的值,下面是 ARM64 的例子,它有 31 个通用寄存器(x0 ~ x30,其中 x30 即 lr)
寄存器 | 作用 |
---|---|
x0 | 一般表示返回值 |
x1 ~ x7 | 一般是函数的参数(连同 x0 共 8 个),大于 8 个的会通过栈传参 |
lr | 链接寄存器,存放着函数的返回地址 |
sp | 堆栈顶寄存器,用于指向每个函数调用栈的栈顶 |
pc | 表示当前执行的指令的地址 |
x0 0000000000000003 x1 0000000000000000 x2 000000751128fd60 x3 0000007511290020
x4 000000751128fd60 x5 00000075a26c1708 x6 000000751128fd50 x7 00000075200a59dc
x8 0000000000000000 x9 79fc7e30c0ff4d9e x10 00000000000003e8 x11 0000000000000000
x12 0000000000004100 x13 0000000000000001 x14 0000000000080100 x15 0000000000000000
x16 00000074b9be4d20 x17 00000074b9bcc86c x18 00000075a57fa000 x19 00000075a4f52000
x20 0000000000000000 x21 00000075a4f52000 x22 0000007fe0ef23a0 x23 00000074bb1b62fe
x24 0000000000000004 x25 00000075a5107020 x26 00000075a4f520b0 x27 0000000000000001
x28 0000007fe0ef2130 x29 0000007fe0ef2090
sp 0000007fe0ef2070 lr 00000074b9bcc8cc pc 00000074b9bcc884
通过 ptrace
,一个线程(tracer
)可以观察并控制另一线程(tracee
)的执行、读取/修改它的内存和寄存器,比如单步调试(debugger);tracer 和 tracee 都是线程,而不是进程(虽然 ptrace 的参数里写的是 pid);它的一般用法是这样的:
PTRACE_ATTACH
:使当前线程成为 tracer,pid 指定的线程成为 traceewaitpid
:PTRACE_ATTACH 发送 SIGSTOP 给 tracee 但它不一定立刻 stop,所以需要 tracer 等待 traceePTRACE_PEEKDATA
(读内存)、PTRACE_POKEDATA
(写内存)、PTRACE_GETREGS
(读寄存器)、PTRACE_SETREGS
(写寄存器)…PTRACE_DETACH
long ptrace(enum __ptrace_request request, pid_t pid, void *addr, void *data);
// Reads the thread's register values with ptrace and stores them in xcd_thread_t.regs.
// On failure the thread's status is set to XCD_THREAD_STATUS_REGS and regs is left untouched.
// NOTE(review): presumably the thread must already be ptrace-attached by the caller - confirm.
void xcd_thread_load_regs(xcd_thread_t *self)
{
    uintptr_t regs[64]; //big enough for all architectures
    size_t    regs_len;

    if(0 != ptrace(PTRACE_GETREGS, self->tid, NULL, &regs))
    {
        XCD_LOG_ERROR("THREAD: ptrace GETREGS failed, errno=%d", errno);
        self->status = XCD_THREAD_STATUS_REGS;
        return;
    }

    // only the first XCD_REGS_USER_NUM words are copied into self->regs
    regs_len = XCD_REGS_USER_NUM;
    xcd_regs_load_from_ptregs(&(self->regs), regs, regs_len);
}
// Copies at most XCD_REGS_USER_NUM register words from `regs` into self->r.
void xcd_regs_load_from_ptregs(xcd_regs_t *self, uintptr_t *regs, size_t regs_len)
{
    // clamp the caller-supplied count so the destination is never overrun
    const size_t count = (regs_len > XCD_REGS_USER_NUM) ? XCD_REGS_USER_NUM : regs_len;

    memcpy(&(self->r), regs, count * sizeof(uintptr_t));
}
// Writes the register values (x0..x29, then sp, lr, pc) to log_fd,
// four registers per line, each as a 16-digit hexadecimal number.
// Returns whatever xcc_util_write_format returns.
int xcd_regs_record(xcd_regs_t *self, int log_fd)
{
// The arguments below must stay in exactly the same order as the
// placeholders in the format string.
return xcc_util_write_format(log_fd,
" x0 %016lx x1 %016lx x2 %016lx x3 %016lx\n"
" x4 %016lx x5 %016lx x6 %016lx x7 %016lx\n"
" x8 %016lx x9 %016lx x10 %016lx x11 %016lx\n"
" x12 %016lx x13 %016lx x14 %016lx x15 %016lx\n"
" x16 %016lx x17 %016lx x18 %016lx x19 %016lx\n"
" x20 %016lx x21 %016lx x22 %016lx x23 %016lx\n"
" x24 %016lx x25 %016lx x26 %016lx x27 %016lx\n"
" x28 %016lx x29 %016lx\n"
" sp %016lx lr %016lx pc %016lx\n\n",
self->r[XCD_REGS_X0], self->r[XCD_REGS_X1], self->r[XCD_REGS_X2], self->r[XCD_REGS_X3],
self->r[XCD_REGS_X4], self->r[XCD_REGS_X5], self->r[XCD_REGS_X6], self->r[XCD_REGS_X7],
self->r[XCD_REGS_X8], self->r[XCD_REGS_X9], self->r[XCD_REGS_X10], self->r[XCD_REGS_X11],
self->r[XCD_REGS_X12], self->r[XCD_REGS_X13], self->r[XCD_REGS_X14], self->r[XCD_REGS_X15],
self->r[XCD_REGS_X16], self->r[XCD_REGS_X17], self->r[XCD_REGS_X18], self->r[XCD_REGS_X19],
self->r[XCD_REGS_X20], self->r[XCD_REGS_X21], self->r[XCD_REGS_X22], self->r[XCD_REGS_X23],
self->r[XCD_REGS_X24], self->r[XCD_REGS_X25], self->r[XCD_REGS_X26], self->r[XCD_REGS_X27],
self->r[XCD_REGS_X28], self->r[XCD_REGS_X29],
self->r[XCD_REGS_SP], self->r[XCD_REGS_LR], self->r[XCD_REGS_PC]);
}
Backtrace
/proc/pid/maps
/proc/pid/maps
包含了进程所有的内存映射(mmap
)信息,后续的步骤需要用它来查找 函数名 及其所在的 文件路径,它的内容大概是这样的(参考 man mmap.2
& man proc.5
):
列 | 描述 |
---|---|
address | 内存映射所在的进程的虚拟地址空间(开始地址 - 结束地址) |
perms | 这块内存的读写权限:r = read,w = write,x = execute,s = shared,p = private (copy on write) |
offset | 映射至内存的文件(或者其他东西)的起始偏移 |
dev | 文件所在的设备(major:minor) |
inode | 文件的 inode |
path | 文件的路径 |
address perms offset dev inode pathname
12c00000-32c00000 rw-p 00000000 00:00 0 [anon:dalvik-main space (region space)]
70fb9000-71248000 rw-p 00000000 00:00 0 [anon:dalvik-/apex/com.android.art/javalib/boot.art]
71248000-712a3000 rw-p 00000000 00:00 0 [anon:dalvik-/apex/com.android.art/javalib/boot-core-libart.art]
712a3000-7136f000 rw-p 00000000 00:00 0 [anon:dalvik-/apex/com.android.art/javalib/boot-core-icu4j.art]
7136f000-713a6000 rw-p 00000000 00:00 0 [anon:dalvik-/apex/com.android.art/javalib/boot-okhttp.art]
713a6000-713ea000 rw-p 00000000 00:00 0 [anon:dalvik-/apex/com.android.art/javalib/boot-bouncycastle.art]
713ea000-713f9000 rw-p 00000000 00:00 0 [anon:dalvik-/apex/com.android.art/javalib/boot-apache-xml.art]
713f9000-71479000 r--p 00000000 fc:00 150 /apex/com.android.art/javalib/arm64/boot.oat
...
7e1f72d000-7e1f778000 r--s 00000000 fc:00 2605 /system/fonts/Roboto-Medium.ttf
7e1f778000-7e1f779000 r--p 00000000 fc:00 6422 /system/system_ext/lib64/libqti-at.so
7e1f779000-7e1f77a000 r-xp 00001000 fc:00 6422 /system/system_ext/lib64/libqti-at.so
7e1f77a000-7e1f77b000 r--p 00002000 fc:00 6422 /system/system_ext/lib64/libqti-at.so
7e1f7a0000-7e1f7ab000 r--p 00000000 fc:00 6522 /system/system_ext/lib64/[email protected]
7e1f7ab000-7e1f7b5000 r-xp 0000b000 fc:00 6522 /system/system_ext/lib64/[email protected]
7e1f7b5000-7e1f7b7000 r--p 00015000 fc:00 6522 /system/system_ext/lib64/[email protected]
7e1f7b7000-7e1f7b8000 rw-p 00016000 fc:00 6522 /system/system_ext/lib64/[email protected]
7e1f86b000-7e1fb0e000 r--p 00000000 103:0f 5128711 /data/data/xcrash.sample/code_cache/.overlay/base.apk/classes.dex
7e1fb0e000-7e1fbd4000 r-xp 00000000 103:0f 5096166 /data/data/xcrash.sample/code_cache/startup_agents/e4ee8c59-agent.so
7e1fbd4000-7e1fbe3000 ---p 00000000 00:00 0
7e1fbe3000-7e1fbec000 rw-p 000c5000 103:0f 5096166 /data/data/xcrash.sample/code_cache/startup_agents/e4ee8c59-agent.so
...
7e24946000-7e24987000 r--s 0001d000 103:0f 180482 /data/app/~~jln4G3nGOa7-pv4aJFN6jg==/xcrash.sample-icj_DCtDvU5ZX6MZSDcn4Q==/base.apk
...
7e347f9000-7e34800000 r--s 001be000 103:0f 180482 /data/app/~~jln4G3nGOa7-pv4aJFN6jg==/xcrash.sample-icj_DCtDvU5ZX6MZSDcn4Q==/base.apk
...
ELF
Linux 下的可执行文件(executable
)和共享库文件(so - Shared Object
)都是 ELF 格式(Executable and Linking Format
)
ELF Header
里的 e_type
指明这是一个什么类型的文件:
e_type | desc |
---|---|
ET_NONE | An unknown type |
ET_REL | A relocatable file |
ET_EXEC | An executable file |
ET_DYN | A shared object |
ET_CORE | A core file |
ELF 里有很多 Section
,每个 Section 都是一段连续的地址保存了相同类型的数据,具体到哪个 Section 在哪里有多大定义在 Section Header
里,它的一些重要成员属性有:
Fields | Desc |
---|---|
sh_name | name of the section. Its value is an index into the string section |
sh_type | SHT_SYMTAB(符号表)、SHT_STRTAB(字符串表)等等 |
sh_offset | Section 在文件里的位置 |
sh_size | Section 的大小 |
所有的 Section Header 组成一个数组 Section Header Table
,它的位置和大小则是在 ELF Header 里定义的:
Fields | desc |
---|---|
e_shoff | SHT 所在的位置 |
e_shentsize × e_shnum | SHT 的大小(ELF Header 里没有单独的 e_shsize 字段,由下面两项相乘得到) |
e_shentsize | 每个 Section Header 的大小 |
e_shnum | Section Header 的数量 |
e_shstrndx | Name Section 在 SHT 的索引(所谓的 Name Section 其实就是专门保存字符串的 Section,类似于 dex 里的字符串池) |
Symbol Table
是一个很重要的 Section,它的结构如下:
Fields | Desc |
---|---|
st_name | symbol name (index of string section) |
st_value | symbol value |
st_size | This member holds zero if the symbol has no size or an unknown size |
st_info | type and binding attributes STT_FUNC (a function or other executable code) STT_OBJECT (data object) STB_LOCAL (Local symbols are not visible outside the object file) STB_GLOBAL (Global symbols are visible to all object files being combined)… |
st_other | symbol visibility STV_DEFAULT (Global and weak symbols are available to other modules; references in the local module can be interposed by definitions in other modules) STV_HIDDEN (Symbol is unavailable to other modules) STV_PROTECTED (Symbol is available to other modules) |
可以用 readelf
命令查看 ELF 文件的结构,以 /apex/com.android.art/lib64/libart.so
为例,readelf -S -W libart.so
输出 Section Header Table
There are 28 section headers, starting at offset 0x7d8af8:
Section Headers:
[Nr] Name Type Address Off Size ES Flg Lk Inf Al
[ 0] NULL 0000000000000000 000000 000000 00 0 0 0
[ 1] .note.android.ident NOTE 0000000000000270 000270 000018 00 A 0 0 4
[ 2] .note.gnu.build-id NOTE 0000000000000288 000288 000020 00 A 0 0 4
[ 3] .dynsym DYNSYM 00000000000002a8 0002a8 021d50 18 A 7 1 8
[ 4] .gnu.version VERSYM 0000000000021ff8 021ff8 002d1c 02 A 3 0 2
[ 5] .gnu.version_r VERNEED 0000000000024d14 024d14 000100 00 A 7 7 4
[ 6] .gnu.hash GNU_HASH 0000000000024e18 024e18 0085d4 00 A 3 0 8
[ 7] .dynstr STRTAB 000000000002d3ec 02d3ec 05c8eb 00 A 0 0 1
[ 8] .rela.dyn LOOS+0x2 0000000000089cd8 089cd8 0005f9 01 A 3 0 8
[ 9] .relr.dyn LOOS+0xfffff00 000000000008a2d8 08a2d8 000490 08 A 0 0 8
[10] .rela.plt RELA 000000000008a768 08a768 002fb8 18 A 3 21 8
[11] .rodata PROGBITS 000000000008d720 08d720 03cec6 00 AMS 0 0 16
[12] .eh_frame_hdr PROGBITS 00000000000ca5e8 0ca5e8 01083c 00 A 0 0 4
[13] .eh_frame PROGBITS 00000000000dae28 0dae28 04de74 00 A 0 0 8
[14] .text PROGBITS 0000000000129000 129000 51c160 00 AX 0 0 512
[15] .plt PROGBITS 0000000000645160 645160 001ff0 00 AX 0 0 16
[16] .data.rel.ro PROGBITS 0000000000648000 648000 00e808 00 WA 0 0 8
[17] .fini_array FINI_ARRAY 0000000000656808 656808 000010 00 WA 0 0 8
[18] .init_array INIT_ARRAY 0000000000656818 656818 000060 00 WA 0 0 8
[19] .dynamic DYNAMIC 0000000000656878 656878 0002b0 10 WA 7 0 8
[20] .got PROGBITS 0000000000656b28 656b28 000978 00 WA 0 0 8
[21] .got.plt PROGBITS 00000000006574a0 6574a0 001000 00 WA 0 0 8
[22] .data PROGBITS 00000000006594a0 6584a0 002879 00 WA 0 0 8
[23] .bss NOBITS 000000000065bd20 65ad19 002c90 00 WA 0 0 8
[24] .comment PROGBITS 0000000000000000 65ad19 00016b 01 MS 0 0 1
[25] .symtab SYMTAB 0000000000000000 65ae88 08d678 18 27 18360 8
[26] .shstrtab STRTAB 0000000000000000 6e8500 00010c 00 0 0 1
[27] .strtab STRTAB 0000000000000000 6e860c 0f04e7 00 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
L (link order), O (extra OS processing required), G (group), T (TLS),
C (compressed), x (unknown), o (OS specific), E (exclude),
p (processor specific)
readelf -p .dynstr libart.so
输出字符串表:
String dump of section '.dynstr':
[ 1] __cxa_atexit
[ e] __cxa_finalize
[ 1d] _ZN3art14AotClassLinkerC2EPNS_11InternTableE
[ 4a] _ZN3art11ClassLinkerC2EPNS_11InternTableEb
[ 75] _ZN3art14AotClassLinkerD2Ev
[ 91] _ZN3art11ClassLinkerD2Ev
[ aa] _ZN3art14AotClassLinkerD0Ev
[ c6] _ZdlPv
[ cd] _ZN3art14AotClassLinker13CanAllocClassEv
[ f6] _ZNK3art7Runtime19IsActiveTransactionEv
[ 11e] _ZN3art7Runtime34AbortTransactionAndThrowAbortErrorEPNS_6ThreadERKNSt3__112basic_stringIcNS3_11char_traitsIcEENS3_9allocatorIcEEEE
[ 1a1] _ZN3art14AotClassLinker15InitializeClassEPNS_6ThreadENS_6HandleINS_6mirror5ClassEEEbb
[ 1f7] _ZNK3art7Runtime29IsActiveStrictTransactionModeEv
[ 229] _ZN3art11ClassLinker15InitializeClassEPNS_6ThreadENS_6HandleINS_6mirror5ClassEEEbb
[ 27c] _ZNK3art2gc4Heap24ObjectIsInBootImageSpaceENS_6ObjPtrINS_6mirror6ObjectEEE
[ 2c7] _ZN3art6mirror6Object12PrettyTypeOfEv
[ 2ed] _ZN3art6mirror5Class16IsThrowableClassEv
[ 316] _ZN7android4base10LogMessageC1EPKcjNS0_11LogSeverityES3_i
readelf -s libart.so
输出符号表:
Symbol table '.dynsym' contains 5774 entries:
Num: Value Size Type Bind Vis Ndx Name
0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND
1: 0000000000000000 0 FUNC GLOBAL DEFAULT UND __cxa_atexit@LIBC (2)
2: 0000000000000000 0 FUNC GLOBAL DEFAULT UND __cxa_finalize@LIBC (2)
3: 0000000000000000 0 FUNC GLOBAL DEFAULT UND _ZdlPv
4: 0000000000000000 0 FUNC GLOBAL DEFAULT UND _ZN7android4base10LogMess
...
809: 0000000000132340 620 FUNC LOCAL HIDDEN 14 art_quick_invoke_stub
810: 00000000001325b0 640 FUNC LOCAL HIDDEN 14 art_quick_invoke_static_s
811: 000000000013ba10 292 FUNC LOCAL HIDDEN 14 art_quick_proxy_invoke_ha
812: 000000000013c100 256 FUNC LOCAL HIDDEN 14 art_quick_instrumentation
813: 000000000013be40 336 FUNC LOCAL HIDDEN 14 art_quick_generic_jni_tra
814: 000000000013bfa0 248 FUNC LOCAL HIDDEN 14 art_quick_to_interpreter_
815: 000000000013c210 480 FUNC LOCAL HIDDEN 14 art_quick_instrumentation
816: 000000000013bd00 304 FUNC LOCAL HIDDEN 14 art_quick_resolution_tram
817: 000000000013bb40 432 FUNC LOCAL HIDDEN 14 art_quick_imt_conflict_tr
818: 000000000013c0a0 80 FUNC LOCAL HIDDEN 14 art_invoke_obsolete_metho
819: 000000000013c400 164 FUNC LOCAL HIDDEN 14 art_quick_deoptimize
...
逻辑
通过 ptrace
可以拿到 PC 寄存器的值,它指向正在执行的代码的地址;拿 pc 去 /proc/pid/maps
里找,看 pc 落在哪块 mmap
上,从而得知这段代码在哪个 so 文件里;so 文件是 ELF 结构,解析出它里面的符号表及其偏移,pc - mmap.start 就是这段代码在这块 mmap 上的偏移,再加上 mmap.offset 内存映射的偏移就是这段代码在 so 文件里的偏移,从而得知这段代码在哪个符号/函数里(函数名)
但是怎么从 pc 回溯整个函数调用栈我还没有想明白
打印
打印调用栈,别看代码这么长,其实关键就是这么几个:
Fields | Desc |
---|---|
xcd_frame.rel_pc | 函数在它所在的内存映射的偏移 |
xcd_frame.map.name | 函数所在 so 文件路径 |
xcd_frame.func_name | 函数名 |
xcd_frame.func_offset | 函数与 pc 的偏移 |
// Writes the thread's backtrace to log_fd.
// Threads whose status is not XCD_THREAD_STATUS_OK are ignored and 0 is returned.
int xcd_thread_record_backtrace(xcd_thread_t *self, int log_fd)
{
    if(XCD_THREAD_STATUS_OK == self->status)
        return xcd_frames_record_backtrace(self->frames, log_fd);

    return 0; //ignore
}
// Writes a "backtrace:" section to log_fd with one line per frame:
//   #NN pc <rel_pc> <map name><optional " (offset 0x...)"><optional " (func+off)">
// Returns 0 on success, or the first non-zero result of a write helper.
int xcd_frames_record_backtrace(xcd_frames_t *self, int log_fd)
{
xcd_frame_t *frame;
xcd_elf_t *elf;
char *name;
char name_buf[512];
char *name_embedded;
char *offset;
char offset_buf[64];
char *func;
char func_buf[512];
int r;
if(0 != (r = xcc_util_write_str(log_fd, "backtrace:\n"))) return r;
TAILQ_FOREACH(frame, &(self->frames), link)
{
//name
// name stays NULL until one of the branches below picks a value
name = NULL;
if(NULL == frame->map)
{
// the frame has no memory map attached
name = "<unknown>";
}
else if(NULL == frame->map->name || '\0' == frame->map->name[0])
{
// unnamed mapping: identify it by its start address
snprintf(name_buf, sizeof(name_buf), "<anonymous:%"XCC_UTIL_FMT_ADDR">", frame->map->start);
name = name_buf;
}
else
{
if(0 != frame->map->elf_start_offset)
{
// the ELF starts at a non-zero offset inside the mapped file:
// append the so name reported by the ELF as "<map name>!<so name>"
elf = xcd_map_get_elf(frame->map, self->pid, (void *)self->maps);
if(NULL != elf)
{
name_embedded = xcd_elf_get_so_name(elf);
if(NULL != name_embedded && strlen(name_embedded) > 0)
{
snprintf(name_buf, sizeof(name_buf), "%s!%s", frame->map->name, name_embedded);
name = name_buf;
}
}
}
// fall back to the plain map name when no so name was found
if(NULL == name) name = frame->map->name;
}
//offset
if(NULL != frame->map && 0 != frame->map->elf_start_offset)
{
snprintf(offset_buf, sizeof(offset_buf), " (offset 0x%"PRIxPTR")", frame->map->elf_start_offset);
offset = offset_buf;
}
else
{
offset = "";
}
//func
if(NULL != frame->func_name)
{
// print "+offset" only when the offset is non-zero
if(frame->func_offset > 0)
snprintf(func_buf, sizeof(func_buf), " (%s+%zu)", frame->func_name, frame->func_offset);
else
snprintf(func_buf, sizeof(func_buf), " (%s)", frame->func_name);
func = func_buf;
}
else
{
func = "";
}
if(0 != (r = xcc_util_write_format(log_fd, " #%02zu pc %0"XCC_UTIL_FMT_ADDR" %s%s%s\n",
frame->num, frame->rel_pc, name, offset, func))) return r;
}
// blank line terminates the section
if(0 != (r = xcc_util_write_str(log_fd, "\n"))) return r;
return 0;
}
输出如下:
backtrace:
#00 pc 000000000000b884 /data/app/xcrash.sample-WeCpVYjROKKgYtuzbHflHg==/lib/arm64/libxcrash.so (xc_test_call_4+24)
#01 pc 000000000000b8c8 /data/app/xcrash.sample-WeCpVYjROKKgYtuzbHflHg==/lib/arm64/libxcrash.so (xc_test_call_3+24)
#02 pc 000000000000b8f8 /data/app/xcrash.sample-WeCpVYjROKKgYtuzbHflHg==/lib/arm64/libxcrash.so (xc_test_call_2+24)
#03 pc 000000000000b920 /data/app/xcrash.sample-WeCpVYjROKKgYtuzbHflHg==/lib/arm64/libxcrash.so (xc_test_call_1+16)
#04 pc 000000000000b9b4 /data/app/xcrash.sample-WeCpVYjROKKgYtuzbHflHg==/lib/arm64/libxcrash.so (xc_test_crash+124)
#05 pc 000000000013f350 /apex/com.android.runtime/lib64/libart.so (art_quick_generic_jni_trampoline+144)
#06 pc 00000000001365b8 /apex/com.android.runtime/lib64/libart.so (art_quick_invoke_static_stub+568)
#07 pc 0000000000145084 /apex/com.android.runtime/lib64/libart.so (_ZN3art9ArtMethod6InvokeEPNS_6ThreadEPjjPNS_6JValueEPKc+276)
#08 pc 00000000002e3bc0 /apex/com.android.runtime/lib64/libart.so (_ZN3art11interpreter34ArtInterpreterToCompiledCodeBridgeEPNS_6ThreadEPNS_9ArtMethodEPNS_11ShadowFrameEtPNS_6JValueE+384)
#09 pc 00000000002deab8 /apex/com.android.runtime/lib64/libart.so (_ZN3art11interpreter6DoCallILb0ELb0EEEbPNS_9ArtMethodEPNS_6ThreadERNS_11ShadowFrameEPKNS_11InstructionEtPNS_6JValueE+928)
#10 pc 00000000005a4e3c /apex/com.android.runtime/lib64/libart.so (MterpInvokeStatic+368)
#11 pc 0000000000130994 /apex/com.android.runtime/lib64/libart.so (mterp_op_invoke_static+20)
#12 pc 00000000005a2564 /apex/com.android.runtime/lib64/libart.so (MterpInvokeVirtual+1456)
#13 pc 0000000000130814 /apex/com.android.runtime/lib64/libart.so (mterp_op_invoke_virtual+20)
#14 pc 00000000005a5154 /apex/com.android.runtime/lib64/libart.so (MterpInvokeStatic+1160)
#15 pc 0000000000130994 /apex/com.android.runtime/lib64/libart.so (mterp_op_invoke_static+20)
...
Stack (Per Frame)
这里打印的是上个章节 Backtrace
描述的函数调用栈里,每一帧(Frame
)对应的栈内存,sp 寄存器指向栈顶
stack:
0000007fe0ef1ff0 0000000be0ef2260
0000007fe0ef1ff8 00000075a5107020
0000007fe0ef2000 0000007fe0ef2001 [stack]
0000007fe0ef2008 0000007511197000
0000007fe0ef2010 00000000000fd000
0000007fe0ef2018 0000007511290018
0000007fe0ef2020 0000007511197000
0000007fe0ef2028 0000007511290018
0000007fe0ef2030 0000007f00000000
0000007fe0ef2038 0000007511197000
0000007fe0ef2040 00000000000f8d50
0000007fe0ef2048 0000000000001000
0000007fe0ef2050 0000000000000000
0000007fe0ef2058 0000000000000000
0000007fe0ef2060 00000075a4ff8000 [anon:libc_malloc]
0000007fe0ef2068 000000006f5df020 /system/framework/arm64/boot-framework.art
#00 0000007fe0ef2070 0000000000000000
0000007fe0ef2078 000000030000ddd5
#01 0000007fe0ef2080 0000007fe0ef2130 [stack]
0000007fe0ef2088 0000000200000001
0000007fe0ef2090 0000007fe0ef20b0 [stack]
0000007fe0ef2098 00000074b9bcc8fc /data/app/xcrash.sample-WeCpVYjROKKgYtuzbHflHg==/lib/arm64/libxcrash.so (xc_test_call_2+28)
#02 0000007fe0ef20a0 0000000000000004
0000007fe0ef20a8 0000000100000000
0000007fe0ef20b0 0000007fe0ef20d0 [stack]
Memory Near XX
打印各个寄存器的值所指向的地址附近的内存,寄存器的值可以通过 ptrace
拿到
memory near x2:
000000751128fd40 0000000000000000 0000000000000000 ................
000000751128fd50 000000751138cd50 0000000000000000 P.8.u...........
000000751128fd60 00005015000050c4 0000007f00000000 .P...P..........
000000751128fd70 0000007511197000 00000000000f8d50 .p..u...P.......
000000751128fd80 0000000000001000 0000000000000000 ................
000000751128fd90 0000000000000000 00000075a4ff8000 ............u...
000000751128fda0 0000000000000003 0000000000000000 ................
000000751128fdb0 00000074b9bcc9dc 0000000000000000 ....t...........
000000751128fdc0 0000000000000000 00000075a4e67000 .........p..u...
000000751128fdd0 00000074c5b3b000 0000000000000001 ....t...........
000000751128fde0 0000007511197000 00000000000fd000 .p..u...........
000000751128fdf0 0000000000000000 0000000000000000 ................
000000751128fe00 0000000000000000 0000000000000000 ................
000000751128fe10 0000000000000000 0000000000000000 ................
000000751128fe20 0000000000000000 0000000000000000 ................
000000751128fe30 0000000000000000 0000000000000000 ................
memory near x3:
0000007511290000 0000007511290060 0000000000000000 `.).u...........
0000007511290010 0000000000000000 0000007511290060 ........`.).u...
0000007511290020 00000075a2606c90 000000751128fd50 .l`.u...P.(.u...
0000007511290030 0000000000000000 0000000000000000 ................
0000007511290040 0000000000000000 79fc7e30c0ff4d9e .........M..0~.y
0000007511290050 0000000000000000 0000000000000000 ................
0000007511290060 0000000000000000 0000000000000000 ................
0000007511290070 0000000000000000 0000000000000000 ................
0000007511290080 0000000000000000 0000000000000000 ................
0000007511290090 0000000000000000 0000000000000000 ................
00000075112900a0 0000000000000000 0000000000000000 ................
00000075112900b0 0000000000000000 0000000000000000 ................
00000075112900c0 0000000000000000 0000000000000000 ................
00000075112900d0 0000000000000000 0000000000000000 ................
00000075112900e0 0000000000000000 0000000000000000 ................
00000075112900f0 0000000000000000 0000000000000000 ................
Memory Map
也就是 /proc/pid/maps
内存映射
memory map:
0000000012c00000-00000000133c0000 rw- 0 7c0000 [anon:dalvik-main space (region space)]
00000000133c0000-0000000013dc0000 --- 0 a00000 >
0000000013dc0000-0000000013f80000 --- 0 1c0000 >
0000000013f80000-0000000013fc0000 rw- 0 40000 >
0000000013fc0000-0000000014100000 --- 0 140000 >
0000000014100000-0000000014140000 rw- 0 40000 >
0000000014140000-0000000014200000 --- 0 c0000 >
0000000014200000-0000000014280000 --- 0 80000 >
0000000014280000-00000000163c0000 --- 0 2140000 >
00000000163c0000-0000000032c00000 rw- 0 1c840000 >
000000006f1a9000-000000006f430000 rw- 0 287000 /system/framework/arm64/boot.art
000000006f430000-000000006f51f000 rw- 0 ef000 /system/framework/arm64/boot-core-libart.art
000000006f51f000-000000006f555000 rw- 0 36000 /system/framework/arm64/boot-okhttp.art
000000006f555000-000000006f596000 rw- 0 41000 /system/framework/arm64/boot-bouncycastle.art
000000006f596000-000000006f5a6000 rw- 0 10000 /system/framework/arm64/boot-apache-xml.art
000000006f5a6000-000000006fe62000 rw- 0 8bc000 /system/framework/arm64/boot-framework.art
000000006fe62000-000000006fe95000 rw- 0 33000 /system/framework/arm64/boot-ext.art
000000006fe95000-000000006ff8c000 rw- 0 f7000 /system/framework/arm64/boot-telephony-common.art
000000006ff8c000-000000006ff9a000 rw- 0 e000 /system/framework/arm64/boot-voip-common.art
000000006ff9a000-000000006ffaf000 rw- 0 15000 /system/framework/arm64/boot-ims-common.art
000000006ffaf000-000000006ffb2000 rw- 0 3000 /system/framework/arm64/boot-android.test.base.art
000000006ffb2000-000000007006b000 r-- 0 b9000 /system/framework/arm64/boot.oat
000000007006b000-0000000070300000 r-x b9000 295000 >
0000000070300000-0000000070301000 rw- 0 1000 [anon:.bss]
0000000070301000-0000000070303000 r-- 0 2000 /system/framework/boot.vdex
0000000070303000-0000000070304000 r-- 34e000 1000 /system/framework/arm64/boot.oat
0000000070304000-0000000070305000 rw- 34f000 1000 >
0000000070305000-000000007034e000 r-- 0 49000 /system/framework/arm64/boot-core-libart.oat
000000007034e000-0000000070453000 r-x 49000 105000 >
0000000070453000-0000000070454000 rw- 0 1000 [anon:.bss]
0000000070454000-0000000070455000 r-- 0 1000 /system/framework/boot-core-libart.vdex
0000000070455000-0000000070456000 r-- 14e000 1000 /system/framework/arm64/boot-core-libart.oat
0000000070456000-0000000070457000 rw- 14f000 1000 >
0000000070457000-0000000070466000 r-- 0 f000 /system/framework/arm64/boot-okhttp.oat
ANR Trace
- 给主线程注册
SIGQUIT
的信号处理器xc_trace_handler
,当主线程收到 SIGQUIT 信号时,恢复xc_trace_dumper
(dumper 线程),也就是说发生 ANR 时主线程是被 SIGQUIT 中断的而不是 SIGKILL (?) - 启动 xc_trace_dumper(dumper 线程),挂起等待被主线程唤醒
- 在内存里找到生成 ANR 报告的函数符号:
_ZN3art7Runtime14DumpForSigQuitERNSt3__113basic_ostreamIcNS1_11char_traitsIcEEEE
- 将
STDERR_FILENO
指向日志文件,调用 ANR 报告函数(它会把 ANR 日志写入STDERR_FILENO
),这样就捕获了 ANR 日志
// Reached via (as listed by the article):
//   XCrash.init
//   NativeHandler.initialize
//   NativeHandler.nativeInit
//   xc_jni_init
//
// Starts the dumper thread and registers the SIGQUIT handler.
//
// Stores the given configuration in the xc_trace_* globals, creates the
// eventfd notifier, registers xc_trace_handler and spawns xc_trace_dumper.
//
// Returns 0 on success (also when the API level is below 21, in which case
// nothing is set up), XCC_ERRNO_SYS when eventfd() fails, otherwise the
// non-zero result of the registration or thread-creation call that failed.
int xc_trace_init(JNIEnv *env,
                  int rethrow,
                  unsigned int logcat_system_lines,
                  unsigned int logcat_events_lines,
                  unsigned int logcat_main_lines,
                  int dump_fds,
                  int dump_network_info)
{
    pthread_t dumper_tid;
    int       rc;

    //SIGQUIT is captured only for ART
    if(xc_common_api_level < 21) return 0;

    //remember whether this is Android Lollipop (5.x), i.e. API level 21 or 22
    if(21 == xc_common_api_level || 22 == xc_common_api_level)
        xc_trace_is_lollipop = 1;
    else
        xc_trace_is_lollipop = 0;

    //no dump has been started yet
    xc_trace_dump_status = XC_TRACE_DUMP_NOT_START;

    //keep the caller's configuration for the dumper thread
    xc_trace_rethrow             = rethrow;
    xc_trace_logcat_system_lines = logcat_system_lines;
    xc_trace_logcat_events_lines = logcat_events_lines;
    xc_trace_logcat_main_lines   = logcat_main_lines;
    xc_trace_dump_fds            = dump_fds;
    xc_trace_dump_network_info   = dump_network_info;

    //init for JNI callback
    xc_trace_init_callback(env);

    //create the event FD
    xc_trace_notifier = eventfd(0, EFD_CLOEXEC);
    if(xc_trace_notifier < 0) return XCC_ERRNO_SYS;

    //register the signal handler
    rc = xcc_signal_trace_register(xc_trace_handler);
    if(0 != rc) goto close_notifier;

    //create the thread that dumps the trace
    rc = pthread_create(&dumper_tid, NULL, xc_trace_dumper, NULL);
    if(0 != rc) goto unregister_handler;

    return 0;

    //unwind in reverse order of setup
 unregister_handler:
    xcc_signal_trace_unregister();
 close_notifier:
    close(xc_trace_notifier);
    xc_trace_notifier = -1;

    return rc;
}
// Dumper thread entry point. When an ANR happens the process receives SIGQUIT;
// this thread is then woken up and calls the ART runtime's dump function to
// produce the ANR trace.
//
// The thread detaches itself, attaches to the Java VM, and then loops: it
// blocks on the xc_trace_notifier event FD, writes one trace log per wake-up,
// optionally re-sends SIGQUIT, and reports the log path through the JNI
// callback. The loop ends when the process has already crashed or when
// gettimeofday() fails.
//
// arg is unused. Always returns NULL.
static void *xc_trace_dumper(void *arg)
{
    JNIEnv         *env = NULL;
    uint64_t        data;
    uint64_t        trace_time;
    int             fd;
    int             notifier;
    struct timeval  tv;
    char            pathname[1024];
    jstring         j_pathname;

    (void)arg;

    pthread_detach(pthread_self());

    JavaVMAttachArgs attach_args = {
        .version = XC_JNI_VERSION,
        .name    = "xcrash_trace_dp",
        .group   = NULL
    };
    if(JNI_OK != (*xc_common_vm)->AttachCurrentThread(xc_common_vm, &env, &attach_args)) goto exit;

    while(1)
    {
        //block here, waiting for sigquit
        XCC_UTIL_TEMP_FAILURE_RETRY(read(xc_trace_notifier, &data, sizeof(data)));

        //check if process already crashed
        if(xc_common_native_crashed || xc_common_java_crashed) break;

        //trace time, in microseconds
        if(0 != gettimeofday(&tv, NULL)) break;
        trace_time = (uint64_t)(tv.tv_sec) * 1000 * 1000 + (uint64_t)tv.tv_usec;

        //Keep only one current trace.
        if(0 != xc_trace_logs_clean()) continue;

        //create and open log file
        if((fd = xc_common_open_trace_log(pathname, sizeof(pathname), trace_time)) < 0) continue;

        //write header info
        if(0 != xc_trace_write_header(fd, trace_time)) goto end;

        //write trace info from ART runtime
        if(0 != xcc_util_write_format(fd, XCC_UTIL_THREAD_SEP"Cmd line: %s\n", xc_common_process_name)) goto end;
        if(0 != xcc_util_write_str(fd, "Mode: ART DumpForSigQuit\n")) goto end;
        if(0 != xc_trace_load_symbols())
        {
            if(0 != xcc_util_write_str(fd, "Failed to load symbols.\n")) goto end;
            goto skip;
        }
        if(0 != xc_trace_check_address_valid())
        {
            if(0 != xcc_util_write_str(fd, "Failed to check runtime address.\n")) goto end;
            goto skip;
        }

        //point stderr at the log file so that what the runtime dump writes to
        //stderr ends up in the log
        if(dup2(fd, STDERR_FILENO) < 0)
        {
            if(0 != xcc_util_write_str(fd, "Failed to duplicate FD.\n")) goto end;
            goto skip;
        }

        xc_trace_dump_status = XC_TRACE_DUMP_ON_GOING;

        //sigsetjmp returns 0 on the direct call; a non-zero return means something
        //longjmp'ed back here and the dump is skipped
        //NOTE(review): presumably a signal handler recovering from a crash inside
        //the ART dump does the longjmp -- it is not shown here, confirm
        if(sigsetjmp(jmpenv, 1) == 0)
        {
            if(xc_trace_is_lollipop)
                xc_trace_libart_dbg_suspend();
            xc_trace_libart_runtime_dump(*xc_trace_libart_runtime_instance, xc_trace_libcpp_cerr);
            if(xc_trace_is_lollipop)
                xc_trace_libart_dbg_resume();
        }
        else
        {
            fflush(NULL);
            XCD_LOG_WARN("longjmp to skip dumping trace\n");
        }

        //stop redirecting stderr into the log file
        dup2(xc_common_fd_null, STDERR_FILENO);

    skip:
        if(0 != xcc_util_write_str(fd, "\n"XCC_UTIL_THREAD_END"\n")) goto end;

        //write other info
        if(0 != xcc_util_record_logcat(fd, xc_common_process_id, xc_common_api_level, xc_trace_logcat_system_lines, xc_trace_logcat_events_lines, xc_trace_logcat_main_lines)) goto end;
        if(xc_trace_dump_fds)
            if(0 != xcc_util_record_fds(fd, xc_common_process_id)) goto end;
        if(xc_trace_dump_network_info)
            if(0 != xcc_util_record_network_info(fd, xc_common_process_id, xc_common_api_level)) goto end;
        if(0 != xcc_meminfo_record(fd, xc_common_process_id)) goto end;

    end:
        //close log file
        xc_common_close_trace_log(fd);

        //rethrow SIGQUIT to ART Signal Catcher
        if(xc_trace_rethrow && (XC_TRACE_DUMP_ART_CRASH != xc_trace_dump_status)) xc_trace_send_sigquit();
        xc_trace_dump_status = XC_TRACE_DUMP_END;

        //JNI callback
        //Do we need to implement an emergency buffer for disk exhausted?
        if(NULL == xc_trace_cb_method) continue;
        if(NULL == (j_pathname = (*env)->NewStringUTF(env, pathname))) continue;
        (*env)->CallStaticVoidMethod(env, xc_common_cb_class, xc_trace_cb_method, j_pathname, NULL);
        XC_JNI_IGNORE_PENDING_EXCEPTION();
        (*env)->DeleteLocalRef(env, j_pathname);
    }

    (*xc_common_vm)->DetachCurrentThread(xc_common_vm);

 exit:
    //Take a local copy before resetting the global: resetting it first and then
    //calling close() on the global would call close(-1) and leak the event FD.
    //The global is set to -1 before the close so that other code consulting
    //xc_trace_notifier never sees an already-closed descriptor.
    notifier = xc_trace_notifier;
    xc_trace_notifier = -1;
    if(notifier >= 0) close(notifier);

    return NULL;
}