Skip to content

Commit 8df984f

Browse files
authored
Profiler changes (#95)
1 parent 56017f6 commit 8df984f

File tree

4 files changed

+172
-137
lines changed

4 files changed

+172
-137
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,5 @@ ETTrace-iphoneos.xcarchive/
88
output.json
99
output.folded
1010
.swiftpm
11-
.build
11+
.build
12+
output_*.json
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#include "EMGStackTraceRecorder.h"
2+
3+
#import <QuartzCore/QuartzCore.h>
4+
#import <mach-o/arch.h>
5+
#import <mach/mach.h>
6+
#import <pthread.h>
7+
#import <deque>
8+
#import <iostream>
9+
#import <mutex>
10+
#import <unordered_map>
11+
12+
extern "C" {
13+
void FIRCLSWriteThreadStack(thread_t thread, uintptr_t *frames, uint64_t framesCapacity, uint64_t *framesWritten);
14+
}
15+
16+
static const int kMaxFramesPerStack = 1024;
17+
18+
/// Reports a failed Mach call on stderr and hands the status back.
///
/// @param result Status code returned by a Mach API call.
/// @return `result`, unchanged, so a call can be wrapped inline.
kern_return_t checkMachCall(kern_return_t result) {
  if (result == KERN_SUCCESS) {
    return result;
  }

  std::cerr << "Mach call failed with " << result << std::endl;
  return result;
}
24+
25+
/// Resolves the display name stored for a sampled thread.
///
/// The main thread is labelled "Main Thread". Any other thread is looked up
/// through its pthread handle; when the handle or the name query is
/// unavailable the placeholder "Failed to get name" is kept.
///
/// @param threadId Mach thread this record describes.
/// @param mainThreadId Mach thread of the main thread, used for the comparison.
Thread::Thread(thread_t threadId, thread_t mainThreadId) {
  if (threadId == mainThreadId) {
    name = "Main Thread";
    return;
  }

  // Placeholder that survives if either lookup below fails.
  name = "Failed to get name";

  pthread_t pthreadHandle = pthread_from_mach_thread_np(threadId);
  if (!pthreadHandle) {
    return;
  }

  char nameBuffer[1024];
  if (pthread_getname_np(pthreadHandle, nameBuffer, sizeof(nameBuffer)) == 0) {
    name = nameBuffer;
  }
}
42+
43+
std::vector<ThreadSummary> EMGStackTraceRecorder::collectThreadSummaries() {
44+
std::lock_guard<std::mutex> lockGuard(threadsLock);
45+
46+
std::vector<ThreadSummary> summaries;
47+
for (const auto &[threadId, thread] : threadsMap) {
48+
std::vector<StackSummary> stackSummaries;
49+
for (const auto &stack : thread.stacks) {
50+
std::vector<uintptr_t> addresses;
51+
for (auto i = stack.storageStartIndex; i < stack.storageEndIndex; i++) {
52+
addresses.emplace_back(addressStorage[i]);
53+
}
54+
// Reverse the stack addresses to get the correct order
55+
std::reverse(addresses.begin(), addresses.end());
56+
stackSummaries.emplace_back(stack.time, addresses);
57+
}
58+
summaries.emplace_back(threadId, thread.name, stackSummaries);
59+
}
60+
return summaries;
61+
}
62+
63+
/// Takes one stack sample of the requested threads and appends it to the recorder.
///
/// Holds `threadsLock` for the duration of the call. Each sampled thread is
/// suspended, its stack is captured with FIRCLSWriteThreadStack, and it is
/// resumed before any bookkeeping is done for it.
///
/// @param recordAllThreads When true, every thread reported by task_threads is
///        sampled; otherwise only `mainMachThread` is sampled.
/// @param mainMachThread Mach thread of the main thread; also forwarded to the
///        Thread constructor the first time a thread is seen.
/// @param etTraceThread Thread that is skipped when sampling.
void EMGStackTraceRecorder::recordStackForAllThreads(bool recordAllThreads, thread_t mainMachThread, thread_t etTraceThread) {
  std::lock_guard<std::mutex> lockGuard(threadsLock);
  thread_act_array_t threads = nullptr;
  mach_msg_type_number_t threadCount = 0;
  // True only when `threads` points at memory handed to us by task_threads,
  // which this function is then responsible for releasing.
  bool ownsThreadList = false;
  if (recordAllThreads) {
    kern_return_t result = checkMachCall(task_threads(mach_task_self(), &threads, &threadCount));
    if (result == KERN_SUCCESS) {
      ownsThreadList = true;
    } else {
      threads = nullptr;
      threadCount = 0;
    }
  } else {
    threads = &mainMachThread;
    threadCount = 1;
  }

  // This time gets less accurate for later threads, but still good
  CFTimeInterval time = CACurrentMediaTime();
  for (mach_msg_type_number_t i = 0; i < threadCount; i++) {
    if (threads[i] == etTraceThread) {
      continue;
    }

    uintptr_t frames[kMaxFramesPerStack];
    uint64_t frameCount = 0;

    if (thread_suspend(threads[i]) != KERN_SUCCESS) {
      // In theory, the thread may have been destroyed by now, so we skip it if this fails
      continue;
    }
    // BEGIN REENTRANT SECTION
    FIRCLSWriteThreadStack(threads[i], frames, kMaxFramesPerStack, &frameCount);
    // END REENTRANT SECTION
    checkMachCall(thread_resume(threads[i]));

    // Never read past the local buffer, even if the unwinder reports more
    // frames than the capacity it was given.
    if (frameCount > static_cast<uint64_t>(kMaxFramesPerStack)) {
      frameCount = static_cast<uint64_t>(kMaxFramesPerStack);
    }

    // Creates the Thread record (and resolves its name) only on first sighting.
    auto emplaceResult = threadsMap.try_emplace(threads[i], threads[i], mainMachThread);
    size_t startIndex = addressStorage.size();
    for (uint64_t frameIndex = 0; frameIndex < frameCount; frameIndex++) {
      addressStorage.emplace_back(frames[frameIndex]);
    }
    size_t endIndex = addressStorage.size();
    emplaceResult.first->second.stacks.emplace_back(time, startIndex, endIndex);
  }

  if (ownsThreadList && threads != nullptr) {
    // task_threads allocates the returned array in this task's address space;
    // without this every sample would leak it.
    // NOTE(review): the per-thread port rights returned alongside the array are
    // intentionally not released here. threadsMap is keyed by the port name, so
    // dropping them could allow a name to be recycled for a different thread.
    // Confirm that trade-off before adding mach_port_deallocate.
    checkMachCall(vm_deallocate(mach_task_self(),
                                reinterpret_cast<vm_address_t>(threads),
                                sizeof(thread_act_t) * threadCount));
  }
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#import <deque>
2+
#import <vector>
3+
#import <unordered_map>
4+
#import <mach/mach.h>
5+
#import <QuartzCore/QuartzCore.h>
6+
#import <iostream>
7+
8+
/// One sampled call stack: a timestamp plus its own copy of the frame addresses.
struct StackSummary {
  CFTimeInterval time;           // Timestamp recorded for the sample.
  std::vector<uintptr_t> stack;  // Frame addresses belonging to the sample.

  /// Copies `stack` into the summary.
  ///
  /// The vector is only read, so it is taken by const reference; const vectors
  /// and temporaries are accepted as well as plain lvalues.
  StackSummary(CFTimeInterval time, const std::vector<uintptr_t> &stack) : time(time), stack(stack) {
  }
};
15+
16+
/// Everything recorded for one thread: its id, display name and sampled stacks.
struct ThreadSummary {
  thread_t threadId;                 // Mach thread the samples belong to.
  std::string name;                  // Display name of the thread.
  std::vector<StackSummary> stacks;  // Samples recorded for the thread.

  /// Copies `name` and `stacks` into the summary.
  ///
  /// `stacks` is only read, so it is taken by const reference like `name`;
  /// const vectors and temporaries are accepted as well as plain lvalues.
  ThreadSummary(thread_t threadId, const std::string &name, const std::vector<StackSummary> &stacks) : threadId(threadId), name(name), stacks(stacks) {
  }
};
24+
25+
/// Index range of one sampled stack plus the time it was taken.
struct Stack {
  CFTimeInterval time;
  size_t storageStartIndex;  // Inclusive
  size_t storageEndIndex;    // Exclusive

  Stack(CFTimeInterval time, size_t storageStartIndex, size_t storageEndIndex)
      : time(time),
        storageStartIndex(storageStartIndex),
        storageEndIndex(storageEndIndex) {}
};
33+
34+
/// Samples and display name kept for a single thread.
struct Thread {
  std::deque<Stack> stacks;  // One entry per recorded sample.
  std::string name;          // Display name, assigned by the constructor.

  /// @param threadId Mach thread this record describes.
  /// @param mainThreadId Mach thread of the main thread.
  Thread(thread_t threadId, thread_t mainThreadId);
};
40+
41+
/// Records stack samples per thread and hands them out as summaries.
///
/// NOTE(review): this class uses std::mutex and its sibling structs use
/// std::string, but the header includes neither <mutex> nor <string> itself;
/// it presumably compiles through transitive includes — consider adding them.
class EMGStackTraceRecorder {
 private:
  std::unordered_map<unsigned int, Thread> threadsMap;  // Keyed by Mach thread id.
  std::mutex threadsLock;                               // Taken by both public methods.
  std::deque<uintptr_t> addressStorage;                 // Frame addresses of all samples, back to back.

 public:
  /// Samples either every thread or only `mainMachThread`, skipping `etTraceThread`.
  void recordStackForAllThreads(bool recordAllThreads, thread_t mainMachThread, thread_t etTraceThread);

  /// Returns a copy of everything recorded so far, grouped by thread.
  std::vector<ThreadSummary> collectThreadSummaries();
};

ETTrace/Tracer/EMGTracer.mm

Lines changed: 16 additions & 136 deletions
Original file line numberDiff line numberDiff line change
@@ -15,29 +15,16 @@
1515
#import <mach-o/arch.h>
1616
#import <sys/utsname.h>
1717
#import <QuartzCore/QuartzCore.h>
18+
#import "EMGStackTraceRecorder.h"
1819

19-
static const int kMaxFramesPerStack = 512;
2020
static NSThread *sStackRecordingThread = nil;
21-
typedef struct {
22-
CFTimeInterval time;
23-
uint64_t frameCount;
24-
uintptr_t frames[kMaxFramesPerStack];
25-
} Stack;
26-
27-
typedef struct {
28-
std::vector<Stack> *stacks;
29-
char name[256];
30-
} Thread;
31-
static std::map<unsigned int, Thread *> *sThreadsMap;
32-
static std::mutex sThreadsLock;
33-
34-
static BOOL sRecordAllThreads = false;
3521

3622
static thread_t sMainMachThread = {0};
37-
static thread_t sETTraceThread = {0};
3823

39-
extern "C" {
40-
void FIRCLSWriteThreadStack(thread_t thread, uintptr_t *frames, uint64_t framesCapacity, uint64_t *framesWritten);
24+
// To avoid static initialization order fiasco, we access it from a function
25+
EMGStackTraceRecorder &getRecorder() {
26+
static EMGStackTraceRecorder recorder;
27+
return recorder;
4128
}
4229

4330
@implementation EMGTracer
@@ -55,19 +42,16 @@ + (void)stopRecording:(void (^)(NSDictionary *))stopped {
5542
}
5643

5744
+ (NSDictionary *)getResults {
58-
sThreadsLock.lock();
5945
NSMutableDictionary <NSString *, NSDictionary<NSString *, id> *> *threads = [NSMutableDictionary dictionary];
60-
61-
std::map<unsigned int, Thread *>::iterator it;
62-
for (it = sThreadsMap->begin(); it != sThreadsMap->end(); it++) {
63-
Thread thread = *it->second;
64-
NSString *threadId = [[NSNumber numberWithUnsignedInt:it->first] stringValue];
46+
47+
auto threadSummaries = getRecorder().collectThreadSummaries();
48+
for (const auto &thread : threadSummaries) {
49+
NSString *threadId = [@(thread.threadId) stringValue];
6550
threads[threadId] = @{
66-
@"name": [NSString stringWithFormat:@"%s", thread.name],
67-
@"stacks": [self arrayFromStacks: *thread.stacks]
51+
@"name": @(thread.name.c_str()),
52+
@"stacks": [self arrayFromStacks:thread.stacks]
6853
};
6954
}
70-
sThreadsLock.unlock();
7155

7256
const NXArchInfo *archInfo = NXGetLocalArchInfo();
7357
NSString *cpuType = [NSString stringWithUTF8String:archInfo->description];
@@ -83,13 +67,12 @@ + (NSDictionary *)getResults {
8367
};
8468
}
8569

86-
+ (NSArray <NSDictionary <NSString *, id> *> *) arrayFromStacks: (std::vector<Stack>)stacks {
70+
+ (NSArray <NSDictionary <NSString *, id> *> *) arrayFromStacks: (const std::vector<StackSummary> &)stacks {
8771
NSMutableArray <NSDictionary <NSString *, id> *> *threadStacks = [NSMutableArray array];
8872
for (const auto &cStack : stacks) {
8973
NSMutableArray <NSNumber *> *stack = [NSMutableArray array];
90-
// Add the addrs in reverse order so that they start with the lowest frame, e.g. `start`
91-
for (int j = (int)cStack.frameCount - 1; j >= 0; j--) {
92-
[stack addObject:@((NSUInteger)cStack.frames[j])];
74+
for (const auto &address : cStack.stack) {
75+
[stack addObject:@((NSUInteger)address)];
9376
}
9477
NSDictionary *stackDictionary = @{
9578
@"stack": [stack copy],
@@ -139,103 +122,6 @@ + (NSString *)deviceName {
139122
return [NSString stringWithCString:systemInfo.machine encoding:NSUTF8StringEncoding];
140123
}
141124

142-
Thread* createThread(thread_t threadId)
143-
{
144-
Thread *thread = new Thread;
145-
146-
if(threadId == sMainMachThread) {
147-
strcpy(thread->name,"Main Thread");
148-
} else {
149-
// Get thread Name
150-
char name[256];
151-
pthread_t pt = pthread_from_mach_thread_np(threadId);
152-
if (pt) {
153-
name[0] = '\0';
154-
int rc = pthread_getname_np(pt, name, sizeof name);
155-
strcpy(thread->name, name);
156-
}
157-
}
158-
159-
// Create stacks vector
160-
thread->stacks = new std::vector<Stack>;
161-
thread->stacks->reserve(400);
162-
163-
return thread;
164-
}
165-
166-
+ (void)recordStackForAllThreads
167-
{
168-
thread_act_array_t threads;
169-
mach_msg_type_number_t thread_count;
170-
if (sRecordAllThreads) {
171-
if (task_threads(mach_task_self(), &threads, &thread_count) != KERN_SUCCESS) {
172-
thread_count = 0;
173-
}
174-
} else {
175-
threads = &sMainMachThread;
176-
thread_count = 1;
177-
}
178-
179-
std::map<thread_t, Stack *> stackMap;
180-
for (mach_msg_type_number_t i = 0; i < thread_count; i++) {
181-
if (threads[i] == sETTraceThread) {
182-
continue;
183-
}
184-
185-
Stack *stack = new Stack;
186-
stackMap.insert(std::pair<unsigned int, Stack *>(threads[i], stack));
187-
}
188-
189-
// Suspend all threads but ETTrace's
190-
for (mach_msg_type_number_t i = 0; i < thread_count; i++) {
191-
if (threads[i] != sETTraceThread) {
192-
thread_suspend(threads[i]);
193-
}
194-
}
195-
196-
CFTimeInterval time = CACurrentMediaTime();
197-
for (mach_msg_type_number_t i = 0; i < thread_count; i++) {
198-
if (threads[i] == sETTraceThread) {
199-
continue;
200-
}
201-
202-
Stack *stack = stackMap.at(threads[i]);
203-
stack->time = time;
204-
FIRCLSWriteThreadStack(threads[i], stack->frames, kMaxFramesPerStack, &(stack->frameCount));
205-
}
206-
207-
for (mach_msg_type_number_t i = 0; i < thread_count; i++) {
208-
if (threads[i] != sETTraceThread)
209-
thread_resume(threads[i]);
210-
}
211-
212-
std::vector<Stack> *threadStack;
213-
std::map<thread_t, Stack *>::iterator it;
214-
sThreadsLock.lock();
215-
for (it = stackMap.begin(); it != stackMap.end(); it++) {
216-
thread_t t_id = it->first;
217-
if (sThreadsMap->find(t_id) == sThreadsMap->end()) {
218-
Thread *thread = createThread(t_id);
219-
// Add to hash map
220-
sThreadsMap->insert(std::pair<thread_t, Thread *>(t_id, thread));
221-
222-
threadStack = thread->stacks;
223-
} else {
224-
threadStack = sThreadsMap->at(t_id)->stacks;
225-
}
226-
Stack *stack = it->second;
227-
try {
228-
threadStack->emplace_back(*stack);
229-
} catch (const std::length_error& le) {
230-
fflush(stdout);
231-
fflush(stderr);
232-
throw le;
233-
}
234-
delete stack;
235-
}
236-
sThreadsLock.unlock();
237-
}
238-
239125
+ (void)setup {
240126
sMainMachThread = mach_thread_self();
241127
EMGBeginCollectingLibraries();
@@ -256,18 +142,12 @@ + (void)setupStackRecording:(BOOL) recordAllThreads
256142
// usleep is guaranteed to sleep more than that, in practice ~5ms. We could use a
257143
// dispatch_timer, which at least tries to compensate for drift etc., but the
258144
// timer's queue could theoretically end up run on the main thread
259-
sRecordAllThreads = recordAllThreads;
260-
261-
sThreadsMap = new std::map<unsigned int, Thread *>;
262-
263145
sStackRecordingThread = [[NSThread alloc] initWithBlock:^{
264-
if (!sETTraceThread) {
265-
sETTraceThread = mach_thread_self();
266-
}
146+
thread_t etTraceThread = mach_thread_self();
267147

268148
NSThread *thread = [NSThread currentThread];
269149
while (!thread.cancelled) {
270-
[self recordStackForAllThreads];
150+
getRecorder().recordStackForAllThreads(recordAllThreads, sMainMachThread, etTraceThread);
271151
usleep(4500);
272152
}
273153
}];

0 commit comments

Comments
 (0)