-
Notifications
You must be signed in to change notification settings - Fork 838
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Profiler improvements #2724
base: main
Are you sure you want to change the base?
Profiler improvements #2724
Changes from 4 commits
066652d
adac799
f250291
91c0fdf
8911d4c
ec67287
dce2a28
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,15 +16,28 @@ limitations under the License. | |
#ifndef TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ | ||
#define TENSORFLOW_LITE_MICRO_MICRO_PROFILER_H_ | ||
|
||
#include <cinttypes> | ||
#include <cstdint> | ||
#include <cstring> | ||
|
||
#include "tensorflow/lite/kernels/internal/compatibility.h" | ||
#include "tensorflow/lite/micro/compatibility.h" | ||
#include "tensorflow/lite/micro/micro_log.h" | ||
#include "tensorflow/lite/micro/micro_profiler_interface.h" | ||
#include "tensorflow/lite/micro/micro_time.h" | ||
|
||
namespace tflite { | ||
|
||
enum class MicroProfilerLogFormat { | ||
HumanReadable, | ||
Csv, | ||
}; | ||
|
||
// MicroProfiler creates a common way to gain fine-grained insight into runtime | ||
// performance. Bottleck operators can be identified along with slow code | ||
// performance. Bottleneck operators can be identified along with slow code | ||
// sections. This can be used in conjunction with running the relevant micro | ||
// benchmark to evaluate end-to-end performance. | ||
template <int MAX_EVENTS> | ||
class MicroProfiler : public MicroProfilerInterface { | ||
public: | ||
MicroProfiler() = default; | ||
|
@@ -33,67 +46,185 @@ class MicroProfiler : public MicroProfilerInterface { | |
// Marks the start of a new event and returns an event handle that can be used | ||
// to mark the end of the event via EndEvent. The lifetime of the tag | ||
// parameter must exceed that of the MicroProfiler. | ||
virtual uint32_t BeginEvent(const char* tag) override; | ||
uint32_t BeginEvent(const char* tag) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is an override of the abstract base class, and should remain as such. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
if (num_events_ == MAX_EVENTS) { | ||
MicroPrintf( | ||
"MicroProfiler errored out because total number of events exceeded " | ||
"the maximum of %d.", | ||
MAX_EVENTS); | ||
TFLITE_ASSERT_FALSE; | ||
} | ||
|
||
tags_[num_events_] = tag; | ||
start_ticks_[num_events_] = GetCurrentTimeTicks(); | ||
end_ticks_[num_events_] = start_ticks_[num_events_] - 1; | ||
return num_events_++; | ||
} | ||
|
||
// Marks the end of an event associated with event_handle. It is the | ||
// responsibility of the caller to ensure than EndEvent is called once and | ||
// only once per event_handle. | ||
// | ||
// If EndEvent is called more than once for the same event_handle, the last | ||
// call will be used as the end of event marker. If EndEvent is called 0 times | ||
// call will be used as the end of event marker.If EndEvent is called 0 times | ||
// for a particular event_handle, the duration of that event will be 0 ticks. | ||
virtual void EndEvent(uint32_t event_handle) override; | ||
void EndEvent(uint32_t event_handle) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is an override of the abstract base class, and should remain as such. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
TFLITE_DCHECK(event_handle < MAX_EVENTS); | ||
end_ticks_[event_handle] = GetCurrentTimeTicks(); | ||
} | ||
|
||
// Clears all the events that have been currently profiled. | ||
void ClearEvents() { num_events_ = 0; } | ||
void ClearEvents() { | ||
num_events_ = 0; | ||
num_tag_groups_ = 0; | ||
} | ||
|
||
// Returns the sum of the ticks taken across all the events. This number | ||
// is only meaningful if all of the events are disjoint (the end time of | ||
// event[i] <= start time of event[i+1]). | ||
uint32_t GetTotalTicks() const; | ||
uint32_t GetTotalTicks() const { | ||
int32_t ticks = 0; | ||
for (int i = 0; i < num_events_; ++i) { | ||
ticks += end_ticks_[i] - start_ticks_[i]; | ||
} | ||
return ticks; | ||
} | ||
|
||
// Prints the profiling information of each of the events in human readable | ||
// form. | ||
void Log() const; | ||
void Log(MicroProfilerLogFormat format) const { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should have a default argument of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why CSV though? This method logs in human readable by default? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added |
||
#if !defined(TF_LITE_STRIP_ERROR_STRINGS) | ||
switch (format) { | ||
case MicroProfilerLogFormat::HumanReadable: | ||
for (int i = 0; i < num_events_; ++i) { | ||
uint32_t ticks = end_ticks_[i] - start_ticks_[i]; | ||
uint64_t us = TicksToUs(ticks); | ||
MicroPrintf("%s took %" PRIu64 ".%" PRIu64 " ms (%u ticks)", tags_[i], | ||
us / 1000, us % 1000, ticks); | ||
} | ||
break; | ||
|
||
// Prints the profiling information of each of the events in CSV (Comma | ||
// Separated Value) form. | ||
void LogCsv() const; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
case MicroProfilerLogFormat::Csv: | ||
MicroPrintf("\"Event\",\"Tag\",\"Ms\",\"Ticks\""); | ||
for (int i = 0; i < num_events_; ++i) { | ||
#if defined(HEXAGON) || defined(CMSIS_NN) | ||
int ticks = end_ticks_[i] - start_ticks_[i]; | ||
MicroPrintf("%d,%s,%u,%d", i, tags_[i], TicksToMs(ticks), ticks); | ||
#else | ||
uint32_t ticks = end_ticks_[i] - start_ticks_[i]; | ||
uint64_t us = TicksToUs(ticks); | ||
MicroPrintf("%d,%s,%" PRIu64 ".%" PRIu64 ",%" PRIu32, i, tags_[i], | ||
us / 1000, us % 1000, ticks); | ||
#endif | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changing the CSV format probably won't pass review, as it most likely breaks countless internal Google, and external third-party scripts. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed the |
||
} | ||
break; | ||
} | ||
#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) | ||
} | ||
|
||
// Prints total ticks for each unique tag in CSV format. | ||
// Output will have one row for each unique tag along with the | ||
// total ticks summed across all events with that particular tag. | ||
void LogTicksPerTagCsv(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method cannot be removed as it breaks existing applications. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added it back There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a policy to never break stuff like this? IMO duplicating APIs like this is also not great. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @andresovela Let's just say, that at the present time, no one is allowed to break anything. 🙂 |
||
// Prints the profiling information of each of the events in human readable | ||
// form, grouped per tag, sorted by execution time. | ||
void LogGrouped(MicroProfilerLogFormat format) { | ||
#if !defined(TF_LITE_STRIP_ERROR_STRINGS) | ||
for (int i = 0; i < num_events_; ++i) { | ||
// Find if the tag already exists in uniqueTags | ||
TagGroup& tag_group = GetTagGroup(tags_[i]); | ||
|
||
uint32_t ticks = end_ticks_[i] - start_ticks_[i]; | ||
tag_group.tag = tags_[i]; | ||
tag_group.ticks += ticks; | ||
tag_group.tag_count++; | ||
} | ||
|
||
SortTagGroups(); | ||
|
||
switch (format) { | ||
case MicroProfilerLogFormat::HumanReadable: { | ||
MicroPrintf("Cumulative event times:"); | ||
MicroPrintf("%-8s %-32s %-12s %-12s", "Count", "Tag", "Ticks", "Time"); | ||
uint64_t total_ticks = 0; | ||
uint64_t us; | ||
for (int i = 0; i < num_tag_groups_; ++i) { | ||
total_ticks += tag_groups_[i].ticks; | ||
us = TicksToUs(tag_groups_[i].ticks); | ||
MicroPrintf("%-8d %-32s %-12d %" PRIu64 ".%03" PRIu64 " ms", | ||
tag_groups_[i].tag_count, tag_groups_[i].tag, | ||
tag_groups_[i].ticks, us / 1000, us % 1000); | ||
} | ||
us = TicksToUs(total_ticks); | ||
MicroPrintf("\nTotal time: %" PRIu64 ".%03" PRIu64 " ms (%lld ticks)", | ||
us / 1000, us % 1000, total_ticks); | ||
break; | ||
} | ||
case MicroProfilerLogFormat::Csv: { | ||
MicroPrintf("\"Tag\",\"Total ms\",\"Total ticks\""); | ||
for (int i = 0; i < num_tag_groups_; ++i) { | ||
uint64_t us = TicksToUs(tag_groups_[i].ticks); | ||
MicroPrintf("%s, %" PRIu64 ".%" PRIu64 ", %u", tag_groups_[i].tag, | ||
us / 1000, us % 1000, tag_groups_[i].ticks); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Changing the CSV format probably won't pass review, as it most likely breaks countless internal Google, and external third-party scripts. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed the |
||
break; | ||
} | ||
} | ||
#endif // !defined(TF_LITE_STRIP_ERROR_STRINGS) | ||
} | ||
|
||
private: | ||
// Maximum number of events that this class can keep track of. The | ||
// MicroProfiler will abort if AddEvent is called more than kMaxEvents number | ||
// of times. Increase this number if you need more events. | ||
static constexpr int kMaxEvents = 4096; | ||
|
||
const char* tags_[kMaxEvents]; | ||
uint32_t start_ticks_[kMaxEvents]; | ||
uint32_t end_ticks_[kMaxEvents]; | ||
const char* tags_[MAX_EVENTS]; | ||
uint32_t start_ticks_[MAX_EVENTS]; | ||
uint32_t end_ticks_[MAX_EVENTS]; | ||
int num_events_ = 0; | ||
int num_tag_groups_ = 0; | ||
|
||
struct TicksPerTag { | ||
struct TagGroup { | ||
const char* tag; | ||
uint32_t ticks; | ||
uint32_t tag_count; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only used by the "human readable" log format. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What should I do about that? #1290 introduced
which was "only" used by the CSV log format. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment is really just there for @suleshahid to see. |
||
}; | ||
|
||
// In practice, the number of tags will be much lower than the number of | ||
// events. But it is theoretically possible that each event to be unique and | ||
// hence we allow total_ticks_per_tag to have kMaxEvents entries. | ||
TicksPerTag total_ticks_per_tag[kMaxEvents] = {}; | ||
// hence we allow total_ticks_per_tag to have MAX_EVENTS entries. | ||
TagGroup tag_groups_[MAX_EVENTS] = {}; | ||
|
||
// Helper function to find the index of a tag in the cumulative array | ||
TagGroup& GetTagGroup(const char* tag) { | ||
for (int i = 0; i < num_tag_groups_; ++i) { | ||
if (strcmp(tag_groups_[i].tag, tag) == 0) { | ||
return tag_groups_[i]; | ||
} | ||
} | ||
|
||
int FindExistingOrNextPosition(const char* tag_name); | ||
// Tag not found, so we create a new entry | ||
// There should always be space since the array of tag groups | ||
// is just as big as the array of events | ||
tag_groups_[num_tag_groups_].tag = tag; | ||
tag_groups_[num_tag_groups_].ticks = 0; | ||
tag_groups_[num_tag_groups_].tag_count = 0; | ||
return tag_groups_[num_tag_groups_++]; | ||
} | ||
|
||
// Helper function to sort the tag groups by ticks in descending order | ||
// Simple bubble sort implementation | ||
void SortTagGroups() { | ||
for (int i = 0; i < num_tag_groups_ - 1; ++i) { | ||
for (int j = i + 1; j < num_tag_groups_; ++j) { | ||
if (tag_groups_[j].ticks > tag_groups_[i].ticks) { | ||
TagGroup temp_tag_group = tag_groups_[i]; | ||
tag_groups_[i] = tag_groups_[j]; | ||
tag_groups_[j] = temp_tag_group; | ||
} | ||
} | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use standard C++ library algorithms (as long as they don't require memory allocation). Here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
|
||
TF_LITE_REMOVE_VIRTUAL_DELETE | ||
}; | ||
|
||
#if defined(TF_LITE_STRIP_ERROR_STRINGS) | ||
// For release builds, the ScopedMicroProfiler is a noop. | ||
// | ||
// This is done because the ScipedProfiler is used as part of the | ||
// This is done because the ScopedProfiler is used as part of the | ||
// MicroInterpreter and we want to ensure zero overhead for the release builds. | ||
class ScopedMicroProfiler { | ||
public: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be
template <int MAX_EVENTS = 4096>
so that all existing applications using the profiler do not need to be changed.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done