diff --git a/include/utils/BackTrace.h b/include/utils/BackTrace.h
index 47263fd..50d16d4 100644
--- a/include/utils/BackTrace.h
+++ b/include/utils/BackTrace.h
@@ -3,18 +3,22 @@
 #include <string>
 #include <vector>
 #include <cstdint>
+#include <chrono>
 
 namespace trace {
 // reference: https://man7.org/linux/man-pages/man3/backtrace.3.html
 class Tracer {
 public:
   Tracer(std::string name);
-  ~Tracer() {}
+  ~Tracer();
 
 private:
   void trace();
   void print();
   int64_t max_depth;
   int64_t real_size;
+  // start time of the function
+  std::chrono::high_resolution_clock::time_point start;
+  bool enable_duration;
   const std::string func_name;
   std::vector<void *> stack;
 };
diff --git a/lib/utils/BackTrace.cpp b/lib/utils/BackTrace.cpp
index 9696436..23947d2 100644
--- a/lib/utils/BackTrace.cpp
+++ b/lib/utils/BackTrace.cpp
@@ -2,6 +2,7 @@
 #include "utils/Utils.h"
 #include "utils/Log/Log.h"
 #include "utils/ConsoleTable/ConsoleTable.h"
+// #include "utils/Timer/Timer.h"
 #include <execinfo.h>
 #include <cxxabi.h>
 #include <cstdlib>
@@ -12,6 +13,7 @@
 
 using namespace trace;
 using namespace console_table;
+using namespace std::chrono;
 
 class Recorder {
 public:
@@ -99,7 +101,7 @@ std::string demangle(const char *mangled_name) {
  * - count the call times of the current function
  *******************************************************************************/
 Tracer::Tracer(std::string name)
-    : max_depth(100), real_size(0), func_name(name) {
+    : max_depth(100), real_size(0), func_name(name), enable_duration(false) {
   static const char *enable_trace = std::getenv("ENABLE_HOOK_TRACE");
   static const char *print_backtrace = std::getenv("PRINT_BACKTRACE");
   bool trace_flag = false;
@@ -116,6 +118,24 @@ Tracer::Tracer(std::string name)
       print();
     }
   }
+
+  static const char *enable_duration_str = std::getenv("ENABLE_DURATION");
+
+  start = high_resolution_clock::now();
+  if (enable_duration_str &&
+      (std::string(enable_duration_str) == "true" || std::string(enable_duration_str) == "TRUE")) {
+    enable_duration = true;
+  }
+}
+
+Tracer::~Tracer() {
+  if (enable_duration) {
+    auto end = high_resolution_clock::now();
+    int64_t duration =
+        std::chrono::duration_cast<nanoseconds>(end - start)
+            .count();
+    LOG() << func_name << " Duration: " << duration;
+  }
 }
 
 void Tracer::trace() {
diff --git a/python/module_logging/debug/dump.py b/python/module_logging/debug/dump.py
index a4b269e..fc83166 100644
--- a/python/module_logging/debug/dump.py
+++ b/python/module_logging/debug/dump.py
@@ -78,10 +78,7 @@ def __init__(self):
         """
        Initializer of the TensorInfoRecorder class; sets up a few member variables.
         """
-        cpp_extend = get_config("database", "cpp_extend")
-        if cpp_extend == "True":
-            from .. import Hook
-            Hook.install_hook(Hook.HookType.kDUMP)
+        self.install_hook = False
         self.lock = threading.Lock()
         super().__init__()
 
@@ -94,6 +91,12 @@ def __enter__(self):
         Returns:
            Any: the instance itself, i.e. the state after entering the context manager.
         """
+        if not self.install_hook:
+            self.install_hook = True
+            cpp_extend = get_config("database", "cpp_extend")
+            if cpp_extend == "True":
+                from .. import Hook
+                Hook.install_hook(Hook.HookType.kDUMP)
         self._pt_impls = {}
         for k in TENSOR_FUNCS_NO_DISPATCH:
             impl = getattr(torch.Tensor, k)
diff --git a/python/module_logging/perf/logger.py b/python/module_logging/perf/logger.py
index 815cf3c..6d0faf2 100644
--- a/python/module_logging/perf/logger.py
+++ b/python/module_logging/perf/logger.py
@@ -79,11 +79,10 @@ def __init__(self, model=None, profiling_bw=True) -> None:
             self._register_hook(name, m)
 
         # for gpu profiling with cpp extension, for xpu profiling is not necessary
-        if config.cpp_extend():
+        if cpp_extend:
             from .. import Hook
             print("Install hook...")
-
-            Hook.install_hook(Hook.HookType.kNONE)
+            Hook.install_hook(Hook.HookType.kPROFILE)
 
     def config(self, model=None, profiling_bw=True):
         if model:
@@ -99,7 +98,8 @@ def config(self, model=None, profiling_bw=True):
 
     def __enter__(self):
         print("Enter performance logger...")
-        if config.cpp_extend():
+        if cpp_extend:
+            from .. import Hook
             Hook.cuda_profiler_start()
         self._pt_impls = {}
         for k in TENSOR_FUNCS_NO_DISPATCH:
@@ -107,14 +107,21 @@ def __enter__(self):
             self._pt_impls[k] = impl
             setattr(torch.Tensor, k, TorchFuncMockNoDispatch(impl))
         super().__enter__()
+        # if cpp_extend:
+        #     from .. import Hook
+        #     print("Performance logging entered at: {} ns".format(Hook.get_current_time()))
 
     def __exit__(self, exc_type=None, exc_value=None, traceback=None):
         print("Exit performance logger...")
-        if config.cpp_extend():
+        if cpp_extend:
+            from .. import Hook
             Hook.cuda_profiler_end()
         for k in TENSOR_FUNCS_NO_DISPATCH:
             setattr(torch.Tensor, k, self._pt_impls[k])
         super().__exit__(exc_type, exc_value, traceback)
+        # if cpp_extend:
+        #     from .. import Hook
+        #     print("Performance logging exited at: {} ns".format(Hook.get_current_time()))
 
     def get_named_modules(self, module: torch.nn.Module, prefix=""):
         stack = []
@@ -181,7 +188,10 @@ def __torch_dispatch__(self, op, types, args=(), kwargs=None):
         if self.enable_profiling:
             torch.cuda.synchronize()
             # insert pre-op delimiter
-            print("[START_SYMBOL]: {}".format(str(op)), flush=True)
+            # print("[START_SYMBOL]: {} ns".format(str(op)), flush=True)
+            # if cpp_extend:
+            #     from .. import Hook
+            #     print("{} start at: {}".format(str(op), Hook.get_current_time()))
             # for debug
             Logger.debug(
                 "[START_SYMBOL]: {}, counter: {}, pid: {}, tid: {}".format(
@@ -191,6 +201,9 @@ def __torch_dispatch__(self, op, types, args=(), kwargs=None):
             # call op
             output = op(*args, **kwargs)
             torch.cuda.synchronize()
+            # if cpp_extend:
+            #     from .. import Hook
+            #     print("{} end at: {} ns".format(str(op), Hook.get_current_time()))
             # insert after-op delimiter
             print("[END_SYMBOL]: {}".format(str(op)), flush=True)
             # for debug
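
Note on the C++ change: the Tracer edit is an instance of the RAII scope-timing pattern, with the environment lookup cached in function-local statics so getenv runs only once per process. Below is a minimal standalone sketch of the same pattern. ScopedTimer is a hypothetical stand-in for trace::Tracer (not the project's class), and nanoseconds is an assumed unit, matching the "ns" in the commented-out Python prints above.

```cpp
// Minimal sketch of the env-gated RAII timing pattern used by Tracer above.
// ScopedTimer is hypothetical; it logs the scope's wall time at destruction
// when ENABLE_DURATION is set to "true" or "TRUE".
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <string>

class ScopedTimer {
public:
    explicit ScopedTimer(std::string name)
        : func_name(std::move(name)),
          start(std::chrono::high_resolution_clock::now()) {}

    ~ScopedTimer() {
        if (!enabled()) return;  // cheap no-op when the env var is unset
        auto end = std::chrono::high_resolution_clock::now();
        int64_t ns =
            std::chrono::duration_cast<std::chrono::nanoseconds>(end - start)
                .count();
        std::cerr << func_name << " Duration: " << ns << " ns\n";
    }

private:
    // Cache the environment lookup in function-local statics, mirroring the
    // `static` locals in Tracer's constructor: getenv runs once per process.
    static bool enabled() {
        static const char *v = std::getenv("ENABLE_DURATION");
        static const bool on =
            v && (std::string(v) == "true" || std::string(v) == "TRUE");
        return on;
    }

    const std::string func_name;
    std::chrono::high_resolution_clock::time_point start;
};

int main() {
    ScopedTimer t("main");  // destructor fires at scope exit and logs
    volatile long sink = 0;
    for (long i = 0; i < 1000000; ++i) sink += i;  // some measurable work
    return 0;
}
```

Built with any C++11 compiler, running `ENABLE_DURATION=true ./a.out` prints the duration line; with the variable unset, the only fixed cost is the timestamp taken in the constructor, which matches how the patch records `start` unconditionally.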