Dev0 #16 (Merged)
include/utils/BackTrace.h (6 changes: 5 additions & 1 deletion)

@@ -3,18 +3,22 @@
 #include <iostream>
 #include <vector>
 #include <string>
+#include <chrono>
 
 namespace trace {
 // reference: https://man7.org/linux/man-pages/man3/backtrace.3.html
 class Tracer {
 public:
   Tracer(std::string name);
-  ~Tracer() {}
+  ~Tracer();
 private:
   void trace();
   void print();
   int64_t max_depth;
   int64_t real_size;
+  // start time of the function
+  std::chrono::high_resolution_clock::time_point start;
+  bool enable_duration;
   const std::string func_name;
   std::vector<void*> stack;
 };
lib/utils/BackTrace.cpp (22 changes: 21 additions & 1 deletion)

@@ -2,6 +2,7 @@
 #include "utils/Utils.h"
 #include "utils/Log/Log.h"
 #include "utils/ConsoleTable/ConsoleTable.h"
+// #include "utils/Timer/Timer.h"
 #include <cstdlib>
 #include <cxxabi.h>
 #include <dlfcn.h>
@@ -12,6 +13,7 @@
 
 using namespace trace;
 using namespace console_table;
+using namespace std::chrono;
 
 class Recorder {
 public:
@@ -99,7 +101,7 @@ std::string demangle(const char *mangled_name) {
  * - count the call times of the current function
  *******************************************************************************/
 Tracer::Tracer(std::string name)
-    : max_depth(100), real_size(0), func_name(name) {
+    : max_depth(100), real_size(0), func_name(name), enable_duration(false) {
   static const char *enable_trace = std::getenv("ENABLE_HOOK_TRACE");
   static const char *print_backtrace = std::getenv("PRINT_BACKTRACE");
   bool trace_flag = false;
@@ -116,6 +118,24 @@
     print();
   }
+
+  static const char *enable_duration_str = std::getenv("ENABLE_DURATION");
+
+  start = high_resolution_clock::now();
+  if (enable_duration_str &&
+      (std::string(enable_duration_str) == "true" || std::string(enable_duration_str) == "TRUE")) {
+    enable_duration = true;
+  }
 }
+
+Tracer::~Tracer() {
+  if (enable_duration) {
+    auto end = high_resolution_clock::now();
+    int64_t duration =
+        std::chrono::duration_cast<std::chrono::nanoseconds>(end - start)
+            .count();
+    LOG() << func_name << " Duration: " << duration;
+  }
+}
 
 void Tracer::trace() {
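Net effect of the two C++ files: Tracer is now a scope-based timer as well as a backtrace hook. The constructor stamps start with high_resolution_clock::now() and reads the ENABLE_DURATION environment variable; the new destructor logs the elapsed nanoseconds for func_name when the flag is "true" or "TRUE". For readers following from the Python side, here is a minimal sketch of the same pattern in Python; all names in it are illustrative and not part of this PR:

    import os
    import time
    from contextlib import contextmanager

    # Same gate as the C++ side: only "true"/"TRUE" enables duration logging.
    _ENABLE_DURATION = os.getenv("ENABLE_DURATION") in ("true", "TRUE")

    @contextmanager
    def traced(func_name):
        start = time.perf_counter_ns()      # Tracer constructor: stamp the start time
        try:
            yield
        finally:
            if _ENABLE_DURATION:            # Tracer destructor: log elapsed nanoseconds
                print(f"{func_name} Duration: {time.perf_counter_ns() - start}")

    # Run with ENABLE_DURATION=true to see the log line.
    with traced("my_op"):
        sum(range(1_000_000))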
python/module_logging/debug/dump.py (11 changes: 7 additions & 4 deletions)

@@ -78,10 +78,7 @@ def __init__(self):
         """
         Initializer of the TensorInfoRecorder class; sets up internal state.
         """
-        cpp_extend = get_config("database", "cpp_extend")
-        if cpp_extend == "True":
-            from .. import Hook
-            Hook.install_hook(Hook.HookType.kDUMP)
+        self.install_hook = False
 
         self.lock = threading.Lock()
         super().__init__()
@@ -94,6 +91,12 @@ def __enter__(self):
         Returns:
             Any: the instance itself, i.e., the state after entering the context manager.
         """
+        if not self.install_hook:
+            self.install_hook = True
+            cpp_extend = get_config("database", "cpp_extend")
+            if cpp_extend == "True":
+                from .. import Hook
+                Hook.install_hook(Hook.HookType.kDUMP)
         self._pt_impls = {}
         for k in TENSOR_FUNCS_NO_DISPATCH:
            impl = getattr(torch.Tensor, k)
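The point of this change: constructing a TensorInfoRecorder no longer touches the C++ extension at all; the kDUMP hook is installed lazily on the first __enter__, and the self.install_hook flag keeps repeated entries from installing it twice. A hedged usage sketch, assuming the package layout shown in the file path:

    from module_logging.debug.dump import TensorInfoRecorder

    rec = TensorInfoRecorder()  # cheap now: no Hook import, no install_hook call
    with rec:                   # first entry installs Hook.HookType.kDUMP (if cpp_extend is "True")
        pass                    # ... record tensor info ...
    with rec:                   # later entries skip the install thanks to self.install_hook
        pass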
python/module_logging/perf/logger.py (25 changes: 19 additions & 6 deletions)

@@ -79,11 +79,10 @@ def __init__(self, model=None, profiling_bw=True) -> None:
                 self._register_hook(name, m)
 
         # for gpu profiling with cpp extension; not necessary for xpu profiling
-        if config.cpp_extend():
+        if cpp_extend:
             from .. import Hook
             print("Install hook...")
-
-            Hook.install_hook(Hook.HookType.kNONE)
+            Hook.install_hook(Hook.HookType.kPROFILE)
 
     def config(self, model=None, profiling_bw=True):
         if model:
@@ -99,22 +98,30 @@ def config(self, model=None, profiling_bw=True):
 
     def __enter__(self):
         print("Enter performance logger...")
-        if config.cpp_extend():
+        if cpp_extend:
             from .. import Hook
+            Hook.cuda_profiler_start()
         self._pt_impls = {}
         for k in TENSOR_FUNCS_NO_DISPATCH:
             impl = getattr(torch.Tensor, k)
             self._pt_impls[k] = impl
             setattr(torch.Tensor, k, TorchFuncMockNoDispatch(impl))
         super().__enter__()
+        # if cpp_extend:
+        #     from .. import Hook
+        #     print("Performance logging entered at: {} ns".format(Hook.get_current_time()))
 
     def __exit__(self, exc_type=None, exc_value=None, traceback=None):
         print("Exit performance logger...")
-        if config.cpp_extend():
+        if cpp_extend:
             from .. import Hook
+            Hook.cuda_profiler_end()
         for k in TENSOR_FUNCS_NO_DISPATCH:
             setattr(torch.Tensor, k, self._pt_impls[k])
         super().__exit__(exc_type, exc_value, traceback)
+        # if cpp_extend:
+        #     from .. import Hook
+        #     print("Performance logging exited at: {} ns".format(Hook.get_current_time()))
 
     def get_named_modules(self, module: torch.nn.Module, prefix=""):
         stack = []
@@ -181,7 +188,10 @@ def __torch_dispatch__(self, op, types, args=(), kwargs=None):
         if self.enable_profiling:
             torch.cuda.synchronize()
             # insert pre-op delimiter
-            print("[START_SYMBOL]: {}".format(str(op)), flush=True)
+            # print("[START_SYMBOL]: {} ns".format(str(op)), flush=True)
+            # if cpp_extend:
+            #     from .. import Hook
+            #     print("{} start at: {}".format(str(op), Hook.get_current_time()))
             # for debug
             Logger.debug(
                 "[START_SYMBOL]: {}, counter: {}, pid: {}, tid: {}".format(
@@ -191,6 +201,9 @@
             # call op
             output = op(*args, **kwargs)
             torch.cuda.synchronize()
+            # if cpp_extend:
+            #     from .. import Hook
+            #     print("{} end at: {} ns".format(str(op), Hook.get_current_time()))
             # insert after-op delimiter
             print("[END_SYMBOL]: {}".format(str(op)), flush=True)
             # for debug
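Summarizing the perf logger after this PR: construction installs the C++ hook with HookType.kPROFILE instead of kNONE, __enter__ now calls Hook.cuda_profiler_start(), __exit__ calls Hook.cuda_profiler_end(), and in __torch_dispatch__ the [START_SYMBOL] console print is commented out (the Logger.debug record remains) while [END_SYMBOL] still prints after each op. A hedged sketch of the intended call pattern follows; the class name PerformanceLogger is an assumption, since the diff does not show it:

    import torch
    from module_logging.perf.logger import PerformanceLogger  # class name assumed

    model = torch.nn.Linear(8, 8)
    logger = PerformanceLogger(model=model, profiling_bw=True)  # installs the kPROFILE hook when cpp_extend is set

    with logger:                          # Hook.cuda_profiler_start() on entry
        out = model(torch.randn(4, 8))
        out.sum().backward()              # each dispatched op is logged via Logger.debug / [END_SYMBOL]
    # Hook.cuda_profiler_end() on exit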