-
Notifications
You must be signed in to change notification settings - Fork 86
tcpdirect support #41
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
9abda64
1ceae04
15b5cee
82737ea
23cb22a
342846f
d70af00
cebb661
67e41db
d05bfb5
8fd9b35
6277841
032b6f7
4fb36d3
5398c56
0b1feef
1f61d4d
51292ba
94f61d9
1d4662c
ac0203b
a1f6bc8
af783cf
8e11a8c
a8d203e
fb78051
8665354
1ed0f55
c36aa25
7087697
b44fef3
5366563
b717eff
70923a6
d8c9098
54808d7
bba962b
51fe161
452e947
1afbd7f
ce1120d
8f18373
fee98c2
fc6e017
262a313
d1c385a
5735fe7
c228fae
712f0ad
2852410
9d340ad
e8ae584
7928b69
9e3ee46
a036a16
1c1ef8d
448b4fb
8d119b9
9793ada
d017626
38c6c2f
c4ba1ff
a27d552
8c5f1cb
fbd2fb5
3606589
60b0af7
b3007ef
f48bb72
da65b51
9f72daa
521cd24
06cf175
8b2f1ba
fc67937
79eaf56
cc96940
54f1e0b
a32203b
f2849c2
e92168c
5714163
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,7 +18,20 @@ | |
|
|
||
| all: binaries | ||
|
|
||
| CFLAGS = -std=c99 -Wall -O3 -g -D_GNU_SOURCE -DNO_LIBNUMA | ||
| CFLAGS := -std=c99 -Wall -O3 -g -D_GNU_SOURCE -DNO_LIBNUMA | ||
|
|
||
| HEADERS_DIR := usr/include | ||
|
|
||
| ifdef WITH_TCPDEVMEM_CUDA | ||
| CFLAGS += -DWITH_TCPDEVMEM_CUDA -I $(HEADERS_DIR) | ||
| endif | ||
| ifdef WITH_TCPDEVMEM_UDMABUF | ||
| CFLAGS += -DWITH_TCPDEVMEM_UDMABUF -DNDEBUG=1 -static -I $(HEADERS_DIR) | ||
| LDFLAGS += -static | ||
| endif | ||
|
|
||
| ifndef_any_of = $(filter undefined,$(foreach v,$(1),$(origin $(v)))) | ||
| ifdef_any_of = $(filter-out undefined,$(foreach v,$(1),$(origin $(v)))) | ||
|
|
||
| lib := \ | ||
| check_all_options.o \ | ||
|
|
@@ -48,6 +61,16 @@ lib := \ | |
| tcp_rr-objs := tcp_rr_main.o tcp_rr.o rr.o $(lib) | ||
|
|
||
| tcp_stream-objs := tcp_stream_main.o tcp_stream.o stream.o $(lib) | ||
| ifdef WITH_TCPDEVMEM_CUDA | ||
| tcp_stream-objs += tcpdevmem_cuda.o | ||
| endif | ||
| ifdef WITH_TCPDEVMEM_UDMABUF | ||
| tcp_stream-objs += tcpdevmem_udmabuf.o | ||
| endif | ||
| ifneq ($(call ifdef_any_of,WITH_TCPDEVMEM_CUDA WITH_TCPDEVMEM_UDMABUF),) | ||
| tcp_stream-objs += tcpdevmem.o | ||
| endif | ||
|
|
||
|
|
||
| tcp_crr-objs := tcp_crr_main.o tcp_crr.o rr.o $(lib) | ||
|
|
||
|
|
@@ -63,11 +86,18 @@ psp_rr-objs := psp_rr_main.o psp_rr.o rr.o psp_lib.o $(lib) | |
|
|
||
| ext-libs := -lm -lrt -lpthread | ||
|
|
||
| tcpdevmem_cuda.o: tcpdevmem_cuda.cu | ||
| nvcc -arch=sm_90 -O3 -g -I $(HEADERS_DIR) -D_GNU_SOURCE -DNO_LIBNUMA -DWITH_TCPDEVMEM_CUDA -c -o $@ $^ | ||
|
|
||
| tcp_rr: $(tcp_rr-objs) | ||
| $(CC) $(LDFLAGS) -o $@ $^ $(ext-libs) | ||
|
|
||
| tcp_stream: $(tcp_stream-objs) | ||
| ifdef WITH_TCPDEVMEM_CUDA | ||
| g++ $(LDFLAGS) -o $@ $^ $(ext-libs) -lc -L/usr/local/cuda/lib64 -lcudart -lcuda | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Curious why the $(CC) has been overridden. Is it because this is a c++ compiler? Do we have to use C++ compiler here?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Kaiyuan-Zhang might have a better answer, I don't remember |
||
| else | ||
| $(CC) $(LDFLAGS) -o $@ $^ $(ext-libs) | ||
| endif | ||
|
|
||
| tcp_crr: $(tcp_crr-objs) | ||
| $(CC) $(LDFLAGS) -o $@ $^ $(ext-libs) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,212 @@ | ||
| # Neper with TCPDevmem run instructions | ||
|
|
||
| Table of Contents | ||
| - [Neper with TCPDevmem run instructions](#neper-with-tcpdevmem-run-instructions) | ||
| - [TCPDevmem UDMABUF: Compiling tcp\_stream](#tcpdevmem-udmabuf-compiling-tcp_stream) | ||
| - [Manually specifying kernel headers directory (i.e. NOT in `usr/include`)](#manually-specifying-kernel-headers-directory-ie-not-in-usrinclude) | ||
| - [Running tcp\_stream](#running-tcp_stream) | ||
| - [Added flags](#added-flags) | ||
| - [Running tcp\_stream via `multi_neper.py`](#running-tcp_stream-via-multi_neperpy) | ||
| - [Example of successful output](#example-of-successful-output) | ||
| - [Running tcp\_stream directly](#running-tcp_stream-directly) | ||
|
|
||
|
|
||
| ## TCPDevmem UDMABUF: Compiling tcp_stream | ||
|
|
||
| **UDMABUF-capable tcp_stream can be built statically on a workstation.** | ||
|
|
||
| Neper can be built statically on a host with UDMABUF header files. | ||
|
|
||
| ``` | ||
| # clone the Neper repository and checkout the tcpd branch | ||
| git clone -b tcpd https://github.com/google/neper.git | ||
| cd neper | ||
|
|
||
| # copy kernel header files to Neper working directory | ||
| # (assumed to be found in ~/kernel/usr/include) | ||
| mkdir usr | ||
| cp -r ~/kernel/usr/include/ ./usr/ | ||
|
|
||
| make tcp_stream WITH_TCPDEVMEM_UDMABUF=1 | ||
|
|
||
| # copy the binary to your hosts | ||
| scp tcp_stream root@${HOST1}:~/ | ||
| scp multi_neper.py root@${HOST1}:~/ | ||
|
|
||
| scp tcp_stream root@${HOST2}:~/ | ||
| scp multi_neper.py root@${HOST2}:~/ | ||
| ``` | ||
|
|
||
| ### Manually specifying kernel headers directory (i.e. NOT in `usr/include`) | ||
|
|
||
| Copying the header files is unnecessary if you override the `HEADERS_DIR` variable when running make. The default value for this variable is `usr/include`. | ||
|
|
||
| ``` | ||
| git clone -b tcpd https://github.com/google/neper.git | ||
| cd neper | ||
|
|
||
| make tcp_stream WITH_TCPDEVMEM_UDMABUF=1 HEADERS_DIR=~/kernel/usr/include | ||
| ``` | ||
|
|
||
|
|
||
| ## Running tcp_stream | ||
|
|
||
|
|
||
| ### Added flags | ||
|
|
||
| In general, these flags will be automatically populated by `multi_neper.py`. | ||
|
|
||
| ``` | ||
| --tcpd-validate # payload validation - must pass to both Tx/Rx if enabled | ||
| --tcpd-rx-cpy # copies payload to another buffer (but doesn't validate) | ||
| --tcpd-nic-pci-addr | ||
| --tcpd-gpu-pci-addr | ||
| --tcpd-phys-len # CUDA mode allows for a much larger value than UDMABUF mode | ||
| --tcpd-src-ip | ||
| --tcpd-dst-ip | ||
| --tcpd-link-name | ||
| --queue-start | ||
| --queue-num | ||
| ``` | ||
|
|
||
| `--tcpd-validate`: Client populates the send buffer with [1,111] repeating, and Host verifies the repeating sequence. | ||
|
|
||
|
|
||
| ### Running tcp_stream via `multi_neper.py` | ||
|
|
||
| `multi_neper.py` is a Python script that runs multiple tcp_stream instances in parallel, which is useful when running tcp_stream across multiple pairs of NICs. | ||
|
|
||
| The script also calls ethtool commands on the receiver (host) before spawning tcp_streams, to set the receiver into a TCPDevmem-capable state. | ||
|
|
||
| To view all of `multi_neper.py`’s accepted flags, run `multi_neper.py --help`. | ||
|
|
||
|
|
||
| ``` | ||
| # Rx (host) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rx (server)?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Neper refers to machines receiving traffic as host ( |
||
| FLOWS=2 | ||
| BUF_SIZE=409600 | ||
| DEVS=eth1,eth2,eth3,eth4 | ||
| DSTS=192.168.1.26,192.168.2.26,192.168.3.26,192.168.4.26 # host IP addresses | ||
| SRCS=192.168.1.23,192.168.2.23,192.168.3.23,192.168.4.23 # client IP addresses | ||
| ./multi_neper.py --hosts $DSTS \ | ||
| --devices $DEVS --buffer-size $BUF_SIZE \ | ||
| --flows $FLOWS --threads $FLOWS \ | ||
| --src-ips $SRCS --log DEBUG \ | ||
| --q-num $FLOWS --phys-len 2147483648 \ | ||
| --mode cuda | ||
|
|
||
|
|
||
| # Tx (client) | ||
| FLOWS=2 | ||
| BUF_SIZE=409600 | ||
| DEVS=eth1,eth2,eth3,eth4 | ||
| DSTS=192.168.1.26,192.168.2.26,192.168.3.26,192.168.4.26 | ||
| SRCS=192.168.1.23,192.168.2.23,192.168.3.23,192.168.4.23 | ||
| ./multi_neper.py --hosts $DSTS \ | ||
| --devices $DEVS --buffer-size $BUF_SIZE \ | ||
| --flows $FLOWS --threads $FLOWS \ | ||
| --src-ips $SRCS --log DEBUG \ | ||
| --q-num $FLOWS --phys-len 2147483648 \ | ||
| --client \ | ||
| --mode cuda | ||
| ``` | ||
|
|
||
| #### Example of successful output | ||
|
|
||
| ``` | ||
| DEBUG:root:minflt_end=6037 | ||
| DEBUG:root:majflt_start=0 | ||
| DEBUG:root:majflt_end=0 | ||
| DEBUG:root:nvcsw_start=653 | ||
| DEBUG:root:nvcsw_end=675141 | ||
| DEBUG:root:nivcsw_start=2 | ||
| DEBUG:root:nivcsw_end=1018 | ||
| DEBUG:root:num_samples=155 | ||
| DEBUG:root:time_end=613529.729042674 | ||
| DEBUG:root:correlation_coefficient=1.00 | ||
| DEBUG:root:throughput=193669.32 | ||
| DEBUG:root:throughput_units=Mbit/s | ||
| DEBUG:root:local_throughput=193669323769 | ||
| DEBUG:root:remote_throughput=0 | ||
| DEBUG:root: | ||
| [eth1] Throughput (Mb/s): 193551.94 | ||
| [eth2] Throughput (Mb/s): 193652.69 | ||
| [eth3] Throughput (Mb/s): 193640.21 | ||
| [eth4] Throughput (Mb/s): 193669.32 | ||
| ``` | ||
|
|
||
|
|
||
|
|
||
| ### Running tcp_stream directly | ||
|
|
||
| **If you’re running Neper outside of the container, make sure to run** | ||
|
|
||
| ``` | ||
| sudo -s | ||
| ``` | ||
|
|
||
| **before everything. `ethtool` commands and queue-binding are only available to the superuser.** | ||
|
|
||
| Before running tcp_stream, the ethtool commands that `multi_neper.py` runs should also be run: | ||
|
|
||
| ``` | ||
| # run as superuser, if running Neper as root | ||
| sudo -s | ||
|
|
||
| res_link() { | ||
| ethtool --set-priv-flags $1 enable-strict-header-split on | ||
| ethtool --set-priv-flags $1 enable-strict-header-split off | ||
| ethtool --set-priv-flags $1 enable-header-split off | ||
| ethtool --set-rxfh-indir $1 equal 16 | ||
| ethtool -K $1 ntuple off | ||
| ethtool --set-priv-flags $1 enable-strict-header-split off | ||
| ethtool --set-priv-flags $1 enable-header-split off | ||
| ethtool -K $1 ntuple off | ||
| ethtool --set-priv-flags $1 enable-max-rx-buffer-size on | ||
| ethtool -K $1 ntuple on | ||
| } | ||
|
|
||
| # call on each link you plan to run tcp_stream across | ||
| res_link eth1 | ||
| ``` | ||
|
|
||
|
|
||
| You can then run `multi_neper.py` with the `--dry-run` flag, to see what tcp_stream commands the script would run: | ||
|
|
||
|
|
||
| ``` | ||
| $ FLOWS=1 | ||
| $ BUF_SIZE=409600 | ||
| $ DEVS=eth1 | ||
| $ DSTS=192.168.1.26 | ||
| $ SRCS=192.168.1.23 | ||
| $ ./multi_neper.py --hosts $DSTS \ | ||
| --devices $DEVS --buffer-size $BUF_SIZE \ | ||
| --flows $FLOWS --threads $FLOWS \ | ||
| --src-ips $SRCS --log DEBUG \ | ||
| --q-num $FLOWS --phys-len 2147483648 \ | ||
| --client \ | ||
| --mode cuda \ | ||
| --dry-run | ||
|
|
||
| DEBUG:root:running on ['eth1'] | ||
| DEBUG:root:('taskset --cpu-list 2-2 ./tcp_stream -T 1 -F 1 --port 12345 --source-port 12345 --control-port 12866 --buffer-size 409600 -l 10 --num-ports 1 --tcpd-phys-len 2147483648 --tcpd-nic-pci-addr 0000:06:00.0 --tcpd-gpu-pci-addr 0000:04:00.0 -c -H 192.168.1.26', {'CUDA_VISIBLE_DEVICES': '0', ... | ||
| ``` | ||
|
|
||
| The script will print the tcp_stream command, as well as the environment variables. The only environment variable that matters is `CUDA_VISIBLE_DEVICES` if running in `cuda` mode, which tells tcp_stream which GPU it should allocate memory on. | ||
|
|
||
| You can then reset the receiver, and copy/paste the command: | ||
|
|
||
| ``` | ||
| # on Rx (host) | ||
| res_link eth1 | ||
| ./multi_neper.py --dry-run ${other_rx_args} | ||
|
|
||
| CUDA_VISIBLE_DEVICES=0 ./tcp_stream # copy cmd from previous line | ||
|
|
||
|
|
||
| # on Tx (client) | ||
| ./multi_neper.py --dry-run ${other_tx_args} | ||
|
|
||
| CUDA_VISIBLE_DEVICES=0 ./tcp_stream # copy cmd from previous line | ||
| ``` | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -103,6 +103,34 @@ void check_options_tcp_rr(struct options *opts, struct callbacks *cb) | |
|
|
||
| void check_options_tcp_stream(struct options *opts, struct callbacks *cb) | ||
| { | ||
| #ifdef WITH_TCPDEVMEM_CUDA | ||
| if (opts->tcpd_gpu_pci_addr) { | ||
| CHECK(cb, opts->tcpd_nic_pci_addr, | ||
| "Must provide NIC PCI address if GPU PCI address was provided."); | ||
|
|
||
| if (opts->client) { | ||
| CHECK(cb, !opts->tcpd_rx_cpy, | ||
| "Copying CUDA buffer to userspace only allowed on hosts."); | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. receivers (as opposed to clients/senders) |
||
| } | ||
| } | ||
| #endif /* WITH_TCPDEVMEM_CUDA */ | ||
| #if defined(WITH_TCPDEVMEM_CUDA) || defined(WITH_TCPDEVMEM_UDMABUF) | ||
| if (opts->tcpd_nic_pci_addr) { | ||
| CHECK(cb, opts->tcpd_phys_len > 0, | ||
| "Must provide non-zero --tcpd-phys-len flag when running in devmem TCP mode."); | ||
| CHECK(cb, opts->num_flows == opts->num_threads, | ||
| "Thread/Flow count must be equal when running in devmem TCP mode."); | ||
| CHECK(cb, opts->num_flows == opts->num_ports, | ||
| "Number of ports should equal number of flows when running in devmem TCP mode."); | ||
|
|
||
| if (!opts->client) { | ||
| CHECK(cb, opts->tcpd_src_ip, | ||
| "Must provide source IP address for devmem TCP host."); | ||
| CHECK(cb, opts->tcpd_dst_ip, | ||
| "Must provide destination IP address for devmem TCP host."); | ||
| } | ||
| } | ||
| #endif /* WITH_TCPDEVMEM_CUDA || WITH_TCPDEVMEM_UDMABUF */ | ||
| } | ||
|
|
||
| void check_options_udp_rr(struct options *opts, struct callbacks *cb) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.