Skip to content

Commit 6c8a96f

Browse files
suresh-srinivas authored and jasnell committed
src: initial large page (2M) support
PR-URL: #22079 Reviewed-By: Gireesh Punathil <gpunathi@in.ibm.com> Reviewed-By: Denys Otrishko <shishugi@gmail.com> Reviewed-By: Refael Ackermann <refack@gmail.com>
1 parent d548e63 commit 6c8a96f

7 files changed

+370
-0
lines changed

configure.py

+24
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,12 @@
388388
dest='with_etw',
389389
help='build with ETW (default is true on Windows)')
390390

391+
# Opt-in switch for mapping node's static code onto 2M pages. The help text is
# built from adjacent string literals; each fragment ends with a space so the
# joined sentence reads "... Linux kernel >= 2.6.38 ..." instead of running
# the words together.
parser.add_option('--use-largepages',
    action='store_true',
    dest='node_use_large_pages',
    help='build with Large Pages support. This feature is supported only on '
         'Linux kernel >= 2.6.38 with Transparent Huge pages enabled')
396+
391397
intl_optgroup.add_option('--with-intl',
392398
action='store',
393399
dest='with_intl',
@@ -998,6 +1004,24 @@ def configure_node(o):
9981004
else:
9991005
o['variables']['node_use_dtrace'] = 'false'
10001006

1007+
# Validate the --use-largepages request before recording it in the gyp
# variables: it is only accepted for a Linux x64 node executable built on a
# kernel that is at least 2.6.38.
if options.node_use_large_pages and flavor != 'linux':
  raise Exception(
    'Large pages are supported only on Linux Systems.')
if options.node_use_large_pages and flavor == 'linux':
  if options.shared or options.enable_static:
    raise Exception(
      'Large pages are supported only while creating node executable.')
  if target_arch != 'x64':
    raise Exception(
      'Large pages are supported only x64 platform.')
  # Example full version string: 2.6.32-696.28.1.el6.x86_64
  FULL_KERNEL_VERSION = os.uname()[2]
  KERNEL_VERSION = FULL_KERNEL_VERSION.split('-')[0]
  # Compare the version numerically. A plain string comparison orders
  # "2.6.9" after "2.6.38" (because '9' > '3') and would let an unsupported
  # kernel through.
  kernel_version_numbers = []
  for component in KERNEL_VERSION.split('.')[:3]:
    leading_digits = ''
    for ch in component:
      if not ch.isdigit():
        break
      leading_digits += ch
    kernel_version_numbers.append(int(leading_digits or '0'))
  if tuple(kernel_version_numbers) < (2, 6, 38):
    raise Exception(
      'Large pages need Linux kernel version >= 2.6.38')
# Set unconditionally: the gyp conditions test node_use_large_pages on every
# build, so the variable must always be defined.
o['variables']['node_use_large_pages'] = b(options.node_use_large_pages)
1024+
10011025
if options.no_ifaddrs:
10021026
o['defines'] += ['SUNOS_NO_IFADDRS']
10031027

node.gyp

+9
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,15 @@
578578
'src/tls_wrap.h'
579579
],
580580
}],
581+
[ 'node_use_large_pages=="true" and OS=="linux"', {
582+
'defines': [ 'NODE_ENABLE_LARGE_CODE_PAGES=1' ],
583+
# The current implementation of Large Pages is under Linux.
584+
# Other implementations are possible but not currently supported.
585+
'sources': [
586+
'src/large_pages/node_large_page.cc',
587+
'src/large_pages/node_large_page.h'
588+
],
589+
}],
581590
[ 'use_openssl_def==1', {
582591
# TODO(bnoordhuis) Make all platforms export the same list of symbols.
583592
# Teach mkssldef.py to generate linker maps that UNIX linkers understand.

node.gypi

+6
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,12 @@
292292
'ldflags': [ '-Wl,-z,relro',
293293
'-Wl,-z,now' ]
294294
}],
295+
[ 'OS=="linux" and target_arch=="x64" and node_use_large_pages=="true"', {
296+
'ldflags': [
297+
'-Wl,-T',
298+
'<!(realpath src/large_pages/ld.implicit.script)',
299+
]
300+
}],
295301
[ 'node_use_openssl=="true"', {
296302
'defines': [ 'HAVE_OPENSSL=1' ],
297303
'conditions': [

src/large_pages/ld.implicit.script

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/*
 * Linker script fragment handed to the linker through -Wl,-T when large page
 * support is enabled (see the ldflags entry in node.gypi).
 */
SECTIONS {
  /* Gather every input section named .lpstub into one output section. */
  .lpstub : { *(.lpstub) }
}
/*
 * Symbols read by node_large_page.cc (declared there as
 * `extern char __nodetext`).
 * NOTE(review): presumably these mark the start of node's text; confirm the
 * value of `.` at this point against the GNU ld documentation.
 */
PROVIDE (__nodetext = .);
PROVIDE (_nodetext = .);
PROVIDE (nodetext = .);
/* Place the statements above ahead of the .text output section. */
INSERT BEFORE .text;
8+

src/large_pages/node_large_page.cc

+276
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
// Copyright (C) 2018 Intel Corporation
2+
//
3+
// Permission is hereby granted, free of charge, to any person obtaining a copy
4+
// of this software and associated documentation files (the "Software"),
5+
// to deal in the Software without restriction, including without limitation
6+
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
7+
// and/or sell copies of the Software, and to permit persons to whom
8+
// the Software is furnished to do so, subject to the following conditions:
9+
//
10+
// The above copyright notice and this permission notice shall be included
11+
// in all copies or substantial portions of the Software.
12+
//
13+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14+
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
16+
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
17+
// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18+
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
19+
// OR OTHER DEALINGS IN THE SOFTWARE.
20+
//
21+
// SPDX-License-Identifier: MIT
22+
23+
#include <errno.h>
24+
#include <fcntl.h> // _O_RDWR
25+
#include <limits.h> // PATH_MAX
26+
#include <locale.h>
27+
#include <signal.h>
28+
#include <stdio.h>
29+
#include <stdlib.h>
30+
#include <stdint.h>
31+
#include <string.h>
32+
#include <sys/types.h>
33+
#include <sys/mman.h>
34+
#include <string>
35+
#include <fstream>
36+
#include <iostream>
37+
#include <sstream>
38+
#include <unistd.h> // readlink
39+
40+
// The functions in this file map the text segment of node into 2M pages.
41+
// The algorithm is simple
42+
// Find the text region of node binary in memory
43+
// 1: Examine the /proc/self/maps to determine the currently mapped text
44+
// region and obtain the start and end
45+
// Modify the start to point to the very beginning of node text segment
46+
// (from the __nodetext symbol set up in ld.implicit.script)
47+
// Align the address of start and end to Large Page Boundaries
48+
//
49+
// 2: Move the text region to large pages
50+
// Map a new area and copy the original code there
51+
// Use mmap using the start address with MAP_FIXED so we get exactly the
52+
// same virtual address
53+
// Use madvise with MADV_HUGEPAGE to use Anonymous 2M Pages
54+
// If successful copy the code there and unmap the original region.
55+
56+
extern char __nodetext;
57+
58+
namespace node {
59+
60+
// Describes the part of node's text mapping that can be moved to 2M pages.
// Every member has a default so that a "not found" result never carries
// indeterminate values.
struct text_region {
  // First byte of the region (aligned up to a huge page boundary).
  char* from = nullptr;
  // One past the last byte of the region (aligned down to a huge page
  // boundary).
  char* to = nullptr;
  // Number of huge pages that fit between `from` and `to`.
  int total_hugepages = 0;
  // True only when a usable region was located.
  bool found_text_region = false;
};
66+
67+
// Huge page size used throughout this file: 2 MiB (2 * 2^20 bytes).
static const size_t hps = 2L << 20;
68+
69+
// Writes a "Hugepages WARNING" line to stderr describing the given errno
// value.
static void PrintSystemError(int error) {
  const char* const description = strerror(error);
  fprintf(stderr, "Hugepages WARNING: %s\n", description);
}
73+
74+
inline int64_t hugepage_align_up(int64_t addr) {
75+
return (((addr) + (hps) - 1) & ~((hps) - 1));
76+
}
77+
78+
inline int64_t hugepage_align_down(int64_t addr) {
79+
return ((addr) & ~((hps) - 1));
80+
}
81+
82+
// The format of the maps file is the following
83+
// address perms offset dev inode pathname
84+
// 00400000-00452000 r-xp 00000000 08:02 173521 /usr/bin/dbus-daemon
85+
// This is also handling the case where the first line is not the binary
86+
87+
static struct text_region FindNodeTextRegion() {
88+
std::ifstream ifs;
89+
std::string map_line;
90+
std::string permission;
91+
std::string dev;
92+
char dash;
93+
int64_t start, end, offset, inode;
94+
struct text_region nregion;
95+
96+
nregion.found_text_region = false;
97+
98+
ifs.open("/proc/self/maps");
99+
if (!ifs) {
100+
fprintf(stderr, "Could not open /proc/self/maps\n");
101+
return nregion;
102+
}
103+
104+
std::string exename;
105+
{
106+
char selfexe[PATH_MAX];
107+
ssize_t count = readlink("/proc/self/exe", selfexe, PATH_MAX);
108+
exename = std::string(selfexe, count);
109+
}
110+
111+
while (std::getline(ifs, map_line)) {
112+
std::istringstream iss(map_line);
113+
iss >> std::hex >> start;
114+
iss >> dash;
115+
iss >> std::hex >> end;
116+
iss >> permission;
117+
iss >> offset;
118+
iss >> dev;
119+
iss >> inode;
120+
if (inode != 0) {
121+
std::string pathname;
122+
iss >> pathname;
123+
if (pathname == exename && permission == "r-xp") {
124+
start = reinterpret_cast<uint64_t>(&__nodetext);
125+
char* from = reinterpret_cast<char*>(hugepage_align_up(start));
126+
char* to = reinterpret_cast<char*>(hugepage_align_down(end));
127+
128+
if (from < to) {
129+
size_t size = to - from;
130+
nregion.found_text_region = true;
131+
nregion.from = from;
132+
nregion.to = to;
133+
nregion.total_hugepages = size / hps;
134+
}
135+
break;
136+
}
137+
}
138+
}
139+
140+
ifs.close();
141+
return nregion;
142+
}
143+
144+
// Reads /sys/kernel/mm/transparent_hugepage/enabled and reports whether the
// selected mode is "always" or "madvise". The code treats the token wrapped
// in square brackets as the selected one. Returns false when the file cannot
// be opened or neither of those two tokens is bracketed.
static bool IsTransparentHugePagesEnabled() {
  std::ifstream thp_file("/sys/kernel/mm/transparent_hugepage/enabled");
  if (!thp_file) {
    fprintf(stderr, "Could not open file: "
                    "/sys/kernel/mm/transparent_hugepage/enabled\n");
    return false;
  }

  // The file is read as whitespace-separated triples; the last complete
  // triple is the one that is evaluated.
  std::string always_token;
  std::string madvise_token;
  std::string never_token;
  while (thp_file >> always_token >> madvise_token >> never_token) {}

  return always_token == "[always]" || madvise_token == "[madvise]";
}
171+
172+
// Moving the text region to large pages. We need to be very careful.
173+
// 1: This function itself should not be moved.
174+
// We use gcc attributes
175+
// (__section__) to put it outside the ".text" section
176+
// (__aligned__) to align it at 2M boundary
177+
// (__noinline__) to not inline this function
178+
// 2: This function should not call any function(s) that might be moved.
179+
// a. map a new area and copy the original code there
180+
// b. mmap using the start address with MAP_FIXED so we get exactly
181+
// the same virtual address
182+
// c. madvise with MADV_HUGEPAGE
183+
// d. If successful copy the code there and unmap the original region
184+
int
185+
__attribute__((__section__(".lpstub")))
186+
__attribute__((__aligned__(hps)))
187+
__attribute__((__noinline__))
188+
MoveTextRegionToLargePages(const text_region& r) {
189+
void* nmem = nullptr;
190+
void* tmem = nullptr;
191+
int ret = 0;
192+
193+
size_t size = r.to - r.from;
194+
void* start = r.from;
195+
196+
// Allocate temporary region preparing for copy
197+
nmem = mmap(nullptr, size,
198+
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
199+
if (nmem == MAP_FAILED) {
200+
PrintSystemError(errno);
201+
return -1;
202+
}
203+
204+
memcpy(nmem, r.from, size);
205+
206+
// We already know the original page is r-xp
207+
// (PROT_READ, PROT_EXEC, MAP_PRIVATE)
208+
// We want PROT_WRITE because we are writing into it.
209+
// We want it at the fixed address and we use MAP_FIXED.
210+
tmem = mmap(start, size,
211+
PROT_READ | PROT_WRITE | PROT_EXEC,
212+
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0);
213+
if (tmem == MAP_FAILED) {
214+
PrintSystemError(errno);
215+
munmap(nmem, size);
216+
return -1;
217+
}
218+
219+
ret = madvise(tmem, size, MADV_HUGEPAGE);
220+
if (ret == -1) {
221+
PrintSystemError(errno);
222+
ret = munmap(tmem, size);
223+
if (ret == -1) {
224+
PrintSystemError(errno);
225+
}
226+
ret = munmap(nmem, size);
227+
if (ret == -1) {
228+
PrintSystemError(errno);
229+
}
230+
231+
return -1;
232+
}
233+
234+
memcpy(start, nmem, size);
235+
ret = mprotect(start, size, PROT_READ | PROT_EXEC);
236+
if (ret == -1) {
237+
PrintSystemError(errno);
238+
ret = munmap(tmem, size);
239+
if (ret == -1) {
240+
PrintSystemError(errno);
241+
}
242+
ret = munmap(nmem, size);
243+
if (ret == -1) {
244+
PrintSystemError(errno);
245+
}
246+
return -1;
247+
}
248+
249+
// Release the old/temporary mapped region
250+
ret = munmap(nmem, size);
251+
if (ret == -1) {
252+
PrintSystemError(errno);
253+
}
254+
255+
return ret;
256+
}
257+
258+
// This is the primary API called from main
259+
int MapStaticCodeToLargePages() {
260+
struct text_region r = FindNodeTextRegion();
261+
if (r.found_text_region == false) {
262+
fprintf(stderr, "Hugepages WARNING: failed to find text region\n");
263+
return -1;
264+
}
265+
266+
if (r.from > reinterpret_cast<void*>(&MoveTextRegionToLargePages))
267+
return MoveTextRegionToLargePages(r);
268+
269+
return -1;
270+
}
271+
272+
bool IsLargePagesEnabled() {
273+
return IsTransparentHugePagesEnabled();
274+
}
275+
276+
} // namespace node

src/large_pages/node_large_page.h

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
// Copyright (C) 2018 Intel Corporation
2+
//
3+
// Permission is hereby granted, free of charge, to any person obtaining a copy
4+
// of this software and associated documentation files (the "Software"),
5+
// to deal in the Software without restriction, including without limitation
6+
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
7+
// and/or sell copies of the Software, and to permit persons to whom
8+
// the Software is furnished to do so, subject to the following conditions:
9+
//
10+
// The above copyright notice and this permission notice shall be included
11+
// in all copies or substantial portions of the Software.
12+
//
13+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
14+
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
16+
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
17+
// OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
18+
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
19+
// OR OTHER DEALINGS IN THE SOFTWARE.
20+
//
21+
// SPDX-License-Identifier: MIT
22+
23+
#ifndef SRC_LARGE_PAGES_NODE_LARGE_PAGE_H_
24+
#define SRC_LARGE_PAGES_NODE_LARGE_PAGE_H_
25+
26+
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
27+
28+
29+
namespace node {
// Returns true when the transparent huge page setting read from
// /sys/kernel/mm/transparent_hugepage/enabled is "always" or "madvise".
bool IsLargePagesEnabled();
// Moves node's static code onto 2M pages. Returns -1 on failure; otherwise
// returns the result of the move (0 on success).
int MapStaticCodeToLargePages();
} // namespace node
33+
34+
#endif // NODE_WANT_INTERNALS
35+
#endif // SRC_LARGE_PAGES_NODE_LARGE_PAGE_H_

0 commit comments

Comments
 (0)