Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,10 @@ jobs:
run: |
./build/api-test

- name: test lre
run: |
./build/lre-test

windows-msvc:
runs-on: ${{ matrix.config.os }}
strategy:
Expand Down Expand Up @@ -205,6 +209,9 @@ jobs:
- name: test api
run: |
build\${{matrix.config.buildType}}\api-test.exe
- name: test lre
run: |
build\${{matrix.config.buildType}}\lre-test.exe
- name: Set up Visual Studio shell
uses: egor-tensin/vs-shell@v2
with:
Expand Down Expand Up @@ -245,6 +252,9 @@ jobs:
- name: test api
run: |
build\${{matrix.buildType}}\api-test.exe
- name: test lre
run: |
build\${{matrix.buildType}}\lre-test.exe

windows-ninja:
runs-on: windows-latest
Expand Down Expand Up @@ -277,6 +287,9 @@ jobs:
- name: test api
run: |
build\api-test.exe
- name: test lre
run: |
build\lre-test.exe

windows-sdk:
runs-on: windows-latest
Expand Down Expand Up @@ -310,6 +323,9 @@ jobs:
- name: test api
run: |
build\${{matrix.buildType}}\api-test.exe
- name: test lre
run: |
build\${{matrix.buildType}}\lre-test.exe

windows-mingw:
runs-on: windows-latest
Expand Down Expand Up @@ -364,6 +380,9 @@ jobs:
- name: test api
run: |
./build/api-test
- name: test lre
run: |
./build/lre-test
windows-mingw-shared:
runs-on: windows-latest
defaults:
Expand Down Expand Up @@ -457,6 +476,10 @@ jobs:
run: |
./build/api-test

- name: test lre
run: |
./build/lre-test

openbsd:
runs-on: ubuntu-latest
steps:
Expand Down
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,13 @@ add_executable(api-test
target_compile_definitions(api-test PRIVATE ${qjs_defines})
target_link_libraries(api-test PRIVATE qjs)

# Standalone libregexp regression test: lre-test.c is compiled together with
# the libregexp.c and libunicode.c sources into its own executable.
add_executable(lre-test
lre-test.c
libregexp.c
libunicode.c
)
# Use the same ${qjs_defines} compile definitions as the api-test target.
target_compile_definitions(lre-test PRIVATE ${qjs_defines})

# Unicode generator
#

Expand Down
17 changes: 11 additions & 6 deletions libregexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2250,7 +2250,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
case REOP_save_start:
case REOP_save_end:
val = *pc++;
assert(val < s->capture_count);
if (val >= s->capture_count)
return LRE_RET_BYTECODE_ERROR;
capture[2 * val + opcode - REOP_save_start] = (uint8_t *)cptr;
break;
case REOP_save_reset:
Expand All @@ -2259,7 +2260,8 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture,
val = pc[0];
val2 = pc[1];
pc += 2;
assert(val2 < s->capture_count);
if (val2 >= s->capture_count)
return LRE_RET_BYTECODE_ERROR;
while (val <= val2) {
capture[2 * val] = NULL;
capture[2 * val + 1] = NULL;
Expand Down Expand Up @@ -2552,14 +2554,14 @@ const char *lre_get_groupnames(const uint8_t *bc_buf)
return (const char *)(bc_buf + RE_HEADER_LEN + re_bytecode_len);
}

void lre_byte_swap(uint8_t *buf, size_t len, bool is_byte_swapped)
int lre_byte_swap(uint8_t *buf, size_t len, bool is_byte_swapped)
{
uint8_t *p, *pe;
uint32_t n, r, nw;

p = buf;
if (len < RE_HEADER_LEN)
abort();
return -1;

// format is:
// <header>
Expand All @@ -2574,12 +2576,14 @@ void lre_byte_swap(uint8_t *buf, size_t len, bool is_byte_swapped)
if (is_byte_swapped)
n = bswap32(n);
if (n > len - RE_HEADER_LEN)
abort();
return -1;

p = &buf[RE_HEADER_LEN];
pe = &p[n];

while (p < pe) {
if (*p >= REOP_COUNT)
return -1;
n = reopcode_info[*p].size;
switch (n) {
case 1:
Expand Down Expand Up @@ -2620,10 +2624,11 @@ void lre_byte_swap(uint8_t *buf, size_t len, bool is_byte_swapped)
inplace_bswap32(&p[13]);
break;
default:
abort();
return -1;
}
p = &p[n];
}
return 0;
}

#ifdef TEST
Expand Down
7 changes: 4 additions & 3 deletions libregexp.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ extern "C" {
#define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */
#define LRE_FLAG_UNICODE_SETS (1 << 8)

#define LRE_RET_MEMORY_ERROR (-1)
#define LRE_RET_TIMEOUT (-2)
#define LRE_RET_MEMORY_ERROR (-1)
#define LRE_RET_TIMEOUT (-2)
#define LRE_RET_BYTECODE_ERROR (-3)

uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
const char *buf, size_t buf_len, int re_flags,
Expand All @@ -59,7 +60,7 @@ int lre_exec(uint8_t **capture,
int lre_parse_escape(const uint8_t **pp, int allow_utf16);
bool lre_is_space(int c);

void lre_byte_swap(uint8_t *buf, size_t len, bool is_byte_swapped);
int lre_byte_swap(uint8_t *buf, size_t len, bool is_byte_swapped);

/* must be provided by the user */
bool lre_check_stack_overflow(void *opaque, size_t alloca_size);
Expand Down
73 changes: 73 additions & 0 deletions lre-test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
#ifdef NDEBUG
#undef NDEBUG
#endif
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "libregexp.h"

// Stub for the user-provided hook declared in libregexp.h.
// Both arguments are ignored; the answer is unconditionally false.
bool lre_check_stack_overflow(void *opaque, size_t alloca_size)
{
    (void)opaque;
    (void)alloca_size;
    return false;
}

// Stub hook: ignores its argument and always returns 0.
// NOTE(review): presumably 0 means "no timeout" to the regexp engine;
// confirm against libregexp.
int lre_check_timeout(void *opaque)
{
    (void)opaque;
    return 0;
}

// Allocator hook backed by the C library.
//   opaque: ignored.
//   ptr:    block to resize or release (may be NULL).
//   size:   requested size; 0 releases `ptr`.
// Returns NULL after a release, otherwise whatever realloc() returns.
void *lre_realloc(void *opaque, void *ptr, size_t size)
{
    (void)opaque;

    // Non-zero size: plain resize/allocate.
    if (size != 0)
        return realloc(ptr, size);

    // Zero size is handled explicitly as "free" instead of being
    // forwarded to realloc().
    free(ptr);
    return NULL;
}

// Regression test for https://github.com/quickjs-ng/quickjs/issues/1375
//
// Runs hand-written bytecode whose REOP_save_start operand (100) is far
// beyond the declared capture count (1). Without validation in
// lre_exec_backtrack this causes a heap-buffer-overflow; lre_exec is
// expected to fail with a negative return value instead.
static void oob_save_index(void)
{
    uint8_t bytecode[] = {
        // --- header ---
        0x00, 0x00,             // RE_HEADER_FLAGS = 0
        0x01,                   // RE_HEADER_CAPTURE_COUNT = 1
        0x00,                   // RE_HEADER_STACK_SIZE = 0
        0x04, 0x00, 0x00, 0x00, // RE_HEADER_BYTECODE_LEN = 4 (little-endian)
        // --- program ---
        0x05,                   // REOP_any
        0x0C, 0x64,             // REOP_save_start, index=100
        0x0B,                   // REOP_match
    };
    const uint8_t *subject = (const uint8_t *)"a";
    // Room for exactly one capture group (start/end pair).
    uint8_t *captures[2] = {NULL, NULL};

    int rc = lre_exec(captures, bytecode, subject, 0, 1, 0, NULL);

    // Any negative code counts as a rejected program.
    assert(rc < 0);
}

// Regression test for https://github.com/quickjs-ng/quickjs/issues/1376
//
// An opcode byte >= REOP_COUNT makes lre_byte_swap index past the end of
// the reopcode_info array. The call below imitates the big-endian
// deserialization path (is_byte_swapped=true), which is how JS_ReadRegExp
// invokes it: the bytecode length is stored little-endian on disk, so a
// big-endian reader sees it byte-swapped.
static void invalid_opcode_byte_swap(void)
{
    uint8_t bytecode[] = {
        // --- header ---
        0x00, 0x00,             // RE_HEADER_FLAGS = 0
        0x01,                   // RE_HEADER_CAPTURE_COUNT = 1
        0x00,                   // RE_HEADER_STACK_SIZE = 0
        0x00, 0x00, 0x00, 0x01, // RE_HEADER_BYTECODE_LEN = 1 (big-endian / byte-swapped)
        // --- program ---
        // NOTE(review): relies on 30 staying >= REOP_COUNT; confirm if
        // opcodes are ever added.
        0x1E,                   // opcode 30 >= REOP_COUNT (invalid)
    };

    int rc = lre_byte_swap(bytecode, sizeof(bytecode), /*is_byte_swapped*/true);

    // The swap must be refused rather than walking the opcode table.
    assert(rc < 0);
}

int main(void)
{
oob_save_index();
invalid_opcode_byte_swap();
return 0;
}
14 changes: 14 additions & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,20 @@ if tests.allowed()
),
)

# LRE test: standalone regression test for libregexp, built from
# lre-test.c plus the libregexp.c and libunicode.c sources.
test(
'lre',
executable(
'lre-test',
'lre-test.c',
'libregexp.c',
'libunicode.c',

c_args: qjs_c_args,
# Excluded from the default build.
build_by_default: false,
),
)

# Function.toString() test
test(
'function_source',
Expand Down
48 changes: 38 additions & 10 deletions quickjs.c
Original file line number Diff line number Diff line change
Expand Up @@ -37314,13 +37314,20 @@ static int JS_WriteRegExp(BCWriterState *s, JSRegExp regexp)

JS_WriteString(s, regexp.pattern);

if (is_be())
lre_byte_swap(str8(bc), bc->len, /*is_byte_swapped*/false);
if (is_be()) {
if (lre_byte_swap(str8(bc), bc->len, /*is_byte_swapped*/false)) {
fail:
JS_ThrowInternalError(s->ctx, "regex byte swap failed");
return -1;
}
}

JS_WriteString(s, bc);

if (is_be())
lre_byte_swap(str8(bc), bc->len, /*is_byte_swapped*/true);
if (is_be()) {
if (lre_byte_swap(str8(bc), bc->len, /*is_byte_swapped*/true))
goto fail;
}

return 0;
}
Expand Down Expand Up @@ -38573,8 +38580,13 @@ static JSValue JS_ReadRegExp(BCReaderState *s)
return JS_ThrowInternalError(ctx, "bad regexp bytecode");
}

if (is_be())
lre_byte_swap(str8(bc), bc->len, /*is_byte_swapped*/true);
if (is_be()) {
if (lre_byte_swap(str8(bc), bc->len, /*is_byte_swapped*/true)) {
js_free_string(ctx->rt, pattern);
js_free_string(ctx->rt, bc);
return JS_ThrowInternalError(ctx, "bad regexp bytecode");
}
}

return js_regexp_constructor_internal(ctx, JS_UNDEFINED,
JS_MKPTR(JS_TAG_STRING, pattern),
Expand Down Expand Up @@ -47532,10 +47544,18 @@ static JSValue js_regexp_exec(JSContext *ctx, JSValueConst this_val,
goto fail;
}
} else {
if (rc == LRE_RET_TIMEOUT) {
switch(rc) {
case LRE_RET_TIMEOUT:
JS_ThrowInterrupted(ctx);
} else {
break;
case LRE_RET_MEMORY_ERROR:
JS_ThrowInternalError(ctx, "out of memory in regexp execution");
break;
case LRE_RET_BYTECODE_ERROR:
JS_ThrowInternalError(ctx, "corrupted bytecode in regexp execution");
break;
default:
abort();
}
goto fail;
}
Expand Down Expand Up @@ -47722,10 +47742,18 @@ static JSValue JS_RegExpDelete(JSContext *ctx, JSValueConst this_val, JSValue ar
goto fail;
}
} else {
if (ret == LRE_RET_TIMEOUT) {
switch(ret) {
case LRE_RET_TIMEOUT:
JS_ThrowInterrupted(ctx);
} else {
break;
case LRE_RET_MEMORY_ERROR:
JS_ThrowInternalError(ctx, "out of memory in regexp execution");
break;
case LRE_RET_BYTECODE_ERROR:
JS_ThrowInternalError(ctx, "corrupted bytecode in regexp execution");
break;
default:
abort();
}
goto fail;
}
Expand Down