2024CISCN初赛Easy-VT复现

2024CISCN初赛Easy-VT复现

5m10v3

这道题当时国赛的时候是0解,但是前一阵子看几个师傅写的文章确实不错,因此就探索一下其中的奥秘,笔者将会采用两种方式来解决这道题

逆向分析

第一步进行查壳,其实当时拿到这个赛题的时候就在想Themida,VMP这些强壳都上了还玩鸡毛,但是要想的是这只是CTF比赛肯定不会这么变态

我们可以通过Segments窗口查看一下那些新增的节,发现只是徒有虚名,这些只是混淆选手,那些新增的例如.vmp都没有东西

方法一:利用脚本去除混淆

发现有很多类型的花指令,这里做个总结:

jz +1

这个指令就是跳过了一个字节,它的特征码为0F 84 01 00 00 00(jz rel32,共6个字节,偏移量为1),紧跟其后的那1个字节就是被跳过的垃圾字节

jz+2

与上面jz+1同理

jz+8

jge+6

此处花指令的特征码为0F 8D 06 00 00 00

jmp+6

这里的特征码是E9 06 00 00 00 00:jmp指令本身只有5个字节(E9 06 00 00 00),特征码末尾多加的一个00是被跳过的6个垃圾字节中的第一个

jmp -23

这个花指令很有意思,它是往回跳的:EB E9 是一条短跳转,偏移量0xE9按有符号数解释就是-23,也就是往回跳了23个字节

cmp reg,0E9h

这里的cmp al,0E9h 是一个无效的比较,后面没有任何跳转依赖它所改变的标志位

另外的几个就是单独几处有问题,我们可以单独列出

跳转无效地址,这里其实还可以留个心眼,交叉引用一下这里的地址看看有没有跳转到这里的,在4166D0这里存在跳转

41878D处会影响sub_418779这个函数的生成

去花脚本编写

我们用IDAPython编写去花脚本的思路是:遍历每一个段,根据特征码搜索花指令,然后逐个进行patch。这里笔者从其他师傅的文章中了解到了一个新的指令

UD2

含义如下:

undefined instruction. 该指令用于生成一个无效操作码。当CPU试图执行无效或未定义的操作码时,将发生无效的操作码异常。UD2指令除了引发无效的操作码异常外,与NOP指令相同。

1
2
3
4
5
这里的异常是指CPU在发生“错误”时生成的。大多数情况下,有些异常并不是真正的错误,而是中断的一种类型。比如: Page Fault。异常分类如下:

Faults (错误):这些错误可以被纠正,程序可以像什么都没有发生一样继续运行。
Traps (陷阱):陷阱指令执行后会立即被报告。
Aborts (终止):一些严重的不可恢复的错误。

简单来说,UD2和nop一样不做任何实际运算,区别只是执行到它时会触发无效操作码异常

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import idc
import idautils
import ida_search
import ida_segment
# Module-level placeholders; main() binds its own local variables with the
# same names, so these values are never read by it.
seg = cur_ea = ea = 0
def _patch_all_matches(start_ea, end_ea, message, signature, patches):
    """Search for every hit of *signature* from *start_ea* and patch each one.

    Args:
        start_ea: address where the search begins.
        end_ea: end address handed to ``ida_search.find_binary``.
        message: ``%``-format string with one address placeholder, printed
            for every hit.
        signature: space-separated hex byte string to search for.
        patches: iterable of ``(patch_function, offset, value)``; each
            function is called as ``patch_function(hit + offset, value)``.
    """
    cur_ea = start_ea
    while True:
        # 16 is the radix in which the signature string is written.
        ea = ida_search.find_binary(
            cur_ea, end_ea, signature, 16, ida_search.SEARCH_DOWN
        )
        if ea == idc.BADADDR or ea > end_ea:
            break
        print(message % ea)
        for patch, offset, value in patches:
            patch(ea + offset, value)
        # Continue one byte past the hit so the same hit is not found again.
        cur_ea = ea + 1


def main():
    """Overwrite the known junk-instruction byte patterns with ud2/nop.

    Every segment named ``.text`` or ``UPX0`` is scanned once per rule in
    ``junk_rules`` (in table order) and each hit is patched in the IDA
    database.  After the segment loop, a few fixed addresses that are not
    covered by any signature are patched as well.

    In the patch values ``0x0B0F`` is written little-endian as ``0F 0B``
    (ud2) and ``0x90`` is nop.
    """
    # Each rule: (log format, signature, ((patch function, offset, value), ...)).
    # Offsets are relative to the address where the signature matched.
    junk_rules = (
        # jz +1: the 1 byte following the 6-byte jz.
        ("Found Pattern1 at 0x%x", "0F 84 01 00 00 00",
         ((idc.patch_byte, 6, 0x90),)),
        # jz +2: the 2 bytes following the jz.
        ("Found Pattern2 at 0x%x", "0F 84 02 00 00 00",
         ((idc.patch_word, 6, 0x9090),)),
        # jz +8: the 8 bytes following the jz.
        ("Found Pattern3 at 0x%x", "0F 84 08 00 00 00",
         ((idc.patch_qword, 6, 0x9090909090909090),)),
        # jge +6: the 6 bytes following the jge become ud2 + 4 nops.
        ("Found Pattern4 at 0x%x", "0F 8D 06 00 00 00",
         ((idc.patch_word, 6, 0x0B0F), (idc.patch_dword, 8, 0x90909090))),
        # jmp +6: the 6 bytes following the 5-byte jmp become ud2 + 4 nops.
        ("Found Pattern5 at 0x%x", "E9 06 00 00 00 00",
         ((idc.patch_word, 5, 0x0B0F), (idc.patch_dword, 7, 0x90909090))),
        # EB E9 (short jmp -23): the jmp itself and the 4 bytes after it.
        # NOTE(review): a 2-byte signature can match unrelated bytes -
        # check the printed hits.
        ("Found Pattern6 at 0x%x", "EB E9",
         ((idc.patch_word, 0, 0x0B0F), (idc.patch_dword, 2, 0x90909090))),
        # 3C E9 (cmp al, 0E9h): the 4 bytes following the 2-byte cmp.
        # NOTE(review): same false-positive caveat as Pattern6.
        ("Found Pattern7 at 0x%x", "3C E9",
         ((idc.patch_word, 2, 0x0B0F), (idc.patch_word, 4, 0x9090))),
    )

    for seg in idautils.Segments():
        seg_name = idc.get_segm_name(seg)
        if seg_name not in (".text", "UPX0"):
            continue
        print("正在处理段: %s (0x%x)" % (seg_name, seg))
        end_ea = idc.get_segm_end(seg)
        for message, signature, patches in junk_rules:
            _patch_all_matches(seg, end_ea, message, signature, patches)

    # One-off locations for this specific binary that match no signature.
    idc.patch_word(0x413824, 0x0B0F)
    idc.patch_word(0x41878D, 0x0B0F)
    idc.patch_qword(0x41878F, 0x9090909090909090)
    idc.patch_byte(0x418797, 0x90)


# Run the junk-instruction remover when the file is executed as a script.
if __name__ == '__main__':
    main()
    # Completion message (spelling corrected from "Finshed").
    print("Finished!!!!")

运行完去花脚本,然后就保存为vt_patched.exe,然后打开之后我们发现大部分的函数都显示出来了,然后可以定位到sub_4098F0为我们的main函数

在main函数里面我们发现有很多这样的变量,发现他们只被读取了,没有写入,因此我们可以定义变量byte0-9

然后我们就会发现在程序里面有很多永真永假跳转,可以用下面的脚本把对byte0-9的读取(movsx)直接替换成对应的立即数赋值:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import idc
import idautils
import ida_bytes

def main():
    """Replace ``movsx r32, cs:byte<N>`` (N in 0..9) with ``mov r32, N``.

    Every instruction head in the segments named ``.text`` or ``UPX0`` is
    inspected.  A ``movsx`` whose second operand prints as ``cs:byteN`` is
    overwritten with ``mov r32, imm32`` (opcode ``B8+rd``) and NOP padding
    up to the original instruction length, then re-analysed.

    Instructions whose destination is not one of the eight legacy 32-bit
    registers, or that are shorter than the 5-byte replacement, are
    reported and left untouched, so an instruction is never half-patched.

    NOTE(review): this relies on the variables having been renamed
    ``byte0``..``byte9`` by hand so that the digit equals the stored value.
    """
    # Opcode byte of ``mov r32, imm32`` for each destination register.
    mov_imm32_opcode = {
        "eax": 0xB8, "ecx": 0xB9, "edx": 0xBA, "ebx": 0xBB,
        "esp": 0xBC, "ebp": 0xBD, "esi": 0xBE, "edi": 0xBF,
    }
    mov_len = 5  # one opcode byte + a 4-byte immediate
    name_prefix = "cs:byte"

    for seg in idautils.Segments():
        sgm_name = idc.get_segm_name(seg)
        if sgm_name not in (".text", "UPX0"):
            continue
        print("正在处理段: %s (0x%x)" % (sgm_name, seg))

        end_ea = idc.get_segm_end(seg)
        cur_ea = seg
        while cur_ea < end_ea and cur_ea != idc.BADADDR:
            mnemonic = idc.print_insn_mnem(cur_ea)
            dst = idc.print_operand(cur_ea, 0)
            src = idc.print_operand(cur_ea, 1)

            if mnemonic == "movsx" and src.startswith(name_prefix):
                try:
                    index = int(src[len(name_prefix):])
                except ValueError:
                    # Auto-generated names such as byte_41A000 are not ours.
                    index = None

                if index is not None and 0 <= index <= 9:
                    opcode = mov_imm32_opcode.get(dst)
                    insn_len = idc.get_item_size(cur_ea)
                    if opcode is None or insn_len < mov_len:
                        print("Skipped 0x%X: movsx %s, %s" % (cur_ea, dst, src))
                    else:
                        ida_bytes.patch_byte(cur_ea, opcode)
                        ida_bytes.patch_dword(cur_ea + 1, index)
                        # Pad the rest of the old instruction with nops.
                        for offset in range(mov_len, insn_len):
                            ida_bytes.patch_byte(cur_ea + offset, 0x90)
                        idc.create_insn(cur_ea)

            # Move on to the next item head; stop if there is none.
            next_ea = idc.next_head(cur_ea, end_ea)
            if next_ea == idc.BADADDR or next_ea <= cur_ea:
                break
            cur_ea = next_ea

# Entry point: fold the byte0-9 reads, then report completion.
if __name__ == "__main__":
    main()
    print("Finished!!!!")

然后我们在往下分析的时候发现了很多语义混淆的地方,这里有三处引用最多的函数,分别是sub_41307D,sub_413162,sub_4131D6。因为我们知道函数的返回值一般存储在rax/eax中,因此可以把对它们的调用改成对rax/eax的赋值:sub_41307D()返回传入的第一个参数,我们可以命名为ret_arg1;sub_413162()返回数字1,我们可以命名为ret_1;sub_4131D6()最后返回传入的第二个参数,因此可以命名为ret_arg2

去除语义混淆脚本编写

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import idc
import idautils
def main():
    """Replace calls to the three trivial helper functions with a ``mov``.

    Every instruction head in the segments named ``.text`` or ``UPX0`` is
    inspected.  A ``call`` whose operand prints as ``ret_arg1``, ``ret_1``
    or ``ret_arg2`` (names assigned by hand beforehand) is overwritten with
    five bytes that put the same result into rax/eax, then re-analysed.

    NOTE(review): the register choice (arg1 in rcx, arg2 in rdx) is taken
    from the original patch bytes and assumes the Windows x64 calling
    convention - confirm for other targets.
    """
    call_len = 5  # length of the replacement, and of a direct near call
    # Five replacement bytes for each recognised call target.
    replacements = {
        # REX.W + 89 /r with ModRM C8: mov rax, rcx; then two nops.
        "ret_arg1": (0x48, 0x89, 0xC8, 0x90, 0x90),
        # B8 imm32: mov eax, 1.
        "ret_1": (0xB8, 0x01, 0x00, 0x00, 0x00),
        # REX.W + 89 /r with ModRM D0: mov rax, rdx; then two nops.
        "ret_arg2": (0x48, 0x89, 0xD0, 0x90, 0x90),
    }

    for seg in idautils.Segments():
        sgm_name = idc.get_segm_name(seg)
        if sgm_name not in (".text", "UPX0"):
            continue
        print("正在处理段: %s (0x%x)" % (sgm_name, seg))

        end_ea = idc.get_segm_end(seg)
        cur_ea = seg
        # Walk the cursor, not the fixed segment start, up to the segment end.
        while cur_ea < end_ea and cur_ea != idc.BADADDR:
            if idc.print_insn_mnem(cur_ea) == "call":
                target = idc.print_operand(cur_ea, 0)
                new_bytes = replacements.get(target)
                if new_bytes is not None:
                    if idc.get_item_size(cur_ea) == call_len:
                        print("Target Ins at 0x%X (call %s)" % (cur_ea, target))
                        for offset, value in enumerate(new_bytes):
                            idc.patch_byte(cur_ea + offset, value)
                        idc.create_insn(cur_ea)
                    else:
                        # Unexpected encoding: do not touch the bytes.
                        print("Skipped 0x%X (call %s)" % (cur_ea, target))

            # Move on to the next item head; stop if there is none.
            next_ea = idc.next_head(cur_ea, end_ea)
            if next_ea == idc.BADADDR or next_ea <= cur_ea:
                break
            cur_ea = next_ea



# Entry point: run the call-replacement pass when executed as a script.
if __name__ == '__main__':
    main()

方法二:动态调试

正在编写中。。。。。。。。。

总结:

笔者复现这道题是为了学习师傅们去混淆的思考方式,以及如何编写去混淆脚本

参考文章

https://www.52pojie.cn/thread-2008309-1-1.html

https://blog.hxzzz.asia/archives/362/

  • Title: 2024CISCN初赛Easy-VT复现
  • Author: 5m10v3
  • Created at : 2025-03-20 00:00:00
  • Updated at : 2025-03-31 10:44:00
  • Link: https://redefine.ohevan.com/2025/03/20/2024CISCN初赛Easy-VT复现/
  • License: This work is licensed under CC BY-NC-SA 4.0.
Comments