base大家族与逆向分析 base在逆向中也算常见的加密了
算法分析 下面讲解base64和32和16
base64 以下内容为wiki解释
Base64 是一种基于64个可打印字符来表示二进制数据的表示方法。由于2^6=64,所以每6个比特为一个单元,对应某个可打印字符。3个字节有24个比特,对应于4个Base64单元,即3个字节可由4个可打印字符来表示。它可用来作为电子邮件的传输编码。在Base64中的可打印字符包括字母A-Z、a-z、数字0-9,这样共有62个字符,此外两个可打印符号在不同的系统中而不同。一些如uuencode的其他编码方法,和之后BinHex的版本使用不同的64字符集来代表6个二进制数字,但是不被称为Base64。
Base64常用于在通常处理文本数据的场合,表示、传输、存储一些二进制数据,包括MIME的电子邮件及XML的一些复杂数据。
编码规则
第一步,将每三个字节作为一组,一共是24个二进制位 第二步,将这24个二进制位分为四组,每个组有6个二进制位 第三步,在每组前面加两个00,扩展成32个二进制位,即四个字节 第四步,根据下表,得到扩展后的每个字节的对应符号,这就是Base64的编码值
如果要编码的字节数不能被3整除,最后会多出1个或2个字节,那么可以使用下面的方法进行处理:先使用0字节值在末尾补足,使其能够被3整除,然后再进行Base64的编码。在编码后的Base64文本后加上一个或两个=号,代表补足的字节数。也就是说,当最后剩余一个八位字节(1个byte,需补足2个字节)时,最后一个6位的Base64字节块有四位是0值,最后附加上两个等号;如果最后剩余两个八位字节(2个byte,需补足1个字节)时,最后一个6位的Base64字节块有两位是0值,最后附加一个等号。 如下表:
C语言实现
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Base64 alphabet: indices 0..63 are the data symbols, index 64 is the
 * padding character '='.
 */
const char base[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";

static int find_pos(char ch);
char *base64_encode(const char *data, int data_len, int *len);
char *base64_decode(const char *data, int data_len, int *len);

/*
 * Allocate a zero-filled buffer of `size` bytes.
 * Prints a message and terminates the process with a failure status when
 * memory is exhausted, so callers never see NULL from this helper.
 */
static void *alloc_or_die(size_t size)
{
    void *mem = calloc(size, 1);

    if (mem == NULL) {
        fprintf(stderr, "No enough memory.\n");
        exit(EXIT_FAILURE);
    }
    return mem;
}

/*
 * Return the 6-bit value (0..63) of a Base64 symbol, or -1 when `ch` is
 * not a data symbol (this includes '=' and the NUL terminator).
 */
static int find_pos(char ch)
{
    const char *hit;

    if (ch == '\0' || ch == '=') {
        return -1;
    }
    hit = strchr(base, ch);
    return hit != NULL ? (int)(hit - base) : -1;
}

/**
 * Base64-encode `data_len` bytes starting at `data`.
 *
 * @param data     input bytes (may contain NUL bytes)
 * @param data_len number of input bytes; negative values are treated as 0
 * @param len      receives the number of characters written (without NUL)
 * @return newly allocated, NUL-terminated string; the caller must free() it
 */
char *base64_encode(const char *data, int data_len, int *len)
{
    size_t groups;
    char *ret;
    char *out;
    int pos = 0;

    *len = 0;
    if (data_len < 0) {
        data_len = 0;
    }

    /* Every started group of 3 input bytes yields exactly 4 symbols. */
    groups = ((size_t)data_len + 2) / 3;
    ret = alloc_or_die(groups * 4 + 1);
    out = ret;

    while (pos < data_len) {
        unsigned int chunk = 0;
        int taken = 0;
        int i;

        while (taken < 3 && pos < data_len) {
            chunk = (chunk << 8) | (unsigned char)data[pos];
            pos++;
            taken++;
        }
        /* Left-align a short final group inside the 24-bit window. */
        chunk <<= (3 - taken) * 8;

        for (i = 0; i < 4; i++) {
            /* `taken` bytes fill symbols 0..taken; the rest is padding. */
            int idx = (i > taken) ? 64 : (int)((chunk >> ((3 - i) * 6)) & 0x3F);

            *out++ = base[idx];
        }
    }

    *out = '\0';
    *len = (int)(out - ret);
    return ret;
}

/**
 * Decode Base64 text.
 *
 * Up to two trailing '=' characters are ignored, so both padded and
 * unpadded input are accepted.
 *
 * @param data     Base64 characters (need not be NUL-terminated)
 * @param data_len number of characters; negative values are treated as 0
 * @param len      receives the number of decoded bytes (without the NUL)
 * @return newly allocated buffer holding the decoded bytes followed by a
 *         NUL byte (caller must free() it), or NULL when the input contains
 *         a character outside the Base64 alphabet
 */
char *base64_decode(const char *data, int data_len, int *len)
{
    int payload;
    int stripped;
    char *ret;
    char *out;
    int pos = 0;

    *len = 0;
    if (data_len < 0) {
        data_len = 0;
    }

    /* Drop at most two trailing padding characters. */
    payload = data_len;
    for (stripped = 0; stripped < 2 && payload > 0 && data[payload - 1] == '='; stripped++) {
        payload--;
    }

    /* Every started group of 4 symbols yields at most 3 bytes. */
    ret = alloc_or_die(((size_t)payload + 3) / 4 * 3 + 1);
    out = ret;

    while (pos < payload) {
        unsigned int chunk = 0;
        int taken = 0;
        int i;

        while (taken < 4 && pos < payload) {
            int value = find_pos(data[pos]);

            if (value < 0) {
                free(ret);
                return NULL;
            }
            chunk = (chunk << 6) | (unsigned int)value;
            pos++;
            taken++;
        }
        /* Left-align a short final group inside the 24-bit window. */
        chunk <<= (4 - taken) * 6;

        /* k symbols carry 6k bits, i.e. k - 1 complete bytes. */
        for (i = 0; i < taken - 1; i++) {
            *out++ = (char)((chunk >> ((2 - i) * 8)) & 0xFF);
        }
    }

    *out = '\0';
    *len = (int)(out - ret);
    return ret;
}

/*
 * Demo entry point: round-trips "hello" through the encoder and decoder.
 * Define BASE64_NO_MAIN to compile the codec into another program.
 */
#ifndef BASE64_NO_MAIN
int main(void)
{
    const char *former = "hello";
    char *after;
    char *decoded;
    int len1;
    int len2;

    printf("%s\n", former);

    after = base64_encode(former, 5, &len1);
    printf("%d %s\n", len1, after);

    decoded = base64_decode(after, len1, &len2);
    if (decoded == NULL) {
        fprintf(stderr, "invalid Base64 input\n");
        free(after);
        return 1;
    }
    printf("%d %s\n", len2, decoded);

    free(decoded);
    free(after);
    return 0;
}
#endif
base32 Base32这种数据编码机制,主要用来把二进制数据编码成可见的字符串,其编码规则是:任意给定一个二进制数据,以5个位(bit)为一组进行切分(base64以6个位(bit)为一组),对切分而成的每个组进行编码得到1个可见字符。Base32编码表字符集中的字符总数为2^5=32个,这也是Base32名字的由来。下面是Base32的table
C语言实现
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* Capacity limits of the static result buffers used below. */
enum {
    BASE32_BITS_PER_SYMBOL = 5,
    BASE32_MAX_SYMBOLS = 10,
    BASE32_MAX_BITS = BASE32_MAX_SYMBOLS * BASE32_BITS_PER_SYMBOL
};

/*
 * 32-symbol table: digits followed by lowercase letters without
 * 'a', 'i', 'l' and 'o' (the Geohash-style alphabet, not RFC 4648).
 */
static const char base32_alphabet[32] = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'b', 'c', 'd', 'e', 'f', 'g',
    'h', 'j', 'k', 'm', 'n', 'p', 'q', 'r',
    's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
};

/* Return the 5-bit value of symbol `m`, or -1 when it is not in the table. */
int find_number(char m)
{
    int i;

    for (i = 0; i < 32; ++i) {
        if (m == base32_alphabet[i]) {
            return i;
        }
    }
    return -1;
}

/**
 * Encode a textual bit string ("0"/"1" characters) into Base32 symbols.
 *
 * Bits are consumed five at a time, most significant bit first; trailing
 * bits that do not fill a complete group are ignored.
 *
 * @param bin_source NUL-terminated string of '0' and '1' characters
 * @return pointer to a static, NUL-terminated buffer that is overwritten by
 *         the next call, or NULL when a group contains a character other
 *         than '0'/'1' or the input needs more than BASE32_MAX_SYMBOLS
 *         symbols
 */
char *base32_encode(char *bin_source)
{
    static char str[BASE32_MAX_SYMBOLS + 1];
    size_t symbol_count = strlen(bin_source) / BASE32_BITS_PER_SYMBOL;
    size_t s;

    if (symbol_count > BASE32_MAX_SYMBOLS) {
        return NULL;
    }

    for (s = 0; s < symbol_count; ++s) {
        int value = 0;
        size_t b;

        for (b = 0; b < BASE32_BITS_PER_SYMBOL; ++b) {
            char bit = bin_source[s * BASE32_BITS_PER_SYMBOL + b];

            if (bit != '0' && bit != '1') {
                return NULL;
            }
            value = (value << 1) | (bit - '0');
        }
        str[s] = base32_alphabet[value];
    }

    /* Terminate explicitly so a shorter result never shows stale symbols. */
    str[symbol_count] = '\0';
    return str;
}

/**
 * Decode Base32 symbols into individual bits.
 *
 * Each symbol expands to five ints (0 or 1), most significant bit first, so
 * the result holds 5 * strlen(str_source) meaningful entries; the remaining
 * entries of the buffer are zero.
 *
 * @param str_source NUL-terminated string of symbols from base32_alphabet
 * @return pointer to a static array of BASE32_MAX_BITS ints that is
 *         overwritten by the next call, or NULL when the input is longer
 *         than BASE32_MAX_SYMBOLS or contains a symbol outside the table
 */
int *base32_decode(char *str_source)
{
    static int dec[BASE32_MAX_BITS];
    size_t symbol_count = strlen(str_source);
    size_t written = 0;
    size_t s;

    if (symbol_count > BASE32_MAX_SYMBOLS) {
        return NULL;
    }

    memset(dec, 0, sizeof dec);

    for (s = 0; s < symbol_count; ++s) {
        int value = find_number(str_source[s]);
        int shift;

        if (value < 0) {
            return NULL;
        }
        for (shift = BASE32_BITS_PER_SYMBOL - 1; shift >= 0; --shift) {
            dec[written++] = (value >> shift) & 1;
        }
    }
    return dec;
}
base16 先拖着,没见过base16的逆向
逆向分析 运算规则
base64编码是用64(2的6次方)个ASCII字符来表示任意字节(每个字节有256即2的8次方种取值),也就是每3个字节(24位)的二进制数据经过编码后变为4个ASCII字符显示,长度比原来增加1/3。
同样,base32就是用32(2的5次方)个特定ASCII字符来表示任意字节。所以,5个字节经过base32编码后会变为8个字符(5和8的最小公倍数为40,即每40位为一组),长度增加3/5。编码结果长度不足8的倍数时用“=”补足。
base16就是用16(2的4次方)个特定ASCII字符来表示任意字节。1个字节经过base16编码后会变为2个字符,长度增加一倍。由于每个字节恰好对应2个字符,base16不需要用“=”补足。
来几道题
2018巅峰极客网络安全技能挑战赛 RE(1) Simple Base-N 找不到原文件就不写了
大佬的WP
2017第二届广东省强网杯线上赛–Nonstandard (base32换表) 文件
/*
 * Decompiler output for the challenge's entry point.
 *
 * Zeroes Buffer and the adjacent locals v8/v9/v10, writes the prompt with
 * fputs to the stream at `_iob_func() + 1`, reads at most 29 bytes with fgets
 * from the stream at `_iob_func()` into Buffer, and prints "yes" only when
 * sub_401480(Buffer) returns 1. Always returns 0.
 *
 * NOTE(review): fgets is given 29 while Buffer is declared as 16 bytes;
 * presumably Buffer, v8, v9 and v10 are one contiguous stack buffer that the
 * decompiler split into separate variables - confirm in the stack view.
 */
int __cdecl main (int argc, const char **argv, const char **envp) { FILE *v3; FILE *v4; FILE *v5; char Buffer[16 ]; __int64 v8; int v9; __int16 v10; v9 = 0 ; *(_OWORD *)Buffer = 0 i64; v10 = 0 ; v8 = 0 i64; v3 = _iob_func(); fputs ("Place Input Flag:\n" , v3 + 1 ); v4 = _iob_func(); fgets(Buffer, 29 , v4); if ( sub_401480(Buffer) == 1 ) { v5 = _iob_func(); fputs ("yes\n" , v5 + 1 ); } return 0 ; }
接着看sub_401480函数
/*
 * Decompiler output for the flag check.
 *
 * Rejects any input whose strlen is not 28. Otherwise it passes the input to
 * sub_401070, copies up to 0x30 characters of the returned string into
 * Destination with strncpy_s, and compares it character by character against
 * aNadtxa66nbbdxa, stopping at the first mismatch.
 * Returns 1 only when all 48 characters matched, and -1 in every other case.
 *
 * NOTE(review): Destination is declared as a single char next to v7[49] while
 * strncpy_s is told the destination holds 0x32 bytes; presumably the two form
 * one 50-byte buffer split by the decompiler - confirm in the stack view.
 * NOTE(review): sub_401070 is called with one argument here although its
 * listing shows two parameters; the second register argument is not visible
 * in this call - confirm in the disassembly.
 */
int __thiscall sub_401480 (const char *this ) { const char *v2; unsigned int v3; unsigned int v4; int result; char Destination; char v7[49 ]; Destination = 0 ; memset (v7, 0 , sizeof (v7)); if ( strlen (this ) != 28 ) goto LABEL_7; v2 = (const char *)sub_401070(this ); strncpy_s(&Destination, 0x32 u, v2, 0x30 u); v3 = 0 ; v4 = strlen (&Destination); if ( !v4 ) goto LABEL_7; do { if ( aNadtxa66nbbdxa[v3] != *(&Destination + v3) ) break ; ++v3; } while ( v3 < v4 ); if ( v3 == 48 ) result = 1 ; else LABEL_7: result = -1 ; return result; }
70函数
/*
 * Decompiler output for the encoder called by sub_401480.
 *
 * Steps visible in the listing:
 *  1. Calls sub_401000() before touching the input.
 *  2. Counts input bytes at a1 until a NUL byte or a2 bytes, keeping the byte
 *     count in v3/v23 and the bit count (8 per byte) in v5.
 *  3. Picks the number of '=' characters from v5 % 0x28 (40 bits per group):
 *     8 -> 6, 16 -> 4, 24 -> 3, 32 -> 1, otherwise 0. These are the same
 *     padding counts RFC 4648 defines for Base32.
 *  4. Allocates (8 * v3 + 4) / 5 + padding + 1 zeroed bytes with calloc.
 *  5. For every group reads up to five input bytes (missing bytes become 0),
 *     combines them into a 40-bit value held in v20 (high part) and
 *     HIDWORD(v18) (low part), and writes eight characters looked up in
 *     byte_403020: the first index is the top five bits of a byte (`>> 3`),
 *     the others are successive 5-bit fields (`& 0x1F` after shifts of
 *     30, 25, 20, 15, 10, 5 and 0).
 *  6. Fills the padding with 0x3D ('=') and NUL-terminates the result.
 *
 * Returns the allocated string, or the NULL result of calloc on failure.
 * Nothing in this listing frees the buffer.
 */
_BYTE *__fastcall sub_401070 (int a1, unsigned int a2) { int v2; unsigned int v3; int v4; unsigned int v5; unsigned int v6; _BYTE *result; unsigned int v8; int v9; __int64 v10; int v11; __int64 v12; int v13; __int64 v14; int v15; __int64 v16; int v17; __int64 v18; __int64 v19; unsigned int v20; signed int v21; _BYTE *v22; unsigned int v23; unsigned int v24; _BYTE *v26; v2 = a1; sub_401000(); v3 = 0 ; v4 = 0 ; v23 = 0 ; v5 = 0 ; v21 = 0 ; v6 = 0 ; if ( a2 ) { do { if ( !*(_BYTE *)(v6 + v2) ) break ; ++v6; v5 += 8 ; ++v3; } while ( v6 < a2 ); v23 = v3; } switch ( v5 % 0x28 ) { case 8u : v4 = 6 ; goto LABEL_10; case 0x10 u: v4 = 4 ; goto LABEL_10; case 0x18 u: v4 = 3 ; goto LABEL_10; case 0x20 u: v4 = 1 ; LABEL_10: v21 = v4; break ; default : break ; } v24 = (8 * v3 + 4 ) / 5 ; result = calloc (__CFADD__(v4 + v24, 1 ) ? -1 : v4 + v24 + 1 , 1u ); v26 = result; if ( result ) { v8 = 0 ; if ( v3 ) { v22 = result; do { v9 = *(unsigned __int8 *)(v8 + v2); ++v8; v10 = v9; if ( v8 >= v3 ) { v11 = 0 ; } else { v11 = *(unsigned __int8 *)(v8 + v2); ++v8; } v12 = v11; if ( v8 >= v3 ) { v13 = 0 ; } else { v13 = *(unsigned __int8 *)(v8 + v2); ++v8; } v14 = v13; if ( v8 >= v3 ) { v15 = 0 ; } else { v15 = *(unsigned __int8 *)(v8 + v2); ++v8; } v16 = v15; if ( v8 >= v3 ) { v17 = 0 ; } else { v17 = *(unsigned __int8 *)(v8 + v2); ++v8; } v18 = v17; HIDWORD(v19) = HIDWORD(v14); LODWORD(v19) = (((v10 & 0xFFFFFFF8 ) << 8 ) + (v12 & 0xFFFFFFC0 | ((unsigned __int8)(v10 & 7 ) << 8 )) + (v12 & 0x3E )) << 8 ; v20 = ((v18 & 0x1F ) + __PAIR64__(HIDWORD(v18), v18 & 0xFFFFFFE0 | ((v16 & 3 ) << 8 )) + ((__PAIR64__(HIDWORD(v16), v16 & 0xFFFFFF80 | ((v14 & 0xF ) << 8 )) + ((__PAIR64__( (((v10 & 0xFFFFFFFFFFFFFFF8 ui64) << 8 ) + __PAIR64__(HIDWORD(v12), v12 & 0xFFFFFFC0 | ((v10 & 7 ) << 8 )) + (v12 & 0x3E )) >> 24 , v14 & 0xFFFFFFF0 | ((v12 & 1 ) << 8 )) + v19) << 8 ) + (v16 & 0x7C )) << 8 )) >> 32 ; HIDWORD(v18) = (v18 & 0x1F ) + (v18 & 0xFFFFFFE0 | ((v16 & 3 ) << 8 )) + (((v16 & 0xFFFFFF80 | 
((unsigned __int8)(v14 & 0xF ) << 8 )) + (((v14 & 0xFFFFFFF0 | ((unsigned __int8)(v12 & 1 ) << 8 )) + (_DWORD)v19) << 8 ) + (v16 & 0x7C )) << 8 ); *v22 = byte_403020[(unsigned __int8)v20 >> 3 ]; v22[1 ] = byte_403020[(__PAIR64__(v20, HIDWORD(v18)) >> 30 ) & 0x1F ]; v22[2 ] = byte_403020[(__PAIR64__(v20, HIDWORD(v18)) >> 25 ) & 0x1F ]; v22[3 ] = byte_403020[(__PAIR64__(v20, HIDWORD(v18)) >> 20 ) & 0x1F ]; v22[4 ] = byte_403020[(__PAIR64__(v20, HIDWORD(v18)) >> 15 ) & 0x1F ]; v22[5 ] = byte_403020[(__PAIR64__(v20, HIDWORD(v18)) >> 10 ) & 0x1F ]; LOBYTE(v19) = __PAIR64__(v20, HIDWORD(v18)) >> 5 ; v3 = v23; v22[6 ] = byte_403020[v19 & 0x1F ]; LOBYTE(v19) = byte_403020[BYTE4(v18) & 0x1F ]; v2 = a1; v22[7 ] = v19; v22 += 8 ; } while ( v8 < v23 ); result = v26; } if ( v21 > 0 ) memset (&result[v24], 0x3D u, v21); v26[v24 + v21] = 0 ; result = v26; } return result; }
这是一堆什么,发现
LODWORD(v16) = (((v23 & 0xFFFFFFF8) << 8) + (v18 & 0xFFFFFFC0 | ((v23 & 7) << 8)) + (v18 & 0x3E)) << 8;
v24[1 ] = byte_403020[(__PAIR__(v17, HIDWORD(v14)) >> 30 ) & 0x1F ]; v24[2 ] = byte_403020[(__PAIR__(v17, HIDWORD(v14)) >> 25 ) & 0x1F ]; v24[3 ] = byte_403020[(__PAIR__(v17, HIDWORD(v14)) >> 20 ) & 0x1F ]; v24[4 ] = byte_403020[(__PAIR__(v17, HIDWORD(v14)) >> 15 ) & 0x1F ]; v24[5 ] = byte_403020[(__PAIR__(v17, HIDWORD(v14)) >> 10 ) & 0x1F ];
每次取5个比特,分别赋给8个值,每个值5个位 ,这是base32
aNadtxa66nbbdxa数组是nAdtxA66nbbdxA71tUAE2AOlnnbtrAp1nQzGtAQGtrjC7===
在sub_401000,点进去
/*
 * Decompiler output for the routine that rewrites the lookup table
 * byte_403020 in place before it is used for encoding.
 *
 *  1. Adds 32 to every odd-indexed entry byte_403020[1], [3], ..., [25]
 *     (for ASCII uppercase letters this yields the lowercase letter).
 *  2. Swaps entries pairwise: v1 walks upwards from byte_40301F[1] while v2
 *     walks downwards from &aMnopqrstuvwxyz[13], until v2 is no longer above
 *     aMnopqrstuvwxyz.
 *  3. Copies "765321" (including its NUL terminator) to &aMnopqrstuvwxyz[14].
 *
 * NOTE(review): presumably byte_40301F[1] aliases byte_403020[0] and
 * aMnopqrstuvwxyz is byte_403020 + 12, which would make step 2 a full
 * reversal of the 26 letters and step 3 the last six table entries - confirm
 * from the data-segment addresses.
 * NOTE(review): the returned 12594 (0x3132, the characters '2' and '1') looks
 * like a leftover register value; the call in sub_401070 ignores it.
 */
__int16 sub_401000 () { int i; int v1; char *v2; char v3; for ( i = 1 ; i < 26 ; i += 2 ) byte_403020[i] += 32 ; v1 = 0 ; v2 = &aMnopqrstuvwxyz[13 ]; do { v3 = byte_40301F[++v1]; byte_40301F[v1] = *v2; *v2-- = v3; } while ( (int )v2 > (int )aMnopqrstuvwxyz ); strcpy (&aMnopqrstuvwxyz[14 ], "765321" ); return 12594 ; }
这里是重新修改的密码表了,
字母倒序
奇数小写偶数大写
后面又加入765321 所以表的顺序是
zYxWvUtSrQpOnMlKjIhGfEdCbA765321
# Ciphertext compared against in sub_401480 and the rebuilt custom table.
s = "nAdtxA66nbbdxA71tUAE2AOlnnbtrAp1nQzGtAQGtrjC7==="
table = "zYxWvUtSrQpOnMlKjIhGfEdCbA765321"


def find(x):
    """Return the 5-bit value of symbol x in table; '=' padding counts as 0."""
    if x == '=':
        return 0
    return table.index(x)


def decode(cipher):
    """Decode custom-table Base32 text into a string.

    The text is processed in groups of 8 symbols (40 bits, 5 bytes). Bytes
    that come only from '=' padding are dropped instead of being emitted as
    NUL characters. A trailing group shorter than 8 symbols is ignored.
    Raises ValueError when a symbol is not in table.
    """
    out = bytearray()
    for i in range(len(cipher) // 8):
        group = cipher[i * 8:i * 8 + 8]
        value = 0
        for symbol in group:
            value = (value << 5) | find(symbol)
        # n data symbols carry 5n bits, i.e. 5n // 8 complete bytes.
        data_symbols = len(group.rstrip('='))
        out += value.to_bytes(5, 'big')[:data_symbols * 5 // 8]
    # latin-1 maps each byte to the same code point chr() would produce.
    return out.decode('latin-1')


print(decode(s), end='')
得到
flag{f1ag_1s_enc0de_bA3e32!}
魔改Base 经过上面的分析也就可以知道Base中可变的几个部分
table
移位数据变化
从例子可以看出决定了题中Base-N的N是多少的是移位个数和移位距离
组合加密
很多题都会通过组合加密的方式来提升题目难度,Base中应该就是加密密文和table了
参考链接