`
buliedian
  • 浏览: 1194419 次
  • 性别: Icon_minigender_2
  • 来自: 北京
文章分类
社区版块
存档分类
最新评论

powerbuilder P-code 中的JP跳转指令的反向工程

阅读更多

在我的 decompiler 中关闭 “JP to statement” 开关后,得到的是原始的、未经处理的跳转指令。

类似汇编中我们写的跳转。这就是高级语言和低级语言的差别。低阶语言更繁琐和趋近于机器处理过程。比如汇编中的寻址,基本就是cpu取数和处理数的一个过程。

P-code中,顺序结构倒好处理,直接把赋值,函数调用搞定也就没什么了,唯一需要处理的是这些跳转。

在汇编中,有许多种跳转,比如相等跳、不等跳等等。但是在高级语言(如 pb)中,语言对 bool 有明确规定,所以 if、while 等语句的 condition 部分都必须是 bool 类型的表达式。相比之下,C++ 中在比较处把 == 误写成 = 也不会报编译错误,因为赋值表达式的结果可以隐式转换为 bool;而 pb 中不存在其他类型到 bool 的隐式转换,你在条件判定处写 =,编译器就知道是逻辑意义上的比较,而不是赋值。所以在 pb 中,我们只有三种跳转:JTP、JFP、JXP,分别是“为真跳”、“为假跳”、“绝对跳”。当然,只知道这三种区别还不行,还没有足够的信息反向成高级语言中的 statement。我们在分析代码时,已经在前期顺便将跳转指令当前的 offset 和跳转目标地址(target address)做了比较,区分出了是“往前跳”还是“往后跳”,这样就不必在长长的字符串中再做繁琐(而且低效)的判定。

我们编写了几种结构加以分析、区别,以便还原,并找出其中的一些规律。因为现代编程习惯中已经废弃了 goto 的使用(虽然它还是关键字),所以我们约定:我们写的 statement 都是成块的。这样有助于我们得到一个模糊的表述。

代码 _0012: _L_ (1) = 2 _J_F_F_0032

意义: 地址 logic expression JP

我们拿两个相近似的结构来对比:

(A)

_0000:ls_note = "if ... end if"

_0012:_L_ (1) = 2 _J_F_F_0032
_0024:ll_1 = 1
_0032:

(B)

_0052:ls_note = "if ... else ... end if"
_0064:_L_ (1) = 2 _J_F_F_0088
_0076:ll_1 = 3
_0084:_J_X_F_0096
_0088:ll_1 = 3
_0096

在a结构中我们看到,可以这么描述我们即将编程的伪码:

如果存在 logic(没有 logic 的话,那就只有 goto 语句产生的绝对跳转了),并且存在“往后跳”,并且从当前行遍历到“跳转目标行”之间再无其他 “logic” 和“其他跳转指令”,那我们就把它还原成一条 “if ... then ... end if” 语句。

在b结构中,我们描述为:

如果存在 logic,并且存在“往后跳”,并且从当前行遍历到“跳转目标行”之间存在“绝对往后跳”,那我们就把它还原成 “if ... then ... else ... end if” 语句。

当然,这也存在嵌套问题,我想出于简化的设计思路,就没必要用递归。可以采用层层剥菜的方式,先处理内层,处理完后扫描一次即可。

就是必须把“最小的块”找到,然后先处理。

以上只是大致思路。还未具体实现,估计得耗时几天才能处理好。

//20090908:statement反向已经满意搞定,包括缩进显示,都搞好了。

//object name: lf_333.fun
global type lf_333 from lf_333
end type

forward prototypes
global subroutine lf_333() throws exception
end prototypes

global subroutine lf_333() throws exception;
//variables list
longll_1
longa
stringls_note
//global var: stringgs_id5

//expression lines: 66

gs_id5 = ""
ls_note = "if ... end if"

if 1 = 2 then
ll_1 = 1
end if

ls_note = "if .do loop unitl. end if"
1 = 2
ll_1 = 2

do
ll_1 = 8

if 1 = 2 then
ll_1 = 2
end if

loop while ll_1 = 0

ls_note = "if ... else ... end if"

if 1 = 2 then
ll_1 = 3
ll_1 = 3
ll_1 = 3
else
ll_1 = 3
ll_1 = 3
ll_1 = 3
end if

ls_note = "do while ... loop"

do while ll_1 = 0
ll_1 = 5
loop

ls_note = "do until ... loop"

do until ll_1 = 0
ll_1 = 6
loop

ls_note = "do ...loop while"

do
ll_1 = 7
loop while ll_1 = 0

ls_note = "do ...loop until"

do
ll_1 = 8
loop while ll_1 = 0

ls_note = "if if if if if end if end if end if end if"

if a = 0 then

if a = 0 then

if a = 0 then

if a = 0 then
a = 0
end if

end if

end if

end if

ls_note = "choose case.. end choose"

choose case ll_1
case 1
ll_1 = 111

case 2
ll_1 = 111

case 3
ll_1 = 111

case 4
ll_1 = 111

case 5
ll_1 = 111
end choose


choose case ll_1
case 1
ll_1 = 222

case 2 , 3 , 4 , 5
ll_1 = 222

case 2 to 5
ll_1 = 222

case is >= 3

do while 1 = 2
ll_1 = 222
loop

ll_1 = 222

case (1 ) , 2 to 5 , 5
ll_1 = 222
end choose


end function

////////////////////////////////////////////

顺便写写关键字的缩进处理。

声明结构并初始化:

// Keyword table consulted by the indentation pass: one row per statement
// keyword that can start a line and affect its indentation.
//
// Each row is written in the order { flag, KeyWords, cLen }
// (NOTE(review): member order is inferred from the initializer values and
// from the members the pass reads -- confirm against strc_INDENTATION):
//   flag     - action code the pass dispatches on (switch over 1..5)
//   KeyWords - keyword text compared against the start of the line
//   cLen     - number of leading characters compared; equals the keyword length
//
// NOTE(review): "end" is a prefix of "end choose" and "else" is a prefix of
// "elseif", so a first-match prefix scan over this table in row order reaches
// the shorter keyword first.
const strc_INDENTATION KEY_INDENTATION[MAX_KEY_INDENTATION] = {
    { 1, "if",         2  },
    { 3, "else",       4  },
    { 3, "elseif",     6  },
    { 2, "end",        3  },
    { 1, "for",        3  },
    { 2, "next",       4  },
    { 4, "choose",     6  },  // special: paired with the -t applied to "case"
    { 5, "end choose", 10 },  // special: paired with the -t applied to "case"
    { 3, "case",       4  },
    { 1, "while",      5  },
    { 1, "do",         2  },
    { 2, "loop",       4  },
    { 1, "try",        3  },
    { 3, "catch",      5  },
    { 3, "finally",    7  }
};

//直接给代码,很简单也不用解释了。

// Indentation pass over the decompiled listing held in MemoProcess.
//
// Every line is classified by the keyword it starts with (looked up in
// KEY_INDENTATION) and is then prefixed with tabs according to the current
// nesting depth. Action codes (KEY_INDENTATION[k].flag):
//   0  ordinary line          : print at current depth
//   1  block opener           : print at current depth, then depth + 1
//   2  block closer           : depth - 1, then print
//   3  mid-block keyword      : print one level shallower than the body,
//      (else/elseif/case/...)   depth unchanged
//   4  "choose"               : print at current depth, then depth + 2
//                               (one level for "case", one for its body)
//   5  "end choose"           : depth - 2, then print
int iIndent0 = 0;      // current nesting depth, in tabs
short iChanged;        // action code selected for the current line
astring asIndent;      // always holds iIndent0 tab characters
char chFirst;          // first character of the current line

ilines = MemoProcess->Lines->Count;

for (int i = 0; i < ilines; i++) {
    AnsiString asLine = MemoProcess->Lines->Strings[i];
    const int iLineLen = asLine.Length();
    int iBestLen = 0;  // length of the longest keyword matched so far

    iChanged = 0;

    // AnsiString is 1-based and an empty line has no character 1, so blank
    // lines must never be indexed; they simply stay action 0.
    if (iLineLen > 0) {
        chFirst = asLine[1];

        // Cheap pre-filter: only these letters can start a table keyword.
        if ('i' == chFirst || 'e' == chFirst || 'f' == chFirst || 'n' == chFirst ||
            'c' == chFirst || 'w' == chFirst || 'd' == chFirst || 'l' == chFirst || 't' == chFirst)
        {
            for (int k = 0; k < MAX_KEY_INDENTATION; k++) {
                const int iKeyLen = KEY_INDENTATION[k].cLen;

                // Keep the longest match so that "end choose" wins over
                // "end" regardless of the order of the table rows.
                if (iKeyLen <= iBestLen || iKeyLen > iLineLen) {
                    continue;
                }

                if (!(LeftStr(asLine, iKeyLen) == KEY_INDENTATION[k].KeyWords)) {
                    continue;
                }

                // Require a word boundary after the keyword; otherwise an
                // identifier such as "format" or "double_x" would be taken
                // for "for" / "do" and corrupt the depth. The dash is not
                // treated as an identifier character here because it is
                // also the minus operator.
                if (iKeyLen < iLineLen) {
                    const char chNext = asLine[iKeyLen + 1];
                    const bool bIdentChar =
                        (chNext >= 'a' && chNext <= 'z') ||
                        (chNext >= 'A' && chNext <= 'Z') ||
                        (chNext >= '0' && chNext <= '9') ||
                        chNext == '_' || chNext == '$' ||
                        chNext == '#' || chNext == '%';
                    if (bIdentChar) {
                        continue;
                    }
                }

                iBestLen = iKeyLen;
                iChanged = KEY_INDENTATION[k].flag;
            }
        }
    }

    switch (iChanged) {
    case 0: // ordinary line
        if (iIndent0 > 0) {
            MemoProcess->Lines->Strings[i] = asIndent + asLine;
        }
        break;

    case 1: // block opener: print this row, indent the following rows
        if (iIndent0 > 0) {
            MemoProcess->Lines->Strings[i] = asIndent + asLine;
        }
        asIndent = AnsiString::StringOfChar('\t', ++iIndent0);
        break;

    case 2: // block closer: outdent first, then print this row
        if (iIndent0 > 0) {
            asIndent = AnsiString::StringOfChar('\t', --iIndent0);
        }
        if (iIndent0 > 0) {
            MemoProcess->Lines->Strings[i] = asIndent + asLine;
        }
        break;

    case 3: // mid-block keyword: this row only sits one level out, so that
            // "else" lines up with its "if" and "case" sits between
            // "choose" and the case body; the depth itself is unchanged
        if (iIndent0 > 1) {
            MemoProcess->Lines->Strings[i] =
                AnsiString::StringOfChar('\t', iIndent0 - 1) + asLine;
        }
        break;

    case 4: // "choose": print this row, then open two levels
        if (iIndent0 > 0) {
            MemoProcess->Lines->Strings[i] = asIndent + asLine;
        }
        iIndent0 += 2;
        asIndent = AnsiString::StringOfChar('\t', iIndent0);
        break;

    case 5: // "end choose": close the two levels opened by "choose"
        iIndent0 = (iIndent0 > 2) ? iIndent0 - 2 : 0; // never go negative
        asIndent = AnsiString::StringOfChar('\t', iIndent0);
        if (iIndent0 > 0) {
            MemoProcess->Lines->Strings[i] = asIndent + asLine;
        }
        break;
    }
}

//附:

global subroutine lf_333() throws exception;
//variables list
longll_1
longa
stringls_note

//exprssion lines: 42
_0000:ls_note = "if ... end if"
_0012:_L_ (1) = 2 _J_F_F_0032
_0024:ll_1 = 1
_0032:_L_ (1) = 2 _J_F_F_0052
_0044:ll_1 = 2
_0052:ls_note = "if ... else ... end if"
_0064:_L_ (1) = 2 _J_F_F_0088
_0076:ll_1 = 3
_0084:_J_X_F_0096
_0088:ll_1 = 3
_0096:ls_note = "if ... elseif ... end if"
_00A8:_L_ (1) = 2 _J_F_F_00CC
_00BA:ll_1 = 4
_00C8:_J_X_F_00EC
_00CC:_L_ (2) = 3 _J_F_F_00EC
_00DE:ll_1 = 4
_00EC:ls_note = "do while ... loop"
_00FE:_L_ (ll_1) = 0 _J_F_F_0120
_010E:ll_1 = 5
_011C:_J_X_B_00FE
_0120:ls_note = "do until ... loop"
_0132:_L_ (ll_1) = 0 _J_T_F_0154
_0142:ll_1 = 6
_0150:_J_X_B_0132
_0154:ls_note = "do ...loop while"
_0166:ll_1 = 7
_0174:_L_ (ll_1) = 0 _J_T_B_0166
_0184:ls_note = "do ...loop until"
_0196:ll_1 = 8
_01A4:_L_ (ll_1) = 0 _J_F_B_0196
_01B4:ls_note = "if if if if if end if end if end if end if"
_01C6:_L_ (a) = 0 _J_F_F_0214
_01D6:_L_ (a) = 0 _J_F_F_0214
_01E6:_L_ (a) = 0 _J_F_F_0214
_01F6:_L_ (a) = 0 _J_F_F_0214
_0206:a = 0
_0214:_END
end function

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics