尾递归优化【-O2】

最新推荐文章于 2023-10-30 13:44:35 发布

原创最新推荐文章于 2023-10-30 13:44:35 发布 · 1.9k 阅读

本内容遵循CC 4.0 BY-SA版权协议

本文探讨了尾递归优化在C++中的应用，指出仅使用尾递归并不足以实现优化，需要开启-O2选项。通过对比测试，展示了尾递归优化后在计算Fibonacci数列时的时间性能提升，但空间复杂度仍为线性，逊于迭代版本。同时提及其他编译优化选项，如-fomit-frame-pointer。

用了“尾递归”，不一定有尾递归优化。优化要开启-O2选项才行！而且有其他约束条件哦。

c代码：

#include <stdio.h>
#include <string.h>

int geta(int n1, int n2, int n)
{ 
    if(n<=1)
        return n2;
    else
    {
       // printf("&n1: %p\n&n2: %p\n&n:%p\n", &n1, &n2, &n);
        return geta(n2,n1+n2,n-1);
    }
} 

#define N 30
#define ASD(x) #x
int main()
{ 

    printf("The " ASD(N) " %d\n", geta(1,1,N));
    return 0;
    
}

geta函数尾递归优化前：

(gdb) disas geta
Dump of assembler code for function geta:
   0x080483e4 <+0>:	push   ebp
   0x080483e5 <+1>:	mov    ebp,esp
   0x080483e7 <+3>:	sub    esp,0x18
   0x080483ea <+6>:	cmp    DWORD PTR [ebp+0x10],0x1
   0x080483ee <+10>:	jg     0x80483f5 <geta+17>
   0x080483f0 <+12>:	mov    eax,DWORD PTR [ebp+0xc]
   0x080483f3 <+15>:	jmp    0x8048416 <geta+50>
   0x080483f5 <+17>:	mov    eax,DWORD PTR [ebp+0x10]
   0x080483f8 <+20>:	lea    edx,[eax-0x1]
   0x080483fb <+23>:	mov    eax,DWORD PTR [ebp+0xc]
   0x080483fe <+26>:	mov    ecx,DWORD PTR [ebp+0x8]
   0x08048401 <+29>:	add    eax,ecx
   0x08048403 <+31>:	mov    DWORD PTR [esp+0x8],edx
   0x08048407 <+35>:	mov    DWORD PTR [esp+0x4],eax
   0x0804840b <+39>:	mov    eax,DWORD PTR [ebp+0xc]
   0x0804840e <+42>:	mov    DWORD PTR [esp],eax
   0x08048411 <+45>:	call   0x80483e4 <geta>      // ！！函数递归调用！！
   0x08048416 <+50>:	leave  
   0x08048417 <+51>:	ret    
End of assembler dump.

优化后：

(gdb) disas geta
Dump of assembler code for function geta:
   0x08048450 <+0>:	push   ebx
   0x08048451 <+1>:	mov    edx,DWORD PTR [esp+0x10]
   0x08048455 <+5>:	mov    ecx,DWORD PTR [esp+0xc]
   0x08048459 <+9>:	mov    ebx,DWORD PTR [esp+0x8]
   0x0804845d <+13>:	cmp    edx,0x1
   0x08048460 <+16>:	mov    eax,ecx
   0x08048462 <+18>:	jg     0x804846c <geta+28>
   0x08048464 <+20>:	jmp    0x8048477 <geta+39>
   0x08048466 <+22>:	xchg   ax,ax
   0x08048468 <+24>:	mov    ebx,ecx
   0x0804846a <+26>:	mov    ecx,eax
   0x0804846c <+28>:	sub    edx,0x1
   0x0804846f <+31>:	cmp    edx,0x1
   0x08048472 <+34>:	lea    eax,[ebx+ecx*1]
   0x08048475 <+37>:	jne    0x8048468 <geta+24>
   0x08048477 <+39>:	pop    ebx
   0x08048478 <+40>:	ret    
End of assembler dump.

优化后没有函数调用，栈帧不生长，相当于变递归为迭代了。

回到上一篇博客的例子，用尾递归计算Fibonacci，尾递归未优化，因此，尾递归虽然优化了时间性能（从指数级变为线性），但是空间复杂度依然是线性，比不上迭代版本的O（1）。这里加以优化，再对比。先再贴一遍代码：

PS：顺便提一下其他优化。

1. 不使用栈帧。call一个函数的时候，压入参数，push eip，然后就开始sub esp了，没有保存ebp和ebp切换的工作。

-fomit-frame-pointer

Dump of assembler code for function fib_rw(int, int, int):
   0x080486e4 <+0>:	sub    esp,0x1c                      <----没有针对ebp的操作
   0x080486e7 <+3>:	cmp    DWORD PTR [esp+0x28],0x1
   0x080486ec <+8>:	jg     0x80486f4 <fib_rw(int, int, int)+16>
   0x080486ee <+10>:	mov    eax,DWORD PTR [esp+0x24]
   0x080486f2 <+14>:	jmp    0x8048719 <fib_rw(int, int, int)+53>
   0x080486f4 <+16>:	mov    eax,DWORD PTR [esp+0x28]
   0x080486f8 <+20>:	lea    edx,[eax-0x1]
   0x080486fb <+23>:	mov    eax,DWORD PTR [esp+0x24]
   0x080486ff <+27>:	mov    ecx,DWORD PTR [esp+0x20]
   0x08048703 <+31>:	add    eax,ecx
   0x08048705 <+33>:	mov    DWORD PTR [esp+0x8],edx
   0x08048709 <+37>:	mov    DWORD PTR [esp+0x4],eax
   0x0804870d <+41>:	mov    eax,DWORD PTR [esp+0x24]
   0x08048711 <+45>:	mov    DWORD PTR [esp],eax
   0x08048714 <+48>:	call   0x80486e4 <fib_rw(int, int, int)>
   0x08048719 <+53>:	add    esp,0x1c
   0x0804871c <+56>:	ret    
End of assembler dump.

2.循环展开。对编译时能确定执行几次的循环，进行展开。 -funroll-loops

3.尾递归单独的优化选项据说是

-foptimize-sibling-calls.

，但是我单独试了下好像没用。

测试：

#include <iostream>
#include <sys/time.h>
#include <stdlib.h>
using namespace std;

//尾递归
int fib_rw(int a, int b, int n)
{
    if(n<=1)return b;  
   // cout<<"a: "<<&a  <<" b: "<<&b<<" n: "<<&n<<endl;
    return fib_rw(b, a+b, n-1);    
}

//迭代
int fib_dp(int n)
{
    int a=1;
    int b=1;
    int i;
    for(i=2;i<=n;i++)
    {
        b=a+b;
        a=b-a;
    }
    return b;
}

////// main
int main(int argc, char*argv[])
{

    int N=20;
    int TIMES=1000;
    
    if(argc>=2)
        N=atoi(argv[1]);
    if(argc>=3)
        TIMES=atoi(argv[2]);    
    struct timeval begin,end;
    int re;
    
    ////////////////////////
    gettimeofday(&begin,0); //尾递归
    {
        int i=TIMES;
        while(--i)
        {
            re=fib_rw(1,1,N);
        }
    }
    
    gettimeofday(&end,0); 
    if(begin.tv_usec>end.tv_usec)  
    {
        end.tv_sec--;
        end.tv_usec+=1000000;
    }
    cout<<"尾递归 "<<re<<" time: "<<end.tv_sec - begin.tv_sec<<" s "<<end.tv_usec - begin.tv_usec<<" us"<<endl;
    
   
    ////////////////////////
    gettimeofday(&begin,0); // 迭代
    {
        int i=TIMES;
        while(--i)
        {
            re=fib_dp(N);
        }
    }
    
    gettimeofday(&end,0); 
    if(begin.tv_usec>end.tv_usec)  
    {
        end.tv_sec--;
        end.tv_usec+=1000000;
    }
    cout<<"迭代  "<<re<<" time: "<<end.tv_sec - begin.tv_sec<<" s "<<end.tv_usec - begin.tv_usec<<" us"<<endl;
 
    return 0;
}

编译

chen@chen-book1:~$ g++ a.cpp -o a -O2

运行

chen@chen-book1:~$ ./a 5 1000
尾递归 8 time: 0 s 14 us
迭代 8 time: 0 s 12 us
chen@chen-book1:~$ ./a 10 1000
尾递归 89 time: 0 s 31 us
迭代 89 time: 0 s 24 us
chen@chen-book1:~$ ./a 15 1000
尾递归 987 time: 0 s 39 us
迭代 987 time: 0 s 37 us
chen@chen-book1:~$ ./a 20 1000
尾递归 10946 time: 0 s 51 us
迭代 10946 time: 0 s 49 us
chen@chen-book1:~$ ./a 25 1000
尾递归 121393 time: 0 s 61 us
迭代 121393 time: 0 s 62 us
chen@chen-book1:~$ ./a 30 1000
尾递归 1346269 time: 0 s 74 us
迭代 1346269 time: 0 s 74 us
chen@chen-book1:~$ ./a 35 1000
尾递归 14930352 time: 0 s 83 us
迭代 14930352 time: 0 s 87 us
chen@chen-book1:~$ ./a 40 1000
尾递归 165580141 time: 0 s 104 us
迭代 165580141 time: 0 s 100 us
chen@chen-book1:~$ ./a 45 1000
尾递归 1836311903 time: 0 s 106 us
迭代 1836311903 time: 0 s 112 us