Isolating legacy C code from external dependencies

Code naturally resists being isolated if it isn't designed to be isolatable. Isolating legacy code from external dependencies can be awkward. In C and C++ the transitive nature of #includes is the most obvious and direct reflection of the high coupling such code exhibits. However, there is a technique you can use to isolate a source file by cutting all its #includes. It relies on a little-known third way of writing a #include. From the C standard:

6.10.2 Source file inclusion
...
A preprocessing directive of the form:
  #include pp-tokens 
(that does not match one of the two previous forms) is permitted. The preprocessing tokens after include in the directive are processed just as in normal text. ... The directive resulting after all replacements shall match one of the two previous forms.


An example. Suppose you have a legacy C source file that you want to write some unit tests for. For example:
/*  legacy.c  */
#include "wibble.h"
#include <stdio.h>
...
/* Formats "a:b:a*b" into a local buffer, writes the buffer to
   some_file.txt, and returns the character count reported by sprintf. */
int legacy(int a, int b)
{
    FILE * stream = fopen("some_file.txt", "w");
    char buffer[256];
    int result = sprintf(buffer, 
                         "%d:%d:%d", a, b, a * b);
    /* NOTE(review): this writes all sizeof buffer bytes, not just the
       `result` formatted characters, and the stream returned by fopen
       is used without being checked. */
    fwrite(buffer, 1, sizeof buffer, stream);
    fclose(stream);
    return result;
}
Your first step is to create a file called nothing.h as follows:
/* nothing! */
nothing.h is a file containing nothing and is an example of the Null Object Pattern. Then you refactor legacy.c to this:
/* legacy.c */
#if defined(UNIT_TEST)
#  define LOCAL(header) "nothing.h"
#  define SYSTEM(header) "nothing.h"
#else
#  define LOCAL(header) #header
#  define SYSTEM(header) <header>
#endif

#include LOCAL(wibble.h)  /* <--- */
#include SYSTEM(stdio.h)  /* <--- */
...
/* Formats "a:b:a*b" into a local buffer, writes the buffer to
   some_file.txt, and returns the character count reported by sprintf. */
int legacy(int a, int b)
{
    FILE * stream = fopen("some_file.txt", "w");
    char buffer[256];
    int result = sprintf(buffer, 
                         "%d:%d:%d", a, b, a*b);
    /* NOTE(review): this writes all sizeof buffer bytes, not just the
       `result` formatted characters, and the stream returned by fopen
       is used without being checked. */
    fwrite(buffer, 1, sizeof buffer, stream);
    fclose(stream);
    return result;
}
Now structure your unit-tests for legacy.c as follows:
First you write null implementations of the external dependencies you want to fake (more Null Object Pattern):
/* legacy.test.c: Part 1 */

/* Null fake for fopen: ignores both arguments and returns a null
   pointer (0) instead of opening a file. */
static FILE * fopen(const char * restrict filename, 
                    const char * restrict mode)
{
    return 0;
}

/* Null fake for fwrite: ignores all four arguments and reports that
   zero elements were written. */
static size_t fwrite(const void * restrict ptr,   
                     size_t size, 
                     size_t nelem, 
                     FILE * restrict stream)
{
    return 0;
}

/* Null fake for fclose: ignores the stream and returns 0. */
static int fclose(FILE * stream)
{
    return 0;
}
Then #include the source file. Note carefully that you're #including legacy.c here and not legacy.h and you're #defining UNIT_TEST so that legacy.c will have no #includes of its own:
/* legacy.test.c: Part 2 */
#define UNIT_TEST
#include "legacy.c" 
Then write your tests:
/* legacy.test.c: Part 3 */
#include <assert.h>

/* First characterisation test: legacy(2, 9) must return the number of
   characters it formats. */
void first_unit_test_for_legacy(void)
{
    /* writes "2:9:18" which is 6 chars */
    /* assert takes exactly one expression, so the expected value is
       compared explicitly rather than passed as a second argument. */
    assert(legacy(2, 9) == 6);
}

/* Test driver: runs the single unit test and returns 0; a failing
   assert aborts before the return is reached. */
int main(void)
{
    first_unit_test_for_legacy();
    return 0;
}
When you compile legacy.test.c you will find your first problem - it does not compile! You have cut away all the #includes which cuts away not only the function declarations but also the type definitions, such as FILE which is a type used in the code under test, as well as in the real and the null fopen, fwrite, and fclose functions. What you need to do now is introduce a seam only for the functions:
/* stdio.seam.h */
#ifndef STDIO_SEAM_INCLUDED
#define STDIO_SEAM_INCLUDED

#include <stdio.h>

/* Seam for the <stdio.h> functions: a table of function pointers whose
   members have the same parameters and return types as fopen, fwrite
   and fclose. Code calls through the table (stdio.fopen(...), etc.) so
   that a different table can be supplied at link time. */
struct stdio_t
{
    /* same signature as fopen */
    FILE * (*fopen)(const char * restrict filename, 
                    const char * restrict mode);
    /* same signature as fwrite */
    size_t (*fwrite)(const void * restrict ptr, 
                     size_t size,  
                     size_t nelem, 
                     FILE * restrict stream);
    /* same signature as fclose */
    int (*fclose)(FILE * stream);
};

extern const struct stdio_t stdio;

#endif    
Now you Lean On The Compiler and refactor legacy.c to use stdio.seam.h:
/* legacy.c */   
#if defined(UNIT_TEST)
#  define LOCAL(header) "nothing.h"
#  define SYSTEM(header) "nothing.h"
#else
#  define LOCAL(header) #header
#  define SYSTEM(header) <header>
#endif

#include LOCAL(wibble.h) 
#include LOCAL(stdio.seam.h)  /* <--- */
...
/* Formats "a:b:a*b" into a local buffer, pushes the buffer through the
   stdio seam (fopen / fwrite / fclose via the stdio table), and returns
   the character count reported by sprintf. */
int legacy(int a, int b)
{
    FILE * stream = stdio.fopen("some_file.txt", "w");
    char buffer[256];
    int result = sprintf(buffer, 
                         "%d:%d:%d", a, b, a*b);
    /* NOTE(review): this passes all sizeof buffer bytes, not just the
       `result` formatted characters, and the stream returned by
       stdio.fopen is passed on without being checked. */
    stdio.fwrite(buffer, 1, sizeof buffer, stream);
    stdio.fclose(stream);
    return result;
}    
Now you can structure your null functions as follows:
/* legacy.test.c: Part 1 */
#include "stdio.seam.h"

/* Null-object fopen: opens nothing and always hands back a null stream. */
static FILE * null_fopen(const char * restrict filename,
                         const char * restrict mode)
{
    /* Both arguments are deliberately ignored. */
    (void)filename;
    (void)mode;
    return NULL;
}

/* Null-object fwrite: discards the data and reports zero elements written. */
static size_t null_fwrite(const void * restrict ptr,
                          size_t size,
                          size_t nelem,
                          FILE * restrict stream)
{
    const size_t elements_written = 0;

    /* All arguments are deliberately ignored. */
    (void)ptr;
    (void)size;
    (void)nelem;
    (void)stream;
    return elements_written;
}

/* Null-object fclose: closes nothing and always reports success (0). */
static int null_fclose(FILE * stream)
{
    (void)stream;  /* deliberately ignored */
    return 0;
}

/* Seam instance for the test build: every entry points at one of the
   null_ functions, so calls made through stdio.* do no real I/O. */
const struct stdio_t stdio =
{
    .fopen  = null_fopen,
    .fwrite = null_fwrite,
    .fclose = null_fclose,
};    
And voilà, you have a unit test. Now that you have your knife in the seam you can push it in a bit further. For example, you can do a little spying:
/* legacy.test.c: Part 1 */
#include "stdio.seam.h"
#include <assert.h>
#include <string.h>

/* Null-object fopen: opens nothing and always hands back a null stream. */
static FILE * null_fopen(const char * restrict filename,
                         const char * restrict mode)
{
    /* Both arguments are deliberately ignored. */
    (void)filename;
    (void)mode;
    return NULL;
}
    
/* Spy stand-in for fwrite: asserts that the data handed to it is the
   string "2:9:18", then reports zero elements written. size, nelem and
   stream are not inspected. */
static size_t spy_fwrite(const void * restrict ptr, 
                         size_t size, 
                         size_t nelem, 
                         FILE * restrict stream)
{
    /* ptr is compared as a C string, so it must be NUL-terminated;
       legacy() fills its buffer with sprintf, which guarantees that. */
    assert(strcmp("2:9:18", ptr) == 0);
    return 0;
}

/* Null-object fclose: closes nothing and always reports success (0). */
static int null_fclose(FILE * stream)
{
    (void)stream;  /* deliberately ignored */
    return 0;
}

/* Seam instance for the spying test build: fwrite is routed to the spy,
   while fopen and fclose remain null implementations. */
const struct stdio_t stdio =
{
    .fopen  = null_fopen,
    .fwrite =  spy_fwrite,
    .fclose = null_fclose,
};
This approach is pretty brutal, but it might just allow you to create an initial seam which you can then gradually prise open. If nothing else it allows you to create characterisation tests to familiarize yourself with legacy code.

You'll also need to create a trivial implementation of stdio.seam.h that the real code uses:
/* stdio.seam.c */
#include "stdio.seam.h"
#include <stdio.h>

/* Production seam instance: each entry forwards straight to the real
   <stdio.h> function of the same name. */
const struct stdio_t stdio =
{
    .fopen  = fopen,
    .fwrite = fwrite,
    .fclose = fclose,
};
The -include compiler option might also prove useful.

-include file
    Process file as if #include "file" appeared as the first line of the primary source file.


Using this you can create the following file:
/* include.seam.h */
#ifndef INCLUDE_SEAM
#define INCLUDE_SEAM

#if defined(UNIT_TEST)
#  define LOCAL(header) "nothing.h"
#  define SYSTEM(header) "nothing.h"
#else
#  define LOCAL(header) #header
#  define SYSTEM(header) <header>
#endif

#endif
and then compile with the -include include.seam.h option.

This allows your legacy.c file to look like this:
#include LOCAL(wibble.h) 
#include LOCAL(stdio.seam.h)
...
/* Formats "a:b:a*b" into a local buffer, pushes the buffer through the
   stdio seam (fopen / fwrite / fclose via the stdio table), and returns
   the character count reported by sprintf. */
int legacy(int a, int b)
{
    FILE * stream = stdio.fopen("some_file.txt", "w");
    char buffer[256];
    int result = sprintf(buffer, "%d:%d:%d", a, b, a*b);
    /* NOTE(review): this passes all sizeof buffer bytes, not just the
       `result` formatted characters, and the stream returned by
       stdio.fopen is passed on without being checked. */
    stdio.fwrite(buffer, 1, sizeof buffer, stream);
    stdio.fclose(stream);
    return result;
}    


No comments:

Post a Comment