an interesting TDD episode

I'm doing the roman-numerals kata in C.
I write a test as follows:
#include "to_roman.hpp"
#include <assert.h>
#include <string.h>

int main(void)
{
    char actual[32] = { '\0' };
    to_roman(actual, 111);
    assert(strcmp("CXI", actual) == 0);
}

I write a do-nothing implementation of to_roman.
I run the tests and I get (I kid you not) this:
...
test: to_roman.tests.c:26: main: Assertion `__extension__ ({ size_t __s1_len, __s2_len; (__builtin_constant_p ("CXI") && __builtin_constant_p (actual) && (__s1_len = __builtin_strlen ("CXI"), __s2_len = __builtin_strlen (actual), (!((size_t)(const void *)(("CXI") + 1) - (size_t)(const void *)("CXI") == 1) || __s1_len >= 4) && (!((size_t)(const void *)((actual) + 1) - (size_t)(const void *)(actual) == 1) || __s2_len >= 4)) ? __builtin_strcmp ("CXI", actual) : (__builtin_constant_p ("CXI") && ((size_t)(const void *)(("CXI") + 1) - (size_t)(const void *)("CXI") == 1) && (__s1_len = __builtin_strlen ("CXI"), __s1_len < 4) ? (__builtin_constant_p (actual) && ((size_t)(const void *)((actual) + 1) - (size_t)(const void *)(actual) == 1) ? __builtin_strcmp ("CXI", actual) : (__extension__ ({ const unsigned char *__s2 = (const unsigned char *) (const char *) (actual); register int __result = (((const unsigned char *) (const char *) ("CXI"))[0] - __s2[0]); if (__s1_len > 0 && __result == 0) { __result = (((const unsigned char *) (const char *) ("CXI"))[1] - __s2[1]); if (__s1_len > 1 && __result == 0) { __result = (((const unsigned char *) (const char *) ("CXI"))[2] - __s2[2]); if (__s1_len > 2 && __result == 0) __result = (((const unsigned char *) (const char *) ("CXI"))[3] - __s2[3]); } } __result; }))) : (__builtin_constant_p (actual) && ((size_t)(const void *)((actual) + 1) - (size_t)(const void *)(actual) == 1) && (__s2_len = __builtin_strlen (actual), __s2_len < 4) ? (__builtin_constant_p ("CXI") && ((size_t)(const void *)(("CXI") + 1) - (size_t)(const void *)("CXI") == 1) ? __builtin_strcmp ("CXI", actual) : (__extension__ ({ const unsigned char *__s1 = (const unsigned char *) (const char *) ("CXI"); register int __result = __s1[0] - ((const unsigned char *) (const char *) (actual))[0]; if (__s2_len > 0 && __result == 0) { __result = (__s1[1] - ((const unsigned char *) (const char *) (actual))[1]); if (__s2_len > 1 && __result == 0) { __result = (__s1[2] - ((const unsigned char *) (const char *) (actual))[2]); if (__s2_len > 2 && __result == 0) __result = (__s1[3] - ((const unsigned char *) (const char *) (actual))[3]); } } __result; }))) : __builtin_strcmp ("CXI", actual)))); }) == 0' failed.
...

So I work towards improving the diagnostic with a custom assert, as follows:
#include "to_roman.h"
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static void assert_roman(const char * expected, int n)
{
    char actual[32] = { '\0' };
    to_roman(actual, n);
    if (strcmp(expected, actual) != 0)
    {
        printf("to_roman(%d)\n", n);
        printf("expected: \"%s\"\n", expected);
        printf("  actual: \"%s\"\n", actual);
        assert(false);
    }
}

int main(void)
{
    assert_roman("CXI", 111);
}

I run this and my diagnostic is as follows:
test: to_roman.tests.c:16: assert_roman: Assertion `0' failed.
to_roman(111)
expected: "CXI"
  actual: ""
...

Much better :-)
Now I start to implement to_roman
#include "to_roman.h"
#include <string.h>

void to_roman(char * roman, int n)
{
    roman[0] = '\0';
    strcat(roman, "CXI");
}

And I'm at green.
I refactor to this:
#include "to_roman.h"
#include <string.h>

void to_roman(char * roman, int n)
{
    roman[0] = '\0';
    strcat(roman, "C");
    strcat(roman, "X");
    strcat(roman, "I");
}

I refactor to this:
#include "to_roman.h"
#include <string.h>

void to_roman(char * roman, int n)
{
    const char * hundreds[] = { "C" };
    const char * tens[]     = { "X" };
    const char * units[]    = { "I" };

    roman[0] = '\0';
    strcat(roman, hundreds[0]);
    strcat(roman, tens[0]);
    strcat(roman, units[0]);
}

Remembering that in my test, n is one-hundred-and-eleven, I refactor to this:
#include "to_roman.h"
#include <string.h>

void to_roman(char * roman, int n)
{
    const char * hundreds[] = { "", "C" };
    const char * tens[]     = { "", "X" };
    const char * units[]    = { "", "I" };

    roman[0] = '\0';
    strcat(roman, hundreds[1]);
    strcat(roman, tens[1]);
    strcat(roman, units[1]);
}

I refactor to this:
#include "to_roman.h"
#include <string.h>

void to_roman(char * roman, int n)
{
    const char * hundreds[] = { "", "C" };
    const char * tens[]     = { "", "X" };
    const char * units[]    = { "", "I" };

    roman[0] = '\0';
    strcat(roman, hundreds[n / 100]);
    n %= 100;
    strcat(roman, tens[n / 10]);
    n %= 10;
    strcat(roman, units[n]);
}

And I'm still at green. Now I add a new test:
int main(void)
{
    assert_roman("CXI", 111);
    assert_roman("CCXXII", 222);
}

I run it and am amazed to see it pass.
It takes me a little while to figure out what is going on.
I'll take it line by line.
When n == 222 this line:
    strcat(roman, hundreds[n / 100]);

is this
    strcat(roman, hundreds[2]);

and hundreds[2] is an out-of-bounds index. However, hundreds[2] just happens to evaluate to the same as tens[0] which is the empty string. So at this point roman is still the empty string. The next lines are these:
    n %= 100;
    strcat(roman, tens[n / 10]);

which is this:
    strcat(roman, tens[2]);

And tens[2] is also an out-of-bounds index. And tens[2] just happens to evaluate to the same as units[0] which is also the empty string. So at this point roman is still the empty string. The next lines are these:
    n %= 10;
    strcat(roman, units[n]);

which is this:
    strcat(roman, units[2]);

And yet again units[2] is an out-of-bounds index. This time units[2] just happens to evaluate to "CCXXII" from the test! So after this roman is "CCXXII" and the test passes! Amazing!

I edit the code to this:
void to_roman(char * roman, int n)
{
    const char * hundreds[] = { "", "C", "CC" };
    const char * tens[]     = { "", "X", "XX" };
    const char * units[]    = { "", "I", "II" };

    roman[0] = '\0';
    strcat(roman, hundreds[n / 100]);
    n %= 100;
    strcat(roman, tens[n / 10]);
    n %= 10;
    strcat(roman, units[n]);
}

And I'm still at green.

So now I'm wondering if there are any lessons I can learn from this episode. It was not a good idea to run the tests (to try and get an initial red) when doing so would knowingly cause the (unfinished) program to exhihibit undefined behaviour. In cyber-dojo terms an amber traffic-light is not the same as a red traffic-light. After adding the second test I should have edited to_roman as follows:
void to_roman(char * roman, int n)
{
    const char * hundreds[] = { "", "C", "" };
    const char * tens[]     = { "", "X", "" };
    const char * units[]    = { "", "I", "" };

    roman[0] = '\0';
    strcat(roman, hundreds[n / 100]);
    n %= 100;
    strcat(roman, tens[n / 10]);
    n %= 10;
    strcat(roman, units[n]);
}

Then I would have got a proper red:
test: to_roman.tests.c:17: assert_roman: Assertion `0' failed.
to_roman(222)
expected: "CCXXII"
  actual: ""
...