A Comparison Algorithm for TDD

Can you test-drive an algorithm?

There’s been discussion in the abstract about whether you can test-drive an existing algorithm. To make it more concrete, I’ve transliterated a difference algorithm from C to Java. (It’s the “insert-delete” difference algorithm from Webb Miller’s Software Tools Sampler.)

What would constitute a solution? I think it could take one of two forms:

  1. A series of test cases (perhaps with code samples) that would show deriving this algorithm one bit at a time. It isn’t enough to just put test cases for each line; the test cases have to make sense in a way that drives the algorithm forward from simpler cases to more complex ones.
  2. A series of difference algorithms, each more sophisticated than the next, and “in range” of additional tests and refactoring.

Background

  • The original version read in files; I’ve simplified it to just take two arrays of objects.
  • I’ve done a little bit of renaming, and extracted a helper method.
  • The algorithm considers a matrix of moves, by increasing distance. It walks along the diagonals, looking for the best moves.
  • ORIGIN is used to create the effect of a negative index.
  • The algorithm builds up partial scripts, accumulating the best one it can out of previously formed scripts. Then at the end, it flips the script around so the steps come in the order you’d expect.
  • The “test” doesn’t capture expected output – it just writes it to stdout.

Challenge

One challenge is to create a solution as described above.

The next challenge is to say how much knowing the solution pushed you to write particular tests in a particular order. Would you have written those tests if you’d had a different algorithm in mind, or didn’t know an algorithm at all?

Diff.java

import java.io.PrintStream;

public class Diff {
    public class EditScript {
        public final static int DELETE = 1;
        public final static int INSERT = 2;

        int op;
        int line1;
        int line2;

        Object object;

        EditScript link;

        public String toString() {
            String opString = "UNKNOWN";
            if (op == 1)
                opString = "DELETE";
            if (op == 2)
                opString = "INSERT";

            return "Edit: " + opString + " line1: " + line1 + " line2: "
                    + line2 + " obj:" + object;
        }
    }

    public static int MAXFILE = 2000;
    public static int ORIGIN = MAXFILE;

    public void compare(PrintStream out, Object[] in1, Object[] in2) {
       int col, distance, lower, k, m, maxDistance, n, row, upper;

       int[] last_d = new int[2*MAXFILE+1];
       EditScript[] script = new EditScript[2*MAXFILE+1];
       EditScript newEdit;

       maxDistance = 2 * MAXFILE;

       m = in1.length;
       n = in2.length;

       for (row = 0; row < m && row < n && in1[row].equals(in2[row]); row++)
           ;
       last_d[ORIGIN] = row;
       script[ORIGIN] = null;

       lower = (row == m) ? ORIGIN + 1 : ORIGIN - 1;
       upper = (row == n) ? ORIGIN - 1 : ORIGIN + 1;

       if (lower > upper) {
           out.println("Files identical");
           return;
       }

       for (distance = 1; distance <= maxDistance; distance++) {
           // for each relevant diagonal
           for (k = lower; k <= upper; k+=2) {
               newEdit = new EditScript();

               // Move down from last d-1 on diagonal k+1 puts you
               // further along diagonal k than does moving right from last d-1
               // on diagonal k-1
               boolean moveDownBeatsMoveRight = (k == ORIGIN - distance) || k != ORIGIN+distance && last_d[k+1] >= last_d[k-1];

               if (moveDownBeatsMoveRight) {
                   row = last_d[k+1] + 1;
                   newEdit.link = script[k+1];
                   newEdit.op = EditScript.DELETE;
                   newEdit.object = in1[row-1];
               } else {
                   // move right from last d-1 on diagonal k-1
                   row = last_d[k-1];
                   newEdit.link = script[k-1];
                   newEdit.op = EditScript.INSERT;
                   newEdit.object = in2[row + k - ORIGIN - 1];
               }

               newEdit.line1 = row;
               col = row + k - ORIGIN;
               newEdit.line2 = col;
               script[k] = newEdit;

               // slide down the diagonal
               while (row < m && col < n && in1[row].equals(in2[col])) {
                   row++;
                   col++;
               }
               last_d[k] = row;

               if (row == m && col == n) {         // hit southeast corner, have the answer
                   print(out, script[k], in1, in2);
                   return;
               }

               if (row == m)  // hit last row, don't look to the left
                   lower = k+2;

               if (col == n)     // hit last column; don't look to the right
                   upper = k-2;
           }

           lower--;
           upper++;
       }
       exceeds(out, distance);
   }

    private EditScript reverse(EditScript start) {
        EditScript ahead;
        EditScript behind;
        EditScript result;

        ahead = start;
        result = null;
        while (ahead != null) {
            behind = result;
            result = ahead;
            ahead = ahead.link;
            result.link = behind; // flip the pointer
        }
        return result;
    }

    private void print(
            PrintStream out,
            EditScript start,
            Object[] in1,
            Object[] in2)
    {
        EditScript script = reverse(start);

        while (script != null) {
            if (script.op == EditScript.INSERT)
                out.println("Insert after line " + script.line1 + ":t" + in2[script.line2 - 1]);
            else {
                EditScript next = script.link;

                boolean change = next != null && next.op == EditScript.INSERT
                        && next.line1 == script.line1;

                if (change) {
                    out.println("Change line " + script.line1 + " from "
                            + in1[script.line1 - 1] + " to "
                            + in2[next.line2 - 1]);
                    script = script.link; // skip insert

                } else
                    out.println("Delete line " + script.line1 + ":t"
                            + in1[script.line1 - 1]);
            }
            script = script.link;
        }
    }

    private void exceeds(PrintStream out, int d) {
        out.println("At least " + d + " line(s) inserted or deleted");
    }
}

DiffTest.java

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;

import junit.framework.TestCase;

public class DiffTest extends TestCase {
   private final String crlf = "rn";
   private final String tab = "t";

   public void testZeroLength() {
       final String result = performTest(new String[0], new String[0]);
       assertEquals("Files identical" + crlf, result);
   }

   public void testOneLineEquals() {
       final String[] oneLineArray = new String[] { "one" };
       final String result = performTest(oneLineArray, oneLineArray);
       assertEquals("Files identical" + crlf, result);
   }

   public void testDeleteOne() {
       final String[] arrayOne = new String[] { "head", "delete", "tail", };

       final String[] arrayTwo = new String[] { "head", "tail", };

       final String result = performTest(arrayOne, arrayTwo);
       assertEquals("Delete line 2:" + tab + "delete" + crlf, result);
   }

   public void testInsertOne() {
       final String[] arrayOne = new String[] { "head", "tail", };

       final String[] arrayTwo = new String[] { "head", "insert", "tail", };

       final String result = performTest(arrayOne, arrayTwo);
       assertEquals("Insert after line 1:" + tab + "insert" + crlf, result);
   }

public void testDiff() {
   final String[] arrayOne = new String[] {
       "Line1",      // 1 = new line 6
       "then",       // 2 = new line 7
       "different",  // 3 = "Change line 3 from different to extra" (line 8)
       "that",       // 4 = new line 9
       "last",       // 5 = "Delete line 5"
       "new tail"    // 6 = common tail
   };

   final String[] arrayTwo = new String[] {
       "New first",  // 1 = "Insert [new line] after line 0"
       "new 2d",     // 2 = "Insert [new line] after line 0"
       "more",       // 3 = "Insert [new line] after line 0"
       "more",       // 4 = "Insert [new line] after line 0"
       "more",       // 5 = "Insert [new line] after line 0"
       "Line1",      // 6 = old line 1
       "then",       // 7 = old line 2
       "extra",      // 8 = "Change line 3 from different to extra"
       "that",       // 9 = old line 4
       "new tail"    // 10 = common tail
   };

   final String result = performTest(arrayOne, arrayTwo);

   assertEquals(
       "Insert after line 0:" + tab + "New first" + crlf +
         "Insert after line 0:" + tab + "new 2d" + crlf +
         "Insert after line 0:" + tab + "more" + crlf +
         "Insert after line 0:" + tab + "more" + crlf +
         "Insert after line 0:" + tab + "more" + crlf +
         "Change line 3 from different to extra" + crlf +
         "Delete line 5:" + tab + "last" + crlf,
       result);
      }

    public void testFigureOneDiff() {
       final String[] arrayOne = new String[] { "A", "B", "C", "A", "B", "B", "A" };
       final String[] arrayTwo = new String[] { "C", "B", "A", "B", "A", "C" };

       final String result = performTest(arrayOne, arrayTwo);
       assertEquals(
           "Delete line 1:" + tab + "A" + crlf +
             "Delete line 2:" + tab + "B" + crlf +
             "Insert after line 3:" + tab + "B" + crlf +
             "Delete line 6:" + tab + "B" + crlf +
             "Insert after line 7:" + tab + "C" + crlf,
           result);
    }

    private static String performTest(
        final String[] arrayOne,
        final String[] arrayTwo)
    {
       //  Setup.
       final ByteArrayOutputStream byteArrayOutput = new ByteArrayOutputStream();
       final PrintStream printStreamOutput = new PrintStream(byteArrayOutput);

       //  Perform Comparison.
       new Diff().compare(printStreamOutput, arrayOne, arrayTwo);

       //  Fetch result, as a String.
       final String result = byteArrayOutput.toString();
       return result;
   }
}

[June, 2005. Thanks to Jeff Grigg for providing these test cases.]