[liblouis-liblouisxml] Re: House cleaning, i.e. pending patches

  • From: Timothy Lee <timothy.ty.lee@xxxxxxxxx>
  • To: liblouis-liblouisxml@xxxxxxxxxxxxx
  • Date: Tue, 25 May 2010 12:13:15 +0800

 Dear all,

On Thu, 20 May 2010 08:45:31 -0500, John J. Boyer wrote:

It looks like the code for outpos in the forward translator is
incomplete. I'll try to get my mind around the problem, but if someone
beats me that's fine. I still have to look at the back-translator.

On Thu, May 20, 2010 at 12:57:12PM +0200, Christian Egli wrote:
>  Hi
>
>  "Jonathan Sharp"<jonathans@xxxxxxxxxx>  writes:
>
>  >  The problem with holes in outpos also occurs during forward
>  >  translation, for example forward translating the string "Pride and
>  >  Prejudice" using en-us-g2 gives outpos values of 1 2 3 4 5 6 7 7 7 8
>  >  12 -1 -1 13 14 15 16 17 18
>
>  Ah, OK I see.
>
>  >  I wonder if it would be better to fix these problems at their source
>  >  rather than initializing outpos and then checking its values after
>  >  translation?
>
>  You're saying this problem should be fixed properly in the translation
>  code? But it seems to me that the patch from Timothy solves the problem
>  at least for the user. Of course it is not the best solution, but it
>  solves the problem. If someone comes along and solves the problem at its
>  source we can always take Timothy's patch out again.
>
>  What do you think?
>
>  Christian
I am Timothy Lee. I'm the person who supplied the patches for outpos. I was not subscribed to the mailing list, so I missed the discussion on 20th May.

Anyway, I've got a patch to correct inpos/outpos returned by the back-translator. The problem arises from the fact that during multi-pass translations, contraction invalidates the 1-to-1 relationship between source string and input buffer for each pass. This patch solves the problem by using the srcmapping[] array to map each character in current buffer back to original source string.

I've been testing this patch with en-us-g2.ctb, and it fixes the incorrect inpos/outpos values that result from contracted spaces. But I know for a fact that there are other places where the srcmapping[] array has to be modified. Please indicate whether this patch is in the right direction, and I'll continue to work on it.

Regards,
Timothy Lee
Index: liblouis/compileTranslationTable.c
===================================================================
--- liblouis/compileTranslationTable.c  (revision 355)
+++ liblouis/compileTranslationTable.c  (working copy)
@@ -3933,6 +3933,8 @@
 static int sizePassbuf1 = 0;
 static widechar *passbuf2 = NULL;
 static int sizePassbuf2 = 0;
+static int *srcmapping = NULL;
+static int sizeSrcmapping = 0;
 void *
 liblouis_allocMem (AllocBuf buffer, int srcmax, int destmax)
 {
@@ -3978,6 +3980,15 @@
          sizePassbuf2 = destmax;
        }
       return passbuf2;
+    case alloc_srcmapping:
+      if (srcmax > sizeSrcmapping)
+       {
+         if (srcmapping != NULL)
+           free (srcmapping);
+         srcmapping = malloc ((srcmax + 4) * sizeof (int));
+         sizeSrcmapping = srcmax;
+       }
+      return srcmapping;
     default:
       return NULL;
     }
@@ -4018,6 +4029,10 @@
     free (passbuf2);
   passbuf2 = NULL;
   sizePassbuf2 = 0;
+  if (srcmapping != NULL)
+    free (srcmapping);
+  srcmapping = NULL;
+  sizeSrcmapping = 0;
   opcodeLengths[0] = 0;
 }
 
Index: liblouis/louis.h
===================================================================
--- liblouis/louis.h    (revision 355)
+++ liblouis/louis.h    (working copy)
@@ -435,7 +435,8 @@
     alloc_typebuf,
     alloc_destSpacing,
     alloc_passbuf1,
-    alloc_passbuf2
+    alloc_passbuf2,
+    alloc_srcmapping,
   } AllocBuf;
 /* The following function definitions are hooks into 
 * compileTranslationTable.c. Some are used by other library modules. 
Index: liblouis/lou_backTranslateString.c
===================================================================
--- liblouis/lou_backTranslateString.c  (revision 355)
+++ liblouis/lou_backTranslateString.c  (working copy)
@@ -45,6 +45,7 @@
 static widechar *passbuf2 = NULL;
 static widechar *currentOutput;
 static unsigned char *typebuf = NULL;
+static int *srcmapping = NULL;
 static char *spacebuf;
 static int backTranslateString (void);
 static int makeCorrections (void);
@@ -104,6 +105,11 @@
     else
       passbuf1[k] = getDotsForChar (inbuf[k]);
   passbuf1[srcmax] = getDotsForChar (' ');
+  if (!(srcmapping = liblouis_allocMem (alloc_srcmapping, srcmax, destmax)))
+    return 0;
+  for (k = 0; k <= srcmax; k++)
+    srcmapping[k] = k;
+  srcmapping[srcmax] = srcmax;
   currentInput = passbuf1;
   if ((!(mode & pass1Only)) && (table->numPasses > 1 || table->corrections))
     {
@@ -267,7 +273,7 @@
        break;
       }
   if (src < *inlen)
-    *inlen = src;
+    *inlen = srcmapping[src];
   *outlen = dest;
   if (cursorPos != NULL)
     *cursorPos = cursorPosition;
@@ -534,7 +540,7 @@
 static void
 back_selectRule (void)
 {
-/*check for valid bcak-translations */
+/*check for valid back-translations */
   int length = srcmax - src;
   TranslationTableOffset ruleOffset = 0;
   static TranslationTableRule pseudoRule = { 0 };
@@ -796,26 +802,26 @@
          for (k = 0; k < outLength; k++)
            {
              if (inputPositions != NULL)
-               inputPositions[dest + k] = src + k;
+               inputPositions[dest + k] = srcmapping[src + k];
              if (outputPositions != NULL)
-               outputPositions[src + k] = dest + k;
+               outputPositions[srcmapping[src + k]] = dest + k;
            }
          for (k = outLength; k < inLength; k++)
            if (outputPositions != NULL)
-             outputPositions[src + k] = dest + outLength - 1;
+             outputPositions[srcmapping[src + k]] = dest + outLength - 1;
        }
       else
        {
          for (k = 0; k < inLength; k++)
            {
              if (inputPositions != NULL)
-               inputPositions[dest + k] = src + k;
+               inputPositions[dest + k] = srcmapping[src + k];
              if (outputPositions != NULL)
-               outputPositions[src + k] = dest + k;
+               outputPositions[srcmapping[src + k]] = dest + k;
            }
          for (k = inLength; k < outLength; k++)
            if (inputPositions != NULL)
-             inputPositions[dest + k] = src + inLength - 1;
+             inputPositions[dest + k] = srcmapping[src + inLength - 1];
        }
     }
   return putchars (outChars, outLength);
@@ -1432,7 +1438,10 @@
   if ((dest + startReplace - startMatch) > destmax)
     return 0;
   for (k = startMatch; k < startReplace; k++)
+  {
+    srcmapping[dest] = srcmapping[k];
     currentOutput[dest++] = currentInput[k];
+  }
   while (passIC < currentRule->dotslen)
     switch (passInstructions[passIC])
       {
@@ -1440,6 +1449,8 @@
       case pass_dots:
        if ((dest + passInstructions[passIC + 1]) > destmax)
          return 0;
+       for (k = 0;  k < passInstructions[passIC + 1];  ++k)
+         srcmapping[dest + k] = startMatch;
        memcpy (&currentOutput[dest], &passInstructions[passIC + 2],
                passInstructions[passIC + 1] * CHARSIZE);
        dest += passInstructions[passIC + 1];
@@ -1593,12 +1604,14 @@
        case CTO_Always:
          if ((dest + 1) > destmax)
            goto failure;
+         srcmapping[dest] = srcmapping[src];
          currentOutput[dest++] = currentInput[src++];
          break;
        default:
          goto failure;
        }
     }
+  srcmapping[dest] = srcmapping[src];
 failure:
   if (src < srcmax)
     {

Other related posts: