Dear all, On Thu, 20 May 2010 08:45:31 -0500, John J. Boyer wrote:
I am Timothy Lee. I'm the person who supplied the patches for outpos. I was not subscribed to the mailing list, so I missed the discussion on 20th May. It looks like the code for outpos in the forward translator is incomplete. I'll try to get my mind around the problem, but if someone beats me that's fine. I still have to look at the back-translator. On Thu, May 20, 2010 at 12:57:12PM +0200, Christian Egli wrote: > Hi > > "Jonathan Sharp"<jonathans@xxxxxxxxxx> writes: > > > The problem with holes in outpos also occurs during forward > > translation, for example forward translating the string "Pride and > > Prejudice" using en-us-g2 gives outpos values of 1 2 3 4 5 6 7 7 7 8 > > 12 -1 -1 13 14 15 16 17 18 > > Ah, OK I see. > > > I wonder if it would be better to fix these problems at their source > > rather than initializing outpos and then checking it's values after > > translation? > > You're saying this problem should be fixed properly in the translation > code? But it seems to me that the patch from timothy solves the problem > at least for the user. Of course it is not the best solution, but it > solves the problem. If someone comes along and solves the problem at its > source we can always take timothys patch out again. > > What do you think? > > Christian
Anyway, I've got a patch to correct inpos/outpos returned by the back-translator. The problem arises from the fact that during multi-pass translations, contraction invalidates the 1-to-1 relationship between source string and input buffer for each pass. This patch solves the problem by using the srcmapping[] array to map each character in current buffer back to original source string.
I've been testing this patch with en-us-g2.ctb, and it fixes the incorrect inpos/outpos values that result from contracted spaces. But I know for a fact that there are other places where the srcmapping[] array has to be modified. Please indicate whether this patch is in the right direction, and I'll continue to work on it.
Regards, Timothy Lee
Index: liblouis/compileTranslationTable.c =================================================================== --- liblouis/compileTranslationTable.c (revision 355) +++ liblouis/compileTranslationTable.c (working copy) @@ -3933,6 +3933,8 @@ static int sizePassbuf1 = 0; static widechar *passbuf2 = NULL; static int sizePassbuf2 = 0; +static int *srcmapping = NULL; +static int sizeSrcmapping = 0; void * liblouis_allocMem (AllocBuf buffer, int srcmax, int destmax) { @@ -3978,6 +3980,15 @@ sizePassbuf2 = destmax; } return passbuf2; + case alloc_srcmapping: + if (srcmax > sizeSrcmapping) + { + if (srcmapping != NULL) + free (srcmapping); + srcmapping = malloc ((srcmax + 4) * sizeof (int)); + sizeSrcmapping = srcmax; + } + return srcmapping; default: return NULL; } @@ -4018,6 +4029,10 @@ free (passbuf2); passbuf2 = NULL; sizePassbuf2 = 0; + if (srcmapping != NULL) + free (srcmapping); + srcmapping = NULL; + sizeSrcmapping = 0; opcodeLengths[0] = 0; } Index: liblouis/louis.h =================================================================== --- liblouis/louis.h (revision 355) +++ liblouis/louis.h (working copy) @@ -435,7 +435,8 @@ alloc_typebuf, alloc_destSpacing, alloc_passbuf1, - alloc_passbuf2 + alloc_passbuf2, + alloc_srcmapping, } AllocBuf; /* The following function definitions are hooks into * compileTranslationTable.c. Some are used by other library modules. 
Index: liblouis/lou_backTranslateString.c =================================================================== --- liblouis/lou_backTranslateString.c (revision 355) +++ liblouis/lou_backTranslateString.c (working copy) @@ -45,6 +45,7 @@ static widechar *passbuf2 = NULL; static widechar *currentOutput; static unsigned char *typebuf = NULL; +static int *srcmapping = NULL; static char *spacebuf; static int backTranslateString (void); static int makeCorrections (void); @@ -104,6 +105,11 @@ else passbuf1[k] = getDotsForChar (inbuf[k]); passbuf1[srcmax] = getDotsForChar (' '); + if (!(srcmapping = liblouis_allocMem (alloc_srcmapping, srcmax, destmax))) + return 0; + for (k = 0; k <= srcmax; k++) + srcmapping[k] = k; + srcmapping[srcmax] = srcmax; currentInput = passbuf1; if ((!(mode & pass1Only)) && (table->numPasses > 1 || table->corrections)) { @@ -267,7 +273,7 @@ break; } if (src < *inlen) - *inlen = src; + *inlen = srcmapping[src]; *outlen = dest; if (cursorPos != NULL) *cursorPos = cursorPosition; @@ -534,7 +540,7 @@ static void back_selectRule (void) { -/*check for valid bcak-translations */ +/*check for valid back-translations */ int length = srcmax - src; TranslationTableOffset ruleOffset = 0; static TranslationTableRule pseudoRule = { 0 }; @@ -796,26 +802,26 @@ for (k = 0; k < outLength; k++) { if (inputPositions != NULL) - inputPositions[dest + k] = src + k; + inputPositions[dest + k] = srcmapping[src + k]; if (outputPositions != NULL) - outputPositions[src + k] = dest + k; + outputPositions[srcmapping[src + k]] = dest + k; } for (k = outLength; k < inLength; k++) if (outputPositions != NULL) - outputPositions[src + k] = dest + outLength - 1; + outputPositions[srcmapping[src + k]] = dest + outLength - 1; } else { for (k = 0; k < inLength; k++) { if (inputPositions != NULL) - inputPositions[dest + k] = src + k; + inputPositions[dest + k] = srcmapping[src + k]; if (outputPositions != NULL) - outputPositions[src + k] = dest + k; + outputPositions[srcmapping[src + 
k]] = dest + k; } for (k = inLength; k < outLength; k++) if (inputPositions != NULL) - inputPositions[dest + k] = src + inLength - 1; + inputPositions[dest + k] = srcmapping[src + inLength - 1]; } } return putchars (outChars, outLength); @@ -1432,7 +1438,10 @@ if ((dest + startReplace - startMatch) > destmax) return 0; for (k = startMatch; k < startReplace; k++) + { + srcmapping[dest] = srcmapping[k]; currentOutput[dest++] = currentInput[k]; + } while (passIC < currentRule->dotslen) switch (passInstructions[passIC]) { @@ -1440,6 +1449,8 @@ case pass_dots: if ((dest + passInstructions[passIC + 1]) > destmax) return 0; + for (k = 0; k < passInstructions[passIC + 1]; ++k) + srcmapping[dest + k] = startMatch; memcpy (&currentOutput[dest], &passInstructions[passIC + 2], passInstructions[passIC + 1] * CHARSIZE); dest += passInstructions[passIC + 1]; @@ -1593,12 +1604,14 @@ case CTO_Always: if ((dest + 1) > destmax) goto failure; + srcmapping[dest] = srcmapping[src]; currentOutput[dest++] = currentInput[src++]; break; default: goto failure; } } + srcmapping[dest] = srcmapping[src]; failure: if (src < srcmax) {