eBookReaderSwitch/source/fitz/bidi.c

/*
 * Bidirectional text processing.
 *
 * Processes unicode text by arranging the characters into an order suitable
 * for display. E.g. Hebrew text will be arranged from right-to-left and
 * any English within the text will remain in the left-to-right order.
 * Characters such as parenthesis will be substituted for their mirrored
 * equivalents if they are part of text which must be reversed.
 *
 * This is an implementation of the unicode Bidirectional Algorithm which
 * can be found here: http://www.unicode.org/reports/tr9/ and is based
 * on the reference implementation of the algorithm found on that page.
 *
 * For a nice overview of how it works, read this...
 * http://www.w3.org/TR/REC-html40/struct/dirlang.html
 *
 * Extracted from the SmartOffice code, where it was modified by Ian
 * Beveridge.
 *
 * Copyright (C) Picsel, 2004. All Rights Reserved.
 */

/*
 * Original copyright notice from unicode reference implementation.
 * ----------------------------------------------------------------
 * Written by: Asmus Freytag
 *	C++ and Windows dependencies removed, and
 *	command line interface added by: Rick McGowan
 *
 *	Copyright (C) 1999, ASMUS, Inc. All Rights Reserved
 */

/*
 * Includes...
 */

#include "mupdf/fitz.h"
#include "mupdf/ucdn.h"
#include "bidi-imp.h" /* standard bidi code interface */
#include <assert.h>

/*
 * Macros...
 */

/* Non-zero when x is odd, i.e. has its lowest bit set. */
#define ODD(x) ((x) & 1)

/* True when bidi class t is one of ES, ET, CS, NSM, PDF, BN, S, WS or N.
 * The argument is evaluated several times, so it must be free of side
 * effects.
 */
#define REPLACEABLE_TYPE(t) ( \
		((t)==BDI_ES) || ((t)==BDI_ET) || ((t)==BDI_CS) || \
		((t)==BDI_NSM) || ((t)==BDI_PDF) || ((t)==BDI_BN) || \
		((t)==BDI_S) || ((t)==BDI_WS) || ((t)==BDI_N) )

/* Verbose debug trace. 'params' is pasted directly after fz_warn, so the
 * call site must pass the complete, parenthesised fz_warn argument list:
 * DBUGVF((arg1, arg2, ...)). Expands to an empty statement unless
 * DEBUG_BIDI_VERBOSE is defined.
 */
#ifdef DEBUG_BIDI_VERBOSE
#define DBUGVF(params) do { fz_warn params; } while (0)
#else
#define DBUGVF(params) do {} while (0)
#endif

/* Outline-level debug trace. Same calling convention as DBUGVF, but
 * enabled by DEBUG_BIDI_OUTLINE instead.
 */
#ifdef DEBUG_BIDI_OUTLINE
#define DBUGH(params) do { fz_warn params; } while (0)
#else
#define DBUGH(params) do {} while (0)
#endif

/* Named Unicode code points and range boundaries.
 * NOTE(review): none of these names are referenced by the functions
 * visible in this file; confirm whether other code still needs them.
 */
#define UNICODE_EOS					0
#define UNICODE_DIGIT_ZERO				0x0030
#define UNICODE_DIGIT_NINE				0x0039
#define UNICODE_SUPERSCRIPT_TWO				0x00B2
#define UNICODE_SUPERSCRIPT_THREE			0x00B3
#define UNICODE_SUPERSCRIPT_ONE				0x00B9
#define UNICODE_RTL_START				0x0590
#define UNICODE_RTL_END					0x07BF
#define UNICODE_ARABIC_INDIC_DIGIT_ZERO			0x0660
#define UNICODE_ARABIC_INDIC_DIGIT_NINE			0x0669
#define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO	0x06F0
#define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE	0x06F9
#define UNICODE_ZERO_WIDTH_NON_JOINER			0x200C
#define UNICODE_SUPERSCRIPT_ZERO			0x2070
#define UNICODE_SUPERSCRIPT_FOUR			0x2074
#define UNICODE_SUPERSCRIPT_NINE			0x2079
#define UNICODE_SUBSCRIPT_ZERO				0x2080
#define UNICODE_SUBSCRIPT_NINE				0x2089
#define UNICODE_CIRCLED_DIGIT_ONE			0x2460
#define UNICODE_NUMBER_TWENTY_FULL_STOP			0x249B
#define UNICODE_CIRCLED_DIGIT_ZERO			0x24EA
#define UNICODE_FULLWIDTH_DIGIT_ZERO			0xFF10
#define UNICODE_FULLWIDTH_DIGIT_NINE			0xFF19

/* Boolean constants, defined only if nothing earlier has provided them. */
#ifndef TRUE
#define TRUE (1)
#endif
#ifndef FALSE
#define FALSE (0)
#endif

/*
 * Enumerations...
 */

#ifdef DEBUG_BIDI_VERBOSE
/* Display support: one printable character per bidi class, used only by
 * the verbose debug dump in classify_characters. The table is indexed
 * directly by the fz_bidi_chartype value, so the entries must stay in the
 * same order as the BDI_* constants named in the trailing comments.
 */
static const char char_from_types[] =
{
	' ',	/* ON */
	'>',	/* L */
	'<',	/* R */
	'9',	/* AN */
	'1',	/* EN */
	'a',	/* AL */
	'@',	/* NSM */
	'.',	/* CS */
	',',	/* ES */
	'$',	/* ET */
	':',	/* BN */
	'X',	/* S */
	'_',	/* WS */
	'B',	/* B */
	'+',	/* RLO */
	'+',	/* RLE */
	'+',	/* LRO */
	'+',	/* LRE */
	'-',	/* PDF */
	'='	/* LS */
};
#endif

/*
 * Functions and static functions...
 */

/* UCDN uses a different ordering than Bidi does. We cannot
 * change to the UCDN ordering, as the bidi-std.c code relies
 * on the exact ordering (at least that N = ON = 0). We
 * therefore map between the two using this small table,
 * which is indexed by the UCDN bidi class value.
 *
 * The table also fudges the isolate controls, which this code
 * does not currently support: LRI and RLI are treated as the
 * embeddings LRE and RLE, while FSI and PDI are treated as
 * neutrals (BDI_N).
 */
static const uint8_t ucdn_to_bidi[] =
{
	BDI_L,		/* UCDN_BIDI_CLASS_L = 0 */
	BDI_LRE,	/* UCDN_BIDI_CLASS_LRE = 1 */
	BDI_LRO,	/* UCDN_BIDI_CLASS_LRO = 2 */
	BDI_R,		/* UCDN_BIDI_CLASS_R = 3 */
	BDI_AL,		/* UCDN_BIDI_CLASS_AL = 4 */
	BDI_RLE,	/* UCDN_BIDI_CLASS_RLE = 5 */
	BDI_RLO,	/* UCDN_BIDI_CLASS_RLO = 6 */
	BDI_PDF,	/* UCDN_BIDI_CLASS_PDF = 7 */
	BDI_EN,		/* UCDN_BIDI_CLASS_EN = 8 */
	BDI_ES,		/* UCDN_BIDI_CLASS_ES = 9 */
	BDI_ET,		/* UCDN_BIDI_CLASS_ET = 10 */
	BDI_AN,		/* UCDN_BIDI_CLASS_AN = 11 */
	BDI_CS,		/* UCDN_BIDI_CLASS_CS = 12 */
	BDI_NSM,	/* UCDN_BIDI_CLASS_NSM = 13 */
	BDI_BN,		/* UCDN_BIDI_CLASS_BN = 14 */
	BDI_B,		/* UCDN_BIDI_CLASS_B = 15 */
	BDI_S,		/* UCDN_BIDI_CLASS_S = 16 */
	BDI_WS,		/* UCDN_BIDI_CLASS_WS = 17 */
	BDI_ON,		/* UCDN_BIDI_CLASS_ON = 18 */
	BDI_LRE,	/* UCDN_BIDI_CLASS_LRI = 19 */
	BDI_RLE,	/* UCDN_BIDI_CLASS_RLI = 20 */
	BDI_N,		/* UCDN_BIDI_CLASS_FSI = 21 */
	BDI_N,		/* UCDN_BIDI_CLASS_PDI = 22 */
};

/* Bidi class of code point ch, with white-space classes (S, WS) left
 * intact. The lookup is unchecked: it assumes ucdn_get_bidi_class never
 * returns a value beyond the last table entry.
 */
#define class_from_ch_ws(ch) (ucdn_to_bidi[ucdn_get_bidi_class(ch)])

/* Bidi class of code point ch with white space neutralised: the
 * segment-separator (S) and white-space (WS) classes are both reported
 * as BDI_N; every other class is returned exactly as class_from_ch_ws
 * gives it.
 */
static fz_bidi_chartype class_from_ch_n(uint32_t ch)
{
	fz_bidi_chartype cls = class_from_ch_ws(ch);

	return (cls == BDI_S || cls == BDI_WS) ? BDI_N : cls;
}

/* Break one same-level fragment into runs that each contain a single
 * script, and hand every run to the callback.
 *
 * Characters whose script is COMMON or INHERITED (punctuation etc.)
 * never start a new run; they stay attached to whatever run they occur
 * in. A run is reported with the first "real" script found in it, or
 * with UCDN_SCRIPT_COMMON if it contains none.
 *
 * fragment, fragment_len: the code points to split.
 * level: embedding level, passed through to the callback unchanged.
 * arg: opaque pointer, passed through to the callback unchanged.
 * callback: invoked once per run, in order of increasing index.
 */
static void
split_at_script(const uint32_t *fragment,
		size_t fragment_len,
		int level,
		void *arg,
		fz_bidi_fragment_fn *callback)
{
	int run_script = UCDN_SCRIPT_COMMON;
	size_t run_start = 0;
	size_t pos;

	for (pos = 0; pos < fragment_len; pos++)
	{
		int here = ucdn_get_script(fragment[pos]);

		/* Script-less characters and repeats of the current script
		 * simply extend the run.
		 */
		if (here == UCDN_SCRIPT_COMMON || here == UCDN_SCRIPT_INHERITED || here == run_script)
			continue;

		/* A different real script: if the run already had one, the
		 * run ends just before this character.
		 */
		if (run_script != UCDN_SCRIPT_COMMON && run_script != UCDN_SCRIPT_INHERITED)
		{
			callback(fragment + run_start, pos - run_start, level, run_script, arg);
			run_start = pos;
		}
		run_script = here;
	}

	/* Flush the trailing run; nothing to do for an empty fragment. */
	if (run_start != fragment_len)
		callback(fragment + run_start, fragment_len - run_start, level, run_script, arg);
}

/* Fill types[0..len) with the bidi class of each code point in text.
 * The class is what every later pass of the algorithm works on.
 *
 * If FZ_BIDI_CLASSIFY_WHITE_SPACE is set in flags, classes come straight
 * from class_from_ch_ws, so S and WS are preserved. Otherwise
 * class_from_ch_n is used, which reports S and WS as BDI_N; in a
 * DEBUG_BIDI_VERBOSE build this path also dumps the text and the
 * resulting classes to stderr.
 */
static void
classify_characters(const uint32_t *text,
		fz_bidi_chartype *types,
		size_t len,
		fz_bidi_flags flags)
{
	size_t n;

	if (flags & FZ_BIDI_CLASSIFY_WHITE_SPACE)
	{
		for (n = 0; n < len; n++)
			types[n] = class_from_ch_ws(text[n]);
		return;
	}

#ifdef DEBUG_BIDI_VERBOSE
	/* To keep the dump readable, anything outside 32..127 is shown
	 * as a single digit derived from the code point modulo 9.
	 */
	fprintf(stderr, "Text:  ");
	for (n = 0; n < len; n++)
	{
		uint32_t c = text[n];

		fprintf(stderr, "%c", (c >= 32 && c <= 127) ? c : c % 9 + '0');
	}
	fprintf(stderr, "\nTypes: ");
#endif

	for (n = 0; n < len; n++)
	{
		types[n] = class_from_ch_n(text[n]);
#ifdef DEBUG_BIDI_VERBOSE
		fprintf(stderr, "%c", char_from_types[(int)types[n]]);
#endif
	}

#ifdef DEBUG_BIDI_VERBOSE
	fprintf(stderr, "\n");
#endif
}

/* Base level of the text, per rule P2 of the Unicode Bidi Algorithm:
 * the first strong character decides. An L yields FZ_BIDI_LTR, an R or
 * AL yields FZ_BIDI_RTL. Text with no strong character at all (including
 * empty text) defaults to FZ_BIDI_LTR.
 * Note: explicit embeddings are ignored.
 */
static fz_bidi_level base_level_from_text(fz_bidi_chartype *types, size_t len)
{
	size_t idx;

	for (idx = 0; idx < len; idx++)
	{
		fz_bidi_chartype t = types[idx];

		if (t == BDI_L)
			return FZ_BIDI_LTR;
		if (t == BDI_R || t == BDI_AL)
			return FZ_BIDI_RTL;
	}

	return FZ_BIDI_LTR;
}

/* Direction implied by a single bidi class: L and EN count as
 * left-to-right, R and AL as right-to-left, and everything else is
 * neutral.
 */
static fz_bidi_direction direction_from_type(fz_bidi_chartype type)
{
	if (type == BDI_L || type == BDI_EN)
		return FZ_BIDI_LTR;

	if (type == BDI_R || type == BDI_AL)
		return FZ_BIDI_RTL;

	return FZ_BIDI_NEUTRAL;
}

/* Turn double-quoted stretches of mixed-direction text into explicit
 * embeddings by rewriting the classes of the quote characters.
 *
 * Nothing is changed unless the text contains both a left-to-right
 * class (L, EN) and a right-to-left class (R, AL). When it does, quotes
 * are paired up in order. For an opening quote, the classes up to the
 * next quote are scanned:
 *   - L or EN found first:  the opening quote becomes BDI_LRE;
 *   - R or AL found first:  the opening quote becomes BDI_RLE;
 *   - LRE or RLE found first, or nothing found: the quote is left alone.
 * A closing quote becomes BDI_PDF only if its opening quote was
 * rewritten.
 *
 * text, len: the code points (used to locate '"').
 * types: per-character classes, modified in place.
 */
static void
classify_quoted_blocks(const uint32_t *text,
		fz_bidi_chartype *types,
		size_t len)
{
	size_t pos;
	int seen_ltr = FALSE;
	int seen_rtl = FALSE;
	int quote_open = FALSE;
	int embedding_open = FALSE;

	/* First pass: is there mixed content at all? */
	for (pos = 0; pos < len; pos++)
	{
		fz_bidi_direction dir = direction_from_type(types[pos]);

		if (dir == FZ_BIDI_LTR)
			seen_ltr = TRUE;
		else if (dir == FZ_BIDI_RTL)
			seen_rtl = TRUE;
	}

	if (!(seen_ltr && seen_rtl))
		return;

	/* Second pass: walk the quotes. */
	for (pos = 0; pos < len; pos++)
	{
		size_t scan;

		if (text[pos] != '"')
			continue;

		if (quote_open)
		{
			/* Closing quote: pop the embedding if we pushed one. */
			quote_open = FALSE;
			if (embedding_open)
			{
				embedding_open = FALSE;
				types[pos] = BDI_PDF;
			}
			continue;
		}

		/* Opening quote: look ahead, stopping at the next quote, for
		 * the first class that settles the direction.
		 */
		quote_open = TRUE;
		for (scan = pos + 1; scan < len && text[scan] != '"'; scan++)
		{
			fz_bidi_chartype t = types[scan];

			if (t == BDI_RLE || t == BDI_LRE)
			{
				/* Already explicitly embedded; leave the quote be. */
				break;
			}
			if (t == BDI_L || t == BDI_EN)
			{
				types[pos] = BDI_LRE;
				embedding_open = TRUE;
				break;
			}
			if (t == BDI_R || t == BDI_AL)
			{
				types[pos] = BDI_RLE;
				embedding_open = TRUE;
				break;
			}
		}
	}
}

/* Creates a buffer with an embedding level for every character in the
 * given text. Also determines the base level and returns it in
 * *baseDir if *baseDir does not initially contain a valid direction.
 *
 * ctx: context used for allocation and exception handling.
 * text, len: the code points to analyse.
 * baseDir: in/out. If *baseDir is FZ_BIDI_LTR or FZ_BIDI_RTL on entry it
 *	is used as the base level; otherwise the base level is derived
 *	from the text and written back. Must not be NULL.
 * resolveWhiteSpace: if non-zero, fz_bidi_resolve_whitespace is run on
 *	each paragraph after the other passes.
 * flags: passed to the initial classify_characters call.
 *
 * Returns a newly allocated array of len levels. The temporary types
 * array is always freed here; the levels array is freed here only when
 * an exception is being propagated, otherwise it is handed to the caller
 * (fz_bidi_fragment_text releases it with fz_free).
 */
static fz_bidi_level *
create_levels(fz_context *ctx,
		const uint32_t *text,
		size_t len,
		fz_bidi_direction *baseDir,
		int resolveWhiteSpace,
		int flags)
{
	fz_bidi_level *levels, *plevels;
	fz_bidi_chartype *types = NULL;
	fz_bidi_chartype *ptypes;
	fz_bidi_level baseLevel;
	const uint32_t *ptext;
	size_t plen, remaining;

	/* Allocated outside the fz_try: if this allocation fails there is
	 * nothing of ours to clean up yet.
	 */
	levels = fz_malloc(ctx, len * sizeof(*levels));

	/* types is assigned inside the fz_try block and read again in
	 * fz_always, which is presumably why it is registered with fz_var.
	 */
	fz_var(types);

	fz_try(ctx)
	{
		types = fz_malloc(ctx, len * sizeof(fz_bidi_chartype));

		classify_characters(text, types, len, flags);

		if (*baseDir != FZ_BIDI_LTR && *baseDir != FZ_BIDI_RTL)
		{
			/* Derive the base level from the text and
			 * update *baseDir in case the caller wants to know.
			 */
			baseLevel = base_level_from_text(types, len);
			*baseDir = ODD(baseLevel)==1 ? FZ_BIDI_RTL : FZ_BIDI_LTR;
		}
		else
		{
			/* Caller supplied a direction; use it as the level. */
			baseLevel = (fz_bidi_level)*baseDir;
		}

		{
			/* Replace tab with base direction, i.e. make tab appear as
			 * 'strong left' if the base direction is left-to-right and
			 * 'strong right' if base direction is right-to-left. This
			 * allows Layout to implicitly treat tabs as 'segment separators'.
			 */
			size_t i;

			for (i = 0u; i < len; i++)
			{
				if (text[i]=='\t')
				{
					types[i] = (*baseDir == FZ_BIDI_RTL) ? BDI_R : BDI_L;
				}
			}
		}

		/* Look for quotation marks. Classify them as RLE or LRE
		 * or leave them alone, depending on what follows them.
		 */
		classify_quoted_blocks(text, types, len);

		/* Work one paragraph at a time. The p* cursors advance in
		 * lock-step through the levels, types and text arrays.
		 */
		plevels = levels;
		ptypes = types;
		ptext = text;
		remaining = len;
		while (remaining)
		{
			/* Length of the next paragraph within what is left. */
			plen = fz_bidi_resolve_paragraphs(ptypes, remaining);

			/* Work out the levels and character types... */
			(void)fz_bidi_resolve_explicit(baseLevel, BDI_N, ptypes, plevels, plen, 0);
			fz_bidi_resolve_weak(ctx, baseLevel, ptypes, plevels, plen);
			fz_bidi_resolve_neutrals(baseLevel, ptypes, plevels, plen);
			fz_bidi_resolve_implicit(ptypes, plevels, plen);

			/* Re-derive the classes from the text, this time keeping
			 * the white-space classes (S, WS) distinct. This discards
			 * the tab and quote overrides applied above; the levels
			 * computed from them are kept.
			 */
			classify_characters(ptext, ptypes, plen, FZ_BIDI_CLASSIFY_WHITE_SPACE);

			if (resolveWhiteSpace)
			{
				/* resolve whitespace */
				fz_bidi_resolve_whitespace(baseLevel, ptypes, plevels, plen);
			}

			plevels += plen;
			ptypes += plen;
			ptext += plen;
			remaining -= plen;
		}

		/* The levels buffer now has odd and even numbers indicating
		 * rtl or ltr characters, respectively.
		 */
#ifdef DEBUG_BIDI_VERBOSE
		fprintf(stderr, "Levels: ");
		{
			size_t i;
			for (i = 0; i < len; i++)
			{
				/* One digit per character; levels above 9 print as 0. */
				fprintf(stderr, "%d", levels[i]>9?0:levels[i]);
			}
			fprintf(stderr, "\n");
		}
#endif
	}
	fz_always(ctx)
	{
		/* The class array is scratch space only. */
		fz_free(ctx, types);
	}
	fz_catch(ctx)
	{
		/* On failure the caller never sees levels, so release it here. */
		fz_free(ctx, levels);
		fz_rethrow(ctx);
	}
	return levels;
}

/* Partitions the given character sequence into one or more unidirectional
 * fragments and invokes the given callback function for each fragment.
 *
 * Fragments are runs of characters sharing one embedding level; each run
 * is further split by split_at_script so that a callback invocation
 * covers a single script. Callbacks are made in order of increasing
 * text index.
 *
 * ctx: context used for allocation and exception handling.
 * text, textlen: the code points to process. Nothing happens if text is
 *	NULL or textlen is 0.
 * baseDir: in/out base direction. If it points at FZ_BIDI_LTR or
 *	FZ_BIDI_RTL that direction is used; otherwise the direction is
 *	derived from the text and written back. May be NULL, in which
 *	case the direction is derived from the text and not reported.
 * callback: receives each fragment. Nothing happens if it is NULL.
 * arg: opaque pointer handed to every callback invocation.
 * flags: forwarded to create_levels.
 *
 * The per-character level array is released on every exit path; any
 * exception raised while fragmenting is rethrown to the caller.
 */
void fz_bidi_fragment_text(fz_context *ctx,
		const uint32_t *text,
		size_t textlen,
		fz_bidi_direction *baseDir,
		fz_bidi_fragment_fn *callback,
		void *arg,
		int flags)
{
	/* Stand-in used when the caller does not care about the direction. */
	fz_bidi_direction fallbackDir = FZ_BIDI_NEUTRAL;
	size_t startOfFragment;
	size_t i;
	fz_bidi_level *levels;

	if (text == NULL || callback == NULL || textlen == 0)
		return;

	/* create_levels dereferences baseDir unconditionally. */
	if (baseDir == NULL)
		baseDir = &fallbackDir;

	/* The macro pastes this list straight after fz_warn, so it must
	 * start with the context; textlen is a size_t, hence %zu.
	 */
	DBUGH((ctx, "fz_bidi_fragment_text(len = %zu)\n", textlen));

	levels = create_levels(ctx, text, textlen, baseDir, FALSE, flags);

	/* We now have an array with an embedding level
	 * for each character in text.
	 */
	assert(levels != NULL);

	fz_try(ctx)
	{
		startOfFragment = 0;
		for (i = 1; i < textlen; i++)
		{
			if (levels[i] != levels[i-1])
			{
				/* We've gone past the end of the fragment.
				 * Report it, then start a new fragment.
				 */
				split_at_script(&text[startOfFragment],
						i - startOfFragment,
						levels[startOfFragment],
						arg,
						callback);
				startOfFragment = i;
			}
		}
		/* Now i == textlen. Deal with the final (or maybe only)
		 * fragment.
		 */
		split_at_script(&text[startOfFragment],
				i - startOfFragment,
				levels[startOfFragment],
				arg,
				callback);
	}
	fz_always(ctx)
	{
		fz_free(ctx, levels);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}
}
Squashed 'mupdf/' content from commit 340abaf66 git-subtree-dir: mupdf git-subtree-split: 340abaf66f43a04477866e5df8ba9726709f43f4 2019-10-30 16:48:08 +00:00			`/*`
			`* Bidirectional text processing.`
			`*`
			`* Processes unicode text by arranging the characters into an order suitable`
			`* for display. E.g. Hebrew text will be arranged from right-to-left and`
			`* any English within the text will remain in the left-to-right order.`
			`* Characters such as parenthesis will be substituted for their mirrored`
			`* equivalents if they are part of text which must be reversed.`
			`*`
			`* This is an implementation of the unicode Bidirectional Algorithm which`
			`* can be found here: http://www.unicode.org/reports/tr9/ and is based`
			`* on the reference implementation of the algorithm found on that page.`
			`*`
			`* For a nice overview of how it works, read this...`
			`* http://www.w3.org/TR/REC-html40/struct/dirlang.html`
			`*`
			`* Extracted from the SmartOffice code, where it was modified by Ian`
			`* Beveridge.`
			`*`
			`* Copyright (C) Picsel, 2004. All Rights Reserved.`
			`*/`

			`/*`
			`* Original copyright notice from unicode reference implementation.`
			`* ----------------------------------------------------------------`
			`* Written by: Asmus Freytag`
			`* C++ and Windows dependencies removed, and`
			`* command line interface added by: Rick McGowan`
			`*`
			`* Copyright (C) 1999, ASMUS, Inc. All Rights Reserved`
			`*/`

			`/*`
			`* Includes...`
			`*/`

			`#include "mupdf/fitz.h"`
			`#include "mupdf/ucdn.h"`
			`#include "bidi-imp.h" /* standard bidi code interface */`
			`#include <assert.h>`

			`/*`
			`* Macros...`
			`*/`

			`#define ODD(x) ((x) & 1)`

			`#define REPLACEABLE_TYPE(t) ( \`
			`((t)==BDI_ES) \|\| ((t)==BDI_ET) \|\| ((t)==BDI_CS) \|\| \`
			`((t)==BDI_NSM) \|\| ((t)==BDI_PDF) \|\| ((t)==BDI_BN) \|\| \`
			`((t)==BDI_S) \|\| ((t)==BDI_WS) \|\| ((t)==BDI_N) )`

			`#ifdef DEBUG_BIDI_VERBOSE`
			`#define DBUGVF(params) do { fz_warn params; } while (0)`
			`#else`
			`#define DBUGVF(params) do {} while (0)`
			`#endif`

			`#ifdef DEBUG_BIDI_OUTLINE`
			`#define DBUGH(params) do { fz_warn params; } while (0)`
			`#else`
			`#define DBUGH(params) do {} while (0)`
			`#endif`

			`#define UNICODE_EOS 0`
			`#define UNICODE_DIGIT_ZERO 0x0030`
			`#define UNICODE_DIGIT_NINE 0x0039`
			`#define UNICODE_SUPERSCRIPT_TWO 0x00B2`
			`#define UNICODE_SUPERSCRIPT_THREE 0x00B3`
			`#define UNICODE_SUPERSCRIPT_ONE 0x00B9`
			`#define UNICODE_RTL_START 0x0590`
			`#define UNICODE_RTL_END 0x07BF`
			`#define UNICODE_ARABIC_INDIC_DIGIT_ZERO 0x0660`
			`#define UNICODE_ARABIC_INDIC_DIGIT_NINE 0x0669`
			`#define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_ZERO 0x06F0`
			`#define UNICODE_EXTENDED_ARABIC_INDIC_DIGIT_NINE 0x06F9`
			`#define UNICODE_ZERO_WIDTH_NON_JOINER 0x200C`
			`#define UNICODE_SUPERSCRIPT_ZERO 0x2070`
			`#define UNICODE_SUPERSCRIPT_FOUR 0x2074`
			`#define UNICODE_SUPERSCRIPT_NINE 0x2079`
			`#define UNICODE_SUBSCRIPT_ZERO 0x2080`
			`#define UNICODE_SUBSCRIPT_NINE 0x2089`
			`#define UNICODE_CIRCLED_DIGIT_ONE 0x2460`
			`#define UNICODE_NUMBER_TWENTY_FULL_STOP 0x249B`
			`#define UNICODE_CIRCLED_DIGIT_ZERO 0x24EA`
			`#define UNICODE_FULLWIDTH_DIGIT_ZERO 0xFF10`
			`#define UNICODE_FULLWIDTH_DIGIT_NINE 0xFF19`

			`#ifndef TRUE`
			`#define TRUE (1)`
			`#endif`
			`#ifndef FALSE`
			`#define FALSE (0)`
			`#endif`

			`/*`
			`* Enumerations...`
			`*/`

			`#ifdef DEBUG_BIDI_VERBOSE`
			`/* display support: */`
			`static const char char_from_types[] =`
			`{`
			`' ', /* ON */`
			`'>', /* L */`
			`'<', /* R */`
			`'9', /* AN */`
			`'1', /* EN */`
			`'a', /* AL */`
			`'@', /* NSM */`
			`'.', /* CS */`
			`',', /* ES */`
			`'$', /* ET */`
			`':', /* BN */`
			`'X', /* S */`
			`'_', /* WS */`
			`'B', /* B */`
			`'+', /* RLO */`
			`'+', /* RLE */`
			`'+', /* LRO */`
			`'+', /* LRE */`
			`'-', /* PDF */`
			`'=' /* LS */`
			`};`
			`#endif`

			`/*`
			`* Functions and static functions...`
			`*/`

			`/* UCDN uses a different ordering than Bidi does. We cannot`
			`* change to the UCDN ordering, as the bidi-std.c code relies`
			`* on the exact ordering (at least that N = ON = 0). We`
			`* therefore map between the two using this small table. It`
			`* also takes care of fudging LRI, RLI, FSI and PDI, that this`
			`* code does not currently support. */`
			`static const uint8_t ucdn_to_bidi[] =`
			`{`
			`BDI_L, /* UCDN_BIDI_CLASS_L = 0 */`
			`BDI_LRE, /* UCDN_BIDI_CLASS_LRE = 1 */`
			`BDI_LRO, /* UCDN_BIDI_CLASS_LRO = 2 */`
			`BDI_R, /* UCDN_BIDI_CLASS_R = 3 */`
			`BDI_AL, /* UCDN_BIDI_CLASS_AL = 4 */`
			`BDI_RLE, /* UCDN_BIDI_CLASS_RLE = 5 */`
			`BDI_RLO, /* UCDN_BIDI_CLASS_RLO = 6 */`
			`BDI_PDF, /* UCDN_BIDI_CLASS_PDF = 7 */`
			`BDI_EN, /* UCDN_BIDI_CLASS_EN = 8 */`
			`BDI_ES, /* UCDN_BIDI_CLASS_ES = 9 */`
			`BDI_ET, /* UCDN_BIDI_CLASS_ET = 10 */`
			`BDI_AN, /* UCDN_BIDI_CLASS_AN = 11 */`
			`BDI_CS, /* UCDN_BIDI_CLASS_CS = 12 */`
			`BDI_NSM, /* UCDN_BIDI_CLASS_NSM = 13 */`
			`BDI_BN, /* UCDN_BIDI_CLASS_BN = 14 */`
			`BDI_B, /* UCDN_BIDI_CLASS_B = 15 */`
			`BDI_S, /* UCDN_BIDI_CLASS_S = 16 */`
			`BDI_WS, /* UCDN_BIDI_CLASS_WS = 17 */`
			`BDI_ON, /* UCDN_BIDI_CLASS_ON = 18 */`
			`BDI_LRE, /* UCDN_BIDI_CLASS_LRI = 19 */`
			`BDI_RLE, /* UCDN_BIDI_CLASS_RLI = 20 */`
			`BDI_N, /* UCDN_BIDI_CLASS_FSI = 21 */`
			`BDI_N, /* UCDN_BIDI_CLASS_PDI = 22 */`
			`};`

			`#define class_from_ch_ws(ch) (ucdn_to_bidi[ucdn_get_bidi_class(ch)])`

			`/* Return a direction for white-space on the second pass of the algorithm. */`
			`static fz_bidi_chartype class_from_ch_n(uint32_t ch)`
			`{`
			`fz_bidi_chartype from_ch_ws = class_from_ch_ws(ch);`
			`if (from_ch_ws == BDI_S \|\| from_ch_ws == BDI_WS)`
			`return BDI_N;`
			`return from_ch_ws;`
			`}`

			`/* Split fragments into single scripts (or punctuation + single script) */`
			`static void`
			`split_at_script(const uint32_t *fragment,`
			`size_t fragment_len,`
			`int level,`
			`void *arg,`
			`fz_bidi_fragment_fn *callback)`
			`{`
			`int script = UCDN_SCRIPT_COMMON;`
			`size_t script_start, i;`

			`script_start = 0;`
			`for (i = 0; i < fragment_len; i++)`
			`{`
			`int s = ucdn_get_script(fragment[i]);`
			`if (s == UCDN_SCRIPT_COMMON \|\| s == UCDN_SCRIPT_INHERITED)`
			`{`
			`/* Punctuation etc. This is fine. */`
			`}`
			`else if (s == script)`
			`{`
			`/* Same script. Still fine. */`
			`}`
			`else if (script == UCDN_SCRIPT_COMMON \|\| script == UCDN_SCRIPT_INHERITED)`
			`{`
			`/* First non punctuation thing. Set the script. */`
			`script = s;`
			`}`
			`else`
			`{`
			`/* Change of script. Break the fragment. */`
			`(*callback)(&fragment[script_start], i - script_start, level, script, arg);`
			`script_start = i;`
			`script = s;`
			`}`
			`}`
			`if (script_start != fragment_len)`
			`{`
			`(*callback)(&fragment[script_start], fragment_len - script_start, level, script, arg);`
			`}`
			`}`

			`/* Determines the character classes for all following`
			`* passes of the algorithm. A character class is basically the type of Bidi`
			`* behaviour that the character exhibits.`
			`*/`
			`static void`
			`classify_characters(const uint32_t *text,`
			`fz_bidi_chartype *types,`
			`size_t len,`
			`fz_bidi_flags flags)`
			`{`
			`size_t i;`

			`if ((flags & FZ_BIDI_CLASSIFY_WHITE_SPACE)!=0)`
			`{`
			`for (i = 0; i < len; i++)`
			`{`
			`types[i] = class_from_ch_ws(text[i]);`
			`}`
			`}`
			`else`
			`{`
			`#ifdef DEBUG_BIDI_VERBOSE`
			`fprintf(stderr, "Text: ");`
			`for (i = 0; i < len; i++)`
			`{`
			`/* So that we can actually sort of read the debug string, any`
			`* non-ascii characters are replaced with a 1-digit hash`
			`* value from 0-9, making non-english characters appear`
			`* as numbers`
			`*/`
			`fprintf(stderr, "%c", (text[i] <= 127 && text[i] >= 32) ?`
			`text[i] : text[i] % 9 + '0');`
			`}`
			`fprintf(stderr, "\nTypes: ");`
			`#endif`
			`for (i = 0; i < len; i++)`
			`{`
			`types[i] = class_from_ch_n(text[i]);`
			`#ifdef DEBUG_BIDI_VERBOSE`
			`fprintf(stderr, "%c", char_from_types[(int)types[i]]);`
			`#endif`
			`}`
			`#ifdef DEBUG_BIDI_VERBOSE`
			`fprintf(stderr, "\n");`
			`#endif`
			`}`
			`}`

			`/* Determines the base level of the text.`
			`* Implements rule P2 of the Unicode Bidi Algorithm.`
			`* Note: Ignores explicit embeddings`
			`*/`
			`static fz_bidi_level base_level_from_text(fz_bidi_chartype *types, size_t len)`
			`{`
			`size_t i;`

			`for (i = 0; i < len; i++)`
			`{`
			`switch (types[i])`
			`{`
			`/* strong left */`
			`case BDI_L:`
			`return FZ_BIDI_LTR;`

			`/* strong right */`
			`case BDI_R:`
			`case BDI_AL:`
			`return FZ_BIDI_RTL;`
			`}`
			`}`
			`return FZ_BIDI_LTR;`
			`}`

			`static fz_bidi_direction direction_from_type(fz_bidi_chartype type)`
			`{`
			`switch (type)`
			`{`
			`case BDI_L:`
			`case BDI_EN:`
			`return FZ_BIDI_LTR;`

			`case BDI_R:`
			`case BDI_AL:`
			`return FZ_BIDI_RTL;`

			`default:`
			`return FZ_BIDI_NEUTRAL;`
			`}`
			`}`

			`static void`
			`classify_quoted_blocks(const uint32_t *text,`
			`fz_bidi_chartype *types,`
			`size_t len)`
			`{`
			`size_t i;`
			`int inQuote = FALSE;`
			`int pdfNeeded = FALSE;`
			`int ltrFound = FALSE;`
			`int rtlFound = FALSE;`

			`/* Only do anything special here if there is mixed content`
			`* (LTR and RTL) in the text.`
			`*/`
			`for (i = 0; i < len; i++)`
			`{`
			`switch (direction_from_type(types[i]))`
			`{`
			`case FZ_BIDI_LTR:`
			`ltrFound = TRUE;`
			`break;`

			`case FZ_BIDI_RTL:`
			`rtlFound = TRUE;`
			`break;`

			`default:`
			`break;`
			`}`
			`}`

			`/* Only make any changes if both LTR and RTL characters exist`
			`* in this text.`
			`*/`
			`if (!ltrFound \|\| !rtlFound)`
			`{`
			`return;`
			`}`

			`for (i = 0; i < len; i++)`
			`{`
			`if (text[i]=='"')`
			`{`
			`/* If we're already in a quote then terminate it,`
			`* else start a new block.`
			`*/`
			`if (inQuote)`
			`{`
			`inQuote = FALSE;`
			`if (pdfNeeded)`
			`{`
			`pdfNeeded = FALSE;`
			`types[i] = BDI_PDF;`
			`}`
			`}`
			`else`
			`{`
			`size_t j;`
			`int done = FALSE;`

			`inQuote = TRUE;`

			`/* Find the first strong right or left type and`
			`* use that to determine whether we should classify`
			`* the quote as LRE or RLE. Or neither, if we`
			`* hit another quote before any strongly-directional`
			`* character.`
			`*/`
			`for (j = i + 1; !done && (j < len) && text[j] != '"'; ++j)`
			`{`
			`switch(types[j])`
			`{`
			`case BDI_RLE:`
			`case BDI_LRE:`
			`done = TRUE;`
			`break;`

			`case BDI_L:`
			`case BDI_EN:`
			`types[i] = BDI_LRE;`
			`pdfNeeded = TRUE;`
			`done = TRUE;`
			`break;`

			`case BDI_R:`
			`case BDI_AL:`
			`types[i] = BDI_RLE;`
			`pdfNeeded = TRUE;`
			`done = TRUE;`
			`break;`

			`default:`
			`break;`
			`}`
			`}`
			`}`
			`}`
			`}`
			`}`

			`/* Creates a buffer with an embedding level for every character in the`
			`* given text. Also determines the base level and returns it in`
			`* baseDir if baseDir does not initially contain a valid direction.`
			`*/`
			`static fz_bidi_level *`
			`create_levels(fz_context *ctx,`
			`const uint32_t *text,`
			`size_t len,`
			`fz_bidi_direction *baseDir,`
			`int resolveWhiteSpace,`
			`int flags)`
			`{`
			`fz_bidi_level levels, plevels;`
			`fz_bidi_chartype *types = NULL;`
			`fz_bidi_chartype *ptypes;`
			`fz_bidi_level baseLevel;`
			`const uint32_t *ptext;`
			`size_t plen, remaining;`

			`levels = fz_malloc(ctx, len * sizeof(*levels));`

			`fz_var(types);`

			`fz_try(ctx)`
			`{`
			`types = fz_malloc(ctx, len * sizeof(fz_bidi_chartype));`

			`classify_characters(text, types, len, flags);`

			`if (baseDir != FZ_BIDI_LTR && baseDir != FZ_BIDI_RTL)`
			`{`
			`/* Derive the base level from the text and`
			`* update *baseDir in case the caller wants to know.`
			`*/`
			`baseLevel = base_level_from_text(types, len);`
			`*baseDir = ODD(baseLevel)==1 ? FZ_BIDI_RTL : FZ_BIDI_LTR;`
			`}`
			`else`
			`{`
			`baseLevel = (fz_bidi_level)*baseDir;`
			`}`

			`{`
			`/* Replace tab with base direction, i.e. make tab appear as`
			`* 'strong left' if the base direction is left-to-right and`
			`* 'strong right' if base direction is right-to-left. This`
			`* allows Layout to implicitly treat tabs as 'segment separators'.`
			`*/`
			`size_t i;`

			`for (i = 0u; i < len; i++)`
			`{`
			`if (text[i]=='\t')`
			`{`
			`types[i] = (*baseDir == FZ_BIDI_RTL) ? BDI_R : BDI_L;`
			`}`
			`}`
			`}`

			`/* Look for quotation marks. Classify them as RLE or LRE`
			`* or leave them alone, depending on what follows them.`
			`*/`
			`classify_quoted_blocks(text, types, len);`

			`/* Work one paragraph at a time. */`
			`plevels = levels;`
			`ptypes = types;`
			`ptext = text;`
			`remaining = len;`
			`while (remaining)`
			`{`
			`plen = fz_bidi_resolve_paragraphs(ptypes, remaining);`

			`/* Work out the levels and character types... */`
			`(void)fz_bidi_resolve_explicit(baseLevel, BDI_N, ptypes, plevels, plen, 0);`
			`fz_bidi_resolve_weak(ctx, baseLevel, ptypes, plevels, plen);`
			`fz_bidi_resolve_neutrals(baseLevel, ptypes, plevels, plen);`
			`fz_bidi_resolve_implicit(ptypes, plevels, plen);`

			`classify_characters(ptext, ptypes, plen, FZ_BIDI_CLASSIFY_WHITE_SPACE);`

			`if (resolveWhiteSpace)`
			`{`
			`/* resolve whitespace */`
			`fz_bidi_resolve_whitespace(baseLevel, ptypes, plevels, plen);`
			`}`

			`plevels += plen;`
			`ptypes += plen;`
			`ptext += plen;`
			`remaining -= plen;`
			`}`

			`/* The levels buffer now has odd and even numbers indicating`
			`* rtl or ltr characters, respectively.`
			`*/`
			`#ifdef DEBUG_BIDI_VERBOSE`
			`fprintf(stderr, "Levels: ");`
			`{`
			`size_t i;`
			`for (i = 0; i < len; i++)`
			`{`
			`fprintf(stderr, "%d", levels[i]>9?0:levels[i]);`
			`}`
			`fprintf(stderr, "\n");`
			`}`
			`#endif`
			`}`
			`fz_always(ctx)`
			`{`
			`fz_free(ctx, types);`
			`}`
			`fz_catch(ctx)`
			`{`
			`fz_free(ctx, levels);`
			`fz_rethrow(ctx);`
			`}`
			`return levels;`
			`}`

			`/* Partitions the given character sequence into one or more unidirectional`
			`* fragments and invokes the given callback function for each fragment.`
			`*/`
			`void fz_bidi_fragment_text(fz_context *ctx,`
			`const uint32_t *text,`
			`size_t textlen,`
			`fz_bidi_direction *baseDir,`
			`fz_bidi_fragment_fn *callback,`
			`void *arg,`
			`int flags)`
			`{`
			`size_t startOfFragment;`
			`size_t i;`
			`fz_bidi_level *levels;`

			`if (text == NULL \|\| callback == NULL \|\| textlen == 0)`
			`return;`

			`DBUGH(("fz_bidi_fragment_text('%S', len = %d)\n", text, textlen));`

			`levels = create_levels(ctx, text, textlen, baseDir, FALSE, flags);`

			`/* We now have an array with an embedding level`
			`* for each character in text.`
			`*/`
			`assert(levels != NULL);`

			`fz_try(ctx)`
			`{`
			`startOfFragment = 0;`
			`for (i = 1; i < textlen; i++)`
			`{`
			`if (levels[i] != levels[i-1])`
			`{`
			`/* We've gone past the end of the fragment.`
			`* Create a text object for it, then start`
			`* a new fragment.`
			`*/`
			`split_at_script(&text[startOfFragment],`
			`i - startOfFragment,`
			`levels[startOfFragment],`
			`arg,`
			`callback);`
			`startOfFragment = i;`
			`}`
			`}`
			`/* Now i == textlen. Deal with the final (or maybe only) fragment. */`
			`/* otherwise create 1 fragment */`
			`split_at_script(&text[startOfFragment],`
			`i - startOfFragment,`
			`levels[startOfFragment],`
			`arg,`
			`callback);`
			`}`
			`fz_always(ctx)`
			`{`
			`fz_free(ctx, levels);`
			`}`
			`fz_catch(ctx)`
			`{`
			`fz_rethrow(ctx);`
			`}`
			`}`