Initial revision

2026-06-11 09:40:11 -04:00 · 2000-01-25 22:38:34 +00:00 · 2000-01-25 22:38:34 +00:00 · fe98d9fa7b
commit fe98d9fa7b
parent 9fec129997
22 changed files with 10372 additions and 0 deletions
--- a/libraries/liblunicode/ucdata/MUTTUCData.txt
+++ b/libraries/liblunicode/ucdata/MUTTUCData.txt
@ -0,0 +1,303 @@
+#
+# $Id: MUTTUCData.txt,v 1.3 1999/10/29 00:04:35 mleisher Exp $
+#
+# Copyright 1999 Computing Research Labs, New Mexico State University
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+#
+# Implementation specific character properties.
+#
+#
+# Space, other.
+#
+0009;;Ss;;;;;;;;;;;;
+000A;;Ss;;;;;;;;;;;;
+000B;;Ss;;;;;;;;;;;;
+000C;;Ss;;;;;;;;;;;;
+000D;;Ss;;;;;;;;;;;;
+#
+# Non-breaking.
+#
+00A0;;Nb;;;;;;;;;;;;
+2007;;Nb;;;;;;;;;;;;
+2011;;Nb;;;;;;;;;;;;
+FEFF;;Nb;;;;;;;;;;;;
+#
+# Symmetric.
+#
+0028;;Sy;;;;;;;;;;;;
+0029;;Sy;;;;;;;;;;;;
+005B;;Sy;;;;;;;;;;;;
+005D;;Sy;;;;;;;;;;;;
+007B;;Sy;;;;;;;;;;;;
+007D;;Sy;;;;;;;;;;;;
+00AB;;Sy;;;;;;;;;;;;
+00BB;;Sy;;;;;;;;;;;;
+0F3A;;Sy;;;;;;;;;;;;
+0F3B;;Sy;;;;;;;;;;;;
+0F3C;;Sy;;;;;;;;;;;;
+0F3D;;Sy;;;;;;;;;;;;
+0F3E;;Sy;;;;;;;;;;;;
+0F3F;;Sy;;;;;;;;;;;;
+2018;;Sy;;;;;;;;;;;;
+2019;;Sy;;;;;;;;;;;;
+201A;;Sy;;;;;;;;;;;;
+201B;;Sy;;;;;;;;;;;;
+201C;;Sy;;;;;;;;;;;;
+201D;;Sy;;;;;;;;;;;;
+201E;;Sy;;;;;;;;;;;;
+201F;;Sy;;;;;;;;;;;;
+2039;;Sy;;;;;;;;;;;;
+203A;;Sy;;;;;;;;;;;;
+2045;;Sy;;;;;;;;;;;;
+2046;;Sy;;;;;;;;;;;;
+207D;;Sy;;;;;;;;;;;;
+207E;;Sy;;;;;;;;;;;;
+208D;;Sy;;;;;;;;;;;;
+208E;;Sy;;;;;;;;;;;;
+2329;;Sy;;;;;;;;;;;;
+232A;;Sy;;;;;;;;;;;;
+3008;;Sy;;;;;;;;;;;;
+3009;;Sy;;;;;;;;;;;;
+300A;;Sy;;;;;;;;;;;;
+300B;;Sy;;;;;;;;;;;;
+300C;;Sy;;;;;;;;;;;;
+300D;;Sy;;;;;;;;;;;;
+300E;;Sy;;;;;;;;;;;;
+300F;;Sy;;;;;;;;;;;;
+3010;;Sy;;;;;;;;;;;;
+3011;;Sy;;;;;;;;;;;;
+3014;;Sy;;;;;;;;;;;;
+3015;;Sy;;;;;;;;;;;;
+3016;;Sy;;;;;;;;;;;;
+3017;;Sy;;;;;;;;;;;;
+3018;;Sy;;;;;;;;;;;;
+3019;;Sy;;;;;;;;;;;;
+301A;;Sy;;;;;;;;;;;;
+301B;;Sy;;;;;;;;;;;;
+301D;;Sy;;;;;;;;;;;;
+301E;;Sy;;;;;;;;;;;;
+301F;;Sy;;;;;;;;;;;;
+FD3E;;Sy;;;;;;;;;;;;
+FD3F;;Sy;;;;;;;;;;;;
+FE35;;Sy;;;;;;;;;;;;
+FE36;;Sy;;;;;;;;;;;;
+FE37;;Sy;;;;;;;;;;;;
+FE38;;Sy;;;;;;;;;;;;
+FE39;;Sy;;;;;;;;;;;;
+FE3A;;Sy;;;;;;;;;;;;
+FE3B;;Sy;;;;;;;;;;;;
+FE3C;;Sy;;;;;;;;;;;;
+FE3D;;Sy;;;;;;;;;;;;
+FE3E;;Sy;;;;;;;;;;;;
+FE3F;;Sy;;;;;;;;;;;;
+FE40;;Sy;;;;;;;;;;;;
+FE41;;Sy;;;;;;;;;;;;
+FE42;;Sy;;;;;;;;;;;;
+FE43;;Sy;;;;;;;;;;;;
+FE44;;Sy;;;;;;;;;;;;
+FE59;;Sy;;;;;;;;;;;;
+FE5A;;Sy;;;;;;;;;;;;
+FE5B;;Sy;;;;;;;;;;;;
+FE5C;;Sy;;;;;;;;;;;;
+FE5D;;Sy;;;;;;;;;;;;
+FE5E;;Sy;;;;;;;;;;;;
+FF08;;Sy;;;;;;;;;;;;
+FF09;;Sy;;;;;;;;;;;;
+FF3B;;Sy;;;;;;;;;;;;
+FF3D;;Sy;;;;;;;;;;;;
+FF5B;;Sy;;;;;;;;;;;;
+FF5D;;Sy;;;;;;;;;;;;
+FF62;;Sy;;;;;;;;;;;;
+FF63;;Sy;;;;;;;;;;;;
+#
+# Hex digit.
+#
+0030;;Hd;;;;;;;;;;;;
+0031;;Hd;;;;;;;;;;;;
+0032;;Hd;;;;;;;;;;;;
+0033;;Hd;;;;;;;;;;;;
+0034;;Hd;;;;;;;;;;;;
+0035;;Hd;;;;;;;;;;;;
+0036;;Hd;;;;;;;;;;;;
+0037;;Hd;;;;;;;;;;;;
+0038;;Hd;;;;;;;;;;;;
+0039;;Hd;;;;;;;;;;;;
+0041;;Hd;;;;;;;;;;;;
+0042;;Hd;;;;;;;;;;;;
+0043;;Hd;;;;;;;;;;;;
+0044;;Hd;;;;;;;;;;;;
+0045;;Hd;;;;;;;;;;;;
+0046;;Hd;;;;;;;;;;;;
+0061;;Hd;;;;;;;;;;;;
+0062;;Hd;;;;;;;;;;;;
+0063;;Hd;;;;;;;;;;;;
+0064;;Hd;;;;;;;;;;;;
+0065;;Hd;;;;;;;;;;;;
+0066;;Hd;;;;;;;;;;;;
+FF10;;Hd;;;;;;;;;;;;
+FF11;;Hd;;;;;;;;;;;;
+FF12;;Hd;;;;;;;;;;;;
+FF13;;Hd;;;;;;;;;;;;
+FF14;;Hd;;;;;;;;;;;;
+FF15;;Hd;;;;;;;;;;;;
+FF16;;Hd;;;;;;;;;;;;
+FF17;;Hd;;;;;;;;;;;;
+FF18;;Hd;;;;;;;;;;;;
+FF19;;Hd;;;;;;;;;;;;
+FF21;;Hd;;;;;;;;;;;;
+FF22;;Hd;;;;;;;;;;;;
+FF23;;Hd;;;;;;;;;;;;
+FF24;;Hd;;;;;;;;;;;;
+FF25;;Hd;;;;;;;;;;;;
+FF26;;Hd;;;;;;;;;;;;
+FF41;;Hd;;;;;;;;;;;;
+FF42;;Hd;;;;;;;;;;;;
+FF43;;Hd;;;;;;;;;;;;
+FF44;;Hd;;;;;;;;;;;;
+FF45;;Hd;;;;;;;;;;;;
+FF46;;Hd;;;;;;;;;;;;
+#
+# Quote marks.
+#
+0022;;Qm;;;;;;;;;;;;
+0027;;Qm;;;;;;;;;;;;
+00AB;;Qm;;;;;;;;;;;;
+00BB;;Qm;;;;;;;;;;;;
+2018;;Qm;;;;;;;;;;;;
+2019;;Qm;;;;;;;;;;;;
+201A;;Qm;;;;;;;;;;;;
+201B;;Qm;;;;;;;;;;;;
+201C;;Qm;;;;;;;;;;;;
+201D;;Qm;;;;;;;;;;;;
+201E;;Qm;;;;;;;;;;;;
+201F;;Qm;;;;;;;;;;;;
+2039;;Qm;;;;;;;;;;;;
+203A;;Qm;;;;;;;;;;;;
+300C;;Qm;;;;;;;;;;;;
+300D;;Qm;;;;;;;;;;;;
+300E;;Qm;;;;;;;;;;;;
+300F;;Qm;;;;;;;;;;;;
+301D;;Qm;;;;;;;;;;;;
+301E;;Qm;;;;;;;;;;;;
+301F;;Qm;;;;;;;;;;;;
+FE41;;Qm;;;;;;;;;;;;
+FE42;;Qm;;;;;;;;;;;;
+FE43;;Qm;;;;;;;;;;;;
+FE44;;Qm;;;;;;;;;;;;
+FF02;;Qm;;;;;;;;;;;;
+FF07;;Qm;;;;;;;;;;;;
+FF62;;Qm;;;;;;;;;;;;
+FF63;;Qm;;;;;;;;;;;;
+#
+# Special Devanagari forms
+#
+E900;DEVANAGARI KSHA LIGATURE;Lo;0;L;0915 094D 0937;;;;N;;;;;
+E901;DEVANAGARI GNYA LIGATURE;Lo;0;L;091C 094D 091E;;;;N;;;;;
+E902;DEVANAGARI TTA LIGATURE;Lo;0;L;0924 094D 0924;;;;N;;;;;
+E903;DEVANAGARI TRA LIGATURE;Lo;0;L;0924 094D 0930;;;;N;;;;;
+E904;DEVANAGARI SHCHA LIGATURE;Lo;0;L;0936 094D 091B;;;;N;;;;;
+E905;DEVANAGARI SHRA LIGATURE;Lo;0;L;0936 094D 0930;;;;N;;;;;
+E906;DEVANAGARI SHVA LIGATURE;Lo;0;L;0936 094D 0935;;;;N;;;;;
+E907;DEVANAGARI KRA LIGATURE;Lo;0;L;;;;;N;;;;;
+E908;DEVANAGARI JRA LIGATURE;Lo;0;L;;;;;N;;;;;
+E909;DEVANAGARI ZRA LIGATURE;Lo;0;L;;;;;N;;;;;
+E90A;DEVANAGARI PHRA LIGATURE;Lo;0;L;;;;;N;;;;;
+E90B;DEVANAGARI FRA LIGATURE;Lo;0;L;;;;;N;;;;;
+E90C;DEVANAGARI PRA LIGATURE;Lo;0;L;;;;;N;;;;;
+E90D;DEVANAGARI SRA LIGATURE;Lo;0;L;;;;;N;;;;;
+E90E;DEVANAGARI RU LIGATURE;Lo;0;L;;;;;N;;;;;
+E90F;DEVANAGARI RUU LIGATURE;Lo;0;L;;;;;N;;;;;
+E915;DEVANAGARI HALF LETTER KA;Lo;0;L;;;;;N;;;;;
+E916;DEVANAGARI HALF LETTER KHA;Lo;0;L;;;;;N;;;;;
+E917;DEVANAGARI HALF LETTER GA;Lo;0;L;;;;;N;;;;;
+E918;DEVANAGARI HALF LETTER GHA;Lo;0;L;;;;;N;;;;;
+E919;DEVANAGARI HALF LETTER NGA;Lo;0;L;;;;;N;;;;;
+E91A;DEVANAGARI HALF LETTER CA;Lo;0;L;;;;;N;;;;;
+E91B;DEVANAGARI HALF LETTER CHA;Lo;0;L;;;;;N;;;;;
+E91C;DEVANAGARI HALF LETTER JA;Lo;0;L;;;;;N;;;;;
+E91D;DEVANAGARI HALF LETTER JHA;Lo;0;L;;;;;N;;;;;
+E91E;DEVANAGARI HALF LETTER NYA;Lo;0;L;;;;;N;;;;;
+E91F;DEVANAGARI HALF LETTER TTA;Lo;0;L;;;;;N;;;;;
+E920;DEVANAGARI HALF LETTER TTHA;Lo;0;L;;;;;N;;;;;
+E921;DEVANAGARI HALF LETTER DDA;Lo;0;L;;;;;N;;;;;
+E922;DEVANAGARI HALF LETTER DDHA;Lo;0;L;;;;;N;;;;;
+E923;DEVANAGARI HALF LETTER NNA;Lo;0;L;;;;;N;;;;;
+E924;DEVANAGARI HALF LETTER TA;Lo;0;L;;;;;N;;;;;
+E925;DEVANAGARI HALF LETTER THA;Lo;0;L;;;;;N;;;;;
+E926;DEVANAGARI HALF LETTER DA;Lo;0;L;;;;;N;;;;;
+E927;DEVANAGARI HALF LETTER DHA;Lo;0;L;;;;;N;;;;;
+E928;DEVANAGARI HALF LETTER NA;Lo;0;L;;;;;N;;;;;
+E929;DEVANAGARI HALF LETTER NNNA;Lo;0;L;0928 093C;;;;N;;;;;
+E92A;DEVANAGARI HALF LETTER PA;Lo;0;L;;;;;N;;;;;
+E92B;DEVANAGARI HALF LETTER PHA;Lo;0;L;;;;;N;;;;;
+E92C;DEVANAGARI HALF LETTER BA;Lo;0;L;;;;;N;;;;;
+E92D;DEVANAGARI HALF LETTER BHA;Lo;0;L;;;;;N;;;;;
+E92E;DEVANAGARI HALF LETTER MA;Lo;0;L;;;;;N;;;;;
+E92F;DEVANAGARI HALF LETTER YA;Lo;0;L;;;;;N;;;;;
+E930;DEVANAGARI HALF LETTER RA;Lo;0;L;;;;;N;;;;;
+E931;DEVANAGARI HALF LETTER RRA;Lo;0;L;0930 093C;;;;N;;;;;
+E932;DEVANAGARI HALF LETTER LA;Lo;0;L;;;;;N;;;;;
+E933;DEVANAGARI HALF LETTER LLA;Lo;0;L;;;;;N;;;;;
+E934;DEVANAGARI HALF LETTER LLLA;Lo;0;L;0933 093C;;;;N;;;;;
+E935;DEVANAGARI HALF LETTER VA;Lo;0;L;;;;;N;;;;;
+E936;DEVANAGARI HALF LETTER SHA;Lo;0;L;;;;;N;;;;;
+E937;DEVANAGARI HALF LETTER SSA;Lo;0;L;;;;;N;;;;;
+E938;DEVANAGARI HALF LETTER SA;Lo;0;L;;;;;N;;;;;
+E939;DEVANAGARI HALF LETTER HA;Lo;0;L;;;;;N;;;;;
+E940;DEVANAGARI KKA LIGATURE;Lo;0;L;0915 094D 0915;;;;N;;;;;
+E941;DEVANAGARI KTA LIGATURE;Lo;0;L;0915 094D 0924;;;;N;;;;;
+E942;DEVANAGARI NGKA LIGATURE;Lo;0;L;0919 094D 0915;;;;N;;;;;
+E943;DEVANAGARI NGKHA LIGATURE;Lo;0;L;0919 094D 0916;;;;N;;;;;
+E944;DEVANAGARI NGGA LIGATURE;Lo;0;L;0919 094D 0917;;;;N;;;;;
+E945;DEVANAGARI NGGHA LIGATURE;Lo;0;L;0919 094D 0918;;;;N;;;;;
+E946;DEVANAGARI NYJA LIGATURE;Lo;0;L;091E 094D 091C;;;;N;;;;;
+E947;DEVANAGARI DGHA LIGATURE;Lo;0;L;0926 094D 0918;;;;N;;;;;
+E948;DEVANAGARI DDA LIGATURE;Lo;0;L;0926 094D 0926;;;;N;;;;;
+E949;DEVANAGARI DDHA LIGATURE;Lo;0;L;0926 094D 0927;;;;N;;;;;
+E94A;DEVANAGARI DBA LIGATURE;Lo;0;L;0926 094D 092C;;;;N;;;;;
+E94B;DEVANAGARI DBHA LIGATURE;Lo;0;L;0926 094D 092D;;;;N;;;;;
+E94C;DEVANAGARI DMA LIGATURE;Lo;0;L;0926 094D 092E;;;;N;;;;;
+E94D;DEVANAGARI DYA LIGATURE;Lo;0;L;0926 094D 092F;;;;N;;;;;
+E94E;DEVANAGARI DVA LIGATURE;Lo;0;L;0926 094D 0935;;;;N;;;;;
+E94F;DEVANAGARI TT-TTA LIGATURE;Lo;0;L;091F 094D 091F;;;;N;;;;;
+E950;DEVANAGARI TT-TTHA LIGATURE;Lo;0;L;091F 094D 0920;;;;N;;;;;
+E951;DEVANAGARI TTH-TTHA LIGATURE;Lo;0;L;0920 094D 0920;;;;N;;;;;
+E952;DEVANAGARI DD-GA LIGATURE;Lo;0;L;0921 094D 0917;;;;N;;;;;
+E953;DEVANAGARI DD-DDA LIGATURE;Lo;0;L;0921 094D 0921;;;;N;;;;;
+E954;DEVANAGARI DD-DDHA LIGATURE;Lo;0;L;0921 094D 0922;;;;N;;;;;
+E955;DEVANAGARI NNA LIGATURE;Lo;0;L;0928 094D 0928;;;;N;;;;;
+E956;DEVANAGARI HMA LIGATURE;Lo;0;L;0939 094D 092E;;;;N;;;;;
+E957;DEVANAGARI HYA LIGATURE;Lo;0;L;0939 094D 092F;;;;N;;;;;
+E958;DEVANAGARI HLA LIGATURE;Lo;0;L;0939 094D 0932;;;;N;;;;;
+E959;DEVANAGARI HVA LIGATURE;Lo;0;L;0939 094D 0935;;;;N;;;;;
+E95A;DEVANAGARI STRA LIGATURE;Lo;0;L;0938 094D 0924 094D 0930;;;;N;;;;;
+E970;DEVANAGARI HALF KSHA LIGATURE;Lo;0;L;0915 094D 0937;;;;N;;;;;
+E971;DEVANAGARI HALF GNYA LIGATURE;Lo;0;L;091C 094D 091E;;;;N;;;;;
+E972;DEVANAGARI HALF TTA LIGATURE;Lo;0;L;0924 094D 0924;;;;N;;;;;
+E973;DEVANAGARI HALF TRA LIGATURE;Lo;0;L;0924 094D 0930;;;;N;;;;;
+E974;DEVANAGARI HALF SHCHA LIGATURE;Lo;0;L;0936 094D 091B;;;;N;;;;;
+E975;DEVANAGARI HALF SHRA LIGATURE;Lo;0;L;0936 094D 0930;;;;N;;;;;
+E976;DEVANAGARI HALF SHVA LIGATURE;Lo;0;L;0936 094D 0935;;;;N;;;;;
+E97B;DEVANAGARI SIGN RRA-REPHA;Mn;36;L;;;;;N;;;;;
+E97C;DEVANAGARI HAR LIGATURE;Lo;0;L;0939 0943;;;;N;;;;;
+E97D;DEVANAGARI SIGN EYELASH RA;Lo;0;L;;;;;N;;;;;
+E97E;DEVANAGARI SIGN REPHA;Mn;36;L;;;;;N;;;;;
+E97F;DEVANAGARI SIGN SUBJOINED RA;Mn;36;L;;;;;N;;;;;
--- a/libraries/liblunicode/ucdata/README
+++ b/libraries/liblunicode/ucdata/README
@ -0,0 +1,300 @@
+#
+# $Id: README,v 1.32 1999/11/29 16:41:05 mleisher Exp $
+#
+
+                           MUTT UCData Package 2.4
+                           -----------------------
+
+This is a package that supports ctype-like operations for Unicode UCS-2 text
+(and surrogates), case mapping, decomposition lookup, and provides a
+bidirectional reordering algorithm.  To use it, you will need to get the
+latest "UnicodeData-*.txt" file from the Unicode Web or FTP site.
+
+The character information portion of the package consists of three parts:
+
+  1. A program called "ucgendat" which generates five data files from the
+     UnicodeData-*.txt file.  The files are:
+
+     A. case.dat   - the case mappings.
+     B. ctype.dat  - the character property tables.
+     C. decomp.dat - the character decompositions.
+     D. cmbcl.dat  - the non-zero combining classes.
+     E. num.dat    - the codes representing numbers.
+
+  2. The "ucdata.[ch]" files which implement the functions needed to
+     check to see if a character matches groups of properties, to map between
+     upper, lower, and title case, to look up the decomposition of a
+     character, look up the combining class of a character, and get the number
+     value of a character.
+
+  3. The UCData.java class which provides the same API (with minor changes for
+     the numbers) and loads the same binary data files as the C code.
+
+A short reference to the functions available is in the "api.txt" file.
+
+Techie Details
+==============
+
+The "ucgendat" program parses files from the command line which are all in the
+Unicode Character Database (UCDB) format.  An additional properties file,
+"MUTTUCData.txt", provides some extra properties for some characters.
+
+The program looks for the two character properties fields (2 and 4), the
+combining class field (3), the decomposition field (5), the numeric value
+field (8), and the case mapping fields (12, 13, and 14).  The decompositions
+are recursively expanded before being written out.
+
+The decomposition table contains all the canonical decompositions.  This means
+all decompositions that do not have tags such as "<compat>" or "<font>".
+
+The data is almost all stored as unsigned longs (32-bits assumed) and the
+routines that load the data take care of endian swaps when necessary.  This
+also means that surrogates (>= 0x10000) can be placed in the data files the
+"ucgendat" program parses.
+
+The data is written as external files and broken into five parts so it can be
+selectively updated at runtime if necessary.
+
+The data files currently generated from the "ucgendat" program total about 56K
+in size all together.
+
+The format of the binary data files is documented in the "format.txt" file.
+
+==========================================================================
+
+                       The "Pretty Good Bidi Algorithm"
+                       --------------------------------
+
+This routine provides an alternative to the Unicode Bidi algorithm.  The
+difference is that this version of the PGBA does not handle the explicit
+directional codes (LRE, RLE, LRO, RLO, PDF).  It should now produce the same
+results as the Unicode BiDi algorithm for implicit reordering.  Included are
+functions for doing cursor motion in both logical and visual order.
+
+This implementation is provided to demonstrate an effective alternate method
+for implicit reordering.  To make this useful for an application, it probably
+needs some changes to the memory allocation and deallocation, as well as data
+structure additions for rendering.
+
+Mark Leisher <mleisher@crl.nmsu.edu>
+19 November 1999
+
+-----------------------------------------------------------------------------
+
+CHANGES
+=======
+
+Version 2.4
+-----------
+1. Improved some bidi algorithm documentation in the code.
+
+2. Fixed a code mixup that produced a non-working version.
+
+Version 2.3
+-----------
+1. Fixed a misspelling in the ucpgba.h header file.
+
+2. Fixed a bug which caused trailing weak non-digit sequences to be left out of
+   the reordered string in the bidi algorithm.
+
+3. Fixed a problem with weak sequences containing non-spacing marks in the
+   bidi algorithm.
+
+4. Fixed a problem with text runs of the opposite direction of the string
+   surrounding a weak + neutral text run appearing in the wrong order in the
+   bidi algorithm.
+
+5. Added a default overall direction parameter to the reordering function for
+   cases of strings with no strong directional characters in the bidi
+   algorithm.
+
+6. The bidi API documentation was improved.
+
+7. Added a man page for the bidi API.
+
+Version 2.2
+-----------
+1. Fixed a problem with the bidi algorithm locating directional section
+   boundaries.
+
+2. Fixed a problem with the bidi algorithm starting the reordering correctly.
+
+3. Fixed a problem with the bidi algorithm determining end boundaries for LTR
+   segments.
+
+4. Fixed a problem with the bidi algorithm reordering weak (digits and number
+   separators) segments.
+
+5. Added automatic switching of symmetrically paired characters when
+   reversing RTL segments.
+
+6. Added a missing symmetric character to the extra character properties in
+   MUTTUCData.txt.
+
+7. Added support for doing logical and visual cursor traversal.
+
+Version 2.1
+-----------
+1. Updated the ucgendat program to handle the Unicode 3.0 character database
+   properties.  The AL and BM bidi properties get marked as strong RTL and
+   Other Neutral, respectively; the NSM, LRE, RLE, PDF, LRO, and RLO controls
+   all get marked as Other Neutral.
+
+2. Fixed some problems with testing against signed values in the UCData.java
+   code and some minor cleanup.
+
+3. Added the "Pretty Good Bidi Algorithm."
+
+Version 2.0
+-----------
+1. Removed the old Java stuff for a new class that loads directly from the
+   same data files as the C code does.
+
+2. Fixed a problem with choosing the correct field when mapping case.
+
+3. Adjusted some search routines to start their search in the correct position.
+
+4. Moved the copyright year to 1999.
+
+Version 1.9
+-----------
+1. Fixed a problem with an incorrect amount of storage being allocated for the
+   combining class nodes.
+
+2. Fixed an invalid initialization in the number code.
+
+3. Changed the Java template file formatting a bit.
+
+4. Added tables and function for getting decompositions in the Java class.
+
+Version 1.8
+-----------
+1. Fixed a problem with adding certain ranges.
+
+2. Added two more macros for testing for identifiers.
+
+3. Tested with the UnicodeData-2.1.5.txt file.
+
+Version 1.7
+-----------
+1. Fixed a problem with looking up decompositions in "ucgendat."
+
+Version 1.6
+-----------
+1. Added two new properties introduced with UnicodeData-2.1.4.txt.
+
+2. Changed the "ucgendat.c" program a little to automatically align the
+   property data on a 4-byte boundary when new properties are added.
+
+3. Changed the "ucgendat.c" program to only generate canonical
+   decompositions.
+
+4. Added two new macros ucisinitialpunct() and ucisfinalpunct() to check for
+   initial and final punctuation characters.
+
+5. Minor additions and changes to the documentation.
+
+Version 1.5
+-----------
+1. Changed all file open calls to include binary mode with "b" for DOS/WIN
+   platforms.
+
+2. Wrapped the unistd.h include so it won't be included when compiled under
+   Win32.
+
+3. Fixed a bad range check for hex digits in ucgendat.c.
+
+4. Fixed a bad endian swap for combining classes.
+
+5. Added code to make a number table and associated lookup functions.
+   Functions added are ucnumber(), ucdigit(), and ucgetnumber().  The last
+   function is to maintain compatibility with John Cowan's "uctype" package.
+
+Version 1.4
+-----------
+1. Fixed a bug with adding a range.
+
+2. Fixed a bug with inserting a range in order.
+
+3. Fixed incorrectly specified ucisdefined() and ucisundefined() macros.
+
+4. Added the missing unload for the combining class data.
+
+5. Fixed a bad macro placement in ucisweak().
+
+Version 1.3
+-----------
+1. Bug with case mapping calculations fixed.
+
+2. Bug with empty character property entries fixed.
+
+3. Bug with incorrect type in the combining class lookup fixed.
+
+4. Some corrections done to api.txt.
+
+5. Bug in certain character property lookups fixed.
+
+6. Added a character property table that records the defined characters.
+
+7. Replaced ucisunknown() with ucisdefined() and ucisundefined().
+
+Version 1.2
+-----------
+1. Added code to ucgendat to generate a combining class table.
+
+2. Fixed an endian problem with the byte count of decompositions.
+
+3. Fixed some minor problems in the "format.txt" file.
+
+4. Removed some bogus "Ss" values from MUTTUCData.txt file.
+
+5. Added API function to get combining class.
+
+6. Changed the open mode to "rb" so binary data files will be opened correctly
+   on DOS/WIN as well as other platforms.
+
+7. Added the "api.txt" file.
+
+Version 1.1
+-----------
+1. Added ucisxdigit() which I overlooked.
+
+2. Added UC_LT to the ucisalpha() macro which I overlooked.
+
+3. Changed uciscntrl() to include UC_CF.
+
+4. Added ucisisocntrl() and ucisfmtcntrl() macros.
+
+5. Added a ucisblank() which I overlooked.
+
+6. Added missing properties to ucissymbol() and ucisnumber().
+
+7. Added ucisgraph() and ucisprint().
+
+8. Changed the "Mr" property to "Sy" to mark this subset of mirroring
+   characters as symmetric to avoid trampling the Unicode/ISO10646 sense of
+   mirroring.
+
+9. Added another property called "Ss" which includes control characters
+   traditionally seen as spaces in the isspace() macro.
+
+10. Added a bunch of macros to be API compatible with John Cowan's package.
+
+ACKNOWLEDGEMENTS
+================
+
+Thanks go to John Cowan <cowan@locke.ccil.org> for pointing out lots of
+missing things and giving me stuff, particularly a bunch of new macros.
+
+Thanks go to Bob Verbrugge <bob_verbrugge@nl.compuware.com> for pointing out
+various bugs.
+
+Thanks go to Christophe Pierret <cpierret@businessobjects.com> for pointing
+out that file modes need to have "b" for DOS/WIN machines, pointing out
+unistd.h is not a Win 32 header, and pointing out a problem with ucisalnum().
+
+Thanks go to Kent Johnson <kent@pondview.mv.com> for finding a bug that caused
+incomplete decompositions to be generated by the "ucgendat" program.
+
+Thanks go to Valeriy E. Ushakov <uwe@ptc.spbu.ru> for spotting an allocation
+error and an initialization error.
--- a/libraries/liblunicode/ucdata/UCData.java
+++ b/libraries/liblunicode/ucdata/UCData.java
@ -0,0 +1,935 @@
+/*
+ * $Id: UCData.java,v 1.2 1999/10/07 20:49:56 mleisher Exp $
+ *
+ * Copyright 1999 Computing Research Labs, New Mexico State University
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+ * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+import java.io.*;
+import java.net.*;
+
+public class UCData {
+    //
+    // Scratch state shared by all of the loaders: the raw file contents,
+    // whether multi-byte values must be read in reversed byte order, the
+    // number of bytes read by the last load, and the current read position.
+    //
+    private static byte[] buffer;
+    private static boolean endian;
+    private static int bytes, buffpos;
+
+    //
+    // Do the static initialization.  The buffer has a fixed size, so a data
+    // file larger than 24576 bytes cannot be held completely.
+    //
+    static {
+        buffer = new byte[24576];
+    }
+
+    //
+    // Read a data file from the stream into the shared buffer and prepare
+    // the read position for parsing.
+    //
+    // On success the first two bytes have been consumed as the byte order
+    // marker, "endian" is set from them, "buffpos" is 2, and "bytes" holds
+    // the number of bytes read.  Returns false if the stream could not be
+    // read or did not contain at least the two marker bytes.
+    //
+    // The stream is not closed here; that is left to the caller.
+    //
+    private static boolean load_file(InputStream in) {
+        int n;
+
+        buffpos = 0;
+        bytes = 0;
+        try {
+            //
+            // A single read() call may return fewer bytes than the stream
+            // holds (typical for network streams), so keep reading until
+            // the end of the stream is reached or the buffer is full.
+            // Anything beyond the buffer size is not loaded.
+            //
+            while (bytes < buffer.length &&
+                   (n = in.read(buffer, bytes, buffer.length - bytes)) > 0)
+              bytes += n;
+        } catch (IOException e) {
+            return false;
+        }
+
+        //
+        // Without the two marker bytes there is nothing to parse, and the
+        // buffer may still hold data from a previous load.
+        //
+        if (bytes < 2)
+          return false;
+
+        //
+        // NOTE(review): both marker bytes are compared against 0xFE (-2).
+        // If the marker is a two byte 0xFEFF value, it would appear as
+        // FE FF or FF FE and this test could never be true.  Confirm the
+        // expected bytes against "format.txt" before changing it.
+        //
+        endian = (buffer[0] == -2 && buffer[1] == -2);
+        buffpos = 2;
+        return true;
+    }
+
+    //
+    // Read the next four bytes of the buffer as one 32-bit value and advance
+    // the read position past them.  When "endian" is set the first byte read
+    // is the least significant one, otherwise it is the most significant.
+    //
+    private static int getInt() {
+        //
+        // Masking with 0xff turns the signed byte into its unsigned value.
+        //
+        int first = buffer[buffpos++] & 0xff;
+        int second = buffer[buffpos++] & 0xff;
+        int third = buffer[buffpos++] & 0xff;
+        int fourth = buffer[buffpos++] & 0xff;
+
+        if (endian)
+          return ((fourth << 24) | (third << 16) | (second << 8) | first);
+
+        return ((first << 24) | (second << 16) | (third << 8) | fourth);
+    }
+
+    //
+    // Move the read position to "from" and read a 32-bit value there.
+    //
+    private static int getInt(int from) {
+        buffpos = from;
+        return getInt();
+    }
+
+    //
+    // Read the next two bytes of the buffer as one 16-bit value and advance
+    // the read position past them.  When "endian" is set the first byte read
+    // is the low byte, otherwise it is the high byte.  The result is a
+    // signed short, so values with the top bit set come back negative.
+    //
+    private static short getShort() {
+        int lead = buffer[buffpos++] & 0xff;
+        int trail = buffer[buffpos++] & 0xff;
+        int value = endian ? ((trail << 8) | lead) : ((lead << 8) | trail);
+
+        return (short) value;
+    }
+
+    //
+    // Move the read position to "from" and read a 16-bit value there.
+    //
+    private static short getShort(int from) {
+        buffpos = from;
+        return getShort();
+    }
+
+    /**********************************************************************
+     *
+     * Character type info section.
+     *
+     **********************************************************************/
+
+    //
+    // Single bit masks: entry i has only bit i set.  Used by ucisprop() to
+    // test whether the property with index i was requested in a mask word.
+    //
+    private static int masks32[] = {
+        0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020,
+        0x00000040, 0x00000080, 0x00000100, 0x00000200, 0x00000400, 0x00000800,
+        0x00001000, 0x00002000, 0x00004000, 0x00008000, 0x00010000, 0x00020000,
+        0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000,
+        0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
+        0x40000000, 0x80000000
+    };
+
+    //
+    // The arrays with the character property info.  _ucprop_offsets holds,
+    // per property, the index of its first entry in _ucprop_ranges (or -1
+    // when the property has no ranges) followed by one final entry giving
+    // the total number of range values.  _ucprop_ranges holds the range
+    // values as consecutive (first code, last code) pairs.
+    //
+    private static short[] _ucprop_offsets = null;
+    private static int[] _ucprop_ranges = null;
+
+    //
+    // Property bits for the first mask word (the "mask1" argument of
+    // ucisprop()), covering property indexes 0 through 31.
+    //
+    public static final int UC_MN = 0x00000001;
+    public static final int UC_MC = 0x00000002;
+    public static final int UC_ME = 0x00000004;
+    public static final int UC_ND = 0x00000008;
+    public static final int UC_NL = 0x00000010;
+    public static final int UC_NO = 0x00000020;
+    public static final int UC_ZS = 0x00000040;
+    public static final int UC_ZL = 0x00000080;
+    public static final int UC_ZP = 0x00000100;
+    public static final int UC_CC = 0x00000200;
+    public static final int UC_CF = 0x00000400;
+    public static final int UC_OS = 0x00000800;
+    public static final int UC_CO = 0x00001000;
+    public static final int UC_CN = 0x00002000;
+    public static final int UC_LU = 0x00004000;
+    public static final int UC_LL = 0x00008000;
+    public static final int UC_LT = 0x00010000;
+    public static final int UC_LM = 0x00020000;
+    public static final int UC_LO = 0x00040000;
+    public static final int UC_PC = 0x00080000;
+    public static final int UC_PD = 0x00100000;
+    public static final int UC_PS = 0x00200000;
+    public static final int UC_PE = 0x00400000;
+    public static final int UC_PO = 0x00800000;
+    public static final int UC_SM = 0x01000000;
+    public static final int UC_SC = 0x02000000;
+    public static final int UC_SK = 0x04000000;
+    public static final int UC_SO = 0x08000000;
+    public static final int UC_L = 0x10000000;
+    public static final int UC_R = 0x20000000;
+    public static final int UC_EN = 0x40000000;
+    public static final int UC_ES = 0x80000000;
+    //
+    // The bit values start over here: the constants from UC_ET on reuse the
+    // values above, so they only make sense in the second mask word (the
+    // "mask2" argument of ucisprop()), covering property indexes 32 and up.
+    // Passing one of them as "mask1" selects the first-word property that
+    // has the same bit value instead.
+    //
+    public static final int UC_ET = 0x00000001;
+    public static final int UC_AN = 0x00000002;
+    public static final int UC_CS = 0x00000004;
+    public static final int UC_B = 0x00000008;
+    public static final int UC_S = 0x00000010;
+    public static final int UC_WS = 0x00000020;
+    public static final int UC_ON = 0x00000040;
+    public static final int UC_CM = 0x00000080;
+    public static final int UC_NB = 0x00000100;
+    public static final int UC_SY = 0x00000200;
+    public static final int UC_HD = 0x00000400;
+    public static final int UC_QM = 0x00000800;
+    public static final int UC_MR = 0x00001000;
+    public static final int UC_SS = 0x00002000;
+    public static final int UC_CP = 0x00004000;
+    public static final int UC_PI = 0x00008000;
+    public static final int UC_PF = 0x00010000;
+
+    //
+    // Load the character property tables from the given URL into
+    // _ucprop_offsets and _ucprop_ranges.
+    //
+    // Returns true if the tables are available (either already loaded or
+    // loaded by this call) and false if the URL could not be opened or the
+    // file could not be read.
+    //
+    private static boolean _ucprop_load(URL where) {
+        int i, hsize, size = 0;
+        boolean res;
+        InputStream in = null;
+
+        //
+        // If the offsets array is not null, then this file has been loaded.
+        //
+        if (_ucprop_offsets != null)
+          return true;
+
+        try {
+            in = where.openStream();
+        } catch (IOException e1) {
+            return false;
+        }
+
+        res = load_file(in);
+
+        //
+        // The whole file is in the buffer now (or the load failed), so the
+        // stream is no longer needed; a failure to close it is ignored.
+        //
+        try {
+            in.close();
+        } catch (IOException e) {}
+
+        if (res == false)
+          return res;
+            
+        //
+        // load_file() left the read position just past the two marker
+        // bytes, so this reads the second 16-bit header value.  There are
+        // hsize + 1 offsets in the file.
+        //
+        hsize = getShort();
+
+        //
+        // Number of bytes taken by the offsets (two bytes each), rounded up
+        // to a multiple of four.
+        //
+        if (((size = (hsize + 1) << 1) & 3) != 0)
+          size += 4 - (size & 3);
+
+        _ucprop_offsets = new short[hsize + 1];
+
+        //
+        // Skip the byte count which won't be needed.
+        //
+        buffpos += 4;
+
+        //
+        // Adjust the byte count used to position at the beginning of the
+        // ranges to include the 4 bytes at the beginning and the byte count
+        // which is unused.
+        //
+        size += 8;
+
+        //
+        // NOTE(review): the offsets are kept as signed shorts, so an offset
+        // above 32767 would be read back as a negative number.
+        //
+        for (i = 0; i <= hsize; i++)
+          _ucprop_offsets[i] = getShort();
+
+        //
+        // Now allocate the ranges.  The last offset is the total number of
+        // range values, which start at byte position "size" in the buffer.
+        //
+        _ucprop_ranges = new int[_ucprop_offsets[hsize]];
+        for (i = 0, buffpos = size; i < _ucprop_offsets[hsize]; i++)
+          _ucprop_ranges[i] = getInt();
+
+        return true;
+    }
+
+    //
+    // Drop the character property tables.  Clearing the offsets array also
+    // makes the next _ucprop_load() call read the file again.
+    //
+    private static void _ucprop_unload() {
+        _ucprop_ranges = null;
+        _ucprop_offsets = null;
+    }
+
+    //
+    // Check whether "code" falls inside one of the ranges recorded for the
+    // property with index "n".  Returns true if it does and false if it
+    // does not or if the property has no ranges at all.
+    //
+    // The property tables must have been loaded before this is called.
+    //
+    private static boolean uclookup(int code, int n) {
+        int l, r, m;
+
+        //
+        // An offset of -1 marks a property without any ranges.
+        //
+        if ((l = _ucprop_offsets[n]) == -1)
+          return false;
+
+        //
+        // The ranges of this property end where those of the next property
+        // that has ranges begin, so skip over following -1 entries.
+        //
+        for (m = 1; n + m < _ucprop_offsets.length &&
+                 _ucprop_offsets[n + m] == -1; m++) ;
+
+        //
+        // Binary search over the (first, last) pairs.  The midpoint is
+        // forced to an even index so that it always lands on the first
+        // value of a pair, and the bounds move in steps of one pair.
+        //
+        r = _ucprop_offsets[n + m] - 1;
+        while (l <= r) {
+            m = (l + r) >> 1;
+            m -= (m & 1);
+            if (code > _ucprop_ranges[m + 1])
+              l = m + 2;
+            else if (code < _ucprop_ranges[m])
+              r = m - 2;
+            else if (_ucprop_ranges[m] <= code && code <= _ucprop_ranges[m+1])
+              return true;
+        }
+        return false;
+    }
+
+    public static boolean ucisprop(int code, int mask1, int mask2) {
+        int i;
+
+        if (mask1 == 0 && mask2 == 0)
+          return false;
+
+        if (mask1 != 0) {
+            for (i = 0; i < 32; i++) {
+                if ((mask1 & masks32[i]) != 0 && uclookup(code, i))
+                  return true;
+            }
+        }
+
+        if (mask2 != 0) {
+            for (i = 32; i < _ucprop_offsets.length; i++) {
+                if ((mask2 & masks32[i & 31]) != 0 && uclookup(code, i))
+                  return true;
+            }
+        }
+        return false;
+    }
+
+    //
+    // True if the code has one of the letter properties (UC_LU, UC_LL,
+    // UC_LM, UC_LO, or UC_LT).
+    //
+    public static boolean ucisalpha(int code) {
+        return ucisprop(code, UC_LU|UC_LL|UC_LM|UC_LO|UC_LT, 0);
+    }
+    //
+    // True if the code has the UC_ND property.
+    //
+    public static boolean ucisdigit(int code) {
+        return ucisprop(code, UC_ND, 0);
+    }
+    //
+    // True if the code has one of the letter properties or UC_ND.
+    //
+    public static boolean ucisalnum(int code) {
+        return ucisprop(code, UC_LU|UC_LL|UC_LM|UC_LO|UC_LT|UC_ND, 0);
+    }
+    //
+    // True if the code has the UC_CC or the UC_CF property.
+    //
+    public static boolean uciscntrl(int code) {
+        return ucisprop(code, UC_CC|UC_CF, 0);
+    }
+    //
+    // True if the code has the UC_ZS property or the UC_SS property.
+    //
+    // UC_SS is a bit of the second mask word, so it has to be passed as
+    // "mask2".  In the first mask word the same bit value means UC_CN.
+    //
+    public static boolean ucisspace(int code) {
+        return ucisprop(code, UC_ZS, UC_SS);
+    }
+    public static boolean ucisblank(int code) {
+        return ucisprop(code, UC_ZS, 0);
+    }
+    public static boolean ucispunct(int code) {
+        return ucisprop(code, UC_PD|UC_PS|UC_PE|UC_PO, UC_PI|UC_PF);
+    }
+    public static boolean ucisgraph(int code) {
+        return ucisprop(code, UC_MN|UC_MC|UC_ME|UC_ND|UC_NL|UC_NO|
+                             UC_LU|UC_LL|UC_LT|UC_LM|UC_LO|UC_PC|UC_PD|
+                             UC_PS|UC_PE|UC_PO|UC_SM|UC_SM|UC_SC|UC_SK|
+                             UC_SO, UC_PI|UC_PF);
+    }
+    /**
+     * ctype-like "print" test: the same property set as ucisgraph() with
+     * UC_ZS added.
+     */
+    public static boolean ucisprint(int code) {
+        final int marks   = UC_MN | UC_MC | UC_ME;
+        final int numbers = UC_ND | UC_NL | UC_NO;
+        final int letters = UC_LU | UC_LL | UC_LT | UC_LM | UC_LO;
+        final int puncts  = UC_PC | UC_PD | UC_PS | UC_PE | UC_PO;
+        final int symbols = UC_SM | UC_SC | UC_SK | UC_SO;
+
+        //
+        // Initial and final punctuation live in the second mask word.
+        //
+        return ucisprop(code,
+                        marks | numbers | letters | puncts | symbols | UC_ZS,
+                        UC_PI | UC_PF);
+    }
+    public static boolean ucisupper(int code) { // UC_LU only
+        return ucisprop(code, UC_LU, 0);
+    }
+    public static boolean ucislower(int code) { // UC_LL only
+        return ucisprop(code, UC_LL, 0);
+    }
+    public static boolean ucistitle(int code) { // UC_LT only
+        return ucisprop(code, UC_LT, 0);
+    }
+    public static boolean ucisxdigit(int code) { // UC_HD (second mask word); same test as ucishex()
+        return ucisprop(code, 0, UC_HD);
+    }
+    public static boolean ucisisocntrl(int code) { // UC_CC only; api.txt: C0 control character
+        return ucisprop(code, UC_CC, 0);
+    }
+    public static boolean ucisfmtcntrl(int code) { // UC_CF only; api.txt: format control character
+        return ucisprop(code, UC_CF, 0);
+    }
+    public static boolean ucissymbol(int code) { // any of UC_SM, UC_SC, UC_SO, UC_SK
+        return ucisprop(code, UC_SM|UC_SC|UC_SO|UC_SK, 0);
+    }
+    public static boolean ucisnumber(int code) { // any of UC_ND, UC_NO, UC_NL; api.txt: number or digit
+        return ucisprop(code, UC_ND|UC_NO|UC_NL, 0);
+    }
+    public static boolean ucisnonspacing(int code) { // UC_MN only; same test as ucisnsmark()
+        return ucisprop(code, UC_MN, 0);
+    }
+    public static boolean ucisopenpunct(int code) { // UC_PS; api.txt: open/left punctuation
+        return ucisprop(code, UC_PS, 0);
+    }
+    public static boolean ucisclosepunct(int code) { // UC_PE; api.txt: close/right punctuation
+        return ucisprop(code, UC_PE, 0);
+    }
+    public static boolean ucisinitialpunct(int code) { // UC_PI (second mask word); api.txt: initial punctuation
+        return ucisprop(code, 0, UC_PI);
+    }
+    public static boolean ucisfinalpunct(int code) { // UC_PF (second mask word); api.txt: final punctuation
+        return ucisprop(code, 0, UC_PF);
+    }
+    public static boolean uciscomposite(int code) { // UC_CM (second mask word); api.txt: can be decomposed
+        return ucisprop(code, 0, UC_CM);
+    }
+    public static boolean ucishex(int code) { // UC_HD (second mask word); same test as ucisxdigit()
+        return ucisprop(code, 0, UC_HD);
+    }
+    public static boolean ucisquote(int code) { // UC_QM (second mask word); api.txt: quotation mark
+        return ucisprop(code, 0, UC_QM);
+    }
+    public static boolean ucissymmetric(int code) { // UC_SY (second mask word); api.txt: has an opposite form
+        return ucisprop(code, 0, UC_SY);
+    }
+    public static boolean ucismirroring(int code) { // UC_MR (second mask word); api.txt: superset of symmetric
+        return ucisprop(code, 0, UC_MR);
+    }
+    public static boolean ucisnonbreaking(int code) { // UC_NB (second mask word); api.txt: non-breaking
+        return ucisprop(code, 0, UC_NB);
+    }
+    public static boolean ucisrtl(int code) { // UC_R; api.txt: strong right-to-left directionality
+        return ucisprop(code, UC_R, 0);
+    }
+    public static boolean ucisltr(int code) { // UC_L; api.txt: strong left-to-right directionality
+        return ucisprop(code, UC_L, 0);
+    }
+    public static boolean ucisstrong(int code) { // UC_L or UC_R
+        return ucisprop(code, UC_L|UC_R, 0);
+    }
+    public static boolean ucisweak(int code) { // UC_EN/UC_ES in mask1, UC_ET/UC_AN/UC_CS in mask2
+        return ucisprop(code, UC_EN|UC_ES, UC_ET|UC_AN|UC_CS);
+    }
+    public static boolean ucisneutral(int code) { // any of UC_B, UC_S, UC_WS, UC_ON (second mask word)
+        return ucisprop(code, 0, UC_B|UC_S|UC_WS|UC_ON);
+    }
+    public static boolean ucisseparator(int code) { // UC_B or UC_S; api.txt: block or segment separator
+        return ucisprop(code, 0, UC_B|UC_S);
+    }
+    public static boolean ucismark(int code) { // any of UC_MN, UC_MC, UC_ME
+        return ucisprop(code, UC_MN|UC_MC|UC_ME, 0);
+    }
+    public static boolean ucismodif(int code) { // UC_LM; api.txt: modifier letter
+        return ucisprop(code, UC_LM, 0);
+    }
+    public static boolean ucisletnum(int code) { // UC_NL; api.txt: number represented by a letter
+        return ucisprop(code, UC_NL, 0);
+    }
+    public static boolean ucisconnect(int code) { // UC_PC; api.txt: connecting punctuation
+        return ucisprop(code, UC_PC, 0);
+    }
+    public static boolean ucisdash(int code) { // UC_PD; api.txt: dash punctuation
+        return ucisprop(code, UC_PD, 0);
+    }
+    public static boolean ucismath(int code) { // UC_SM; api.txt: math character
+        return ucisprop(code, UC_SM, 0);
+    }
+    public static boolean uciscurrency(int code) { // UC_SC; api.txt: currency character
+        return ucisprop(code, UC_SC, 0);
+    }
+    public static boolean ucismodifsymbol(int code) { // UC_SK; api.txt: modifier symbol
+        return ucisprop(code, UC_SK, 0);
+    }
+    public static boolean ucisnsmark(int code) { // UC_MN; same test as ucisnonspacing()
+        return ucisprop(code, UC_MN, 0);
+    }
+    public static boolean ucisspmark(int code) { // UC_MC; api.txt: spacing mark
+        return ucisprop(code, UC_MC, 0);
+    }
+    public static boolean ucisenclosing(int code) { // UC_ME; api.txt: enclosing
+        return ucisprop(code, UC_ME, 0);
+    }
+    public static boolean ucisprivate(int code) { // UC_CO; api.txt: Private Use Area
+        return ucisprop(code, UC_CO, 0);
+    }
+    public static boolean ucissurrogate(int code) { // UC_OS; api.txt: surrogate code
+        return ucisprop(code, UC_OS, 0);
+    }
+    public static boolean ucislsep(int code) { // UC_ZL; api.txt: line separator
+        return ucisprop(code, UC_ZL, 0);
+    }
+    public static boolean ucispsep(int code) { // UC_ZP; api.txt: paragraph separator
+        return ucisprop(code, UC_ZP, 0);
+    }
+    public static boolean ucisidentstart(int code) { // any of UC_LU, UC_LL, UC_LT, UC_LO, UC_NL
+        return ucisprop(code, UC_LU|UC_LL|UC_LT|UC_LO|UC_NL, 0);
+    }
+    public static boolean ucisidentpart(int code) { // the ucisidentstart() set plus UC_MN, UC_MC, UC_ND, UC_PC, UC_CF
+        return ucisprop(code, UC_LU|UC_LL|UC_LT|UC_LO|UC_NL|
+                             UC_MN|UC_MC|UC_ND|UC_PC|UC_CF, 0);
+    }
+    public static boolean ucisdefined(int code) { // UC_CP (second mask word); api.txt: appeared in one of the data files
+        return ucisprop(code, 0, UC_CP);
+    }
+    /**
+     * Logical inverse of the UC_CP test: true when the character does not
+     * carry the UC_CP property.
+     */
+    public static boolean ucisundefined(int code) {
+        return !ucisprop(code, 0, UC_CP);
+    }
+    /**
+     * Range test for Han ideographs: true for codes in 0x4e00..0x9fff or
+     * 0xf900..0xfaff (both ranges inclusive).  No data table is consulted.
+     */
+    public static boolean ucishan(int code) {
+        if (code >= 0x4e00 && code <= 0x9fff)
+          return true;
+        return code >= 0xf900 && code <= 0xfaff;
+    }
+    /**
+     * Range test for pre-composed Hangul syllables.
+     *
+     * The syllables occupy U+AC00 through U+D7A3 (11172 code points).  The
+     * upper bound used to be 0xd7ff, which also accepted code points past
+     * the last syllable; ucdecomp_hangul() would then compute jamo outside
+     * the valid leading-consonant range for them.
+     *
+     * @param code  the character code to test
+     * @return true if code lies in 0xac00..0xd7a3 inclusive
+     */
+    public static boolean ucishangul(int code) {
+        return 0xac00 <= code && code <= 0xd7a3;
+    }
+
+    /**********************************************************************
+     *
+     * Case mapping section.
+     *
+     **********************************************************************/
+
+    private static int[] _uccase_len = {0, 0};
+    private static int[] _uccase_map = null;
+
+    /**
+     * Load the case mapping table from the given URL.
+     *
+     * @param where  location of the case mapping data file
+     * @return true if the table is available (already loaded, or loaded
+     *         by this call); false if the stream could not be opened or
+     *         load_file() reported failure
+     */
+    private static boolean _uccase_load(URL where) {
+        //
+        // A non-null map means an earlier call already loaded the file.
+        //
+        if (_uccase_map != null)
+          return true;
+
+        InputStream stream;
+        try {
+            stream = where.openStream();
+        } catch (IOException openFailure) {
+            return false;
+        }
+
+        boolean loaded = load_file(stream);
+
+        //
+        // The stream is closed before the buffered data is decoded; a
+        // failure to close it is ignored.
+        //
+        try {
+            stream.close();
+        } catch (IOException ignored) {}
+
+        if (!loaded)
+          return false;
+
+        //
+        // Three counts are read from the header.  Each is multiplied by 3
+        // because every table entry occupies three ints.
+        //
+        int total = getShort(2) * 3;
+        _uccase_len[0] = getShort() * 3;
+        _uccase_len[1] = getShort() * 3;
+
+        _uccase_map = new int[total];
+        int idx = 0;
+        while (idx < total) {
+            _uccase_map[idx] = getInt();
+            idx++;
+        }
+
+        return true;
+    }
+
+    /**
+     * Discard the case mapping table so that a later _uccase_load() reads
+     * the file again.
+     */
+    private static void _uccase_unload() {
+        _uccase_map = null;
+        _uccase_len[1] = 0;
+        _uccase_len[0] = 0;
+    }
+
+    /**
+     * Binary search one section of the case mapping table.
+     *
+     * The table is a flat array of three-int entries; the first int of an
+     * entry is the character code the entry is keyed on.
+     *
+     * @param code   the character code to look up
+     * @param l      index of the first entry of the section to search
+     * @param r      index of the last entry of the section to search
+     * @param field  offset (1 or 2) within the entry of the value to return
+     * @return the mapped code, or `code' itself if the section holds no
+     *         entry for it.  This used to be -1, which contradicted the
+     *         documented uctoupper()/uctolower()/uctotitle() contract of
+     *         returning the code unchanged when there is no equivalent.
+     */
+    private static int _uccase_lookup(int code, int l, int r, int field) {
+        while (l <= r) {
+            //
+            // Pick the midpoint, then pull it back to the start of the
+            // three-int entry it falls in.
+            //
+            int m = (l + r) >> 1;
+            m -= (m % 3);
+            if (code > _uccase_map[m])
+              l = m + 3;
+            else if (code < _uccase_map[m])
+              r = m - 3;
+            else
+              return _uccase_map[m + field];
+        }
+
+        //
+        // No mapping exists: hand the character back unchanged.
+        //
+        return code;
+    }
+
+    /**
+     * Map a character to upper case.
+     *
+     * @param code  the character code to convert
+     * @return code itself if it is already upper case, otherwise whatever
+     *         _uccase_lookup() yields for the matching table section
+     */
+    public static int uctoupper(int code) {
+        if (ucisupper(code))
+          return code;
+
+        //
+        // The table holds the upper case section first, then the lower
+        // case section, then the title case section.
+        //
+        final int lowerStart = _uccase_len[0];
+        final int titleStart = lowerStart + _uccase_len[1];
+
+        //
+        // Lower case characters are searched in the lower case section
+        // (value at offset 2); everything else is treated as title case
+        // (value at offset 1).
+        //
+        if (ucislower(code))
+          return _uccase_lookup(code, lowerStart, titleStart - 3, 2);
+
+        return _uccase_lookup(code, titleStart, _uccase_map.length - 3, 1);
+    }
+
+    /**
+     * Map a character to lower case.
+     *
+     * @param code  the character code to convert
+     * @return code itself if it is already lower case, otherwise whatever
+     *         _uccase_lookup() yields for the matching table section
+     */
+    public static int uctolower(int code) {
+        int l, r, field;
+
+        if (ucislower(code))
+          return code;
+
+        if (ucisupper(code)) {
+            //
+            // Upper case: search the first section of the table.
+            //
+            field = 1;
+            l = 0;
+            r = _uccase_len[0] - 3;
+        } else {
+            //
+            // Title case: search the last section of the table.  The upper
+            // bound is the index of the last three-int entry, the same
+            // bound uctoupper() uses (this used to be length - 1).
+            //
+            field = 2;
+            l = _uccase_len[0] + _uccase_len[1];
+            r = _uccase_map.length - 3;
+        }
+        return _uccase_lookup(code, l, r, field);
+    }
+
+    /**
+     * Map a character to title case.
+     *
+     * @param code  the character code to convert
+     * @return code itself if it is already title case, otherwise whatever
+     *         _uccase_lookup() yields for the matching table section
+     */
+    public static int uctotitle(int code) {
+        if (ucistitle(code))
+          return code;
+
+        //
+        // The title case value sits at offset 2 in both sections.
+        //
+        final int upperLen = _uccase_len[0];
+
+        if (ucisupper(code))
+          return _uccase_lookup(code, 0, upperLen - 3, 2);
+
+        //
+        // Anything that is not upper case is looked up as lower case.
+        //
+        return _uccase_lookup(code, upperLen,
+                              (upperLen + _uccase_len[1]) - 3, 2);
+    }
+
+    /**********************************************************************
+     *
+     * Character decomposition section.
+     *
+     **********************************************************************/
+
+    static int _ucdcmp_node_count = 0;
+    static int[] _ucdcmp_data = null;
+
+    /**
+     * Load the decomposition table from the given URL.
+     *
+     * @param where  location of the decomposition data file
+     * @return true if the table is available (already loaded, or loaded
+     *         by this call); false if the stream could not be opened or
+     *         load_file() reported failure
+     */
+    private static boolean _ucdcmp_load(URL where) {
+        //
+        // A non-null table means an earlier call already loaded the file.
+        //
+        if (_ucdcmp_data != null)
+          return true;
+
+        InputStream stream;
+        try {
+            stream = where.openStream();
+        } catch (IOException openFailure) {
+            return false;
+        }
+
+        boolean loaded = load_file(stream);
+
+        //
+        // A failure to close the stream is ignored.
+        //
+        try {
+            stream.close();
+        } catch (IOException ignored) {}
+
+        if (!loaded)
+          return false;
+
+        //
+        // This says how many of the _ucdcmp_data elements are nodes; the
+        // elements after them hold the decompositions.
+        //
+        _ucdcmp_node_count = getShort() << 1;
+
+        //
+        // The next header field, divided by four, is the number of ints
+        // to read.
+        //
+        int total = getInt() >> 2;
+
+        _ucdcmp_data = new int[total];
+        int idx = 0;
+        while (idx < total) {
+            _ucdcmp_data[idx] = getInt();
+            idx++;
+        }
+
+        return true;
+    }
+
+    /**
+     * Discard the decomposition table so that a later _ucdcmp_load() reads
+     * the file again.
+     */
+    private static void _ucdcmp_unload() {
+        _ucdcmp_data = null;
+        _ucdcmp_node_count = 0;
+    }
+
+    public static int[] ucdecomp(int code) {
+        int l, r, m, out[];
+
+        l = 0;
+        r = _ucdcmp_data[_ucdcmp_node_count] - 1;
+
+        while (l <= r) {
+            //
+            // Determine a "mid" point and adjust to make sure the mid point
+            // is at the beginning of a code+offset pair.
+            //
+            m = (l + r) >> 1;
+            m -= (m & 1);
+            if (code > _ucdcmp_data[m])
+              l = m + 2;
+            else if (code < _ucdcmp_data[m])
+              r = m - 2;
+            else {
+                l = _ucdcmp_data[m + 3] - _ucdcmp_data[m + 1];
+                out = new int[l];
+                for (r = 0; r < l; r++)
+                  out[r] = _ucdcmp_data[_ucdcmp_node_count + 1 +
+                                       _ucdcmp_data[m + 1] + r];
+                return out;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Decompose a pre-composed Hangul syllable arithmetically.
+     *
+     * @param code  the character code to decompose
+     * @return null if ucishangul() rejects the code; otherwise a new array
+     *         of two elements (leading consonant, vowel) or three elements
+     *         (leading consonant, vowel, trailing consonant)
+     */
+    public static int[] ucdecomp_hangul(int code) {
+        if (!ucishangul(code))
+          return null;
+
+        //
+        // 588 syllables share a leading consonant and 28 share a
+        // leading consonant + vowel pair.
+        //
+        int sindex = code - 0xac00;
+        int lead = 0x1100 + (sindex / 588);
+        int vowel = 0x1161 + ((sindex % 588) / 28);
+        int trail = 0x11a7 + (sindex % 28);
+
+        //
+        // A trailing value of 0x11a7 means "no trailing consonant".  The
+        // old code sized the array from the trailing value but tested the
+        // leading value before storing it, so every two-element result
+        // raised ArrayIndexOutOfBoundsException.
+        //
+        if (trail == 0x11a7)
+          return new int[] {lead, vowel};
+
+        return new int[] {lead, vowel, trail};
+    }
+
+    /**********************************************************************
+     *
+     * Combining class section.
+     *
+     **********************************************************************/
+
+    private static int[] _uccmbcl_nodes = null;
+
+    /**
+     * Load the combining class table from the given URL.
+     *
+     * @param where  location of the combining class data file
+     * @return true if the table is available (already loaded, or loaded
+     *         by this call); false if the stream could not be opened or
+     *         load_file() reported failure
+     */
+    private static boolean _uccmbcl_load(URL where) {
+        //
+        // A non-null table means an earlier call already loaded the file.
+        //
+        if (_uccmbcl_nodes != null)
+          return true;
+
+        InputStream stream;
+        try {
+            stream = where.openStream();
+        } catch (IOException openFailure) {
+            return false;
+        }
+
+        boolean loaded = load_file(stream);
+
+        //
+        // A failure to close the stream is ignored.
+        //
+        try {
+            stream.close();
+        } catch (IOException ignored) {}
+
+        if (!loaded)
+          return false;
+
+        //
+        // Every node occupies three ints.
+        //
+        int total = getShort() * 3;
+
+        //
+        // Skip four bytes of the header that are not used here.
+        //
+        buffpos += 4;
+
+        _uccmbcl_nodes = new int[total];
+        int idx = 0;
+        while (idx < total) {
+            _uccmbcl_nodes[idx] = getInt();
+            idx++;
+        }
+
+        return true;
+    }
+
+    /**
+     * Discard the combining class table so that a later _uccmbcl_load()
+     * reads the file again.
+     */
+    private static void _uccmbcl_unload() {
+        _uccmbcl_nodes = null;
+    }
+
+    /**
+     * Look up the combining class of a character.
+     *
+     * Each node is three ints: first code of a range, last code of the
+     * range, and the value returned for codes inside the range.
+     *
+     * @param code  the character code to look up
+     * @return the class stored for the range containing code, or 0 if no
+     *         range contains it
+     */
+    public static int uccombining_class(int code) {
+        int lo = 0;
+        int hi = _uccmbcl_nodes.length - 3;
+
+        while (lo <= hi) {
+            //
+            // Force the midpoint onto the start of a three-int node.
+            //
+            int mid = (lo + hi) >> 1;
+            mid -= (mid % 3);
+
+            if (code > _uccmbcl_nodes[mid + 1])
+              lo = mid + 3;
+            else if (code < _uccmbcl_nodes[mid])
+              hi = mid - 3;
+            else
+              //
+              // Neither above the range end nor below the range start,
+              // so the code lies inside this range.
+              //
+              return _uccmbcl_nodes[mid + 2];
+        }
+        return 0;
+    }
+
+    /**********************************************************************
+     *
+     * Number section.
+     *
+     **********************************************************************/
+
+    private static short[] _ucnum_vals;
+    private static int[] _ucnum_nodes;
+
+    /**
+     * Load the number table from the given URL.
+     *
+     * @param where  location of the number data file
+     * @return true if the table is available (already loaded, or loaded
+     *         by this call); false if the stream could not be opened or
+     *         load_file() reported failure
+     */
+    private static boolean _ucnumb_load(URL where) {
+        //
+        // A non-null node array means an earlier call already loaded the
+        // file.
+        //
+        if (_ucnum_nodes != null)
+          return true;
+
+        InputStream stream;
+        try {
+            stream = where.openStream();
+        } catch (IOException openFailure) {
+            return false;
+        }
+
+        boolean loaded = load_file(stream);
+
+        //
+        // A failure to close the stream is ignored.
+        //
+        try {
+            stream.close();
+        } catch (IOException ignored) {}
+
+        if (!loaded)
+          return false;
+
+        //
+        // The header gives the number of node ints and a total from which
+        // the number of value shorts is derived: the nodes' share (four
+        // per int) is subtracted and the rest is halved.
+        //
+        int nodeCount = getShort();
+        int valueCount = (getInt() - (nodeCount << 2)) >> 1;
+
+        _ucnum_nodes = new int[nodeCount];
+        int idx = 0;
+        while (idx < nodeCount) {
+            _ucnum_nodes[idx] = getInt();
+            idx++;
+        }
+
+        _ucnum_vals = new short[valueCount];
+        idx = 0;
+        while (idx < valueCount) {
+            _ucnum_vals[idx] = getShort();
+            idx++;
+        }
+
+        return true;
+    }
+
+    /**
+     * Discard the number tables so that a later _ucnumb_load() reads the
+     * file again.
+     */
+    private static void _ucnumb_unload() {
+        _ucnum_nodes = null;
+        _ucnum_vals = null;
+    }
+
+    public static boolean ucnumber_lookup(int code, int[] result) {
+        int l, r, m;
+
+        result[0] = result[1] = 0;
+
+        l = 0;
+        r = _ucnum_nodes.length - 1;
+        while (l <= r) {
+            m = (l + r) >> 1;
+            m -= (m & 1);
+            if (code > _ucnum_nodes[m])
+              l = m + 2;
+            else if (code < _ucnum_nodes[m])
+              r = m - 2;
+            else {
+                result[0] = _ucnum_vals[_ucnum_nodes[m + 1]];
+                result[1] = _ucnum_vals[_ucnum_nodes[m + 1] + 1];
+                return true;
+            }
+        }
+        return false;
+    }
+
+    public static boolean ucdigit_lookup(int code, int[] result) {
+        int l, r, m;
+
+        result[0] = -1;
+
+        l = 0;
+        r = _ucnum_nodes.length - 1;
+        while (l <= r) {
+            m = (l + r) >> 1;
+            m -= (m & 1);
+            if (code > _ucnum_nodes[m])
+              l = m + 2;
+            else if (code < _ucnum_nodes[m])
+              r = m - 2;
+            else {
+                short d1 = _ucnum_vals[_ucnum_nodes[m + 1]];
+                short d2 = _ucnum_vals[_ucnum_nodes[m + 1] + 1];
+                if (d1 == d2) {
+                    result[0] = d1;
+                    return true;
+                }
+                return false;
+            }
+        }
+        return false;
+    }
+
+    /**********************************************************************
+     *
+     * File loading and unloading routines.
+     *
+     **********************************************************************/
+
+    //
+    // Masks that combine to load and unload files using a base URL.
+    //
+    public final static int UCDATA_CASE   = 0x01;
+    public final static int UCDATA_CTYPE  = 0x02;
+    public final static int UCDATA_DECOMP = 0x04;
+    public final static int UCDATA_CMBCL  = 0x08;
+    public final static int UCDATA_NUM    = 0x10;
+    public final static int UCDATA_ALL    = 0x1f;
+
+    /**
+     * Load the data tables selected by `masks' from files under `base'.
+     *
+     * Each selected table is loaded on its own; a malformed file URL or a
+     * failed load of one table is ignored and does not stop the others.
+     *
+     * @param base   URL of the directory holding the .dat files
+     * @param masks  bitwise combination of the UCDATA_* constants
+     */
+    public static void ucdata_load(URL base, int masks) {
+        //
+        // File names are appended to the base, so it has to end in a
+        // slash.
+        //
+        String prefix = base.toString();
+        if (prefix.length() > 0 && prefix.charAt(prefix.length() - 1) != '/')
+          prefix = prefix + "/";
+
+        if ((masks & UCDATA_CTYPE) != 0) {
+            try {
+                _ucprop_load(new URL(prefix + "ctype.dat"));
+            } catch (MalformedURLException ignored) {}
+        }
+
+        if ((masks & UCDATA_CASE) != 0) {
+            try {
+                _uccase_load(new URL(prefix + "case.dat"));
+            } catch (MalformedURLException ignored) {}
+        }
+
+        if ((masks & UCDATA_DECOMP) != 0) {
+            try {
+                _ucdcmp_load(new URL(prefix + "decomp.dat"));
+            } catch (MalformedURLException ignored) {}
+        }
+
+        if ((masks & UCDATA_CMBCL) != 0) {
+            try {
+                _uccmbcl_load(new URL(prefix + "cmbcl.dat"));
+            } catch (MalformedURLException ignored) {}
+        }
+
+        if ((masks & UCDATA_NUM) != 0) {
+            try {
+                _ucnumb_load(new URL(prefix + "num.dat"));
+            } catch (MalformedURLException ignored) {}
+        }
+    }
+
+    /**
+     * Unload the data tables selected by `masks'.
+     *
+     * @param masks  bitwise combination of the UCDATA_* constants
+     */
+    public static void ucdata_unload(int masks) {
+        if ((masks & UCDATA_CTYPE) != 0) {
+            _ucprop_unload();
+        }
+        if ((masks & UCDATA_CASE) != 0) {
+            _uccase_unload();
+        }
+        if ((masks & UCDATA_DECOMP) != 0) {
+            _ucdcmp_unload();
+        }
+        if ((masks & UCDATA_CMBCL) != 0) {
+            _uccmbcl_unload();
+        }
+        if ((masks & UCDATA_NUM) != 0) {
+            _ucnumb_unload();
+        }
+    }
+}
--- a/libraries/liblunicode/ucdata/UCDataTest.java
+++ b/libraries/liblunicode/ucdata/UCDataTest.java
@ -0,0 +1,94 @@
+/*
+ * $Id: UCDataTest.java,v 1.1 1999/08/23 16:14:08 mleisher Exp $
+ *
+ * Copyright 1999 Computing Research Labs, New Mexico State University
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+ * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+import java.io.*;
+import java.net.*;
+import UCData.*;
+
+public class UCDataTest {
+    //
+    // Location of the data files used when no base URL is given on the
+    // command line.
+    //
+    private static final String DEFAULT_BASE =
+        "file:/home/mleisher/unicode/textutils/ucdata";
+
+    //
+    // Report the value UCData.ucnumber_lookup() has for `code', if any.
+    // `label' is the text printed in front of the value.  Equal values
+    // are printed as a digit, different values as a fraction.
+    //
+    private static void showNumber(String label, int code) {
+        int num[] = {0,0};
+
+        if (!UCData.ucnumber_lookup(code, num))
+          return;
+
+        if (num[0] != num[1])
+          System.out.println(label+" is fraction "+num[0]+"/"+num[1]);
+        else
+          System.out.println(label+" is digit "+num[0]);
+    }
+
+    /**********************************************************************
+     *
+     * Main.
+     *
+     **********************************************************************/
+
+    //
+    // Smoke test for the UCData lookups.  The first command line argument,
+    // if present, is the base URL of the directory holding the .dat files;
+    // otherwise DEFAULT_BASE is used.
+    //
+    public static void main(String[] args) {
+        String base = (args.length > 0) ? args[0] : DEFAULT_BASE;
+        URL url;
+
+        //
+        // Stop here on a bad URL instead of handing null to ucdata_load().
+        //
+        try {
+            url = new URL(base);
+        } catch (MalformedURLException mue) {
+            System.err.println("UCDataTest: malformed base URL: " + base);
+            return;
+        }
+
+        UCData.ucdata_load(url, UCData.UCDATA_ALL);
+
+        if (UCData.ucisalpha(0x1d5))
+          System.out.println("0x1d5 is alpha");
+        else
+          System.out.println("0x1d5 is not alpha");
+
+        int c;
+
+        c = UCData.uctolower(0x1f1);
+        System.out.println("0x1f1 lower is 0x"+Integer.toHexString(c));
+        c = UCData.uctotitle(0x1f1);
+        System.out.println("0x1f1 title is 0x"+Integer.toHexString(c));
+
+        c = UCData.uctolower(0xff3a);
+        System.out.println("0xff3a lower is 0x"+Integer.toHexString(c));
+        c = UCData.uctotitle(0xff3a);
+        System.out.println("0xff3a title is 0x"+Integer.toHexString(c));
+
+        int[] decomp = UCData.ucdecomp(0x1d5);
+        if (decomp != null) {
+            System.out.print("0x1d5 decomposition :");
+            for (int i = 0; i < decomp.length; i++)
+              System.out.print("0x"+Integer.toHexString(decomp[i])+" ");
+            System.out.println("");
+        }
+
+        int ccl = UCData.uccombining_class(0x41);
+        System.out.println("0x41 combining class " + ccl);
+        ccl = UCData.uccombining_class(0xfe23);
+        System.out.println("0xfe23 combining class " + ccl);
+
+        showNumber("0x30", 0x30);
+        showNumber("0xbc", 0xbc);
+
+        int num[] = {0,0};
+        if (UCData.ucdigit_lookup(0x6f9, num))
+          System.out.println("0x6f9 is digit " + num[0]);
+        else
+          System.out.println("0x6f9 is not a digit");
+    }
+}
--- a/libraries/liblunicode/ucdata/api.txt
+++ b/libraries/liblunicode/ucdata/api.txt
@ -0,0 +1,343 @@
+#
+# $Id: api.txt,v 1.2 1999/11/19 15:24:29 mleisher Exp $
+#
+
+                             The MUTT UCData API
+                             -------------------
+
+
+
+
+-----------------------------------------------------------------------------
+
+Macros that combine to select data tables for ucdata_load(), ucdata_unload(),
+and ucdata_reload().
+
+#define UCDATA_CASE   0x01
+#define UCDATA_CTYPE  0x02
+#define UCDATA_DECOMP 0x04
+#define UCDATA_CMBCL  0x08
+#define UCDATA_NUM    0x10
+#define UCDATA_ALL (UCDATA_CASE|UCDATA_CTYPE|UCDATA_DECOMP|\
+                    UCDATA_CMBCL|UCDATA_NUM)
+-----------------------------------------------------------------------------
+
+void ucdata_load(char *paths, int masks)
+
+  This function initializes the UCData library by locating the data files in
+  one of the colon-separated directories in the `paths' parameter.  The data
+  files to be loaded are specified in the `masks' parameter as a bitwise
+  combination of the macros listed above.
+
+  This should be called before using any of the other functions.
+
+  NOTE: the ucdata_setup(char *paths) function is now a macro that expands
+        into this function at compile time.
+
+-----------------------------------------------------------------------------
+
+void ucdata_unload(int masks)
+
+  This function unloads the data tables specified in the `masks' parameter.
+
+  This function should be called when the application is done using the UCData
+  package.
+
+  NOTE: the ucdata_cleanup() function is now a macro that expands into this
+        function at compile time.
+
+-----------------------------------------------------------------------------
+
+void ucdata_reload(char *paths, int masks)
+
+  This function reloads the data files from one of the colon-separated
+  directories in the `paths' parameter.  The data files to be reloaded are
+  specified in the `masks' parameter as a bitwise combination of the macros
+  listed above.
+
+  If the data files have already been loaded, they are unloaded before the
+  data files are loaded again.
+
+-----------------------------------------------------------------------------
+
+int ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
+
+  This function determines if a character has a decomposition and returns the
+  decomposition information if it exists.
+
+  If a zero is returned, there is no decomposition.  If a non-zero is
+  returned, then the `num' and `decomp' variables are filled in with the
+  appropriate values.
+
+  Example call:
+
+    unsigned long i, num, *decomp;
+
+    if (ucdecomp(0x1d5, &num, &decomp) != 0) {
+       for (i = 0; i < num; i++)
+         printf("0x%08lX,", decomp[i]);
+       putchar('\n');
+    }
+
+-----------------------------------------------------------------------------
+
+int ucdecomp_hangul(unsigned long code, unsigned long *num,
+                    unsigned long decomp[])
+
+  This function determines if a Hangul syllable has a decomposition and
+  returns the decomposition information.
+
+  An array of at least size 3 should be passed to the function for the
+  decomposition of the syllable.
+
+  If a zero is returned, the character is not a Hangul syllable.  If a
+  non-zero is returned, the `num' field will be 2 or 3 and the syllable will
+  be decomposed into the `decomp' array arithmetically.
+
+  Example call:
+
+    unsigned long i, num, decomp[3];
+
+    if (ucdecomp_hangul(0xb1ba, &num, decomp) != 0) {
+       for (i = 0; i < num; i++)
+         printf("0x%08lX,", decomp[i]);
+       putchar('\n');
+    }
+
+-----------------------------------------------------------------------------
+
+struct ucnumber {
+  int numerator;
+  int denominator;
+};
+
+int ucnumber_lookup(unsigned long code, struct ucnumber *num)
+
+  This function determines if the code is a number and fills in the `num'
+  field with the numerator and denominator.  If the code happens to be a
+  single digit, the numerator and denominator fields will be the same.
+
+  If the function returns 0, the code is not a number.  Any other return
+  value means the code is a number.
+
+int ucdigit_lookup(unsigned long code, int *digit)
+
+  This function determines if the code is a digit and fills in the `digit'
+  field with the digit value.
+
+  If the function returns 0, the code is not a digit.  Any other return
+  value means the code is a digit.
+
+struct ucnumber ucgetnumber(unsigned long code)
+
+  This is a compatibility function with John Cowan's "uctype" package.  It
+  uses ucnumber_lookup().
+
+int ucgetdigit(unsigned long code)
+
+  This is a compatibility function with John Cowan's "uctype" package.  It
+  uses ucdigit_lookup().
+
+-----------------------------------------------------------------------------
+
+unsigned long uctoupper(unsigned long code)
+
+  This function returns the code unchanged if it is already upper case or has
+  no upper case equivalent.  Otherwise the upper case equivalent is returned.
+
+-----------------------------------------------------------------------------
+
+unsigned long uctolower(unsigned long code)
+
+  This function returns the code unchanged if it is already lower case or has
+  no lower case equivalent.  Otherwise the lower case equivalent is returned.
+
+-----------------------------------------------------------------------------
+
+unsigned long uctotitle(unsigned long code)
+
+  This function returns the code unchanged if it is already title case or has
+  no title case equivalent.  Otherwise the title case equivalent is returned.
+
+-----------------------------------------------------------------------------
+
+int ucisalpha(unsigned long code)
+int ucisalnum(unsigned long code)
+int ucisdigit(unsigned long code)
+int uciscntrl(unsigned long code)
+int ucisspace(unsigned long code)
+int ucisblank(unsigned long code)
+int ucispunct(unsigned long code)
+int ucisgraph(unsigned long code)
+int ucisprint(unsigned long code)
+int ucisxdigit(unsigned long code)
+
+int ucisupper(unsigned long code)
+int ucislower(unsigned long code)
+int ucistitle(unsigned long code)
+
+  These functions (actually macros) determine if a character has these
+  properties.  These behave in a fashion very similar to the venerable ctype
+  package.
+
+-----------------------------------------------------------------------------
+
+int ucisisocntrl(unsigned long code)
+
+  Is the character a C0 control character (< 32) ?
+
+int ucisfmtcntrl(unsigned long code)
+
+  Is the character a format control character?
+
+int ucissymbol(unsigned long code)
+
+  Is the character a symbol?
+
+int ucisnumber(unsigned long code)
+
+  Is the character a number or digit?
+
+int ucisnonspacing(unsigned long code)
+
+  Is the character non-spacing?
+
+int ucisopenpunct(unsigned long code)
+
+  Is the character an open/left punctuation (i.e. '[')
+
+int ucisclosepunct(unsigned long code)
+
+  Is the character a close/right punctuation (i.e. ']')
+
+int ucisinitialpunct(unsigned long code)
+
+  Is the character an initial punctuation (i.e. U+2018 LEFT SINGLE QUOTATION
+  MARK)
+
+int ucisfinalpunct(unsigned long code)
+
+  Is the character a final punctuation (i.e. U+2019 RIGHT SINGLE QUOTATION
+  MARK)
+
+int uciscomposite(unsigned long code)
+
+  Can the character be decomposed into a set of other characters?
+
+int ucisquote(unsigned long code)
+
+  Is the character one of the many quotation marks?
+
+int ucissymmetric(unsigned long code)
+
+  Is the character one that has an opposite form (i.e. <>)
+
+int ucismirroring(unsigned long code)
+
+  Is the character mirroring (superset of symmetric)?
+
+int ucisnonbreaking(unsigned long code)
+
+  Is the character non-breaking (i.e. non-breaking space)?
+
+int ucisrtl(unsigned long code)
+
+  Does the character have strong right-to-left directionality (i.e. Arabic
+  letters)?
+
+int ucisltr(unsigned long code)
+
+  Does the character have strong left-to-right directionality (i.e. Latin
+  letters)?
+
+int ucisstrong(unsigned long code)
+
+  Does the character have strong directionality?
+
+int ucisweak(unsigned long code)
+
+  Does the character have weak directionality (i.e. numbers)?
+
+int ucisneutral(unsigned long code)
+
+  Does the character have neutral directionality (i.e. whitespace)?
+
+int ucisseparator(unsigned long code)
+
+  Is the character a block or segment separator?
+
+int ucislsep(unsigned long code)
+
+  Is the character a line separator?
+
+int ucispsep(unsigned long code)
+
+  Is the character a paragraph separator?
+
+int ucismark(unsigned long code)
+
+  Is the character a mark of some kind?
+
+int ucisnsmark(unsigned long code)
+
+  Is the character a non-spacing mark?
+
+int ucisspmark(unsigned long code)
+
+  Is the character a spacing mark?
+
+int ucismodif(unsigned long code)
+
+  Is the character a modifier letter?
+
+int ucismodifsymbol(unsigned long code)
+
+  Is the character a modifier symbol?
+
+int ucisletnum(unsigned long code)
+
+  Is the character a number represented by a letter?
+
+int ucisconnect(unsigned long code)
+
+  Is the character connecting punctuation?
+
+int ucisdash(unsigned long code)
+
+  Is the character dash punctuation?
+
+int ucismath(unsigned long code)
+
+  Is the character a math character?
+
+int uciscurrency(unsigned long code)
+
+  Is the character a currency character?
+
+int ucisenclosing(unsigned long code)
+
+  Is the character enclosing (i.e. enclosing box)?
+
+int ucisprivate(unsigned long code)
+
+  Is the character from the Private Use Area?
+
+int ucissurrogate(unsigned long code)
+
+  Is the character one of the surrogate codes?
+
+int ucisdefined(unsigned long code)
+
+  Is the character defined (appeared in one of the data files)?
+
+int ucisundefined(unsigned long code)
+
+  Is the character not defined (non-Unicode)?
+
+int ucishan(unsigned long code)
+
+  Is the character a Han ideograph?
+
+int ucishangul(unsigned long code)
+
+  Is the character a pre-composed Hangul syllable?
--- a/libraries/liblunicode/ucdata/bidiapi.txt
+++ b/libraries/liblunicode/ucdata/bidiapi.txt
@ -0,0 +1,84 @@
+#
+# $Id: bidiapi.txt,v 1.2 1999/11/19 15:24:29 mleisher Exp $
+#
+
+                       "Pretty Good Bidi Algorithm" API
+
+The PGBA (Pretty Good Bidi Algorithm) is an effective alternative to the
+Unicode BiDi algorithm.  It currently provides only implicit reordering and
+does not yet support explicit reordering codes that the Unicode BiDi algorithm
+supports.  In addition to reordering, the PGBA includes cursor movement
+support for both visual and logical navigation.
+
+-----------------------------------------------------------------------------
+
+#define UCPGBA_LTR 0
+#define UCPGBA_RTL 1
+
+  These macros appear in the `direction' field of the data structures.
+
+#define UCPGBA_CURSOR_VISUAL  0
+#define UCPGBA_CURSOR_LOGICAL 1
+
+  These macros are used to set the cursor movement for each reordered string.
+
+-----------------------------------------------------------------------------
+
+ucstring_t *ucstring_create(unsigned long *source, unsigned long start,
+                            unsigned long end, int default_direction,
+                            int cursor_motion)
+
+  This function will create a reordered string by using the implicit
+  directionality of the characters in the specified substring.
+
+  The `default_direction' parameter should be one of UCPGBA_LTR or UCPGBA_RTL
+  and is used only in cases where a string contains no characters with strong
+  directionality.
+
+  The `cursor_motion' parameter should be one of UCPGBA_CURSOR_VISUAL or
+  UCPGBA_CURSOR_LOGICAL, and is used to specify the initial cursor motion
+  behavior.  This behavior can be switched at any time using
+  ucstring_set_cursor_motion().
+
+-----------------------------------------------------------------------------
+
+void ucstring_free(ucstring_t *string)
+
+  This function will deallocate the memory used by the string, including the
+  string itself.
+
+-----------------------------------------------------------------------------
+
+void ucstring_cursor_info(ucstring_t *string, int *direction,
+                          unsigned long *position)
+
+  This function will return the text position of the internal cursor and the
+  directionality of the text at that position.  The position returned is the
+  original text position of the character.
+
+-----------------------------------------------------------------------------
+
+int ucstring_set_cursor_motion(ucstring_t *string, int cursor_motion)
+
+  This function will change the cursor motion type and return the previous
+  cursor motion type.
+
+-----------------------------------------------------------------------------
+
+int ucstring_cursor_right(ucstring_t *string, int count)
+
+  This function will move the internal cursor to the right according to the
+  type of cursor motion set for the string.
+
+  If no cursor motion is performed, it returns 0.  Otherwise it will return a
+  1.
+
+-----------------------------------------------------------------------------
+
+int ucstring_cursor_left(ucstring_t *string, int count)
+
+  This function will move the internal cursor to the left according to the
+  type of cursor motion set for the string.
+
+  If no cursor motion is performed, it returns 0.  Otherwise it will return a
+  1.
--- a/libraries/liblunicode/ucdata/format.txt
+++ b/libraries/liblunicode/ucdata/format.txt
@ -0,0 +1,243 @@
+#
+# $Id: format.txt,v 1.1 1998/07/24 15:17:21 mleisher Exp $
+#
+
+CHARACTER DATA
+==============
+
+This package generates some data files that contain character properties useful
+for text processing.
+
+CHARACTER PROPERTIES
+====================
+
+The first data file is called "ctype.dat" and contains a compressed form of
+the character properties found in the Unicode Character Database (UCDB).
+Additional properties can be specified in limited UCDB format in another file
+to avoid modifying the original UCDB.
+
+The following is a property name and code table to be used with the character
+data:
+
+NAME CODE DESCRIPTION
+---------------------
+Mn   0    Mark, Non-Spacing
+Mc   1    Mark, Spacing Combining
+Me   2    Mark, Enclosing
+Nd   3    Number, Decimal Digit
+Nl   4    Number, Letter
+No   5    Number, Other
+Zs   6    Separator, Space
+Zl   7    Separator, Line
+Zp   8    Separator, Paragraph
+Cc   9    Other, Control
+Cf   10   Other, Format
+Cs   11   Other, Surrogate
+Co   12   Other, Private Use
+Cn   13   Other, Not Assigned
+Lu   14   Letter, Uppercase
+Ll   15   Letter, Lowercase
+Lt   16   Letter, Titlecase
+Lm   17   Letter, Modifier
+Lo   18   Letter, Other
+Pc   19   Punctuation, Connector
+Pd   20   Punctuation, Dash
+Ps   21   Punctuation, Open
+Pe   22   Punctuation, Close
+Po   23   Punctuation, Other
+Sm   24   Symbol, Math
+Sc   25   Symbol, Currency
+Sk   26   Symbol, Modifier
+So   27   Symbol, Other
+L    28   Left-To-Right
+R    29   Right-To-Left
+EN   30   European Number
+ES   31   European Number Separator
+ET   32   European Number Terminator
+AN   33   Arabic Number
+CS   34   Common Number Separator
+B    35   Block Separator
+S    36   Segment Separator
+WS   37   Whitespace
+ON   38   Other Neutrals
+Pi   47   Punctuation, Initial
+Pf   48   Punctuation, Final
+#
+# Implementation specific properties.
+#
+Cm   39   Composite
+Nb   40   Non-Breaking
+Sy   41   Symmetric (characters which are part of open/close pairs)
+Hd   42   Hex Digit
+Qm   43   Quote Mark
+Mr   44   Mirroring
+Ss   45   Space, Other (controls viewed as spaces in ctype isspace())
+Cp   46   Defined character
+
+The actual binary data is formatted as follows:
+
+  Assumptions: unsigned short is at least 16-bits in size and unsigned long
+               is at least 32-bits in size.
+
+    unsigned short ByteOrderMark
+    unsigned short OffsetArraySize
+    unsigned long  Bytes
+    unsigned short Offsets[OffsetArraySize + 1]
+    unsigned long  Ranges[N], N = value of Offsets[OffsetArraySize]
+
+  The Bytes field provides the total byte count used for the Offsets[] and
+  Ranges[] arrays.  The Offsets[] array is aligned on a 4-byte boundary and
+  there is always one extra node on the end to hold the final index of the
+  Ranges[] array.  The Ranges[] array contains pairs of 4-byte values
+  representing a range of Unicode characters.  The pairs are arranged in
+  increasing order by the first character code in the range.
+
+  Determining if a particular character is in the property list requires a
+  simple binary search to determine if a character is in any of the ranges
+  for the property.
+
+  If the ByteOrderMark is equal to 0xFFFE, then the data was generated on a
+  machine with a different endian order and the values must be byte-swapped.
+
+  To swap a 16-bit value:
+     c = (c >> 8) | ((c & 0xff) << 8)
+
+  To swap a 32-bit value:
+     c = ((c & 0xff) << 24) | (((c >> 8) & 0xff) << 16) |
+         (((c >> 16) & 0xff) << 8) | (c >> 24)
+
+CASE MAPPINGS
+=============
+
+The next data file is called "case.dat" and contains three case mapping tables
+in the following order: upper, lower, and title case.  Each table is in
+increasing order by character code and each mapping contains 3 unsigned longs
+which represent the possible mappings.
+
+The format for the binary form of these tables is:
+
+  unsigned short ByteOrderMark
+  unsigned short NumMappingNodes, count of all mapping nodes
+  unsigned short CaseTableSizes[2], upper and lower mapping node counts
+  unsigned long  CaseTables[NumMappingNodes * 3]
+
+  The starting indexes of the case tables are calculated as follows:
+
+    UpperIndex = 0;
+    LowerIndex = CaseTableSizes[0] * 3;
+    TitleIndex = LowerIndex + CaseTableSizes[1] * 3;
+
+  The order of the fields for the three tables is:
+
+    Upper case
+    ----------
+    unsigned long upper;
+    unsigned long lower;
+    unsigned long title;
+
+    Lower case
+    ----------
+    unsigned long lower;
+    unsigned long upper;
+    unsigned long title;
+
+    Title case
+    ----------
+    unsigned long title;
+    unsigned long upper;
+    unsigned long lower;
+
+  If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the
+  same way as described in the CHARACTER PROPERTIES section.
+
+  Because the tables are in increasing order by character code, locating a
+  mapping requires a simple binary search on one of the 3 codes that make up
+  each node.
+
+  It is important to note that there can only be 65535 mapping nodes (the
+  largest count a 16-bit NumMappingNodes can hold), which divided into 3
+  portions allows 21845 nodes for each case mapping table.  The distribution
+  of mappings may be more or less than 21845 per table, but only 65535 are
+  allowed in total.
+
+DECOMPOSITIONS
+==============
+
+The next data file is called "decomp.dat" and contains the decomposition data
+for all characters whose decompositions contain more than one character and
+are *not* compatibility decompositions.  Compatibility decompositions are
+signaled in the UCDB format by the use of the <compat> tag in the
+decomposition field.  Each list of character codes represents a full
+decomposition of a composite character.  The nodes are arranged in increasing
+order by character code.
+
+The format for the binary form of this table is:
+
+  unsigned short ByteOrderMark
+  unsigned short NumDecompNodes, count of all decomposition nodes
+  unsigned long  Bytes
+  unsigned long  DecompNodes[(NumDecompNodes * 2) + 1]
+  unsigned long  Decomp[N], N = sum of all counts in DecompNodes[]
+
+  If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the
+  same way as described in the CHARACTER PROPERTIES section.
+
+  The DecompNodes[] array consists of pairs of unsigned longs, the first of
+  which is the character code and the second is the initial index of the list
+  of character codes representing the decomposition.
+
+  Locating the decomposition of a composite character requires a binary search
+  for a character code in the DecompNodes[] array and using its index to
+  locate the start of the decomposition.  The length of the decomposition list
+  is the index in the following element in DecompNodes[] minus the current
+  index.
+
+COMBINING CLASSES
+=================
+
+The fourth data file is called "cmbcl.dat" and contains the characters with
+non-zero combining classes.
+
+The format for the binary form of this table is:
+
+  unsigned short ByteOrderMark
+  unsigned short NumCCLNodes
+  unsigned long  Bytes
+  unsigned long  CCLNodes[NumCCLNodes * 3]
+
+  If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the
+  same way as described in the CHARACTER PROPERTIES section.
+
+  The CCLNodes[] array consists of groups of three unsigned longs.  The first
+  and second are the beginning and ending of a range and the third is the
+  combining class of that range.
+
+  If a character is not found in this table, then the combining class is
+  assumed to be 0.
+
+  It is important to note that only 65535 distinct ranges plus combining class
+  can be specified because the NumCCLNodes is usually a 16-bit number.
+
+NUMBER TABLE
+============
+
+The final data file is called "num.dat" and contains the characters that have
+a numeric value associated with them.
+
+The format for the binary form of the table is:
+
+  unsigned short ByteOrderMark
+  unsigned short NumNumberNodes
+  unsigned long  Bytes
+  unsigned long  NumberNodes[NumNumberNodes]
+  unsigned short ValueNodes[(Bytes - (NumNumberNodes * sizeof(unsigned long)))
+                            / sizeof(short)]
+
+  If the ByteOrderMark is equal to 0xFFFE, endian swapping is required in the
+  same way as described in the CHARACTER PROPERTIES section.
+
+  The NumberNodes array contains pairs of values, the first of which is the
+  character code and the second an index into the ValueNodes array.  The
+  ValueNodes array contains pairs of integers which represent the numerator
+  and denominator of the numeric value of the character.  If the character
+  happens to map to an integer, both the values in ValueNodes will be the
+  same.
--- a/libraries/liblunicode/ucdata/ucdata.c
+++ b/libraries/liblunicode/ucdata/ucdata.c
--- a/libraries/liblunicode/ucdata/ucdata.h
+++ b/libraries/liblunicode/ucdata/ucdata.h
@ -0,0 +1,306 @@
+/*
+ * Copyright 1999 Computing Research Labs, New Mexico State University
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+ * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _h_ucdata
+#define _h_ucdata
+
+/*
+ * $Id: ucdata.h,v 1.5 1999/11/19 15:24:29 mleisher Exp $
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Prototype wrapper: when __STDC__ is defined the parameter list is kept,
+ * otherwise it collapses to `()'.  The macro is #undef'ed again at the end of
+ * this header.
+ */
+#undef __
+#ifdef __STDC__
+#define __(x) x
+#else
+#define __(x) ()
+#endif
+
+/* Version string; presumably the release of the ucdata package. */
+#define UCDATA_VERSION "2.3"
+
+/**************************************************************************
+ *
+ * Masks and macros for character properties.
+ *
+ **************************************************************************/
+
+/*
+ * Values that can appear in the `mask1' parameter of the ucisprop()
+ * function.  Each property owns one bit, so several properties can be OR'ed
+ * into one mask.  The bit position equals the property code listed in
+ * format.txt (Mn = 0 ... ES = 31), which uses up all 32 bits; the remaining
+ * properties are passed in `mask2'.  Note that "Other, Surrogate" is spelled
+ * UC_OS here; UC_CS is the Common Number Separator value of `mask2'.
+ */
+#define UC_MN 0x00000001 /* Mark, Non-Spacing          */
+#define UC_MC 0x00000002 /* Mark, Spacing Combining    */
+#define UC_ME 0x00000004 /* Mark, Enclosing            */
+#define UC_ND 0x00000008 /* Number, Decimal Digit      */
+#define UC_NL 0x00000010 /* Number, Letter             */
+#define UC_NO 0x00000020 /* Number, Other              */
+#define UC_ZS 0x00000040 /* Separator, Space           */
+#define UC_ZL 0x00000080 /* Separator, Line            */
+#define UC_ZP 0x00000100 /* Separator, Paragraph       */
+#define UC_CC 0x00000200 /* Other, Control             */
+#define UC_CF 0x00000400 /* Other, Format              */
+#define UC_OS 0x00000800 /* Other, Surrogate           */
+#define UC_CO 0x00001000 /* Other, Private Use         */
+#define UC_CN 0x00002000 /* Other, Not Assigned        */
+#define UC_LU 0x00004000 /* Letter, Uppercase          */
+#define UC_LL 0x00008000 /* Letter, Lowercase          */
+#define UC_LT 0x00010000 /* Letter, Titlecase          */
+#define UC_LM 0x00020000 /* Letter, Modifier           */
+#define UC_LO 0x00040000 /* Letter, Other              */
+#define UC_PC 0x00080000 /* Punctuation, Connector     */
+#define UC_PD 0x00100000 /* Punctuation, Dash          */
+#define UC_PS 0x00200000 /* Punctuation, Open          */
+#define UC_PE 0x00400000 /* Punctuation, Close         */
+#define UC_PO 0x00800000 /* Punctuation, Other         */
+#define UC_SM 0x01000000 /* Symbol, Math               */
+#define UC_SC 0x02000000 /* Symbol, Currency           */
+#define UC_SK 0x04000000 /* Symbol, Modifier           */
+#define UC_SO 0x08000000 /* Symbol, Other              */
+#define UC_L  0x10000000 /* Left-To-Right              */
+#define UC_R  0x20000000 /* Right-To-Left              */
+#define UC_EN 0x40000000 /* European Number            */
+#define UC_ES 0x80000000 /* European Number Separator  */
+
+/*
+ * Values that can appear in the `mask2' parameter of the ucisprop()
+ * function.  These continue the property codes of format.txt after the 32
+ * `mask1' bits: the bit position is the property code minus 32 (ET = 32 is
+ * bit 0 ... Pf = 48 is bit 16).  They must never be OR'ed into `mask1',
+ * where the same bit patterns mean different properties.
+ */
+#define UC_ET 0x00000001 /* European Number Terminator */
+#define UC_AN 0x00000002 /* Arabic Number              */
+#define UC_CS 0x00000004 /* Common Number Separator    */
+#define UC_B  0x00000008 /* Block Separator            */
+#define UC_S  0x00000010 /* Segment Separator          */
+#define UC_WS 0x00000020 /* Whitespace                 */
+#define UC_ON 0x00000040 /* Other Neutrals             */
+/*
+ * Implementation specific character properties (for example Ss, Nb and Sy
+ * are assigned to characters in MUTTUCData.txt).
+ */
+#define UC_CM 0x00000080 /* Composite                  */
+#define UC_NB 0x00000100 /* Non-Breaking               */
+#define UC_SY 0x00000200 /* Symmetric                  */
+#define UC_HD 0x00000400 /* Hex Digit                  */
+#define UC_QM 0x00000800 /* Quote Mark                 */
+#define UC_MR 0x00001000 /* Mirroring                  */
+#define UC_SS 0x00002000 /* Space, other               */
+
+#define UC_CP 0x00004000 /* Defined                    */
+
+/*
+ * Added for UnicodeData-2.1.3.  These are `mask2' values as well, which is
+ * why their property codes (47 and 48) follow the implementation specific
+ * ones.
+ */
+#define UC_PI 0x00008000 /* Punctuation, Initial       */
+#define UC_PF 0x00010000 /* Punctuation, Final         */
+
+/*
+ * This is the primary function for testing to see if a character has some set
+ * of properties.  The macros that test for various character properties all
+ * call this function with some set of masks: `mask1' takes the first group of
+ * UC_* values and `mask2' the second group.  The macros below presumably
+ * match when the character has any one of the listed properties (TODO
+ * confirm against ucisprop() itself).
+ *
+ * ucisspace() passes UC_SS in `mask2'.  UC_SS is a `mask2' value; the same
+ * bit in `mask1' is UC_CN (Other, Not Assigned), so OR'ing it into `mask1'
+ * would test for unassigned codes instead of the "Space, other" controls.
+ *
+ * NOTE(review): ucispunct() does not include UC_PC (Punctuation, Connector)
+ * although ucisgraph() and ucisprint() do -- confirm whether that omission is
+ * intentional.  ucisxdigit() and ucishex() expand to the same test.
+ */
+extern int ucisprop __((unsigned long code, unsigned long mask1,
+                        unsigned long mask2));
+
+#define ucisalpha(cc) ucisprop(cc, UC_LU|UC_LL|UC_LM|UC_LO|UC_LT, 0)
+#define ucisdigit(cc) ucisprop(cc, UC_ND, 0)
+#define ucisalnum(cc) ucisprop(cc, UC_LU|UC_LL|UC_LM|UC_LO|UC_LT|UC_ND, 0)
+#define uciscntrl(cc) ucisprop(cc, UC_CC|UC_CF, 0)
+#define ucisspace(cc) ucisprop(cc, UC_ZS, UC_SS)
+#define ucisblank(cc) ucisprop(cc, UC_ZS, 0)
+#define ucispunct(cc) ucisprop(cc, UC_PD|UC_PS|UC_PE|UC_PO, UC_PI|UC_PF)
+#define ucisgraph(cc) ucisprop(cc, UC_MN|UC_MC|UC_ME|UC_ND|UC_NL|UC_NO|\
+                               UC_LU|UC_LL|UC_LT|UC_LM|UC_LO|UC_PC|UC_PD|\
+                               UC_PS|UC_PE|UC_PO|UC_SM|UC_SC|UC_SK|\
+                               UC_SO, UC_PI|UC_PF)
+#define ucisprint(cc) ucisprop(cc, UC_MN|UC_MC|UC_ME|UC_ND|UC_NL|UC_NO|\
+                               UC_LU|UC_LL|UC_LT|UC_LM|UC_LO|UC_PC|UC_PD|\
+                               UC_PS|UC_PE|UC_PO|UC_SM|UC_SC|UC_SK|\
+                               UC_SO|UC_ZS, UC_PI|UC_PF)
+#define ucisupper(cc) ucisprop(cc, UC_LU, 0)
+#define ucislower(cc) ucisprop(cc, UC_LL, 0)
+#define ucistitle(cc) ucisprop(cc, UC_LT, 0)
+#define ucisxdigit(cc) ucisprop(cc, 0, UC_HD)
+
+#define ucisisocntrl(cc) ucisprop(cc, UC_CC, 0)
+#define ucisfmtcntrl(cc) ucisprop(cc, UC_CF, 0)
+
+#define ucissymbol(cc) ucisprop(cc, UC_SM|UC_SC|UC_SO|UC_SK, 0)
+#define ucisnumber(cc) ucisprop(cc, UC_ND|UC_NO|UC_NL, 0)
+#define ucisnonspacing(cc) ucisprop(cc, UC_MN, 0)
+#define ucisopenpunct(cc) ucisprop(cc, UC_PS, 0)
+#define ucisclosepunct(cc) ucisprop(cc, UC_PE, 0)
+#define ucisinitialpunct(cc) ucisprop(cc, 0, UC_PI)
+#define ucisfinalpunct(cc) ucisprop(cc, 0, UC_PF)
+
+#define uciscomposite(cc) ucisprop(cc, 0, UC_CM)
+#define ucishex(cc) ucisprop(cc, 0, UC_HD)
+#define ucisquote(cc) ucisprop(cc, 0, UC_QM)
+#define ucissymmetric(cc) ucisprop(cc, 0, UC_SY)
+#define ucismirroring(cc) ucisprop(cc, 0, UC_MR)
+#define ucisnonbreaking(cc) ucisprop(cc, 0, UC_NB)
+
+/*
+ * Directionality macros.  Strong means L or R; weak means one of the number
+ * related types EN, ES (`mask1') or ET, AN, CS (`mask2'); neutral means B, S,
+ * WS or ON; a separator is B or S only.
+ */
+#define ucisrtl(cc) ucisprop(cc, UC_R, 0)
+#define ucisltr(cc) ucisprop(cc, UC_L, 0)
+#define ucisstrong(cc) ucisprop(cc, UC_L|UC_R, 0)
+#define ucisweak(cc) ucisprop(cc, UC_EN|UC_ES, UC_ET|UC_AN|UC_CS)
+#define ucisneutral(cc) ucisprop(cc, 0, UC_B|UC_S|UC_WS|UC_ON)
+#define ucisseparator(cc) ucisprop(cc, 0, UC_B|UC_S)
+
+/*
+ * Other macros inspired by John Cowan.  Each one tests a single `mask1'
+ * property, except ucismark(), which accepts any of the three mark
+ * properties.  ucisnsmark() expands to the same test as ucisnonspacing().
+ */
+#define ucismark(cc) ucisprop(cc, UC_MN|UC_MC|UC_ME, 0)
+#define ucismodif(cc) ucisprop(cc, UC_LM, 0)
+#define ucisletnum(cc) ucisprop(cc, UC_NL, 0)
+#define ucisconnect(cc) ucisprop(cc, UC_PC, 0)
+#define ucisdash(cc) ucisprop(cc, UC_PD, 0)
+#define ucismath(cc) ucisprop(cc, UC_SM, 0)
+#define uciscurrency(cc) ucisprop(cc, UC_SC, 0)
+#define ucismodifsymbol(cc) ucisprop(cc, UC_SK, 0)
+#define ucisnsmark(cc) ucisprop(cc, UC_MN, 0)
+#define ucisspmark(cc) ucisprop(cc, UC_MC, 0)
+#define ucisenclosing(cc) ucisprop(cc, UC_ME, 0)
+#define ucisprivate(cc) ucisprop(cc, UC_CO, 0)
+#define ucissurrogate(cc) ucisprop(cc, UC_OS, 0)
+#define ucislsep(cc) ucisprop(cc, UC_ZL, 0)
+#define ucispsep(cc) ucisprop(cc, UC_ZP, 0)
+
+/*
+ * Identifier tests.  A start character is an upper, lower, title or other
+ * letter, or a letter number.  A part character may additionally be a
+ * non-spacing or spacing mark, a decimal digit, connector punctuation or a
+ * format control.
+ */
+#define ucisidentstart(cc) ucisprop(cc, UC_LU|UC_LL|UC_LT|UC_LO|UC_NL, 0)
+#define ucisidentpart(cc) ucisprop(cc, UC_LU|UC_LL|UC_LT|UC_LO|UC_NL|\
+                                   UC_MN|UC_MC|UC_ND|UC_PC|UC_CF, 0)
+
+/*
+ * ucisdefined() tests the implementation specific "Defined" property and
+ * ucisundefined() is its negation.  The negation is wrapped in parentheses
+ * so the macro always expands to one self-contained expression.
+ */
+#define ucisdefined(cc) ucisprop(cc, 0, UC_CP)
+#define ucisundefined(cc) (!ucisprop(cc, 0, UC_CP))
+
+/*
+ * Other miscellaneous character property macros.  These are plain range
+ * checks that call no function; note that `cc' is evaluated more than once.
+ * ucishan() covers U+4E00..U+9FFF and U+F900..U+FAFF.  ucishangul() covers
+ * only the pre-composed syllables U+AC00..U+D7A3: the codes that follow
+ * U+D7A3 are not syllables and must not be decomposed arithmetically.
+ */
+#define ucishan(cc) (((cc) >= 0x4e00 && (cc) <= 0x9fff) ||\
+                     ((cc) >= 0xf900 && (cc) <= 0xfaff))
+#define ucishangul(cc) ((cc) >= 0xac00 && (cc) <= 0xd7a3)
+
+/**************************************************************************
+ *
+ * Functions for case conversion.
+ *
+ * Per the manual page, each function returns `code' unchanged when it is
+ * already in the requested case or has no equivalent in that case;
+ * otherwise the equivalent in the requested case is returned.
+ *
+ **************************************************************************/
+
+extern unsigned long uctoupper __((unsigned long code));
+extern unsigned long uctolower __((unsigned long code));
+extern unsigned long uctotitle __((unsigned long code));
+
+/**************************************************************************
+ *
+ * Functions for getting decompositions.
+ *
+ **************************************************************************/
+
+/*
+ * This routine determines if the code has a decomposition.  If it returns 0,
+ * there is no decomposition.  Any other value indicates a decomposition was
+ * returned: `*num' is then the number of codes and `*decomp' points at them
+ * (see the example call in the manual page).
+ */
+extern int ucdecomp __((unsigned long code, unsigned long *num,
+
+                        unsigned long **decomp));
+
+/*
+ * If the code is a Hangul syllable, this routine decomposes it into the array
+ * passed.  The array size should be at least 3.  Per the manual page, the
+ * return value is 0 when the code is not a Hangul syllable; otherwise `*num'
+ * is set to 2 or 3 and that many codes are stored in `decomp'.
+ */
+extern int ucdecomp_hangul __((unsigned long code, unsigned long *num,
+                               unsigned long decomp[]));
+
+/**************************************************************************
+ *
+ * Functions for getting combining classes.
+ *
+ **************************************************************************/
+
+/*
+ * This will return the combining class for a character to be used with the
+ * Canonical Ordering algorithm.  format.txt states that a character missing
+ * from the "cmbcl.dat" table is assumed to have combining class 0.
+ */
+extern unsigned long uccombining_class __((unsigned long code));
+
+/**************************************************************************
+ *
+ * Functions for getting numbers and digits.
+ *
+ * A numeric value is kept as a numerator/denominator pair in struct
+ * ucnumber.  Per the manual page, ucnumber_lookup() and ucdigit_lookup()
+ * return 0 when the code is not a number (or digit) and non-zero after
+ * filling in `num' (or `digit').
+ *
+ **************************************************************************/
+
+struct ucnumber {
+    int numerator;
+    int denominator;
+};
+
+extern int ucnumber_lookup __((unsigned long code, struct ucnumber *num));
+extern int ucdigit_lookup __((unsigned long code, int *digit));
+
+/*
+ * For compatibility with John Cowan's "uctype" package.  The manual page
+ * describes these as wrappers around ucnumber_lookup() and ucdigit_lookup().
+ */
+extern struct ucnumber ucgetnumber __((unsigned long code));
+extern int ucgetdigit __((unsigned long code));
+
+/**************************************************************************
+ *
+ * Functions for library initialization and cleanup.
+ *
+ **************************************************************************/
+
+/*
+ * Macros for specifying the data tables to be loaded, unloaded, or reloaded
+ * by the ucdata_load(), ucdata_unload(), and ucdata_reload() routines.  Each
+ * table has its own bit, so the values can be OR'ed together; UCDATA_ALL
+ * selects every table.
+ */
+#define UCDATA_CASE   0x01
+#define UCDATA_CTYPE  0x02
+#define UCDATA_DECOMP 0x04
+#define UCDATA_CMBCL  0x08
+#define UCDATA_NUM    0x10
+
+#define UCDATA_ALL (UCDATA_CASE|UCDATA_CTYPE|UCDATA_DECOMP|\
+                    UCDATA_CMBCL|UCDATA_NUM)
+
+/*
+ * Functions to load, unload, and reload specific data files.  Per the manual
+ * page, `paths' is a colon-separated list of directories searched for the
+ * data files and `mask' is a bitwise combination of the UCDATA_* macros.
+ */
+extern void ucdata_load __((char *paths, int mask));
+extern void ucdata_unload __((int mask));
+extern void ucdata_reload __((char *paths, int mask));
+
+/*
+ * Deprecated functions, now just compatibility macros that load or unload
+ * every table.
+ */
+#define ucdata_setup(p) ucdata_load(p, UCDATA_ALL)
+#define ucdata_cleanup() ucdata_unload(UCDATA_ALL)
+
+#undef __
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _h_ucdata */
--- a/libraries/liblunicode/ucdata/ucdata.man
+++ b/libraries/liblunicode/ucdata/ucdata.man
@ -0,0 +1,464 @@
+.\"
+.\" $Id: ucdata.man,v 1.4 1999/11/19 16:08:33 mleisher Exp $
+.\"
+.TH ucdata 3 "19 November 1999"
+.SH NAME 
+ucdata \- package for providing Unicode/ISO10646 character information
+
+.SH SYNOPSIS
+#include <ucdata.h>
+.sp
+void ucdata_load(char * paths, int masks)
+.sp
+void ucdata_unload(int masks)
+.sp
+void ucdata_reload(char * paths, int masks)
+.sp
+int ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
+.sp
+int ucdecomp_hangul(unsigned long code, unsigned long *num,
+unsigned long decomp[])
+.sp
+.nf
+struct ucnumber {
+  int numerator;
+  int denominator;
+};
+.sp
+int ucnumber_lookup(unsigned long code, struct ucnumber *num)
+.sp
+int ucdigit_lookup(unsigned long code, int *digit)
+.sp
+struct ucnumber ucgetnumber(unsigned long code)
+.sp
+int ucgetdigit(unsigned long code)
+.sp
+unsigned long uctoupper(unsigned long code)
+.sp
+unsigned long uctolower(unsigned long code)
+.sp
+unsigned long uctotitle(unsigned long code)
+.sp
+int ucisalpha(unsigned long code)
+.sp
+int ucisalnum(unsigned long code)
+.sp
+int ucisdigit(unsigned long code)
+.sp
+int uciscntrl(unsigned long code)
+.sp
+int ucisspace(unsigned long code)
+.sp
+int ucisblank(unsigned long code)
+.sp
+int ucispunct(unsigned long code)
+.sp
+int ucisgraph(unsigned long code)
+.sp
+int ucisprint(unsigned long code)
+.sp
+int ucisxdigit(unsigned long code)
+.sp
+int ucisupper(unsigned long code)
+.sp
+int ucislower(unsigned long code)
+.sp
+int ucistitle(unsigned long code)
+.sp
+int ucisisocntrl(unsigned long code)
+.sp
+int ucisfmtcntrl(unsigned long code)
+.sp
+int ucissymbol(unsigned long code)
+.sp
+int ucisnumber(unsigned long code)
+.sp
+int ucisnonspacing(unsigned long code)
+.sp
+int ucisopenpunct(unsigned long code)
+.sp
+int ucisclosepunct(unsigned long code)
+.sp
+int ucisinitialpunct(unsigned long code)
+.sp
+int ucisfinalpunct(unsigned long code)
+.sp
+int uciscomposite(unsigned long code)
+.sp
+int ucisquote(unsigned long code)
+.sp
+int ucissymmetric(unsigned long code)
+.sp
+int ucismirroring(unsigned long code)
+.sp
+int ucisnonbreaking(unsigned long code)
+.sp
+int ucisrtl(unsigned long code)
+.sp
+int ucisltr(unsigned long code)
+.sp
+int ucisstrong(unsigned long code)
+.sp
+int ucisweak(unsigned long code)
+.sp
+int ucisneutral(unsigned long code)
+.sp
+int ucisseparator(unsigned long code)
+.sp
+int ucislsep(unsigned long code)
+.sp
+int ucispsep(unsigned long code)
+.sp
+int ucismark(unsigned long code)
+.sp
+int ucisnsmark(unsigned long code)
+.sp
+int ucisspmark(unsigned long code)
+.sp
+int ucismodif(unsigned long code)
+.sp
+int ucismodifsymbol(unsigned long code)
+.sp
+int ucisletnum(unsigned long code)
+.sp
+int ucisconnect(unsigned long code)
+.sp
+int ucisdash(unsigned long code)
+.sp
+int ucismath(unsigned long code)
+.sp
+int uciscurrency(unsigned long code)
+.sp
+int ucisenclosing(unsigned long code)
+.sp
+int ucisprivate(unsigned long code)
+.sp
+int ucissurrogate(unsigned long code)
+.sp
+int ucisidentstart(unsigned long code)
+.sp
+int ucisidentpart(unsigned long code)
+.sp
+int ucisdefined(unsigned long code)
+.sp
+int ucisundefined(unsigned long code)
+.sp
+int ucishan(unsigned long code)
+.sp
+int ucishangul(unsigned long code)
+
+.SH DESCRIPTION
+.TP 4
+.BR Macros
+.br
+UCDATA_CASE
+.br
+UCDATA_CTYPE
+.br
+UCDATA_DECOMP
+.br
+UCDATA_CMBCL
+.br
+UCDATA_NUM
+.br
+UCDATA_ALL
+.br
+.TP 4
+.BR ucdata_load()
+This function initializes the UCData library by locating the data files in one
+of the colon-separated directories in the `paths' parameter.  The data files
+to be loaded are specified in the `masks' parameter as a bitwise combination
+of the macros listed above.
+.sp
+This should be called before using any of the other functions.
+.TP 4
+.BR ucdata_unload()
+This function unloads the data tables specified in the `masks' parameter.
+.sp
+This function should be called when the application is done using the UCData
+package.
+.TP 4
+.BR ucdata_reload()
+This function reloads the data files from one of the colon-separated
+directories in the `paths' parameter.  The data files to be reloaded are
+specified in the `masks' parameter as a bitwise combination of the macros
+listed above.
+.TP 4
+.BR ucdecomp()
+This function determines if a character has a decomposition and returns the
+decomposition information if it exists.
+.sp
+If a zero is returned, there is no decomposition.  If a non-zero is
+returned, then the `num' and `decomp' variables are filled in with the
+appropriate values.
+.sp
+Example call:
+.sp
+.nf
+    unsigned long i, num, *decomp;
+
+    if (ucdecomp(0x1d5, &num, &decomp) != 0) {
+       for (i = 0; i < num; i++)
+         printf("0x%08lX,", decomp[i]);
+       putchar('\n');
+    }
+.fi
+.TP 4
+.BR ucdecomp_hangul()
+This function determines if a Hangul syllable has a
+decomposition and returns the decomposition information.
+.sp
+An array of at least size 3 should be passed to the function
+for the decomposition of the syllable.
+.sp
+If a zero is returned, the character is not a Hangul
+syllable. If a non-zero is returned, the `num' field
+will be 2 or 3 and the syllable will be decomposed into
+the `decomp' array arithmetically.
+.sp
+Example call:
+.sp
+.nf
+    unsigned long i, num, decomp[3];
+
+    if (ucdecomp_hangul(0xb1ba, &num, decomp) != 0) {
+       for (i = 0; i < num; i++)
+         printf("0x%08lX,", decomp[i]);
+       putchar('\n');
+    }
+.fi
+.TP 4
+.BR ucnumber_lookup()
+This function determines if the code is a number and
+fills in the `num' field with the numerator and
+denominator.  If the code happens to be a single digit,
+the numerator and denominator fields will be the same.
+.sp
+If the function returns 0, the code is not a number.
+Any other return value means the code is a number.
+.TP 4
+.BR ucdigit_lookup()
+This function determines if the code is a digit and
+fills in the `digit' field with the digit value.
+.sp
+If the function returns 0, the code is not a digit.
+Any other return value means the code is a digit.
+.TP 4
+.BR ucgetnumber()
+This is a compatibility function with John Cowan's
+"uctype" package.  It uses ucnumber_lookup().
+.TP 4
+.BR ucgetdigit()
+This is a compatibility function with John Cowan's
+"uctype" package.  It uses ucdigit_lookup().
+.TP 4
+.BR uctoupper()
+This function returns the code unchanged if it is
+already upper case or has no upper case equivalent.
+Otherwise the upper case equivalent is returned.
+.TP 4
+.BR uctolower()
+This function returns the code unchanged if it is
+already lower case or has no lower case equivalent.
+Otherwise the lower case equivalent is returned.
+.TP 4
+.BR uctotitle()
+This function returns the code unchanged if it is
+already title case or has no title case equivalent.
+Otherwise the title case equivalent is returned.
+.TP 4
+.BR ucisalpha()
+Test if \fIcode\fR is an alpha character.
+.TP 4
+.BR ucisalnum()
+Test if \fIcode\fR is an alpha or digit character.
+.TP 4
+.BR ucisdigit()
+Test if \fIcode\fR is a digit character.
+.TP 4
+.BR uciscntrl()
+Test if \fIcode\fR is a control character.
+.TP 4
+.BR ucisspace()
+Test if \fIcode\fR is a space character.
+.TP 4
+.BR ucisblank()
+Test if \fIcode\fR is a blank character.
+.TP 4
+.BR ucispunct()
+Test if \fIcode\fR is a punctuation character.
+.TP 4
+.BR ucisgraph()
+Test if \fIcode\fR is a graphical (visible) character.
+.TP 4
+.BR ucisprint()
+Test if \fIcode\fR is a printable character.
+.TP 4
+.BR ucisxdigit()
+Test if \fIcode\fR is a hexadecimal digit character.
+.TP 4
+.BR ucisupper()
+Test if \fIcode\fR is an upper case character.
+.TP 4
+.BR ucislower()
+Test if \fIcode\fR is a lower case character.
+.TP 4
+.BR ucistitle()
+Test if \fIcode\fR is a title case character.
+.TP 4
+.BR ucisisocntrl()
+Is the character a C0 control character (< 32)?
+.TP 4
+.BR ucisfmtcntrl()
+Is the character a format control character?
+.TP 4
+.BR ucissymbol()
+Is the character a symbol?
+.TP 4
+.BR ucisnumber()
+Is the character a number or digit?
+.TP 4
+.BR ucisnonspacing()
+Is the character non-spacing?
+.TP 4
+.BR ucisopenpunct()
+Is the character an open/left punctuation (e.g. '[')?
+.TP 4
+.BR ucisclosepunct()
+Is the character a close/right punctuation (e.g. ']')?
+.TP 4
+.BR ucisinitialpunct()
+Is the character an initial punctuation (i.e. U+2018 LEFT
+SINGLE QUOTATION MARK)
+.TP 4
+.BR ucisfinalpunct()
+Is the character a final punctuation (i.e. U+2019 RIGHT
+SINGLE QUOTATION MARK)
+.TP 4
+.BR uciscomposite()
+Can the character be decomposed into a set of other
+characters?
+.TP 4
+.BR ucisquote()
+Is the character one of the many quotation marks?
+.TP 4
+.BR ucissymmetric()
+Is the character one that has an opposite form
+(i.e. <>)
+.TP 4
+.BR ucismirroring()
+Is the character mirroring (superset of symmetric)?
+.TP 4
+.BR ucisnonbreaking()
+Is the character non-breaking (i.e. non-breaking
+space)?
+.TP 4
+.BR ucisrtl()
+Does the character have strong right-to-left
+directionality (i.e. Arabic letters)?
+.TP 4
+.BR ucisltr()
+Does the character have strong left-to-right
+directionality (i.e. Latin letters)?
+.TP 4
+.BR ucisstrong()
+Does the character have strong directionality?
+.TP 4
+.BR ucisweak()
+Does the character have weak directionality
+(i.e. numbers)?
+.TP 4
+.BR ucisneutral()
+Does the character have neutral directionality
+(i.e. whitespace)?
+.TP 4
+.BR ucisseparator()
+Is the character a block or segment separator?
+.TP 4
+.BR ucislsep()
+Is the character a line separator?
+.TP 4
+.BR ucispsep()
+Is the character a paragraph separator?
+.TP 4
+.BR ucismark()
+Is the character a mark of some kind?
+.TP 4
+.BR ucisnsmark()
+Is the character a non-spacing mark?
+.TP 4
+.BR ucisspmark()
+Is the character a spacing mark?
+.TP 4
+.BR ucismodif()
+Is the character a modifier letter?
+.TP 4
+.BR ucismodifsymbol()
+Is the character a modifier symbol?
+.TP 4
+.BR ucisletnum()
+Is the character a number represented by a letter?
+.TP 4
+.BR ucisconnect()
+Is the character connecting punctuation?
+.TP 4
+.BR ucisdash()
+Is the character dash punctuation?
+.TP 4
+.BR ucismath()
+Is the character a math character?
+.TP 4
+.BR uciscurrency()
+Is the character a currency character?
+.TP 4
+.BR ucisenclosing()
+Is the character enclosing (i.e. enclosing box)?
+.TP 4
+.BR ucisprivate()
+Is the character from the Private Use Area?
+.TP 4
+.BR ucissurrogate()
+Is the character one of the surrogate codes?
+.TP 4
+.BR ucisidentstart()
+Is the character a legal initial character of an identifier?
+.TP 4
+.BR ucisidentpart()
+Is the character a legal identifier character?
+.TP 4
+.BR ucisdefined()
+Is the character defined (appeared in one of the data
+files)?
+.TP 4
+.BR ucisundefined()
+Is the character not defined (non-Unicode)?
+.TP 4
+.BR ucishan()
+Is the character a Han ideograph?
+.TP 4
+.BR ucishangul()
+Is the character a pre-composed Hangul syllable?
+
+.SH "SEE ALSO"
+ctype(3)
+
+.SH ACKNOWLEDGMENTS
+These are people who have helped with patches or
+alerted me about problems.
+.sp
+John Cowan <cowan@locke.ccil.org>
+.br
+Bob Verbrugge <bob_verbrugge@nl.compuware.com>
+.br
+Christophe Pierret <cpierret@businessobjects.com>
+.br
+Kent Johnson <kent@pondview.mv.com>
+.br
+Valeriy E. Ushakov <uwe@ptc.spbu.ru>
+
+.SH AUTHOR
+Mark Leisher
+.br
+Computing Research Lab
+.br
+New Mexico State University
+.br
+Email: mleisher@crl.nmsu.edu
--- a/libraries/liblunicode/ucdata/ucgendat.c
+++ b/libraries/liblunicode/ucdata/ucgendat.c
--- a/libraries/liblunicode/ucdata/ucpgba.c
+++ b/libraries/liblunicode/ucdata/ucpgba.c
@ -0,0 +1,813 @@
+/*
+ * Copyright 1999 Computing Research Labs, New Mexico State University
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+ * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef lint
+#ifdef __GNUC__
+static char rcsid[] __attribute__ ((unused)) = "$Id: ucpgba.c,v 1.4 1999/11/29 16:41:06 mleisher Exp $";
+#else
+static char rcsid[] = "$Id: ucpgba.c,v 1.4 1999/11/29 16:41:06 mleisher Exp $";
+#endif
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "ucdata.h"
+#include "ucpgba.h"
+
+/*
+ * These macros are used while reordering RTL runs of text for the
+ * special case of non-spacing characters being in runs of weakly
+ * directional text.  They check for weak and non-spacing, and digits and
+ * non-spacing.
+ *
+ * Each one expands to a single ucisprop() call with two property masks,
+ * so the argument `cc' is evaluated exactly once.
+ */
+#define ISWEAKSPECIAL(cc)  ucisprop(cc, UC_EN|UC_ES|UC_MN, UC_ET|UC_AN|UC_CS)
+#define ISDIGITSPECIAL(cc) ucisprop(cc, UC_ND|UC_MN, 0)
+
+/*
+ * These macros are used while breaking a string into runs of text in
+ * different directions.  Descriptions:
+ *
+ * ISLTR_LTR - Test for members of an LTR run in an LTR context.  This looks
+ *             for characters with ltr, non-spacing, weak, and neutral
+ *             properties.
+ *
+ * ISRTL_RTL - Test for members of an RTL run in an RTL context.  This looks
+ *             for characters with rtl, non-spacing, weak, and neutral
+ *             properties.
+ *
+ * ISRTL_NEUTRAL  - Test for RTL or neutral characters.
+ *
+ * ISWEAK_NEUTRAL - Test for weak or neutral characters.
+ *
+ * ISLTR_LTR and ISRTL_RTL differ only in the strong property they accept
+ * (UC_L versus UC_R); their remaining mask bits are identical.
+ */
+#define ISLTR_LTR(cc) ucisprop(cc, UC_L|UC_MN|UC_EN|UC_ES,\
+                               UC_ET|UC_AN|UC_CS|UC_B|UC_S|UC_WS|UC_ON)
+
+#define ISRTL_RTL(cc) ucisprop(cc, UC_R|UC_MN|UC_EN|UC_ES,\
+                               UC_ET|UC_AN|UC_CS|UC_B|UC_S|UC_WS|UC_ON)
+
+#define ISRTL_NEUTRAL(cc) ucisprop(cc, UC_R, UC_B|UC_S|UC_WS|UC_ON)
+#define ISWEAK_NEUTRAL(cc) ucisprop(cc, UC_EN|UC_ES, \
+                                    UC_B|UC_S|UC_WS|UC_ON|UC_ET|UC_AN|UC_CS)
+
+/*
+ * This table is temporarily hard-coded here until it can be constructed
+ * automatically somehow.
+ *
+ * The table is a flat list of (character, mirrored character) entries:
+ * every even slot is a lookup key and the following odd slot is its
+ * replacement.  Each pair is listed in both directions, e.g. 0x0028 ->
+ * 0x0029 and 0x0029 -> 0x0028, so a lookup only ever compares even slots.
+ */
+static unsigned long _symmetric_pairs[] = {
+    0x0028, 0x0029, 0x0029, 0x0028, 0x003C, 0x003E, 0x003E, 0x003C,
+    0x005B, 0x005D, 0x005D, 0x005B, 0x007B, 0x007D, 0x007D, 0x007B,
+    0x2045, 0x2046, 0x2046, 0x2045, 0x207D, 0x207E, 0x207E, 0x207D,
+    0x208D, 0x208E, 0x208E, 0x208D, 0x3008, 0x3009, 0x3009, 0x3008,
+    0x300A, 0x300B, 0x300B, 0x300A, 0x300C, 0x300D, 0x300D, 0x300C,
+    0x300E, 0x300F, 0x300F, 0x300E, 0x3010, 0x3011, 0x3011, 0x3010,
+    0x3014, 0x3015, 0x3015, 0x3014, 0x3016, 0x3017, 0x3017, 0x3016,
+    0x3018, 0x3019, 0x3019, 0x3018, 0x301A, 0x301B, 0x301B, 0x301A,
+    0xFD3E, 0xFD3F, 0xFD3F, 0xFD3E, 0xFE59, 0xFE5A, 0xFE5A, 0xFE59,
+    0xFE5B, 0xFE5C, 0xFE5C, 0xFE5B, 0xFE5D, 0xFE5E, 0xFE5E, 0xFE5D,
+    0xFF08, 0xFF09, 0xFF09, 0xFF08, 0xFF3B, 0xFF3D, 0xFF3D, 0xFF3B,
+    0xFF5B, 0xFF5D, 0xFF5D, 0xFF5B, 0xFF62, 0xFF63, 0xFF63, 0xFF62,
+};
+
+/*
+ * Number of array elements in the table (twice the number of entries).
+ */
+static int _symmetric_pairs_size =
+sizeof(_symmetric_pairs)/sizeof(_symmetric_pairs[0]);
+
+/*
+ * Return the mirrored counterpart of `c' as recorded in the
+ * _symmetric_pairs table, or `c' itself when the table has no entry for it.
+ */
+static unsigned long
+#ifdef __STDC__
+_ucsymmetric_pair(unsigned long c)
+#else
+_ucsymmetric_pair(c)
+unsigned long c;
+#endif
+{
+    int idx = 0;
+
+    /*
+     * Even slots hold the lookup key and the following odd slot holds its
+     * replacement, so step through the table two elements at a time.
+     */
+    while (idx < _symmetric_pairs_size) {
+        if (c == _symmetric_pairs[idx])
+          return _symmetric_pairs[idx + 1];
+        idx += 2;
+    }
+
+    return c;
+}
+
+/*
+ * This routine creates a new run, copies the text into it, links it into the
+ * logical text order chain and returns it to the caller to be linked into
+ * the visual text order chain.
+ *
+ * str        - string whose logical run list receives the new run.
+ * src        - source text; `start' and `end' index into it.
+ * start, end - half-open range [start, end) of the characters to copy.
+ * direction  - UCPGBA_RTL copies the range in reverse and substitutes the
+ *              mirrored form of symmetric characters; any other value
+ *              copies the range unchanged.
+ *
+ * The `chars' and `positions' arrays share a single allocation (`positions'
+ * points into the second half), so only `chars' may be passed to free().
+ *
+ * All four list links are cleared here.  The logical links used to be left
+ * uninitialised, which made `logical_next' of the last run and both links
+ * of the first run indeterminate.
+ *
+ * NOTE(review): the malloc() results are not checked, and the callers in
+ * this file dereference the returned run unconditionally.
+ */
+static ucrun_t *
+#ifdef __STDC__
+_add_run(ucstring_t *str, unsigned long *src,
+         unsigned long start, unsigned long end, int direction)
+#else
+_add_run(str, src, start, end, direction)
+ucstring_t *str;
+unsigned long *src, start, end;
+int direction;
+#endif
+{
+    long i, t;
+    ucrun_t *run;
+
+    run = (ucrun_t *) malloc(sizeof(ucrun_t));
+    run->visual_next = run->visual_prev = 0;
+    run->logical_next = run->logical_prev = 0;
+    run->direction = direction;
+
+    /*
+     * ~0 stored in a long is -1: the run does not hold the cursor yet.
+     */
+    run->cursor = ~0;
+
+    run->chars = (unsigned long *)
+        malloc(sizeof(unsigned long) * ((end - start) << 1));
+    run->positions = run->chars + (end - start);
+
+    run->source = src;
+    run->start = start;
+    run->end = end;
+
+    if (direction == UCPGBA_RTL) {
+        /*
+         * Copy the source text into the run in reverse order and select
+         * replacements for the pairwise punctuation and the <> characters.
+         * `start' is only used as a loop counter from here on; the run
+         * already recorded its original value above.
+         */
+        for (i = 0, t = end - 1; start < end; start++, t--, i++) {
+            run->positions[i] = t;
+            if (ucissymmetric(src[t]) || src[t] == '<' || src[t] == '>')
+              run->chars[i] = _ucsymmetric_pair(src[t]);
+            else
+              run->chars[i] = src[t];
+        }
+    } else {
+        /*
+         * Copy the source text into the run directly.
+         */
+        for (i = start; i < end; i++) {
+            run->positions[i - start] = i;
+            run->chars[i - start] = src[i];
+        }
+    }
+
+    /*
+     * Add the run to the logical list for cursor traversal.
+     */
+    if (str->logical_first == 0)
+      str->logical_first = str->logical_last = run;
+    else {
+        run->logical_prev = str->logical_last;
+        str->logical_last->logical_next = run;
+        str->logical_last = run;
+    }
+
+    return run;
+}
+
+/*
+ * Link `run' into the visual list of `str' on behalf of
+ * _ucadd_rtl_segment().
+ *
+ * An empty visual list simply receives the run.  Otherwise, in a string
+ * with overall LTR direction the run is spliced in immediately after
+ * `lrun', and in any other string it is pushed onto the front of the list.
+ */
+static void
+#ifdef __STDC__
+_uclink_visual_run(ucstring_t *str, ucrun_t *run, ucrun_t *lrun)
+#else
+_uclink_visual_run(str, run, lrun)
+ucstring_t *str;
+ucrun_t *run, *lrun;
+#endif
+{
+    if (str->visual_first == 0) {
+        str->visual_first = str->visual_last = run;
+        return;
+    }
+
+    if (str->direction == UCPGBA_LTR) {
+        run->visual_prev = lrun;
+        run->visual_next = lrun->visual_next;
+        if (lrun->visual_next != 0)
+          lrun->visual_next->visual_prev = run;
+        lrun->visual_next = run;
+        if (lrun == str->visual_last)
+          str->visual_last = run;
+    } else {
+        run->visual_next = str->visual_first;
+        str->visual_first->visual_prev = run;
+        str->visual_first = run;
+    }
+}
+
+/*
+ * Split the source range [start, end), already identified as an RTL
+ * segment, into runs and add them to `str'.  Each pass of the loop emits
+ * an RTL run of RTL/neutral characters, then an LTR run for a following
+ * weak sequence, and finally collects weak non-digit characters that are
+ * carried over into the next RTL run.
+ */
+static void
+#ifdef __STDC__
+_ucadd_rtl_segment(ucstring_t *str, unsigned long *source, unsigned long start,
+                   unsigned long end)
+#else
+_ucadd_rtl_segment(str, source, start, end)
+ucstring_t *str;
+unsigned long *source, start, end;
+#endif
+{
+    unsigned long s, e;
+    ucrun_t *run, *lrun;
+
+    /*
+     * This is used to splice runs into strings with overall LTR direction.
+     * The `lrun' variable will never be NULL because at least one LTR run was
+     * added before this RTL run.
+     *
+     * Every run of this segment is inserted directly after `lrun', so runs
+     * added later end up visually before the ones added earlier.
+     */
+    lrun = str->visual_last;
+
+    for (e = s = start; s < end;) {
+        for (; e < end && ISRTL_NEUTRAL(source[e]); e++) ;
+
+        /*
+         * Add the collected RTL run to the logical and visual lists.
+         */
+        if (e > s) {
+            run = _add_run(str, source, s, e, UCPGBA_RTL);
+            _uclink_visual_run(str, run, lrun);
+        }
+
+        /*
+         * Now handle the weak sequences such that multiple non-digit groups
+         * are kept together appropriately and added as RTL sequences.
+         */
+        for (s = e; e < end && ISWEAKSPECIAL(source[e]); e++) {
+            if (!ISDIGITSPECIAL(source[e]) &&
+                (e + 1 == end || !ISDIGITSPECIAL(source[e + 1])))
+              break;
+        }
+
+        /*
+         * The weak sequence collected above is added as an LTR run.
+         */
+        if (e > s) {
+            run = _add_run(str, source, s, e, UCPGBA_LTR);
+            _uclink_visual_run(str, run, lrun);
+        }
+
+        /*
+         * Collect all weak non-digit sequences for an RTL segment.  These
+         * will appear as part of the next RTL segment or will be added as
+         * an RTL segment by themselves.
+         */
+        for (s = e; e < end && ucisweak(source[e]) && !ucisdigit(source[e]);
+             e++) ;
+    }
+
+    /*
+     * Capture any weak non-digit sequences that occur at the end of the RTL
+     * run.
+     */
+    if (e > s) {
+        run = _add_run(str, source, s, e, UCPGBA_RTL);
+        _uclink_visual_run(str, run, lrun);
+    }
+}
+
+/*
+ * Add the source range [start, end) to `str' as one LTR run.  The run is
+ * appended to the visual list of an LTR string and pushed onto the front
+ * of the visual list of any other string.
+ */
+static void
+#ifdef __STDC__
+_ucadd_ltr_segment(ucstring_t *str, unsigned long *source, unsigned long start,
+                   unsigned long end)
+#else
+_ucadd_ltr_segment(str, source, start, end)
+ucstring_t *str;
+unsigned long *source, start, end;
+#endif
+{
+    ucrun_t *ltr;
+
+    ltr = _add_run(str, source, start, end, UCPGBA_LTR);
+
+    /*
+     * First run of the string: it is both ends of the visual list.
+     */
+    if (str->visual_first == 0) {
+        str->visual_first = str->visual_last = ltr;
+        return;
+    }
+
+    if (str->direction != UCPGBA_LTR) {
+        /*
+         * Put the run at the head of the visual list.
+         */
+        ltr->visual_next = str->visual_first;
+        str->visual_first->visual_prev = ltr;
+        str->visual_first = ltr;
+        return;
+    }
+
+    /*
+     * Put the run at the tail of the visual list.
+     */
+    ltr->visual_prev = str->visual_last;
+    str->visual_last->visual_next = ltr;
+    str->visual_last = ltr;
+}
+
+/*
+ * Create a string object for the substring [start, end) of `source' and
+ * split it into directional runs.
+ *
+ * source            - the text; it is referenced, not copied, by the string.
+ * start, end        - half-open range of `source' to process.
+ * default_direction - overall direction used when the substring contains
+ *                     no character with strong directionality.
+ * cursor_motion     - initial cursor motion flag stored in the string.
+ *
+ * Returns the new string, or 0 if the string object cannot be allocated.
+ * For an empty substring the string is returned with no runs and a NULL
+ * cursor.
+ *
+ * NOTE(review): if a character satisfies neither ISLTR_LTR nor ISRTL_RTL
+ * the splitting loop below makes no progress; confirm that ucisprop()
+ * cannot report that for any input.
+ */
+ucstring_t *
+#ifdef __STDC__
+ucstring_create(unsigned long *source, unsigned long start, unsigned long end,
+                int default_direction, int cursor_motion)
+#else
+ucstring_create(source, start, end, default_direction, cursor_motion)
+unsigned long *source, start, end;
+int default_direction, cursor_motion;
+#endif
+{
+    int rtl_first;
+    unsigned long s, e;
+    ucstring_t *str;
+
+    str = (ucstring_t *) malloc(sizeof(ucstring_t));
+    if (str == 0)
+      return 0;
+
+    /*
+     * Set the initial values.  The direction starts out as the default so
+     * that it is defined even for an empty string.
+     */
+    str->direction = default_direction;
+    str->cursor_motion = cursor_motion;
+    str->logical_first = str->logical_last = 0;
+    str->visual_first = str->visual_last = str->cursor = 0;
+    str->source = source;
+    str->start = start;
+    str->end = end;
+
+    /*
+     * If the length of the string is 0, then just return it at this point.
+     */
+    if (start == end)
+      return str;
+
+    /*
+     * This flag indicates whether the collection loop for RTL is called
+     * before the LTR loop the first time.
+     */
+    rtl_first = 0;
+
+    /*
+     * Look for the first character in the string that has strong
+     * directionality.
+     */
+    for (s = start; s < end && !ucisstrong(source[s]); s++) ;
+
+    if (s == end)
+      /*
+       * If the string contains no characters with strong directionality, use
+       * the default direction.
+       */
+      str->direction = default_direction;
+    else
+      str->direction = ucisrtl(source[s]) ? UCPGBA_RTL : UCPGBA_LTR;
+
+    if (str->direction == UCPGBA_RTL)
+      /*
+       * Set the flag that causes the RTL collection loop to run first.
+       */
+      rtl_first = 1;
+
+    /*
+     * This loop now separates the string into runs based on directionality.
+     * It begins at `start', not at 0, so that only the requested substring
+     * is split into runs.
+     */
+    for (s = e = start; s < end; s = e) {
+        if (!rtl_first) {
+            /*
+             * Determine the next run of LTR text.
+             */
+
+            while (e < end && ISLTR_LTR(source[e]))
+              e++;
+            if (str->direction != UCPGBA_LTR) {
+                /*
+                 * Give trailing weak and neutral characters back so they
+                 * are picked up by the RTL scan that follows.
+                 */
+                while (e > s && ISWEAK_NEUTRAL(source[e - 1]))
+                  e--;
+            }
+
+            /*
+             * Add the LTR segment to the string.
+             */
+            if (e > s)
+              _ucadd_ltr_segment(str, source, s, e);
+        }
+
+        /*
+         * Determine the next run of RTL text.
+         */
+        s = e;
+        while (e < end && ISRTL_RTL(source[e]))
+          e++;
+        if (str->direction != UCPGBA_RTL) {
+            /*
+             * Give trailing weak and neutral characters back so they are
+             * picked up by the LTR scan of the next pass.
+             */
+            while (e > s && ISWEAK_NEUTRAL(source[e - 1]))
+              e--;
+        }
+
+        /*
+         * Add the RTL segment to the string.
+         */
+        if (e > s)
+          _ucadd_rtl_segment(str, source, s, e);
+
+        /*
+         * Clear the flag that allowed the RTL collection loop to run first
+         * for strings with overall RTL directionality.
+         */
+        rtl_first = 0;
+    }
+
+    /*
+     * Set up the initial cursor run.  The test is on the run, not on the
+     * string: the string was already dereferenced above, while the run is
+     * NULL when no runs were created.
+     */
+    str->cursor = str->logical_first;
+    if (str->cursor != 0)
+      str->cursor->cursor = (str->cursor->direction == UCPGBA_RTL) ?
+          str->cursor->end - str->cursor->start : 0;
+
+    return str;
+}
+
+/*
+ * Release every run reachable through the visual list of `s' and then the
+ * string itself.  A NULL string is ignored.
+ */
+void
+#ifdef __STDC__
+ucstring_free(ucstring_t *s)
+#else
+ucstring_free(s)
+ucstring_t *s;
+#endif
+{
+    ucrun_t *run, *following;
+
+    if (s == 0)
+      return;
+
+    run = s->visual_first;
+    while (run != 0) {
+        /*
+         * Remember the successor before the run is released.
+         */
+        following = run->visual_next;
+
+        /*
+         * Only `chars' is freed; a non-empty run is the only kind that has
+         * its text released here.
+         */
+        if (run->end > run->start)
+          free((char *) run->chars);
+        free((char *) run);
+
+        run = following;
+    }
+
+    free((char *) s);
+}
+
+/*
+ * Change the cursor motion flag of `str' to `cursor_motion'.
+ *
+ * Returns the previous flag value, or -1 if `str' is NULL.
+ */
+int
+#ifdef __STDC__
+ucstring_set_cursor_motion(ucstring_t *str, int cursor_motion)
+#else
+ucstring_set_cursor_motion(str, cursor_motion)
+ucstring_t *str;
+int cursor_motion;
+#endif
+{
+    int n;
+
+    if (str == 0)
+      return -1;
+
+    n = str->cursor_motion;
+    str->cursor_motion = cursor_motion;
+    return n;
+}
+
+/*
+ * Move the cursor `count' positions to the right in visual order.
+ *
+ * Returns 0 if `str' is NULL, has no cursor run (empty string), or the
+ * cursor was already at the far right end; otherwise returns 1.  A `count'
+ * that is not positive moves nothing and returns 1.
+ */
+static int
+#ifdef __STDC__
+_ucstring_visual_cursor_right(ucstring_t *str, int count)
+#else
+_ucstring_visual_cursor_right(str, count)
+ucstring_t *str;
+int count;
+#endif
+{
+    int cnt = count;
+    unsigned long size;
+    ucrun_t *cursor;
+
+    /*
+     * A string created from an empty substring has no runs and therefore
+     * no cursor run to move.
+     */
+    if (str == 0 || str->cursor == 0)
+      return 0;
+
+    cursor = str->cursor;
+    while (cnt > 0) {
+        size = cursor->end - cursor->start;
+        if ((cursor->direction == UCPGBA_RTL && cursor->cursor + 1 == size) ||
+            cursor->cursor + 1 > size) {
+            /*
+             * If the next run is NULL, then the cursor is already on the
+             * far right end.
+             */
+            if (cursor->visual_next == 0)
+              /*
+               * If movement occurred, then report it.
+               */
+              return (cnt != count);
+
+            /*
+             * Move to the next run.
+             */
+            str->cursor = cursor = cursor->visual_next;
+            cursor->cursor = (cursor->direction == UCPGBA_RTL) ? -1 : 0;
+        } else
+          cursor->cursor++;
+        cnt--;
+    }
+    return 1;
+}
+
+/*
+ * Move the cursor `count' positions to the right in logical order.  The
+ * direction of travel through the logical run list depends on the overall
+ * direction of the string: toward `logical_first' in an RTL string and
+ * toward `logical_last' otherwise.
+ *
+ * Returns 0 if `str' is NULL, has no cursor run (empty string), or the
+ * cursor could not move at all; otherwise returns 1.  A `count' that is not
+ * positive moves nothing and returns 1.
+ */
+static int
+#ifdef __STDC__
+_ucstring_logical_cursor_right(ucstring_t *str, int count)
+#else
+_ucstring_logical_cursor_right(str, count)
+ucstring_t *str;
+int count;
+#endif
+{
+    int cnt = count;
+    unsigned long size;
+    ucrun_t *cursor;
+
+    /*
+     * A string created from an empty substring has no runs and therefore
+     * no cursor run to move.
+     */
+    if (str == 0 || str->cursor == 0)
+      return 0;
+
+    cursor = str->cursor;
+    while (cnt > 0) {
+        size = cursor->end - cursor->start;
+        if (str->direction == UCPGBA_RTL) {
+            if (cursor->direction == UCPGBA_RTL) {
+                if (cursor->cursor + 1 == size) {
+                    if (cursor == str->logical_first)
+                      /*
+                       * Already at the beginning of the string.
+                       */
+                      return (cnt != count);
+
+                    str->cursor = cursor = cursor->logical_prev;
+                    size = cursor->end - cursor->start;
+                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
+                        size : 0;
+                } else
+                  cursor->cursor++;
+            } else {
+                if (cursor->cursor == 0) {
+                    if (cursor == str->logical_first)
+                      /*
+                       * At the beginning of the string already.
+                       */
+                      return (cnt != count);
+
+                    str->cursor = cursor = cursor->logical_prev;
+                    size = cursor->end - cursor->start;
+                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
+                        size : 0;
+                } else
+                  cursor->cursor--;
+            }
+        } else {
+            if (cursor->direction == UCPGBA_RTL) {
+                if (cursor->cursor == 0) {
+                    if (cursor == str->logical_last)
+                      /*
+                       * Already at the end of the string.
+                       */
+                      return (cnt != count);
+
+                    str->cursor = cursor = cursor->logical_next;
+                    size = cursor->end - cursor->start;
+                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
+                        0 : size - 1;
+                } else
+                  cursor->cursor--;
+            } else {
+                if (cursor->cursor + 1 > size) {
+                    if (cursor == str->logical_last)
+                      /*
+                       * Already at the end of the string.
+                       */
+                      return (cnt != count);
+
+                    str->cursor = cursor = cursor->logical_next;
+                    /*
+                     * Use the size of the run just entered, as the other
+                     * branches do, not the size of the run just left.
+                     */
+                    size = cursor->end - cursor->start;
+                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
+                        0 : size - 1;
+                } else
+                  cursor->cursor++;
+            }
+        }
+        cnt--;
+    }
+    return 1;
+}
+
+/*
+ * Move the cursor of `str' to the right by `count', dispatching on the
+ * cursor motion flag of the string.  Returns 0 for a NULL string, otherwise
+ * the result of the visual or logical mover.
+ */
+int
+#ifdef __STDC__
+ucstring_cursor_right(ucstring_t *str, int count)
+#else
+ucstring_cursor_right(str, count)
+ucstring_t *str;
+int count;
+#endif
+{
+    if (str == 0)
+      return 0;
+
+    /*
+     * Any flag value other than UCPGBA_CURSOR_VISUAL selects logical motion.
+     */
+    if (str->cursor_motion == UCPGBA_CURSOR_VISUAL)
+      return _ucstring_visual_cursor_right(str, count);
+
+    return _ucstring_logical_cursor_right(str, count);
+}
+
+/*
+ * Move the cursor `count' positions to the left in visual order.
+ *
+ * Returns 0 if `str' is NULL, has no cursor run (empty string), or the
+ * cursor was already at the far left end; otherwise returns 1.  A `count'
+ * that is not positive moves nothing and returns 1.
+ */
+static int
+#ifdef __STDC__
+_ucstring_visual_cursor_left(ucstring_t *str, int count)
+#else
+_ucstring_visual_cursor_left(str, count)
+ucstring_t *str;
+int count;
+#endif
+{
+    int cnt = count;
+    unsigned long size;
+    ucrun_t *cursor;
+
+    /*
+     * A string created from an empty substring has no runs and therefore
+     * no cursor run to move.
+     */
+    if (str == 0 || str->cursor == 0)
+      return 0;
+
+    cursor = str->cursor;
+    while (cnt > 0) {
+        /*
+         * The second test is a signed comparison: it is true only when the
+         * cursor index is already below 0.
+         */
+        if ((cursor->direction == UCPGBA_LTR && cursor->cursor == 0) ||
+            cursor->cursor - 1 < -1) {
+            /*
+             * If the preceding run is NULL, then the cursor is already on the
+             * far left end.
+             */
+            if (cursor->visual_prev == 0)
+              /*
+               * If movement occurred, then report it.
+               */
+              return (cnt != count);
+
+            /*
+             * Move to the previous run.
+             */
+            str->cursor = cursor = cursor->visual_prev;
+            size = cursor->end - cursor->start;
+            cursor->cursor = (cursor->direction == UCPGBA_RTL) ?
+                size : size - 1;
+        } else
+          cursor->cursor--;
+        cnt--;
+    }
+    return 1;
+}
+
+/*
+ * Move the cursor `count' positions to the left in logical order.  The
+ * direction of travel through the logical run list depends on the overall
+ * direction of the string: toward `logical_last' in an RTL string and
+ * toward `logical_first' otherwise.
+ *
+ * Returns 0 if `str' is NULL, has no cursor run (empty string), or the
+ * cursor could not move at all; otherwise returns 1.  A `count' that is not
+ * positive moves nothing and returns 1.
+ */
+static int
+#ifdef __STDC__
+_ucstring_logical_cursor_left(ucstring_t *str, int count)
+#else
+_ucstring_logical_cursor_left(str, count)
+ucstring_t *str;
+int count;
+#endif
+{
+    int cnt = count;
+    unsigned long size;
+    ucrun_t *cursor;
+
+    /*
+     * A string created from an empty substring has no runs and therefore
+     * no cursor run to move.
+     */
+    if (str == 0 || str->cursor == 0)
+      return 0;
+
+    cursor = str->cursor;
+    while (cnt > 0) {
+        size = cursor->end - cursor->start;
+        if (str->direction == UCPGBA_RTL) {
+            if (cursor->direction == UCPGBA_RTL) {
+                if (cursor->cursor == -1) {
+                    if (cursor == str->logical_last)
+                      /*
+                       * Already at the end of the string.
+                       */
+                      return (cnt != count);
+
+                    str->cursor = cursor = cursor->logical_next;
+                    size = cursor->end - cursor->start;
+                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
+                        0 : size - 1;
+                } else
+                  cursor->cursor--;
+            } else {
+                if (cursor->cursor + 1 > size) {
+                    if (cursor == str->logical_last)
+                      /*
+                       * At the end of the string already.
+                       */
+                      return (cnt != count);
+
+                    str->cursor = cursor = cursor->logical_next;
+                    size = cursor->end - cursor->start;
+                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
+                        0 : size - 1;
+                } else
+                  cursor->cursor++;
+            }
+        } else {
+            if (cursor->direction == UCPGBA_RTL) {
+                if (cursor->cursor + 1 == size) {
+                    if (cursor == str->logical_first)
+                      /*
+                       * Already at the beginning of the string.
+                       */
+                      return (cnt != count);
+
+                    str->cursor = cursor = cursor->logical_prev;
+                    size = cursor->end - cursor->start;
+                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
+                        size : 0;
+                } else
+                  cursor->cursor++;
+            } else {
+                if (cursor->cursor == 0) {
+                    if (cursor == str->logical_first)
+                      /*
+                       * Already at the beginning of the string.
+                       */
+                      return (cnt != count);
+
+                    str->cursor = cursor = cursor->logical_prev;
+                    /*
+                     * Use the size of the run just entered, as the other
+                     * branches do, not the size of the run just left.
+                     */
+                    size = cursor->end - cursor->start;
+                    cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
+                        size : 0;
+                } else
+                  cursor->cursor--;
+            }
+        }
+        cnt--;
+    }
+    return 1;
+}
+
+/*
+ * Move the cursor of `str' to the left by `count', dispatching on the
+ * cursor motion flag of the string.  Returns 0 for a NULL string, otherwise
+ * the result of the visual or logical mover.
+ */
+int
+#ifdef __STDC__
+ucstring_cursor_left(ucstring_t *str, int count)
+#else
+ucstring_cursor_left(str, count)
+ucstring_t *str;
+int count;
+#endif
+{
+    if (str == 0)
+      return 0;
+
+    /*
+     * Any flag value other than UCPGBA_CURSOR_VISUAL selects logical motion.
+     */
+    if (str->cursor_motion == UCPGBA_CURSOR_VISUAL)
+      return _ucstring_visual_cursor_left(str, count);
+
+    return _ucstring_logical_cursor_left(str, count);
+}
+
+/*
+ * Report the direction of the run that holds the cursor and the position
+ * in the source text that the cursor corresponds to.
+ *
+ * str       - the string to query.
+ * direction - receives the direction of the cursor run.
+ * position  - receives the source text position.
+ *
+ * Nothing is written when any argument is NULL or when the string has no
+ * cursor run (a string created from an empty substring).
+ */
+void
+#ifdef __STDC__
+ucstring_cursor_info(ucstring_t *str, int *direction, unsigned long *position)
+#else
+ucstring_cursor_info(str, direction, position)
+ucstring_t *str;
+int *direction;
+unsigned long *position;
+#endif
+{
+    long c;
+    unsigned long size;
+    ucrun_t *cursor;
+
+    if (str == 0 || direction == 0 || position == 0)
+      return;
+
+    cursor = str->cursor;
+    if (cursor == 0)
+      return;
+
+    *direction = cursor->direction;
+
+    c = cursor->cursor;
+    size = cursor->end - cursor->start;
+
+    /*
+     * The cursor index may sit one step outside the run on either side
+     * (-1 or `size'); those two cases map to the run boundaries instead of
+     * an entry in the positions table.  The -1 case is tested first so the
+     * comparison against the unsigned size only sees non-negative values.
+     */
+    if (c == -1)
+      *position = (cursor->direction == UCPGBA_RTL) ?
+          cursor->end : cursor->start;
+    else if ((unsigned long) c == size)
+      *position = (cursor->direction == UCPGBA_RTL) ?
+          cursor->start : cursor->positions[c - 1];
+    else
+      *position = cursor->positions[c];
+}
--- a/libraries/liblunicode/ucdata/ucpgba.h
+++ b/libraries/liblunicode/ucdata/ucpgba.h
@ -0,0 +1,162 @@
+/*
+ * Copyright 1999 Computing Research Labs, New Mexico State University
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+ * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _h_ucpgba
+#define _h_ucpgba
+
+/*
+ * $Id: ucpgba.h,v 1.4 1999/11/19 15:24:30 mleisher Exp $
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#undef __
+#ifdef __STDC__
+#define __(x) x
+#else
+#define __(x) ()
+#endif
+
+/***************************************************************************
+ *
+ * Macros and types.
+ *
+ ***************************************************************************/
+
+/*
+ * These are the direction values that can appear in render runs and render
+ * strings.
+ */
+#define UCPGBA_LTR 0
+#define UCPGBA_RTL 1
+
+/*
+ * These are the flags for cursor motion.
+ */
+#define UCPGBA_CURSOR_VISUAL  0
+#define UCPGBA_CURSOR_LOGICAL 1
+
+/*
+ * This structure is used to contain runs of text in a particular direction.
+ *
+ * Each run is a member of two doubly linked lists: one in visual order and
+ * one in logical order.  `start' and `end' delimit the half-open range of
+ * the source string covered by the run.  In ucpgba.c the `chars' and
+ * `positions' arrays are carved out of a single allocation, with
+ * `positions' pointing at its second half.
+ */
+typedef struct _ucrun_t {
+    struct _ucrun_t *visual_prev;  /* Pointer to the previous visual run.    */
+    struct _ucrun_t *visual_next;  /* Pointer to the next visual run.        */
+
+    struct _ucrun_t *logical_prev; /* Pointer to the previous logical run.   */
+    struct _ucrun_t *logical_next; /* Pointer to the next logical run.       */
+
+    int direction;                 /* Direction of the run.                  */
+
+    long cursor;                   /* Position of "cursor" in the string.    */
+
+    unsigned long *chars;          /* List of characters for the run.        */
+    unsigned long *positions;      /* List of original positions in source.  */
+
+    unsigned long *source;         /* The source string.                     */
+    unsigned long start;           /* Beginning offset in the source string. */
+    unsigned long end;             /* Ending offset in the source string.    */
+} ucrun_t;
+
+/*
+ * This represents a string of runs rendered up to a point that is not
+ * platform specific.
+ *
+ * The same set of runs is reachable through both the logical list and the
+ * visual list.  `direction' is one of UCPGBA_LTR or UCPGBA_RTL and
+ * `cursor_motion' is one of UCPGBA_CURSOR_VISUAL or UCPGBA_CURSOR_LOGICAL.
+ * `cursor' is NULL for a string that has no runs.
+ */
+typedef struct _ucstring_t {
+    int direction;                /* Overall direction of the string.       */
+
+    int cursor_motion;            /* Logical or visual cursor motion flag.  */
+
+    ucrun_t *cursor;              /* The run containing the "cursor."       */
+
+    ucrun_t *logical_first;       /* First run in the logical order.        */
+    ucrun_t *logical_last;        /* Last run in the logical order.         */
+
+    ucrun_t *visual_first;        /* First run in the visual order.         */
+    ucrun_t *visual_last;         /* Last run in the visual order.          */
+
+    unsigned long *source;        /* The source string.                     */
+    unsigned long start;          /* The beginning offset in the source.    */
+    unsigned long end;            /* The ending offset in the source.       */
+} ucstring_t;
+
+/***************************************************************************
+ *
+ * API
+ *
+ ***************************************************************************/
+
+/*
+ * This creates and reorders the specified substring using the
+ * "Pretty Good Bidi Algorithm."  A default direction is provided for cases
+ * of a string containing no strong direction characters and the default
+ * cursor motion should be provided.
+ *
+ * `start' and `end' are offsets into `source'; the character at `end' is
+ * not part of the substring.  The source text is referenced by the
+ * returned string, not copied as a whole.
+ */
+extern ucstring_t *ucstring_create __((unsigned long *source,
+                                       unsigned long start,
+                                       unsigned long end,
+                                       int default_direction,
+                                       int cursor_motion));
+/*
+ * This releases the string, including all of its runs.  A NULL string is
+ * ignored.
+ */
+extern void ucstring_free __((ucstring_t *string));
+
+/*
+ * This changes the cursor motion flag for the string.
+ *
+ * The previous flag value is returned, or -1 if the string is NULL.
+ */
+extern int ucstring_set_cursor_motion __((ucstring_t *string,
+                                          int cursor_motion));
+
+/*
+ * This function will move the cursor to the right depending on the
+ * type of cursor motion that was specified for the string.  The `count'
+ * parameter is the number of positions to move.
+ *
+ * A 0 is returned if no cursor motion is performed, otherwise a
+ * 1 is returned.
+ */
+extern int ucstring_cursor_right __((ucstring_t *string, int count));
+
+/*
+ * This function will move the cursor to the left depending on the
+ * type of cursor motion that was specified for the string.  The `count'
+ * parameter is the number of positions to move.
+ *
+ * A 0 is returned if no cursor motion is performed, otherwise a
+ * 1 is returned.
+ */
+extern int ucstring_cursor_left __((ucstring_t *string, int count));
+
+/*
+ * This routine retrieves the direction of the run containing the cursor
+ * and the actual position in the original text string.  Nothing is
+ * stored if any of the three arguments is NULL.
+ */
+extern void ucstring_cursor_info __((ucstring_t *string, int *direction,
+                                     unsigned long *position));
+
+#undef __
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _h_ucpgba */
--- a/libraries/liblunicode/ucdata/ucpgba.man
+++ b/libraries/liblunicode/ucdata/ucpgba.man
@ -0,0 +1,97 @@
+.\"
+.\" $Id: ucpgba.man,v 1.1 1999/11/19 16:08:34 mleisher Exp $
+.\"
+.TH ucpgba 3 "19 November 1999"
+.SH NAME 
+ucpgba \- functions for doing bidirectional reordering of Unicode text and
+logical and visual cursor motion
+
+.SH SYNOPSIS
+.nf
+#include <ucdata.h>
+#include <ucpgba.h>
+
+ucstring_t *ucstring_create(unsigned long *source, unsigned long start,
+                            unsigned long end, int default_direction,
+                            int cursor_motion)
+.sp
+void ucstring_free(ucstring_t *string)
+.sp
+int ucstring_set_cursor_motion(ucstring_t *string, int cursor_motion)
+.sp
+int ucstring_cursor_right(ucstring_t *string, int count)
+.sp
+int ucstring_cursor_left(ucstring_t *string, int count)
+.sp
+void ucstring_cursor_info(ucstring_t *string, int *direction,
+                          unsigned long *position)
+
+.SH DESCRIPTION
+.TP 4
+.BR Macros
+UCPGBA_LTR
+.br
+UCPGBA_RTL
+.br
+UCPGBA_CURSOR_VISUAL
+.br
+UCPGBA_CURSOR_LOGICAL
+
+.TP 4
+.BR ucstring_create()
+This function will create a reordered string by using the implicit
+directionality of the characters in the specified substring.
+.sp
+The `default_direction' parameter should be one of UCPGBA_LTR or UCPGBA_RTL
+and is used only in cases where a string contains no characters with strong
+directionality.
+.sp
+The `cursor_motion' parameter should be one of UCPGBA_CURSOR_VISUAL or
+UCPGBA_CURSOR_LOGICAL, and is used to specify the initial cursor motion
+behavior.  This behavior can be switched at any time using
+ucstring_set_cursor_motion().
+
+.TP 4
+.BR ucstring_free()
+This function will deallocate the memory used by the string, including the
+string itself.
+
+.TP 4
+.BR ucstring_cursor_info()
+This function will return the text position of the internal cursor and the
+directionality of the text at that position.  The position returned is the
+original text position of the character.
+
+.TP 4
+.BR ucstring_set_cursor_motion()
+This function will change the cursor motion type and return the previous
+cursor motion type.
+
+.TP 4
+.BR ucstring_cursor_right()
+This function will move the internal cursor to the right according to the
+type of cursor motion set for the string.
+.sp
+If no cursor motion is performed, it returns 0.  Otherwise it will return a 1.
+
+.TP 4
+.BR ucstring_cursor_left()
+This function will move the internal cursor to the left according to the
+type of cursor motion set for the string.
+.sp
+If no cursor motion is performed, it returns 0.  Otherwise it will return a 1.
+
+.SH "SEE ALSO"
+ucdata(3)
+
+.SH ACKNOWLEDGMENTS
+These are people who have helped with patches or alerted me about problems.
+
+.SH AUTHOR
+Mark Leisher
+.br
+Computing Research Lab
+.br
+New Mexico State University
+.br
+Email: mleisher@crl.nmsu.edu
--- a/libraries/liblunicode/ure/README
+++ b/libraries/liblunicode/ure/README
@ -0,0 +1,212 @@
+#
+# $Id: README,v 1.3 1999/09/21 15:47:43 mleisher Exp $
+#
+# Copyright 1997, 1998, 1999 Computing Research Labs,
+# New Mexico State University
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+
+
+                       Unicode and Regular Expressions
+                                 Version 0.5
+
+This is a simple regular expression package for matching against Unicode text
+in UCS2 form.  The implementation of this URE package is a variation on the
+RE->DFA algorithm done by Mark Hopkins (markh@csd4.csd.uwm.edu).  Mark
+Hopkins' algorithm had the virtue of being very simple, so it was used as a
+model.
+
+---------------------------------------------------------------------------
+
+Assumptions:
+
+  o  Regular expression and text already normalized.
+
+  o  Conversion to lower case assumes a 1-1 mapping.
+
+Definitions:
+
+  Separator - any one of U+2028, U+2029, '\n', '\r'.
+
+Operators:
+  .   - match any character.
+  *   - match zero or more of the last subexpression.
+  +   - match one or more of the last subexpression.
+  ?   - match zero or one of the last subexpression.
+  ()  - subexpression grouping.
+
+  Notes:
+
+    o  The "." operator normally does not match separators, but a flag is
+       available for the ure_exec() function that will allow this operator to
+       match a separator.
+
+Literals and Constants:
+
+  c       - literal UCS2 character.
+  \x....  - hexadecimal number of up to 4 digits.
+  \X....  - hexadecimal number of up to 4 digits.
+  \u....  - hexadecimal number of up to 4 digits.
+  \U....  - hexadecimal number of up to 4 digits.
+
+Character classes:
+
+  [...]           - Character class.
+  [^...]          - Negated character class.
+  \pN1,N2,...,Nn  - Character properties class.
+  \PN1,N2,...,Nn  - Negated character properties class.
+
+  POSIX character classes recognized:
+
+    :alnum:
+    :alpha:
+    :cntrl:
+    :digit:
+    :graph:
+    :lower:
+    :print:
+    :punct:
+    :space:
+    :upper:
+    :xdigit:
+
+  Notes:
+
+    o  Character property classes are \p or \P followed by a comma separated
+       list of integers between 1 and 32.  These integers are references to
+       the following character properties:
+
+        N	Character Property
+        --------------------------
+        1	_URE_NONSPACING
+        2	_URE_COMBINING
+        3	_URE_NUMDIGIT
+        4	_URE_NUMOTHER
+        5	_URE_SPACESEP
+        6	_URE_LINESEP
+        7	_URE_PARASEP
+        8	_URE_CNTRL
+        9	_URE_PUA
+        10	_URE_UPPER
+        11	_URE_LOWER
+        12	_URE_TITLE
+        13	_URE_MODIFIER
+        14	_URE_OTHERLETTER
+        15	_URE_DASHPUNCT
+        16	_URE_OPENPUNCT
+        17	_URE_CLOSEPUNCT
+        18	_URE_OTHERPUNCT
+        19	_URE_MATHSYM
+        20	_URE_CURRENCYSYM
+        21	_URE_OTHERSYM
+        22	_URE_LTR
+        23	_URE_RTL
+        24	_URE_EURONUM
+        25	_URE_EURONUMSEP
+        26	_URE_EURONUMTERM
+        27	_URE_ARABNUM
+        28	_URE_COMMONSEP
+        29	_URE_BLOCKSEP
+        30	_URE_SEGMENTSEP
+        31	_URE_WHITESPACE
+        32	_URE_OTHERNEUT
+
+    o  Character classes can contain literals, constants, and character
+       property classes. Example:
+
+       [abc\U10A\p1,3,4]
+
+---------------------------------------------------------------------------
+
+Before using URE
+----------------
+Before URE is used, two functions need to be created.  One to check if a
+character matches a set of URE character properties, and one to convert a
+character to lower case.
+
+Stubs for these functions are located in the urestubs.c file.
+
+Using URE
+---------
+
+Sample pseudo-code fragment.
+
+  ure_buffer_t rebuf;
+  ure_dfa_t dfa;
+  ucs2_t *re, *text;
+  unsigned long relen, textlen;
+  unsigned long match_start, match_end;
+
+  /*
+   * Allocate the dynamic storage needed to compile regular expressions.
+   */
+  rebuf = ure_buffer_create();
+
+  for each regular expression in a list {
+      re = next regular expression;
+      relen = length(re);
+
+      /*
+       * Compile the regular expression with the case insensitive flag
+       * turned on.
+       */
+      dfa = ure_compile(re, relen, 1, rebuf);
+
+      /*
+       * Look for the first match in some text.  The matching will be done
+       * in a case insensitive manner because the expression was compiled
+       * with the case insensitive flag on.
+       */
+      if (ure_exec(dfa, 0, text, textlen, &match_start, &match_end))
+        printf("MATCH: %ld %ld\n", match_start, match_end);
+
+      /*
+       * Look for the first match in some text, ignoring non-spacing
+       * characters.
+       */
+      if (ure_exec(dfa, URE_IGNORE_NONSPACING, text, textlen,
+                   &match_start, &match_end))
+        printf("MATCH: %ld %ld\n", match_start, match_end);
+
+      /*
+       * Free the DFA.
+       */
+      ure_dfa_free(dfa);
+  }
+
+  /*
+   * Free the dynamic storage used for compiling the expressions.
+   */
+  ure_buffer_free(rebuf);
+
+---------------------------------------------------------------------------
+
+Mark Leisher <mleisher@crl.nmsu.edu>
+29 March 1997
+
+===========================================================================
+
+CHANGES
+-------
+
+Version: 0.5
+Date   : 21 September 1999
+==========================
+  1. Added copyright stuff and put in CVS.
--- a/libraries/liblunicode/ure/ure.c
+++ b/libraries/liblunicode/ure/ure.c
--- a/libraries/liblunicode/ure/ure.h
+++ b/libraries/liblunicode/ure/ure.h
@ -0,0 +1,150 @@
+/*
+ * Copyright 1997, 1998, 1999 Computing Research Labs,
+ * New Mexico State University
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+ * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _h_ure
+#define _h_ure
+
+/*
+ * $Id: ure.h,v 1.2 1999/09/21 15:47:44 mleisher Exp $
+ */
+
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#undef __
+#ifdef __STDC__
+#define __(x) x
+#else
+#define __(x) ()
+#endif
+
+/*
+ * Set of character class flags.
+ */
+#define _URE_NONSPACING  0x00000001
+#define _URE_COMBINING   0x00000002
+#define _URE_NUMDIGIT    0x00000004
+#define _URE_NUMOTHER    0x00000008
+#define _URE_SPACESEP    0x00000010
+#define _URE_LINESEP     0x00000020
+#define _URE_PARASEP     0x00000040
+#define _URE_CNTRL       0x00000080
+#define _URE_PUA         0x00000100
+
+#define _URE_UPPER       0x00000200
+#define _URE_LOWER       0x00000400
+#define _URE_TITLE       0x00000800
+#define _URE_MODIFIER    0x00001000
+#define _URE_OTHERLETTER 0x00002000
+#define _URE_DASHPUNCT   0x00004000
+#define _URE_OPENPUNCT   0x00008000
+#define _URE_CLOSEPUNCT  0x00010000
+#define _URE_OTHERPUNCT  0x00020000
+#define _URE_MATHSYM     0x00040000
+#define _URE_CURRENCYSYM 0x00080000
+#define _URE_OTHERSYM    0x00100000
+
+#define _URE_LTR         0x00200000
+#define _URE_RTL         0x00400000
+
+#define _URE_EURONUM     0x00800000
+#define _URE_EURONUMSEP  0x01000000
+#define _URE_EURONUMTERM 0x02000000
+#define _URE_ARABNUM     0x04000000
+#define _URE_COMMONSEP   0x08000000
+
+#define _URE_BLOCKSEP    0x10000000
+#define _URE_SEGMENTSEP  0x20000000
+
+#define _URE_WHITESPACE  0x40000000
+#define _URE_OTHERNEUT   0x80000000
+
+/*
+ * Error codes.
+ */
+#define _URE_OK               0
+#define _URE_UNEXPECTED_EOS   -1
+#define _URE_CCLASS_OPEN      -2
+#define _URE_UNBALANCED_GROUP -3
+#define _URE_INVALID_PROPERTY -4
+
+/*
+ * Options that can be combined for searching.
+ */
+#define URE_IGNORE_NONSPACING      0x01
+#define URE_DOT_MATCHES_SEPARATORS 0x02
+
+typedef unsigned long ucs4_t;
+typedef unsigned short ucs2_t;
+
+/*
+ * Opaque type for memory used when compiling expressions.
+ */
+typedef struct _ure_buffer_t *ure_buffer_t;
+
+/*
+ * Opaque type for the minimal DFA used when matching.
+ */
+typedef struct _ure_dfa_t *ure_dfa_t;
+
+/*************************************************************************
+ *
+ * API.
+ *
+ * ure_buffer_create() allocates the dynamic storage needed to compile
+ * regular expressions; ure_buffer_free() takes that buffer back
+ * (presumably releasing it -- the implementation is not in this header).
+ *
+ * ure_compile() compiles the UCS2 expression `re' of length `relen' using
+ * the storage in `buf' and returns a DFA.  A non-zero `casefold' requests
+ * case insensitive matching.
+ *
+ * ure_dfa_free() takes a DFA returned by ure_compile() (presumably
+ * releasing it).
+ *
+ * ure_write_dfa() takes a DFA and an output stream; NOTE(review): the
+ * output format is not described here -- see ure.c.
+ *
+ * ure_exec() looks for the first match of `dfa' in `text' (`textlen'
+ * UCS2 units).  `flags' is a combination of URE_IGNORE_NONSPACING and
+ * URE_DOT_MATCHES_SEPARATORS, or 0.  The result is used as a truth value
+ * by callers; on a match the bounds are stored through `match_start' and
+ * `match_end'.
+ *
+ *************************************************************************/
+
+extern ure_buffer_t ure_buffer_create __((void));
+
+extern void ure_buffer_free __((ure_buffer_t buf));
+
+extern ure_dfa_t ure_compile __((ucs2_t *re, unsigned long relen,
+                                 int casefold, ure_buffer_t buf));
+
+extern void ure_dfa_free __((ure_dfa_t dfa));
+
+extern void ure_write_dfa __((ure_dfa_t dfa, FILE *out));
+
+extern int ure_exec __((ure_dfa_t dfa, int flags,
+                        ucs2_t *text, unsigned long textlen,
+                        unsigned long *match_start, unsigned long *match_end));
+
+/*************************************************************************
+ *
+ * Prototypes for stub functions used for URE.  These need to be rewritten to
+ * use the Unicode support available on the system.
+ *
+ *************************************************************************/
+
+extern ucs4_t _ure_tolower __((ucs4_t c));
+
+extern int _ure_matches_properties __((unsigned long props, ucs4_t c));
+
+#undef __
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _h_ure */
--- a/libraries/liblunicode/ure/urestubs.c
+++ b/libraries/liblunicode/ure/urestubs.c
@ -0,0 +1,64 @@
+/*
+ * Copyright 1997, 1998, 1999 Computing Research Labs,
+ * New Mexico State University
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+ * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef lint
+static char rcsid[] = "$Id: urestubs.c,v 1.2 1999/09/21 15:47:44 mleisher Exp $";
+#endif
+
+#include "ure.h"
+
+/*
+ * This file contains stub routines needed by the URE package to test
+ * character properties and other Unicode implementation specific details.
+ */
+
+/*
+ * This routine should return the lower case equivalent for the character or,
+ * if there is no lower case equivalent, the character itself.
+ *
+ * This stub version performs no conversion: it returns `c' unchanged.  It
+ * is meant to be replaced with a call into the Unicode support available
+ * on the system.
+ */
+ucs4_t
+#ifdef __STDC__
+_ure_tolower(ucs4_t c)
+#else
+_ure_tolower(c)
+ucs4_t c;
+#endif
+{
+    return c;
+}
+
+/*
+ * This routine takes a set of URE character property flags (see ure.h) along
+ * with a character and tests to see if the character has one or more of those
+ * properties.
+ *
+ * This stub version ignores both arguments and always returns 1, i.e. it
+ * reports every character as having the requested properties.  It is meant
+ * to be replaced with a real property lookup.
+ */
+int
+#ifdef __STDC__
+_ure_matches_properties(unsigned long props, ucs4_t c)
+#else
+_ure_matches_properties(props, c)
+unsigned long props;
+ucs4_t c;
+#endif
+{
+    return 1;
+}
--- a/libraries/liblunicode/utbm/README
+++ b/libraries/liblunicode/utbm/README
@ -0,0 +1,121 @@
+#
+# $Id: README,v 1.1 1999/09/21 15:45:17 mleisher Exp $
+#
+# Copyright 1997, 1998, 1999 Computing Research Labs,
+# New Mexico State University
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+# OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+# THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+
+                       Unicode and Boyer-Moore Searching
+                                 Version 0.2
+
+UTBM (Unicode Tuned Boyer-Moore) is a simple package that provides tuned
+Boyer-Moore searches on Unicode UCS2 text (handles high and low surrogates).
+
+---------------------------------------------------------------------------
+
+Assumptions:
+
+  o  Search pattern and text already normalized in some fashion.
+
+  o  Upper, lower, and title case conversions are one-to-one.
+
+  o  For conversions between upper, lower, and title case, UCS2 characters
+     always convert to other UCS2 characters, and UTF-16 characters always
+     convert to other UTF-16 characters.
+
+Flags:
+
+  UTBM provides three processing flags:
+
+  o  UTBM_CASEFOLD          - search in a case-insensitive manner.
+
+  o  UTBM_IGNORE_NONSPACING - ignore non-spacing characters in the pattern and
+                              the text.
+
+  o  UTBM_SPACE_COMPRESS    - view as a *single space*, sequential groups of
+                              U+2028, U+2029, '\n', '\r', '\t', and any
+                              character identified as a space by the Unicode
+                              support on the platform.
+
+                              This flag also causes all characters identified
+                              as control by the Unicode support on the
+                              platform to be ignored (except for '\n', '\r',
+                              and '\t').
+
+---------------------------------------------------------------------------
+
+Before using UTBM
+-----------------
+Before UTBM is used, some functions need to be created.  The "utbmstub.c" file
+contains stubs that need to be rewritten so they work with the Unicode support
+on the platform on which this package is being used.
+
+Using UTBM
+----------
+
+Sample pseudo-code fragment.
+
+  utbm_pattern_t pat;
+  ucs2_t *pattern, *text;
+  unsigned long patternlen, textlen;
+  unsigned long flags, match_start, match_end;
+
+  /*
+   * Allocate the dynamic storage needed for a search pattern.
+   */
+  pat = utbm_create_pattern();
+
+  /*
+   * Set the search flags desired.
+   */
+  flags = UTBM_CASEFOLD|UTBM_IGNORE_NONSPACING;
+
+  /*
+   * Compile the search pattern.
+   */
+  utbm_compile(pattern, patternlen, flags, pat);
+
+  /*
+   * Find the first occurrence of the search pattern in the text.
+   */
+  if (utbm_exec(pat, text, textlen, &match_start, &match_end))
+    printf("MATCH: %ld %ld\n", match_start, match_end);
+
+  /*
+   * Free the dynamic storage used for the search pattern.
+   */
+  utbm_free_pattern(pat);
+
+---------------------------------------------------------------------------
+
+Mark Leisher <mleisher@crl.nmsu.edu>
+2 May 1997
+
+===========================================================================
+
+CHANGES
+-------
+
+Version: 0.2
+Date   : 21 September 1999
+==========================
+  1. Added copyright stuff and put in CVS.
+
--- a/libraries/liblunicode/utbm/utbm.c
+++ b/libraries/liblunicode/utbm/utbm.c
@ -0,0 +1,497 @@
+/*
+ * Copyright 1997, 1998, 1999 Computing Research Labs,
+ * New Mexico State University
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+ * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef lint
+static char rcsid[] = "$Id: utbm.c,v 1.1 1999/09/21 15:45:17 mleisher Exp $";
+#endif
+
+/*
+ * Assumptions:
+ * 1. Case conversions of UTF-16 characters must also be UTF-16 characters.
+ * 2. Case conversions are all one-to-one.
+ * 3. Text and pattern have already been normalized in some fashion.
+ */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include "utbm.h"
+
+/*
+ * Single pattern character.  The three fields hold the case variants that
+ * a text character is compared against; utbm_compile() sets all three to
+ * the same value when UTBM_CASEFOLD is not requested.
+ */
+typedef struct {
+    ucs4_t lc; /* lower case form */
+    ucs4_t uc; /* upper case form; also the key used in the skip array */
+    ucs4_t tc; /* title case form */
+} _utbm_char_t;
+
+typedef struct {
+    _utbm_char_t *ch; /* pattern character this skip entry belongs to */
+    unsigned long skip; /* skip distance handed back by _utbm_skip() */
+} _utbm_skip_t;
+
+typedef struct _utbm_pattern_t {
+    unsigned long flags; /* UTBM_* flags given to utbm_compile() */
+
+    _utbm_char_t *pat; /* compiled pattern characters */
+    unsigned long pat_used; /* number of entries of `pat' in use */
+    unsigned long pat_size; /* number of entries allocated for `pat' */
+    unsigned long patlen; /* pattern length in UCS2 units (2 per char >= 0x10000) */
+
+    _utbm_skip_t *skip; /* skip array, one entry per distinct `uc' value */
+    unsigned long skip_used; /* number of entries of `skip' in use */
+    unsigned long skip_size; /* number of entries allocated for `skip' */
+
+    unsigned long md4; /* extra skip applied by utbm_exec() after a failed match */
+} _utbm_pattern_t;
+
+/*************************************************************************
+ *
+ * Support functions.
+ *
+ *************************************************************************/
+
+/*
+ * Routine to look up the skip value for the character at `start'.
+ *
+ * The character is decoded from `start' (combining a high/low surrogate
+ * pair into one value when both are present before `end') and looked up
+ * in the skip array of `p'.  If it equals the upper, lower or title case
+ * form of an entry, that entry's skip is returned, limited to the number
+ * of UCS2 units left before `end'.  If it is not in the pattern at all the
+ * full pattern length is returned.  A 0 is returned when `start' is at or
+ * beyond `end'.
+ */
+static unsigned long
+#ifdef __STDC__
+_utbm_skip(utbm_pattern_t p, ucs2_t *start, ucs2_t *end)
+#else
+_utbm_skip(p, start, end)
+utbm_pattern_t p;
+ucs2_t *start, *end;
+#endif
+{
+    unsigned long i, remaining;
+    ucs4_t c1, c2;
+    _utbm_skip_t *sp;
+
+    if (start >= end)
+      return 0;
+
+    c1 = *start;
+    c2 = (start + 1 < end) ? *(start + 1) : ~0;
+    if (0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff)
+      c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
+
+    for (i = 0, sp = p->skip; i < p->skip_used; i++, sp++) {
+        /*
+         * Compare against each case form explicitly.  ANDing the three
+         * XOR differences together is not an equality test: the result
+         * can be zero when the differences merely share no common bit
+         * (e.g. U+0078 against the pair U+0178/U+00FF).
+         */
+        if (c1 == sp->ch->uc || c1 == sp->ch->lc || c1 == sp->ch->tc) {
+            remaining = (unsigned long) (end - start);
+            return (remaining < sp->skip) ? remaining : sp->skip;
+        }
+    }
+    return p->patlen;
+}
+
+/*
+ * Verify a candidate match by comparing the pattern backward against the
+ * text, beginning with the text character at `start' and the last pattern
+ * character.
+ *
+ * `*match_end' is always written, before any comparison is made: it is set
+ * to the offset just past the character at `start'.  On success a 1 is
+ * returned and `*match_start' holds the offset where the backward scan
+ * stopped; on a mismatch a 0 is returned and `*match_start' is untouched.
+ *
+ * A pattern with a single character is accepted without comparison; the
+ * caller is expected to have located the character with _utbm_skip().
+ *
+ * NOTE(review): the comparison loop only runs while `start' > `text', so a
+ * character fetched from text[0] at the end of an iteration is not
+ * compared against the pattern -- confirm this is intended.
+ */
+static int
+#ifdef __STDC__
+_utbm_match(utbm_pattern_t pat, ucs2_t *text, ucs2_t *start, ucs2_t *end,
+            unsigned long *match_start, unsigned long *match_end)
+#else
+_utbm_match(pat, text, start, end, match_start, match_end)
+utbm_pattern_t pat;
+ucs2_t *text, *start, *end;
+unsigned long *match_start, *match_end;
+#endif
+{
+    int check_space;
+    ucs4_t c1, c2;
+    unsigned long count;
+    _utbm_char_t *cp;
+
+    /*
+     * Set the potential match endpoint first.
+     */
+    *match_end = (start - text) + 1;
+
+    c1 = *start;
+    c2 = (start + 1 < end) ? *(start + 1) : ~0;
+    if (0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff) {
+        c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
+        /*
+         * Adjust the match end point to occur after the UTF-16 character.
+         */
+        *match_end = *match_end + 1;
+    }
+
+    if (pat->pat_used == 1) {
+        *match_start = start - text;
+        return 1;
+    }
+
+    /*
+     * Compare backward.
+     */
+    cp = pat->pat + (pat->pat_used - 1);
+
+    for (count = pat->patlen; start > text && count > 0;) {
+        /*
+         * Ignore non-spacing characters if indicated.
+         */
+        if (pat->flags & UTBM_IGNORE_NONSPACING) {
+            while (start > text && _utbm_nonspacing(c1)) {
+                c2 = *--start;
+                /*
+                 * Peek at the preceding unit whenever one exists, including
+                 * text[0], so a surrogate pair at the very beginning of the
+                 * text is recombined as well.
+                 */
+                c1 = (start > text) ? *(start - 1) : ~0;
+                if (0xdc00 <= c2 && c2 <= 0xdfff &&
+                    0xd800 <= c1 && c1 <= 0xdbff) {
+                    c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
+                    start--;
+                } else
+                  c1 = c2;
+            }
+        }
+
+        /*
+         * Handle space compression if indicated.
+         */
+        if (pat->flags & UTBM_SPACE_COMPRESS) {
+            check_space = 0;
+            while (start > text &&
+                   (_utbm_isspace(c1, 1) || _utbm_iscntrl(c1))) {
+                check_space = _utbm_isspace(c1, 1);
+                c2 = *--start;
+                c1 = (start > text) ? *(start - 1) : ~0;
+                if (0xdc00 <= c2 && c2 <= 0xdfff &&
+                    0xd800 <= c1 && c1 <= 0xdbff) {
+                    c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
+                    start--;
+                } else
+                  c1 = c2;
+            }
+            /*
+             * Handle things if space compression was indicated and one or
+             * more member characters were found.
+             */
+            if (check_space) {
+                if (cp->uc != ' ')
+                  return 0;
+                cp--;
+                count--;
+            }
+        }
+
+        /*
+         * Handle the normal comparison cases.  The character must equal
+         * one of the three case forms; ANDing the XOR differences is not
+         * an equality test and accepts unrelated characters.
+         */
+        if (count > 0 &&
+            c1 != cp->uc && c1 != cp->lc && c1 != cp->tc)
+          return 0;
+
+        count -= (c1 >= 0x10000) ? 2 : 1;
+        if (count > 0) {
+            cp--;
+
+            /*
+             * Get the next preceding character.
+             */
+            if (start > text) {
+                c2 = *--start;
+                c1 = (start > text) ? *(start - 1) : ~0;
+                if (0xdc00 <= c2 && c2 <= 0xdfff &&
+                    0xd800 <= c1 && c1 <= 0xdbff) {
+                    c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
+                    start--;
+                } else
+                  c1 = c2;
+            }
+        }
+    }
+
+    /*
+     * Set the match start position.
+     */
+    *match_start = start - text;
+    return 1;
+}
+
+/*************************************************************************
+ *
+ * API.
+ *
+ *************************************************************************/
+
+/*
+ * Allocate an empty search pattern with every field cleared to zero.
+ *
+ * Returns the new pattern, or 0 if the allocation fails.
+ */
+utbm_pattern_t
+#ifdef __STDC__
+utbm_create_pattern(void)
+#else
+utbm_create_pattern()
+#endif
+{
+    utbm_pattern_t p;
+
+    p = (utbm_pattern_t) malloc(sizeof(_utbm_pattern_t));
+
+    /*
+     * Do not clear the structure through a null pointer when out of memory.
+     */
+    if (p == 0)
+      return 0;
+
+    (void) memset((char *) p, 0, sizeof(_utbm_pattern_t));
+    return p;
+}
+
+/*
+ * Release a search pattern together with its character and skip arrays.
+ * A null pattern is accepted and ignored.
+ */
+void
+#ifdef __STDC__
+utbm_free_pattern(utbm_pattern_t pattern)
+#else
+utbm_free_pattern(pattern)
+utbm_pattern_t pattern;
+#endif
+{
+    if (pattern != 0) {
+        /*
+         * An array is only released when its recorded size is non-zero.
+         */
+        if (pattern->pat_size != 0)
+          free((char *) pattern->pat);
+        if (pattern->skip_size != 0)
+          free((char *) pattern->skip);
+
+        free((char *) pattern);
+    }
+}
+
+/*
+ * Compile the search pattern `pat' (`patlen' UCS2 units) into `p' using
+ * the UTBM_* `flags'.
+ *
+ * The pattern is first preprocessed into an array of characters with their
+ * upper, lower and title case forms (identical unless UTBM_CASEFOLD is
+ * set), dropping non-spacing characters when UTBM_IGNORE_NONSPACING is set
+ * and, when UTBM_SPACE_COMPRESS is set, collapsing runs of spaces into one
+ * ' ' and dropping control characters.  The skip array and the extra skip
+ * for the last pattern character are then derived from that array.
+ *
+ * Nothing is done if `p' or `pat' is null, `patlen' is 0 or the pattern
+ * starts with a 0.  The function has no return value: if storage cannot be
+ * allocated, or every pattern character was dropped, `p' is left holding
+ * an empty pattern (pat_used == 0), which utbm_exec() never matches.
+ */
+void
+#ifdef __STDC__
+utbm_compile(ucs2_t *pat, unsigned long patlen, unsigned long flags,
+             utbm_pattern_t p)
+#else
+utbm_compile(pat, patlen, flags, p)
+ucs2_t *pat;
+unsigned long patlen, flags;
+utbm_pattern_t p;
+#endif
+{
+    int have_space;
+    unsigned long i, j, k, slen;
+    _utbm_char_t *cp, *new_pat;
+    _utbm_skip_t *sp, *new_skip;
+    ucs4_t c1, c2, sentinel;
+
+    /*
+     * Check the length before looking at the first pattern character.
+     */
+    if (p == 0 || pat == 0 || patlen == 0 || *pat == 0)
+      return;
+
+    /*
+     * Reset the pattern buffer.
+     */
+    p->patlen = p->pat_used = p->skip_used = 0;
+
+    /*
+     * Set the flags.
+     */
+    p->flags = flags;
+
+    /*
+     * Initialize the extra skip flag.
+     */
+    p->md4 = 1;
+
+    /*
+     * Allocate more storage if necessary.  The recorded sizes are only
+     * updated once both arrays have been obtained, so a failure leaves the
+     * pattern empty but still consistent and safe to free.
+     */
+    if (patlen > p->pat_size) {
+        if (p->pat_size == 0) {
+            new_pat = (_utbm_char_t *) malloc(sizeof(_utbm_char_t) * patlen);
+            new_skip = (_utbm_skip_t *) malloc(sizeof(_utbm_skip_t) * patlen);
+            if (new_pat == 0 || new_skip == 0) {
+                if (new_pat != 0)
+                  free((char *) new_pat);
+                if (new_skip != 0)
+                  free((char *) new_skip);
+                return;
+            }
+            p->pat = new_pat;
+            p->skip = new_skip;
+        } else {
+            /*
+             * Keep the old pointer until realloc() has succeeded; on
+             * failure the old block is still valid and still owned by `p'.
+             */
+            new_pat = (_utbm_char_t *)
+                realloc((char *) p->pat, sizeof(_utbm_char_t) * patlen);
+            if (new_pat == 0)
+              return;
+            p->pat = new_pat;
+
+            new_skip = (_utbm_skip_t *)
+                realloc((char *) p->skip, sizeof(_utbm_skip_t) * patlen);
+            if (new_skip == 0)
+              return;
+            p->skip = new_skip;
+        }
+        p->pat_size = p->skip_size = patlen;
+    }
+
+    /*
+     * Preprocess the pattern to remove controls (if specified) and determine
+     * case.
+     */
+    sentinel = 0;
+    for (have_space = 0, cp = p->pat, i = 0; i < patlen; i++) {
+        c1 = pat[i];
+        c2 = (i + 1 < patlen) ? pat[i + 1] : ~0;
+        if (0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff) {
+            c1 = 0x10000 + (((c1 & 0x03ff) << 10) | (c2 & 0x03ff));
+
+            /*
+             * Step over the low surrogate right away so that it is also
+             * consumed when the character is dropped by one of the
+             * `continue' statements below.
+             */
+            i++;
+        }
+
+        /*
+         * Make sure the `have_space' flag is turned off if the character
+         * is not an appropriate one.
+         */
+        if (!_utbm_isspace(c1, flags & UTBM_SPACE_COMPRESS))
+          have_space = 0;
+
+        /*
+         * If non-spacing characters should be ignored, do it here.
+         */
+        if ((flags & UTBM_IGNORE_NONSPACING) && _utbm_nonspacing(c1))
+          continue;
+
+        /*
+         * Check if spaces and controls need to be compressed.
+         */
+        if (flags & UTBM_SPACE_COMPRESS) {
+            if (_utbm_isspace(c1, 1)) {
+                if (!have_space) {
+                    /*
+                     * Add a space and set the flag.
+                     */
+                    cp->uc = cp->lc = cp->tc = ' ';
+                    cp++;
+
+                    /*
+                     * Increase the real pattern length.
+                     */
+                    p->patlen++;
+                    sentinel = ' ';
+                    have_space = 1;
+                }
+                continue;
+            }
+
+            /*
+             * Ignore all control characters.
+             */
+            if (_utbm_iscntrl(c1))
+              continue;
+        }
+
+        /*
+         * Add the character.
+         */
+        if (flags & UTBM_CASEFOLD) {
+            cp->uc = _utbm_toupper(c1);
+            cp->lc = _utbm_tolower(c1);
+            cp->tc = _utbm_totitle(c1);
+        } else
+          cp->uc = cp->lc = cp->tc = c1;
+
+        /*
+         * Set the sentinel character.
+         */
+        sentinel = cp->uc;
+
+        /*
+         * Move to the next character.
+         */
+        cp++;
+
+        /*
+         * Increase the real pattern length appropriately.
+         */
+        p->patlen += (c1 >= 0x10000) ? 2 : 1;
+    }
+
+    /*
+     * Set the number of characters actually used.
+     */
+    p->pat_used = cp - p->pat;
+
+    /*
+     * Every character was dropped: there is nothing to build a skip array
+     * from, and `patlen - 1' below would wrap around.
+     */
+    if (p->pat_used == 0)
+      return;
+
+    /*
+     * Go through and construct the skip array and determine the actual length
+     * of the pattern in UCS2 terms.
+     */
+    slen = p->patlen - 1;
+    cp = p->pat;
+    for (i = k = 0; i < p->pat_used; i++, cp++) {
+        /*
+         * Locate the character in the skip array.
+         */
+        for (sp = p->skip, j = 0;
+             j < p->skip_used && sp->ch->uc != cp->uc; j++, sp++) ;
+
+        /*
+         * If the character is not found, set the new skip element and
+         * increase the number of skip elements.
+         */
+        if (j == p->skip_used) {
+            sp->ch = cp;
+            p->skip_used++;
+        }
+
+        /*
+         * Set the updated skip value.  If the character is UTF-16 and is
+         * not the last one in the pattern, add one to its skip value.
+         */
+        sp->skip = slen - k;
+        if (cp->uc >= 0x10000 && k + 2 < slen)
+          sp->skip++;
+
+        /*
+         * Set the new extra skip for the sentinel character.
+         */
+        if (((cp->uc >= 0x10000 && k + 2 <= slen) || k + 1 <= slen) &&
+            cp->uc == sentinel)
+          p->md4 = slen - k;
+
+        /*
+         * Increase the actual index.
+         */
+        k += (cp->uc >= 0x10000) ? 2 : 1;
+    }
+}
+
+/*
+ * Search `text' (`textlen' UCS2 units) for the compiled pattern `pat'.
+ *
+ * Returns 1 when a match is found, with its bounds stored through
+ * `match_start' and `match_end' as offsets into `text'; returns 0
+ * otherwise.  A 0 is also returned for a null or empty pattern, a null or
+ * empty text, or a text shorter than the pattern.  `*match_end' may be
+ * modified even when 0 is returned, because _utbm_match() writes it before
+ * comparing.
+ *
+ * NOTE(review): the scan begins at text + patlen rather than at
+ * text + patlen - 1, so it looks like an occurrence that ends within the
+ * first `patlen' units of the text is never examined -- confirm whether
+ * this is intended before relying on matches at offset 0.
+ */
+int
+#ifdef __STDC__
+utbm_exec(utbm_pattern_t pat, ucs2_t *text, unsigned long textlen,
+          unsigned long *match_start, unsigned long *match_end)
+#else
+utbm_exec(pat, text, textlen, match_start, match_end)
+utbm_pattern_t pat;
+ucs2_t *text;
+unsigned long textlen, *match_start, *match_end;
+#endif
+{
+    unsigned long k;
+    ucs2_t *start, *end;
+
+    if (pat == 0 || pat->pat_used == 0 || text == 0 || textlen == 0 ||
+        textlen < pat->patlen)
+      return 0;
+
+    start = text + pat->patlen;
+    end = text + textlen;
+
+    /*
+     * Adjust the start point if it points to a low surrogate.  When the
+     * text is exactly as long as the pattern, `start' is already one past
+     * the last unit and must not be read.
+     */
+    if (start < end && 0xdc00 <= *start && *start <= 0xdfff &&
+        0xd800 <= *(start - 1) && *(start - 1) <= 0xdbff)
+      start--;
+
+    while (start < end) {
+        /*
+         * Skip forward until a character with a zero skip (a candidate for
+         * the end of a match) is found or the text is exhausted.
+         */
+        while ((k = _utbm_skip(pat, start, end))) {
+            start += k;
+            if (start < end && 0xdc00 <= *start && *start <= 0xdfff &&
+                0xd800 <= *(start - 1) && *(start - 1) <= 0xdbff)
+              start--;
+        }
+
+        if (start < end &&
+            _utbm_match(pat, text, start, end, match_start, match_end))
+          return 1;
+
+        /*
+         * The candidate failed; move on by the extra skip computed for the
+         * last pattern character.
+         */
+        start += pat->md4;
+        if (start < end && 0xdc00 <= *start && *start <= 0xdfff &&
+            0xd800 <= *(start - 1) && *(start - 1) <= 0xdbff)
+          start--;
+    }
+    return 0;
+}
--- a/libraries/liblunicode/utbm/utbm.h
+++ b/libraries/liblunicode/utbm/utbm.h
@ -0,0 +1,109 @@
+/*
+ * Copyright 1997, 1998, 1999 Computing Research Labs,
+ * New Mexico State University
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+ * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _h_utbm
+#define _h_utbm
+
+/*
+ * $Id: utbm.h,v 1.1 1999/09/21 15:45:18 mleisher Exp $
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Prototype wrapper: with an ANSI C compiler (__STDC__ defined) __(x) expands
+ * to the parenthesized parameter list it is given; otherwise it expands to
+ * an empty `()' so the declarations below become old-style declarations.
+ * The macro is undefined again at the end of this header.
+ */
+#undef __
+#ifdef __STDC__
+#define __(x) x
+#else
+#define __(x) ()
+#endif
+
+/*************************************************************************
+ *
+ * Types.
+ *
+ *************************************************************************/
+
+/*
+ * Fundamental character types.
+ *
+ * ucs4_t is the argument and result type of the _utbm_* stub functions
+ * declared below; ucs2_t is the element type of the pattern and text
+ * buffers passed to utbm_compile() and utbm_exec().
+ *
+ * NOTE(review): the names suggest 32-bit and 16-bit units, but C only
+ * guarantees that unsigned long and unsigned short are at least that wide.
+ */
+typedef unsigned long ucs4_t;
+typedef unsigned short ucs2_t;
+
+/*
+ * An opaque type used for the search pattern.  struct _utbm_pattern_t is
+ * not defined in this header; utbm_create_pattern() returns a value of this
+ * type and the other API functions take one as an argument.
+ */
+typedef struct _utbm_pattern_t *utbm_pattern_t;
+
+/*************************************************************************
+ *
+ * Flags.
+ *
+ *************************************************************************/
+
+/*
+ * Each flag occupies its own bit.
+ * NOTE(review): presumably these are OR-ed together and passed as the
+ * `flags' argument of utbm_compile() -- confirm against utbm.c.
+ */
+#define UTBM_CASEFOLD          0x01
+#define UTBM_IGNORE_NONSPACING 0x02
+#define UTBM_SPACE_COMPRESS    0x04
+
+/*************************************************************************
+ *
+ * API.
+ *
+ *************************************************************************/
+
+/*
+ * Returns a new pattern handle.
+ * NOTE(review): failure behaviour (e.g. a null return) is not visible from
+ * this header -- confirm against utbm.c.
+ */
+extern utbm_pattern_t utbm_create_pattern __((void));
+
+/*
+ * Takes a pattern handle; presumably releases what utbm_create_pattern()
+ * and utbm_compile() allocated -- confirm against utbm.c.
+ */
+extern void utbm_free_pattern __((utbm_pattern_t pattern));
+
+/*
+ * Takes `patlen' ucs2_t units at `pat', a `flags' value and a pattern
+ * handle.  Returns nothing, so any failure is not reported to the caller
+ * through this interface.
+ */
+extern void utbm_compile __((ucs2_t *pat, unsigned long patlen,
+                             unsigned long flags, utbm_pattern_t pattern));
+
+/*
+ * Searches `textlen' ucs2_t units at `text' for the pattern `pat'.
+ * Returns 1 when a match is found and 0 otherwise; 0 is also returned when
+ * `pat' or `text' is null, the pattern is empty, or the text is empty or
+ * shorter than the pattern.  `match_start' and `match_end' receive the
+ * match location (presumably as offsets into `text' -- confirm against
+ * utbm.c).
+ */
+extern int utbm_exec __((utbm_pattern_t pat, ucs2_t *text,
+                         unsigned long textlen, unsigned long *match_start,
+                         unsigned long *match_end));
+
+/*************************************************************************
+ *
+ * Prototypes for the stub functions needed.
+ *
+ *************************************************************************/
+
+/*
+ * Character classification and case mapping hooks.  Default definitions
+ * are supplied in utbmstub.c.
+ */
+
+/* Non-zero if `c' counts as a space; `compress' widens the set. */
+extern int _utbm_isspace __((ucs4_t c, int compress));
+
+/* Non-zero if `c' is a control character. */
+extern int _utbm_iscntrl __((ucs4_t c));
+
+/* Non-zero if `c' is a non-spacing character. */
+extern int _utbm_nonspacing __((ucs4_t c));
+
+/* Lower case form of `c'. */
+extern ucs4_t _utbm_tolower __((ucs4_t c));
+
+/* Upper case form of `c'. */
+extern ucs4_t _utbm_toupper __((ucs4_t c));
+
+/* Title case form of `c'. */
+extern ucs4_t _utbm_totitle __((ucs4_t c));
+
+#undef __
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _h_utbm */
--- a/libraries/liblunicode/utbm/utbmstub.c
+++ b/libraries/liblunicode/utbm/utbmstub.c
@ -0,0 +1,125 @@
+/*
+ * Copyright 1997, 1998, 1999 Computing Research Labs,
+ * New Mexico State University
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
+ * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+/*
+ * RCS identification string; compiled in unless `lint' is defined.
+ */
+#ifndef lint
+static char rcsid[] = "$Id: utbmstub.c,v 1.1 1999/09/21 15:45:18 mleisher Exp $";
+#endif
+
+#include "utbm.h"
+
+/*
+ * This should be redefined to use the `isspace' function available in the
+ * Unicode support on the platform where this is being used.
+ *
+ * As shipped it expands to the constant 0 and ignores its argument, so the
+ * platform test never reports any character as a space.
+ */
+#define _platform_isspace(x) 0
+
+/*
+ * Return non-zero for any character that should be considered the
+ * equivalent of a space character, and zero otherwise.
+ *
+ * Without `compress' the answer is whatever _platform_isspace() says.  With
+ * `compress' set, TAB, LF, CR, U+2028 and U+2029 are also treated as spaces
+ * and the result is normalized to exactly 0 or 1.
+ */
+int
+#ifdef __STDC__
+_utbm_isspace(ucs4_t c, int compress)
+#else
+_utbm_isspace(c, compress)
+ucs4_t c;
+int compress;
+#endif
+{
+    if (!compress)
+      return _platform_isspace(c);
+
+    /*
+     * Characters that always count as spaces when compressing.
+     */
+    if (c == 0x09 || c == 0x0a || c == 0x0d)
+      return 1;
+    if (c == 0x2028 || c == 0x2029)
+      return 1;
+
+    return (_platform_isspace(c)) ? 1 : 0;
+}
+
+/*
+ * Return non-zero if the character is a control character, or zero otherwise.
+ *
+ * This stub ignores `c' and always returns 0, so no character is reported
+ * as a control character.
+ */
+int
+#ifdef __STDC__
+_utbm_iscntrl(ucs4_t c)
+#else
+_utbm_iscntrl(c)
+ucs4_t c;
+#endif
+{
+    return 0;
+}
+
+/*
+ * Return non-zero if the character is a non-spacing character, or zero
+ * otherwise.
+ *
+ * This stub ignores `c' and always returns 0, so no character is reported
+ * as non-spacing.
+ */
+int
+#ifdef __STDC__
+_utbm_nonspacing(ucs4_t c)
+#else
+_utbm_nonspacing(c)
+ucs4_t c;
+#endif
+{
+    return 0;
+}
+
+/*
+ * Convert a character to lower case.
+ *
+ * This stub performs no mapping: it returns `c' unchanged.
+ */
+ucs4_t
+#ifdef __STDC__
+_utbm_tolower(ucs4_t c)
+#else
+_utbm_tolower(c)
+ucs4_t c;
+#endif
+{
+    return c;
+}
+
+/*
+ * Convert a character to upper case.
+ *
+ * This stub performs no mapping: it returns `c' unchanged.
+ */
+ucs4_t
+#ifdef __STDC__
+_utbm_toupper(ucs4_t c)
+#else
+_utbm_toupper(c)
+ucs4_t c;
+#endif
+{
+    return c;
+}
+
+/*
+ * Convert a character to title case.
+ *
+ * This stub performs no mapping: it returns `c' unchanged.
+ */
+ucs4_t
+#ifdef __STDC__
+_utbm_totitle(ucs4_t c)
+#else
+_utbm_totitle(c)
+ucs4_t c;
+#endif
+{
+    return c;
+}