Remove MULE_INTERNAL encoding.

This was useful before widespread Unicode adoption, and was based on the
internal encoding Emacs used to mix multiple sub-encodings.  Emacs
itself has stopped using it, and our implementation hadn't been updated
with modern underlying standards.  It is thought to be very unlikely
that anyone is still using it in the field.  Since such a complex
encoding comes with costs and risks, we agreed to drop support.

Any existing database using this encoding would need to be dumped and
restored with a new encoding, most likely UTF8, to upgrade to
PostgreSQL 19, since pg_upgrade would fail.

Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Tatsuo Ishii <ishii@postgresql.org>
Reviewed-by: Jeff Davis <pgsql@j-davis.com>
Discussion: https://postgr.es/m/CA%2BhUKGKXDXh-FdU0orjfv%2BF08f%3DD91BhV3Ra-4zL-q%2BJmGYqTA%40mail.gmail.com
This commit is contained in:
Thomas Munro 2026-04-08 17:40:06 +12:00
parent 2c16deee2f
commit 77645d44e3
38 changed files with 55 additions and 3254 deletions

View file

@ -1763,7 +1763,7 @@ ORDER BY c COLLATE ebcdic;
encodings), including
single-byte character sets such as the ISO 8859 series and
multiple-byte character sets such as <acronym>EUC</acronym> (Extended Unix
Code), UTF-8, and Mule internal code. All supported character sets
Code) and UTF-8. All supported character sets
can be used transparently by clients, but a few are not supported
for use within the server (that is, as a server-side encoding).
The default character set is selected while
@ -2045,15 +2045,6 @@ ORDER BY c COLLATE ebcdic;
<entry>1</entry>
<entry><literal>ISO885916</literal></entry>
</row>
<row>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry>Mule internal code</entry>
<entry>Multilingual Emacs</entry>
<entry>Yes</entry>
<entry>No</entry>
<entry>1&ndash;4</entry>
<entry></entry>
</row>
<row>
<entry><literal>SJIS</literal></entry>
<entry>Shift JIS</entry>
@ -2205,7 +2196,7 @@ ORDER BY c COLLATE ebcdic;
<para>
Not all client <acronym>API</acronym>s support all the listed character sets. For example, the
<productname>PostgreSQL</productname>
JDBC driver does not support <literal>MULE_INTERNAL</literal>, <literal>LATIN6</literal>,
JDBC driver does not support <literal>LATIN6</literal>,
<literal>LATIN8</literal>, and <literal>LATIN10</literal>.
</para>
@ -2463,14 +2454,12 @@ RESET client_encoding;
<row>
<entry><literal>EUC_CN</literal></entry>
<entry><emphasis>EUC_CN</emphasis>,
<literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
<row>
<entry><literal>EUC_JP</literal></entry>
<entry><emphasis>EUC_JP</emphasis>,
<literal>MULE_INTERNAL</literal>,
<literal>SJIS</literal>,
<literal>UTF8</literal>
</entry>
@ -2485,7 +2474,6 @@ RESET client_encoding;
<row>
<entry><literal>EUC_KR</literal></entry>
<entry><emphasis>EUC_KR</emphasis>,
<literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
@ -2493,7 +2481,6 @@ RESET client_encoding;
<entry><literal>EUC_TW</literal></entry>
<entry><emphasis>EUC_TW</emphasis>,
<literal>BIG5</literal>,
<literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
@ -2511,7 +2498,6 @@ RESET client_encoding;
<entry><literal>ISO_8859_5</literal></entry>
<entry><emphasis>ISO_8859_5</emphasis>,
<literal>KOI8R</literal>,
<literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>,
<literal>WIN866</literal>,
<literal>WIN1251</literal>
@ -2544,7 +2530,6 @@ RESET client_encoding;
<entry><literal>KOI8R</literal></entry>
<entry><emphasis>KOI8R</emphasis>,
<literal>ISO_8859_5</literal>,
<literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>,
<literal>WIN866</literal>,
<literal>WIN1251</literal>
@ -2559,14 +2544,12 @@ RESET client_encoding;
<row>
<entry><literal>LATIN1</literal></entry>
<entry><emphasis>LATIN1</emphasis>,
<literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
<row>
<entry><literal>LATIN2</literal></entry>
<entry><emphasis>LATIN2</emphasis>,
<literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>,
<literal>WIN1250</literal>
</entry>
@ -2574,14 +2557,12 @@ RESET client_encoding;
<row>
<entry><literal>LATIN3</literal></entry>
<entry><emphasis>LATIN3</emphasis>,
<literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
<row>
<entry><literal>LATIN4</literal></entry>
<entry><emphasis>LATIN4</emphasis>,
<literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
@ -2621,23 +2602,6 @@ RESET client_encoding;
<literal>UTF8</literal>
</entry>
</row>
<row>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><emphasis>MULE_INTERNAL</emphasis>,
<literal>BIG5</literal>,
<literal>EUC_CN</literal>,
<literal>EUC_JP</literal>,
<literal>EUC_KR</literal>,
<literal>EUC_TW</literal>,
<literal>ISO_8859_5</literal>,
<literal>KOI8R</literal>,
<literal>LATIN1</literal> to <literal>LATIN4</literal>,
<literal>SJIS</literal>,
<literal>WIN866</literal>,
<literal>WIN1250</literal>,
<literal>WIN1251</literal>
</entry>
</row>
<row>
<entry><literal>SJIS</literal></entry>
<entry><emphasis>not supported as a server encoding</emphasis>
@ -2668,7 +2632,6 @@ RESET client_encoding;
<entry><emphasis>WIN866</emphasis>,
<literal>ISO_8859_5</literal>,
<literal>KOI8R</literal>,
<literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>,
<literal>WIN1251</literal>
</entry>
@ -2683,7 +2646,6 @@ RESET client_encoding;
<entry><literal>WIN1250</literal></entry>
<entry><emphasis>WIN1250</emphasis>,
<literal>LATIN2</literal>,
<literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>
</entry>
</row>
@ -2692,7 +2654,6 @@ RESET client_encoding;
<entry><emphasis>WIN1251</emphasis>,
<literal>ISO_8859_5</literal>,
<literal>KOI8R</literal>,
<literal>MULE_INTERNAL</literal>,
<literal>UTF8</literal>,
<literal>WIN866</literal>
</entry>
@ -2775,31 +2736,16 @@ RESET client_encoding;
<entry><literal>BIG5</literal></entry>
<entry><literal>EUC_TW</literal></entry>
</row>
<row>
<entry><literal>big5_to_mic</literal></entry>
<entry><literal>BIG5</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>big5_to_utf8</literal></entry>
<entry><literal>BIG5</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>euc_cn_to_mic</literal></entry>
<entry><literal>EUC_CN</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>euc_cn_to_utf8</literal></entry>
<entry><literal>EUC_CN</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>euc_jp_to_mic</literal></entry>
<entry><literal>EUC_JP</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>euc_jp_to_sjis</literal></entry>
<entry><literal>EUC_JP</literal></entry>
@ -2810,11 +2756,6 @@ RESET client_encoding;
<entry><literal>EUC_JP</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>euc_kr_to_mic</literal></entry>
<entry><literal>EUC_KR</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>euc_kr_to_utf8</literal></entry>
<entry><literal>EUC_KR</literal></entry>
@ -2825,11 +2766,6 @@ RESET client_encoding;
<entry><literal>EUC_TW</literal></entry>
<entry><literal>BIG5</literal></entry>
</row>
<row>
<entry><literal>euc_tw_to_mic</literal></entry>
<entry><literal>EUC_TW</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>euc_tw_to_utf8</literal></entry>
<entry><literal>EUC_TW</literal></entry>
@ -2870,21 +2806,11 @@ RESET client_encoding;
<entry><literal>LATIN10</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>iso_8859_1_to_mic</literal></entry>
<entry><literal>LATIN1</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>iso_8859_1_to_utf8</literal></entry>
<entry><literal>LATIN1</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>iso_8859_2_to_mic</literal></entry>
<entry><literal>LATIN2</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>iso_8859_2_to_utf8</literal></entry>
<entry><literal>LATIN2</literal></entry>
@ -2895,21 +2821,11 @@ RESET client_encoding;
<entry><literal>LATIN2</literal></entry>
<entry><literal>WIN1250</literal></entry>
</row>
<row>
<entry><literal>iso_8859_3_to_mic</literal></entry>
<entry><literal>LATIN3</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>iso_8859_3_to_utf8</literal></entry>
<entry><literal>LATIN3</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>iso_8859_4_to_mic</literal></entry>
<entry><literal>LATIN4</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>iso_8859_4_to_utf8</literal></entry>
<entry><literal>LATIN4</literal></entry>
@ -2920,11 +2836,6 @@ RESET client_encoding;
<entry><literal>ISO_8859_5</literal></entry>
<entry><literal>KOI8R</literal></entry>
</row>
<row>
<entry><literal>iso_8859_5_to_mic</literal></entry>
<entry><literal>ISO_8859_5</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>iso_8859_5_to_utf8</literal></entry>
<entry><literal>ISO_8859_5</literal></entry>
@ -2970,11 +2881,6 @@ RESET client_encoding;
<entry><literal>KOI8R</literal></entry>
<entry><literal>ISO_8859_5</literal></entry>
</row>
<row>
<entry><literal>koi8_r_to_mic</literal></entry>
<entry><literal>KOI8R</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>koi8_r_to_utf8</literal></entry>
<entry><literal>KOI8R</literal></entry>
@ -2995,91 +2901,11 @@ RESET client_encoding;
<entry><literal>KOI8U</literal></entry>
<entry><literal>UTF8</literal></entry>
</row>
<row>
<entry><literal>mic_to_big5</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>BIG5</literal></entry>
</row>
<row>
<entry><literal>mic_to_euc_cn</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>EUC_CN</literal></entry>
</row>
<row>
<entry><literal>mic_to_euc_jp</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>EUC_JP</literal></entry>
</row>
<row>
<entry><literal>mic_to_euc_kr</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>EUC_KR</literal></entry>
</row>
<row>
<entry><literal>mic_to_euc_tw</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>EUC_TW</literal></entry>
</row>
<row>
<entry><literal>mic_to_iso_8859_1</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>LATIN1</literal></entry>
</row>
<row>
<entry><literal>mic_to_iso_8859_2</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>LATIN2</literal></entry>
</row>
<row>
<entry><literal>mic_to_iso_8859_3</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>LATIN3</literal></entry>
</row>
<row>
<entry><literal>mic_to_iso_8859_4</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>LATIN4</literal></entry>
</row>
<row>
<entry><literal>mic_to_iso_8859_5</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>ISO_8859_5</literal></entry>
</row>
<row>
<entry><literal>mic_to_koi8_r</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>KOI8R</literal></entry>
</row>
<row>
<entry><literal>mic_to_sjis</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>SJIS</literal></entry>
</row>
<row>
<entry><literal>mic_to_windows_1250</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>WIN1250</literal></entry>
</row>
<row>
<entry><literal>mic_to_windows_1251</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>WIN1251</literal></entry>
</row>
<row>
<entry><literal>mic_to_windows_866</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
<entry><literal>WIN866</literal></entry>
</row>
<row>
<entry><literal>sjis_to_euc_jp</literal></entry>
<entry><literal>SJIS</literal></entry>
<entry><literal>EUC_JP</literal></entry>
</row>
<row>
<entry><literal>sjis_to_mic</literal></entry>
<entry><literal>SJIS</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>sjis_to_utf8</literal></entry>
<entry><literal>SJIS</literal></entry>
@ -3285,11 +3111,6 @@ RESET client_encoding;
<entry><literal>WIN1250</literal></entry>
<entry><literal>LATIN2</literal></entry>
</row>
<row>
<entry><literal>windows_1250_to_mic</literal></entry>
<entry><literal>WIN1250</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>windows_1250_to_utf8</literal></entry>
<entry><literal>WIN1250</literal></entry>
@ -3305,11 +3126,6 @@ RESET client_encoding;
<entry><literal>WIN1251</literal></entry>
<entry><literal>KOI8R</literal></entry>
</row>
<row>
<entry><literal>windows_1251_to_mic</literal></entry>
<entry><literal>WIN1251</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>windows_1251_to_utf8</literal></entry>
<entry><literal>WIN1251</literal></entry>
@ -3340,11 +3156,6 @@ RESET client_encoding;
<entry><literal>WIN866</literal></entry>
<entry><literal>KOI8R</literal></entry>
</row>
<row>
<entry><literal>windows_866_to_mic</literal></entry>
<entry><literal>WIN866</literal></entry>
<entry><literal>MULE_INTERNAL</literal></entry>
</row>
<row>
<entry><literal>windows_866_to_utf8</literal></entry>
<entry><literal>WIN866</literal></entry>

View file

@ -74,244 +74,6 @@ local2local(const unsigned char *l,
return l - start;
}
/*
 * latin2mic: LATINn ---> MIC for charsets whose local codes map directly
 * onto MIC codes.
 *
 * l         source string, 'len' bytes long
 * p         output area; the caller must make it large enough
 * lc        mule character set id of the local encoding; emitted as a
 *           prefix byte in front of every high-bit source byte
 * encoding  PG identifier of the local encoding, used in error reports
 * noError   if true, stop at an embedded zero byte instead of reporting it
 *
 * The output is always NUL-terminated.  Returns the number of input bytes
 * consumed, which can be less than 'len' when noError is true.
 */
int
latin2mic(const unsigned char *l, unsigned char *p, int len,
          int lc, int encoding, bool noError)
{
    const unsigned char *src = l;
    unsigned char *out = p;
    int         remaining;

    for (remaining = len; remaining > 0; remaining--, src++)
    {
        int         ch = *src;

        /* an embedded zero byte is never valid input */
        if (ch == 0)
        {
            if (noError)
                break;
            report_invalid_encoding(encoding, (const char *) src, remaining);
        }

        /* ASCII is copied as is; high-bit bytes get the charset prefix */
        if (IS_HIGHBIT_SET(ch))
            *out++ = lc;
        *out++ = ch;
    }

    *out = '\0';
    return src - l;
}
/*
 * mic2latin: MIC ---> LATINn for charsets whose local codes map directly
 * onto MIC codes.
 *
 * mic       source string, 'len' bytes long
 * p         output area; the caller must make it large enough
 * lc        mule character set id of the local encoding; the only lead
 *           byte accepted for non-ASCII characters
 * encoding  PG identifier of the local encoding, used in error reports
 * noError   if true, stop at the first problem instead of reporting it
 *
 * The output is always NUL-terminated.  Returns the number of input bytes
 * consumed, which can be less than 'len' when noError is true.
 */
int
mic2latin(const unsigned char *mic, unsigned char *p, int len,
          int lc, int encoding, bool noError)
{
    const unsigned char *cur = mic;
    int         remaining = len;

    while (remaining > 0)
    {
        int         lead = *cur;
        int         charlen;

        if (lead == 0)
        {
            if (noError)
                break;
            report_invalid_encoding(PG_MULE_INTERNAL, (const char *) cur,
                                    remaining);
        }

        /* plain ASCII passes straight through */
        if (!IS_HIGHBIT_SET(lead))
        {
            *p++ = lead;
            cur++;
            remaining--;
            continue;
        }

        /* multibyte character: it must fit in what is left of the input */
        charlen = pg_mule_mblen(cur);
        if (remaining < charlen)
        {
            if (noError)
                break;
            report_invalid_encoding(PG_MULE_INTERNAL, (const char *) cur,
                                    remaining);
        }

        /* only two-byte characters of the expected charset are convertible */
        if (charlen != 2 || lead != lc || !IS_HIGHBIT_SET(cur[1]))
        {
            if (noError)
                break;
            report_untranslatable_char(PG_MULE_INTERNAL, encoding,
                                       (const char *) cur, remaining);
        }

        /* drop the charset prefix, keep the payload byte */
        *p++ = cur[1];
        cur += 2;
        remaining -= 2;
    }

    *p = '\0';
    return cur - mic;
}
/*
 * latin2mic_with_table: generic single-byte charset conversion from a
 * local charset to the mule internal code, driven by a lookup table.
 *
 * l         source string, 'len' bytes long
 * p         output area; the caller must make it large enough
 * lc        mule character set id emitted in front of each converted byte
 * encoding  PG identifier of the local encoding, used in error reports
 * tab       conversion entries for local codes starting from 128 (0x80);
 *           each entry is the corresponding mule code point, or 0 when
 *           there is no equivalent
 * noError   if true, stop at the first problem instead of reporting it
 *
 * The output is always NUL-terminated.  Returns the number of input bytes
 * consumed, which can be less than 'len' when noError is true.
 */
int
latin2mic_with_table(const unsigned char *l,
                     unsigned char *p,
                     int len,
                     int lc,
                     int encoding,
                     const unsigned char *tab,
                     bool noError)
{
    const unsigned char *src = l;
    int         remaining;

    for (remaining = len; remaining > 0; remaining--, src++)
    {
        unsigned char ch = *src;
        unsigned char mapped;

        if (ch == 0)
        {
            if (noError)
                break;
            report_invalid_encoding(encoding, (const char *) src, remaining);
        }

        if (IS_HIGHBIT_SET(ch))
        {
            /* the table is indexed relative to 0x80 */
            mapped = tab[ch - HIGHBIT];
            if (mapped == 0)
            {
                if (noError)
                    break;
                report_untranslatable_char(encoding, PG_MULE_INTERNAL,
                                           (const char *) src, remaining);
            }
            else
            {
                *p++ = lc;
                *p++ = mapped;
            }
        }
        else
            *p++ = ch;          /* ASCII is copied unchanged */
    }

    *p = '\0';
    return src - l;
}
/*
 * mic2latin_with_table: generic single-byte charset conversion from the
 * mule internal code to a local charset, driven by a lookup table.
 *
 * mic       source string, 'len' bytes long
 * p         output area; the caller must make it large enough
 * lc        mule character set id; the only lead byte accepted for
 *           non-ASCII characters
 * encoding  PG identifier of the local encoding, used in error reports
 * tab       conversion entries for the mule code's second byte, starting
 *           from 128 (0x80); each entry is the corresponding local code
 *           point, or 0 when there is no equivalent
 * noError   if true, stop at the first problem instead of reporting it
 *
 * The output is always NUL-terminated.  Returns the number of input bytes
 * consumed, which can be less than 'len' when noError is true.
 */
int
mic2latin_with_table(const unsigned char *mic,
                     unsigned char *p,
                     int len,
                     int lc,
                     int encoding,
                     const unsigned char *tab,
                     bool noError)
{
    const unsigned char *cur = mic;
    int         remaining = len;

    while (remaining > 0)
    {
        unsigned char lead = *cur;
        unsigned char mapped;
        int         charlen;

        if (lead == 0)
        {
            if (noError)
                break;
            report_invalid_encoding(PG_MULE_INTERNAL, (const char *) cur,
                                    remaining);
        }

        /* plain ASCII passes straight through */
        if (!IS_HIGHBIT_SET(lead))
        {
            *p++ = lead;
            cur++;
            remaining--;
            continue;
        }

        /* multibyte character: it must fit in what is left of the input */
        charlen = pg_mule_mblen(cur);
        if (remaining < charlen)
        {
            if (noError)
                break;
            report_invalid_encoding(PG_MULE_INTERNAL, (const char *) cur,
                                    remaining);
        }

        /*
         * Consult the table only for a well-formed two-byte character of the
         * expected charset; anything else is treated as having no mapping.
         */
        mapped = 0;
        if (charlen == 2 && lead == lc && IS_HIGHBIT_SET(cur[1]))
            mapped = tab[cur[1] - HIGHBIT];

        if (mapped == 0)
        {
            if (noError)
                break;
            report_untranslatable_char(PG_MULE_INTERNAL, encoding,
                                       (const char *) cur, remaining);
            break;              /* keep compiler quiet */
        }

        *p++ = mapped;
        cur += 2;
        remaining -= 2;
    }

    *p = '\0';
    return cur - mic;
}
/*
* comparison routine for bsearch()
* this routine is intended for combined UTF8 -> local code

View file

@ -14,8 +14,8 @@ top_builddir = ../../../../..
include $(top_builddir)/src/Makefile.global
SUBDIRS = \
cyrillic_and_mic euc_cn_and_mic euc_jp_and_sjis \
euc_kr_and_mic euc_tw_and_big5 latin2_and_win1250 latin_and_mic \
cyrillic euc_jp_and_sjis \
euc_tw_and_big5 latin2_and_win1250 \
utf8_and_big5 utf8_and_cyrillic utf8_and_euc_cn \
utf8_and_euc_jp utf8_and_euc_kr utf8_and_euc_tw utf8_and_gb18030 \
utf8_and_gbk utf8_and_iso8859 utf8_and_iso8859_1 utf8_and_johab \

View file

@ -1,13 +1,13 @@
#-------------------------------------------------------------------------
#
# src/backend/utils/mb/conversion_procs/latin_and_mic/Makefile
# src/backend/utils/mb/conversion_procs/cyrillic/Makefile
#
#-------------------------------------------------------------------------
subdir = src/backend/utils/mb/conversion_procs/latin_and_mic
subdir = src/backend/utils/mb/conversion_procs/cyrillic
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
NAME = latin_and_mic
PGFILEDESC = "latin <-> mic text conversions"
NAME = cyrillic
PGFILEDESC = "cyrillic single-byte conversions"
include $(srcdir)/../proc.mk

View file

@ -1,12 +1,12 @@
/*-------------------------------------------------------------------------
*
* Cyrillic and MULE_INTERNAL
* KOI8R, WIN1251, WIN866 and ISO_8859_5
*
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c
* src/backend/utils/mb/conversion_procs/cyrillic/cyrillic.c
*
*-------------------------------------------------------------------------
*/
@ -16,18 +16,10 @@
#include "mb/pg_wchar.h"
PG_MODULE_MAGIC_EXT(
.name = "cyrillic_and_mic",
.name = "cyrillic",
.version = PG_VERSION
);
PG_FUNCTION_INFO_V1(koi8r_to_mic);
PG_FUNCTION_INFO_V1(mic_to_koi8r);
PG_FUNCTION_INFO_V1(iso_to_mic);
PG_FUNCTION_INFO_V1(mic_to_iso);
PG_FUNCTION_INFO_V1(win1251_to_mic);
PG_FUNCTION_INFO_V1(mic_to_win1251);
PG_FUNCTION_INFO_V1(win866_to_mic);
PG_FUNCTION_INFO_V1(mic_to_win866);
PG_FUNCTION_INFO_V1(koi8r_to_win1251);
PG_FUNCTION_INFO_V1(win1251_to_koi8r);
PG_FUNCTION_INFO_V1(koi8r_to_win866);
@ -59,7 +51,7 @@ PG_FUNCTION_INFO_V1(win866_to_iso);
* Cyrillic support
* currently supported Cyrillic encodings:
*
* KOI8-R (this is also the charset for the mule internal code for Cyrillic)
* KOI8-R
* ISO-8859-5
* Microsoft's CP1251 (windows-1251)
* Alternativny Variant (MS-DOS CP866)
@ -306,134 +298,6 @@ static const unsigned char win8662iso[] = {
};
/*
 * koi8r_to_mic: conversion entry point, KOI8R ---> MULE_INTERNAL.
 *
 * Argument 2 is the source string, 3 the destination buffer, 4 the source
 * length and 5 the no-error flag.  Returns the byte count reported by
 * latin2mic().
 */
Datum
koi8r_to_mic(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_MULE_INTERNAL);

    PG_RETURN_INT32(latin2mic(input, output, nbytes,
                              LC_KOI8_R, PG_KOI8R, no_error));
}
/*
 * mic_to_koi8r: conversion entry point, MULE_INTERNAL ---> KOI8R.
 *
 * Argument 2 is the source string, 3 the destination buffer, 4 the source
 * length and 5 the no-error flag.  Returns the byte count reported by
 * mic2latin().
 */
Datum
mic_to_koi8r(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_KOI8R);

    PG_RETURN_INT32(mic2latin(input, output, nbytes,
                              LC_KOI8_R, PG_KOI8R, no_error));
}
/*
 * iso_to_mic: conversion entry point, ISO_8859_5 ---> MULE_INTERNAL.
 *
 * High-bit bytes are translated through the iso2koi table and emitted
 * under the LC_KOI8_R charset id.  Argument 2 is the source string, 3 the
 * destination buffer, 4 the source length and 5 the no-error flag.
 * Returns the byte count reported by latin2mic_with_table().
 */
Datum
iso_to_mic(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_MULE_INTERNAL);

    PG_RETURN_INT32(latin2mic_with_table(input, output, nbytes,
                                         LC_KOI8_R, PG_ISO_8859_5,
                                         iso2koi, no_error));
}
/*
 * mic_to_iso: conversion entry point, MULE_INTERNAL ---> ISO_8859_5.
 *
 * Characters under the LC_KOI8_R charset id are translated through the
 * koi2iso table.  Argument 2 is the source string, 3 the destination
 * buffer, 4 the source length and 5 the no-error flag.  Returns the byte
 * count reported by mic2latin_with_table().
 */
Datum
mic_to_iso(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_ISO_8859_5);

    PG_RETURN_INT32(mic2latin_with_table(input, output, nbytes,
                                         LC_KOI8_R, PG_ISO_8859_5,
                                         koi2iso, no_error));
}
/*
 * win1251_to_mic: conversion entry point, WIN1251 ---> MULE_INTERNAL.
 *
 * High-bit bytes are translated through the win12512koi table and emitted
 * under the LC_KOI8_R charset id.  Argument 2 is the source string, 3 the
 * destination buffer, 4 the source length and 5 the no-error flag.
 * Returns the byte count reported by latin2mic_with_table().
 */
Datum
win1251_to_mic(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_MULE_INTERNAL);

    PG_RETURN_INT32(latin2mic_with_table(input, output, nbytes,
                                         LC_KOI8_R, PG_WIN1251,
                                         win12512koi, no_error));
}
/*
 * mic_to_win1251: conversion entry point, MULE_INTERNAL ---> WIN1251.
 *
 * Characters under the LC_KOI8_R charset id are translated through the
 * koi2win1251 table.  Argument 2 is the source string, 3 the destination
 * buffer, 4 the source length and 5 the no-error flag.  Returns the byte
 * count reported by mic2latin_with_table().
 */
Datum
mic_to_win1251(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1251);

    PG_RETURN_INT32(mic2latin_with_table(input, output, nbytes,
                                         LC_KOI8_R, PG_WIN1251,
                                         koi2win1251, no_error));
}
/*
 * win866_to_mic: conversion entry point, WIN866 ---> MULE_INTERNAL.
 *
 * High-bit bytes are translated through the win8662koi table and emitted
 * under the LC_KOI8_R charset id.  Argument 2 is the source string, 3 the
 * destination buffer, 4 the source length and 5 the no-error flag.
 * Returns the byte count reported by latin2mic_with_table().
 */
Datum
win866_to_mic(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_MULE_INTERNAL);

    PG_RETURN_INT32(latin2mic_with_table(input, output, nbytes,
                                         LC_KOI8_R, PG_WIN866,
                                         win8662koi, no_error));
}
/*
 * mic_to_win866: conversion entry point, MULE_INTERNAL ---> WIN866.
 *
 * Characters under the LC_KOI8_R charset id are translated through the
 * koi2win866 table.  Argument 2 is the source string, 3 the destination
 * buffer, 4 the source length and 5 the no-error flag.  Returns the byte
 * count reported by mic2latin_with_table().
 */
Datum
mic_to_win866(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN866);

    PG_RETURN_INT32(mic2latin_with_table(input, output, nbytes,
                                         LC_KOI8_R, PG_WIN866,
                                         koi2win866, no_error));
}
Datum
koi8r_to_win1251(PG_FUNCTION_ARGS)
{

View file

@ -1,13 +0,0 @@
#-------------------------------------------------------------------------
#
# src/backend/utils/mb/conversion_procs/cyrillic_and_mic/Makefile
#
#-------------------------------------------------------------------------
subdir = src/backend/utils/mb/conversion_procs/cyrillic_and_mic
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
NAME = cyrillic_and_mic
PGFILEDESC = "cyrillic <-> mic text conversions"
include $(srcdir)/../proc.mk

View file

@ -1,13 +0,0 @@
#-------------------------------------------------------------------------
#
# src/backend/utils/mb/conversion_procs/euc_cn_and_mic/Makefile
#
#-------------------------------------------------------------------------
subdir = src/backend/utils/mb/conversion_procs/euc_cn_and_mic
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
NAME = euc_cn_and_mic
PGFILEDESC = "euc_cn <-> mic text conversions"
include $(srcdir)/../proc.mk

View file

@ -1,169 +0,0 @@
/*-------------------------------------------------------------------------
*
* EUC_CN and MULE_INTERNAL
*
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
PG_MODULE_MAGIC_EXT(
.name = "euc_cn_and_mic",
.version = PG_VERSION
);
PG_FUNCTION_INFO_V1(euc_cn_to_mic);
PG_FUNCTION_INFO_V1(mic_to_euc_cn);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
* INTEGER, -- source string length
* BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ----------
*/
static int euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError);
/*
 * euc_cn_to_mic: conversion entry point, EUC_CN ---> MULE_INTERNAL.
 *
 * Argument 2 is the source string, 3 the destination buffer, 4 the source
 * length and 5 the no-error flag.  Returns the byte count reported by
 * euc_cn2mic().
 */
Datum
euc_cn_to_mic(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_MULE_INTERNAL);

    PG_RETURN_INT32(euc_cn2mic(input, output, nbytes, no_error));
}
/*
 * mic_to_euc_cn: conversion entry point, MULE_INTERNAL ---> EUC_CN.
 *
 * Argument 2 is the source string, 3 the destination buffer, 4 the source
 * length and 5 the no-error flag.  Returns the byte count reported by
 * mic2euc_cn().
 */
Datum
mic_to_euc_cn(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_CN);

    PG_RETURN_INT32(mic2euc_cn(input, output, nbytes, no_error));
}
/*
 * EUC_CN ---> MIC
 *
 * Each two-byte EUC_CN character is copied with an LC_GB2312_80 prefix
 * byte; ASCII bytes are copied unchanged.  The output is always
 * NUL-terminated.  Returns the number of input bytes consumed, which can
 * be less than 'len' when noError is true.
 */
static int
euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
    const unsigned char *cur = euc;
    int         remaining = len;

    while (remaining > 0)
    {
        int         lead = *cur;

        if (!IS_HIGHBIT_SET(lead))
        {
            /* should be ASCII; an embedded zero byte is invalid */
            if (lead == 0)
            {
                if (noError)
                    break;
                report_invalid_encoding(PG_EUC_CN, (const char *) cur,
                                        remaining);
            }
            *p++ = lead;
            cur++;
            remaining--;
            continue;
        }

        /* a high-bit lead byte needs a high-bit trail byte */
        if (remaining < 2 || !IS_HIGHBIT_SET(cur[1]))
        {
            if (noError)
                break;
            report_invalid_encoding(PG_EUC_CN, (const char *) cur, remaining);
        }

        *p++ = LC_GB2312_80;
        *p++ = lead;
        *p++ = cur[1];
        cur += 2;
        remaining -= 2;
    }

    *p = '\0';
    return cur - euc;
}
/*
 * MIC ---> EUC_CN
 *
 * Only characters prefixed with LC_GB2312_80 can be translated: the prefix
 * is dropped and the two following bytes are copied.  ASCII bytes are
 * copied unchanged.  The output is always NUL-terminated.  Returns the
 * number of input bytes consumed, which can be less than 'len' when
 * noError is true.
 */
static int
mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
    const unsigned char *cur = mic;
    int         remaining = len;

    while (remaining > 0)
    {
        int         lead = *cur;

        if (!IS_HIGHBIT_SET(lead))
        {
            /* should be ASCII; an embedded zero byte is invalid */
            if (lead == 0)
            {
                if (noError)
                    break;
                report_invalid_encoding(PG_MULE_INTERNAL,
                                        (const char *) cur, remaining);
            }
            *p++ = lead;
            cur++;
            remaining--;
            continue;
        }

        /* any charset other than GB2312 has no EUC_CN equivalent */
        if (lead != LC_GB2312_80)
        {
            if (noError)
                break;
            report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_CN,
                                       (const char *) cur, remaining);
        }

        /* the prefix must be followed by two high-bit bytes */
        if (remaining < 3 || !IS_HIGHBIT_SET(cur[1]) || !IS_HIGHBIT_SET(cur[2]))
        {
            if (noError)
                break;
            report_invalid_encoding(PG_MULE_INTERNAL,
                                    (const char *) cur, remaining);
        }

        *p++ = cur[1];
        *p++ = cur[2];
        cur += 3;
        remaining -= 3;
    }

    *p = '\0';
    return cur - mic;
}

View file

@ -1,6 +1,6 @@
/*-------------------------------------------------------------------------
*
* EUC_JP, SJIS and MULE_INTERNAL
* EUC_JP and SJIS
*
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
@ -34,10 +34,6 @@ PG_MODULE_MAGIC_EXT(
PG_FUNCTION_INFO_V1(euc_jp_to_sjis);
PG_FUNCTION_INFO_V1(sjis_to_euc_jp);
PG_FUNCTION_INFO_V1(euc_jp_to_mic);
PG_FUNCTION_INFO_V1(mic_to_euc_jp);
PG_FUNCTION_INFO_V1(sjis_to_mic);
PG_FUNCTION_INFO_V1(mic_to_sjis);
/* ----------
* conv_proc(
@ -53,10 +49,6 @@ PG_FUNCTION_INFO_V1(mic_to_sjis);
* ----------
*/
static int sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError);
static int mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError);
static int euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError);
static int euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len, bool noError);
@ -92,444 +84,6 @@ sjis_to_euc_jp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(converted);
}
/*
 * euc_jp_to_mic: conversion entry point, EUC_JP ---> MULE_INTERNAL.
 *
 * Argument 2 is the source string, 3 the destination buffer, 4 the source
 * length and 5 the no-error flag.  Returns the value produced by
 * euc_jp2mic().
 */
Datum
euc_jp_to_mic(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_MULE_INTERNAL);

    PG_RETURN_INT32(euc_jp2mic(input, output, nbytes, no_error));
}
/*
 * mic_to_euc_jp: conversion entry point, MULE_INTERNAL ---> EUC_JP.
 *
 * Argument 2 is the source string, 3 the destination buffer, 4 the source
 * length and 5 the no-error flag.  Returns the value produced by
 * mic2euc_jp().
 */
Datum
mic_to_euc_jp(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_JP);

    PG_RETURN_INT32(mic2euc_jp(input, output, nbytes, no_error));
}
/*
 * sjis_to_mic: conversion entry point, SJIS ---> MULE_INTERNAL.
 *
 * Argument 2 is the source string, 3 the destination buffer, 4 the source
 * length and 5 the no-error flag.  Returns the value produced by
 * sjis2mic().
 */
Datum
sjis_to_mic(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_MULE_INTERNAL);

    PG_RETURN_INT32(sjis2mic(input, output, nbytes, no_error));
}
/*
 * mic_to_sjis: conversion entry point, MULE_INTERNAL ---> SJIS.
 *
 * Argument 2 is the source string, 3 the destination buffer, 4 the source
 * length and 5 the no-error flag.  Returns the value produced by
 * mic2sjis().
 */
Datum
mic_to_sjis(PG_FUNCTION_ARGS)
{
    unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
    unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
    int         nbytes = PG_GETARG_INT32(4);
    bool        no_error = PG_GETARG_BOOL(5);

    CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_SJIS);

    PG_RETURN_INT32(mic2sjis(input, output, nbytes, no_error));
}
/*
 * SJIS ---> MIC
 *
 * Convert "len" bytes of Shift-JIS input at "sjis" into MULE internal code,
 * writing the result followed by a terminating NUL to "p".
 *
 * Bytes 0xa1..0xdf are emitted as LC_JISX0201K + byte.  Other high-bit
 * bytes must start a two-byte character (ISSJISHEAD / ISSJISTAIL); the
 * 16-bit code k = (c1 << 8) + c2 then selects one of the range-specific
 * mappings below.  Remaining bytes are copied as ASCII.
 *
 * A zero byte or a malformed two-byte sequence stops the conversion when
 * noError is true; otherwise report_invalid_encoding() is called (the code
 * that follows presumably relies on it not returning -- TODO confirm).
 *
 * Returns the number of source bytes consumed.
 */
static int
sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError)
{
	const unsigned char *start = sjis;
	int			c1,
				c2,
				i,
				k,
				k2;

	while (len > 0)
	{
		c1 = *sjis;
		if (c1 >= 0xa1 && c1 <= 0xdf)
		{
			/* JIS X0201 (1 byte kana) */
			*p++ = LC_JISX0201K;
			*p++ = c1;
			sjis++;
			len--;
		}
		else if (IS_HIGHBIT_SET(c1))
		{
			/*
			 * JIS X0208, X0212, user defined extended characters
			 */
			if (len < 2 || !ISSJISHEAD(c1) || !ISSJISTAIL(sjis[1]))
			{
				if (noError)
					break;
				report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
			}
			c2 = sjis[1];
			k = (c1 << 8) + c2;
			if (k >= 0xed40 && k < 0xf040)
			{
				/*
				 * NEC selection IBM kanji: replace the code with the
				 * ibmkanji[].sjis value of the row whose .nec matches.
				 */
				for (i = 0;; i++)
				{
					k2 = ibmkanji[i].nec;
					if (k2 == 0xffff)
						break;	/* end-of-table sentinel */
					if (k2 == k)
					{
						k = ibmkanji[i].sjis;
						c1 = (k >> 8) & 0xff;
						c2 = k & 0xff;

						/*
						 * Stop at the first match.  k has just been
						 * rewritten, so scanning on would compare later
						 * rows against the replacement code.
						 */
						break;
					}
				}
			}

			if (k < 0xeb3f)
			{
				/* JIS X0208 */
				*p++ = LC_JISX0208;
				*p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
			}
			else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
			{
				/*
				 * Codes in these ranges that were not remapped above:
				 * emit the substitute code PGEUCALTCODE.
				 */
				*p++ = LC_JISX0208;
				*p++ = PGEUCALTCODE >> 8;
				*p++ = PGEUCALTCODE & 0xff;
			}
			else if (k >= 0xf040 && k < 0xf540)
			{
				/*
				 * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
				 * 0x7e7e EUC 0xf5a1 - 0xfefe
				 */
				*p++ = LC_JISX0208;
				c1 -= 0x6f;
				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
			}
			else if (k >= 0xf540 && k < 0xfa40)
			{
				/*
				 * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
				 * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
				 */
				*p++ = LC_JISX0212;
				c1 -= 0x74;
				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
			}
			else if (k >= 0xfa40)
			{
				/*
				 * mapping IBM kanji to X0208 and X0212
				 *
				 * NOTE(review): if no row matches, nothing is written for
				 * this character although two input bytes are consumed.
				 */
				for (i = 0;; i++)
				{
					k2 = ibmkanji[i].sjis;
					if (k2 == 0xffff)
						break;	/* end-of-table sentinel */
					if (k2 == k)
					{
						k = ibmkanji[i].euc;
						if (k >= 0x8f0000)
						{
							*p++ = LC_JISX0212;
							*p++ = 0x80 | ((k & 0xff00) >> 8);
							*p++ = 0x80 | (k & 0xff);
						}
						else
						{
							*p++ = LC_JISX0208;
							*p++ = 0x80 | (k >> 8);
							*p++ = 0x80 | (k & 0xff);
						}

						/*
						 * One character in, one character out: stop here
						 * so the rewritten k cannot match a later row and
						 * emit a second sequence.
						 */
						break;
					}
				}
			}
			sjis += 2;
			len -= 2;
		}
		else
		{						/* should be ASCII */
			if (c1 == 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
			}
			*p++ = c1;
			sjis++;
			len--;
		}
	}
	*p = '\0';

	return sjis - start;
}
/*
 * MIC ---> SJIS
 *
 * Convert "len" bytes of MULE internal code at "mic" into Shift-JIS,
 * writing the result followed by a terminating NUL to "p".
 *
 * Only the leading bytes LC_JISX0201K, LC_JISX0208 and LC_JISX0212 are
 * translated; any other multibyte character is reported through
 * report_untranslatable_char(), or ends the conversion when noError is set.
 * A zero byte, or a sequence rejected by pg_encoding_verifymbchar(), is
 * handled the same way via report_invalid_encoding().
 * NOTE(review): the code after each report_* call presumably relies on the
 * call not returning -- confirm against the definitions.
 *
 * Returns the number of source bytes consumed.
 */
static int
mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
const unsigned char *start = mic;
int c1,
c2,
k,
l;
while (len > 0)
{
c1 = *mic;
if (!IS_HIGHBIT_SET(c1))
{
/* ASCII */
if (c1 == 0)
{
if (noError)
break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
}
*p++ = c1;
mic++;
len--;
continue;
}
/* l is the verified byte length of the character starting at mic */
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0)
{
if (noError)
break;
report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len);
}
if (c1 == LC_JISX0201K)
/* single kana byte: drop the leading byte, copy the payload */
*p++ = mic[1];
else if (c1 == LC_JISX0208)
{
/* from here on c1/c2 hold the two payload bytes, k their 16-bit code */
c1 = mic[1];
c2 = mic[2];
k = (c1 << 8) | (c2 & 0xff);
if (k >= 0xf5a1)
{
/* UDC1 */
c1 -= 0x54;
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
}
else
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
/* second byte; uses c1 after the UDC1 adjustment (0x54 is even, so its parity is unchanged) */
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
}
else if (c1 == LC_JISX0212)
{
int i,
k2;
c1 = mic[1];
c2 = mic[2];
k = c1 << 8 | c2;
if (k >= 0xf5a1)
{
/* UDC2 */
c1 -= 0x54;
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
}
else
{
/* IBM kanji */
/* linear search of ibmkanji[] on the low 16 bits of the .euc column */
for (i = 0;; i++)
{
k2 = ibmkanji[i].euc & 0xffff;
if (k2 == 0xffff)
{
/* reached the end-of-table sentinel: emit the substitute code */
*p++ = PGSJISALTCODE >> 8;
*p++ = PGSJISALTCODE & 0xff;
break;
}
if (k2 == k)
{
/* match: emit the row's two-byte .sjis code */
k = ibmkanji[i].sjis;
*p++ = k >> 8;
*p++ = k & 0xff;
break;
}
}
}
}
else
{
/* leading byte with no SJIS translation */
if (noError)
break;
report_untranslatable_char(PG_MULE_INTERNAL, PG_SJIS,
(const char *) mic, len);
}
mic += l;
len -= l;
}
*p = '\0';
return mic - start;
}
/*
 * EUC_JP ---> MIC
 *
 * Translate "len" bytes of EUC_JP input into MULE internal code and
 * NUL-terminate the output.  ASCII bytes are copied unchanged; multibyte
 * characters are re-tagged with a MULE leading byte:
 *		SS2 + 1 byte	-> LC_JISX0201K + that byte
 *		SS3 + 2 bytes	-> LC_JISX0212 + those two bytes
 *		anything else	-> LC_JISX0208 + the first two input bytes
 *
 * A zero byte, or a sequence rejected by pg_encoding_verifymbchar(), ends
 * the conversion when noError is set; otherwise it is passed to
 * report_invalid_encoding().
 *
 * Returns the number of input bytes consumed.
 */
static int
euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
	const unsigned char *const origin = euc;

	while (len > 0)
	{
		int			lead = *euc;
		int			width;

		if (IS_HIGHBIT_SET(lead))
		{
			width = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
			if (width < 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_EUC_JP,
										(const char *) euc, len);
			}

			switch (lead)
			{
				case SS2:		/* 1 byte kana? */
					p[0] = LC_JISX0201K;
					p[1] = euc[1];
					p += 2;
					break;

				case SS3:		/* JIS X0212 kanji? */
					p[0] = LC_JISX0212;
					p[1] = euc[1];
					p[2] = euc[2];
					p += 3;
					break;

				default:		/* kanji? */
					p[0] = LC_JISX0208;
					p[1] = lead;
					p[2] = euc[1];
					p += 3;
					break;
			}
		}
		else
		{
			/* ASCII */
			if (lead == 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_EUC_JP,
										(const char *) euc, len);
			}
			*p++ = lead;
			width = 1;
		}

		euc += width;
		len -= width;
	}

	*p = '\0';
	return euc - origin;
}
/*
 * MIC ---> EUC_JP
 *
 * Translate "len" bytes of MULE internal code into EUC_JP and
 * NUL-terminate the output.  ASCII bytes are copied unchanged; the MULE
 * leading byte is replaced as follows:
 *		LC_JISX0201K	-> SS2 + 1 payload byte
 *		LC_JISX0212		-> SS3 + 2 payload bytes
 *		LC_JISX0208		-> the 2 payload bytes alone
 * Any other leading byte is untranslatable.
 *
 * Invalid or untranslatable input ends the conversion when noError is set;
 * otherwise it is passed to report_invalid_encoding() or
 * report_untranslatable_char() respectively.
 *
 * Returns the number of input bytes consumed.
 */
static int
mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
	const unsigned char *const origin = mic;

	while (len > 0)
	{
		int			lead = *mic;
		int			width;

		if (IS_HIGHBIT_SET(lead))
		{
			width = pg_encoding_verifymbchar(PG_MULE_INTERNAL,
											 (const char *) mic, len);
			if (width < 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_MULE_INTERNAL,
										(const char *) mic, len);
			}

			if (lead == LC_JISX0201K)
			{
				p[0] = SS2;
				p[1] = mic[1];
				p += 2;
			}
			else if (lead == LC_JISX0212)
			{
				p[0] = SS3;
				p[1] = mic[1];
				p[2] = mic[2];
				p += 3;
			}
			else if (lead == LC_JISX0208)
			{
				p[0] = mic[1];
				p[1] = mic[2];
				p += 2;
			}
			else
			{
				/* no EUC_JP equivalent for this charset */
				if (noError)
					break;
				report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_JP,
										   (const char *) mic, len);
			}
		}
		else
		{
			/* ASCII */
			if (lead == 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_MULE_INTERNAL,
										(const char *) mic, len);
			}
			*p++ = lead;
			width = 1;
		}

		mic += width;
		len -= width;
	}

	*p = '\0';
	return mic - origin;
}
/*
* EUC_JP -> SJIS
*/

View file

@ -1,13 +0,0 @@
#-------------------------------------------------------------------------
#
# src/backend/utils/mb/conversion_procs/euc_kr_and_mic/Makefile
#
# Makefile for the euc_kr_and_mic conversion module.  It only sets the
# per-module variables; the rules come from the included files.
#
#-------------------------------------------------------------------------
# Path of this directory from the top of the tree, and the way back up.
subdir = src/backend/utils/mb/conversion_procs/euc_kr_and_mic
top_builddir = ../../../../../..
include $(top_builddir)/src/Makefile.global
# Module name and its description string.
NAME = euc_kr_and_mic
PGFILEDESC = "euc_kr <-> mic text conversions"
# proc.mk lives one directory up; presumably it holds the build rules
# shared by the conversion modules (not visible here).
include $(srcdir)/../proc.mk

View file

@ -1,177 +0,0 @@
/*-------------------------------------------------------------------------
*
* EUC_KR and MULE_INTERNAL
*
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
PG_MODULE_MAGIC_EXT(
.name = "euc_kr_and_mic",
.version = PG_VERSION
);
PG_FUNCTION_INFO_V1(euc_kr_to_mic);
PG_FUNCTION_INFO_V1(mic_to_euc_kr);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
* INTEGER, -- source string length
* BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ----------
*/
static int euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError);
/*
 * euc_kr_to_mic
 *		Conversion procedure entry point: EUC_KR ---> MULE_INTERNAL.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the number of bytes converted by euc_kr2mic().
 */
Datum
euc_kr_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_MULE_INTERNAL);

	PG_RETURN_INT32(euc_kr2mic(input, output, nbytes, suppress));
}
/*
 * mic_to_euc_kr
 *		Conversion procedure entry point: MULE_INTERNAL ---> EUC_KR.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the number of bytes converted by mic2euc_kr().
 */
Datum
mic_to_euc_kr(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_KR);

	PG_RETURN_INT32(mic2euc_kr(input, output, nbytes, suppress));
}
/*
 * EUC_KR ---> MIC
 *
 * Translate "len" bytes of EUC_KR input into MULE internal code and
 * NUL-terminate the output.  ASCII bytes are copied unchanged; every
 * high-bit character must verify as exactly two bytes and is emitted as
 * LC_KS5601 followed by those two bytes.
 *
 * A zero byte or a character that does not verify ends the conversion when
 * noError is set; otherwise it is passed to report_invalid_encoding().
 *
 * Returns the number of input bytes consumed.
 */
static int
euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
	const unsigned char *const origin = euc;

	while (len > 0)
	{
		int			lead = *euc;

		if (!IS_HIGHBIT_SET(lead))
		{
			/* should be ASCII */
			if (lead == 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_EUC_KR,
										(const char *) euc, len);
			}
			*p++ = lead;
			euc++;
			len--;
			continue;
		}

		/* anything but a verified two-byte character is invalid */
		if (pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len) != 2)
		{
			if (noError)
				break;
			report_invalid_encoding(PG_EUC_KR,
									(const char *) euc, len);
		}

		p[0] = LC_KS5601;
		p[1] = lead;
		p[2] = euc[1];
		p += 3;

		euc += 2;
		len -= 2;
	}

	*p = '\0';
	return euc - origin;
}
/*
 * MIC ---> EUC_KR
 *
 * Translate "len" bytes of MULE internal code into EUC_KR and
 * NUL-terminate the output.  ASCII bytes are copied unchanged; an
 * LC_KS5601 character is emitted as its two payload bytes.  Any other
 * leading byte is untranslatable.
 *
 * Invalid or untranslatable input ends the conversion when noError is set;
 * otherwise it is passed to report_invalid_encoding() or
 * report_untranslatable_char() respectively.
 *
 * Returns the number of input bytes consumed.
 */
static int
mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
	const unsigned char *const origin = mic;

	while (len > 0)
	{
		int			lead = *mic;
		int			width;

		if (IS_HIGHBIT_SET(lead))
		{
			width = pg_encoding_verifymbchar(PG_MULE_INTERNAL,
											 (const char *) mic, len);
			if (width < 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_MULE_INTERNAL,
										(const char *) mic, len);
			}

			if (lead == LC_KS5601)
			{
				p[0] = mic[1];
				p[1] = mic[2];
				p += 2;
			}
			else
			{
				/* no EUC_KR equivalent for this charset */
				if (noError)
					break;
				report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR,
										   (const char *) mic, len);
			}
		}
		else
		{
			/* ASCII */
			if (lead == 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_MULE_INTERNAL,
										(const char *) mic, len);
			}
			*p++ = lead;
			width = 1;
		}

		mic += width;
		len -= width;
	}

	*p = '\0';
	return mic - origin;
}

View file

@ -1,6 +1,5 @@
/*
* conversion between BIG5 and Mule Internal Code(CNS 116643-1992
* plane 1 and plane 2).
* BIG5 support functions (CNS 11643-1992 plane 1 and plane 2).
* This program is partially copied from lv(Multilingual file viewer)
* and slightly modified. lv is written and copyrighted by NARITA Tomio
* (nrt@web.ad.jp).

View file

@ -1,6 +1,6 @@
/*-------------------------------------------------------------------------
*
* EUC_TW, BIG5 and MULE_INTERNAL
* EUC_TW and BIG5
*
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
@ -22,10 +22,6 @@ PG_MODULE_MAGIC_EXT(
PG_FUNCTION_INFO_V1(euc_tw_to_big5);
PG_FUNCTION_INFO_V1(big5_to_euc_tw);
PG_FUNCTION_INFO_V1(euc_tw_to_mic);
PG_FUNCTION_INFO_V1(mic_to_euc_tw);
PG_FUNCTION_INFO_V1(big5_to_mic);
PG_FUNCTION_INFO_V1(mic_to_big5);
/* ----------
* conv_proc(
@ -43,10 +39,6 @@ PG_FUNCTION_INFO_V1(mic_to_big5);
static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError);
static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError);
static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError);
static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError);
Datum
euc_tw_to_big5(PG_FUNCTION_ARGS)
@ -80,74 +72,6 @@ big5_to_euc_tw(PG_FUNCTION_ARGS)
PG_RETURN_INT32(converted);
}
/*
 * euc_tw_to_mic
 *		Conversion procedure entry point: EUC_TW ---> MULE_INTERNAL.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of euc_tw2mic() as an int32.
 */
Datum
euc_tw_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_MULE_INTERNAL);

	PG_RETURN_INT32(euc_tw2mic(input, output, nbytes, suppress));
}
/*
 * mic_to_euc_tw
 *		Conversion procedure entry point: MULE_INTERNAL ---> EUC_TW.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of mic2euc_tw() as an int32.
 */
Datum
mic_to_euc_tw(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_TW);

	PG_RETURN_INT32(mic2euc_tw(input, output, nbytes, suppress));
}
/*
 * big5_to_mic
 *		Conversion procedure entry point: BIG5 ---> MULE_INTERNAL.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of big52mic() as an int32.
 */
Datum
big5_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_MULE_INTERNAL);

	PG_RETURN_INT32(big52mic(input, output, nbytes, suppress));
}
/*
 * mic_to_big5
 *		Conversion procedure entry point: MULE_INTERNAL ---> BIG5.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of mic2big5() as an int32.
 */
Datum
mic_to_big5(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_BIG5);

	PG_RETURN_INT32(mic2big5(input, output, nbytes, suppress));
}
/*
* EUC_TW ---> Big5
*/
static int
euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
@ -303,281 +227,3 @@ big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError)
return big5 - start;
}
/*
 * EUC_TW ---> MIC
 *
 * Translate "len" bytes of EUC_TW input into MULE internal code and
 * NUL-terminate the output.  ASCII bytes are copied unchanged.  A
 * character introduced by SS2 carries a plane byte: 0xa1 and 0xa2 become
 * LC_CNS11643_1 / LC_CNS11643_2, any other plane is emitted as LCPRV2_B
 * plus (plane - 0xa3 + LC_CNS11643_3); the two code bytes follow.  Any
 * other high-bit character is emitted as LC_CNS11643_1 plus its two bytes.
 *
 * A zero byte, or a sequence rejected by pg_encoding_verifymbchar(), ends
 * the conversion when noError is set; otherwise it is passed to
 * report_invalid_encoding().
 *
 * Returns the number of input bytes consumed.
 */
static int
euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
{
	const unsigned char *const origin = euc;

	while (len > 0)
	{
		int			lead = *euc;
		int			width;

		if (!IS_HIGHBIT_SET(lead))
		{
			/* should be ASCII */
			if (lead == 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_EUC_TW,
										(const char *) euc, len);
			}
			*p++ = lead;
			euc++;
			len--;
			continue;
		}

		width = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
		if (width < 0)
		{
			if (noError)
				break;
			report_invalid_encoding(PG_EUC_TW,
									(const char *) euc, len);
		}

		if (lead != SS2)
		{
			/* CNS11643-1 */
			p[0] = LC_CNS11643_1;
			p[1] = lead;
			p[2] = euc[1];
			p += 3;
		}
		else
		{
			int			plane = euc[1]; /* plane No. */

			switch (plane)
			{
				case 0xa1:
					*p++ = LC_CNS11643_1;
					break;
				case 0xa2:
					*p++ = LC_CNS11643_2;
					break;
				default:
					/* other planes are MULE private charsets */
					*p++ = LCPRV2_B;
					*p++ = plane - 0xa3 + LC_CNS11643_3;
					break;
			}
			*p++ = euc[2];
			*p++ = euc[3];
		}

		euc += width;
		len -= width;
	}

	*p = '\0';
	return euc - origin;
}
/*
 * MIC ---> EUC_TW
 *
 * Translate "len" bytes of MULE internal code into EUC_TW and
 * NUL-terminate the output.  ASCII bytes are copied unchanged.
 *		LC_CNS11643_1	-> the 2 payload bytes alone
 *		LC_CNS11643_2	-> SS2, 0xa2, then the 2 payload bytes
 *		LCPRV2_B with a second byte in LC_CNS11643_3..LC_CNS11643_7
 *						-> SS2, (second byte - LC_CNS11643_3 + 0xa3),
 *						   then the 2 payload bytes
 * Anything else is untranslatable.
 *
 * Invalid or untranslatable input ends the conversion when noError is set;
 * otherwise it is passed to report_invalid_encoding() or
 * report_untranslatable_char() respectively.
 *
 * Returns the number of input bytes consumed.
 */
static int
mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
	const unsigned char *const origin = mic;

	while (len > 0)
	{
		int			lead = *mic;
		int			width;

		if (IS_HIGHBIT_SET(lead))
		{
			width = pg_encoding_verifymbchar(PG_MULE_INTERNAL,
											 (const char *) mic, len);
			if (width < 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_MULE_INTERNAL,
										(const char *) mic, len);
			}

			if (lead == LC_CNS11643_1)
			{
				p[0] = mic[1];
				p[1] = mic[2];
				p += 2;
			}
			else if (lead == LC_CNS11643_2)
			{
				p[0] = SS2;
				p[1] = 0xa2;
				p[2] = mic[1];
				p[3] = mic[2];
				p += 4;
			}
			else if (lead == LCPRV2_B &&
					 mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7)
			{
				p[0] = SS2;
				p[1] = mic[1] - LC_CNS11643_3 + 0xa3;
				p[2] = mic[2];
				p[3] = mic[3];
				p += 4;
			}
			else
			{
				/* no EUC_TW equivalent for this charset */
				if (noError)
					break;
				report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW,
										   (const char *) mic, len);
			}
		}
		else
		{
			/* ASCII */
			if (lead == 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_MULE_INTERNAL,
										(const char *) mic, len);
			}
			*p++ = lead;
			width = 1;
		}

		mic += width;
		len -= width;
	}

	*p = '\0';
	return mic - origin;
}
/*
 * Big5 ---> MIC
 *
 * Translate "len" bytes of Big5 input into MULE internal code and
 * NUL-terminate the output.  ASCII bytes are copied unchanged.  Each
 * high-bit character is verified, packed into a 16-bit code from its first
 * two bytes and passed to BIG5toCNS(), which also reports a leading byte
 * "lc".  When lc is non-zero the output is lc plus the two CNS bytes,
 * preceded by LCPRV2_B if lc is LC_CNS11643_3 or LC_CNS11643_4; when lc is
 * zero the character is untranslatable.
 *
 * Invalid or untranslatable input ends the conversion when noError is set;
 * otherwise it is passed to report_invalid_encoding() or
 * report_untranslatable_char() respectively.
 *
 * Returns the number of input bytes consumed.
 */
static int
big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError)
{
	const unsigned char *const origin = big5;

	while (len > 0)
	{
		unsigned short lead = *big5;
		int			width;

		if (IS_HIGHBIT_SET(lead))
		{
			unsigned short code;
			unsigned short cns;
			unsigned char lc;

			width = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
			if (width < 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_BIG5,
										(const char *) big5, len);
			}

			code = (lead << 8) | big5[1];
			cns = BIG5toCNS(code, &lc);

			if (lc == 0)
			{
				/* no CNS mapping for this Big5 code */
				if (noError)
					break;
				report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL,
										   (const char *) big5, len);
			}
			else
			{
				/* Planes 3 and 4 are MULE private charsets */
				if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4)
					*p++ = LCPRV2_B;
				*p++ = lc;		/* Plane No. */
				*p++ = (cns >> 8) & 0x00ff;
				*p++ = cns & 0x00ff;
			}
		}
		else
		{
			/* ASCII */
			if (lead == 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_BIG5,
										(const char *) big5, len);
			}
			*p++ = lead;
			width = 1;
		}

		big5 += width;
		len -= width;
	}

	*p = '\0';
	return big5 - origin;
}
/*
 * MIC ---> Big5
 *
 * Translate "len" bytes of MULE internal code into Big5 and NUL-terminate
 * the output.  ASCII bytes are copied unchanged.  Characters led by
 * LC_CNS11643_1, LC_CNS11643_2 or LCPRV2_B are passed to CNStoBIG5()
 * together with their plane: the leading byte itself, or for LCPRV2_B the
 * byte that follows it.  A zero result from CNStoBIG5(), or any other
 * leading byte, is untranslatable.
 *
 * Invalid or untranslatable input ends the conversion when noError is set;
 * otherwise it is passed to report_invalid_encoding() or
 * report_untranslatable_char() respectively.
 *
 * Returns the number of input bytes consumed.
 */
static int
mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError)
{
	const unsigned char *const origin = mic;

	while (len > 0)
	{
		unsigned short lead = *mic;
		int			width;

		if (IS_HIGHBIT_SET(lead))
		{
			width = pg_encoding_verifymbchar(PG_MULE_INTERNAL,
											 (const char *) mic, len);
			if (width < 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_MULE_INTERNAL,
										(const char *) mic, len);
			}

			if (lead == LC_CNS11643_1 || lead == LC_CNS11643_2 || lead == LCPRV2_B)
			{
				const bool	is_private = (lead == LCPRV2_B);
				unsigned short plane;
				unsigned short cns;
				unsigned short code;

				if (is_private)
				{
					plane = mic[1]; /* get plane no. */
					cns = (mic[2] << 8) | mic[3];
				}
				else
				{
					plane = lead;
					cns = (mic[1] << 8) | mic[2];
				}

				code = CNStoBIG5(cns, plane);
				if (code == 0)
				{
					if (noError)
						break;
					report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
											   (const char *) mic, len);
				}
				*p++ = (code >> 8) & 0x00ff;
				*p++ = code & 0x00ff;
			}
			else
			{
				/* no Big5 equivalent for this charset */
				if (noError)
					break;
				report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5,
										   (const char *) mic, len);
			}
		}
		else
		{
			/* ASCII */
			if (lead == 0)
			{
				if (noError)
					break;
				report_invalid_encoding(PG_MULE_INTERNAL,
										(const char *) mic, len);
			}
			*p++ = lead;
			width = 1;
		}

		mic += width;
		len -= width;
	}

	*p = '\0';
	return mic - origin;
}

View file

@ -20,10 +20,6 @@ PG_MODULE_MAGIC_EXT(
.version = PG_VERSION
);
PG_FUNCTION_INFO_V1(latin2_to_mic);
PG_FUNCTION_INFO_V1(mic_to_latin2);
PG_FUNCTION_INFO_V1(win1250_to_mic);
PG_FUNCTION_INFO_V1(mic_to_win1250);
PG_FUNCTION_INFO_V1(latin2_to_win1250);
PG_FUNCTION_INFO_V1(win1250_to_latin2);
@ -82,72 +78,6 @@ static const unsigned char iso88592_2_win1250[] = {
};
/*
 * latin2_to_mic
 *		Conversion procedure entry point: LATIN2 ---> MULE_INTERNAL.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of latin2mic() called with LC_ISO8859_2 / PG_LATIN2.
 */
Datum
latin2_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_MULE_INTERNAL);

	PG_RETURN_INT32(latin2mic(input, output, nbytes,
							  LC_ISO8859_2, PG_LATIN2, suppress));
}
/*
 * mic_to_latin2
 *		Conversion procedure entry point: MULE_INTERNAL ---> LATIN2.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of mic2latin() called with LC_ISO8859_2 / PG_LATIN2.
 */
Datum
mic_to_latin2(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN2);

	PG_RETURN_INT32(mic2latin(input, output, nbytes,
							  LC_ISO8859_2, PG_LATIN2, suppress));
}
/*
 * win1250_to_mic
 *		Conversion procedure entry point: WIN1250 ---> MULE_INTERNAL.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of latin2mic_with_table() called with LC_ISO8859_2,
 * PG_WIN1250 and the win1250_2_iso88592 table.
 */
Datum
win1250_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_MULE_INTERNAL);

	PG_RETURN_INT32(latin2mic_with_table(input, output, nbytes,
										 LC_ISO8859_2, PG_WIN1250,
										 win1250_2_iso88592, suppress));
}
/*
 * mic_to_win1250
 *		Conversion procedure entry point: MULE_INTERNAL ---> WIN1250.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of mic2latin_with_table() called with LC_ISO8859_2,
 * PG_WIN1250 and the iso88592_2_win1250 table.
 */
Datum
mic_to_win1250(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1250);

	PG_RETURN_INT32(mic2latin_with_table(input, output, nbytes,
										 LC_ISO8859_2, PG_WIN1250,
										 iso88592_2_win1250, suppress));
}
Datum
latin2_to_win1250(PG_FUNCTION_ARGS)
{

View file

@ -1,139 +0,0 @@
/*-------------------------------------------------------------------------
*
* LATINn and MULE_INTERNAL
*
* Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
PG_MODULE_MAGIC_EXT(
.name = "latin_and_mic",
.version = PG_VERSION
);
PG_FUNCTION_INFO_V1(latin1_to_mic);
PG_FUNCTION_INFO_V1(mic_to_latin1);
PG_FUNCTION_INFO_V1(latin3_to_mic);
PG_FUNCTION_INFO_V1(mic_to_latin3);
PG_FUNCTION_INFO_V1(latin4_to_mic);
PG_FUNCTION_INFO_V1(mic_to_latin4);
/* ----------
* conv_proc(
* INTEGER, -- source encoding id
* INTEGER, -- destination encoding id
* CSTRING, -- source string (null terminated C string)
* CSTRING, -- destination string (null terminated C string)
* INTEGER, -- source string length
* BOOL -- if true, don't throw an error if conversion fails
* ) returns INTEGER;
*
* Returns the number of bytes successfully converted.
* ----------
*/
/*
 * latin1_to_mic
 *		Conversion procedure entry point: LATIN1 ---> MULE_INTERNAL.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of latin2mic() called with LC_ISO8859_1 / PG_LATIN1.
 */
Datum
latin1_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_MULE_INTERNAL);

	PG_RETURN_INT32(latin2mic(input, output, nbytes,
							  LC_ISO8859_1, PG_LATIN1, suppress));
}
/*
 * mic_to_latin1
 *		Conversion procedure entry point: MULE_INTERNAL ---> LATIN1.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of mic2latin() called with LC_ISO8859_1 / PG_LATIN1.
 */
Datum
mic_to_latin1(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN1);

	PG_RETURN_INT32(mic2latin(input, output, nbytes,
							  LC_ISO8859_1, PG_LATIN1, suppress));
}
/*
 * latin3_to_mic
 *		Conversion procedure entry point: LATIN3 ---> MULE_INTERNAL.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of latin2mic() called with LC_ISO8859_3 / PG_LATIN3.
 */
Datum
latin3_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN3, PG_MULE_INTERNAL);

	PG_RETURN_INT32(latin2mic(input, output, nbytes,
							  LC_ISO8859_3, PG_LATIN3, suppress));
}
/*
 * mic_to_latin3
 *		Conversion procedure entry point: MULE_INTERNAL ---> LATIN3.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of mic2latin() called with LC_ISO8859_3 / PG_LATIN3.
 */
Datum
mic_to_latin3(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN3);

	PG_RETURN_INT32(mic2latin(input, output, nbytes,
							  LC_ISO8859_3, PG_LATIN3, suppress));
}
/*
 * latin4_to_mic
 *		Conversion procedure entry point: LATIN4 ---> MULE_INTERNAL.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of latin2mic() called with LC_ISO8859_4 / PG_LATIN4.
 */
Datum
latin4_to_mic(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN4, PG_MULE_INTERNAL);

	PG_RETURN_INT32(latin2mic(input, output, nbytes,
							  LC_ISO8859_4, PG_LATIN4, suppress));
}
/*
 * mic_to_latin4
 *		Conversion procedure entry point: MULE_INTERNAL ---> LATIN4.
 *
 * Fetches the source string (arg 2), destination buffer (arg 3), source
 * length (arg 4) and noError flag (arg 5), invokes
 * CHECK_ENCODING_CONVERSION_ARGS for the expected encoding pair, and
 * returns the result of mic2latin() called with LC_ISO8859_4 / PG_LATIN4.
 */
Datum
mic_to_latin4(PG_FUNCTION_ARGS)
{
	unsigned char *input = (unsigned char *) PG_GETARG_CSTRING(2);
	unsigned char *output = (unsigned char *) PG_GETARG_CSTRING(3);
	int			nbytes = PG_GETARG_INT32(4);
	bool		suppress = PG_GETARG_BOOL(5);

	CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN4);

	PG_RETURN_INT32(mic2latin(input, output, nbytes,
							  LC_ISO8859_4, PG_LATIN4, suppress));
}

View file

@ -1,17 +1,14 @@
# Copyright (c) 2022-2026, PostgreSQL Global Development Group
encodings = {
'cyrillic_and_mic': ['cyrillic_and_mic/cyrillic_and_mic.c'],
'cyrillic': ['cyrillic/cyrillic.c'],
'euc2004_sjis2004': ['euc2004_sjis2004/euc2004_sjis2004.c'],
'euc_cn_and_mic': ['euc_cn_and_mic/euc_cn_and_mic.c'],
'euc_jp_and_sjis': ['euc_jp_and_sjis/euc_jp_and_sjis.c'],
'euc_kr_and_mic': ['euc_kr_and_mic/euc_kr_and_mic.c'],
'euc_tw_and_big5': [
'euc_tw_and_big5/euc_tw_and_big5.c',
'euc_tw_and_big5/big5.c',
],
'latin2_and_win1250': ['latin2_and_win1250/latin2_and_win1250.c'],
'latin_and_mic': ['latin_and_mic/latin_and_mic.c'],
'utf8_and_big5': ['utf8_and_big5/utf8_and_big5.c'],
'utf8_and_cyrillic': ['utf8_and_cyrillic/utf8_and_cyrillic.c'],
'utf8_and_euc2004': ['utf8_and_euc2004/utf8_and_euc2004.c'],

View file

@ -1308,8 +1308,7 @@ SetMessageEncoding(int encoding)
#ifdef ENABLE_NLS
/*
* Make one bind_textdomain_codeset() call, translating a pg_enc to a gettext
* codeset. Fails for MULE_INTERNAL, an encoding unknown to gettext; can also
* fail for gettext-internal causes like out-of-memory.
* codeset. Can fail for gettext-internal causes like out-of-memory.
*/
static bool
raw_pg_bind_textdomain_codeset(const char *domainname, int encoding)
@ -1429,8 +1428,7 @@ PG_encoding_to_char(PG_FUNCTION_ARGS)
/*
* gettext() returns messages in this encoding. This often matches the
* database encoding, but it differs for SQL_ASCII databases, for processes
* not attached to a database, and under a database encoding lacking iconv
* support (MULE_INTERNAL).
* not attached to a database.
*/
int
GetMessageEncoding(void)

View file

@ -164,9 +164,6 @@ static const pg_encname pg_encname_tbl[] =
{
"mskanji", PG_SJIS
}, /* alias for Shift_JIS */
{
"muleinternal", PG_MULE_INTERNAL
},
{
"shiftjis", PG_SJIS
}, /* Shift_JIS; JIS X 0202-1991 */
@ -314,7 +311,6 @@ const pg_enc2name pg_enc2name_tbl[] =
[PG_EUC_TW] = DEF_ENC2NAME(EUC_TW, 0),
[PG_EUC_JIS_2004] = DEF_ENC2NAME(EUC_JIS_2004, 20932),
[PG_UTF8] = DEF_ENC2NAME(UTF8, 65001),
[PG_MULE_INTERNAL] = DEF_ENC2NAME(MULE_INTERNAL, 0),
[PG_LATIN1] = DEF_ENC2NAME(LATIN1, 28591),
[PG_LATIN2] = DEF_ENC2NAME(LATIN2, 28592),
[PG_LATIN3] = DEF_ENC2NAME(LATIN3, 28593),
@ -353,15 +349,12 @@ const pg_enc2name pg_enc2name_tbl[] =
/* ----------
* These are encoding names for gettext.
*
* This covers all encodings except MULE_INTERNAL, which is alien to gettext.
* ----------
*/
const char *pg_enc2gettext_tbl[] =
{
[PG_SQL_ASCII] = "US-ASCII",
[PG_UTF8] = "UTF-8",
[PG_MULE_INTERNAL] = NULL,
[PG_LATIN1] = "LATIN1",
[PG_LATIN2] = "LATIN2",
[PG_LATIN3] = "LATIN3",
@ -420,7 +413,6 @@ static const char *const pg_enc2icu_tbl[] =
[PG_EUC_TW] = "EUC-TW",
[PG_EUC_JIS_2004] = NULL,
[PG_UTF8] = "UTF-8",
[PG_MULE_INTERNAL] = NULL,
[PG_LATIN1] = "ISO-8859-1",
[PG_LATIN2] = "ISO-8859-2",
[PG_LATIN3] = "ISO-8859-3",

View file

@ -682,178 +682,6 @@ pg_utf_dsplen(const unsigned char *s)
return ucs_wcwidth(utf8_to_unicode(s));
}
/*
 * convert mule internal code to pg_wchar
 * caller should allocate enough space for "to"
 * len: length of from.
 * "from" not necessarily null terminated.
 *
 * Conversion stops when "len" bytes are used up or a zero byte is seen.
 * Each character becomes one pg_wchar; the output is terminated with a
 * zero pg_wchar.  Returns the number of characters written, not counting
 * the terminator.
 *
 * NOTE(review): MB2CHAR_NEED_AT_LEAST is defined elsewhere; it presumably
 * ends the conversion when fewer than the stated number of bytes remain --
 * confirm against its definition.
 */
static int
pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
{
int cnt = 0;
while (len > 0 && *from)
{
if (IS_LC1(*from))
{
/* 2 bytes: leading byte into bits 16 and up, code byte into the low 8 bits */
MB2CHAR_NEED_AT_LEAST(len, 2);
*to = *from++ << 16;
*to |= *from++;
len -= 2;
}
else if (IS_LCPRV1(*from))
{
/* 3 bytes: the prefix byte is skipped, then packed like the case above */
MB2CHAR_NEED_AT_LEAST(len, 3);
from++;
*to = *from++ << 16;
*to |= *from++;
len -= 3;
}
else if (IS_LC2(*from))
{
/* 3 bytes: leading byte, then two code bytes in bits 8-15 and 0-7 */
MB2CHAR_NEED_AT_LEAST(len, 3);
*to = *from++ << 16;
*to |= *from++ << 8;
*to |= *from++;
len -= 3;
}
else if (IS_LCPRV2(*from))
{
/* 4 bytes: the prefix byte is skipped, then packed like the case above */
MB2CHAR_NEED_AT_LEAST(len, 4);
from++;
*to = *from++ << 16;
*to |= *from++ << 8;
*to |= *from++;
len -= 4;
}
else
{ /* assume ASCII */
*to = (unsigned char) *from++;
len--;
}
to++;
cnt++;
}
*to = 0;
return cnt;
}
/*
 * convert pg_wchar to mule internal code
 * caller should allocate enough space for "to"
 * len: length of from.
 * "from" not necessarily null terminated.
 *
 * Bits 16..23 of each pg_wchar select the output form: an official 1- or
 * 2-byte charset leading byte is written directly, a value in one of the
 * private ranges is preceded by the matching LCPRV* prefix byte, and
 * anything else is written as a single byte taken from the low 8 bits.
 * Conversion stops after "len" characters or at a zero pg_wchar.  The
 * output is NUL-terminated; the return value is the number of bytes
 * written, not counting the terminator.
 */
static int
pg_wchar2mule_with_len(const pg_wchar *from, unsigned char *to, int len)
{
	int			nbytes = 0;

	for (; len > 0 && *from; from++, len--)
	{
		const pg_wchar wc = *from;
		unsigned char lb = (wc >> 16) & 0xff;

		if (IS_LC1(lb))
		{
			to[0] = lb;
			to[1] = wc & 0xff;
			to += 2;
			nbytes += 2;
		}
		else if (IS_LC2(lb))
		{
			to[0] = lb;
			to[1] = (wc >> 8) & 0xff;
			to[2] = wc & 0xff;
			to += 3;
			nbytes += 3;
		}
		else if (IS_LCPRV1_A_RANGE(lb))
		{
			to[0] = LCPRV1_A;
			to[1] = lb;
			to[2] = wc & 0xff;
			to += 3;
			nbytes += 3;
		}
		else if (IS_LCPRV1_B_RANGE(lb))
		{
			to[0] = LCPRV1_B;
			to[1] = lb;
			to[2] = wc & 0xff;
			to += 3;
			nbytes += 3;
		}
		else if (IS_LCPRV2_A_RANGE(lb))
		{
			to[0] = LCPRV2_A;
			to[1] = lb;
			to[2] = (wc >> 8) & 0xff;
			to[3] = wc & 0xff;
			to += 4;
			nbytes += 4;
		}
		else if (IS_LCPRV2_B_RANGE(lb))
		{
			to[0] = LCPRV2_B;
			to[1] = lb;
			to[2] = (wc >> 8) & 0xff;
			to[3] = wc & 0xff;
			to += 4;
			nbytes += 4;
		}
		else
		{
			/* no recognised leading byte: emit the low byte only */
			*to++ = wc & 0xff;
			nbytes += 1;
		}
	}

	*to = 0;
	return nbytes;
}
/*
 * Byte length of the MULE internal code character starting at "s", decided
 * from its first byte only: 2 for IS_LC1, 3 for IS_LCPRV1 or IS_LC2, 4 for
 * IS_LCPRV2, and 1 (assumed ASCII) otherwise.
 *
 * exported for direct use by conv.c
 */
int
pg_mule_mblen(const unsigned char *s)
{
	const unsigned char lead = *s;

	if (IS_LC1(lead))
		return 2;
	if (IS_LCPRV1(lead))
		return 3;
	if (IS_LC2(lead))
		return 3;
	if (IS_LCPRV2(lead))
		return 4;

	return 1;					/* assume ASCII */
}
/*
 * Display width of the MULE internal code character starting at "s",
 * decided from its first byte only: 1 for IS_LC1 / IS_LCPRV1, 2 for
 * IS_LC2 / IS_LCPRV2, and 1 (assumed ASCII) otherwise.
 */
static int
pg_mule_dsplen(const unsigned char *s)
{
	const unsigned char lead = *s;

	/*
	 * Note: it's not really appropriate to assume that all multibyte charsets
	 * are double-wide on screen.  But this seems an okay approximation for
	 * the MULE charsets we currently support.
	 */
	if (IS_LC1(lead) || IS_LCPRV1(lead))
		return 1;
	if (IS_LC2(lead) || IS_LCPRV2(lead))
		return 2;

	return 1;					/* assume ASCII */
}
/*
* ISO8859-1
*/
@ -1378,56 +1206,6 @@ pg_johab_verifystr(const unsigned char *s, int len)
return s - start;
}
/*
 * Verify one MULE internal code character at "s", with "len" bytes
 * available.
 *
 * The expected length comes from pg_mule_mblen().  The character is
 * rejected if fewer than that many bytes are available, or if any byte
 * after the first lacks the high bit.
 *
 * Returns the character's byte length, or -1 if it is invalid.
 */
static int
pg_mule_verifychar(const unsigned char *s, int len)
{
	const int	mbl = pg_mule_mblen(s);
	int			i;

	if (len < mbl)
		return -1;

	/* every trailing byte must have its high bit set */
	for (i = 1; i < mbl; i++)
	{
		if (!IS_HIGHBIT_SET(s[i]))
			return -1;
	}

	return mbl;
}
/*
 * Verify a MULE internal code string of "len" bytes.
 *
 * Scans forward one character at a time and stops at the first zero byte
 * or the first character rejected by pg_mule_verifychar().
 *
 * Returns the number of leading bytes that verified.
 */
static int
pg_mule_verifystr(const unsigned char *s, int len)
{
	const unsigned char *cur = s;
	int			remaining = len;

	while (remaining > 0)
	{
		int			step = 1;

		if (IS_HIGHBIT_SET(*cur))
		{
			step = pg_mule_verifychar(cur, remaining);
			if (step == -1)
				break;
		}
		else if (*cur == '\0')
		{
			/* fast path for ASCII-subset characters ends at a NUL */
			break;
		}

		cur += step;
		remaining -= step;
	}

	return cur - s;
}
static int
pg_latin1_verifychar(const unsigned char *s, int len)
{
@ -2091,7 +1869,6 @@ const pg_wchar_tbl pg_wchar_table[] = {
[PG_EUC_TW] = {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4},
[PG_EUC_JIS_2004] = {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3},
[PG_UTF8] = {pg_utf2wchar_with_len, pg_wchar2utf_with_len, pg_utf_mblen, pg_utf_dsplen, pg_utf8_verifychar, pg_utf8_verifystr, 4},
[PG_MULE_INTERNAL] = {pg_mule2wchar_with_len, pg_wchar2mule_with_len, pg_mule_mblen, pg_mule_dsplen, pg_mule_verifychar, pg_mule_verifystr, 4},
[PG_LATIN1] = {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},
[PG_LATIN2] = {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},
[PG_LATIN3] = {pg_latin12wchar_with_len, pg_wchar2single_with_len, pg_latin1_mblen, pg_latin1_dsplen, pg_latin1_verifychar, pg_latin1_verifystr, 1},

View file

@ -15,30 +15,6 @@
[
{ oid => '4402', descr => 'conversion for KOI8R to MULE_INTERNAL',
conname => 'koi8_r_to_mic', conforencoding => 'PG_KOI8R',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'koi8r_to_mic' },
{ oid => '4403', descr => 'conversion for MULE_INTERNAL to KOI8R',
conname => 'mic_to_koi8_r', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_KOI8R', conproc => 'mic_to_koi8r' },
{ oid => '4404', descr => 'conversion for ISO-8859-5 to MULE_INTERNAL',
conname => 'iso_8859_5_to_mic', conforencoding => 'PG_ISO_8859_5',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'iso_to_mic' },
{ oid => '4405', descr => 'conversion for MULE_INTERNAL to ISO-8859-5',
conname => 'mic_to_iso_8859_5', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_ISO_8859_5', conproc => 'mic_to_iso' },
{ oid => '4406', descr => 'conversion for WIN1251 to MULE_INTERNAL',
conname => 'windows_1251_to_mic', conforencoding => 'PG_WIN1251',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'win1251_to_mic' },
{ oid => '4407', descr => 'conversion for MULE_INTERNAL to WIN1251',
conname => 'mic_to_windows_1251', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_WIN1251', conproc => 'mic_to_win1251' },
{ oid => '4408', descr => 'conversion for WIN866 to MULE_INTERNAL',
conname => 'windows_866_to_mic', conforencoding => 'PG_WIN866',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'win866_to_mic' },
{ oid => '4409', descr => 'conversion for MULE_INTERNAL to WIN866',
conname => 'mic_to_windows_866', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_WIN866', conproc => 'mic_to_win866' },
{ oid => '4410', descr => 'conversion for KOI8R to WIN1251',
conname => 'koi8_r_to_windows_1251', conforencoding => 'PG_KOI8R',
contoencoding => 'PG_WIN1251', conproc => 'koi8r_to_win1251' },
@ -75,90 +51,24 @@
{ oid => '4421', descr => 'conversion for WIN866 to ISO-8859-5',
conname => 'windows_866_to_iso_8859_5', conforencoding => 'PG_WIN866',
contoencoding => 'PG_ISO_8859_5', conproc => 'win866_to_iso' },
{ oid => '4422', descr => 'conversion for EUC_CN to MULE_INTERNAL',
conname => 'euc_cn_to_mic', conforencoding => 'PG_EUC_CN',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'euc_cn_to_mic' },
{ oid => '4423', descr => 'conversion for MULE_INTERNAL to EUC_CN',
conname => 'mic_to_euc_cn', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_EUC_CN', conproc => 'mic_to_euc_cn' },
{ oid => '4424', descr => 'conversion for EUC_JP to SJIS',
conname => 'euc_jp_to_sjis', conforencoding => 'PG_EUC_JP',
contoencoding => 'PG_SJIS', conproc => 'euc_jp_to_sjis' },
{ oid => '4425', descr => 'conversion for SJIS to EUC_JP',
conname => 'sjis_to_euc_jp', conforencoding => 'PG_SJIS',
contoencoding => 'PG_EUC_JP', conproc => 'sjis_to_euc_jp' },
{ oid => '4426', descr => 'conversion for EUC_JP to MULE_INTERNAL',
conname => 'euc_jp_to_mic', conforencoding => 'PG_EUC_JP',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'euc_jp_to_mic' },
{ oid => '4427', descr => 'conversion for SJIS to MULE_INTERNAL',
conname => 'sjis_to_mic', conforencoding => 'PG_SJIS',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'sjis_to_mic' },
{ oid => '4428', descr => 'conversion for MULE_INTERNAL to EUC_JP',
conname => 'mic_to_euc_jp', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_EUC_JP', conproc => 'mic_to_euc_jp' },
{ oid => '4429', descr => 'conversion for MULE_INTERNAL to SJIS',
conname => 'mic_to_sjis', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_SJIS', conproc => 'mic_to_sjis' },
{ oid => '4430', descr => 'conversion for EUC_KR to MULE_INTERNAL',
conname => 'euc_kr_to_mic', conforencoding => 'PG_EUC_KR',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'euc_kr_to_mic' },
{ oid => '4431', descr => 'conversion for MULE_INTERNAL to EUC_KR',
conname => 'mic_to_euc_kr', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_EUC_KR', conproc => 'mic_to_euc_kr' },
{ oid => '4432', descr => 'conversion for EUC_TW to BIG5',
conname => 'euc_tw_to_big5', conforencoding => 'PG_EUC_TW',
contoencoding => 'PG_BIG5', conproc => 'euc_tw_to_big5' },
{ oid => '4433', descr => 'conversion for BIG5 to EUC_TW',
conname => 'big5_to_euc_tw', conforencoding => 'PG_BIG5',
contoencoding => 'PG_EUC_TW', conproc => 'big5_to_euc_tw' },
{ oid => '4434', descr => 'conversion for EUC_TW to MULE_INTERNAL',
conname => 'euc_tw_to_mic', conforencoding => 'PG_EUC_TW',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'euc_tw_to_mic' },
{ oid => '4435', descr => 'conversion for BIG5 to MULE_INTERNAL',
conname => 'big5_to_mic', conforencoding => 'PG_BIG5',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'big5_to_mic' },
{ oid => '4436', descr => 'conversion for MULE_INTERNAL to EUC_TW',
conname => 'mic_to_euc_tw', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_EUC_TW', conproc => 'mic_to_euc_tw' },
{ oid => '4437', descr => 'conversion for MULE_INTERNAL to BIG5',
conname => 'mic_to_big5', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_BIG5', conproc => 'mic_to_big5' },
{ oid => '4438', descr => 'conversion for LATIN2 to MULE_INTERNAL',
conname => 'iso_8859_2_to_mic', conforencoding => 'PG_LATIN2',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'latin2_to_mic' },
{ oid => '4439', descr => 'conversion for MULE_INTERNAL to LATIN2',
conname => 'mic_to_iso_8859_2', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_LATIN2', conproc => 'mic_to_latin2' },
{ oid => '4440', descr => 'conversion for WIN1250 to MULE_INTERNAL',
conname => 'windows_1250_to_mic', conforencoding => 'PG_WIN1250',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'win1250_to_mic' },
{ oid => '4441', descr => 'conversion for MULE_INTERNAL to WIN1250',
conname => 'mic_to_windows_1250', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_WIN1250', conproc => 'mic_to_win1250' },
{ oid => '4442', descr => 'conversion for LATIN2 to WIN1250',
conname => 'iso_8859_2_to_windows_1250', conforencoding => 'PG_LATIN2',
contoencoding => 'PG_WIN1250', conproc => 'latin2_to_win1250' },
{ oid => '4443', descr => 'conversion for WIN1250 to LATIN2',
conname => 'windows_1250_to_iso_8859_2', conforencoding => 'PG_WIN1250',
contoencoding => 'PG_LATIN2', conproc => 'win1250_to_latin2' },
{ oid => '4444', descr => 'conversion for LATIN1 to MULE_INTERNAL',
conname => 'iso_8859_1_to_mic', conforencoding => 'PG_LATIN1',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'latin1_to_mic' },
{ oid => '4445', descr => 'conversion for MULE_INTERNAL to LATIN1',
conname => 'mic_to_iso_8859_1', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_LATIN1', conproc => 'mic_to_latin1' },
{ oid => '4446', descr => 'conversion for LATIN3 to MULE_INTERNAL',
conname => 'iso_8859_3_to_mic', conforencoding => 'PG_LATIN3',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'latin3_to_mic' },
{ oid => '4447', descr => 'conversion for MULE_INTERNAL to LATIN3',
conname => 'mic_to_iso_8859_3', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_LATIN3', conproc => 'mic_to_latin3' },
{ oid => '4448', descr => 'conversion for LATIN4 to MULE_INTERNAL',
conname => 'iso_8859_4_to_mic', conforencoding => 'PG_LATIN4',
contoencoding => 'PG_MULE_INTERNAL', conproc => 'latin4_to_mic' },
{ oid => '4449', descr => 'conversion for MULE_INTERNAL to LATIN4',
conname => 'mic_to_iso_8859_4', conforencoding => 'PG_MULE_INTERNAL',
contoencoding => 'PG_LATIN4', conproc => 'mic_to_latin4' },
{ oid => '4452', descr => 'conversion for BIG5 to UTF8',
conname => 'big5_to_utf8', conforencoding => 'PG_BIG5',
contoencoding => 'PG_UTF8', conproc => 'big5_to_utf8' },

View file

@ -12026,112 +12026,62 @@
proargtypes => '', prosrc => 'binary_upgrade_create_conflict_detection_slot' },
# conversion functions
{ oid => '4302',
descr => 'internal conversion function for KOI8R to MULE_INTERNAL',
proname => 'koi8r_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'koi8r_to_mic', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4303',
descr => 'internal conversion function for MULE_INTERNAL to KOI8R',
proname => 'mic_to_koi8r', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_koi8r', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4304',
descr => 'internal conversion function for ISO-8859-5 to MULE_INTERNAL',
proname => 'iso_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'iso_to_mic',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4305',
descr => 'internal conversion function for MULE_INTERNAL to ISO-8859-5',
proname => 'mic_to_iso', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool', prosrc => 'mic_to_iso',
probin => '$libdir/cyrillic_and_mic' },
{ oid => '4306',
descr => 'internal conversion function for WIN1251 to MULE_INTERNAL',
proname => 'win1251_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win1251_to_mic', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4307',
descr => 'internal conversion function for MULE_INTERNAL to WIN1251',
proname => 'mic_to_win1251', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_win1251', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4308',
descr => 'internal conversion function for WIN866 to MULE_INTERNAL',
proname => 'win866_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win866_to_mic', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4309',
descr => 'internal conversion function for MULE_INTERNAL to WIN866',
proname => 'mic_to_win866', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_win866', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4310', descr => 'internal conversion function for KOI8R to WIN1251',
proname => 'koi8r_to_win1251', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'koi8r_to_win1251', probin => '$libdir/cyrillic_and_mic' },
prosrc => 'koi8r_to_win1251', probin => '$libdir/cyrillic' },
{ oid => '4311', descr => 'internal conversion function for WIN1251 to KOI8R',
proname => 'win1251_to_koi8r', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win1251_to_koi8r', probin => '$libdir/cyrillic_and_mic' },
prosrc => 'win1251_to_koi8r', probin => '$libdir/cyrillic' },
{ oid => '4312', descr => 'internal conversion function for KOI8R to WIN866',
proname => 'koi8r_to_win866', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'koi8r_to_win866', probin => '$libdir/cyrillic_and_mic' },
prosrc => 'koi8r_to_win866', probin => '$libdir/cyrillic' },
{ oid => '4313', descr => 'internal conversion function for WIN866 to KOI8R',
proname => 'win866_to_koi8r', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win866_to_koi8r', probin => '$libdir/cyrillic_and_mic' },
prosrc => 'win866_to_koi8r', probin => '$libdir/cyrillic' },
{ oid => '4314',
descr => 'internal conversion function for WIN866 to WIN1251',
proname => 'win866_to_win1251', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win866_to_win1251', probin => '$libdir/cyrillic_and_mic' },
prosrc => 'win866_to_win1251', probin => '$libdir/cyrillic' },
{ oid => '4315',
descr => 'internal conversion function for WIN1251 to WIN866',
proname => 'win1251_to_win866', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win1251_to_win866', probin => '$libdir/cyrillic_and_mic' },
prosrc => 'win1251_to_win866', probin => '$libdir/cyrillic' },
{ oid => '4316',
descr => 'internal conversion function for ISO-8859-5 to KOI8R',
proname => 'iso_to_koi8r', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'iso_to_koi8r', probin => '$libdir/cyrillic_and_mic' },
prosrc => 'iso_to_koi8r', probin => '$libdir/cyrillic' },
{ oid => '4317',
descr => 'internal conversion function for KOI8R to ISO-8859-5',
proname => 'koi8r_to_iso', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'koi8r_to_iso', probin => '$libdir/cyrillic_and_mic' },
prosrc => 'koi8r_to_iso', probin => '$libdir/cyrillic' },
{ oid => '4318',
descr => 'internal conversion function for ISO-8859-5 to WIN1251',
proname => 'iso_to_win1251', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'iso_to_win1251', probin => '$libdir/cyrillic_and_mic' },
prosrc => 'iso_to_win1251', probin => '$libdir/cyrillic' },
{ oid => '4319',
descr => 'internal conversion function for WIN1251 to ISO-8859-5',
proname => 'win1251_to_iso', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win1251_to_iso', probin => '$libdir/cyrillic_and_mic' },
prosrc => 'win1251_to_iso', probin => '$libdir/cyrillic' },
{ oid => '4320',
descr => 'internal conversion function for ISO-8859-5 to WIN866',
proname => 'iso_to_win866', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'iso_to_win866', probin => '$libdir/cyrillic_and_mic' },
prosrc => 'iso_to_win866', probin => '$libdir/cyrillic' },
{ oid => '4321',
descr => 'internal conversion function for WIN866 to ISO-8859-5',
proname => 'win866_to_iso', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win866_to_iso', probin => '$libdir/cyrillic_and_mic' },
{ oid => '4322',
descr => 'internal conversion function for EUC_CN to MULE_INTERNAL',
proname => 'euc_cn_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'euc_cn_to_mic', probin => '$libdir/euc_cn_and_mic' },
{ oid => '4323',
descr => 'internal conversion function for MULE_INTERNAL to EUC_CN',
proname => 'mic_to_euc_cn', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_euc_cn', probin => '$libdir/euc_cn_and_mic' },
prosrc => 'win866_to_iso', probin => '$libdir/cyrillic' },
{ oid => '4324', descr => 'internal conversion function for EUC_JP to SJIS',
proname => 'euc_jp_to_sjis', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
@ -12140,36 +12090,6 @@
proname => 'sjis_to_euc_jp', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'sjis_to_euc_jp', probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4326',
descr => 'internal conversion function for EUC_JP to MULE_INTERNAL',
proname => 'euc_jp_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'euc_jp_to_mic', probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4327',
descr => 'internal conversion function for SJIS to MULE_INTERNAL',
proname => 'sjis_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'sjis_to_mic', probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4328',
descr => 'internal conversion function for MULE_INTERNAL to EUC_JP',
proname => 'mic_to_euc_jp', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_euc_jp', probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4329',
descr => 'internal conversion function for MULE_INTERNAL to SJIS',
proname => 'mic_to_sjis', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_sjis', probin => '$libdir/euc_jp_and_sjis' },
{ oid => '4330',
descr => 'internal conversion function for EUC_KR to MULE_INTERNAL',
proname => 'euc_kr_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'euc_kr_to_mic', probin => '$libdir/euc_kr_and_mic' },
{ oid => '4331',
descr => 'internal conversion function for MULE_INTERNAL to EUC_KR',
proname => 'mic_to_euc_kr', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_euc_kr', probin => '$libdir/euc_kr_and_mic' },
{ oid => '4332', descr => 'internal conversion function for EUC_TW to BIG5',
proname => 'euc_tw_to_big5', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
@ -12178,46 +12098,6 @@
proname => 'big5_to_euc_tw', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'big5_to_euc_tw', probin => '$libdir/euc_tw_and_big5' },
{ oid => '4334',
descr => 'internal conversion function for EUC_TW to MULE_INTERNAL',
proname => 'euc_tw_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'euc_tw_to_mic', probin => '$libdir/euc_tw_and_big5' },
{ oid => '4335',
descr => 'internal conversion function for BIG5 to MULE_INTERNAL',
proname => 'big5_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'big5_to_mic', probin => '$libdir/euc_tw_and_big5' },
{ oid => '4336',
descr => 'internal conversion function for MULE_INTERNAL to EUC_TW',
proname => 'mic_to_euc_tw', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_euc_tw', probin => '$libdir/euc_tw_and_big5' },
{ oid => '4337',
descr => 'internal conversion function for MULE_INTERNAL to BIG5',
proname => 'mic_to_big5', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_big5', probin => '$libdir/euc_tw_and_big5' },
{ oid => '4338',
descr => 'internal conversion function for LATIN2 to MULE_INTERNAL',
proname => 'latin2_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'latin2_to_mic', probin => '$libdir/latin2_and_win1250' },
{ oid => '4339',
descr => 'internal conversion function for MULE_INTERNAL to LATIN2',
proname => 'mic_to_latin2', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_latin2', probin => '$libdir/latin2_and_win1250' },
{ oid => '4340',
descr => 'internal conversion function for WIN1250 to MULE_INTERNAL',
proname => 'win1250_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win1250_to_mic', probin => '$libdir/latin2_and_win1250' },
{ oid => '4341',
descr => 'internal conversion function for MULE_INTERNAL to WIN1250',
proname => 'mic_to_win1250', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_win1250', probin => '$libdir/latin2_and_win1250' },
{ oid => '4342',
descr => 'internal conversion function for LATIN2 to WIN1250',
proname => 'latin2_to_win1250', prolang => 'c', prorettype => 'int4',
@ -12228,36 +12108,6 @@
proname => 'win1250_to_latin2', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'win1250_to_latin2', probin => '$libdir/latin2_and_win1250' },
{ oid => '4344',
descr => 'internal conversion function for LATIN1 to MULE_INTERNAL',
proname => 'latin1_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'latin1_to_mic', probin => '$libdir/latin_and_mic' },
{ oid => '4345',
descr => 'internal conversion function for MULE_INTERNAL to LATIN1',
proname => 'mic_to_latin1', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_latin1', probin => '$libdir/latin_and_mic' },
{ oid => '4346',
descr => 'internal conversion function for LATIN3 to MULE_INTERNAL',
proname => 'latin3_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'latin3_to_mic', probin => '$libdir/latin_and_mic' },
{ oid => '4347',
descr => 'internal conversion function for MULE_INTERNAL to LATIN3',
proname => 'mic_to_latin3', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_latin3', probin => '$libdir/latin_and_mic' },
{ oid => '4348',
descr => 'internal conversion function for LATIN4 to MULE_INTERNAL',
proname => 'latin4_to_mic', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'latin4_to_mic', probin => '$libdir/latin_and_mic' },
{ oid => '4349',
descr => 'internal conversion function for MULE_INTERNAL to LATIN4',
proname => 'mic_to_latin4', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',
prosrc => 'mic_to_latin4', probin => '$libdir/latin_and_mic' },
{ oid => '4352', descr => 'internal conversion function for BIG5 to UTF8',
proname => 'big5_to_utf8', prolang => 'c', prorettype => 'int4',
proargtypes => 'int4 int4 cstring internal int4 bool',

View file

@ -39,187 +39,21 @@ typedef unsigned int pg_wchar;
#define SS3 0x8f /* single shift 3 (JIS0212) */
/*
* SJIS validation macros
* EUC_TW planes
*/
#define ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
#define ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
/*----------------------------------------------------
* MULE Internal Encoding (MIC)
*
* This encoding follows the design used within XEmacs; it is meant to
* subsume many externally-defined character sets. Each character includes
* identification of the character set it belongs to, so the encoding is
* general but somewhat bulky.
*
* Currently PostgreSQL supports 5 types of MULE character sets:
*
* 1) 1-byte ASCII characters. Each byte is below 0x80.
*
* 2) "Official" single byte charsets such as ISO-8859-1 (Latin1).
* Each MULE character consists of 2 bytes: LC1 + C1, where LC1 is
* an identifier for the charset (in the range 0x81 to 0x8d) and C1
* is the character code (in the range 0xa0 to 0xff).
*
* 3) "Private" single byte charsets such as SISHENG. Each MULE
* character consists of 3 bytes: LCPRV1 + LC12 + C1, where LCPRV1
* is a private-charset flag, LC12 is an identifier for the charset,
* and C1 is the character code (in the range 0xa0 to 0xff).
* LCPRV1 is either 0x9a (if LC12 is in the range 0xa0 to 0xdf)
* or 0x9b (if LC12 is in the range 0xe0 to 0xef).
*
* 4) "Official" multibyte charsets such as JIS X0208. Each MULE
* character consists of 3 bytes: LC2 + C1 + C2, where LC2 is
* an identifier for the charset (in the range 0x90 to 0x99) and C1
* and C2 form the character code (each in the range 0xa0 to 0xff).
*
* 5) "Private" multibyte charsets such as CNS 11643-1992 Plane 3.
* Each MULE character consists of 4 bytes: LCPRV2 + LC22 + C1 + C2,
* where LCPRV2 is a private-charset flag, LC22 is an identifier for
* the charset, and C1 and C2 form the character code (each in the range
* 0xa0 to 0xff). LCPRV2 is either 0x9c (if LC22 is in the range 0xf0
* to 0xf4) or 0x9d (if LC22 is in the range 0xf5 to 0xfe).
*
* "Official" encodings are those that have been assigned code numbers by
* the XEmacs project; "private" encodings have Postgres-specific charset
* identifiers.
*
* See the "XEmacs Internals Manual", available at http://www.xemacs.org,
* for more details. Note that for historical reasons, Postgres'
* private-charset flag values do not match what XEmacs says they should be,
* so this isn't really exactly MULE (not that private charsets would be
* interoperable anyway).
*
* Note that XEmacs's implementation is different from what emacs does.
* We follow emacs's implementation, rather than XEmacs's.
*----------------------------------------------------
*/
/*
* Charset identifiers (also called "leading bytes" in the MULE documentation)
*/
/*
* Charset IDs for official single byte encodings (0x81-0x8e)
*/
#define LC_ISO8859_1 0x81 /* ISO8859 Latin 1 */
#define LC_ISO8859_2 0x82 /* ISO8859 Latin 2 */
#define LC_ISO8859_3 0x83 /* ISO8859 Latin 3 */
#define LC_ISO8859_4 0x84 /* ISO8859 Latin 4 */
#define LC_TIS620 0x85 /* Thai (not supported yet) */
#define LC_ISO8859_7 0x86 /* Greek (not supported yet) */
#define LC_ISO8859_6 0x87 /* Arabic (not supported yet) */
#define LC_ISO8859_8 0x88 /* Hebrew (not supported yet) */
#define LC_JISX0201K 0x89 /* Japanese 1 byte kana */
#define LC_JISX0201R 0x8a /* Japanese 1 byte Roman */
/* Note that 0x8b seems to be unused as of Emacs 20.7.
* However, there might be a chance that 0x8b could be used
* in later versions of Emacs.
*/
#define LC_KOI8_R 0x8b /* Cyrillic KOI8-R */
#define LC_ISO8859_5 0x8c /* ISO8859 Cyrillic */
#define LC_ISO8859_9 0x8d /* ISO8859 Latin 5 (not supported yet) */
#define LC_ISO8859_15 0x8e /* ISO8859 Latin 15 (not supported yet) */
/* #define CONTROL_1 0x8f control characters (unused) */
/* Is a leading byte for "official" single byte encodings? */
#define IS_LC1(c) ((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)
/*
* Charset IDs for official multibyte encodings (0x90-0x99)
* 0x9a-0x9d are free. 0x9e and 0x9f are reserved.
*/
#define LC_JISX0208_1978 0x90 /* Japanese Kanji, old JIS (not supported) */
#define LC_GB2312_80 0x91 /* Chinese */
#define LC_JISX0208 0x92 /* Japanese Kanji (JIS X 0208) */
#define LC_KS5601 0x93 /* Korean */
#define LC_JISX0212 0x94 /* Japanese Kanji (JIS X 0212) */
#define LC_CNS11643_1 0x95 /* CNS 11643-1992 Plane 1 */
#define LC_CNS11643_2 0x96 /* CNS 11643-1992 Plane 2 */
#define LC_JISX0213_1 0x97 /* Japanese Kanji (JIS X 0213 Plane 1)
* (not supported) */
#define LC_BIG5_1 0x98 /* Plane 1 Chinese traditional (not
* supported) */
#define LC_BIG5_2 0x99 /* Plane 2 Chinese traditional (not
* supported) */
/* Is a leading byte for "official" multibyte encodings? */
#define IS_LC2(c) ((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)
/*
* Postgres-specific prefix bytes for "private" single byte encodings
* (According to the MULE docs, we should be using 0x9e for this)
*/
#define LCPRV1_A 0x9a
#define LCPRV1_B 0x9b
#define IS_LCPRV1(c) ((unsigned char)(c) == LCPRV1_A || (unsigned char)(c) == LCPRV1_B)
#define IS_LCPRV1_A_RANGE(c) \
((unsigned char)(c) >= 0xa0 && (unsigned char)(c) <= 0xdf)
#define IS_LCPRV1_B_RANGE(c) \
((unsigned char)(c) >= 0xe0 && (unsigned char)(c) <= 0xef)
/*
* Postgres-specific prefix bytes for "private" multibyte encodings
* (According to the MULE docs, we should be using 0x9f for this)
*/
#define LCPRV2_A 0x9c
#define LCPRV2_B 0x9d
#define IS_LCPRV2(c) ((unsigned char)(c) == LCPRV2_A || (unsigned char)(c) == LCPRV2_B)
#define IS_LCPRV2_A_RANGE(c) \
((unsigned char)(c) >= 0xf0 && (unsigned char)(c) <= 0xf4)
#define IS_LCPRV2_B_RANGE(c) \
((unsigned char)(c) >= 0xf5 && (unsigned char)(c) <= 0xfe)
/*
* Charset IDs for private single byte encodings (0xa0-0xef)
*/
#define LC_SISHENG 0xa0 /* Chinese SiSheng characters for
* PinYin/ZhuYin (not supported) */
#define LC_IPA 0xa1 /* IPA (International Phonetic
* Association) (not supported) */
#define LC_VISCII_LOWER 0xa2 /* Vietnamese VISCII1.1 lower-case (not
* supported) */
#define LC_VISCII_UPPER 0xa3 /* Vietnamese VISCII1.1 upper-case (not
* supported) */
#define LC_ARABIC_DIGIT 0xa4 /* Arabic digit (not supported) */
#define LC_ARABIC_1_COLUMN 0xa5 /* Arabic 1-column (not supported) */
#define LC_ASCII_RIGHT_TO_LEFT 0xa6 /* ASCII (left half of ISO8859-1) with
* right-to-left direction (not
* supported) */
#define LC_LAO 0xa7 /* Lao characters (ISO10646 0E80..0EDF)
* (not supported) */
#define LC_ARABIC_2_COLUMN 0xa8 /* Arabic 2-column (not supported) */
/*
* Charset IDs for private multibyte encodings (0xf0-0xff)
*/
#define LC_INDIAN_1_COLUMN 0xf0 /* Indian charset for 1-column width
* glyphs (not supported) */
#define LC_TIBETAN_1_COLUMN 0xf1 /* Tibetan 1-column width glyphs (not
* supported) */
#define LC_UNICODE_SUBSET_2 0xf2 /* Unicode characters of the range
* U+2500..U+33FF. (not supported) */
#define LC_UNICODE_SUBSET_3 0xf3 /* Unicode characters of the range
* U+E000..U+FFFF. (not supported) */
#define LC_UNICODE_SUBSET 0xf4 /* Unicode characters of the range
* U+0100..U+24FF. (not supported) */
#define LC_ETHIOPIC 0xf5 /* Ethiopic characters (not supported) */
#define LC_CNS11643_3 0xf6 /* CNS 11643-1992 Plane 3 */
#define LC_CNS11643_4 0xf7 /* CNS 11643-1992 Plane 4 */
#define LC_CNS11643_5 0xf8 /* CNS 11643-1992 Plane 5 */
#define LC_CNS11643_6 0xf9 /* CNS 11643-1992 Plane 6 */
#define LC_CNS11643_7 0xfa /* CNS 11643-1992 Plane 7 */
#define LC_INDIAN_2_COLUMN 0xfb /* Indian charset for 2-column width
* glyphs (not supported) */
#define LC_TIBETAN 0xfc /* Tibetan (not supported) */
/* #define FREE 0xfd free (unused) */
/* #define FREE 0xfe free (unused) */
/* #define FREE 0xff free (unused) */
/*----------------------------------------------------
* end of MULE stuff
*----------------------------------------------------
/*
* SJIS validation macros
*/
#define ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
#define ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
/*
* PostgreSQL encoding identifiers
@ -246,7 +80,7 @@ typedef enum pg_enc
PG_EUC_TW, /* EUC for Taiwan */
PG_EUC_JIS_2004, /* EUC-JIS-2004 */
PG_UTF8, /* Unicode UTF8 */
PG_MULE_INTERNAL, /* Mule internal code */
PG_UNUSED_1, /* (Was Mule internal code) */
PG_LATIN1, /* ISO-8859-1 Latin 1 */
PG_LATIN2, /* ISO-8859-2 Latin 2 */
PG_LATIN3, /* ISO-8859-3 Latin 3 */
@ -290,18 +124,21 @@ typedef enum pg_enc
#define PG_ENCODING_BE_LAST PG_KOI8U
/*
* True if _enc is a placeholder left in enum pg_enc where a removed
* encoding used to be (currently just PG_UNUSED_1, which was
* PG_MULE_INTERNAL).
*/
#define PG_UNUSED_ENCODING(_enc) \
((_enc) == PG_UNUSED_1)
/*
* Use these tests to validate an encoding ID before indexing
* pg_enc2name_tbl[] or any other table keyed by encoding.
*/
#define PG_VALID_BE_ENCODING(_enc) \
((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)
((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST && !PG_UNUSED_ENCODING(_enc))
#define PG_ENCODING_IS_CLIENT_ONLY(_enc) \
((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)
#define PG_VALID_ENCODING(_enc) \
((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)
((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_ && !PG_UNUSED_ENCODING(_enc))
/* On the frontend, all encodings are possible */
#define PG_VALID_FE_ENCODING(_enc) PG_VALID_ENCODING(_enc)
@ -678,7 +515,6 @@ extern const char *get_encoding_name_for_icu(int encoding);
extern bool pg_utf8_islegal(const unsigned char *source, int length);
extern int pg_utf_mblen(const unsigned char *s);
extern int pg_mule_mblen(const unsigned char *s);
/*
* The remaining functions are backend-only.
@ -782,16 +618,6 @@ pg_noreturn extern void report_untranslatable_char(int src_encoding, int dest_en
extern int local2local(const unsigned char *l, unsigned char *p, int len,
int src_encoding, int dest_encoding,
const unsigned char *tab, bool noError);
extern int latin2mic(const unsigned char *l, unsigned char *p, int len,
int lc, int encoding, bool noError);
extern int mic2latin(const unsigned char *mic, unsigned char *p, int len,
int lc, int encoding, bool noError);
extern int latin2mic_with_table(const unsigned char *l, unsigned char *p,
int len, int lc, int encoding,
const unsigned char *tab, bool noError);
extern int mic2latin_with_table(const unsigned char *mic, unsigned char *p,
int len, int lc, int encoding,
const unsigned char *tab, bool noError);
#ifdef WIN32
extern WCHAR *pgwin32_message_to_UTF16(const char *str, int len, int *utf16len);

View file

@ -40,8 +40,8 @@ PLyUnicode_Bytes(PyObject *unicode)
*
* PyUnicode_AsEncodedString could be used to encode the object directly
* in the server encoding, but Python doesn't support all the encodings
* that PostgreSQL does (EUC_TW and MULE_INTERNAL). UTF-8 is used as an
* intermediary in PLyUnicode_FromString as well.
* that PostgreSQL does (EUC_TW). UTF-8 is used as an intermediary in
* PLyUnicode_FromString as well.
*/
if (GetDatabaseEncoding() != PG_UTF8)
{

View file

@ -1,333 +0,0 @@
drop table ’·×’»»’µ¡’ÍÑ’¸ì;
ERROR: table "’·×’»»’µ¡’ÍÑ’¸ì" does not exist
create table ’·×’»»’µ¡’ÍÑ’¸ì (’ÍÑ’¸ì text, ’ʬ’Îà’¥³’¡¼’¥É varchar, È÷¹Í1A¤À¤è char(16));
create index ·×»»µ¡ÍѸìindex1 on ’·×’»»’µ¡’ÍÑ’¸ì using btree (’ÍÑ’¸ì);
create index ·×»»µ¡ÍѸìindex2 on ’·×’»»’µ¡’ÍÑ’¸ì using hash (’ʬ’Îà’¥³’¡¼’¥É);
insert into ’·×’»»’µ¡’ÍÑ’¸ì values('’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥Ç’¥£’¥¹’¥×’¥ì’¥¤','µ¡A01¾å');
insert into ’·×’»»’µ¡’ÍÑ’¸ì values('’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥°’¥é’¥Õ’¥£’¥Ã’¥¯’¥¹','ʬB10Ãæ');
insert into ’·×’»»’µ¡’ÍÑ’¸ì values('’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥×’¥í’¥°’¥é’¥Þ’¡¼','¿ÍZ01²¼');
vacuum ’·×’»»’µ¡’ÍÑ’¸ì;
select * from ’·×’»»’µ¡’ÍÑ’¸ì;
’ÍÑ’¸ì | ’ʬ’Îà’¥³’¡¼’¥É | È÷¹Í1a¤À¤è
----------------------------+------------+------------
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥Ç’¥£’¥¹’¥×’¥ì’¥¤ | µ¡A01¾å |
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥°’¥é’¥Õ’¥£’¥Ã’¥¯’¥¹ | ʬB10Ãæ |
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥×’¥í’¥°’¥é’¥Þ’¡¼ | ¿ÍZ01²¼ |
(3 rows)
select * from ’·×’»»’µ¡’ÍÑ’¸ì where ’ʬ’Îà’¥³’¡¼’¥É = '¿ÍZ01²¼';
’ÍÑ’¸ì | ’ʬ’Îà’¥³’¡¼’¥É | È÷¹Í1a¤À¤è
--------------------------+------------+------------
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥×’¥í’¥°’¥é’¥Þ’¡¼ | ¿ÍZ01²¼ |
(1 row)
select * from ’·×’»»’µ¡’ÍÑ’¸ì where ’ʬ’Îà’¥³’¡¼’¥É ~* '¿Íz01²¼';
’ÍÑ’¸ì | ’ʬ’Îà’¥³’¡¼’¥É | È÷¹Í1a¤À¤è
--------------------------+------------+------------
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥×’¥í’¥°’¥é’¥Þ’¡¼ | ¿ÍZ01²¼ |
(1 row)
select * from ’·×’»»’µ¡’ÍÑ’¸ì where ’ʬ’Îà’¥³’¡¼’¥É like '_Z01_';
’ÍÑ’¸ì | ’ʬ’Îà’¥³’¡¼’¥É | È÷¹Í1a¤À¤è
--------------------------+------------+------------
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥×’¥í’¥°’¥é’¥Þ’¡¼ | ¿ÍZ01²¼ |
(1 row)
select * from ’·×’»»’µ¡’ÍÑ’¸ì where ’ʬ’Îà’¥³’¡¼’¥É like '_Z%';
’ÍÑ’¸ì | ’ʬ’Îà’¥³’¡¼’¥É | È÷¹Í1a¤À¤è
--------------------------+------------+------------
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥×’¥í’¥°’¥é’¥Þ’¡¼ | ¿ÍZ01²¼ |
(1 row)
select * from ’·×’»»’µ¡’ÍÑ’¸ì where ’ÍÑ’¸ì ~ '’¥³’¥ó’¥Ô’¥å’¡¼’¥¿[’¥Ç’¥°]';
’ÍÑ’¸ì | ’ʬ’Îà’¥³’¡¼’¥É | È÷¹Í1a¤À¤è
----------------------------+------------+------------
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥Ç’¥£’¥¹’¥×’¥ì’¥¤ | µ¡A01¾å |
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥°’¥é’¥Õ’¥£’¥Ã’¥¯’¥¹ | ʬB10Ãæ |
(2 rows)
select * from ’·×’»»’µ¡’ÍÑ’¸ì where ’ÍÑ’¸ì ~* '’¥³’¥ó’¥Ô’¥å’¡¼’¥¿[’¥Ç’¥°]';
’ÍÑ’¸ì | ’ʬ’Îà’¥³’¡¼’¥É | È÷¹Í1a¤À¤è
----------------------------+------------+------------
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥Ç’¥£’¥¹’¥×’¥ì’¥¤ | µ¡A01¾å |
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥°’¥é’¥Õ’¥£’¥Ã’¥¯’¥¹ | ʬB10Ãæ |
(2 rows)
select *,character_length(’ÍÑ’¸ì) from ’·×’»»’µ¡’ÍÑ’¸ì;
’ÍÑ’¸ì | ’ʬ’Îà’¥³’¡¼’¥É | È÷¹Í1a¤À¤è | character_length
----------------------------+------------+------------+------------------
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥Ç’¥£’¥¹’¥×’¥ì’¥¤ | µ¡A01¾å | | 12
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥°’¥é’¥Õ’¥£’¥Ã’¥¯’¥¹ | ʬB10Ãæ | | 13
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥×’¥í’¥°’¥é’¥Þ’¡¼ | ¿ÍZ01²¼ | | 12
(3 rows)
select *,octet_length(’ÍÑ’¸ì) from ’·×’»»’µ¡’ÍÑ’¸ì;
’ÍÑ’¸ì | ’ʬ’Îà’¥³’¡¼’¥É | È÷¹Í1a¤À¤è | octet_length
----------------------------+------------+------------+--------------
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥Ç’¥£’¥¹’¥×’¥ì’¥¤ | µ¡A01¾å | | 36
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥°’¥é’¥Õ’¥£’¥Ã’¥¯’¥¹ | ʬB10Ãæ | | 39
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥×’¥í’¥°’¥é’¥Þ’¡¼ | ¿ÍZ01²¼ | | 36
(3 rows)
select *,position('’¥Ç' in ’ÍÑ’¸ì) from ’·×’»»’µ¡’ÍÑ’¸ì;
’ÍÑ’¸ì | ’ʬ’Îà’¥³’¡¼’¥É | È÷¹Í1a¤À¤è | position
----------------------------+------------+------------+----------
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥Ç’¥£’¥¹’¥×’¥ì’¥¤ | µ¡A01¾å | | 7
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥°’¥é’¥Õ’¥£’¥Ã’¥¯’¥¹ | ʬB10Ãæ | | 0
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥×’¥í’¥°’¥é’¥Þ’¡¼ | ¿ÍZ01²¼ | | 0
(3 rows)
select *,substring(’ÍÑ’¸ì from 10 for 4) from ’·×’»»’µ¡’ÍÑ’¸ì;
’ÍÑ’¸ì | ’ʬ’Îà’¥³’¡¼’¥É | È÷¹Í1a¤À¤è | substring
----------------------------+------------+------------+-----------
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥Ç’¥£’¥¹’¥×’¥ì’¥¤ | µ¡A01¾å | | ’¥×’¥ì’¥¤
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥°’¥é’¥Õ’¥£’¥Ã’¥¯’¥¹ | ʬB10Ãæ | | ’¥£’¥Ã’¥¯’¥¹
’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥×’¥í’¥°’¥é’¥Þ’¡¼ | ¿ÍZ01²¼ | | ’¥é’¥Þ’¡¼
(3 rows)
drop table ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï;
ERROR: table "‘¼Æ‘Ëã‘»ú‘Êõ‘Óï" does not exist
create table ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï(‘Êõ‘Óï text, ‘·Ö‘Àà‘ºÅ varchar, ±¸×¢1A char(16));
create index ¼ÆËã»úÊõÓïindex1 on ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï using btree(‘Êõ‘Óï);
create index ¼ÆËã»úÊõÓïindex2 on ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï using btree(‘·Ö‘Àà‘ºÅ);
insert into ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï values('‘µç‘ÄÔ‘Ïԑʾ‘ÆÁ','»úA01ÉÏ');
insert into ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï values('‘µç‘Äԑͼ‘ÐÎ','·ÖB01ÖÐ');
insert into ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï values('‘µç‘ÄÔ‘³Ì‘Ðò‘Ô±','ÈËZ01ÏÂ');
vacuum ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï;
select * from ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï;
‘Êõ‘Óï | ‘·Ö‘Àà‘ºÅ | ±¸×¢1a
------------+---------+--------
‘µç‘ÄÔ‘Ïԑʾ‘ÆÁ | »úA01ÉÏ |
‘µç‘Äԑͼ‘ÐÎ | ·ÖB01ÖÐ |
‘µç‘ÄÔ‘³Ì‘Ðò‘Ô± | ÈËZ01Ï |
(3 rows)
select * from ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï where ‘·Ö‘Àà‘ºÅ = 'ÈËZ01ÏÂ';
‘Êõ‘Óï | ‘·Ö‘Àà‘ºÅ | ±¸×¢1a
------------+---------+--------
‘µç‘ÄÔ‘³Ì‘Ðò‘Ô± | ÈËZ01Ï |
(1 row)
select * from ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï where ‘·Ö‘Àà‘ºÅ ~* 'ÈËz01ÏÂ';
‘Êõ‘Óï | ‘·Ö‘Àà‘ºÅ | ±¸×¢1a
------------+---------+--------
‘µç‘ÄÔ‘³Ì‘Ðò‘Ô± | ÈËZ01Ï |
(1 row)
select * from ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï where ‘·Ö‘Àà‘ºÅ like '_Z01_';
‘Êõ‘Óï | ‘·Ö‘Àà‘ºÅ | ±¸×¢1a
------------+---------+--------
‘µç‘ÄÔ‘³Ì‘Ðò‘Ô± | ÈËZ01Ï |
(1 row)
select * from ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï where ‘·Ö‘Àà‘ºÅ like '_Z%';
‘Êõ‘Óï | ‘·Ö‘Àà‘ºÅ | ±¸×¢1a
------------+---------+--------
‘µç‘ÄÔ‘³Ì‘Ðò‘Ô± | ÈËZ01Ï |
(1 row)
select * from ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï where ‘Êõ‘Óï ~ '‘µç‘ÄÔ[‘Ïԑͼ]';
‘Êõ‘Óï | ‘·Ö‘Àà‘ºÅ | ±¸×¢1a
------------+---------+--------
‘µç‘ÄÔ‘Ïԑʾ‘ÆÁ | »úA01ÉÏ |
‘µç‘Äԑͼ‘ÐÎ | ·ÖB01ÖÐ |
(2 rows)
select * from ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï where ‘Êõ‘Óï ~* '‘µç‘ÄÔ[‘Ïԑͼ]';
‘Êõ‘Óï | ‘·Ö‘Àà‘ºÅ | ±¸×¢1a
------------+---------+--------
‘µç‘ÄÔ‘Ïԑʾ‘ÆÁ | »úA01ÉÏ |
‘µç‘Äԑͼ‘ÐÎ | ·ÖB01ÖÐ |
(2 rows)
select *,character_length(‘Êõ‘Óï) from ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï;
‘Êõ‘Óï | ‘·Ö‘Àà‘ºÅ | ±¸×¢1a | character_length
------------+---------+--------+------------------
‘µç‘ÄÔ‘Ïԑʾ‘ÆÁ | »úA01ÉÏ | | 5
‘µç‘Äԑͼ‘ÐÎ | ·ÖB01ÖÐ | | 4
‘µç‘ÄÔ‘³Ì‘Ðò‘Ô± | ÈËZ01Ï | | 5
(3 rows)
select *,octet_length(‘Êõ‘Óï) from ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï;
‘Êõ‘Óï | ‘·Ö‘Àà‘ºÅ | ±¸×¢1a | octet_length
------------+---------+--------+--------------
‘µç‘ÄÔ‘Ïԑʾ‘ÆÁ | »úA01ÉÏ | | 15
‘µç‘Äԑͼ‘ÐÎ | ·ÖB01ÖÐ | | 12
‘µç‘ÄÔ‘³Ì‘Ðò‘Ô± | ÈËZ01Ï | | 15
(3 rows)
select *,position('‘ÏÔ' in ‘Êõ‘Óï) from ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï;
‘Êõ‘Óï | ‘·Ö‘Àà‘ºÅ | ±¸×¢1a | position
------------+---------+--------+----------
‘µç‘ÄÔ‘Ïԑʾ‘ÆÁ | »úA01ÉÏ | | 3
‘µç‘Äԑͼ‘ÐÎ | ·ÖB01ÖÐ | | 0
‘µç‘ÄÔ‘³Ì‘Ðò‘Ô± | ÈËZ01Ï | | 0
(3 rows)
select *,substring(‘Êõ‘Óï from 3 for 4) from ‘¼Æ‘Ëã‘»ú‘Êõ‘Óï;
‘Êõ‘Óï | ‘·Ö‘Àà‘ºÅ | ±¸×¢1a | substring
------------+---------+--------+-----------
‘µç‘ÄÔ‘Ïԑʾ‘ÆÁ | »úA01ÉÏ | | ‘Ïԑʾ‘ÆÁ
‘µç‘Äԑͼ‘ÐÎ | ·ÖB01ÖÐ | | ‘ͼ‘ÐÎ
‘µç‘ÄÔ‘³Ì‘Ðò‘Ô± | ÈËZ01Ï | | ‘³Ì‘Ðò‘Ô±
(3 rows)
drop table “ͪ“ß©“Ѧ“¿ë“¾î;
ERROR: table "“ͪ“ß©“Ѧ“¿ë“¾î" does not exist
create table “ͪ“ß©“Ѧ“¿ë“¾î (“¿ë“¾î text, “Ý“׾“ÄÚ“µå varchar, “ºñ“°í1A“¶ó“±¸ char(16));
create index “ͪ“ß©“Ѧ“¿ë“¾îindex1 on “ͪ“ß©“Ѧ“¿ë“¾î using btree (“¿ë“¾î);
create index “ͪ“ß©“Ѧ“¿ë“¾îindex2 on “ͪ“ß©“Ѧ“¿ë“¾î using hash (“Ý“׾“ÄÚ“µå);
insert into “ͪ“ß©“Ѧ“¿ë“¾î values('“Äēǻ“ÅÍ“µð“½º“ÇÓ·¹“ÀÌ', '“ѦA01“ß¾');
insert into “ͪ“ß©“Ѧ“¿ë“¾î values('“Äēǻ“ÅÍ“±×“·¡“ÇÈ“½º', '“ÝÂB10“ñé');
insert into “ͪ“ß©“Ѧ“¿ë“¾î values('“Äēǻ“ÅÍ“ÇÁ“·Î“±×“·¡“¸Ó', '“ìÑZ01“ù»');
vacuum “ͪ“ß©“Ѧ“¿ë“¾î;
select * from “ͪ“ß©“Ѧ“¿ë“¾î;
“¿ë“¾î | “Ý“׾“ÄÚ“µå | “ºñ“°í1a“¶ó“±¸
------------------+----------+------------
“Äēǻ“ÅÍ“µð“½º“ÇÓ·¹“ÀÌ | “ѦA01“ß¾ |
“Äēǻ“ÅÍ“±×“·¡“ÇÈ“½º | “ÝÂB10“ñé |
“Äēǻ“ÅÍ“ÇÁ“·Î“±×“·¡“¸Ó | “ìÑZ01“ù» |
(3 rows)
select * from “ͪ“ß©“Ѧ“¿ë“¾î where “Ý“׾“ÄÚ“µå = '“ìÑZ01“ù»';
“¿ë“¾î | “Ý“׾“ÄÚ“µå | “ºñ“°í1a“¶ó“±¸
------------------+----------+------------
“Äēǻ“ÅÍ“ÇÁ“·Î“±×“·¡“¸Ó | “ìÑZ01“ù» |
(1 row)
select * from “ͪ“ß©“Ѧ“¿ë“¾î where “Ý“׾“ÄÚ“µå ~* '“ìÑz01“ù»';
“¿ë“¾î | “Ý“׾“ÄÚ“µå | “ºñ“°í1a“¶ó“±¸
------------------+----------+------------
“Äēǻ“ÅÍ“ÇÁ“·Î“±×“·¡“¸Ó | “ìÑZ01“ù» |
(1 row)
select * from “ͪ“ß©“Ѧ“¿ë“¾î where “Ý“׾“ÄÚ“µå like '_Z01_';
“¿ë“¾î | “Ý“׾“ÄÚ“µå | “ºñ“°í1a“¶ó“±¸
------------------+----------+------------
“Äēǻ“ÅÍ“ÇÁ“·Î“±×“·¡“¸Ó | “ìÑZ01“ù» |
(1 row)
select * from “ͪ“ß©“Ѧ“¿ë“¾î where “Ý“׾“ÄÚ“µå like '_Z%';
“¿ë“¾î | “Ý“׾“ÄÚ“µå | “ºñ“°í1a“¶ó“±¸
------------------+----------+------------
“Äēǻ“ÅÍ“ÇÁ“·Î“±×“·¡“¸Ó | “ìÑZ01“ù» |
(1 row)
select * from “ͪ“ß©“Ѧ“¿ë“¾î where “¿ë“¾î ~ '“Äēǻ“ÅÍ[“µð“±×]';
“¿ë“¾î | “Ý“׾“ÄÚ“µå | “ºñ“°í1a“¶ó“±¸
------------------+----------+------------
“Äēǻ“ÅÍ“µð“½º“ÇÓ·¹“ÀÌ | “ѦA01“ß¾ |
“Äēǻ“ÅÍ“±×“·¡“ÇÈ“½º | “ÝÂB10“ñé |
(2 rows)
select * from “ͪ“ß©“Ѧ“¿ë“¾î where “¿ë“¾î ~* '“Äēǻ“ÅÍ[“µð“±×]';
“¿ë“¾î | “Ý“׾“ÄÚ“µå | “ºñ“°í1a“¶ó“±¸
------------------+----------+------------
“Äēǻ“ÅÍ“µð“½º“ÇÓ·¹“ÀÌ | “ѦA01“ß¾ |
“Äēǻ“ÅÍ“±×“·¡“ÇÈ“½º | “ÝÂB10“ñé |
(2 rows)
select *,character_length(“¿ë“¾î) from “ͪ“ß©“Ѧ“¿ë“¾î;
“¿ë“¾î | “Ý“׾“ÄÚ“µå | “ºñ“°í1a“¶ó“±¸ | character_length
------------------+----------+------------+------------------
“Äēǻ“ÅÍ“µð“½º“ÇÓ·¹“ÀÌ | “ѦA01“ß¾ | | 8
“Äēǻ“ÅÍ“±×“·¡“ÇÈ“½º | “ÝÂB10“ñé | | 7
“Äēǻ“ÅÍ“ÇÁ“·Î“±×“·¡“¸Ó | “ìÑZ01“ù» | | 8
(3 rows)
select *,octet_length(“¿ë“¾î) from “ͪ“ß©“Ѧ“¿ë“¾î;
“¿ë“¾î | “Ý“׾“ÄÚ“µå | “ºñ“°í1a“¶ó“±¸ | octet_length
------------------+----------+------------+--------------
“Äēǻ“ÅÍ“µð“½º“ÇÓ·¹“ÀÌ | “ѦA01“ß¾ | | 24
“Äēǻ“ÅÍ“±×“·¡“ÇÈ“½º | “ÝÂB10“ñé | | 21
“Äēǻ“ÅÍ“ÇÁ“·Î“±×“·¡“¸Ó | “ìÑZ01“ù» | | 24
(3 rows)
select *,position('“µð' in “¿ë“¾î) from “ͪ“ß©“Ѧ“¿ë“¾î;
“¿ë“¾î | “Ý“׾“ÄÚ“µå | “ºñ“°í1a“¶ó“±¸ | position
------------------+----------+------------+----------
“Äēǻ“ÅÍ“µð“½º“ÇÓ·¹“ÀÌ | “ѦA01“ß¾ | | 4
“Äēǻ“ÅÍ“±×“·¡“ÇÈ“½º | “ÝÂB10“ñé | | 0
“Äēǻ“ÅÍ“ÇÁ“·Î“±×“·¡“¸Ó | “ìÑZ01“ù» | | 0
(3 rows)
select *,substring(“¿ë“¾î from 3 for 4) from “ͪ“ß©“Ѧ“¿ë“¾î;
“¿ë“¾î | “Ý“׾“ÄÚ“µå | “ºñ“°í1a“¶ó“±¸ | substring
------------------+----------+------------+-----------
“Äēǻ“ÅÍ“µð“½º“ÇÓ·¹“ÀÌ | “ѦA01“ß¾ | | “ÅÍ“µð“½º“ÇÃ
“Äēǻ“ÅÍ“±×“·¡“ÇÈ“½º | “ÝÂB10“ñé | | “ÅÍ“±×“·¡“ÇÈ
“Äēǻ“ÅÍ“ÇÁ“·Î“±×“·¡“¸Ó | “ìÑZ01“ù» | | “ÅÍ“ÇÁ“·Î“±×
(3 rows)
drop table test;
ERROR: table "test" does not exist
create table test (t text);
insert into test values('ENGLISH');
insert into test values('FRAN<41>ÇAIS');
insert into test values('ESPA<50>ÑOL');
insert into test values('<27>ÍSLENSKA');
insert into test values('ENGLISH FRAN<41>ÇAIS ESPA<50>ÑOL <20>ÍSLENSKA');
vacuum test;
select * from test;
t
-----------------------------------
ENGLISH
FRAN<41>ÇAIS
ESPA<50>ÑOL
<20>ÍSLENSKA
ENGLISH FRAN<41>ÇAIS ESPA<50>ÑOL <20>ÍSLENSKA
(5 rows)
select * from test where t = 'ESPA<50>ÑOL';
t
---------
ESPA<50>ÑOL
(1 row)
select * from test where t ~* 'espa<70>Ñol';
t
-----------------------------------
ESPA<50>ÑOL
ENGLISH FRAN<41>ÇAIS ESPA<50>ÑOL <20>ÍSLENSKA
(2 rows)
select *,character_length(t) from test;
t | character_length
-----------------------------------+------------------
ENGLISH | 7
FRAN<41>ÇAIS | 8
ESPA<50>ÑOL | 7
<20>ÍSLENSKA | 8
ENGLISH FRAN<41>ÇAIS ESPA<50>ÑOL <20>ÍSLENSKA | 33
(5 rows)
select *,octet_length(t) from test;
t | octet_length
-----------------------------------+--------------
ENGLISH | 7
FRAN<41>ÇAIS | 9
ESPA<50>ÑOL | 8
<20>ÍSLENSKA | 9
ENGLISH FRAN<41>ÇAIS ESPA<50>ÑOL <20>ÍSLENSKA | 36
(5 rows)
select *,position('L' in t) from test;
t | position
-----------------------------------+----------
ENGLISH | 4
FRAN<41>ÇAIS | 0
ESPA<50>ÑOL | 7
<20>ÍSLENSKA | 3
ENGLISH FRAN<41>ÇAIS ESPA<50>ÑOL <20>ÍSLENSKA | 4
(5 rows)
select *,substring(t from 3 for 4) from test;
t | substring
-----------------------------------+-----------
ENGLISH | GLIS
FRAN<41>ÇAIS | AN<41>ÇA
ESPA<50>ÑOL | PA<50>ÑO
<20>ÍSLENSKA | LENS
ENGLISH FRAN<41>ÇAIS ESPA<50>ÑOL <20>ÍSLENSKA | GLIS
(5 rows)

View file

@ -21,7 +21,7 @@ PSQL="psql -X -n -e -q"
# in the test list, client-only encodings must follow the server encoding
# they're to be tested with; see hard-coded cases below
tests="euc_jp sjis euc_kr euc_cn euc_tw big5 utf8 gb18030 mule_internal"
tests="euc_jp sjis euc_kr euc_cn euc_tw big5 utf8 gb18030"
EXITCODE=0

View file

@ -1,72 +0,0 @@
drop table ·×»»µ¡ÍѸì;
create table ·×»»µ¡ÍѸì (ÍѸì text, ʬÎॳ¡¼¥É varchar, È÷¹Í1A¤À¤è char(16));
create index ·×»»µ¡ÍѸìindex1 on ·×»»µ¡ÍѸì using btree (ÍѸì);
create index ·×»»µ¡ÍѸìindex2 on ·×»»µ¡ÍѸì using hash (ʬÎॳ¡¼¥É);
insert into ·×»»µ¡ÍѸì values('’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥Ç’¥£’¥¹’¥×’¥ì’¥¤','µ¡A01¾å');
insert into ·×»»µ¡ÍѸì values('’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥°’¥é’¥Õ’¥£’¥Ã’¥¯’¥¹','ʬB10Ãæ');
insert into ·×»»µ¡ÍѸì values('’¥³’¥ó’¥Ô’¥å’¡¼’¥¿’¥×’¥í’¥°’¥é’¥Þ’¡¼','¿ÍZ01²¼');
vacuum ·×»»µ¡ÍѸì;
select * from ·×»»µ¡ÍѸì;
select * from ·×»»µ¡ÍѸì where ʬÎॳ¡¼¥É = '¿ÍZ01²¼';
select * from ·×»»µ¡ÍѸì where ʬÎॳ¡¼¥É ~* '¿Íz01²¼';
select * from ·×»»µ¡ÍѸì where ʬÎॳ¡¼¥É like '_Z01_';
select * from ·×»»µ¡ÍѸì where ʬÎॳ¡¼¥É like '_Z%';
select * from ·×»»µ¡ÍѸì where ÍѸì ~ '’¥³’¥ó’¥Ô’¥å’¡¼’¥¿[’¥Ç’¥°]';
select * from ·×»»µ¡ÍѸì where ÍѸì ~* '’¥³’¥ó’¥Ô’¥å’¡¼’¥¿[’¥Ç’¥°]';
select *,character_length(ÍѸì) from ·×»»µ¡ÍѸì;
select *,octet_length(ÍѸì) from ·×»»µ¡ÍѸì;
select *,position('’¥Ç' in ÍѸì) from ·×»»µ¡ÍѸì;
select *,substring(ÍѸì from 10 for 4) from ·×»»µ¡ÍѸì;
drop table ¼ÆËã»úÊõÓï;
create table ¼ÆËã»úÊõÓï(ÊõÓï text, ·ÖÀàºÅ varchar, ±¸×¢1A char(16));
create index ¼ÆËã»úÊõÓïindex1 on ¼ÆËã»úÊõÓï using btree(ÊõÓï);
create index ¼ÆËã»úÊõÓïindex2 on ¼ÆËã»úÊõÓï using btree(·ÖÀàºÅ);
insert into ¼ÆËã»úÊõÓï values('‘µç‘ÄÔ‘Ïԑʾ‘ÆÁ','»úA01ÉÏ');
insert into ¼ÆËã»úÊõÓï values('‘µç‘Äԑͼ‘ÐÎ','·ÖB01ÖÐ');
insert into ¼ÆËã»úÊõÓï values('‘µç‘ÄÔ‘³Ì‘Ðò‘Ô±','ÈËZ01ÏÂ');
vacuum ¼ÆËã»úÊõÓï;
select * from ¼ÆËã»úÊõÓï;
select * from ¼ÆËã»úÊõÓï where ·ÖÀàºÅ = 'ÈËZ01ÏÂ';
select * from ¼ÆËã»úÊõÓï where ·ÖÀàºÅ ~* 'ÈËz01ÏÂ';
select * from ¼ÆËã»úÊõÓï where ·ÖÀàºÅ like '_Z01_';
select * from ¼ÆËã»úÊõÓï where ·ÖÀàºÅ like '_Z%';
select * from ¼ÆËã»úÊõÓï where ÊõÓï ~ '‘µç‘ÄÔ[‘Ïԑͼ]';
select * from ¼ÆËã»úÊõÓï where ÊõÓï ~* '‘µç‘ÄÔ[‘Ïԑͼ]';
select *,character_length(ÊõÓï) from ¼ÆËã»úÊõÓï;
select *,octet_length(ÊõÓï) from ¼ÆËã»úÊõÓï;
select *,position('‘ÏÔ' in ÊõÓï) from ¼ÆËã»úÊõÓï;
select *,substring(ÊõÓï from 3 for 4) from ¼ÆËã»úÊõÓï;
drop table ͪߩѦ¿ë¾î;
create table ͪߩѦ¿ë¾î (¿ë¾î text, ÝÂ×¾ÄÚµå varchar, ºñ°í1Aó±¸ char(16));
create index ͪߩѦ¿ë¾îindex1 on ͪߩѦ¿ë¾î using btree (¿ë¾î);
create index ͪߩѦ¿ë¾îindex2 on ͪߩѦ¿ë¾î using hash (ÝÂ×¾ÄÚµå);
insert into ͪߩѦ¿ë¾î values('“Äēǻ“ÅÍ“µð“½º“ÇÓ·¹“ÀÌ', '“ѦA01“ß¾');
insert into ͪߩѦ¿ë¾î values('“Äēǻ“ÅÍ“±×“·¡“ÇÈ“½º', '“ÝÂB10“ñé');
insert into ͪߩѦ¿ë¾î values('“Äēǻ“ÅÍ“ÇÁ“·Î“±×“·¡“¸Ó', '“ìÑZ01“ù»');
vacuum ͪߩѦ¿ë¾î;
select * from ͪߩѦ¿ë¾î;
select * from ͪߩѦ¿ë¾î where ÝÂ×¾ÄÚµå = '“ìÑZ01“ù»';
select * from ͪߩѦ¿ë¾î where ÝÂ×¾ÄÚµå ~* '“ìÑz01“ù»';
select * from ͪߩѦ¿ë¾î where ÝÂ×¾ÄÚµå like '_Z01_';
select * from ͪߩѦ¿ë¾î where ÝÂ×¾ÄÚµå like '_Z%';
select * from ͪߩѦ¿ë¾î where ¿ë¾î ~ '“Äēǻ“ÅÍ[“µð“±×]';
select * from ͪߩѦ¿ë¾î where ¿ë¾î ~* '“Äēǻ“ÅÍ[“µð“±×]';
select *,character_length(¿ë¾î) from ͪߩѦ¿ë¾î;
select *,octet_length(¿ë¾î) from ͪߩѦ¿ë¾î;
select *,position('“µð' in ¿ë¾î) from ͪߩѦ¿ë¾î;
select *,substring(¿ë¾î from 3 for 4) from ͪߩѦ¿ë¾î;
drop table test;
create table test (t text);
insert into test values('ENGLISH');
insert into test values('FRAN<EFBFBD>ÇAIS');
insert into test values('ESPA<EFBFBD>ÑOL');
insert into test values('<EFBFBD>ÍSLENSKA');
insert into test values('ENGLISH FRAN<41>ÇAIS ESPA<50>ÑOL <20>ÍSLENSKA');
vacuum test;
select * from test;
select * from test where t = 'ESPA<EFBFBD>ÑOL';
select * from test where t ~* 'espa<EFBFBD>Ñol';
select *,character_length(t) from test;
select *,octet_length(t) from test;
select *,position('L' in t) from test;
select *,substring(t from 3 for 4) from test;

View file

@ -526,8 +526,6 @@ static pe_test_vector pe_test_vectors[] =
TV("gbk", "\x80\""),
TV("gbk", "\x80\\"),
TV("mule_internal", "\\\x9c';\0;"),
TV("sql_ascii", "1\xC0'"),
/*

View file

@ -588,16 +588,6 @@ select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from i
invalid, NUL byte | \xe4dede00 | \xc6cfcf | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
(5 rows)
select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;
description | inbytes | result | errorat | error
-------------------+------------+----------------+----------+-------------------------------------------------------
valid, pure ASCII | \x666f6f | \x666f6f | |
valid | \xe4dede | \x8bc68bcf8bcf | |
invalid, NUL byte | \x00 | \x | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
invalid, NUL byte | \xe400dede | \x8bc6 | \x00dede | invalid byte sequence for encoding "ISO_8859_5": 0x00
invalid, NUL byte | \xe4dede00 | \x8bc68bcf8bcf | \x00 | invalid byte sequence for encoding "ISO_8859_5": 0x00
(5 rows)
--
-- Big5
--
@ -630,120 +620,3 @@ select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_in
invalid, NUL byte | \x666f6fb64800 | \x666f6fe8b1a1 | \x00 | invalid byte sequence for encoding "BIG5": 0x00
(5 rows)
select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;
description | inbytes | result | errorat | error
--------------------------------+----------------+----------------+----------+------------------------------------------------------
valid, pure ASCII | \x666f6f | \x666f6f | |
valid | \x666f6fb648 | \x666f6f95e2af | |
valid, no translation to UTF-8 | \x666f6fa27f | \x666f6f95a3c1 | |
invalid, NUL byte | \x666f6fb60048 | \x666f6f | \xb60048 | invalid byte sequence for encoding "BIG5": 0xb6 0x00
invalid, NUL byte | \x666f6fb64800 | \x666f6f95e2af | \x00 | invalid byte sequence for encoding "BIG5": 0x00
(5 rows)
--
-- MULE_INTERNAL
--
CREATE TABLE mic_inputs (inbytes bytea, description text);
insert into mic_inputs values
('\x666f6f', 'valid, pure ASCII'),
('\x8bc68bcf8bcf', 'valid (in KOI8R)'),
('\x8bc68bcf8b', 'invalid,incomplete char'),
('\x92bedd', 'valid (in SHIFT_JIS)'),
('\x92be', 'invalid, incomplete char)'),
('\x666f6f95a3c1', 'valid (in Big5)'),
('\x666f6f95a3', 'invalid, incomplete char'),
('\x9200bedd', 'invalid, NUL byte'),
('\x92bedd00', 'invalid, NUL byte'),
('\x8b00c68bcf8bcf', 'invalid, NUL byte');
-- Test MULE_INTERNAL verification
select description, inbytes, (test_conv(inbytes, 'mule_internal', 'mule_internal')).* from mic_inputs;
description | inbytes | result | errorat | error
---------------------------+------------------+----------------+------------------+--------------------------------------------------------------------
valid, pure ASCII | \x666f6f | \x666f6f | |
valid (in KOI8R) | \x8bc68bcf8bcf | \x8bc68bcf8bcf | |
invalid,incomplete char | \x8bc68bcf8b | \x8bc68bcf | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
valid (in SHIFT_JIS) | \x92bedd | \x92bedd | |
invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
valid (in Big5) | \x666f6f95a3c1 | \x666f6f95a3c1 | |
invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
invalid, NUL byte | \x92bedd00 | \x92bedd | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
(10 rows)
-- Test conversions from MULE_INTERNAL
select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
description | inbytes | result | errorat | error
---------------------------+------------------+----------+------------------+---------------------------------------------------------------------------------------------------------------
valid, pure ASCII | \x666f6f | \x666f6f | |
valid (in KOI8R) | \x8bc68bcf8bcf | \xc6cfcf | |
invalid,incomplete char | \x8bc68bcf8b | \xc6cf | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
invalid, NUL byte | \x9200bedd | \x | \x9200bedd | character with byte sequence 0x92 0x00 0xbe in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | character with byte sequence 0x8b 0x00 in encoding "MULE_INTERNAL" has no equivalent in encoding "KOI8R"
(10 rows)
select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
description | inbytes | result | errorat | error
---------------------------+------------------+----------+------------------+--------------------------------------------------------------------------------------------------------------------
valid, pure ASCII | \x666f6f | \x666f6f | |
valid (in KOI8R) | \x8bc68bcf8bcf | \xe4dede | |
invalid,incomplete char | \x8bc68bcf8b | \xe4de | \x8b | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b
valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
invalid, NUL byte | \x9200bedd | \x | \x9200bedd | character with byte sequence 0x92 0x00 0xbe in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | character with byte sequence 0x8b 0x00 in encoding "MULE_INTERNAL" has no equivalent in encoding "ISO_8859_5"
(10 rows)
select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
description | inbytes | result | errorat | error
---------------------------+------------------+----------+------------------+--------------------------------------------------------------------------------------------------------------
valid, pure ASCII | \x666f6f | \x666f6f | |
valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
valid (in SHIFT_JIS) | \x92bedd | \x8fdb | |
invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "SJIS"
invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
invalid, NUL byte | \x92bedd00 | \x8fdb | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
(10 rows)
select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
description | inbytes | result | errorat | error
---------------------------+------------------+--------------+------------------+--------------------------------------------------------------------------------------------------------------
valid, pure ASCII | \x666f6f | \x666f6f | |
valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
valid (in SHIFT_JIS) | \x92bedd | \x | \x92bedd | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
valid (in Big5) | \x666f6f95a3c1 | \x666f6fa2a1 | |
invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
invalid, NUL byte | \x92bedd00 | \x | \x92bedd00 | character with byte sequence 0x92 0xbe 0xdd in encoding "MULE_INTERNAL" has no equivalent in encoding "BIG5"
invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
(10 rows)
select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;
description | inbytes | result | errorat | error
---------------------------+------------------+----------+------------------+----------------------------------------------------------------------------------------------------------------
valid, pure ASCII | \x666f6f | \x666f6f | |
valid (in KOI8R) | \x8bc68bcf8bcf | \x | \x8bc68bcf8bcf | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
invalid,incomplete char | \x8bc68bcf8b | \x | \x8bc68bcf8b | character with byte sequence 0x8b 0xc6 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
valid (in SHIFT_JIS) | \x92bedd | \xbedd | |
invalid, incomplete char) | \x92be | \x | \x92be | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0xbe
valid (in Big5) | \x666f6f95a3c1 | \x666f6f | \x95a3c1 | character with byte sequence 0x95 0xa3 0xc1 in encoding "MULE_INTERNAL" has no equivalent in encoding "EUC_JP"
invalid, incomplete char | \x666f6f95a3 | \x666f6f | \x95a3 | invalid byte sequence for encoding "MULE_INTERNAL": 0x95 0xa3
invalid, NUL byte | \x9200bedd | \x | \x9200bedd | invalid byte sequence for encoding "MULE_INTERNAL": 0x92 0x00 0xbe
invalid, NUL byte | \x92bedd00 | \xbedd | \x00 | invalid byte sequence for encoding "MULE_INTERNAL": 0x00
invalid, NUL byte | \x8b00c68bcf8bcf | \x | \x8b00c68bcf8bcf | invalid byte sequence for encoding "MULE_INTERNAL": 0x8b 0x00
(10 rows)

View file

@ -308,16 +308,7 @@ INSERT INTO encoding_tests VALUES
('UTF8', '6 byte, unsupported', '\xfd8283'),
('UTF8', '6 byte, unsupported', '\xfd828384'),
('UTF8', '6 byte, unsupported', '\xfd82838485'),
('UTF8', '6 byte, unsupported', '\xfd8283848586'),
-- MULE_INTERNAL
-- 2 81..8d LC1
-- 3 90..99 LC2
('MULE_INTERNAL', 'ASCII', 'a'),
('MULE_INTERNAL', 'LC1, short', '\x81'),
('MULE_INTERNAL', 'LC1', '\x8182'),
('MULE_INTERNAL', 'LC2, short', '\x90'),
('MULE_INTERNAL', 'LC2, short', '\x9082'),
('MULE_INTERNAL', 'LC2', '\x908283');
('UTF8', '6 byte, unsupported', '\xfd8283848586');
SELECT COUNT(test_encoding(encoding, description, input)) > 0
FROM encoding_tests;
NOTICE: LATIN1 ASCII: \x61 -> {97} -> \x61 = OK
@ -370,12 +361,6 @@ NOTICE: UTF8 6 byte, unsupported: \xfd8283 -> {253,130,131} -> \xc3bdc282c28
NOTICE: UTF8 6 byte, unsupported: \xfd828384 -> {253,130,131,132} -> \xc3bdc282c283c284 = failed
NOTICE: UTF8 6 byte, unsupported: \xfd82838485 -> {253,130,131,132,133} -> \xc3bdc282c283c284c285 = failed
NOTICE: UTF8 6 byte, unsupported: \xfd8283848586 -> {253,130,131,132,133,134} -> \xc3bdc282c283c284c285c286 = failed
NOTICE: MULE_INTERNAL ASCII: \x61 -> {97} -> \x61 = OK
NOTICE: MULE_INTERNAL LC1, short: \x81 -> {} -> \x = truncated
NOTICE: MULE_INTERNAL LC1: \x8182 -> {8454274} -> \x8182 = OK
NOTICE: MULE_INTERNAL LC2, short: \x90 -> {} -> \x = truncated
NOTICE: MULE_INTERNAL LC2, short: \x9082 -> {} -> \x = truncated
NOTICE: MULE_INTERNAL LC2: \x908283 -> {9470595} -> \x908283 = OK
?column?
----------
t

View file

@ -423,7 +423,7 @@ WITH objects (type, name, args) AS (VALUES
('collation', '{default}', '{}'),
('table constraint', '{addr_nsp, gentable, a_chk}', '{}'),
('domain constraint', '{addr_nsp.gendomain}', '{domconstr}'),
('conversion', '{pg_catalog, koi8_r_to_mic}', '{}'),
('conversion', '{pg_catalog, koi8_r_to_utf8}', '{}'),
('default value', '{addr_nsp, gentable, b}', '{}'),
('language', '{plpgsql}', '{}'),
-- large object
@ -498,7 +498,7 @@ default value|NULL|NULL|for addr_nsp.gentable.b|t
cast|NULL|NULL|(bigint AS integer)|t
table constraint|addr_nsp|NULL|a_chk on addr_nsp.gentable|t
domain constraint|addr_nsp|NULL|domconstr on addr_nsp.gendomain|t
conversion|pg_catalog|koi8_r_to_mic|pg_catalog.koi8_r_to_mic|t
conversion|pg_catalog|koi8_r_to_utf8|pg_catalog.koi8_r_to_utf8|t
language|NULL|plpgsql|plpgsql|t
schema|NULL|addr_nsp|addr_nsp|t
operator class|pg_catalog|int4_ops|pg_catalog.int4_ops USING btree|t

View file

@ -971,9 +971,7 @@ BEGIN
END IF;
EXCEPTION
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
WHEN untranslatable_character
-- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
OR undefined_function
WHEN undefined_function
-- unsupported XML feature
OR feature_not_supported THEN
RAISE LOG 'skip: %', SQLERRM;

View file

@ -714,9 +714,7 @@ BEGIN
END IF;
EXCEPTION
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
WHEN untranslatable_character
-- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
OR undefined_function
WHEN undefined_function
-- unsupported XML feature
OR feature_not_supported THEN
RAISE LOG 'skip: %', SQLERRM;

View file

@ -957,9 +957,7 @@ BEGIN
END IF;
EXCEPTION
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
WHEN untranslatable_character
-- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
OR undefined_function
WHEN undefined_function
-- unsupported XML feature
OR feature_not_supported THEN
RAISE LOG 'skip: %', SQLERRM;

View file

@ -957,6 +957,8 @@ test_enc_setup(PG_FUNCTION_ARGS)
mblen,
valid;
if (!PG_VALID_ENCODING(i))
continue;
if (pg_encoding_max_length(i) == 1)
continue;
pg_encoding_set_invalid(i, buf);

View file

@ -329,7 +329,6 @@ select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'iso8859-5')).* fr
-- Test conversions from ISO-8859-5
select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'utf8')).* from iso8859_5_inputs;
select description, inbytes, (test_conv(inbytes, 'iso8859-5', 'koi8r')).* from iso8859_5_inputs;
select description, inbytes, (test_conv(inbytes, 'iso8859_5', 'mule_internal')).* from iso8859_5_inputs;
--
-- Big5
@ -346,29 +345,3 @@ insert into big5_inputs values
select description, inbytes, (test_conv(inbytes, 'big5', 'big5')).* from big5_inputs;
-- Test conversions from Big5
select description, inbytes, (test_conv(inbytes, 'big5', 'utf8')).* from big5_inputs;
select description, inbytes, (test_conv(inbytes, 'big5', 'mule_internal')).* from big5_inputs;
--
-- MULE_INTERNAL
--
CREATE TABLE mic_inputs (inbytes bytea, description text);
insert into mic_inputs values
('\x666f6f', 'valid, pure ASCII'),
('\x8bc68bcf8bcf', 'valid (in KOI8R)'),
('\x8bc68bcf8b', 'invalid,incomplete char'),
('\x92bedd', 'valid (in SHIFT_JIS)'),
('\x92be', 'invalid, incomplete char)'),
('\x666f6f95a3c1', 'valid (in Big5)'),
('\x666f6f95a3', 'invalid, incomplete char'),
('\x9200bedd', 'invalid, NUL byte'),
('\x92bedd00', 'invalid, NUL byte'),
('\x8b00c68bcf8bcf', 'invalid, NUL byte');
-- Test MULE_INTERNAL verification
select description, inbytes, (test_conv(inbytes, 'mule_internal', 'mule_internal')).* from mic_inputs;
-- Test conversions from MULE_INTERNAL
select description, inbytes, (test_conv(inbytes, 'mule_internal', 'koi8r')).* from mic_inputs;
select description, inbytes, (test_conv(inbytes, 'mule_internal', 'iso8859-5')).* from mic_inputs;
select description, inbytes, (test_conv(inbytes, 'mule_internal', 'sjis')).* from mic_inputs;
select description, inbytes, (test_conv(inbytes, 'mule_internal', 'big5')).* from mic_inputs;
select description, inbytes, (test_conv(inbytes, 'mule_internal', 'euc_jp')).* from mic_inputs;

View file

@ -199,16 +199,7 @@ INSERT INTO encoding_tests VALUES
('UTF8', '6 byte, unsupported', '\xfd8283'),
('UTF8', '6 byte, unsupported', '\xfd828384'),
('UTF8', '6 byte, unsupported', '\xfd82838485'),
('UTF8', '6 byte, unsupported', '\xfd8283848586'),
-- MULE_INTERNAL
-- 2 81..8d LC1
-- 3 90..99 LC2
('MULE_INTERNAL', 'ASCII', 'a'),
('MULE_INTERNAL', 'LC1, short', '\x81'),
('MULE_INTERNAL', 'LC1', '\x8182'),
('MULE_INTERNAL', 'LC2, short', '\x90'),
('MULE_INTERNAL', 'LC2, short', '\x9082'),
('MULE_INTERNAL', 'LC2', '\x908283');
('UTF8', '6 byte, unsupported', '\xfd8283848586');
SELECT COUNT(test_encoding(encoding, description, input)) > 0
FROM encoding_tests;

View file

@ -179,7 +179,7 @@ WITH objects (type, name, args) AS (VALUES
('collation', '{default}', '{}'),
('table constraint', '{addr_nsp, gentable, a_chk}', '{}'),
('domain constraint', '{addr_nsp.gendomain}', '{domconstr}'),
('conversion', '{pg_catalog, koi8_r_to_mic}', '{}'),
('conversion', '{pg_catalog, koi8_r_to_utf8}', '{}'),
('default value', '{addr_nsp, gentable, b}', '{}'),
('language', '{plpgsql}', '{}'),
-- large object

View file

@ -272,9 +272,7 @@ BEGIN
END IF;
EXCEPTION
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
WHEN untranslatable_character
-- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
OR undefined_function
WHEN undefined_function
-- unsupported XML feature
OR feature_not_supported THEN
RAISE LOG 'skip: %', SQLERRM;