mirror of
https://github.com/opnsense/src.git
synced 2026-06-08 16:22:46 -04:00
Add POSIX-style support for multibyte characters to od(1): the 'c'
conversion interprets input bytes as multibyte sequences and displays printable characters in the area corresponding to their first byte. The remaining bytes are shown as "**".
This commit is contained in:
parent
7602de354f
commit
40ccfb3137
4 changed files with 114 additions and 11 deletions
|
|
@ -39,15 +39,30 @@ __FBSDID("$FreeBSD$");
|
|||
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <wchar.h>
|
||||
#include <wctype.h>
|
||||
#include "hexdump.h"
|
||||
|
||||
void
|
||||
conv_c(PR *pr, u_char *p)
|
||||
conv_c(PR *pr, u_char *p, size_t bufsize)
|
||||
{
|
||||
char buf[10];
|
||||
char const *str;
|
||||
wchar_t wc;
|
||||
size_t clen, oclen;
|
||||
int converr, pad, width;
|
||||
char peekbuf[MB_LEN_MAX];
|
||||
|
||||
if (pr->mbleft > 0) {
|
||||
str = "**";
|
||||
pr->mbleft--;
|
||||
goto strpr;
|
||||
}
|
||||
|
||||
switch(*p) {
|
||||
case '\0':
|
||||
|
|
@ -78,9 +93,53 @@ conv_c(PR *pr, u_char *p)
|
|||
default:
|
||||
break;
|
||||
}
|
||||
if (isprint(*p)) {
|
||||
*pr->cchar = 'c';
|
||||
(void)printf(pr->fmt, *p);
|
||||
/*
|
||||
* Multibyte characters are disabled for hexdump(1) for backwards
|
||||
* compatibility and consistency (none of its other output formats
|
||||
* recognize them correctly).
|
||||
*/
|
||||
converr = 0;
|
||||
if (odmode && MB_CUR_MAX > 1) {
|
||||
oclen = 0;
|
||||
retry:
|
||||
clen = mbrtowc(&wc, p, bufsize, &pr->mbstate);
|
||||
if (clen == 0)
|
||||
clen = 1;
|
||||
else if (clen == (size_t)-1 || (clen == (size_t)-2 &&
|
||||
buf == peekbuf)) {
|
||||
memset(&pr->mbstate, 0, sizeof(pr->mbstate));
|
||||
wc = *p;
|
||||
clen = 1;
|
||||
converr = 1;
|
||||
} else if (clen == (size_t)-2) {
|
||||
/*
|
||||
* Incomplete character; peek ahead and see if we
|
||||
* can complete it.
|
||||
*/
|
||||
oclen = bufsize;
|
||||
bufsize = peek(p = peekbuf, MB_CUR_MAX);
|
||||
goto retry;
|
||||
}
|
||||
clen += oclen;
|
||||
} else {
|
||||
wc = *p;
|
||||
clen = 1;
|
||||
}
|
||||
if (!converr && iswprint(wc)) {
|
||||
if (!odmode) {
|
||||
*pr->cchar = 'c';
|
||||
(void)printf(pr->fmt, (int)wc);
|
||||
} else {
|
||||
*pr->cchar = 'C';
|
||||
assert(strcmp(pr->fmt, "%3C") == 0);
|
||||
width = wcwidth(wc);
|
||||
assert(width > 0);
|
||||
pad = 3 - width;
|
||||
if (pad < 0)
|
||||
pad = 0;
|
||||
(void)printf("%*s%C", pad, "", wc);
|
||||
pr->mbleft = clen - 1;
|
||||
}
|
||||
} else {
|
||||
(void)sprintf(buf, "%03o", (int)*p);
|
||||
str = buf;
|
||||
|
|
|
|||
|
|
@ -132,7 +132,8 @@ print(PR *pr, u_char *bp)
|
|||
(void)printf(pr->fmt, "");
|
||||
break;
|
||||
case F_C:
|
||||
conv_c(pr, bp);
|
||||
conv_c(pr, bp, eaddress ? eaddress - address :
|
||||
blocksize - address % blocksize);
|
||||
break;
|
||||
case F_CHAR:
|
||||
(void)printf(pr->fmt, *bp);
|
||||
|
|
@ -261,6 +262,10 @@ get(void)
|
|||
errx(1, "cannot skip past end of input");
|
||||
if (need == blocksize)
|
||||
return((u_char *)NULL);
|
||||
/*
|
||||
* XXX bcmp() is not quite right in the presence
|
||||
* of multibyte characters.
|
||||
*/
|
||||
if (vflag != ALL &&
|
||||
valid_save &&
|
||||
bcmp(curp, savp, nread) == 0) {
|
||||
|
|
@ -284,6 +289,10 @@ get(void)
|
|||
if (length != -1)
|
||||
length -= n;
|
||||
if (!(need -= n)) {
|
||||
/*
|
||||
* XXX bcmp() is not quite right in the presence
|
||||
* of multibyte characters.
|
||||
*/
|
||||
if (vflag == ALL || vflag == FIRST ||
|
||||
valid_save == 0 ||
|
||||
bcmp(curp, savp, blocksize) != 0) {
|
||||
|
|
@ -303,6 +312,27 @@ get(void)
|
|||
}
|
||||
}
|
||||
|
||||
size_t
|
||||
peek(u_char *buf, size_t nbytes)
|
||||
{
|
||||
size_t n, nread;
|
||||
int c;
|
||||
|
||||
if (length != -1 && nbytes > length)
|
||||
nbytes = length;
|
||||
nread = 0;
|
||||
while (nread < nbytes && (c = getchar()) != EOF) {
|
||||
*buf++ = c;
|
||||
nread++;
|
||||
}
|
||||
n = nread;
|
||||
while (n-- > 0) {
|
||||
c = *--buf;
|
||||
ungetc(c, stdin);
|
||||
}
|
||||
return (nread);
|
||||
}
|
||||
|
||||
int
|
||||
next(char **argv)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -34,6 +34,8 @@
|
|||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include <wchar.h>
|
||||
|
||||
typedef struct _pr {
|
||||
struct _pr *nextpr; /* next print unit */
|
||||
#define F_ADDRESS 0x001 /* print offset */
|
||||
|
|
@ -52,6 +54,8 @@ typedef struct _pr {
|
|||
char *cchar; /* conversion character */
|
||||
char *fmt; /* printf format */
|
||||
char *nospace; /* no whitespace version */
|
||||
int mbleft; /* bytes left of multibyte char. */
|
||||
mbstate_t mbstate; /* conversion state */
|
||||
} PR;
|
||||
|
||||
typedef struct _fu {
|
||||
|
|
@ -88,7 +92,7 @@ void badconv(char *);
|
|||
void badfmt(const char *);
|
||||
void badsfmt(void);
|
||||
void bpad(PR *);
|
||||
void conv_c(PR *, u_char *);
|
||||
void conv_c(PR *, u_char *, size_t);
|
||||
void conv_u(PR *, u_char *);
|
||||
void display(void);
|
||||
void doskip(const char *, int);
|
||||
|
|
@ -98,6 +102,7 @@ void newsyntax(int, char ***);
|
|||
int next(char **);
|
||||
void nomem(void);
|
||||
void oldsyntax(int, char ***);
|
||||
size_t peek(u_char *, size_t);
|
||||
void rewrite(FS *);
|
||||
int size(FS *);
|
||||
void usage(void);
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@
|
|||
.\" @(#)od.1 8.1 (Berkeley) 6/6/93
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd July 3, 2004
|
||||
.Dd July 11, 2004
|
||||
.Os
|
||||
.Dt OD 1
|
||||
.Sh NAME
|
||||
|
|
@ -179,6 +179,10 @@ characters, which are represented as C escapes:
|
|||
.It vertical tab
|
||||
\ev
|
||||
.El
|
||||
.Pp
|
||||
Multi-byte characters are displayed in the area corresponding to the first
|
||||
byte of the character.
The remaining bytes are shown as
|
||||
.Ql ** .
|
||||
.It Xo
|
||||
.Sm off
|
||||
.Op Cm d | o | u | x
|
||||
|
|
@ -231,6 +235,15 @@ contain one line for each format.
|
|||
If no output format is specified,
|
||||
.Fl t Ar oS
|
||||
is assumed.
|
||||
.Sh ENVIRONMENT
|
||||
The
|
||||
.Ev LANG , LC_ALL
|
||||
and
|
||||
.Ev LC_CTYPE
|
||||
environment variables affect the execution of
|
||||
.Nm
|
||||
as described in
|
||||
.Xr environ 7 .
|
||||
.Sh DIAGNOSTICS
|
||||
.Ex -std
|
||||
.Sh COMPATIBILITY
|
||||
|
|
@ -252,7 +265,3 @@ An
|
|||
.Nm
|
||||
command appeared in
|
||||
.At v1 .
|
||||
.Sh BUGS
|
||||
The
|
||||
.Nm
|
||||
utility does not recognize multibyte characters.
|
||||
|
|
|
|||
Loading…
Reference in a new issue