From a04ed7615ed26cc2c22286b2f4c624dce23a758f Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Sat, 11 Aug 2018 02:56:43 +0000 Subject: [PATCH] stat(1): cache id->name resolution When invoked on a large list of files, it is most common for a small number of uids/gids to own most of the results. Like ls(1), use pwcache(3) to avoid repeatedly looking up the same IDs. Example microbenchmark and non-scientific results: $ time (find /usr/src -type f -print0 | xargs -0 stat >/dev/null) BEFORE: 3.62s user 5.23s system 102% cpu 8.655 total 3.47s user 5.38s system 102% cpu 8.647 total AFTER: 1.23s user 1.81s system 108% cpu 2.810 total 1.43s user 1.54s system 107% cpu 2.754 total Does this microbenchmark have any real-world significance? Until a use case is demonstrated otherwise, I doubt it. Ordinarily I would be resistant to optimizing pointless microbenchmarks in base utilities (e.g., recent totally gratuitous changes to yes(1)). However, the pwcache(3) APIs actually simplify stat(1) logic ever so slightly compared to the raw APIs they wrap, so I think this is at worst harmless. PR: 230491 Reported by: Thomas Hurst Discussed with: gad@ --- usr.bin/stat/stat.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/usr.bin/stat/stat.c b/usr.bin/stat/stat.c index 0c9ba6801f7..0b63a4067ca 100644 --- a/usr.bin/stat/stat.c +++ b/usr.bin/stat/stat.c @@ -619,8 +619,6 @@ format1(const struct stat *st, char *stmp, lfmt[24], tmp[20]; const char *sdata; char smode[12], sid[12], path[PATH_MAX + 4]; - struct passwd *pw; - struct group *gr; const struct timespec *tsp; struct timespec ts; struct tm *tm; @@ -717,9 +715,8 @@ format1(const struct stat *st, case SHOW_st_uid: small = (sizeof(st->st_uid) == 4); data = st->st_uid; - if ((pw = getpwuid(st->st_uid)) != NULL) - sdata = pw->pw_name; - else { + sdata = user_from_uid(st->st_uid, 1); + if (sdata == NULL) { snprintf(sid, sizeof(sid), "(%ld)", (long)st->st_uid); sdata = sid; } @@ -731,9 +728,8 @@ format1(const struct stat *st, case SHOW_st_gid: small = (sizeof(st->st_gid) == 4); data = st->st_gid; - if ((gr = getgrgid(st->st_gid)) != NULL) - sdata = gr->gr_name; - else { + sdata = group_from_gid(st->st_gid, 1); + if (sdata == NULL) { snprintf(sid, sizeof(sid), "(%ld)", (long)st->st_gid); sdata = sid; }