shell在变量中使用awk命令查找两个数组的差集
生产环境中需要获取源端和目标端已有文件的差异情况。因为在docker中运行,要求尽量避免落地文件,所以最初采用了比较暴力的比对方式:对源端列表中的每个文件名执行一次grep,在本地列表中查找(完整代码见下文)。
·
生产环境中需要获取源端和目标端已有文件的差异情况。因为在docker中运行,要求尽量避免落地文件,所以采用比较暴力的比对方式:
# Brute-force set difference: append to ${str} every name from ${source}
# that does not appear at the end of any line of ${local}.
# One grep process is spawned per source entry, so the cost grows with
# (number of source entries) x (size of the local list).
# NOTE: entries are still obtained by word-splitting ${source}, so names
# containing whitespace or glob characters are not supported.
for i in $(echo -e "${source}")
do
    # Quote the pattern so the shell cannot glob-expand it, and pass "--"
    # so a name beginning with "-" is not parsed as a grep option.
    # The trailing "$" anchors the name to the end of a line.
    j=$(echo -e "${local}" | grep -- "${i}\$")
    if [[ -z "${j}" ]]
    then
        str="${str} ${i}"
    fi
done
对3000个文件的比较需要耗时72秒以上
grep的执行效率明显堪忧,所以考虑用awk命令去查找差集,来优化
# Print every entry of ${source} that is absent from ${local}
# (set difference source \ local), one entry per line, without temp files.
# Both shell variables are whitespace-separated lists; awk's -v assignment
# also expands escape sequences such as "\n" inside them.
awk -v source_list="${source}" -v local_list="${local}" '
BEGIN {
    # A single-space separator makes split() break on runs of whitespace.
    split(source_list, arr_source, " ")
    split(local_list, arr_local, " ")

    # Index the source entries by name so membership checks are keyed lookups.
    for (i in arr_source) {
        arr_diff[arr_source[i]] = arr_source[i]
    }

    # Drop every name that also exists locally.  The "in" operator tests
    # membership without creating the element; comparing arr_diff[key] to ""
    # would create an empty element for each local-only name, which would
    # then be printed as a blank line.
    for (i in arr_local) {
        if (arr_local[i] in arr_diff) {
            delete arr_diff[arr_local[i]]
        }
    }

    # Whatever is left exists only on the source side.
    for (i in arr_diff) {
        print arr_diff[i]
    }
}'
1、awk中首先用-v定义两个变量,接收shell中的变量
2、使用split函数将字符串拆分成数组
3、复制源端的列表数组,存入一个以键来存取的数组arr_diff(arr_diff["20190303abcd"]="20190303abcd")
4、遍历本地列表数组,如果文件名作为键出现在arr_diff数组中,则用delete函数删除这个元素
5、输出arr_diff中剩余的元素,即差集
附上GNU下载的grep命令源码,下载地址:http://mirrors.ustc.edu.cn/gnu/
/* Search the already-open descriptor FD, reading it buffer by buffer and
   handing each run of complete lines to grepbuf.  FILE is the name used
   for the directory check and for the close() diagnostic; STATS is passed
   on to reset, fillbuf and grepdir.

   Returns the sum of the grepbuf results (nlines).  Returns 0 early when
   reset or the first fillbuf fails, or when the input looks binary and
   binary_files == WITHOUT_MATCH_BINARY_FILES.  For a directory in
   recursive mode it returns grepdir (file, stats) - 2 instead.

   The function works almost entirely on file-scope state (bufbeg, buflim,
   lastout, lastnl, outleft, pending, totalcc, totalnl, ...) that is
   defined outside this block.  */
static int
grep (int fd, char const *file, struct stats *stats)
{
int nlines, i;
int not_text;
size_t residue, save;
char oldc;
char *beg;
char *lim;
char eol = eolbyte;
if (!reset (fd, file, stats))
return 0;
if (file && directories == RECURSE_DIRECTORIES
&& S_ISDIR (stats->stat.st_mode))
{
/* Close fd now, so that we don't open a lot of file descriptors
when we recurse deeply. */
if (close (fd) != 0)
error (0, errno, "%s", file);
return grepdir (file, stats) - 2;
}
/* Reset the per-input counters and output state before the first read. */
totalcc = 0;
lastout = 0;
totalnl = 0;
outleft = max_count;
after_last_match = 0;
pending = 0;
nlines = 0;
residue = 0;
save = 0;
/* Initial read; a failure is reported unless is_EISDIR says otherwise. */
if (! fillbuf (save, stats))
{
if (! is_EISDIR (errno, file))
suppressible_error (filename, errno);
return 0;
}
/* Binary detection on the first buffer only: under the BINARY (and not
quiet) or WITHOUT_MATCH policy, the input counts as non-text if it
contains a '\0' byte (when eol is nonzero) or a '\200' byte (when eol
is itself zero). */
not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet)
|| binary_files == WITHOUT_MATCH_BINARY_FILES)
&& memchr (bufbeg, eol ? '\0' : '\200', buflim - bufbeg));
if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES)
return 0;
/* Temporarily bump both globals by not_text; the same amount is
subtracted again at finish_grep. */
done_on_match += not_text;
out_quiet += not_text;
for (;;)
{
lastnl = bufbeg;
if (lastout)
lastout = bufbeg;
/* The first SAVE bytes of the buffer were carried over from the previous
iteration; beg points just past them. */
beg = bufbeg + save;
/* no more data to scan (eof) except for maybe a residue -> break */
if (beg == buflim)
break;
/* Determine new residue (the length of an incomplete line at the end of
the buffer, 0 means there is no incomplete last line). */
/* Plant eol at beg[-1] as a sentinel so the backwards scan from buflim
always terminates, then restore the overwritten byte.
NOTE(review): beg[-1] is touched even when save == 0, so the buffer
presumably always has an addressable byte before bufbeg -- confirm
against fillbuf. */
oldc = beg[-1];
beg[-1] = eol;
for (lim = buflim; lim[-1] != eol; lim--)
continue;
beg[-1] = oldc;
/* lim == beg means the scan stopped at the sentinel, i.e. no eol was
found between beg and buflim; lim is then moved back by the old
residue. */
if (lim == beg)
lim = beg - residue;
/* Include the previous incomplete line in this scan, and record the new
incomplete tail length. */
beg -= residue;
residue = buflim - lim;
if (beg < lim)
{
if (outleft)
nlines += grepbuf (beg, lim);
if (pending)
prpending (lim);
/* Stop when outleft is zero and nothing is pending, or when grepbuf has
reported something and done_on_match is set without out_invert. */
if((!outleft && !pending) || (nlines && done_on_match && !out_invert))
goto finish_grep;
}
/* The last OUT_BEFORE lines at the end of the buffer will be needed as
leading context if there is a matching line at the begin of the
next data. Make beg point to their begin. */
i = 0;
beg = lim;
while (i < out_before && beg > bufbeg && beg != lastout)
{
++i;
do
--beg;
while (beg[-1] != eol);
}
/* detect if leading context is discontinuous from last printed line. */
if (beg != lastout)
lastout = 0;
/* Handle some details and read more data to scan. */
/* Bytes to carry into the next fill: the incomplete tail plus the
retained context lines between beg and lim. */
save = residue + lim - beg;
if (out_byte)
totalcc = add_count (totalcc, buflim - bufbeg - save);
if (out_line)
nlscan (beg);
if (! fillbuf (save, stats))
{
if (! is_EISDIR (errno, file))
suppressible_error (filename, errno);
goto finish_grep;
}
}
/* Loop exited at end of input: if an incomplete final line remains,
terminate it with eol and search it as well.
NOTE(review): the store through buflim presumably relies on spare room
after the data -- confirm against the buffer allocation. */
if (residue)
{
*buflim++ = eol;
if (outleft)
nlines += grepbuf (bufbeg + save - residue, buflim);
if (pending)
prpending (buflim);
}
finish_grep:
/* Undo the temporary adjustments made after binary detection. */
done_on_match -= not_text;
out_quiet -= not_text;
if ((not_text & ~out_quiet) && nlines != 0)
printf (_("Binary file %s matches\n"), filename);
return nlines;
}
从源码可以看出,每一次调用grep都要把管道传入的数据全部读取并逐行扫描一遍,而且循环中每个文件名都会新启动一个grep进程;而awk只启动一次,利用关联数组按键查找并删除匹配的元素即可。
更多推荐
已为社区贡献1条内容
所有评论(0)