shell在变量中使用awk命令查找两个数组的差集

生产环境中需要获取源端和目标端已有文件的差异情况，因为在docker中运行，要求尽量避免落地文件，所以采用比较暴力的比对方式：for i in `echo -e "${source}"`; do j=`echo -e "${local}"|grep ${i}$`; if [[ "${j}" == "" ]]; then str=…

牧锋

3478人浏览 · 2019-03-08 16:54:13

牧锋 · 2019-03-08 16:54:13 发布

生产环境中需要获取源端和目标端已有文件的差异情况，因为在docker中运行，要求尽量避免落地文件，所以采用比较暴力的比对方式：

# Build ${str}: the entries of ${source} that no line of ${local} ends with.
# NOTE: each entry is still used as a regular expression, so characters such
# as '.' inside a name match any character.
for i in $(echo -e "${source}")
do
    # Quote the pattern so the shell cannot word-split or glob-expand it, and
    # pass "--" so an entry beginning with "-" is not parsed as a grep option.
    j=$(echo -e "${local}" | grep -- "${i}\$")
    if [[ -z "${j}" ]]
    then
        str="${str} ${i}"
    fi
done

对3000个文件的比较需要耗时72秒以上

grep的执行效率明显堪忧，所以考虑用awk命令去查找差集，来优化

# Print every name listed in ${source} that is not listed in ${local},
# one per line (awk's for-in order is unspecified, so the output is unordered).
# The whole program runs in BEGIN, so awk reads no input and needs no
# "echo |" in front of it.
awk -v source_list="${source}" -v local_list="${local}" '
BEGIN {
    # A " " separator makes split() break on runs of blanks and newlines.
    split(source_list, arr_source, " ")
    split(local_list, arr_local, " ")

    # Index the source names by value so membership tests are direct lookups.
    for (i in arr_source) {
        arr_diff[arr_source[i]] = arr_source[i]
    }

    # Drop every name that also exists locally.  Test with "in": merely
    # referencing arr_diff[key] in a comparison would create an empty element,
    # which would later be printed as a blank line.
    for (i in arr_local) {
        if (arr_local[i] in arr_diff) {
            delete arr_diff[arr_local[i]]
        }
    }

    for (i in arr_diff) {
        print arr_diff[i]
    }
}'

1、awk中首先通过 -v 定义两个变量（source_list、local_list），接收shell中的变量

2、使用split函数将字符串拆分成数组

3、复制源端的列表数组，存入一个以键来存取的数组arr_diff（arr_diff["20190303abcd"]="20190303abcd"）

4、遍历本地列表数组，如果文件名在arr_diff数组中出现，则用delete函数删除这个元素

5、输出差集

附上GNU下载的grep命令源码，下载地址：http://mirrors.ustc.edu.cn/gnu/

/* Scan the input open on descriptor FD (named FILE, with status
   information in STATS) one buffer at a time, handing each run of
   complete lines to grepbuf () and returning the sum of its results
   (presumably the number of matching lines -- confirm against grepbuf).

   Returns 0 early when reset () or the first fillbuf () fails, or when
   the first buffer looks binary and binary_files is
   WITHOUT_MATCH_BINARY_FILES.  When FILE is a directory and directories
   is RECURSE_DIRECTORIES, FD is closed and the result is
   grepdir (file, stats) - 2.  */
static int
grep (int fd, char const *file, struct stats *stats)
{
  int nlines, i;
  int not_text;
  size_t residue, save;
  char oldc;
  char *beg;
  char *lim;
  char eol = eolbyte;

  /* Nothing can be searched if reset () fails; report zero.  */
  if (!reset (fd, file, stats))
    return 0;

  if (file && directories == RECURSE_DIRECTORIES
      && S_ISDIR (stats->stat.st_mode))
    {
      /* Close fd now, so that we don't open a lot of file descriptors
	 when we recurse deeply.  */
      if (close (fd) != 0)
	error (0, errno, "%s", file);
      return grepdir (file, stats) - 2;
    }

  /* Reset the non-local bookkeeping variables before scanning this input.  */
  totalcc = 0;
  lastout = 0;
  totalnl = 0;
  outleft = max_count;
  after_last_match = 0;
  pending = 0;

  nlines = 0;
  residue = 0;
  save = 0;

  /* Read the first buffer; a failure is reported unless is_EISDIR ()
     accepts it.  */
  if (! fillbuf (save, stats))
    {
      if (! is_EISDIR (errno, file))
	suppressible_error (filename, errno);
      return 0;
    }

  /* The input counts as non-text when the binary_files mode asks for the
     check and the first buffer contains a NUL byte (or byte 0200 when the
     line terminator EOL is itself NUL).  */
  not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet)
	       || binary_files == WITHOUT_MATCH_BINARY_FILES)
	      && memchr (bufbeg, eol ? '\0' : '\200', buflim - bufbeg));
  if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES)
    return 0;
  /* Temporarily raise both flags for non-text input; the matching
     decrements are at finish_grep.  */
  done_on_match += not_text;
  out_quiet += not_text;

  for (;;)
    {
      lastnl = bufbeg;
      if (lastout)
	lastout = bufbeg;

      /* Skip the SAVE bytes carried over from the previous buffer.  */
      beg = bufbeg + save;

      /* no more data to scan (eof) except for maybe a residue -> break */
      if (beg == buflim)
	break;

      /* Determine new residue (the length of an incomplete line at the end of
         the buffer, 0 means there is no incomplete last line).  */
      /* Plant EOL just before BEG as a sentinel so the backward scan from
         BUFLIM is guaranteed to stop, then restore the original byte.  */
      oldc = beg[-1];
      beg[-1] = eol;
      for (lim = buflim; lim[-1] != eol; lim--)
	continue;
      beg[-1] = oldc;
      /* No EOL in the new data: the old residue just grows.  */
      if (lim == beg)
	lim = beg - residue;
      beg -= residue;
      residue = buflim - lim;

      /* [BEG, LIM) now holds only complete lines; search them.  */
      if (beg < lim)
	{
	  if (outleft)
	    nlines += grepbuf (beg, lim);
	  if (pending)
	    prpending (lim);
	  /* Stop early once no output remains to be produced, or once a
	     result exists and done_on_match is set without out_invert.  */
	  if((!outleft && !pending) || (nlines && done_on_match && !out_invert))
	    goto finish_grep;
	}

      /* The last OUT_BEFORE lines at the end of the buffer will be needed as
	 leading context if there is a matching line at the begin of the
	 next data. Make beg point to their begin.  */
      i = 0;
      beg = lim;
      while (i < out_before && beg > bufbeg && beg != lastout)
	{
	  ++i;
	  do
	    --beg;
	  while (beg[-1] != eol);
	}

      /* detect if leading context is discontinuous from last printed line.  */
      if (beg != lastout)
	lastout = 0;

      /* Handle some details and read more data to scan.  */
      save = residue + lim - beg;
      if (out_byte)
	totalcc = add_count (totalcc, buflim - bufbeg - save);
      if (out_line)
	nlscan (beg);
      if (! fillbuf (save, stats))
	{
	  if (! is_EISDIR (errno, file))
	    suppressible_error (filename, errno);
	  goto finish_grep;
	}
    }
  /* The input ended with an unterminated line: append EOL to it and
     search that final line too.  */
  if (residue)
    {
      *buflim++ = eol;
      if (outleft)
	nlines += grepbuf (bufbeg + save - residue, buflim);
      if (pending)
        prpending (buflim);
    }

 finish_grep:
  /* Undo the temporary flag adjustments made for non-text input.  */
  done_on_match -= not_text;
  out_quiet -= not_text;
  if ((not_text & ~out_quiet) && nlines != 0)
    printf (_("Binary file %s matches\n"), filename);
  return nlines;
}

每一次调用grep都要启动一个新进程，并把管道中的数据全部读取一遍再进行查找；而awk只需启动一次，用关联数组按键直接寻址，匹配到就删除即可。

向您推荐>>Eolink开发者社区

权威｜前沿｜技术｜干货｜国内首个API全生命周期开发者社区

更多推荐

沃云统一开发平台介绍

沃云集成平台研发平台介绍1.平台优势2.平台原理3.研发平台使用方法4.遇到的问题5.现阶段实现的功能6.后续需要补充的功能和优化内容研发平台介绍1.平台优势解决孤岛式应用，实现能力共享；现有系统框架过于复杂，跨系统业务处理成本居高不下，协同服务共享，降低运维成本；提高项目应用资源监控能力，改善资源利用率；业务微服务化，快速发布、快速部署，快速响应业务需求变化；沃云平台不仅提供了自动化的、可快速部

云原生

(20200916 Solved)docker-compose up创建容器自动退出

问题描述如题，创建容器后自动退出了。并且docker start container无效解决方案原因是缺失了控制终端的配置，需要在docker-compose.yml中增加tty:true ，有时候这样也不行，需要再增加一个command:/bin/bash，命令不一定是这个，需要是一个不会退出的命令，然后用-d后台启动容器。Referencesdocker-compose启动容器后自动退出...

云原生

基于docker的test-containers环境百宝箱

笔者语录：我开了个公众号【Java你我他】，欢迎大家关注。在很多时候，程序猿们更关注代码本身，而不愿意把时间花费在环境搭建上，这也是Docker变得越来越受欢迎的原因之一。test-containe是Docker生态圈中的一颗新星，其主要针对测试领域、背靠Docker实现环境百宝箱功能。 test-containers：你要的环境，我都有~ 假设我们现在需要一个redis-clust