Golang 获取文件 md5 校验和的方法及效率比较

前端之家收集整理的这篇文章主要介绍了 Golang 获取文件 md5 校验和的方法及效率比较，前端之家小编觉得挺不错的，现在分享给大家，也给大家做个参考。

近期有一个需求:获取多个文件 md5 校验和判断是否存在重复文件,因为文件数量较多,有的文件还比较大,需要处理的文件还没有到位,我就考虑了一下效率的问题。目前我已知的 Golang 中获取 md5 校验和的方法有两个,这里直接给出实现源码。

package main

import (
	"crypto/md5"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"os"
)

// Command-line flags selecting the checksum variant and its input.
var (
	// which selects the implementation run by main: true calls aaa
	// (read the whole file, then md5.Sum), false calls bbb (stream the
	// file into an MD5 hash with io.Copy).
	which = flag.Bool("which", true, "true: ReadAll + md5.Sum; false: io.Copy into md5.New")
	// path is the file whose checksum is computed.
	path = flag.String("path", "", "path of the file to checksum")
	// cnt is the number of times the checksum is computed.
	cnt = flag.Int("cnt", 100, "number of times to compute the checksum")
)

func@H_404_5@ aaa() {
    f,err := os.Open(*path)
    if@H_404_5@ err != nil@H_404_5@ {
        fmt.Println("Open"@H_404_5@,err)
        return@H_404_5@
    }

    defer@H_404_5@ f.Close()

    body,err := IoUtil.ReadAll(f)
    if@H_404_5@ err != nil@H_404_5@ {
        fmt.Println("ReadAll"@H_404_5@,err)
        return@H_404_5@
    }

    md5.Sum(body)
    //fmt.Printf("%x\n",md5.Sum(body))@H_404_5@
}

func@H_404_5@ bbb() {
    f,err)
        return@H_404_5@
    }

    defer@H_404_5@ f.Close()

    md5hash := md5.New()
    if@H_404_5@ _,err := io.Copy(md5hash,f); err != nil@H_404_5@ {
        fmt.Println("Copy"@H_404_5@,err)
        return@H_404_5@
    }

    md5hash.Sum(nil@H_404_5@)
    //fmt.Printf("%x\n",md5hash.Sum(nil))@H_404_5@
}

func@H_404_5@ main() {
    flag.Parse()

    for@H_404_5@ i := 0@H_404_5@; i < *cnt; i++ {
        if@H_404_5@ *which {
            aaa()
        } else@H_404_5@ {
            bbb()
        }
    }
}

还有可供参考的获取 md5 校验和的 Shell 命令

md5 -- calculate a message-digest fingerprint (checksum) for a file
md5 [-pqrtx] [-s string] [file ...]

测试文件是公司项目的日志文件

banjakukutekiiMac:shell panshiqu$ ls -an | grep by
-rw-r--r--   1 501  20   7285957 11 17 16:14 by.out

banjakukutekiiMac:shell panshiqu$ cp by.out by2.out
banjakukutekiiMac:shell panshiqu$ cat by.out >> by2.out

banjakukutekiiMac:shell panshiqu$ ls -an | grep by
-rw-r--r--   1 501  20   7285957 11 17 16:14 by.out
-rw-r--r--   1 501  20  14571914 11 17 17:03 by2.out

下面效率展示

banjakukutekiiMac:shell panshiqu$ time ./gomd5 -cnt=1 -which=true -path="by.out"

real    0m0.027s
user    0m0.017s
sys     0m0.012s
banjakukutekiiMac:shell panshiqu$ time ./gomd5 -cnt=1 -which=true -path="by2.out"

real    0m0.048s
user    0m0.033s
sys     0m0.018s
banjakukutekiiMac:shell panshiqu$ time ./gomd5 -cnt=1 -which=false -path="by.out"

real    0m0.018s
user    0m0.012s
sys     0m0.004s
banjakukutekiiMac:shell panshiqu$ time ./gomd5 -cnt=1 -which=false -path="by2.out"

real    0m0.031s
user    0m0.024s
sys     0m0.005s
banjakukutekiiMac:shell panshiqu$ time md5 by.out
MD5 (by.out) = 9d79e19a00cef1ae1bb6518ca4adf9de

real    0m0.023s
user    0m0.019s
sys     0m0.006s
banjakukutekiiMac:shell panshiqu$ time md5 by2.out
MD5 (by2.out) = 0a029a460a20e8dcb00d032d6fab74c6

real    0m0.042s
user    0m0.037s
sys     0m0.009s

总结:
* 不管使用哪种方法，耗时都会随着文件变大而变长；上面的例子中文件大小翻倍，耗时也大约变为原来的 2 倍
* io.Copy 方法效率最高（它以流的方式分块读取并计算，不需要像 ReadAll 那样先把整个文件读入内存），建议大家这样使用

猜你在找的Go相关文章