我想统计一个字节序列在另一个字节序列中出现的总次数。但是，已经被计入某次匹配的字节不能再被重复使用。例如，给定字符串
k.k.k.k.k.k.，假设要查找的字节序列是 k.k，那么只应找到 3 次而不是 5 次，因为匹配应按如下方式划分：[k.k].[k.k].[k.k].，而不是像 [k.[k].[k].[k].[k].k] 那样相互重叠、每次基本上只向右移动 2 个字节。
理想情况下，这样做是为了了解压缩字典或运行时编码大致会是什么样子。所以目标是让 k.k.k.k.k.k. 只被划分成 2 个部分，因为 (k.k.k.) 是能够得到的最大、最优的符号。
以下是到目前为止的源代码：
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;

/// <summary>
/// Experimental dictionary-style compression probe. It takes every
/// contiguous run of bytes of "ok.txt" as a candidate symbol, counts the
/// non-overlapping occurrences of that symbol in the data, removes every
/// occurrence after the first, and records a score for the symbol.
/// </summary>
static class Compression
{
    /// <summary>
    /// Program entry point. Reads "ok.txt", evaluates candidate patterns
    /// from the longest length down to one byte, prints the collected
    /// statistics and waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    /// <returns>Always 0.</returns>
    private static int Main(string[] args)
    {
        List<byte> bytes = File.ReadAllBytes("ok.txt").ToList();
        List<List<int>> list = new List<List<int>>();

        // Starting pattern length in bytes - this can be changed manually.
        int startingNumBytes = bytes.Count;
        for (int i = startingNumBytes; i > 0; i--)
        {
            Console.WriteLine("i: " + i);

            // "<=" so that the window ending on the last byte is also tried.
            // bytes.Count is re-read on every pass because matches below
            // shrink the list.
            for (int ii = 0; ii <= bytes.Count - i; ii++)
            {
                Console.WriteLine("ii: " + ii);

                // Copy the window: the pattern must stay stable while bytes
                // are being removed from the source list.
                List<byte> pattern = bytes.GetRange(ii, i);

                DisplayBinary(bytes, "red");
                DisplayBinary(pattern, "green");

                int matches = 0;
                for (int position = 0; position <= bytes.Count - pattern.Count; position++)
                {
                    if (!MatchesAt(bytes, pattern, position))
                    {
                        continue;
                    }

                    matches++;
                    Console.WriteLine("Matches: " + matches + " Orig Count: " + bytes.Count + " POS: " + position);

                    if (matches > 1)
                    {
                        // Repeated occurrence: drop it from the data.
                        bytes.RemoveRange(position, pattern.Count);
                        DisplayBinary(bytes, "red");
                        Console.WriteLine(pattern.Count + " Bytes removed.");

                        // The list shifted left, so the same index must be
                        // examined again after the loop increment.
                        position--;
                    }
                    else
                    {
                        // First occurrence is kept in place; jump past it so
                        // that its bytes cannot be reused by an overlapping
                        // match (the loop increment adds the final +1).
                        position += pattern.Count - 1;
                    }
                }

                List<int> sublist = new List<int>();
                sublist.Add(matches);
                sublist.Add(pattern.Count);
                // Some sort of calculation to determine how good the symbol was.
                sublist.Add(bytes.Count - ((matches * pattern.Count) - matches));
                list.Add(sublist);
            }
        }

        Display(list);
        Console.Read();
        return 0;
    }

    /// <summary>
    /// Tells whether <paramref name="needle"/> occurs in
    /// <paramref name="haystack"/> starting exactly at
    /// <paramref name="start"/>. The caller guarantees that the needle fits.
    /// </summary>
    private static bool MatchesAt(IList<byte> haystack, IList<byte> needle, int start)
    {
        for (int offset = 0; offset < needle.Count; offset++)
        {
            if (haystack[start + offset] != needle[offset])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Writes the bytes to the console as 8-digit binary groups, eight
    /// groups per row, optionally coloured.
    /// </summary>
    /// <param name="bytes">Bytes to print.</param>
    /// <param name="color">"green" or "red"; any other value leaves the current console colour.</param>
    private static void DisplayBinary(List<byte> bytes, string color = "white")
    {
        switch (color)
        {
            case "green":
                Console.ForegroundColor = ConsoleColor.Green;
                break;
            case "red":
                Console.ForegroundColor = ConsoleColor.Red;
                break;
            default:
                break;
        }

        for (int i = 0; i < bytes.Count; i++)
        {
            // Start a new row before every group of eight bytes.
            if (i % 8 == 0)
            {
                Console.WriteLine();
            }

            Console.Write(GetIntBinaryString(bytes[i]) + " ");
        }

        Console.WriteLine();
        Console.ResetColor();
    }

    /// <summary>
    /// Formats the low eight bits of <paramref name="n"/> as a zero-padded
    /// binary string, most significant bit first.
    /// </summary>
    private static string GetIntBinaryString(int n)
    {
        // Masking keeps only the low byte, matching the 8-bit output width.
        return Convert.ToString(n & 0xFF, 2).PadLeft(8, '0');
    }

    /// <summary>
    /// Prints every sub-list on its own row (values right-aligned in four
    /// columns) followed by the total number of values over all sub-lists.
    /// </summary>
    private static void Display(List<List<int>> list)
    {
        Console.WriteLine("Elements:");
        foreach (var sublist in list)
        {
            foreach (var value in sublist)
            {
                Console.Write("{0,4}", value);
            }

            Console.WriteLine();
        }

        Console.WriteLine("Count:");
        Console.WriteLine(list.Sum(sublist => sublist.Count));
    }

    /// <summary>
    /// Counts the non-overlapping occurrences of <paramref name="pattern"/>
    /// in <paramref name="bytes"/>, scanning left to right.
    /// </summary>
    /// <param name="pattern">Byte sequence to look for.</param>
    /// <param name="bytes">Data to search.</param>
    /// <returns>
    /// Number of matches; 0 when the pattern is empty or longer than the data.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static int SearchBytePattern(byte[] pattern, byte[] bytes)
    {
        if (pattern == null)
        {
            throw new ArgumentNullException("pattern");
        }

        if (bytes == null)
        {
            throw new ArgumentNullException("bytes");
        }

        if (pattern.Length == 0)
        {
            return 0;
        }

        int matches = 0;
        // Last index at which the pattern still fits; inclusive, so a match
        // ending on the final byte is counted as well.
        int lastStart = bytes.Length - pattern.Length;
        int i = 0;
        while (i <= lastStart)
        {
            if (MatchesAt(bytes, pattern, i))
            {
                matches++;
                // Skip the matched bytes so they are not reused.
                i += pattern.Length;
            }
            else
            {
                i++;
            }
        }

        return matches;
    }
}
文件的文本（非二进制）内容请参见上文，下面是对应的二进制数据：
011010110010111001101011001011100110101100101110011010110010111001101011001011100110101100101110 我希望最终结果比初始大小更小。
解决方法
/// <summary>
/// Counts the non-overlapping occurrences of <paramref name="pattern"/> in
/// <paramref name="target"/>, scanning left to right; bytes consumed by one
/// match are never reused by another.
/// </summary>
/// <param name="target">Data to search.</param>
/// <param name="pattern">Byte sequence to look for.</param>
/// <returns>
/// Number of matches; 0 when the pattern is empty or longer than the target.
/// </returns>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
private static int CountOccurences(byte[] target, byte[] pattern)
{
    if (target == null)
    {
        throw new ArgumentNullException("target");
    }

    if (pattern == null)
    {
        throw new ArgumentNullException("pattern");
    }

    // An empty pattern has no meaningful occurrence count.
    if (pattern.Length == 0)
    {
        return 0;
    }

    int count = 0;
    // Last index at which the pattern still fits (inclusive).
    int lastStart = target.Length - pattern.Length;
    int start = 0;
    while (start <= lastStart)
    {
        int offset = 0;
        while (offset < pattern.Length && target[start + offset] == pattern[offset])
        {
            offset++;
        }

        if (offset == pattern.Length)
        {
            count++;
            // Jump past the match so its bytes are not counted twice.
            start += pattern.Length;
        }
        else
        {
            start++;
        }
    }

    return count;
}