词典编码:LZ77算法(C/C++)
一、基本思想
用指向早期曾经出现过的字符串的指针来表示当前被编码的字符串,如:
二、LZ77算法
- 算法伪码:
- 示意图:
- 举例:
三、C/C++实现
#include <iostream>
#include <vector>
#include <string>
using namespace std;
struct code {
int off;
int len;
char nextChar;
};
#define maxWindow 10
vector<code> vt;
int main(int argc, char* argv[]) {
//string str = "abacdbacc";
string str = "abcdbbccaaabaeaaabaee";
cout << str << endl;
int window1, window2;
//编码
for(int i = 0; i < str.length(); i++) {
if(i + 1 <= maxWindow) window1 = i; //确定查询窗口的尺寸
else window1 = maxWindow;
if(i + window1 < str.length()) window2 = window1;
else window2 = str.length() - i;
//printf("%d %d %d\n", window1, window2, i);
string str1 = str.substr(i - window1, window1);
string str2 = str.substr(i, window2);
//cout << str1 << " : " << str2 << endl;
int off = -1;
while(true) {
if(window2 == 0) break; //空串,直接退出
string str3 = str2.substr(0, window2);
off = str1.find(str3); //找不到,off = -1
//cout << str3 << " :: " << off << " :: " << i + window2 << endl;
if(off != -1) break; //找到了,退出
window2--;
if(window2 <= 0) break;
}
if(off != -1) {
code cd;
cd.len = window2;
cd.off = window1 - off;
cd.nextChar = str[i + window2];
vt.push_back(cd);
i += window2;
} else {
code cd;
cd.len = 0;
cd.off = 0;
cd.nextChar = str[i + window2];
vt.push_back(cd);
}
}
for(int i = 0; i < vt.size(); i++) { //编码结果
printf("(%d,%d,%c)", vt[i].off, vt[i].len, vt[i].nextChar);
}
printf("\n");
//解码
string strOut;
for(int i = 0; i < vt.size(); i++) {
if(vt[i].len == 0) {
strOut += vt[i].nextChar;
} else {
int len = strOut.length();
len -= vt[i].off;
string temp = strOut.substr(len, vt[i].len);
strOut += temp + vt[i].nextChar;
}
}
cout << strOut;
return 0;
}
- 运行结果