<html>
<HEAD></HEAD>
<BODY>
<textarea rows="10" cols="50"> </textarea>
<FONT >û</FONT>
<textarea rows="20" cols="150">
【文件压缩解压项目】
【项目技术:】
模版堆,哈夫曼树,哈夫曼编码,函数对象,feof函数,文件读写
【项目准备:】
文件运行于Visual Studio 平台(VS),
数据准备一份待压缩文件,本程序运行可以不做准备。
【项目过程:】
压缩过程
1.从待压缩原文件中统计字符个数,利用堆建立哈夫曼树。
2.依据哈弗曼树写入每个字符相应的哈夫曼编码。将这里的字符 和统计出现的次数写入配置文件。
3.打开一个新文件,按照哈夫曼编码用较短编码代替原文件较长的编码,实现压缩。
//配置文件丢失后 压缩的文件也就失去了意义这两个文件是相互对应互相起作用的。
解压过程:
1.根据配置文件中的次数再次建立哈夫曼树,哈夫曼字符编码集。
2.根据哈夫曼字符编码集,翻译程序生成的短的编码文件。翻译得到的内容写入新文件
//可以对比原文件与翻译后的文件
//时间上的考虑,在调试版本与发行版本中压缩世界总是大于解压时间。
【项目思考:】对于少量信息的文本,压缩所能遇见的问题 对于中文字符 二次压缩
为什么配置文件里不直接写构造好的编码 ,虽然这样可以做到
因为人的思想总是快一点 以为对照字符与哈夫曼编码进行翻译能够快 而实际计算机在运行中 总是对 待翻译的编码
在配置文件中比对编码和寻找对应的字符。
对于项目运行后的警告 文件读写复习。
</textarea>
<p><FONT >324</FONT></p>
<textarea rows="20" cols="150">
#include <iostream>
using namespace std;
#include <cassert>
#include <string>
#include <vector>
#include <cassert>
#include <time.h>
template<class T>
struct Less
{
bool operator() (const T& l, const T& r)
{
return l < r;
}
};
template<class T>
struct Greater
{
bool operator() (const T& l, const T& r)
{
return l > r;
}
};
template<class T, class Compare = Less>
class Heap
{
public:
Heap()
{}
Heap(const T* a, size_t size)
{
_a.reserve(size);
for (size_t i = 0; i < size; ++i)
{
_a.push_back(a[i]);
}
for (int i = (_a.size() - 2) / 2; i >= 0; --i)
{
_AdjustDown(i);
}
}
Heap(vector<T>& a)
{
_a.swap(a);
for (int i = (_a.size() - 2) / 2; i >= 0; --i)
{
_AdjustDown(i);
}
}
void Push(const T& x)
{
_a.push_back(x);
_AdjustUp(_a.size() - 1);
}
void Pop()
{
size_t size = _a.size();
assert(size > 0);
swap(_a[0], _a[size - 1]);
_a.pop_back();
_AdjustDown(0);
}
T& Top()
{
assert(!_a.empty());
return _a[0];
}
size_t size()
{
assert(!_a.empty());
return (_a.size());
}
protected:
void _AdjustUp(int child)
{
int parent = (child - 1) / 2;
while (child > 0)
{
Compare com;
if (com(_a[child], _a[parent]))
{
swap(_a[child], _a[parent]);
child = parent;
parent = (child - 1) / 2;
}
else
{
break;
}
}
}
void _AdjustDown(size_t parent)
{
size_t child = parent * 2 + 1;
while (child < _a.size())
{
Compare com;
if (child + 1 < _a.size()
&& com(_a[child + 1], _a[child]))
{
++child;
}
if (com(_a[child], _a[parent]))
{
swap(_a[parent], _a[child]);
parent = child;
child = 2 * parent + 1;
}
else
{
break;
}
}
}
protected:
vector<T> _a;
};
/*/
------文件可拆分线--------#include "Heap.h" -------
构建哈夫曼树 (利用小堆构建哈夫曼树)
/*/
template <class T>
struct HuffmanNode
{
HuffmanNode<T>*_left;
HuffmanNode<T>*_right;
T _weight;
HuffmanNode(const T&weight)
:_left(NULL)
, _right(NULL)
, _weight(weight)
{}
};
template <class T>
class HuffmanTree
{
typedef HuffmanNode<T> Node;
public:
HuffmanTree(const T*a, size_t size, const T& invalid)
/*/invalid代表非法值,若为非法值,则不构建哈夫曼树/*/
{
_root = CreateTree(a, size, invalid);
}
Node* GetRootNode()
{
return _root;
}
protected:
Node* CreateTree(const T*a, size_t size, const T& invalid)
{
struct Compare
{
bool operator()(const Node*l, const Node*r)
{
return (l->_weight < r->_weight);
}
};
Heap <Node*, Compare> minHeap;
for (size_t i = 0; i < size; ++i)
{
if (a[i] != invalid)
{
minHeap.Push(new Node(a[i]));
}
}
/*/小堆的top结点的权值必是最小的,每次选出小堆的top构造哈夫曼树的结点/*/
while (minHeap.size()>1)
{
Node* left = minHeap.Top();
minHeap.Pop();
Node* right = minHeap.Top();
minHeap.Pop();
Node* parent = new Node(left->_weight + right->_weight);
/*/哈夫曼树特点,父结点是两个子结点和/*/
parent->_left = left;
parent->_right = right;
minHeap.Push(parent);
}
return minHeap.Top();
}
protected:
HuffmanNode<T>* _root;
};
/*/
----文件可分割线--#include "HuffmanTree.h"------
统计字符次数 构建哈夫曼树(堆) 生成哈夫曼编码 读取源文件字符压缩
哈夫曼树根结点的权值就是源文件读入的个数
统计字符次数
/*/
typedef unsigned long long LongType;
struct CharInfo
{
unsigned char _ch; /*/字符/*/
LongType _count; /*/出现次数/*/
string _code; /*/Huffman code/*/
CharInfo()
:_ch(0)
, _count(0)
{}
CharInfo(LongType count)
:_ch(0)
, _count(count)
{}
bool operator!=(const CharInfo&info) const
{
return _count != info._count;
}
CharInfo operator+(const CharInfo&info) const
{
return CharInfo(_count + info._count);
}
bool operator<(const CharInfo&info) const
{
return _count < info._count;
}
};
template <class T>
class FileCompress
{
typedef HuffmanNode<T> Node;
public:
FileCompress()
{
for (size_t i = 0; i < 256; ++i)
{
_infos[i]._ch = i;
_infos[i]._count = 0;
}
}
public:
void Compress(const char* filename)
{
assert(filename);
/*/统计文件中字符出现的次数/*/
FILE* fout = fopen(filename, "rb");
assert(fout);
char ch = fgetc(fout);
while (!feof(fout))
{
_infos[(unsigned char)ch]._count++;
ch = fgetc(fout);
}
/*/构建哈夫曼树/*/
CharInfo invalid(0);
HuffmanTree<CharInfo>tree(_infos, 256, invalid);
/*/生成哈夫曼编码/*/
string code;
GenerateHuffmanCode(tree.GetRootNode(), code);
/*/读取源文件字符压缩,将哈夫曼编码写进对应的位/*/
string Compressfilename = filename;
Compressfilename += ".compress";
FILE* fin = fopen(Compressfilename.c_str(), "wb"); /*/用二进制读写文件/*/
fseek(fout, 0, SEEK_SET); /*/定位到文件起始位置/*/
ch = fgetc(fout);
char value = 0;
int pos = 0;
while (!feof(fout))
{
string &code = _infos[(unsigned char)ch]._code;
/*/注意code要为引用/*/
for (size_t i = 0; i < code.size(); ++i)
/*/利用位存储哈夫曼编码,减少内存/*/
{
value <<= 1;
if (code[i] == '1')
{
value |= 1;
}
if (++pos == 8)
/*/
满8位(1字节),将哈夫曼编码写进对应的文件,
然后继续读取这个字符的后续编码
/*/
{
fputc(value, fin);
value = 0;
pos = 0;
}
}
ch = fgetc(fout); /*/继续读取下一个字符的哈夫曼编码/*/
}
if (pos != 0)
/*/如果最后几位哈夫曼编码不满8位,
则需要补充记录, 后续补充(配置文件)/*/
{
value <<= (8 - pos);
fputc(value, fin);
}
/*/写配置文件,方便解压缩的时候重建哈夫曼树/*/
string configfilename = filename;
configfilename += ".config";
FILE* finconfig = fopen(configfilename.c_str(), "wb");
assert(finconfig);
char buffer[128]; /*/设置写入缓冲区/*/
string str;
for (size_t i = 0; i < 256; ++i)
{
if (_infos[i]._count>0)
{
str += _infos[i]._ch;
str += ",";
str += _itoa(_infos[i]._count, buffer, 10);
/*/itoa将整数_count转换成字符存入buffer中,10为进制/*/
str += '\n';
}
fputs(str.c_str(), finconfig);
str.clear();
}
fclose(fout);
fclose(fin);
fclose(finconfig);
}
/*/解压缩/*/
void UnCompress(const char* filename)
{
string configfilename = filename;
configfilename += ".config";
FILE* foutconfig = fopen(configfilename.c_str(), "rb");
assert(foutconfig);
string str;
LongType charCount = 0;
while (Readline(foutconfig, str))
{
if (str.empty())
{
str += '\n';
}
else
{
_infos[(unsigned char)str[0]]._count = atoi(str.substr(2).c_str());
/*/将配置文件中保存的字符格式转换为次数,
(如a,4所以从第2个字符开始)/*/
str.clear();
}
}
/*/重建哈夫曼树/*/
CharInfo invalid(0);
HuffmanTree<CharInfo>tree(_infos, 256, invalid);
charCount = tree.GetRootNode()->_weight._count;
string Compressfilename = filename;
Compressfilename += ".compress";
FILE* fout = fopen(Compressfilename.c_str(), "rb");
assert(fout);
string UnCompressfilename = filename;
UnCompressfilename += ".uncompress";
FILE* fin = fopen(UnCompressfilename.c_str(), "wb");
assert(fin);
char ch = fgetc(fout);
HuffmanNode<CharInfo>* root = tree.GetRootNode();
HuffmanNode<CharInfo>* cur = root;
int pos = 7;
while (1)
{
if (ch & (1 << pos))
{
cur = cur->_right;
}
else
{
cur = cur->_left;
}
if (pos-- == 0)
{
pos = 7;
ch = fgetc(fout); /*/继续读取字符/*/
}
if (cur->_left == NULL&&cur->_right == NULL)
{
fputc(cur->_weight._ch, fin);
if (--charCount == 0)
{
break;
}
cur = root;
}
}
fclose(fin);
}
protected:
void GenerateHuffmanCode(HuffmanNode<CharInfo>* root, string code)
{
if (root == NULL)
{
return;
}
if (root->_left)
{
GenerateHuffmanCode(root->_left, code + '0');
}
if (root->_right)
{
GenerateHuffmanCode(root->_right, code + '1');
}
if ((root->_left == NULL) && (root->_right == NULL))
{
_infos[root->_weight._ch]._code = code;
}
}
bool Readline(FILE* fout, string& str)
{
char ch = fgetc(fout);
if (feof(fout))
{
return false;
}
while (!feof(fout) && ch != '\n')
{
str += ch;
ch = fgetc(fout);
}
return true;
}
protected:
CharInfo _infos[256];
};
void TestCompress()
{
FileCompress<int> fc;
double start_compress = clock();
fc.Compress("Input.txt");
double finish_compress = clock();
fc.UnCompress("Input.txt");
double finish_uncompress = clock();
cout << "压缩时间是:" << finish_compress - start_compress << "ms" << endl;
cout << "解压缩时间是:" << finish_uncompress - finish_compress << "ms" << endl;
}
void wztest(char* s)
{
FileCompress<int> fc;
double start_compress = clock();
fc.Compress(s);
double finish_compress = clock();
fc.UnCompress(s);
double finish_uncompress = clock();
cout << "压缩时间是:" << finish_compress - start_compress << "ms" << endl;
cout << "解压缩时间是:" << finish_uncompress - finish_compress << "ms" << endl;
}
#include <iostream>
#include <fstream>
using namespace std;
void testfile()
{
char *s="WZXXXXXXXXXXXX.TXT";
ofstream fout(s);
int ss=100;
while(ss--){
fout <<"123abcdefghijklmnopqrstuvwxyz";
}
wztest(s);
}
int main0()
{
TestCompress();
system("pause");
return 0;
}
int main()
{ FILE *pFile = fopen("xxx.txt", "w");
char x[]="wzzx sts bit ";
fwrite(x, 6,12,pFile);
fclose(pFile); fflush(pFile);
wztest("xxx.txt");
system("pause");
return 0;
}
</textarea>
<p><FONT >ogr</FONT></p>
<p><FONT >pgspt</FONT></p>
<p><FONT >%^&*(</FONT></p>
<textarea rows="20" cols="150">
程序运行可以进行微设计
可以用用户输入绝对路径
然后将对应的文件压缩 同文件目录中产生新文件
注意转义操作
</textarea>
<FONT >ABCDEFGH</FONT>
<p><FONT >LMOIJK</FONT></p>
<p><FONT >PQRSD</FONT></p>
<textarea rows="20" cols="150">
</textarea>
</div>
</body>
</html>