Compress.h

#ifndef COMPRESS_H
#define COMPRESS_H
int Compress(const char* pFilename);
char Str2byte(const char* pBinStr);
int Encode(const char* pFilename, const HuffmanCode pHC, char* pBuffer, const int nSize);

struct HEAD
{
	char type[4];//文件类型
	int length;//原文件长度
	int weight[256];//权值数值
};
int WriteFile(const char* pFilename, const HEAD sHead, const char* pBuffer, const int nSize);
int InitHead(const char* pFilename, HEAD& sHead);
#endif

Huffman.h

#ifndef HUFFMAN_H
#define HUFFMAN_H
#define OK 1
#define SIZE 256
struct HTNode {
	int weight;//权值
	int parent;//父节点
	int lchild;//左孩子
	int rchild;//右孩子
};
typedef HTNode* HuffmanTree;//动态分配数组存储Huffman树
typedef char** HuffmanCode;//动态分配哈夫曼编码表


//void PreorderTraverse(int root, HuffmanTree pHT);
int HuffmanCoding(HuffmanCode& pHC, HuffmanTree& pHT);
int Select(HuffmanTree pHT, int nSize);
void TestHufTree(HuffmanTree pHT);
void TestHufCode(int root, HuffmanTree pHT, HuffmanCode pHC);
void TestHufTreeN(int root, HuffmanTree pHT);

int HfmTree(HuffmanTree& pHT, int* w, int n);

#endif

Compress.cpp

#include"huffman.h"
#include"Compress.h"
#include<iostream>
#pragma warning( disable : 4996)
using namespace std;
//Compress
//InitHead
//Encode
//Str2byte
//WriteFile
char Str2byte(const char* pBinStr)
{
	char b = 0x00;
	for (int i = 0; i < 8; i++)
	{
		b = b << 1;
		if (pBinStr[i] == '1')
		{
			b = b | 0x01;
		}
	}
	return b;
}

int Compress(const char* pFilename)
{
	int weight[256] = { 0 };
	//以二进制打开文件
	FILE* in = fopen(pFilename, "rb");
	if (in == NULL)
	{
		cout << "Failed to open the file!" << endl;
		exit(0);
	}
	cout << "成功打开文件 " << pFilename << endl;
	int ch;
	while ((ch = getc(in)) != EOF)
	{
		weight[ch]++;
	}
	fclose(in);
	//cout << "Byte Weight" << endl;
	//for (int i = 0; i < SIZE; i++)
	//{
	//    printf("0x%02X %d\n", i, weight[i]);
	//}

	HuffmanTree hfmt;
	HfmTree(hfmt, weight, SIZE);
	cout << "成功生成哈夫曼树" << endl;
	//    TestHufTree(hfmt);
		//    TestHufTreeN(511, hfmt);
	HuffmanCode hfmc = (HuffmanCode)malloc((SIZE + 1) * sizeof(char*));
	//    for (int i = 1; i <= SIZE; i++)
	//        hfmt[i].weight = weight[i - 1]
		//根据哈夫曼树进行编码
	HuffmanCoding(hfmc, hfmt);
	cout << "成功完成哈夫曼编码" << endl;
	//    cout << "先序遍历哈夫曼树输出编码信息:" << endl;
	//    TestHufCode(2 * SIZE - 1, hfmt, hfmc);//测试哈夫曼编码
	//    cout << "压缩后的文件编码:" << endl;

		//计算编码缓冲区大小
	int nSize = 0;
	for (int i = 0; i < 256; i++)
	{
		nSize += weight[i] * strlen(hfmc[i + 1]);
	}
	nSize = (nSize % 8) ? nSize / 8 + 1 : nSize / 8;

	//    cout <<"nSize = "<<nSize << endl << endl;

		//对原文件进行压缩编码
	char* pBuffer = NULL;
	pBuffer = (char*)malloc(nSize * sizeof(char));
	memset(pBuffer, 0, (nSize) * sizeof(char));
	//    cout << "begin: " << strlen(pBuffer) << endl;
	    cout << "----";
	//    int n;
	//    cout << "input n:";
	//    cin >> n;
		//将编码写入缓冲区
	Encode(pFilename, hfmc, pBuffer, nSize);
	//    cout << "after: " << strlen(pBuffer) << endl;
	//    cout << "len of puf  = " << strlen(pBuffer) << endl;
	//    cout << "!pBuffer = " << !pBuffer << endl;
	if (!pBuffer)
	{
		cout << "!pBuffer = " << !pBuffer << endl;
		return -1;
	}
	cout << "\n压缩完毕" << endl;
	//for (int i = 1; i < strlen(pBuffer); i++)
	//{
	//    printf("%d", pBuffer[i]);
	//}

	HEAD sHead;
	InitHead(pFilename, sHead);
	cout << "原文件" << pFilename << "大小为:" << sHead.length << "Byte" << endl;
	int len_after = WriteFile(pFilename, sHead, pBuffer, nSize);
	cout << "大小为:" << len_after << "Byte \n头文件sHead大小为:" << sizeof(sHead) << "Byte" << endl;
	cout << "压缩比率:" << (double)len_after * 100 / sHead.length << "%" << endl;
	free(hfmt);
	free(hfmc);
	free(pBuffer);
	return OK;
}


int Encode(const char* pFilename, const HuffmanCode pHC, char* pBuffer, const int nSize)
{
	//开辟缓冲区
//    cout << "+++++";
	FILE* in = fopen(pFilename, "rb");
	if (in == NULL)
	{
		cout << "Failed to open the file!" << endl;
		exit(0);
	}
	pBuffer = (char*)malloc(nSize * sizeof(char));
	if (!pBuffer)
	{
		cerr << "开辟缓冲区失败" << endl;
		return -1;
	}
	cout << "loading";
	int sign = 0;//用于控制小数点输出
	char cd[SIZE] = { 0 };//工作区
	int pos = 0;//缓冲区指针
	int ch;
	//扫描文件,根据huffmman编码表对其进行压缩,压缩结果暂存到缓冲区中
	while ((ch = getc(in)) != EOF)
	{
		if (sign % 1000 == 1)
			printf(".");
		sign++;
		strcat(cd, pHC[ch + 1]);//从HC复制编码串到cd


		//打印压缩后的文件编码
//        printf("%s", pHC[ch + 1]);


		//压缩编码
		while (strlen(cd) >= 8)
		{
			//截取字符串左边的8个字符,编码成字节
			pBuffer[pos++] = Str2byte(cd);
			//字符串整体左移8个字节
			for (int i = 0; i < SIZE - 8; i++)
			{
				cd[i] = cd[i + 8];
			}
		}
	}
	if (strlen(cd) > 0)
	{
		pBuffer[pos++] = Str2byte(cd);
	}
	fclose(in);
	//for (int i = 1; i < nSize; i++)
	//{
	//    printf("%d ", pBuffer[i]);
	//}
//    cout << endl<<"before: " << strlen(pBuffer) << endl;
	return OK;
}

int InitHead(const char* pFilename, HEAD& sHead)
{
	//初始化文件头
	strcpy(sHead.type, "HUF");//文件类型
	sHead.length = 0;//原文件长度
	for (int i = 0; i < SIZE; i++)
	{
		sHead.weight[i] = 0;
	}
	FILE* in = fopen(pFilename, "rb");
	int ch;
	while ((ch = fgetc(in)) != EOF)
	{
		sHead.weight[ch]++;
		sHead.length++;
	}
	fclose(in);
	in = NULL;
	return OK;
}

int WriteFile(const char* pFilename, const HEAD sHead, const char* pBuffer, const int nSize)
{
	//生成文件名
	char filename[256] = { 0 };
	strcpy(filename, pFilename);
	strcat(filename, ".huf");
	//以二进制流形式打开文件
	FILE* out = fopen(filename, "wb");
	//写文件头
	fwrite(&sHead, sizeof(char), 1, out);
	//写压缩后的编码
	fwrite(pBuffer, sizeof(char), nSize, out);
	//关闭文件,释放文件指针
	fclose(out);
	out = NULL;
	cout << "生成压缩文件:" << filename << endl;
	int len = sizeof(HEAD) + strlen(pFilename) + 1 + nSize;
	return len;
}

Huffman.cpp

#include<iostream>
#include<cstring>
#include"huffman.h"
#pragma warning( disable : 4996)
using namespace std;
/*
void PreorderTraverse(int root, HuffmanTree pHT)
{
	cout << pHT[root].weight << " ";//访问节点
	if (pHT[root].lchild)//左孩子
	{
		PreorderTraverse(pHT[root].lchild, pHT);
	}
	if (pHT[root].rchild)//右孩子
	{
		PreorderTraverse(pHT[root].rchild, pHT);
	}
}
*/
int HuffmanCoding(HuffmanCode& pHC, HuffmanTree& pHT)
{
	//    pHC = (HuffmanCode)malloc((SIZE + 1) * sizeof(char*));
		//无栈非递归遍历 
	char cd[SIZE] = { '\0' };//记录访问路径
	int cdlen = 0;//记录当前路径长度
	for (int i = 1; i < 512; i++)
	{
		pHT[i].weight = 0;//遍历 Huffman树时用作节点的状态标志
	}

	int p = 2 * SIZE - 1;//根节点
	while (p != 0)
	{
		if (pHT[p].weight == 0)//向左
		{
			pHT[p].weight = 1;
			if (pHT[p].lchild != 0)
			{
				p = pHT[p].lchild;
				cd[cdlen++] = '0';
			}
			else if (pHT[p].rchild == 0)//登记叶子节点的字符编码
			{
				pHC[p] = (char*)malloc((cdlen + 1) * sizeof(char));
				cd[cdlen] = '\0';
				strcpy(pHC[p], cd);//复制编码
			}
		}
		else if (pHT[p].weight == 1)//向右
		{
			pHT[p].weight = 2;
			if (pHT[p].rchild != 0)//右孩子为叶子节点
			{
				p = pHT[p].rchild;
				cd[cdlen++] = '1';
			}
		}
		else
		{
			//退回父节点,编码长度减1
			pHT[p].weight = 0;
			p = pHT[p].parent;
			--cdlen;
		}
		//        printf("*");
	}
	return OK;
}

int Select(HuffmanTree pHT, int nSize)
{
	int minValue = 0x7FFFFFFF;//最小值
	int min = 0;
	//找到最小权值的元素序号
	for (int i = 1; i <= nSize; i++)
	{
		if (pHT[i].parent == 0 && pHT[i].weight < minValue)
		{
			minValue = pHT[i].weight;
			min = i;
		}
	}
	return min;
}

void TestHufTree(HuffmanTree pHT)
{
	for (int i = 1; i < 2 * SIZE; i++)
	{
		printf("pHT[%d]\t%d\t%d\t%d\t%d\n", i, pHT[i].weight, pHT[i].parent, pHT[i].lchild, pHT[i].rchild);
	}
}

int HfmTree(HuffmanTree& pHT, int* w, int n)
{
	int m = 2 * n - 1;
	pHT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));
	if (!pHT)
	{
		cerr << "内存分配失败! " << endl;
		return -1;
	}
	//初始化树
	HuffmanTree p = pHT + 1;//0号单元不使用
	for (int i = 0; i < m; i++)
	{
		p->weight = (i < n) ? w[i] : 0;
		p->parent = 0;
		p->lchild = 0;
		p->rchild = 0;
		p++;
	}
	for (int i = n + 1; i <= m; i++)
	{
		//第一个最小元素
		int s1 = Select(pHT, i - 1);//找出前i-1个中最小元素
		pHT[s1].parent = i;

		//第二个最小元素
		int s2 = Select(pHT, i - 1);
		pHT[s2].parent = i;

		pHT[i].weight = pHT[s1].weight + pHT[s2].weight;
		pHT[i].lchild = s1;
		pHT[i].rchild = s2;
	}
	return 0;
}

void TestHufCode(int root, HuffmanTree pHT, HuffmanCode pHC)
{
	if (pHT[root].lchild == 0 && pHT[root].rchild == 0)
	{
		printf("0x%02X %s\n", root - 1, pHC[root]);
	}
	if (pHT[root].lchild)//访问左孩子
	{
		TestHufCode(pHT[root].lchild, pHT, pHC);
	}
	if (pHT[root].rchild)
	{
		TestHufCode(pHT[root].rchild, pHT, pHC);
	}
}

void TestHufTreeN(int root, HuffmanTree pHT)
{
	cout << pHT[root].weight << "\t" << pHT[root].lchild << "\t" << pHT[root].rchild << "\t" << pHT[root].parent << "\n";
	if (pHT[root].lchild != 0)
	{
		TestHufTreeN(pHT[root].lchild, pHT);
	}
	if (pHT[root].rchild != 0)
	{
		TestHufTreeN(pHT[root].rchild, pHT);
	}
}

main.cpp

#include"huffman.h"
#include"Compress.h"
#include<iostream>
#include<cstdlib>
using namespace std;
#pragma warning( disable : 4996)

int main()
{
	cout << "= = = = = = = =Huffman 文件压缩= = = = = = = =" << endl;
	cout << "请输入文件名:";
	char filename[256];
	cin >> filename;
	Compress(filename);


	//    system("pause");
	return 0;
}
Logo

为武汉地区的开发者提供学习、交流和合作的平台。社区聚集了众多技术爱好者和专业人士,涵盖了多个领域,包括人工智能、大数据、云计算、区块链等。社区定期举办技术分享、培训和活动,为开发者提供更多的学习和交流机会。

更多推荐