MPI矩阵向量乘法

矩阵向量乘法MPI

呼啦啦呼啦啦啦啦啦啦

1913人浏览 · 2022-10-28 16:52:44

呼啦啦呼啦啦啦啦啦啦 · 2022-10-28 16:52:44 发布

文章目录

前言
一、实验原理与提示
二、实验源代码
三、实验效果
四、实验遇到的问题以及解决方法
总结

前言

实现完整的MPI矩阵向量乘法算法，并在分布式环境下完成编译、排错、调试、运行和优化。

一、实验原理与提示

为方便矩阵的生成和计算结果的验证，可以通过自定义函数直接生成单位矩阵和元素全为1的向量用于计算。要特别注意，C语言中传递二维数组给函数时，只能以一维数组的形式传递，并在函数内部把一维数组视为二维数组使用。

二、实验源代码

代码如下：

#include<stdio.h>
#include<mpi.h>
#include<stdlib.h>
/*
 * Read the two dimensions m and n on rank 0 and broadcast them.
 *
 * my_rank: rank of the calling process in MPI_COMM_WORLD.
 * m, n:    outputs; after the call every rank holds the values read on rank 0.
 *
 * Collective over MPI_COMM_WORLD: every rank must call this function.
 * If rank 0 cannot parse two integers the job is aborted, so that
 * uninitialised values are never broadcast.
 */
void Get_input(int my_rank,int *m,int *n)
{
	if(my_rank==0){
		printf("Please enter m,n:\n");
		if(scanf("%d %d",m,n)!=2){
			fprintf(stderr,"Get_input: expected two integers for m and n\n");
			MPI_Abort(MPI_COMM_WORLD,EXIT_FAILURE);
		}
	}
	MPI_Bcast(m,1,MPI_INT,0,MPI_COMM_WORLD);
	MPI_Bcast(n,1,MPI_INT,0,MPI_COMM_WORLD);
}
/*
 * Read an m-by-n matrix (row-major) on rank 0 and scatter it over
 * MPI_COMM_WORLD in chunks of local_m * n doubles per rank.
 *
 * n, m:         number of columns / rows of the full matrix.
 * local_matrix: output buffer with room for local_m * n doubles.
 * local_m:      number of rows delivered to each rank.
 * my_rank:      rank of the calling process in MPI_COMM_WORLD.
 *
 * Collective: every rank must call this function. Only the first
 * local_m * n * (number of ranks) entries of the matrix are distributed.
 * The job is aborted if the root cannot allocate the staging buffer or
 * an element cannot be parsed.
 */
void Get_matrix(int n, int m, double *local_matrix, int local_m, int my_rank)
{
    /* Staging buffer: only allocated on rank 0, NULL (and unused by the scatter) elsewhere. */
    double *A = NULL;

    if (!my_rank)
    {
        A = malloc((size_t)m * (size_t)n * sizeof *A);
        /* malloc(0) may legitimately return NULL, so only treat NULL as failure for a non-empty matrix. */
        if (A == NULL && m > 0 && n > 0)
        {
            fprintf(stderr, "Get_matrix: out of memory\n");
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
        printf("Please enter the matrix:\n");
        for (int i = 0; i < m; ++i)
        {
            for (int j = 0; j < n; ++j)
            {
                if (scanf("%lf", &A[i * n + j]) != 1)
                {
                    fprintf(stderr, "Get_matrix: failed to read element (%d,%d)\n", i, j);
                    MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
                }
            }
        }
    }

    MPI_Scatter(A, local_m * n, MPI_DOUBLE, local_matrix, local_m * n, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    /* The scatter has completed locally, so the staging buffer can go; free(NULL) is a no-op on non-root ranks. */
    free(A);
}

/*
 * Gather the distributed matrix onto rank 0 and print it row by row.
 *
 * my_rank:      rank of the calling process in comm.
 * n, m:         number of columns / rows of the full matrix.
 * local_m:      number of rows held by each rank.
 * local_matrix: this rank's local_m * n doubles (row-major).
 * comm:         communicator the gather runs on.
 *
 * Collective: every rank in comm must call this function. Each rank
 * contributes local_m * n values; rows beyond local_m * (number of ranks)
 * are not filled by the gather.
 */
void Print_matrix(int my_rank,int n,int m,int local_m,double *local_matrix,MPI_Comm comm)
{
	double *matrix = NULL;	/* receive buffer, only significant on rank 0 */
	int i,j;

	if(my_rank==0)
	{
		matrix = malloc((size_t)m*(size_t)n*sizeof *matrix);
		/* malloc(0) may return NULL, so only a non-empty request counts as failure */
		if(matrix==NULL && m>0 && n>0)
		{
			fprintf(stderr,"Print_matrix: out of memory\n");
			MPI_Abort(comm,EXIT_FAILURE);
		}
	}

	/* One gather call shared by root and non-root ranks. */
	MPI_Gather(local_matrix,local_m*n,MPI_DOUBLE,matrix,local_m*n,MPI_DOUBLE,0,comm);

	if(my_rank==0)
	{
		printf("The matrix is:\n");
		for(i=0;i<m;++i)
		{
			for(j=0;j<n;++j)
			{
				printf("%f ",matrix[i*n+j]);
			}
			printf("\n");
		}
		free(matrix);
	}
}

/*
 * Read an n-element vector on rank 0 and scatter it in chunks of
 * local_n doubles per rank.
 *
 * my_rank:      rank of the calling process in comm.
 * n:            length of the full vector.
 * local_n:      number of elements delivered to each rank.
 * local_vector: output buffer with room for local_n doubles.
 * comm:         communicator the scatter runs on.
 *
 * Collective: every rank in comm must call this function. The job is
 * aborted if the root cannot allocate the staging buffer or an element
 * cannot be parsed.
 */
void Get_vector(int my_rank,int n,int local_n,double *local_vector,MPI_Comm comm)
{
	double *vector = NULL;	/* staging buffer, only allocated on rank 0 */
	int i;
	if(my_rank==0)
	{
		vector=malloc((size_t)n*sizeof *vector);
		/* malloc(0) may return NULL, so only a non-empty request counts as failure */
		if(vector==NULL && n>0)
		{
			fprintf(stderr,"Get_vector: out of memory\n");
			MPI_Abort(comm,EXIT_FAILURE);
		}
		printf("Please enter the vector:\n");
		for(i=0;i<n;i++)
		{
			if(scanf("%lf",&vector[i])!=1)
			{
				fprintf(stderr,"Get_vector: failed to read element %d\n",i);
				MPI_Abort(comm,EXIT_FAILURE);
			}
		}
	}
	/* NOTE(review): this runs on every rank, so each process prints a blank line; confirm that is intended. */
	printf("\n");
	MPI_Scatter(vector,local_n,MPI_DOUBLE,local_vector,local_n,MPI_DOUBLE,0,comm);

	/* Release the root's staging buffer; free(NULL) is a no-op on the other ranks. */
	free(vector);
}

/*
 * Gather the distributed vector onto rank 0 and print it on one line.
 *
 * my_rank:      rank of the calling process in comm.
 * n:            length of the full vector.
 * local_n:      number of elements held by each rank.
 * local_vector: this rank's local_n doubles.
 * comm:         communicator the gather runs on.
 *
 * Collective: every rank in comm must call this function. Elements
 * beyond local_n * (number of ranks) are not filled by the gather.
 */
void Print_vector(int my_rank,int n,int local_n,double *local_vector,MPI_Comm comm)
{
	double *vector = NULL;	/* receive buffer, only significant on rank 0 */
	int i;

	if(my_rank==0)
	{
		vector = malloc((size_t)n*sizeof *vector);
		/* malloc(0) may return NULL, so only a non-empty request counts as failure */
		if(vector==NULL && n>0)
		{
			fprintf(stderr,"Print_vector: out of memory\n");
			MPI_Abort(comm,EXIT_FAILURE);
		}
	}

	/* One gather call shared by root and non-root ranks. */
	MPI_Gather(local_vector,local_n,MPI_DOUBLE,vector,local_n,MPI_DOUBLE,0,comm);

	if(my_rank==0)
	{
		printf("The vector is:\n");
		for(i=0;i<n;i++){
			printf("%f ",vector[i]);
		}
		printf("\n");
		free(vector);
	}
}


/*
 * Compute this rank's slice of y = A * x.
 *
 * local_matrix: local_m rows of A, row-major, n columns each.
 * local_vector: this rank's local_n elements of x.
 * local_y:      output; receives local_m dot products.
 * local_m:      number of matrix rows held by this rank.
 * n:            number of columns of A, i.e. length of the full x.
 * local_n:      number of elements of x held by each rank.
 * comm:         communicator the all-gather runs on.
 *
 * Collective: every rank in comm must call this function. The full x is
 * assembled with MPI_Allgather, which delivers local_n values per rank;
 * entries of x beyond local_n * (number of ranks) are therefore not filled,
 * so n is assumed to equal local_n * (number of ranks).
 */
void Mat_vect_mult(double *local_matrix,double *local_vector,double *local_y,int local_m,int n,int local_n,MPI_Comm comm)
{
	int local_i,j;
	double *x;
	double sum;

	x=malloc((size_t)n*sizeof *x);
	/* malloc(0) may return NULL, so only a non-empty request counts as failure */
	if(x==NULL && n>0)
	{
		fprintf(stderr,"Mat_vect_mult: out of memory\n");
		MPI_Abort(comm,EXIT_FAILURE);
	}

	/* Every rank needs the complete vector to form full-row dot products. */
	MPI_Allgather(local_vector,local_n,MPI_DOUBLE,x,local_n,MPI_DOUBLE,comm);

	for(local_i=0;local_i<local_m;local_i++)
	{
		sum=0.0;
		for(j=0;j<n;j++)
		{
			sum+=local_matrix[local_i*n+j]*x[j];
		}
		local_y[local_i]=sum;
	}
	free(x);
}

/*
 * Gather the distributed result vector y onto rank 0 and print it.
 *
 * my_rank: rank of the calling process in comm.
 * local_y: this rank's local_m result values.
 * m:       length of the full result vector.
 * local_m: number of result values held by each rank.
 * comm:    communicator the gather runs on.
 *
 * Collective: every rank in comm must call this function. Elements
 * beyond local_m * (number of ranks) are not filled by the gather.
 */
void Print_y(int my_rank,double *local_y,int m,int local_m,MPI_Comm comm)
{
	double *y=NULL;	/* receive buffer, only significant on rank 0 */
	int i;

	if(my_rank==0){
		y=malloc((size_t)m*sizeof *y);
		/* malloc(0) may return NULL, so only a non-empty request counts as failure */
		if(y==NULL && m>0){
			fprintf(stderr,"Print_y: out of memory\n");
			MPI_Abort(comm,EXIT_FAILURE);
		}
	}

	/* One gather call shared by root and non-root ranks. */
	MPI_Gather(local_y,local_m,MPI_DOUBLE,y,local_m,MPI_DOUBLE,0,comm);

	if(my_rank==0){
		printf("The vector y is:\n");
		for(i=0;i<m;i++)
		{
			printf("%lf ",y[i]);
		}
		printf("\n");
		free(y);
	}
}

/*
 * Driver: read m, n, an m-by-n matrix and an n-vector on rank 0,
 * distribute them by blocks, compute y = A * x in parallel and print
 * the matrix, the vector and the result on rank 0.
 *
 * The block distribution uses integer division (m / comm_sz rows and
 * n / comm_sz vector elements per rank), so m and n must be positive
 * multiples of the number of processes; other sizes are rejected.
 *
 * Returns 0 on success, EXIT_FAILURE if the sizes are rejected.
 */
int main(void)
{
	int comm_sz,my_rank;
	int m,n,local_m,local_n;
	double *local_matrix,*local_vector;
	double *local_y;

	MPI_Init(NULL,NULL);
	MPI_Comm_size(MPI_COMM_WORLD,&comm_sz);
	MPI_Comm_rank(MPI_COMM_WORLD,&my_rank);

	Get_input(my_rank,&m,&n);

	/* m, n and comm_sz are identical on every rank, so all ranks take this branch together. */
	if(m<=0 || n<=0 || m%comm_sz!=0 || n%comm_sz!=0)
	{
		if(my_rank==0)
		{
			fprintf(stderr,"m and n must be positive multiples of the number of processes (%d)\n",comm_sz);
		}
		MPI_Finalize();
		return EXIT_FAILURE;
	}

	local_m=m/comm_sz;
	local_n=n/comm_sz;
	local_matrix=malloc((size_t)local_m*(size_t)n*sizeof *local_matrix);
	local_vector=malloc((size_t)local_n*sizeof *local_vector);
	local_y=malloc((size_t)local_m*sizeof *local_y);
	if(local_matrix==NULL || local_vector==NULL || local_y==NULL)
	{
		fprintf(stderr,"rank %d: out of memory\n",my_rank);
		MPI_Abort(MPI_COMM_WORLD,EXIT_FAILURE);
	}

	Get_matrix(n,m,local_matrix,local_m,my_rank);
	Print_matrix(my_rank,n,m,local_m,local_matrix,MPI_COMM_WORLD);
	Get_vector(my_rank,n,local_n,local_vector,MPI_COMM_WORLD);
	Print_vector(my_rank,n,local_n,local_vector,MPI_COMM_WORLD);
	Mat_vect_mult(local_matrix,local_vector,local_y,local_m,n,local_n,MPI_COMM_WORLD);
	Print_y(my_rank,local_y,m,local_m,MPI_COMM_WORLD);

	free(local_y);
	free(local_vector);
	free(local_matrix);

	MPI_Finalize();
	return 0;
}

三、实验效果

1. 4×4矩阵向量乘法的实现。
在这里插入图片描述

四、实验遇到的问题以及解决方法

问题1：无法动态生成矩阵的内存
解决方法：原因是C语言的基础知识掌握不牢，通过学习malloc函数的使用得以解决。
问题2：该程序只能用于矩阵和向量能平均分配给各进程的情况，即行数和向量长度都能被进程数整除；不能整除的情况需要进一步优化程序。
解决方法：进一步修改程序，让某一个进程负责处理不能整除时多出来的部分。

总结

本次实验收获颇多，首先实现了生成矩阵并且动态分配空间，其次实现矩阵与向量的相乘，矩阵和向量相乘实现难度并不大，复杂的是如何分配给各个进程来实现，从中能学习到很多并行计算的知识，理解并行计算的思想，受益匪浅。

更专业、系统、实战的高性能计算学习资源地

汇聚原天河团队并行计算工程师、中科院计算所专家以及头部AI名企HPC专家，助力解决“卡脖子”问题

更多推荐

计算机广东考研大学排名,广东省最值得考研的8所大学

本文由木哥原创，欢迎转载分享，转载须注明来自百家号“木哥说教”！今日话题：广东省最值得大家考研的8所大学！广东省虽然不是中国好大学最多的省份，但是却是很多名校毕业生最想去的地方，俗话说北上广深，广东就占据两位，足以看得出来广东是非常受大学生欢迎的一个城市，其实广东也有很多好的大学，对于一些想考研的学生也可以参考一下，木哥给大家梳理了8所比较不错的大学出来了，希望能帮到大家。第一所大学：中山大学又叫

高性能计算社区

【考研英语】词汇积累（详细全面，2023最新版）

缺课miss a class、旷课cut a class、辍学drop out、退学quit school、助学金assistantship、奖学金scholarship。老人the elderly、the old、the aged、the senior、aged parents、the old and infirm parents。只有only、nothing but、merely、大约abou

高性能计算社区

计算机类专业学习难度排行,全国33所名校计算机专业考研跨考难度系数排名

计算机系统结构前20名(总共47所高校)1.清华大学，2.华中科技大学，3.西安交通大学，4.上海交通大学，5.浙江大学，6.西安电子科技大学，7.武汉大学，8.复旦大学，9.哈尔滨工业大学，10.东北大学，11.北京大学，12.东南大学，13.北京航空航天大学，14.中国科学技术大学，15.电子科技大学，16.吉林大学，17.南京理工大学，18.重庆大学，19.北京科技大学，20.同济大学计算机