I'm a newbie and I have tried to implement Strassen's algorithm to multiply two NxN matrices. I'm currently working on even dimensions. I'm getting a segmentation fault for values of N greater than 4.
After debugging I figured that the segmentation fault is encountered before the first call to the multiply function and immediately after displaying the two matrices.
Any help is appreciated.
Thanks very much!
#define N 8
typedef struct matrix
{
int rs;
int re;
int cs;
int ce;
int a[N][N];
}matrix;
void display(matrix);
matrix multiply(matrix, matrix);
matrix add(matrix, matrix);
matrix sub(matrix, matrix);
int main(int argc, char *argv[])
{
matrix m1;
matrix m2;
matrix result;
printf("Enter the values for matrix one: ");
for (int i=0; i<N; i++ )
{
for (int j=0; j<N ; j++)
scanf(" %d", &m1.a[i][j]);
}
printf("Enter the values for matrix two: ");
for (int i=0; i<N; i++ )
{
for (int j=0; j<N ; j++)
scanf(" %d", &m2.a[i][j]);
}
m1.rs = m2.rs = m1.cs = m2.cs = 0;
m1.re = m2.re = m1.ce = m2.ce = N-1;
display(m1);
display(m2);
**I believe the first call to multiply is not executed and there's a segmentation fault here.**
result = multiply(m1, m2);
display(result);
}
void display(matrix mat)
{
for(int i=0;i<=mat.re;i++)
{
for(int j=0;j<=mat.ce;j++)
printf("%d ", mat.a[i][j]);
printf("\n");
}
printf("\n");
}
matrix multiply(matrix m1, matrix m2)
{
int dimension = m1.re - m1.rs + 1;
matrix result;
result.rs = result.cs = 0;
result.re = result.ce = dimension -1;
if (dimension <=2)
{
matrix m3 = m1;
int a, b, c, d, e, f, g, h;
a = m1.a[m1.rs][m1.cs];
b = m1.a[m1.rs][m1.cs + 1];
c = m1.a[m1.rs + 1][m1.cs];
d = m1.a[m1.rs + 1][m1.cs + 1];
e = m2.a[m2.rs][m2.cs];
f = m2.a[m2.rs][m2.cs + 1];
g = m2.a[m2.rs + 1][m2.cs];
h = m2.a[m2.rs + 1][m2.cs + 1];
m3.a[m3.rs][m3.cs] = a*e + b*g;
m3.a[m3.rs][m3.cs+1] = a*f + b*h;
m3.a[m3.rs+1][m3.cs] = c*e + d*g;
m3.a[m3.rs+1][m3.cs+1] = c*f + d*h;
return m3;
}
else
{
matrix A, B, C, D, E, F, G, H;
matrix P1, P2, P3, P4, P5, P6, P7;
matrix R1, R2, R3, R4;
A = B = C = D = m1;
E = F = G = H = m2;
A.rs = m1.rs;
A.re = m1.re/2;
A.cs = m1.cs;
A.ce = m1.ce/2;
B.rs = m1.rs;
B.re = m1.re/2;
B.cs = (m1.ce/2)+1;
B.ce = m1.ce;
C.rs = (m1.re/2)+1;
C.re = m1.re;
C.cs = m1.cs;
C.ce = m1.ce/2;
D.rs = (m1.re/2)+1;
D.re = m1.re;
D.cs = (m1.ce/2)+1;
D.ce = m1.ce;
E.rs = m2.rs;
E.re = m2.re/2;
E.cs = m2.cs;
E.ce = m2.ce/2;
F.rs = m2.rs;
F.re = m2.re/2;
F.cs = (m2.ce/2)+1;
F.ce = m2.ce;
G.rs = (m2.re/2)+1;
G.re = m2.re;
G.cs = m2.cs;
G.ce = m2.ce/2;
H.rs = (m2.re/2)+1;
H.re = m2.re;
H.cs = (m2.ce/2)+1;
H.ce = m2.ce;
P1 = multiply(A, sub(F, H));
P2 = multiply(add(A, B), H);
P3 = multiply(add(C, D), E);
P4 = multiply(D, sub(G, E));
P5 = multiply(add(A, D), add(E, H));
P6 = multiply(sub(B, D), add(G, H));
P7 = multiply(sub(A,C), add(E, F));
R1 = add(add(P5, P6), sub(P4,P2));
R2 = add(P1, P2);
R3 = add(P3, P4);
R4 = sub(sub(add(P1,P5), P3), P7);
int m1_i, m1_j;
int i,j;
for(m1_i=R1.rs, i=0; m1_i<=R1.re; m1_i++, i++)
{
for(m1_j=R1.cs, j=0; m1_j<=R1.ce; m1_j++, j++)
result.a[i][j] = R1.a[m1_i][m1_j];
}
for(m1_i=R2.rs, i=0; m1_i<=R2.re; m1_i++, i++)
{
for(m1_j=R2.cs, j=dimension/2; m1_j<=R2.ce; m1_j++, j++)
result.a[i][j] = R2.a[m1_i][m1_j];
}
for(m1_i=R3.rs, i=dimension/2; m1_i<=R3.re; m1_i++, i++)
{
for(m1_j=R3.cs, j=0; m1_j<=R3.ce; m1_j++, j++)
result.a[i][j] = R3.a[m1_i][m1_j];
}
for(m1_i=R4.rs, i=dimension/2; m1_i<=R4.re; m1_i++, i++)
{
for(m1_j=R4.cs, j=dimension/2; m1_j<=R4.ce; m1_j++, j++)
result.a[i][j] = R4.a[m1_i][m1_j];
}
return result;
}
}
matrix add(matrix m1, matrix m2)
{
matrix result;
int m1_i, m1_j, m2_i, m2_j, i, j;
result.rs = result.cs = 0;
result.re = result.ce = m1.re - m1.rs;
for(m1_i = m1.rs, m2_i = m2.rs, i=0; m1_i<=m1.re; m1_i++, m2_i++, i++)
{
for(m1_j=m1.cs, m2_j = m2.cs, j=0; m1_j<=m1.ce; m1_j++, m2_j++, j++)
result.a[i][j] = m1.a[m1_i][m1_j] + m2.a[m2_i][m2_j];
}
return result;
}
matrix sub(matrix m1, matrix m2)
{
matrix result;
int m1_i, m1_j, m2_i, m2_j, i, j;
result.rs = result.cs = 0;
result.re = result.ce = m1.re - m1.rs;
for(m1_i = m1.rs, m2_i = m2.rs, i=0; m1_i<=m1.re; m1_i++, m2_i++, i++)
{
for(m1_j=m1.cs, m2_j = m2.cs, j=0; m1_j<=m1.ce; m1_j++, m2_j++, j++)
result.a[i][j] = m1.a[m1_i][m1_j] - m2.a[m2_i][m2_j];
}
return result;
}
Your program is entering an infinite recursion in multiply()
, gdb shows a stack trace like this:
#0 0x0000000000400899 in multiply (m1=..., m2=...) at b.c:60
#1 0x0000000000400fc7 in multiply (m1=..., m2=...) at b.c:140
#2 0x0000000000401584 in multiply (m1=..., m2=...) at b.c:142
#3 0x0000000000401859 in multiply (m1=..., m2=...) at b.c:143
#4 0x0000000000401859 in multiply (m1=..., m2=...) at b.c:143
#5 0x0000000000401859 in multiply (m1=..., m2=...) at b.c:143
#6 0x0000000000401859 in multiply (m1=..., m2=...) at b.c:143
#7 0x0000000000401859 in multiply (m1=..., m2=...) at b.c:143
#8 0x0000000000401859 in multiply (m1=..., m2=...) at b.c:143
#9 0x0000000000401859 in multiply (m1=..., m2=...) at b.c:143
#10 0x0000000000401859 in multiply (m1=..., m2=...) at b.c:143
(...)
#421 0x0000000000401859 in multiply (m1=..., m2=...) at b.c:143
#422 0x0000000000401859 in multiply (m1=..., m2=...) at b.c:143
#423 0x000000000040067c in main (argc=<optimized out>, argv=<optimized out>) at b.c:43
This likely causes a stack overflow which causes the crash you observe. Here is the code around line 143:
140 P1 = multiply(A, sub(F, H));
141 P2 = multiply(add(A, B), H);
142 P3 = multiply(add(C, D), E);
143 P4 = multiply(D, sub(G, E));
144 P5 = multiply(add(A, D), add(E, H));
145 P6 = multiply(sub(B, D), add(G, H));
146 P7 = multiply(sub(A,C), add(E, F));
I suspect the logic you use to compute the sizes of the subarrays is faulty. Further digging shows that m2
has negative dimensions in the recursive calls, you should do some further debugging to find out what exactly went wrong.
Another issue my compiler warns about is this:
b.c: In function 'multiply':
b.c:166:28: warning: array subscript is above array bounds [-Warray-bounds]
result.a[i][j] = R2.a[m1_i][m1_j];
^
b.c:178:28: warning: array subscript is above array bounds [-Warray-bounds]
result.a[i][j] = R4.a[m1_i][m1_j];
Here are the corresponding source lines:
163 for(m1_i=R2.rs, i=0; m1_i<=R2.re; m1_i++, i++)
164 {
165 for(m1_j=R2.cs, j=dimension/2; m1_j<=R2.ce; m1_j++, j++)
166 result.a[i][j] = R2.a[m1_i][m1_j];
167 }
...
175 for(m1_i=R4.rs, i=dimension/2; m1_i<=R4.re; m1_i++, i++)
176 {
177 for(m1_j=R4.cs, j=dimension/2; m1_j<=R4.ce; m1_j++, j++)
178 result.a[i][j] = R4.a[m1_i][m1_j];
179 }