single thread -multiple threads

Discuss the OpenMP 4.0 Examples document.

single thread -multiple threads

Postby grepi » Sun Jul 19, 2015 1:37 am

Hello,
the program fresnel.cpp generates Fresnel diffraction of a circular optic and a rectangle aperture(Hecht Optics) :

int main(int argc, char *argv[])
{

int i, j, k;

double R[3][3], Ry[3][3], Rz[3][3];
double alpha = 0, beta = 0;
double delta_alpha= M_PI/180.0, delta_beta = M_PI/180.0; //1 Grad
double fresnelarray[2000][3];
point O;
double start_alpha, stop_alpha, start_beta, stop_beta;
dcomplex E_nr(0, 0);
dcomplex E_PH(0,0);
double P_width[2], P_height[2];// [0] in min [1] is max
ofstream myfile;
myfile.open("example.dat");
int Pj=0, Pi=0;
double max_P = -10;
double min_P = 10;
double lambda=0;
double K_theta=0;
double Er = 0;
double Ei = 0;
int dii = 50 , djj = 50 ;
double h;
int num_thr, thr_id;
// (double *) private_tile = NULL;




typedef struct PPstruct
{
dcomplex * E;
int * C;
} PPstruct;




process_args(argc, argv);

// data generated by numerical recipes
// if du < 0.2 --->> fresnelarray[u*10000][0]
// fresneclC(u) --> fresnelarray[u*10000][1]
// fresneclS(u) --> fresnelarray[u*10000][2]
// read fresnel values from file and store it in A use get_fresnel(A, t)

ifstream fresneli;
fresneli.open("fresnel_0_0_2.txt");
for(i = 0;i < 2000;i++)
fresneli >> fresnelarray[i][0] >> fresnelarray[i][1] >> fresnelarray[i][2];




// optic definition D in [mm]

//(3 points on circle is circle fixed)
// a) -m + DD = r
// b) m^2 + (D/2)^2 = r^2

// a') m^2 - 2mDD + DD^2 = r^2
// b-a') (D/2)^2 +2mDD -DD^2 = 0
// c) m = (-(D/2)^2 + DD^2)/ 2DD;
// d) r = -m + DD

double D = 20; // [mmm] optic diameter
double DD = 2.0; //[mm] distance on x axis

double m[] = { (-(D/2.0)*(D/2.0) + DD*DD)/(2.0*DD) , 0, 0}; // [mm]
double r = - m[0] + DD; // [mm]
double optic_cut_with_x_eq_0_axis = sqrt( double( r*r - m[0] * m[0]) );


cout << "optic_cut_with_x_eq_0_axis (+) = " << optic_cut_with_x_eq_0_axis << " m[0] = " << m[0] << " r = " << r << endl;

// Optic center is on x=0, y=0, z=0, S and Q defines P on d_OA plane


/*
| y |
| + P
| / |
| / |
| | / |
| | / |
| | / |
--------------+ | / |
| / |
| + O |
--------------+-----/----------------+----------> x
| / |
| / |
--------------+ S | |
x=0 D_OA D_AP

Optic Q plane P plane

[mm]
*/

// compute Pmin/max for x = D_AP plane
// compute first optic cut x = 0 plane

double h_width_min = - r*r/ ( (m[0] -DD)*(m[0] -DD) ); // [mm]
double h_width_max = - h_width_min; // [mm]


//h_height_min = h_width_min for a circular optic

double h_height_min = h_width_min; // [mm]
double h_height_max = - h_width_min; // [mm]




int no_detectors_height = 500;
int no_detectors_width = 500;
double buf[no_detectors_width];
//unsigned char bufo[no_detectors_width];

double detector_height = 20.0; // [micro m]
double detector_width = 20.0; // [micro m]
double FPA_height_half = no_detectors_height * detector_height/1.0e3; // [mm]
double FPA_width_half = no_detectors_width * detector_width /1.0e3 ; // [mm]

short PlanarConfiguration = 1;
short BitsPerSample = 64;
short SamplesPerPixel = 1;
short PhotometricInterpretation = 1;
short SampleFormat = 3;

TIFF * tiff = TIFFOpen("FRESNEL_data.tif", "w");

TIFFSetField( tiff, TIFFTAG_IMAGEWIDTH, no_detectors_width);
TIFFSetField( tiff, TIFFTAG_IMAGELENGTH, no_detectors_height);
TIFFSetField( tiff, TIFFTAG_PHOTOMETRIC, PhotometricInterpretation);
TIFFSetField( tiff, TIFFTAG_SAMPLEFORMAT, SampleFormat);
TIFFSetField( tiff, TIFFTAG_SAMPLESPERPIXEL, SamplesPerPixel );
TIFFSetField( tiff, TIFFTAG_PLANARCONFIG, PlanarConfiguration);
TIFFSetField( tiff, TIFFTAG_BITSPERSAMPLE, BitsPerSample);
int ScanlineSize = TIFFScanlineSize(tiff);

double C[500][500] = {0};



// for display output
TIFF * tiffo = TIFFOpen("F.tif", "w");
TIFFSetField( tiffo, TIFFTAG_IMAGEWIDTH, no_detectors_width);
TIFFSetField( tiffo, TIFFTAG_IMAGELENGTH, no_detectors_height);
TIFFSetField( tiffo, TIFFTAG_PHOTOMETRIC, PhotometricInterpretation);
TIFFSetField( tiffo, TIFFTAG_SAMPLESPERPIXEL, 1 );
TIFFSetField( tiffo, TIFFTAG_PLANARCONFIG, 1 );
TIFFSetField( tiffo, TIFFTAG_BITSPERSAMPLE, 8 );
int ScanlineSizeo = TIFFScanlineSize(tiffo);

unsigned char * bufo = (unsigned char *) calloc(no_detectors_width, sizeof(unsigned char));

PPstruct * PPs = (PPstruct * ) calloc(1,sizeof(PPstruct));

PPs->E = (dcomplex *) calloc(no_detectors_height*no_detectors_width, sizeof(dcomplex));
PPs->C = (int * ) calloc(no_detectors_height*no_detectors_width, sizeof(int) );

lambda = 9.5e-3; // reference wave length [mm]





// compute angle P.x = D_AP P.y = P.z = 0 through aperture to optic




// loops, loops, loops



compute_bounding_Szy_on_optic( r, m, optic_cut_with_x_eq_0_axis, FPA_height_half, FPA_width_half, &start_alpha, &stop_alpha , &start_beta, &stop_beta);


#ifdef _OPENMP

#pragma omp parallel private(thr_id)

num_thr = omp_get_num_threads();
thr_id = omp_get_thread_num();

if ( thr_id == 0 )
{
cout << " thr_id = " << thr_id << endl;
cout << " num_thr = " << num_thr << endl;
}

#endif


double no_alpha = 30.0;
double no_beta = 30.0;
delta_alpha = ( -start_alpha + stop_alpha ) / no_alpha;
delta_beta = ( -start_beta + stop_beta ) / no_beta;

// Sy[0] on optic
// Sy[0] -= m
// Sy[0] = Rz(alpha) (r,0,0)' compute alpha : alpha = acos ( Sz[0] *(0,1,0)' ) / r
// Sy[0] += m


//for varying point S input on optic

for ( alpha = start_alpha; alpha < stop_alpha + delta_alpha; alpha += delta_alpha)
{

Rz[0][0] = cos(alpha); Rz[0][1] = -sin(alpha); Rz[0][2] = 0;
Rz[1][0] = sin(alpha); Rz[1][1] = cos(alpha); Rz[1][2] = 0;
Rz[2][0] = 0.0; Rz[2][1] = 0.0; Rz[2][2] = 1;



//int kk;
//int beta_end = (int) (M_PI / delta_beta +1);

for ( beta = start_beta; beta < stop_beta + delta_beta; beta += delta_beta)
{

Ry[0][0] = cos(beta); Ry[0][1] = 0.0; Ry[0][2] = sin(beta);
Ry[1][0] = 0.0; Ry[1][1] = 1.0; Ry[1][2] = 0;
Ry[2][0] = -sin(beta); Ry[2][1] = 0.0; Ry[2][2] = cos(beta);



for (j=0;j<3;j++)
for(i=0;i<3; i++)
{
h = 0.0;
for (k= 0;k < 3; k++)
h += Rz[j][k] * Ry[k][i]; // variate beta first then alpha

R[j][i] = h;
}

double SS[]= {DD, 0 , 0.};

double QQ[3] = {0, 0, 0};

S.x = DD;
S.y = 0;
S.z = 0;

SS[0] = r;
SS[1] = 0;
SS[2] = 0;

QQ[0] = 0;
QQ[1] = 0;
QQ[2] = 0;

for (j=0;j < 3; j++)
{
h = 0;
for (k= 0;k < 3; k++)
h += R[j][k] * SS[k]; // variate beta first then alpha

QQ[j] = h;
}

QQ[0] += m[0];
QQ[1] += m[1];
QQ[2] += m[2];

S.x = QQ[0];
S.y = QQ[1];
S.z = QQ[2];

// S defined




// set P to detectors; S and P defines point O on aperture windows



// construct bucket tiles of FPA for parallel processing FPA tiles

int ni=1,nj=1;

int image_width = no_detectors_width;
int image_height = no_detectors_height;

int bucketsize_width = int (image_width / num_thr );
int bucketsize_height = int ( image_height / num_thr ) ;


while ( (no_detectors_width / ni) >= bucketsize_width ) ni++;
while ( (no_detectors_height / nj ) >= bucketsize_height) nj++;
// cout << " i = " << ni-1.0 << " j = " << nj-1.0 << endl;

ni--;
nj--;
int jj = 0;
int ii = 0;

// there are ni*nj tiles!



for (j=0; j<nj; j++)
{


// second for loop are processed in parallel with index k

#pragma omp parallel for shared(j) private( k, ii, jj, dii, djj, K_theta ) reduction (+:Er,Ei)
for ( int k=0; k<ni; k++)
{

dii = min(bucketsize_width, no_detectors_width - k * bucketsize_width);
djj = min(bucketsize_height, no_detectors_height - j * bucketsize_height);
//if ( dii == 0 || djj == 0 )
// break;


double * private_tile = ( double* ) malloc(dii * djj * sizeof(double));
int jlow = j * djj;
int klow = k * dii;
// random_device rd;
//mt19937 e2(rd());
//normal_distribution<> normal_dist(0, 1);


for (jj=0; jj < djj; jj++)
{
for (ii=0; ii < dii; ii++)
{
//for( int kk=0; kk< 1; kk++)
//{

int ip = klow + ii;
int jp = jlow + jj;

// Varying point P on FPA lambda in [mm]
//double rdnx = 1.0e-9 * detector_width * normal_dist(e2);
//double rdny = 1.0e-9 * detector_height * normal_dist(e2);

P.x = D_AP ; // [mm]
P.y = jp * detector_height/1.0e3 - FPA_height_half + detector_height/2.0e3;// + rdnx; // [mm]
P.z = ip * detector_width/1.0e3 - FPA_width_half + detector_width/2.0e3;// + rdny; // [mm]

double l = (D_OA - D_AP) / (S.x - D_AP);
O.x = D_OA; //
O.y = (S.y - P.y) * l + P.y;

K_theta = Ktheta( m, S, P );
E_PH = integrate_PH( S, O, P, K_theta, lambda, fresnelarray );
*(private_tile + jj * dii + ii) = E_PH.real() * E_PH.real() + E_PH.imag() * E_PH.imag();


Er += E_PH.real();
Ei += E_PH.imag();


PPs->E[(j+jj) * no_detectors_width + k + ii] += E_PH;
PPs->C[(j+jj) * no_detectors_width + k + ii]++;

double h = E_PH.real() * E_PH.real() + E_PH.imag() * E_PH.imag();
if ( h > max_P ) max_P = h;
if ( h < min_P ) min_P = h;
// } // kk
} // for ii =
} // for jj =




// #pragma omp reduction(+:C[jj,0,djj][ii,0,dii] )
// #pragma omp critical geht!
// #pragma omp atomic Fehler bei for

#ifdef _OPENMP
#pragma omp reduction(+:C[jj,0,djj][ii,0,dii] )
#endif

for (jj=0; jj< djj; jj++)
for (ii=0; ii< dii; ii++)
C[jj+jlow][ii+klow] += *( private_tile + jj * dii + ii) ;

free(private_tile);

} // for k=0 ... parallel

} // for j




} // beta

cout << " alpha = " << alpha << endl;
} // alpha



//if ( myfile )
// myfile << fixed << setprecision(12) << E_p.real() << " " << E_p.imag() << endl;




cout << "min_P = " << min_P << " max_P = " << max_P << endl;




for (j=0; j < no_detectors_width; j++)
{
for (i=0; i < no_detectors_width; i++)
{
//
//buf[i] = PPs->E[j*no_detectors_width +i].real() * PPs->E[j*no_detectors_width +i].real() +
// PPs->E[j*no_detectors_width +i].imag() * PPs->E[j*no_detectors_width +i].imag();

buf[i] = C[j][i];

* ( bufo + i ) = (unsigned char) ( 255.0 * (buf[i] - min_P ) / ( max_P - min_P) );

}
TIFFWriteScanline(tiff, buf , j, 0);
TIFFWriteScanline(tiffo, bufo , j, 0);

}

TIFFClose(tiff);
TIFFClose(tiffo);


//myfile.close();
return 0;

}

If you want to get the images of single thread and multiple threads (image with noise) send an email to me.

Cheers,

grepi
grepi
 
Posts: 3
Joined: Sat Jul 18, 2015 2:00 am

Re: single thread -multiple threads

Postby grepi » Sun Jul 19, 2015 3:09 am

Hello,

to clarification to my post the multiple threads code with noise is wrong because it differs from single thread but I don't know why.

Cheers,

grepi
grepi
 
Posts: 3
Joined: Sat Jul 18, 2015 2:00 am

Re: single thread -multiple threads

Postby grepi » Thu Aug 06, 2015 6:36 am

Hello,

I have updated my fresnel program to new_fresnel.cpp that is more openmp like.

new openmp program new_fresnel.cpp generates an image(500,500) of 8 bit tiff format:

complex C(j,i) += f * E0 * E(S, O, P(j,i) )

f, E0=1 real factor

complex E() energy on line S-O-P(j,i)

all S on optic (alpha,beta,30x30)

O in rectangle aperture window 20x20 mm^2

D_OA distance Optic-Aperture 40 mm for images in tar.
D_AP distance Aperture P-plane 60 mm

P in P-plane(D_AP,y,z) detector_position



output(j,i) = C(j,i).real()^2 + C(j,i).imag()^2





usage ./new_fresnel -set_num_threads 1 -ofile A.tif -D_OA 40 -D_AP 60
./new_fresnel -S 2 0 0 -P D_AP 0 0 for a single ray gives nice
Fraunhofer diffraction but wrong Fresnel diffraction.

new_fresnel uses file fresnel_0_0_2.txt which contains lower fresnels(c) values.

Up to now I have seen Fresnel diffraction values on axis from a book
and they are similiar to new_fresnel values.

It is a pity , that Openmp works fine with thread=1 and with threads
n > 1 with increasing noise(n). In the tar file several tifs are
generated with gcc-5.2 on mac os(Fink-Home).
but it runs also on every linux system(see head of file new_fresnel.cpp).

Regards,

grepi
Attachments
fresnel.tar.gz
new_fresnel.cpp and image files to show the noise
(803.84 KiB) Downloaded 457 times
grepi
 
Posts: 3
Joined: Sat Jul 18, 2015 2:00 am


Return to OpenMP 4.0 Examples

Who is online

Users browsing this forum: No registered users and 1 guest

cron