/* MYE023
 * Sobel filter
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <time.h>
#include <math.h>
#include <sys/time.h>
#include <omp.h>

#pragma pack(push, 2)          
	typedef struct bmpheader_ 
	{
		char sign;
		int size;
		int notused;
		int data;
		int headwidth;
		int width;
		int height;
		short numofplanes;
		short bitpix;
		int method;
		int arraywidth;
		int horizresol;
		int vertresol;
		int colnum;
		int basecolnum;
	} bmpheader_t;
#pragma pack(pop)

/* This is the image structure, containing all the BMP information 
 * plus the RGB channels.
 *
 * imgdata is allocated by bmp_read_img_from_file()/bmp_clone_empty_img();
 * red, green, blue and rgb_width are set by bmp_rgb_alloc(). All four
 * buffers are released by bmp_img_free().
 */
typedef struct img_
{
	bmpheader_t header;      /* header exactly as stored in the file */
	int rgb_width;           /* bytes per row of imgdata: width * 3 rounded up to a multiple of 4 */
	unsigned char *imgdata;  /* raw pixel array, header.arraywidth bytes */
	/* One byte per pixel, width * height entries, stored row after row.
	 * NOTE(review): the conversion helpers take byte 0 of each pixel as
	 * red and byte 2 as blue; BMP files usually store blue first, so the
	 * names may be swapped -- confirm (harmless for a per-channel filter).
	 */
	unsigned char *red;
	unsigned char *green;
	unsigned char *blue;
} img_t;

/* Sobel filter entry points. Both share the (input image, output image)
 * signature so that either one can be handed to timeit().
 */
void sobel_serial(img_t *, img_t *);
void sobel_omp_device(img_t *, img_t *);


/* START of BMP utility functions */
/* Load a 24-bit BMP file: the header is stored in img->header and the
 * pixel array in a newly allocated, zero-filled img->imgdata buffer.
 *
 * inputfile: path of the BMP file to read.
 * img:       destination image; only header and imgdata are written here.
 *
 * A missing file, an unreadable header, a format other than 24 bits per
 * pixel, an invalid pixel-array size or an allocation failure prints a
 * message to stderr and terminates the program with status 1. A pixel
 * array shorter than the header declares only produces a warning; the
 * missing bytes stay zero.
 */
static
void bmp_read_img_from_file(char *inputfile, img_t *img) 
{
	FILE *file;
	bmpheader_t *header = &(img->header);
	size_t nbytes, nread;

	file = fopen(inputfile, "rb");
	if (file == NULL)
	{
		fprintf(stderr, "File %s not found; exiting.", inputfile);
		exit(1);
	}

	/* Read exactly one header: asking for sizeof + 1 bytes would store a
	 * byte past the end of the struct.
	 */
	if (fread(header, sizeof(bmpheader_t), 1, file) != 1)
	{
		fprintf(stderr, "Cannot read the BMP header of file %s; exiting.\n",
			inputfile);
		fclose(file);
		exit(1);
	}
	if (header->bitpix != 24)
	{
		fprintf(stderr, "File %s is not in 24-bit format; exiting.", inputfile);
		fclose(file);
		exit(1);
	}

	/* A zero or negative size would give an empty or absurdly large
	 * buffer that the channel conversion later indexes out of bounds.
	 */
	if (header->arraywidth <= 0)
	{
		fprintf(stderr, "File %s has an invalid image data size; exiting.\n",
			inputfile);
		fclose(file);
		exit(1);
	}
	nbytes = (size_t) header->arraywidth;

	img->imgdata = calloc(nbytes, sizeof(unsigned char));
	if (img->imgdata == NULL)
	{
		fprintf(stderr, "Cannot allocate memory for image data; exiting.");
		fclose(file);
		exit(1);
	}

	if (fseek(file, header->data, SEEK_SET) != 0)
	{
		fprintf(stderr, "Cannot locate the image data in file %s; exiting.\n",
			inputfile);
		fclose(file);
		exit(1);
	}

	/* Byte-granular read so that a truncated file can be reported while
	 * still keeping whatever data was available.
	 */
	nread = fread(img->imgdata, 1, nbytes, file);
	if (nread != nbytes)
		fprintf(stderr, "Warning: file %s holds less image data than its "
			"header declares.\n", inputfile);

	fclose(file);
}

/* Make imgout an empty twin of imgin: same header, and a zero-filled pixel
 * array of header.arraywidth bytes. The colour channels of imgout are not
 * touched. Exits with status 1 if the pixel array cannot be allocated.
 */
static
void bmp_clone_empty_img(img_t *imgin, img_t *imgout)
{
	unsigned char *pixels;

	imgout->header = imgin->header;

	pixels = (unsigned char *) calloc(imgout->header.arraywidth, sizeof *pixels);
	imgout->imgdata = pixels;
	if (pixels != NULL)
		return;

	fprintf(stderr, "Cannot allocate memory for clone image data; exiting.");
	exit(1);
}

/* Write img to a BMP file: the header first, then the pixel array at the
 * file offset recorded in the header.
 *
 * fname: path of the file to create or overwrite.
 * img:   image whose header and imgdata are written.
 *
 * Any failure to open, position, write or close the file prints a message
 * to stderr and terminates the program with status 1.
 */
static
void bmp_write_data_to_file(char *fname, img_t *img) 
{
	FILE *file;
	bmpheader_t *bmph = &(img->header);
	int ok;

	file = fopen(fname, "wb");
	if (file == NULL)
	{
		fprintf(stderr, "Cannot open file %s for writing; exiting.\n", fname);
		exit(1);
	}

	/* Write exactly one header: sizeof + 1 bytes would read a byte past
	 * the end of the struct and emit it into the file.
	 */
	ok = fwrite(bmph, sizeof(bmpheader_t), 1, file) == 1
	     && fseek(file, bmph->data, SEEK_SET) == 0;

	/* Nothing to write for an empty pixel array */
	if (ok && bmph->arraywidth > 0)
		ok = fwrite(img->imgdata, (size_t) bmph->arraywidth, 1, file) == 1;

	/* fclose flushes buffered data, so its result matters as well */
	if (fclose(file) != 0)
		ok = 0;

	if (!ok)
	{
		fprintf(stderr, "Cannot write image to file %s; exiting.\n", fname);
		exit(1);
	}
}

/* Split the interleaved pixel array (imgdata) into the three colour
 * channels. Each imgdata row is rgb_width bytes long and holds `width`
 * 3-byte pixels; the channels receive one byte per pixel, row after row.
 */
static
void bmp_rgb_from_data(img_t *img)
{
	const int ncols = img->header.width;
	const int nrows = img->header.height;
	const int stride = img->rgb_width;
	int pixel = 0;

	for (int row = 0; row < nrows; row++)
	{
		for (int col = 0; col < ncols; col++, pixel++)
		{
			/* first byte of this pixel inside imgdata */
			const int offset = row * stride + col * 3;

			img->red[pixel]   = img->imgdata[offset];
			img->green[pixel] = img->imgdata[offset + 1];
			img->blue[pixel]  = img->imgdata[offset + 2];
		}
	}
}

/* Interleave the three colour channels back into the pixel array
 * (imgdata). Each imgdata row is rgb_width bytes long; only the first
 * width * 3 bytes of a row are written, the rest of the row is left as is.
 */
static
void bmp_data_from_rgb(img_t *img)
{
	const int ncols = img->header.width;
	const int nrows = img->header.height;
	const int stride = img->rgb_width;
	int pixel = 0;

	for (int row = 0; row < nrows; row++)
	{
		for (int col = 0; col < ncols; col++, pixel++)
		{
			/* first byte of this pixel inside imgdata */
			const int offset = row * stride + col * 3;

			img->imgdata[offset]     = img->red[pixel];
			img->imgdata[offset + 1] = img->green[pixel];
			img->imgdata[offset + 2] = img->blue[pixel];
		}
	}
}

/* Allocate the zero-filled red, green and blue channels of img (one byte
 * per pixel, width * height entries each) and set img->rgb_width to the
 * length of one pixel-array row: width * 3 rounded up to a multiple of 4.
 * Exits with status 1 as soon as one of the allocations fails.
 */
static
void bmp_rgb_alloc(img_t *img)
{
	static const char *const failure[3] = {
		"Cannot allocate memory for the red channel; exiting.",
		"Cannot allocate memory for the green channel; exiting.",
		"Cannot allocate memory for the blue channel; exiting."
	};
	unsigned char **channel[3];
	const int width = img->header.width;
	const int height = img->header.height;
	const int row_bytes = width * 3;
	const int remainder = row_bytes % 4;
	int k;

	channel[0] = &img->red;
	channel[1] = &img->green;
	channel[2] = &img->blue;

	for (k = 0; k < 3; k++)
	{
		*channel[k] = (unsigned char *) calloc(width * height, sizeof(unsigned char));
		if (*channel[k] == NULL)
		{
			fprintf(stderr, "%s", failure[k]);
			exit(1);
		}
	}

	/* pad each row up to the next multiple of four bytes */
	img->rgb_width = (remainder != 0) ? row_bytes + (4 - remainder) : row_bytes;
}

/* Release the channel buffers and the pixel array owned by img.
 *
 * The pointers are reset to NULL afterwards, so calling this again on the
 * same image (or using the stale fields by mistake) no longer leads to a
 * double free. A NULL img is ignored. The img_t object itself is not freed.
 */
static
void bmp_img_free(img_t *img)
{
	if (img == NULL)
		return;

	free(img->red);
	img->red = NULL;

	free(img->green);
	img->green = NULL;

	free(img->blue);
	img->blue = NULL;

	free(img->imgdata);
	img->imgdata = NULL;
}

/* END of BMP utility functions */

/* Limit i to the range [min, max]: values below min yield min, values
 * above max yield max, anything else is returned unchanged. The lower
 * bound is tested first.
 */
int clamp(int i , int min , int max)
{
	return (i < min) ? min
	     : (i > max) ? max
	     : i;
}

/* Sequential Sobel filter.
 *
 * For every pixel and every colour channel the 3x3 neighbourhood is
 * weighted with the horizontal and the vertical Sobel kernel; the output
 * is the truncated magnitude sqrt(x^2 + y^2) of the two sums, limited to
 * 0..255. Neighbours outside the image are replaced by the nearest pixel
 * inside it.
 *
 * imgin:  source image; its red/green/blue channels are read.
 * imgout: destination image; its red/green/blue channels are written.
 */
void sobel_serial(img_t *imgin, img_t *imgout)
{
	/* 3x3 kernels stored row by row */
	static const int kernel_x[9] = {
		-1,  0,  1,
		-2,  0,  2,
		-1,  0,  1
	};
	static const int kernel_y[9] = {
		-1, -2, -1,
		 0,  0,  0,
		 1,  2,  1
	};

	const int width = imgin->header.width;
	const int height = imgin->header.height;

	const unsigned char *src_r = imgin->red;
	const unsigned char *src_g = imgin->green;
	const unsigned char *src_b = imgin->blue;

	unsigned char *dst_r = imgout->red;
	unsigned char *dst_g = imgout->green;
	unsigned char *dst_b = imgout->blue;

	for (int row = 0; row < height; row++)
	{
		for (int col = 0; col < width; col++)
		{
			/* per-pixel sums, one horizontal/vertical pair per channel */
			int sum_rx = 0, sum_ry = 0;
			int sum_gx = 0, sum_gy = 0;
			int sum_bx = 0, sum_by = 0;
			int tap = 0;    /* walks through the kernels in storage order */

			for (int dy = -1; dy <= 1; dy++)
			{
				const int y = clamp(row + dy, 0, height - 1);

				for (int dx = -1; dx <= 1; dx++, tap++)
				{
					const int x = clamp(col + dx, 0, width - 1);
					const int pos = y * width + x;

					sum_rx += src_r[pos] * kernel_x[tap];
					sum_ry += src_r[pos] * kernel_y[tap];

					sum_gx += src_g[pos] * kernel_x[tap];
					sum_gy += src_g[pos] * kernel_y[tap];

					sum_bx += src_b[pos] * kernel_x[tap];
					sum_by += src_b[pos] * kernel_y[tap];
				}
			}

			const int target = row * width + col;
			const int mag_r = (int) sqrt((double)(sum_rx * sum_rx + sum_ry * sum_ry));
			const int mag_g = (int) sqrt((double)(sum_gx * sum_gx + sum_gy * sum_gy));
			const int mag_b = (int) sqrt((double)(sum_bx * sum_bx + sum_by * sum_by));

			dst_r[target] = (unsigned char) clamp(mag_r, 0, 255);
			dst_g[target] = (unsigned char) clamp(mag_g, 0, 255);
			dst_b[target] = (unsigned char) clamp(mag_b, 0, 255);
		}
	}
}


/* Parallel Sobel filter with OpenMP device offloading.
 *
 * Computes, for every pixel and colour channel, the truncated magnitude
 * sqrt(x^2 + y^2) of the horizontal and vertical 3x3 Sobel sums, limited
 * to 255; neighbours outside the image are replaced by the nearest pixel
 * inside it.
 *
 * The pixel loops are offloaded with a single `target` construct: the
 * three input channels are copied to the device, the three output
 * channels are copied back. When the code is compiled without OpenMP
 * support the pragma is ignored and the loops run sequentially on the
 * host.
 *
 * imgin:  source image; its red/green/blue channels are read.
 * imgout: destination image; its red/green/blue channels are written.
 */
void sobel_omp_device(img_t *imgin, img_t *imgout)
{
	int width = imgin->header.width;
	int height = imgin->header.height;
	int npixels;

	unsigned char *in_r = imgin->red;
	unsigned char *in_g = imgin->green;
	unsigned char *in_b = imgin->blue;

	unsigned char *out_r = imgout->red;
	unsigned char *out_g = imgout->green;
	unsigned char *out_b = imgout->blue;

	/* Nothing to filter, and no empty array sections to map */
	if (width <= 0 || height <= 0)
		return;
	npixels = width * height;

	#pragma omp target teams distribute parallel for collapse(2) \
		map(to: in_r[0:npixels], in_g[0:npixels], in_b[0:npixels]) \
		map(from: out_r[0:npixels], out_g[0:npixels], out_b[0:npixels])
	for (int i = 0; i < height; i++)
	{
		for (int j = 0; j < width; j++)
		{
			int rx = 0, ry = 0, gx = 0, gy = 0, bx = 0, by = 0;

			for (int kr = -1; kr <= 1; kr++)
			{
				/* Bounds are handled inline so that no host
				 * function has to be callable inside the target
				 * region.
				 */
				int y = i + kr;
				if (y < 0)
					y = 0;
				else if (y > height - 1)
					y = height - 1;

				for (int kc = -1; kc <= 1; kc++)
				{
					int x = j + kc;
					if (x < 0)
						x = 0;
					else if (x > width - 1)
						x = width - 1;

					int pos = y * width + x;

					/* Sobel weights in closed form: the middle
					 * row (column) of the horizontal (vertical)
					 * kernel counts twice.
					 */
					int wx = kc * (kr == 0 ? 2 : 1);
					int wy = kr * (kc == 0 ? 2 : 1);

					rx += in_r[pos] * wx;
					ry += in_r[pos] * wy;

					gx += in_g[pos] * wx;
					gy += in_g[pos] * wy;

					bx += in_b[pos] * wx;
					by += in_b[pos] * wy;
				}
			}

			/* The magnitudes are never negative, so only the upper
			 * limit needs to be enforced.
			 */
			int r = (int) sqrt((double)(rx * rx + ry * ry));
			int g = (int) sqrt((double)(gx * gx + gy * gy));
			int b = (int) sqrt((double)(bx * bx + by * by));

			out_r[i * width + j] = (unsigned char) (r > 255 ? 255 : r);
			out_g[i * width + j] = (unsigned char) (g > 255 ? 255 : g);
			out_b[i * width + j] = (unsigned char) (b > 255 ? 255 : b);
		}
	}
}


/* Run func(imgin, imgout) once and return the elapsed wall-clock time in
 * seconds, taken from two gettimeofday() readings around the call.
 */
double timeit(void (*func)(img_t *, img_t *), img_t *imgin, img_t *imgout)
{
	struct timeval before, after;
	double micro_part, second_part;

	gettimeofday(&before, NULL);
	func(imgin, imgout);
	gettimeofday(&after, NULL);

	/* the microsecond difference may be negative; the sum is still right */
	micro_part = (double) (after.tv_usec - before.tv_usec) / 1000000;
	second_part = (double) (after.tv_sec - before.tv_sec);

	return micro_part + second_part;
}


/* Return a newly allocated copy of str with its extension removed.
 *
 * str:     input string; NULL yields NULL.
 * extsep:  extension separator; the copy is cut at its last occurrence.
 * pathsep: path separator, or 0 for none. When the last path separator
 *          comes after the last extension separator, nothing is cut.
 *
 * The caller owns the result and must free() it. NULL is returned when
 * the copy cannot be allocated.
 */
char *remove_ext(char *str, char extsep, char pathsep) 
{
	char *copy, *dot, *slash;
	size_t nbytes;

	if (str == NULL)
		return NULL;

	nbytes = strlen(str) + 1;
	copy = malloc(nbytes);
	if (copy == NULL)
		return NULL;
	memcpy(copy, str, nbytes);

	dot = strrchr(copy, extsep);
	slash = (pathsep != 0) ? strrchr(copy, pathsep) : NULL;

	/* cut only when the separator belongs to the last path component */
	if (dot != NULL && (slash == NULL || slash < dot))
		*dot = '\0';

	return copy;
}


/* Driver: loads the BMP named on the command line, runs the serial and
 * the OpenMP-device Sobel filters on it, stores the results next to the
 * input as <name>-serial.bmp and <name>-omp-device.bmp, and prints the
 * execution time of each filter.
 *
 * Returns 0 on success; usage errors, an unusable input name and the
 * failures reported by the BMP helpers terminate with status 1.
 */
int main(int argc, char *argv[]) 
{
	double exectime_serial = 0.0, exectime_omp_device = 0.0;
	char *inputfile, *noextfname;   
	char seqoutfile[128], paroutfile_device[128];
	img_t imgin, imgout, pimgout_device;
	int seqlen, parlen;

	if (argc < 2)
	{
		fprintf(stderr, "Syntax: %s <filename>, \n\te.g. %s 500.bmp\n", 
			argv[0], argv[0]);
		fprintf(stderr, "Available images: 500.bmp, 1000.bmp, 1500.bmp\n");
		exit(1);
	}

	inputfile = argv[1];

	/* remove_ext() returns NULL when it cannot allocate the copy */
	noextfname = remove_ext(inputfile, '.', '/');
	if (noextfname == NULL)
	{
		fprintf(stderr, "Cannot allocate memory for the output file names; exiting.\n");
		exit(1);
	}

	/* Bounded formatting: a long input path must not overrun the
	 * fixed-size name buffers.
	 */
	seqlen = snprintf(seqoutfile, sizeof seqoutfile,
	                  "%s-serial.bmp", noextfname);
	parlen = snprintf(paroutfile_device, sizeof paroutfile_device,
	                  "%s-omp-device.bmp", noextfname);
	free(noextfname);   /* only needed to build the two names */
	if (seqlen < 0 || (size_t) seqlen >= sizeof seqoutfile ||
	    parlen < 0 || (size_t) parlen >= sizeof paroutfile_device)
	{
		fprintf(stderr, "File name %s is too long; exiting.\n", inputfile);
		exit(1);
	}

	bmp_read_img_from_file(inputfile, &imgin);
	bmp_clone_empty_img(&imgin, &imgout);
	bmp_clone_empty_img(&imgin, &pimgout_device);
	bmp_rgb_alloc(&imgin);
	bmp_rgb_alloc(&imgout);
	bmp_rgb_alloc(&pimgout_device);

	printf("<<< Sobel filter (h=%d, w=%d) >>>\n", imgin.header.height, 
	       imgin.header.width);

	/* Image data to R,G,B */
	bmp_rgb_from_data(&imgin);

	/* Run & time serial Sobel filter */
	exectime_serial = timeit(sobel_serial, &imgin, &imgout);

	/* Save the results (serial) */
	bmp_data_from_rgb(&imgout);
	bmp_write_data_to_file(seqoutfile, &imgout);

	/* Run & time OpenMP Sobel filter (w/ device) */
	exectime_omp_device = timeit(sobel_omp_device, &imgin, &pimgout_device);

	/* Save the results (parallel w/ device) */
	bmp_data_from_rgb(&pimgout_device);
	bmp_write_data_to_file(paroutfile_device, &pimgout_device);
	printf("Saved %s\n", seqoutfile);
	printf("Saved %s\n", paroutfile_device);
		
	printf("Total execution time (sequential): %lf\n", exectime_serial);
	printf("Total execution time (omp device): %lf\n", exectime_omp_device);

	bmp_img_free(&imgin);
	bmp_img_free(&imgout);
	bmp_img_free(&pimgout_device);

	return 0;
}
