
// Small positive offset. In this file it pushes secondary-ray origins off the
// surface along the normal, biases the depth stored in the result buffer and
// serves as the minimum accepted hit distance.
#define EPSILON 0.001f
// Scrambled table index derived from names that must exist in the scope where
// the macro is expanded: nRayId, k, globalid.x and iYNoise (all integers).
// For non-negative operands the result is in [0, 4095].
// The whole expansion is parenthesized so the macro behaves as one operand
// inside a larger expression (e.g. `2 * RAY_NOISE`).
#define RAY_NOISE ((nRayId+((k*96)%5685)+(globalid.x*81)+((globalid.x*31*7)+iYNoise*8*19*7)) % 4096)

// Attribute placed in front of helper functions to request unconditional inlining.
#define INLINE __attribute__((always_inline))

// One ray as passed between the kernels in this file.
typedef struct
{
	float4 Origin,     // xyz: ray start point. w: extra scalar (UpdatePerspectiveRays stores NearFar.y, UpdateDiffuseRays stores 99999.9).
	       Direction;  // xyz: normalized direction. w: extra scalar (UpdateDiffuseRays stores the clamped normal/direction dot product).
} RayInfo;

// Camera description read by UpdatePerspectiveRays and Shading.
// Field order and types are unchanged; only the declaration style differs.
typedef struct
{
	// Ray start points for the LT, RT, LB and RB corners. UpdatePerspectiveRays
	// blends L towards R with the pixel's x fraction and B towards T with its y fraction.
	float4 RayPosLT, RayPosRT, RayPosLB, RayPosRB;
	// Ray directions for the same four corners, blended the same way and then normalized.
	float4 DirLT, DirRT, DirLB, DirRB;

	// Projection terms used by Shading: a camera-space x (or y) is divided by
	// Size + PrespSizeD1 * depth before being scaled to pixels.
	float SizeX, SizeY, PrespSizeD1X, PrespSizeD1Y;

	// Shading subtracts MatrixPos.xyz from a world point and takes the dot product
	// of the result with MatrixX/Y/Z.xyz to obtain the camera-space x, y and depth.
	float4 MatrixX, MatrixY, MatrixZ, MatrixPos;

	// Not read by any kernel visible in this file.
	float widthPX, heightPX;

	// Only the .y component is read here (copied into Origin.w of the primary rays).
	float2 NearFar;
} cCamera;

// Compact camera record. No kernel visible in this file references it.
typedef struct
{
	float3 Forward, Up, Position;
	float2 NearFar;
} cSimpleCamera;


// Per-pixel intersection record consumed by the shading kernels.
typedef struct
{
	// The kernels treat -1 in either field as "nothing was hit".
	int ObjectID, TriangleID;
	// Weights of the triangle's second (x) and third (y) vertex, as used by
	// MixTriangleVertices; the first vertex gets 1 - x - y.
	float2 uv;
} cRayHit;

/*
 * Blends a three-float-per-vertex attribute over one indexed triangle.
 *
 * aVertices   - attribute array, three consecutive floats per vertex.
 * aIndices    - vertex indices, three consecutive entries per triangle.
 * aTriangleID - triangle whose three index entries are read.
 * aTriUV      - blend weights: x for the second vertex, y for the third;
 *               the first vertex is weighted by 1 - x - y.
 *
 * Returns the weighted sum in xyz; w is always 0.
 */
INLINE
float4 MixTriangleVertices(__global const float* aVertices, __global const int* aIndices, int aTriangleID, __global const float2* aTriUV)
{
	// Position of the triangle's first entry in the index array.
	const int firstEntry = aTriangleID * 3;

	// Float offsets of the three vertices inside aVertices.
	const int offsetA = aIndices[firstEntry] * 3;
	const int offsetB = aIndices[firstEntry + 1] * 3;
	const int offsetC = aIndices[firstEntry + 2] * 3;

	const float4 vertexA = (float4)(aVertices[offsetA], aVertices[offsetA + 1], aVertices[offsetA + 2], 0.f);
	const float4 vertexB = (float4)(aVertices[offsetB], aVertices[offsetB + 1], aVertices[offsetB + 2], 0.f);
	const float4 vertexC = (float4)(aVertices[offsetC], aVertices[offsetC + 1], aVertices[offsetC + 2], 0.f);

	const float weightB = aTriUV->x;
	const float weightC = aTriUV->y;

	return vertexA * (1 - weightB - weightC) + vertexB * weightB + vertexC * weightC;
}


/*
 * Builds one primary ray per pixel by blending the camera's four corner rays.
 *
 * aOutRays - output, one RayInfo per pixel at index y * aWidth + x.
 *            Origin.xyz, Origin.w and Direction.xyz are written;
 *            Direction.w is left untouched.
 * aCamera  - corner ray positions/directions and NearFar.
 * aWidth, aHeight - image size in pixels; work-items outside it do nothing.
 */
__kernel void UpdatePerspectiveRays(__global RayInfo* aOutRays, __global const cCamera* aCamera, int aWidth, int aHeight)
{
	const int px = get_global_id(0);
	const int py = get_global_id(1);

	// Ignore work-items that fall outside the image.
	if (px >= aWidth || py >= aHeight)
	{
		return;
	}

	// Fractional pixel position; the complements weight the L and B corners.
	const float tx = (float)px / (float)aWidth;
	const float ty = (float)py / (float)aHeight;
	const float leftWeight = 1.0f - tx;
	const float bottomWeight = 1.0f - ty;

	// Blend along x on the top and bottom edges.
	const float4 dirTop = aCamera->DirLT * leftWeight + aCamera->DirRT * tx;
	const float4 dirBottom = aCamera->DirLB * leftWeight + aCamera->DirRB * tx;
	const float4 posTop = aCamera->RayPosLT * leftWeight + aCamera->RayPosRT * tx;
	const float4 posBottom = aCamera->RayPosLB * leftWeight + aCamera->RayPosRB * tx;

	// Blend the two edges along y; only the direction is normalized.
	float4 rayDir = dirBottom * bottomWeight + dirTop * ty;
	rayDir.xyz = normalize(rayDir.xyz);
	const float4 rayPos = posBottom * bottomWeight + posTop * ty;

	__global RayInfo* ray = &aOutRays[py * aWidth + px];
	ray->Origin.xyz = rayPos.xyz;
	ray->Direction.xyz = rayDir.xyz;
	ray->Origin.w = aCamera->NearFar.y;
}

/*
 * Builds, for every pixel that has a surface hit, a secondary ray from the hit
 * point towards one entry of the light table.
 *
 * aOutRays     - output rays, one per pixel at index y * aWidth + x.
 * aPositions   - vertex positions, 3 floats per vertex.
 * aNormals     - vertex normals, 3 floats per vertex.
 * aIDs         - triangle vertex indices, 3 per triangle.
 * aObjVrtShift - per-object offset; applied (x3) to the vertex arrays and
 *                (x1) to the index array before the triangle lookup.
 * aLightColor  - not used by this kernel.
 * aLightPos    - light table, 3 floats per entry.
 * aPXLightId   - per-pixel counter; reset when RayId == 0, incremented and
 *                stored back on every call for pixels that have a hit.
 * aHit         - per-pixel hit record; -1 in ObjectID or TriangleID means no hit.
 * panM0..panM2 - three vectors forming the linear transform applied to the
 *                light-table entry.
 * RayId        - pass index; only compared against 0 here.
 * aTimes       - not used by this kernel.
 * aWidth, aHeight - image size; work-items outside it do nothing.
 */
__kernel void UpdateDiffuseRays(__global RayInfo* aOutRays,
	__global float* aPositions,
	__global float* aNormals,
	__global int* aIDs,
	__global int* aObjVrtShift,
	__global float* aLightColor,
	__global float* aLightPos,
	__global int* aPXLightId,
	__global cRayHit* aHit,
	float3 panM0,
	float3 panM1,
	float3 panM2,
	int RayId,
	float aTimes,
	int aWidth,
	int aHeight)
{
	int2 globalid;
	globalid.x = get_global_id(0);
	globalid.y = get_global_id(1);

	if (globalid.x < aWidth && globalid.y < aHeight)
	{
		int k = globalid.y * aWidth + globalid.x;
		int ObjectID = aHit[k].ObjectID;
		int TriangleID = aHit[k].TriangleID;

		// The first pass restarts the per-pixel counter, even for pixels without a hit.
		if (RayId == 0) aPXLightId[k] = 0;

		if (ObjectID == -1 || TriangleID == -1)
		{
			return;
		}

		// Same limit the Shading kernel applies to this counter before it
		// touches the light tables.
		const int MaxRayCount = 16384;

		// Advance the counter and store it; the store happens unconditionally,
		// exactly as before, so later passes see the same counter values.
		int nRayId = aPXLightId[k] + 1;
		aPXLightId[k] = nRayId;

		// Previously the counter indexed aLightPos without any bound. Shading
		// discards pixels whose counter has reached the limit, so no ray is
		// needed for them and the table read is skipped.
		// NOTE(review): assumes aLightPos holds MaxRayCount entries - confirm on the host side.
		if (nRayId >= MaxRayCount)
		{
			return;
		}

		// Hit point and shading normal blended from the triangle's vertices.
		int iObjVrtShift = aObjVrtShift[ObjectID];
		float3 pos = MixTriangleVertices(aPositions + iObjVrtShift * 3, aIDs + iObjVrtShift, TriangleID, &aHit[k].uv).xyz;
		float3 norm = MixTriangleVertices(aNormals + iObjVrtShift * 3, aIDs + iObjVrtShift, TriangleID, &aHit[k].uv).xyz;
		norm = normalize(norm);

		// Counters below 4096 are replaced by the scrambled RAY_NOISE index,
		// which reads nRayId, k, globalid.x and iYNoise from this scope.
		int iYNoise = globalid.y;
		int iRay = nRayId;
		if (iRay < 4096) iRay = RAY_NOISE;

		float3 rlight = (float3)(aLightPos[iRay * 3], aLightPos[iRay * 3 + 1], aLightPos[iRay * 3 + 2]);

		// Linear transform of the table entry by panM0..panM2.
		float3 iLight;
		iLight.x = rlight.x * panM0.x + rlight.y * panM1.x + rlight.z * panM2.x;
		iLight.y = rlight.x * panM0.y + rlight.y * panM1.y + rlight.z * panM2.y;
		iLight.z = rlight.x * panM0.z + rlight.y * panM1.z + rlight.z * panM2.z;

		// Direction from the hit point to the transformed light; w is zero so
		// the 4-component normalize only sees xyz.
		float4 dir;
		dir.xyz = iLight.xyz - pos.xyz;
		dir.w = 0;
		dir = normalize(dir);

		aOutRays[k].Direction = dir;
		// Direction.w carries the normal/direction dot product clamped to [0, 1].
		aOutRays[k].Direction.w = min(1.0f, max(0.0f, dot(dir.xyz, norm)));//99999.9;//aTimes;
		// Start slightly above the surface along the normal.
		aOutRays[k].Origin.xyz = pos.xyz + norm.xyz * EPSILON;
		// Single-precision literal: the target field is a float.
		aOutRays[k].Origin.w = 99999.9f;////length(dir);

	}
}


/*
 * Accumulates one lighting sample per pixel into resultBitmap and, under the
 * conditions tested further down, also adds a contribution to the pixel onto
 * which the secondary ray's hit point projects.
 *
 * resultBitmap - 4 floats per pixel. Channels 0..2 are read and rewritten as
 *                accumulated color; channel 3 is written with a depth value
 *                and later compared against hitScreenZ.
 * aRays        - secondary rays (origin, direction and the two w scalars).
 * aPerspRays   - rays whose Direction is compared with the normal at the
 *                projected hit pixel.
 * aPositions, aNormals, aIDs, aObjVrtShift - geometry, addressed exactly as in
 *                MixTriangleVertices (3 floats per vertex, 3 indices per triangle).
 * aHit         - per-pixel hit record; -1 in either id means no hit.
 * aIsHit       - per-pixel integer. It is used both as a hit distance
 *                (converted to float) and compared with 1.
 *                NOTE(review): confirm which meaning is intended.
 * aLightColor, aLightPos - light tables, 3 floats per entry.
 * aPXLightId   - per-pixel counter selecting the light-table entry.
 * aCamera      - basis, position and projection terms for the screen projection.
 * panM0..panM2 - linear transform applied to the light-table position.
 * RayId        - pass index; only selects the iMod gain (4 when < 64, else 32).
 * aCastTimes   - divisor for the bounce terms; values below 0.5 also clear the
 *                previously accumulated color.
 * aWidth, aHeight - image size; work-items outside it do nothing.
 */
__kernel void Shading(
	__global float* resultBitmap,
	__global RayInfo* aRays,
	__global RayInfo* aPerspRays,
	__global float* aPositions,
	__global float* aNormals,
	__global int* aIDs,
	__global int* aObjVrtShift,
	__global cRayHit* aHit,
	__global const int* aIsHit,
	__global float* aLightColor,
	__global float* aLightPos,
	__global int* aPXLightId,
	__global const cCamera* aCamera,
	float3 panM0,
	float3 panM1,
	float3 panM2,
	int RayId,
	float aCastTimes,
	int aWidth,
	int aHeight)
{
	int2 globalid;
	globalid.x = get_global_id(0);
	globalid.y = get_global_id(1);

	if (globalid.x < aWidth && globalid.y < aHeight)
	{
		int k = globalid.y * aWidth + globalid.x;
		int ObjectID = aHit[k].ObjectID;
		int TriangleID = aHit[k].TriangleID;


		// Pixels whose counter reached 16384 are left untouched.
		int nRayId = aPXLightId[k];
		if (nRayId >= 16384) return;
		// RAY_NOISE reads nRayId, k, globalid.x and iYNoise from this scope.
		int iYNoise = globalid.y;
		int iRay = nRayId;
		if (iRay < 4096) iRay = RAY_NOISE;
		float3 iLightColor = (float3)(aLightColor[iRay * 3], aLightColor[iRay * 3 + 1], aLightColor[iRay * 3 + 2]);
		float3 rlight = (float3)(aLightPos[iRay * 3], aLightPos[iRay * 3 + 1], aLightPos[iRay * 3 + 2]);

		bool hit = false;

		// End point of the secondary ray: origin + direction * distance.
		float3 fromPoint = aRays[k].Origin.xyz;
		float hitDistance = aIsHit[k];
		float3 hitPoint = fromPoint + (aRays[k].Direction.xyz * hitDistance);


		// Both points relative to the camera position ...
		float3 rHitPoint = hitPoint - aCamera->MatrixPos.xyz;
		float3 rFromPoint = fromPoint - aCamera->MatrixPos.xyz;

		// ... then expressed along the camera's X, Y and Z vectors.
		float hitScreenX = rHitPoint.x * aCamera->MatrixX.x + rHitPoint.y * aCamera->MatrixX.y + rHitPoint.z * aCamera->MatrixX.z;
		float hitScreenY = rHitPoint.x * aCamera->MatrixY.x + rHitPoint.y * aCamera->MatrixY.y + rHitPoint.z * aCamera->MatrixY.z;
		float hitScreenZ = rHitPoint.x * aCamera->MatrixZ.x + rHitPoint.y * aCamera->MatrixZ.y + rHitPoint.z * aCamera->MatrixZ.z;

		float fromScreenX = (rFromPoint.x * aCamera->MatrixX.x + rFromPoint.y * aCamera->MatrixX.y + rFromPoint.z * aCamera->MatrixX.z);
		float fromScreenY = (rFromPoint.x * aCamera->MatrixY.x + rFromPoint.y * aCamera->MatrixY.y + rFromPoint.z * aCamera->MatrixY.z);
		float fromScreenZ = (rFromPoint.x * aCamera->MatrixZ.x + rFromPoint.y * aCamera->MatrixZ.y + rFromPoint.z * aCamera->MatrixZ.z);// + EPSILON;

		float fWidth = (float)aWidth;
		float fHeight = (float)aHeight;


		// Depth-dependent divide, scale to pixels, move the origin to the image centre.
		hitScreenX = hitScreenX / (aCamera->SizeX + (aCamera->PrespSizeD1X * hitScreenZ)) * fWidth + (fWidth * 0.5f);
		hitScreenY = hitScreenY / (aCamera->SizeY + (aCamera->PrespSizeD1Y * hitScreenZ)) * fHeight + (fHeight * 0.5f);

		fromScreenX = fromScreenX / (aCamera->SizeX + (aCamera->PrespSizeD1X * fromScreenZ)) * fWidth + (fWidth * 0.5f);
		fromScreenY = fromScreenY / (aCamera->SizeY + (aCamera->PrespSizeD1Y * fromScreenZ)) * fHeight + (fHeight * 0.5f);

		// Translate both projections so the ray origin lands exactly on the
		// centre of this work-item's pixel (x + 0.5, y + 0.5).
		float shiftScreenX = fromScreenX - ((float)globalid.x + 0.5f);
		float shiftScreenY = fromScreenY - ((float)globalid.y + 0.5f);
		fromScreenX -= shiftScreenX;
		fromScreenY -= shiftScreenY;

		hitScreenX -= shiftScreenX;
		hitScreenY -= shiftScreenY;


		// Float-to-int conversion of the pixel coordinates.
		int iHitScreenX = hitScreenX;
		int iHitScreenY = hitScreenY;

		int iFromScreenX = fromScreenX;
		int iFromScreenY = fromScreenY;

		int hit_px_idx = iHitScreenY * aWidth + iHitScreenX;
		int from_px_idx = iFromScreenY * aWidth + iFromScreenX;

		float3 light;

		// Light-table position transformed by panM0..panM2.
		light.x = rlight.x * panM0.x + rlight.y * panM1.x + rlight.z * panM2.x;
		light.y = rlight.x * panM0.y + rlight.y * panM1.y + rlight.z * panM2.y;
		light.z = rlight.x * panM0.z + rlight.y * panM1.z + rlight.z * panM2.z;



		// Sample color starts as white and is attenuated below.
		float4 diff_col = (float4)(1.0f,
			1.0f,
			1.0f, 1.0f);



		float dot_prod = 0;
		float oNormDirDist = 0.0f;
		float screenDot = 0.0f;
		if (ObjectID == -1 || TriangleID == -1) {
			// No surface under this pixel: the sample contributes nothing.
			diff_col = diff_col * 0.0f;
		}
		else {
			// NOTE(review): the value used as hitDistance above is compared
			// with 1 here and zeroes the sample when equal - confirm the
			// encoding of aIsHit.
			if (aIsHit[k] == 1) {//aRays[k].Origin.w){
				diff_col = diff_col * 0.0f;
			}
			// Despite its name, aIndices is the per-object offset into the geometry arrays.
			int aIndices = aObjVrtShift[ObjectID];
			float3 pos = MixTriangleVertices(aPositions + aIndices * 3, aIDs + aIndices, TriangleID, &aHit[k].uv).xyz;

			float3 norm = MixTriangleVertices(aNormals + aIndices * 3, aIDs + aIndices, TriangleID, &aHit[k].uv).xyz;
			norm = normalize(norm);



			// Calculate lighting
	   //     float4 col = (float4)( 0.f, 0.f, 0.f, 0.f );
			// Dot product of normal and light direction, negative values clamped to 0.
			float3 relLightPos = light - pos;
			float3 light_dir = normalize(relLightPos);
			dot_prod = dot(norm, light_dir);
			if (dot_prod < 0) dot_prod = 0;
			diff_col.xyz = diff_col.xyz * dot_prod;

			// Distance between two direction vectors; compared with 1.414 below.
			oNormDirDist = distance(norm, aRays[k].Direction.xyz);
			// Store the origin's depth (plus EPSILON) in channel 3 of its pixel.
			// NOTE(review): `> 0` excludes pixel index 0 - confirm whether `>= 0` was intended.
			if (from_px_idx > 0 && from_px_idx < aWidth * aHeight) {
				hit = true;
				resultBitmap[from_px_idx * 4 + 3] = fromScreenZ + EPSILON;//1;
			}
		}


		diff_col.xyz = diff_col.xyz * iLightColor;


		// Color accumulated so far for this pixel.
		float4 out_col = (float4)(resultBitmap[k * 4],
			resultBitmap[k * 4 + 1],
			resultBitmap[k * 4 + 2], 1.0f);


		// A cast count below 0.5 discards the previous accumulation.
		if (aCastTimes < 0.5f) out_col *= 0.0f;

		diff_col = out_col + diff_col;


		// Bounce transfer. Requires Direction.w > 1, a stored origin depth, a hit
		// pixel different from the origin pixel, a hit distance above EPSILON,
		// oNormDirDist below 1.414 and a hit projection inside the image with
		// non-negative depth.
		// NOTE(review): UpdateDiffuseRays clamps Direction.w to [0, 1]; if aRays
		// is that kernel's output this branch can never run - confirm.
		if (aRays[k].Direction.w > 1.0f && hit && hit_px_idx != from_px_idx && hitDistance > EPSILON && (oNormDirDist < 1.414f) && hitScreenZ >= 0.0f && hitScreenX >= 0 && hitScreenY >= 0 && hitScreenX < aWidth && hitScreenY < aHeight) {
			// Only when the hit point is nearer than the depth stored at that pixel.
			if (hitScreenZ < resultBitmap[hit_px_idx * 4 + 3]) {

				// Index range check that leaves out the first row (and index aWidth)
				// and the last row; screenDot stays 0 when it fails.
				if (/*from_px_idx > 0 && from_px_idx < aWidth*aHeight && */hit_px_idx > aWidth&& hit_px_idx < aWidth * aHeight - 1 - aWidth) {

					int hit_ObjectID = aHit[hit_px_idx].ObjectID;
					int hit_TriangleID = aHit[hit_px_idx].TriangleID;
					//		int hit_ObjectID2 = aHit[hit_px_idx+1].ObjectID;
					//		int hit_TriangleID2 = aHit[hit_px_idx+1].TriangleID;
					//		int hit_ObjectID3 = aHit[hit_px_idx+aWidth].ObjectID;
					//		int hit_TriangleID3 = aHit[hit_px_idx+aWidth].TriangleID;
					if (hit_ObjectID >= 0 && hit_TriangleID >= 0/* && hit_ObjectID2 >= 0 && hit_TriangleID2 >= 0 && hit_ObjectID3 >= 0 && hit_TriangleID3 >= 0*/) {
						int hit_ind = aObjVrtShift[hit_ObjectID];
						//int hit_ind2 = aObjVrtShift[hit_ObjectID2];
						//int hit_ind3 = aObjVrtShift[hit_ObjectID3];

						float3 hit_norm = MixTriangleVertices(aNormals + hit_ind * 3, aIDs + hit_ind, hit_TriangleID, &aHit[hit_px_idx].uv).xyz;
						//float3 hit_norm2 = MixTriangleVertices(aNormals + hit_ind2*3, aIDs + hit_ind2, hit_TriangleID2, &aHit[hit_px_idx+1].uv).xyz;
						//float3 hit_norm3 = MixTriangleVertices(aNormals + hit_ind3*3, aIDs + hit_ind3, hit_TriangleID3, &aHit[hit_px_idx+aWidth].uv).xyz;
						hit_norm = normalize(hit_norm);
						//hit_norm2 = normalize(hit_norm2);					
						//hit_norm3 = normalize(hit_norm3);					
//							screenDot = max(0.0f, min(min(dot(-hit_norm, aPerspRays[hit_px_idx].Direction.xyz), dot(-hit_norm2, aPerspRays[hit_px_idx+1].Direction.xyz)), dot(-hit_norm3, aPerspRays[hit_px_idx+aWidth].Direction.xyz)));
						// Facing term at the hit pixel: dot of the negated normal with
						// that pixel's aPerspRays direction, remapped by *1.5 - 0.5 and
						// clamped at 0.
						screenDot = max(0.0f, dot(-hit_norm, aPerspRays[hit_px_idx].Direction.xyz) * 1.5f - 0.5f);
					}
				}




				float rayDot = aRays[k].Origin.w;
				float iMod = 32.0f;
				if (RayId < 64) iMod = 4.0f;
				// Add this sample to the color of the hit pixel. The entry belongs to
				// a different pixel than k and no atomic or barrier is used here.
				// NOTE(review): the work-item owning that pixel may update the same
				// floats concurrently - confirm this is acceptable.
				// NOTE(review): aCastTimes is a divisor; the earlier `< 0.5f` test
				// suggests it can be 0 - confirm it is non-zero on this path.
				resultBitmap[hit_px_idx * 4] += diff_col.x / aCastTimes * rayDot * iMod * screenDot;
				resultBitmap[hit_px_idx * 4 + 1] += diff_col.y / aCastTimes * rayDot * iMod * screenDot;
				resultBitmap[hit_px_idx * 4 + 2] += diff_col.z / aCastTimes * rayDot * iMod * screenDot;

				// And gather the hit pixel's (just updated) color back into this sample.
				diff_col.x += resultBitmap[hit_px_idx * 4] / aCastTimes * dot_prod * iMod;
				diff_col.y += resultBitmap[hit_px_idx * 4 + 1] / aCastTimes * dot_prod * iMod;
				diff_col.z += resultBitmap[hit_px_idx * 4 + 2] / aCastTimes * dot_prod * iMod;

			}
			//			
		}

		// Store the new accumulated color for this pixel; channel 3 is not written here.
		resultBitmap[k * 4] = diff_col.x;//col.x * 255;
		resultBitmap[k * 4 + 1] = diff_col.y;//col.y * 255;
		resultBitmap[k * 4 + 2] = diff_col.z;//col.z * 255;


	}
}

/*
 * Converts the accumulated float image into 8-bit RGBA.
 *
 * fResult      - input, 4 floats per pixel; only channels 0..2 are read.
 * aWidth, aHeight - image size; work-items outside it do nothing.
 * ffRayId      - divisor applied to every color channel before the power curve.
 * resultBitmap - output, 4 bytes per pixel; the fourth byte is always 255.
 */
__kernel void ShadingToRGBA(//scene
	__global float* fResult,
	int aWidth,
	int aHeight,
	float ffRayId,
	__global unsigned char* resultBitmap)
{
	const int column = get_global_id(0);
	const int row = get_global_id(1);

	// Skip work-items outside the image.
	if (column >= aWidth || row >= aHeight)
	{
		return;
	}

	// Offset of this pixel's first channel in both buffers.
	const int base = (row * aWidth + column) * 4;

	// Each color channel: divide, raise to the power 1/2.2, scale by 255 and
	// clamp to [0, 255] before the store narrows it to a byte.
	for (int channel = 0; channel < 3; ++channel)
	{
		resultBitmap[base + channel] = min(255.0f, max(0.0f, pow(fResult[base + channel] / ffRayId, 1.0f / 2.2f) * 255));
	}

	resultBitmap[base + 3] = 255;
}

