I have been testing moving a lot of objects in Unity through normal C# code and through HLSL shaders. However, there is no difference in speed. FPS remains the same. Different perlin noise is used to change the position. The C# code uses the standard Mathf.PerlinNoise, while the HLSL uses a custom noise function.
Scenario 1 - Update via C# code only
Object spawn:
[SerializeField]
private GameObject prefab;
/// <summary>
/// Spawns a 50 x 50 grid of prefab instances, one unit apart on X and Z,
/// each at a random height between -1 and 1.
/// </summary>
private void Start()
{
    const int gridSide = 50;

    for (int row = 0; row < gridSide; row++)
    {
        for (int column = 0; column < gridSide; column++)
        {
            // Instantiate first, then draw the random height, as before.
            Transform spawned = Instantiate(prefab).transform;
            float height = Random.Range(-1f, 1f);
            spawned.position = new Vector3(row * 1f, height, column * 1f);
        }
    }
}
Code to move an object via C#. This script is added to each created object:
private Vector3 position = new Vector3();
/// <summary>
/// Caches the starting position, keeping X and Z from the transform and
/// seeding Y from Perlin noise sampled at the current time.
/// </summary>
private void Start()
{
    Vector3 current = transform.position;
    float now = Time.time;
    position = new Vector3(current.x, Mathf.PerlinNoise(now, now), current.z);
}
/// <summary>
/// Recomputes the height every frame from Perlin noise that scrolls with time,
/// then writes the cached position back to the transform.
/// </summary>
private void Update()
{
    Vector3 current = transform.position;
    float now = Time.time;

    // Sample coordinates: world X/Z scaled down by 20 and shifted by the current time.
    float sampleX = current.x / 20f + now;
    float sampleZ = current.z / 20f + now;

    position.y = Mathf.PerlinNoise(sampleX, sampleZ) * 5f;
    transform.position = position;
}
Scenario 2 - via Compute Kernel (GPGPU)
Part 1: C# client code
Object spawn, running the calculation on the shader and assigning the resulting value to the objects:
/// <summary>
/// CPU-side mirror of the <c>Particle</c> struct declared in the compute shader.
/// The layout must match the HLSL declaration (float3 position + float4 color = 7 floats),
/// because the buffer is created with a 7-float stride. With only the position field the
/// managed array is 3 floats per element and every element after the first is misaligned
/// when it is uploaded or read back.
/// </summary>
public struct Particle
{
    public Vector3 position;
    public Vector4 color;
}

// Must equal the X value of [numthreads(10, 1, 1)] in the compute shader.
private const int ThreadGroupSize = 10;

// Size in bytes of one Particle: 3 floats (position) + 4 floats (color).
private const int ParticleStride = sizeof(float) * 7;

[SerializeField]
private GameObject prefab;
[SerializeField]
private ComputeShader computeShader;

private List<GameObject> particlesList = new List<GameObject>();
private Particle[] particlesDataArray;
private ComputeBuffer computeBuffer;

// Index of the "CSMain" kernel, looked up once instead of every frame.
private int kernelIndex;

private void Start()
{
    CreateParticles();
}

private void Update()
{
    UpdateParticlePosition();
}

/// <summary>
/// Releases the GPU buffer; a ComputeBuffer is not reclaimed automatically.
/// </summary>
private void OnDestroy()
{
    if (computeBuffer != null)
    {
        computeBuffer.Release();
        computeBuffer = null;
    }
}

/// <summary>
/// Spawns a 50 x 50 grid of prefab instances, records their positions in the
/// particle array, uploads that array to the GPU and binds it to the kernel.
/// </summary>
private void CreateParticles()
{
    List<Particle> particlesDataList = new List<Particle>();

    for (int i = 0; i < 50; i++)
    {
        for (int j = 0; j < 50; j++)
        {
            GameObject createdParticle = Instantiate(prefab);
            createdParticle.transform.position = new Vector3(i * 1f, Random.Range(-1f, 1f), j * 1f);
            particlesList.Add(createdParticle);

            Particle particle = new Particle();
            particle.position = createdParticle.transform.position;
            particlesDataList.Add(particle);
        }
    }

    particlesDataArray = particlesDataList.ToArray();
    particlesDataList.Clear();

    kernelIndex = computeShader.FindKernel("CSMain");
    computeBuffer = new ComputeBuffer(particlesDataArray.Length, ParticleStride);
    computeBuffer.SetData(particlesDataArray);
    computeShader.SetBuffer(kernelIndex, "particles", computeBuffer);
}

/// <summary>
/// Runs the kernel for the current time, reads the results back and copies
/// the computed height onto every spawned object.
/// </summary>
private void UpdateParticlePosition()
{
    computeShader.SetFloat("time", Time.time);

    // One thread per particle. The integer division assumes the particle count is a
    // multiple of ThreadGroupSize (2500 here); any remainder would not be processed.
    computeShader.Dispatch(kernelIndex, particlesDataArray.Length / ThreadGroupSize, 1, 1);

    // NOTE: GetData blocks the CPU until the GPU has finished the dispatch above.
    computeBuffer.GetData(particlesDataArray);

    for (int i = 0; i < particlesDataArray.Length; i++)
    {
        Transform particleTransform = particlesList[i].transform;
        Vector3 pos = particleTransform.position;
        pos.y = particlesDataArray[i].position.y;
        particleTransform.position = pos;
    }
}
Part 2: Compute kernel (GPGPU)
// Declares CSMain as a compute kernel entry point.
#pragma kernel CSMain
// One element of the particle buffer: a float3 position followed by a float4 color
// (7 floats in total). Only position is read or written by the code visible here.
struct Particle {
float3 position;
float4 color;
};
// Read/write buffer of particles; CSMain reads and rewrites one element per thread.
RWStructuredBuffer<Particle> particles;
// Time value used by CSMain to scroll the noise field.
// Presumably set from script before each dispatch - confirm against the caller.
float time;
// Floored modulo: returns x - y * floor(x / y).
// Used by the scalar permute() overload below.
float mod(float x, float y)
{
return x - y * floor(x / y);
}
// Permutation polynomial ((34x + 1) * x) reduced modulo 289, in scalar, float3 and float4 forms.
// Only the scalar overload applies an extra floor() to the result.
// None of these overloads is called by the other functions visible in this file.
// NOTE(review): the float3/float4 overloads pass vectors to mod(float, float);
// confirm this compiles as intended or that vector overloads of mod exist elsewhere.
float permute(float x) { return floor(mod(((x * 34.0) + 1.0) * x, 289.0)); }
float3 permute(float3 x) { return mod(((x * 34.0) + 1.0) * x, 289.0); }
float4 permute(float4 x) { return mod(((x * 34.0) + 1.0) * x, 289.0); }
// Linear function 1.79284291400159 - 0.85373472095314 * r.
// Presumably a first-order approximation of 1/sqrt(r), as the name suggests - confirm.
// The float4 overload applies the scalar version to each component.
// Not called by the other functions visible in this file.
float taylorInvSqrt(float r) { return 1.79284291400159 - 0.85373472095314 * r; }
float4 taylorInvSqrt(float4 r) { return float4(taylorInvSqrt(r.x), taylorInvSqrt(r.y), taylorInvSqrt(r.z), taylorInvSqrt(r.w)); }
// Hashes a 3D coordinate into a pseudo-random float3.
// The same input always yields the same output; each component is a frac() result
// shifted by -0.5, i.e. in the range [-0.5, 0.5).
// _snoise uses the result as the gradient vector of a lattice corner.
float3 rand3(float3 c) {
// Collapse the coordinate to one scalar and scale it up so frac() below sees varied digits.
float j = 4096.0 * sin(dot(c, float3(17.0, 59.4, 15.0)));
float3 r;
// Each component takes the fractional part of the hash at a different scale
// (j is divided by 8 between components).
r.z = frac(512.0 * j);
j *= .125;
r.x = frac(512.0 * j);
j *= .125;
r.y = frac(512.0 * j);
return r - 0.5;
}
// 3D gradient noise evaluated at p; the structure (skew by 1/3, unskew by 1/6,
// four weighted corner contributions) looks like simplex noise.
// Returns the weighted sum of the four corner contributions scaled by 52.
// NOTE(review): the output range is not established by this code - confirm before relying on it.
float _snoise(float3 p) {
// Skew (F3) and unskew (G3) factors.
const float F3 = 0.3333333;
const float G3 = 0.1666667;
// s: lattice cell containing p in skewed space; x: offset of p from that cell's origin corner.
float3 s = floor(p + dot(p, float3(F3, F3, F3)));
float3 x = p - s + dot(s, float3(G3, G3, G3));
// e compares the components of x pairwise; i1 and i2 are the lattice offsets
// of the second and third corners derived from that ordering.
float3 e = step(float3(0.0, 0.0, 0.0), x - x.yzx);
float3 i1 = e * (1.0 - e.zxy);
float3 i2 = 1.0 - e.zxy * (1.0 - e);
// Offsets of p from the remaining three corners.
float3 x1 = x - i1 + G3;
float3 x2 = x - i2 + 2.0 * G3;
float3 x3 = x - 1.0 + 3.0 * G3;
float4 w, d;
// w: squared distance to each corner, turned into a falloff max(0.6 - dist^2, 0).
w.x = dot(x, x);
w.y = dot(x1, x1);
w.z = dot(x2, x2);
w.w = dot(x3, x3);
w = max(0.6 - w, 0.0);
// d: dot product of each corner's pseudo-random gradient with the offset to that corner.
d.x = dot(rand3(s), x);
d.y = dot(rand3(s + i1), x1);
d.z = dot(rand3(s + i2), x2);
d.w = dot(rand3(s + 1.0), x3);
// Raise the falloff to the fourth power, then weight the gradient contributions.
w *= w;
w *= w;
d *= w;
// Sum the four contributions, each scaled by 52.
return dot(d, float4(52.0, 52.0, 52.0, 52.0));
}
// Kernel entry point: one thread per particle, 10 threads per group.
// Replaces the particle's Y coordinate with a noise value (scaled by 5) sampled at
// its X/Z position (scaled down by 20) and scrolled by time / 5. X and Z are left unchanged.
[numthreads(10, 1, 1)]
void CSMain(uint3 id : SV_DispatchThreadID)
{
    // Guard against dispatches whose thread count exceeds the buffer length
    // (for example when the caller rounds the group count up), so that no
    // thread reads or writes past the end of the buffer.
    uint particleCount;
    uint particleStride;
    particles.GetDimensions(particleCount, particleStride);
    if (id.x >= particleCount)
    {
        return;
    }

    Particle particle = particles[id.x];
    float modifyTime = time / 5.0;
    float positionY = _snoise(float3(particle.position.x / 20.0 + modifyTime, 0.0, particle.position.z / 20.0 + modifyTime)) * 5.0;
    particle.position = float3(particle.position.x, positionY, particle.position.z);
    particles[id.x] = particle;
}
What am I doing wrong, why is there no increase in calculation speed? :)
Thanks in advance!
TL;DR: your GPGPU (compute shader) scenario is unoptimized thus skewing your results. Consider binding a material to the computeBuffer and rendering via Graphics.DrawProcedural. That way everything stays on the GPU.
OP:
What am I doing wrong, why is there no increase in calculation speed?
Essentially, there are two parts to your problem.
(1) Reading from the GPU is slow
With most things GPU-related, you generally want to avoid reading from the GPU since it will block the CPU. This is true also for GPGPU scenarios.
If I were to hazard a guess it would be the GPGPU (compute shader) call computeBuffer.GetData() shown below:
private void Update()
{
UpdateParticlePosition();
}
private void UpdateParticlePosition()
{
.
.
.
computeBuffer.GetData(particlesDataArray); // <----- OUCH!
Unity (my emphasis):
ComputeBuffer.GetData
Read data values from the buffer into an array...
Note that this function reads the data back from the GPU, which can be slow...If any GPU work has been submitted that writes to this buffer, Unity waits for the tasks to complete before it retrieves the requested data. Tell me more...
(2) Explicit GPU reading is not required in your scenario
I can see you are creating 2,500 "particles" where each particle is attached to a GameObject. If the intent is to just draw a simple quad, then it's more efficient to create an array of structs, each containing a Vector3 position, and then perform a single batched render call to draw all the particles in one go.
Proof: see video below of nBody simulation. 60+ FPS on 2014 era NVidia card
e.g. for my GPGPU n-Body Galaxy Simulation I do just that. Pay attention to the StarMaterial.SetBuffer("stars", _starsBuffer) during actual rendering. That tells the GPU to use the buffer that already exists on the GPU, the very same buffer that the compute shader used to move the star positions. There is no CPU reading the GPU here.
/// <summary>
/// Excerpt of a controller that moves stars with a compute shader and draws them
/// procedurally from the same GPU buffer. Nothing in the visible code reads the
/// buffer back to the CPU. Lines consisting of a single dot mark omitted code.
/// </summary>
public class Galaxy1Controller : MonoBehaviour
{
public Texture2D HueTexture;
public int NumStars = 10000; // That's right! 10,000 stars!
public ComputeShader StarCompute;
public Material StarMaterial;
// Buffer of 6 elements uploaded in Start and bound to the material as "quadPoints".
private ComputeBuffer _quadPoints;
// CPU-side initial star data, uploaded once in Start.
private Star[] _stars;
// GPU buffer bound to both the compute kernel and the material under the name "stars".
private ComputeBuffer _starsBuffer;
.
.
.
/// <summary>
/// Looks up the "UpdateStars" kernel, creates the star buffer and uploads the
/// initial star data, then creates and fills the 6-element quad buffer.
/// </summary>
private void Start()
{
_updateParticlesKernel = StarCompute.FindKernel("UpdateStars");
_starsBuffer = new ComputeBuffer(NumStars, Constants.StarsStride);
_stars = new Star[NumStars];
// Create initial positions for stars here (not shown)
_starsBuffer.SetData(_stars);
_quadPoints = new ComputeBuffer(6, QuadStride);
_quadPoints.SetData(...); // star quad
}
/// <summary>
/// Binds the star buffer, the scaled delta time and the hue texture to the kernel,
/// then dispatches enough thread groups to cover NumStars.
/// </summary>
private void Update()
{
// bind resources to compute shader
StarCompute.SetBuffer(_updateParticlesKernel, "stars", _starsBuffer);
StarCompute.SetFloat("deltaTime", Time.deltaTime*_manager.MasterSpeed);
StarCompute.SetTexture(_updateParticlesKernel, "hueTexture", HueTexture);
// dispatch, launch threads on GPU
// Group count is rounded up so a NumStars that is not a multiple of GroupSize is still covered.
var numberOfGroups = Mathf.CeilToInt((float) NumStars/GroupSize);
StarCompute.Dispatch(_updateParticlesKernel, numberOfGroups, 1, 1);
// "Look Ma, no reading from the GPU!"
}
/// <summary>
/// Binds the same star buffer and the quad buffer to the material and issues one
/// procedural draw: 6 vertices per instance, NumStars instances.
/// </summary>
private void OnRenderObject()
{
// bind resources to material
StarMaterial.SetBuffer("stars", _starsBuffer);
StarMaterial.SetBuffer("quadPoints", _quadPoints);
// set the pass
StarMaterial.SetPass(0);
// draw
Graphics.DrawProcedural(MeshTopology.Triangles, 6, NumStars);
}
}
n-Body galaxy simulation of 10,000 stars:
I think everyone can agree that Microsoft's GPGPU documentation is pretty sparse so your best bet is to check out examples scattered around the interwebs. One that comes to mind is the excellent "GPU Ray Tracing in Unity" series over at Three Eyed Games. See the link below.
See also:
MickyD, "n-Body Galaxy Simulation using Compute Shaders on GPGPU via Unity 3D", 2014
Kuri, D, "GPU Ray Tracing in Unity – Part 1", 2018
ComputeBuffer.GetData is very slow: the CPU has to copy the data back from the GPU, which stalls the main thread until the copy completes.
Then you loop over all the transforms to change their positions; this is certainly faster than thousands of MonoBehaviours, but it is still slow.
There are two ways to optimize your code.
CPU
C# Job System + Burst
Detailed tutorial: https://github.com/stella3d/job-system-cookbook
GPU
Use the structured buffer calculated in the compute shader without copying it back to the CPU. Here is a detailed tutorial on how to do it:
https://catlikecoding.com/unity/tutorials/basics/compute-shaders/
I am currently following a Brackeys tutorial on procedural terrain generation colors. I got to a point where it gives me this error:
IndexOutOfRangeException: Index was outside the bounds of the array. mapgeneration.CreateShape () (at Assets/mapgeneration.cs:108)
mapgeneration.Update () (at Assets/mapgeneration.cs:131)
I am using gradients to display color on line 108 in CreateShape. This is the line:
colors[iloopedforvertecy] = gradient.Evaluate(height);
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
/// <summary>
/// Generates a Perlin-noise terrain mesh of xsize by zsize quads and colours
/// each vertex from a gradient according to its height. The mesh is rebuilt every frame.
/// </summary>
[RequireComponent(typeof(MeshFilter))]
public class mapgeneration : MonoBehaviour
{
    Mesh mesh;
    Color[] colors;
    Vector3[] vertices;
    int[] triangles;

    public int xsize = 20;
    public int zsize = 20;
    [Range(1, 100)]
    public float smooth = 1.0f;
    public MeshCollider _mesh;
    public Transform water;
    public float scale;
    public float smoothfactor;
    public float xoffset = 0.0f;
    public float zoffset = 0.0f;
    public float minwaterheight;
    public float maxwaterheight;
    public float minterainheight;
    public float maxterainheight;
    public Gradient gradient;

    /// <summary>
    /// Creates the mesh, builds the first shape, caches the MeshCollider and
    /// places the water plane at a random height within the configured range.
    /// </summary>
    void Start()
    {
        mesh = new Mesh();
        GetComponent<MeshFilter>().mesh = mesh;
        CreateShape();
        _mesh = GetComponent<MeshCollider>();
        water.transform.position = new Vector3(0, Random.Range(minwaterheight, maxwaterheight), 0);
    }

    /// <summary>
    /// Fills the triangle, vertex and colour arrays for the current settings.
    /// </summary>
    void CreateShape()
    {
        vertices = new Vector3[(xsize + 1) * (zsize + 1)];
        water.transform.localScale = new Vector3(xsize, 0, zsize);
        triangles = new int[xsize * zsize * 6];

        // Two triangles per quad. 'vert' skips one extra vertex at the end of each
        // row because a row has xsize + 1 vertices but only xsize quads.
        int vert = 0;
        int tris = 0;
        for (int z = 0; z < zsize; z++)
        {
            for (int x = 0; x < xsize; x++)
            {
                triangles[tris + 0] = vert + 0;
                triangles[tris + 1] = vert + xsize + 1;
                triangles[tris + 2] = vert + 1;
                triangles[tris + 3] = vert + 1;
                triangles[tris + 4] = vert + xsize + 1;
                triangles[tris + 5] = vert + xsize + 2;
                vert++;
                tris += 6;
            }
            vert++;
        }

        colors = new Color[vertices.Length];
        int iloopedforvertecy = 0;
        for (int z = 0; z <= zsize; z++)
        {
            for (int x = 0; x <= xsize; x++)
            {
                float xCoord = (float)x / xsize * scale + xoffset;
                float zCoord = (float)z / zsize * scale + zoffset;
                float y = Mathf.PerlinNoise(xCoord * smooth, zCoord * smooth) * smoothfactor;
                vertices[iloopedforvertecy] = new Vector3(x, y, z);

                // Track the height range seen so far; it is used to normalise the colour lookup.
                if (y > maxterainheight)
                {
                    maxterainheight = y;
                }
                if (y < minterainheight)
                {
                    minterainheight = y;
                }

                // Normalise this vertex's own height into 0..1. The previous constant 0.5f
                // gave every vertex the same gradient sample.
                float height = Mathf.InverseLerp(minterainheight, maxterainheight, y);
                Debug.LogWarning(height);
                colors[iloopedforvertecy] = gradient.Evaluate(height);

                // Advance only after BOTH arrays were written for this vertex. Incrementing
                // earlier skipped colors[0] and indexed one past the end on the last vertex,
                // which raised IndexOutOfRangeException.
                iloopedforvertecy += 1;
            }
        }
    }

    /// <summary>
    /// Pushes the generated arrays into the mesh and recalculates its normals.
    /// </summary>
    void UpdateMsh()
    {
        mesh.Clear();
        mesh.vertices = vertices;
        mesh.triangles = triangles;
        mesh.RecalculateNormals();
        mesh.colors = colors;
    }

    /// <summary>
    /// Rebuilds and re-uploads the mesh every frame so inspector changes show immediately.
    /// </summary>
    void Update()
    {
        CreateShape();
        UpdateMsh();
    }
}
I know that my code is messy. Still new to coding and unity in general.
Oh PS. Can somebody please help me add a collider to code generated object as you can see in the code above?
In your for-loop you increase the value of iloopedforvertecy before you evaluate your gradient value.
Because of this the last value of iloopedforvertecy will be greater than the array length of your colors array.
Try moving the line that increments the value to the end of the loop:
// Inner vertex loop with the index increment moved to the end of the body, so that
// vertices[...] and colors[...] are both written at the same index before it advances.
for(int x = 0; x <= xsize; x++)
{
float xCoord = (float)x / xsize * scale + xoffset;
float zCoord = (float)z / zsize * scale + zoffset;
float y = Mathf.PerlinNoise(xCoord * smooth, zCoord * smooth) * smoothfactor;
vertices[iloopedforvertecy] = new Vector3(x, y, z);
// iloopedforvertecy += 1; // HERE THE VALUE WAS INCREASED
// Track the smallest and largest height seen so far.
if(y > maxterainheight)
{
maxterainheight = y;
}
if(y < minterainheight)
{
minterainheight = y;
}
// NOTE(review): the value passed to InverseLerp is the constant 0.5f, not this vertex's
// height, so every vertex gets the same gradient sample - confirm whether y was intended.
float height = Mathf.InverseLerp(minterainheight, maxterainheight, 0.5f); //vertices[iloopedforvertecy].z
Debug.LogWarning(height);
colors[iloopedforvertecy] = gradient.Evaluate(height);
iloopedforvertecy += 1; // HERE SHOULD THE VALUE BE INCREASED
}
In the example here I commented the two lines.
I find a lot of examples of creating a hex grid like the following. But I'm having a hard time understanding how I might have a list of the corners in a hex grid. Basically I'd want a character to move along the line of the grid instead of the center. so I want to grab the position of the next corner in the hex grid and move them there.
I was thinking of using a basic hex grid code to create the prefabs in the right place and then just add empty game objects on each corner of the prefab, but then I have a bunch of overlapping positions that are shared corners for each hexagon. I thought I could delete them if they are overlapping but it just all seems too brute force and hard to keep track of. I'd love to hear some ideas on approaching something like this. By the way I'd also want to know the center of the hex besides knowing the corners.
this code successfully creates a hex grid pattern where I can add a hex shaped game object to instance.
using UnityEngine;
/// <summary>
/// Instantiates a gridWidth by gridHeight field of hexagon prefabs, laid out in
/// offset rows and parented to this object.
/// </summary>
public class Grid : MonoBehaviour
{
    public Transform hexPrefab;
    public int gridWidth = 11;
    public int gridHeight = 11;

    // Hexagon dimensions before the gap is applied.
    float hexWidth = 1.732f;
    float hexHeight = 2.0f;

    // Extra spacing between hexagons, as a fraction of the hexagon size.
    public float gap = 0.0f;

    // World position of grid cell (0, 0).
    Vector3 startPos;

    void Start()
    {
        AddGap();
        CalcStartPos();
        CreateGrid();
    }

    /// <summary>Grows both hexagon dimensions by the configured gap fraction.</summary>
    void AddGap()
    {
        hexWidth = hexWidth + hexWidth * gap;
        hexHeight = hexHeight + hexHeight * gap;
    }

    /// <summary>Computes the world position of cell (0, 0) from the grid dimensions.</summary>
    void CalcStartPos()
    {
        // Integer halves of the grid dimensions, as in the original integer division.
        int halfColumns = gridWidth / 2;
        int halfRows = gridHeight / 2;

        // Shift by half a hexagon when the half row count is odd.
        float shift = (halfRows % 2 != 0) ? hexWidth / 2 : 0;

        float originX = -hexWidth * halfColumns - shift;
        float originZ = hexHeight * 0.75f * halfRows;
        startPos = new Vector3(originX, 0, originZ);
    }

    /// <summary>Converts a grid coordinate (column, row) to a world position.</summary>
    Vector3 CalcWorldPos(Vector2 gridPos)
    {
        // Odd rows are shifted right by half a hexagon.
        float shift = (gridPos.y % 2 != 0) ? hexWidth / 2 : 0;

        float worldX = startPos.x + gridPos.x * hexWidth + shift;
        float worldZ = startPos.z - gridPos.y * hexHeight * 0.75f;
        return new Vector3(worldX, 0, worldZ);
    }

    /// <summary>Instantiates one prefab per cell, row by row.</summary>
    void CreateGrid()
    {
        for (int row = 0; row < gridHeight; row++)
        {
            for (int column = 0; column < gridWidth; column++)
            {
                Transform cell = Instantiate(hexPrefab) as Transform;
                cell.position = CalcWorldPos(new Vector2(column, row));
                cell.parent = this.transform;
                cell.name = "Hexagon" + column + "|" + row;
            }
        }
    }
}
What I am trying to achieve is something like this:
What I have so far is the edges for the circles.
I know this would involve a nested for loop. This is what I have so far:
public GameObject player;
private GameObject playerGrid;
public int numOfObjects;
private Vector3 centerPos;
public int size = 2;
public Vector2 speed = new Vector2(50, 50);
private float smoothTime = 0.25f;
/// <summary>
/// Creates an empty parent object and places numOfObjects instances of the player
/// prefab evenly on a circle around this object's position.
/// </summary>
void Start()
{
    playerGrid = new GameObject();
    centerPos = transform.position;

    // The radius is the same for every point, so compute it once.
    // 'size / 2f' forces float division: with the previous integer division an odd
    // size was truncated (3 / 2 == 1), so sizes 2 and 3 gave the same circle.
    float r = size / 2f * Mathf.PI;

    for (int i = 0; i < numOfObjects; i++)
    {
        // Fraction of the full turn for this point, converted to radians.
        float pointNum = (i * 1.0f) / numOfObjects;
        float angle = pointNum * Mathf.PI * 2;

        float x = Mathf.Sin(angle) * r;
        float y = Mathf.Cos(angle) * r;
        Vector3 pointPos = new Vector3(x, y, 0) + centerPos;

        GameObject obj = Instantiate(player, pointPos, Quaternion.identity);
        obj.transform.SetParent(playerGrid.transform);
    }
}
I am stuck on how to implement the conditional for the nested for loop. Also, I have trouble understanding the calculations of column positions in the nested for loop. I believe the conditional would be the start and end of I for that column or row: for(int j = i + 1; j < i - 1, j++)
For the col positions, I would think it would be incrementing the angle enough to give the square its space for that column: float x = (Mathf.Sin(angle) + somethingHere) * r;
I'm just not sure how to progress from here.
Here's a simple way to draw a circle:
public float circleRadius = 5f;
public float objectSize = 1f;
/// <summary>
/// Draws a filled disc of gizmo spheres: walks a square grid spanning
/// -circleRadius..circleRadius on both axes and keeps only the cells whose
/// distance from the origin is less than circleRadius.
/// </summary>
void OnDrawGizmos()
{
    // Cells are spaced one sphere diameter apart.
    float spacing = objectSize * 2f;

    for (float gridX = -circleRadius; gridX <= circleRadius; gridX++)
    {
        for (float gridZ = -circleRadius; gridZ <= circleRadius; gridZ++)
        {
            Vector3 cell = new Vector3(gridX, 0f, gridZ);
            if (cell.magnitude < circleRadius)
            {
                Gizmos.DrawSphere(cell * spacing, objectSize);
            }
        }
    }
}
I wrote an infinite terrain script which works! Sadly, every time the player moves a chunk it lags for a moment. I know my code isn't great but I'm here to learn why :D
I'm unsure of what else to do. I've looked online and found no solution simple or understandable enough for me, because I just don't know enough, so I tried to write it on my own, and it works, but barely.
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
/// <summary>
/// Keeps a square window of renderDistance by renderDistance terrain chunks around
/// <see cref="targetObject"/>. When the target enters a new grid cell, chunks that
/// fell outside the window are moved to the cells that became empty.
/// </summary>
public class GEN_InfiniteTerrain : MonoBehaviour
{
    public GameObject targetObject;
    public GameObject chunkObject;
    public int chunkSize;
    public float unitSize;
    public int renderDistance;

    // Live chunks keyed by the grid cell they currently occupy. The key is updated
    // whenever a chunk is relocated, so it always reflects the chunk's real position.
    Dictionary<Vector2, GameObject> gridOfChunks = new Dictionary<Vector2, GameObject>();

    // Grid cells that should contain a chunk for the current target position.
    // A HashSet makes the per-chunk membership test O(1) instead of a linear list scan.
    HashSet<Vector2> expectedChunkGridPositions = new HashSet<Vector2>();

    public float noiseScale;

    // World-space edge length of one chunk (unitSize * chunkSize).
    float absoluteChunkSize;

    Vector2 lastTargetGridPosition = Vector2.zero;

    private void Start()
    {
        // Calculate absolute chunk size
        GetAbsoluteChunkSize();
        // Generate base world
        GenerateBase();
    }

    /// <summary>
    /// Converts the target's world position to a grid cell and, if the cell changed
    /// since the last frame, recomputes the window and relocates stale chunks.
    /// </summary>
    private void LateUpdate()
    {
        Vector3 targetAbsolutePosition = targetObject.transform.position;

        // Round to the nearest chunk cell so grid coordinates are always whole numbers
        // and match the dictionary keys. The previous "round to 10, then divide by the
        // chunk size" produced the same values only when the chunk size was exactly 10.
        Vector2 targetGridPosition = new Vector2(
            Mathf.RoundToInt(targetAbsolutePosition.x / absoluteChunkSize),
            Mathf.RoundToInt(targetAbsolutePosition.z / absoluteChunkSize));

        if (targetGridPosition != lastTargetGridPosition)
        {
            GenerateExpectedChunkAreas(targetGridPosition);
            UpdateChunkPositions(targetGridPosition);
        }

        lastTargetGridPosition = targetGridPosition;
    }

    /// <summary>
    /// Instantiates the initial window of chunks around the origin and registers
    /// each one under its grid cell.
    /// </summary>
    void GenerateBase()
    {
        for (int x = -renderDistance / 2; x < renderDistance / 2; x++)
        {
            for (int z = -renderDistance / 2; z < renderDistance / 2; z++)
            {
                Vector2 gridPosition = new Vector2(x, z);
                Vector3 worldPosition = new Vector3(x * (unitSize * chunkSize), 0, z * (unitSize * chunkSize));
                GameObject chunk = Instantiate(chunkObject, worldPosition, Quaternion.identity);
                chunk.GetComponent<GEN_Chunk>().gridPosition = gridPosition;
                gridOfChunks.Add(gridPosition, chunk);
            }
        }

        GenerateExpectedChunkAreas(Vector2.zero);
    }

    /// <summary>
    /// Rebuilds the set of grid cells that should hold a chunk for the given target cell.
    /// </summary>
    void GenerateExpectedChunkAreas(Vector2 targetGridPosition)
    {
        expectedChunkGridPositions.Clear();
        for (int x = -renderDistance / 2; x < renderDistance / 2; x++)
        {
            for (int z = -renderDistance / 2; z < renderDistance / 2; z++)
            {
                expectedChunkGridPositions.Add(new Vector2(x, z) + targetGridPosition);
            }
        }
    }

    /// <summary>
    /// Moves every chunk that is outside the window around
    /// <paramref name="targetGridPosition"/> to a window cell that has no chunk yet.
    /// </summary>
    void UpdateChunkPositions(Vector2 targetGridPosition)
    {
        // Window cells that currently have no chunk.
        List<Vector2> positionsWithoutChunks = new List<Vector2>();
        for (int x = -renderDistance / 2; x < renderDistance / 2; x++)
        {
            for (int z = -renderDistance / 2; z < renderDistance / 2; z++)
            {
                Vector2 gridPosition = new Vector2(x, z) + targetGridPosition;
                if (!gridOfChunks.ContainsKey(gridPosition))
                {
                    positionsWithoutChunks.Add(gridPosition);
                }
            }
        }

        // Cells whose chunk is no longer inside the window. The dictionary keys are
        // authoritative, so no GetComponent call is needed per chunk here.
        List<Vector2> positionsWithOldChunks = new List<Vector2>();
        foreach (Vector2 occupiedPosition in gridOfChunks.Keys)
        {
            if (!expectedChunkGridPositions.Contains(occupiedPosition))
            {
                positionsWithOldChunks.Add(occupiedPosition);
            }
        }

        // Pair each stale chunk with an empty cell. The counts normally match; Min guards
        // against indexing past the shorter list if they ever differ.
        int moveCount = Mathf.Min(positionsWithOldChunks.Count, positionsWithoutChunks.Count);
        for (int i = 0; i < moveCount; i++)
        {
            Vector2 oldGridPosition = positionsWithOldChunks[i];
            Vector2 newGridPosition = positionsWithoutChunks[i];
            GameObject chunk = gridOfChunks[oldGridPosition];

            chunk.transform.position = new Vector3(newGridPosition.x * absoluteChunkSize, 0, newGridPosition.y * absoluteChunkSize);

            // Keep the bookkeeping in sync with the move. Without this the dictionary and
            // the chunk still describe the old cell, so later updates work from stale
            // data and leave holes in the terrain.
            chunk.GetComponent<GEN_Chunk>().gridPosition = newGridPosition;
            gridOfChunks.Remove(oldGridPosition);
            gridOfChunks.Add(newGridPosition, chunk);

            // Recalculating noise for chunk based on its new position does lag more but even WITHOUT this it still stutters when player moves around. ( plan to learn threading just to calculate noise on seperate threads )
            // chunk.GetComponent<GEN_Chunk>().ApplyNoise();
        }
    }

    /// <summary>Caches the world-space edge length of one chunk.</summary>
    void GetAbsoluteChunkSize()
    {
        absoluteChunkSize = unitSize * chunkSize;
    }
}
I need some smooth working infinite terrain (in quotes 'infinite')
And I'd like to learn too!