diff --git a/Assets/Resources/OVRBuildConfig.asset b/Assets/Resources/OVRBuildConfig.asset index b85a7d9..683e9de 100644 Binary files a/Assets/Resources/OVRBuildConfig.asset and b/Assets/Resources/OVRBuildConfig.asset differ diff --git a/Assets/RocketAI74.onnx b/Assets/RocketAI74.onnx new file mode 100644 index 0000000..3b1903e Binary files /dev/null and b/Assets/RocketAI74.onnx differ diff --git a/Assets/RocketAI74.onnx.meta b/Assets/RocketAI74.onnx.meta new file mode 100644 index 0000000..fda93d2 --- /dev/null +++ b/Assets/RocketAI74.onnx.meta @@ -0,0 +1,16 @@ +fileFormatVersion: 2 +guid: b43fa7013f7e799478c9f4f983bb6eea +ScriptedImporter: + internalIDToNameTable: [] + externalObjects: {} + serializedVersion: 2 + userData: + assetBundleName: + assetBundleVariant: + script: {fileID: 11500000, guid: 683b6cb6d0a474744822c888b46772c9, type: 3} + optimizeModel: 1 + forceArbitraryBatchSize: 1 + treatErrorsAsWarnings: 0 + importMode: 1 + weightsTypeMode: 0 + activationTypeMode: 0 diff --git a/Assets/RocketAIAgent.cs b/Assets/RocketAIAgent.cs index 5165286..d46827d 100644 --- a/Assets/RocketAIAgent.cs +++ b/Assets/RocketAIAgent.cs @@ -22,6 +22,9 @@ public override void Initialize() base.Initialize(); lastPoints = 0; + + // set the team ID to random so AIs don't avoid shooting each other to avoid a team loss, + // not sure if this can be done in the other function or not so I'll put it in both places just to be sure GetComponent().TeamId = (int)Random.Range(0.0f, 100.0f); } @@ -31,6 +34,16 @@ public override void CollectObservations(VectorSensor sensor) if (rocket) { + // What I learned is that you should not overfeed the AI learning with more data than + // it needs. Originally I was trying to send it as much data as possible but eventually + // it seems to work best with just the Ray Sensors 3d and the angular velocity of the rocket. + // It might make sense to send the rocket orientation in the same frame as the angular velocity, + // but it doesn't seem to need it. 
Also I noticed that when training in the big sphere the rockets + // need to move faster and the learning sets the Unity3D Time base to 20, there was a significant difference + // in learning when running so fast when the rockets need to move fast, learning worked much better when + // Time base was set to a much lower number or even 1. Something to check if you find it's not training for + // some reason. + //sensor.AddObservation(transform.rotation.eulerAngles); // ship on the sphere /* @@ -82,6 +95,10 @@ public override void OnActionReceived(ActionBuffers actionBuffers) if (rocket) { + // I tried giving little rewards for things I wanted the AI to do but ended up + // removing these as they did not help and could result in an AI doing things just + // for the little rewards + rocket.horizontalInput = Mathf.Clamp(actionBuffers.ContinuousActions[0], -1f, 1f); // little reward turning //SetReward(0.001f * Mathf.Abs(rocket.horizontalInput)); @@ -99,15 +116,20 @@ public override void OnActionReceived(ActionBuffers actionBuffers) rocket.fireInput = false; } + // this is the only reward that matters, and in fact I just want to indicate a hit for 1.0 and + // NOT scale this reward depending on what was hit. + if (rocket.points > lastPoints) { // big reward if we hit something reward = 1.0f; // 3.0f * (rocket.points - lastPoints); SetReward(reward); - Debug.Log("Reward for points " + rocket.points + " last points " + lastPoints + " reward " + reward); + Debug.Log("Reward for points " /* + rocket.points + " last points " + lastPoints + " reward " */ + reward); lastPoints = rocket.points; } + // More little rewards that weren't really helpful + /* if (rocket.fireInput) { @@ -126,6 +148,11 @@ public override void OnActionReceived(ActionBuffers actionBuffers) */ } + // I tried to keep the AI from sitting and spinning which was an artifact of feeding the AI + // too much info above. 
This seemed to result in depressed AI that would sit and spin even when I + // made this negative reward grow exponentially. Negative reward that was consistent seems to be + // preferred by the AI over trying to figure out too much input and not getting enough positive rewards + /* if (rocket.countRotations != lastCountRotations) { @@ -140,6 +167,9 @@ public override void OnActionReceived(ActionBuffers actionBuffers) lastCountRotations = rocket.countRotations; } */ + + // This reward resulted in slower and more controllable rockets but is not advantageous when + // you need to move faster to get out of the way or find a new rock when there are not many left /* if (rb) { @@ -175,6 +205,10 @@ public override void OnActionReceived(ActionBuffers actionBuffers) } } */ + + // This set of rewards was actually my first glimpse of something that looked reasonable but + // ultimately removing all this resulted in an even more impressive AI + if (raySensor) { if (raySensor.RaySensor != null) @@ -250,6 +284,9 @@ public override void OnActionReceived(ActionBuffers actionBuffers) } } */ + + // this reward was not as good as I had hoped and I did not use it much + /* if (raySensor.RaySensor.RayPerceptionOutput.RayOutputs[0].HasHit || raySensor.RaySensor.RayPerceptionOutput.RayOutputs[1].HasHit @@ -287,7 +324,8 @@ public override void OnEpisodeBegin() { base.OnEpisodeBegin(); - // make sure everyone is on a different team, i.e. 
every man for himself + // set the team ID to random so AIs don't avoid shooting each other to avoid a team loss, + // not sure if this can be done in the other function or not so I'll put it in both places just to be sure GetComponent().TeamId = (int)Random.Range(0.0f, 100.0f); raySensor = transform.gameObject.GetComponentInChildren(); @@ -304,6 +342,8 @@ public override void OnEpisodeBegin() } } + // I added this to try and end early and reward a long lived AI, but it may have pushed some + // AIs to avoid taking chances and sit around waiting for this reward public void EpisodeEndGood() { Debug.Log("reward survived for lifetime " + 0.2f); @@ -311,6 +351,7 @@ public void EpisodeEndGood() EndEpisode(); } + // this is the lose reward, AI rocket got hit with a rock or a shot public void EpisodeEndBad() { Debug.Log("Negative reward for dying " + -1.0f); @@ -318,6 +359,9 @@ public void EpisodeEndBad() EndEpisode(); } + // used this to create demos to use with the GAIL learning module; it needs quite a lot of recordings to be helpful, however it + // seemed to result in better behaved AIs when not using other rewards. 
+ public override void Heuristic(in ActionBuffers actionsOut) { base.Heuristic(actionsOut); diff --git a/Assets/RocketSphereAI2.prefab b/Assets/RocketSphereAI2.prefab index 1d9c65e..92f2645 100644 Binary files a/Assets/RocketSphereAI2.prefab and b/Assets/RocketSphereAI2.prefab differ diff --git a/Assets/Rocks, Rockets And Rayguns.mlagents.settings.asset b/Assets/Rocks, Rockets And Rayguns.mlagents.settings.asset index f0567be..cc469fa 100644 Binary files a/Assets/Rocks, Rockets And Rayguns.mlagents.settings.asset and b/Assets/Rocks, Rockets And Rayguns.mlagents.settings.asset differ diff --git a/Assets/ShotSphere.cs b/Assets/ShotSphere.cs index 31d6d78..819030d 100644 --- a/Assets/ShotSphere.cs +++ b/Assets/ShotSphere.cs @@ -43,8 +43,8 @@ void Start() [ServerCallback] void OnTriggerEnter(Collider other) { - // check if we are running into our own shots - if (other.transform.gameObject == transform.gameObject) + // check if rockets are running into their own shots or if this is also a shot + if ((other.transform.gameObject == transform.gameObject) || other.CompareTag("Shot")) { // ignore our own shots return; diff --git a/ProjectSettings/GraphicsSettings.asset b/ProjectSettings/GraphicsSettings.asset index e8a7538..3027a39 100644 Binary files a/ProjectSettings/GraphicsSettings.asset and b/ProjectSettings/GraphicsSettings.asset differ diff --git a/ProjectSettings/ProjectSettings.asset b/ProjectSettings/ProjectSettings.asset index 0e33921..b62eed5 100644 Binary files a/ProjectSettings/ProjectSettings.asset and b/ProjectSettings/ProjectSettings.asset differ