Prepare for release

plaidpants · Feb 21, 2022 · 56ffb33 · 56ffb33
1 parent 56de1b7
commit 56ffb33
Show file tree

Hide file tree

Showing 9 changed files with 64 additions and 4 deletions.
diff --git a/Assets/Resources/OVRBuildConfig.asset b/Assets/Resources/OVRBuildConfig.asset
diff --git a/Assets/RocketAI74.onnx b/Assets/RocketAI74.onnx
diff --git a/Assets/RocketAI74.onnx.meta b/Assets/RocketAI74.onnx.meta
diff --git a/Assets/RocketAIAgent.cs b/Assets/RocketAIAgent.cs
@@ -22,6 +22,9 @@ public override void Initialize()
         base.Initialize();
 
         lastPoints = 0;
+
+        // Set the team ID to a random value so AIs don't avoid shooting each other to avoid a team loss.
+        // Not sure whether this can be done in the other function or not, so I'll put it in both places just to be sure.
         GetComponent<BehaviorParameters>().TeamId = (int)Random.Range(0.0f, 100.0f);
     }
 
@@ -31,6 +34,16 @@ public override void CollectObservations(VectorSensor sensor)
 
         if (rocket)
         {
+            // What I learned is that you should not overfeed the AI learning with more data than
+            // it needs. Originally I was trying to send it as much data as possible, but eventually
+            // it seemed to work best with just the Ray Sensors 3D and the angular velocity of the rocket.
+            // It might make sense to send the rocket orientation in the same frame as the angular velocity,
+            // but it doesn't seem to need it. Also, I noticed that when training in the big sphere the rockets
+            // need to move faster and the learning sets the Unity3D Time base to 20; there was a significant difference
+            // in learning when running that fast while the rockets need to move fast. Learning worked much better when
+            // Time base was set to a much lower number or even 1. Something to check if you find it's not training for
+            // some reason.
+
             //sensor.AddObservation(transform.rotation.eulerAngles); // ship on the sphere
 
             /*
@@ -82,6 +95,10 @@ public override void OnActionReceived(ActionBuffers actionBuffers)
 
         if (rocket)
         {
+            // I tried giving little rewards for things I wanted the AI to do but ended up
+            // removing these, as they did not help and could result in an AI doing things just
+            // for the little rewards.
+
             rocket.horizontalInput = Mathf.Clamp(actionBuffers.ContinuousActions[0], -1f, 1f);
             // little reward turning
             //SetReward(0.001f * Mathf.Abs(rocket.horizontalInput));
@@ -99,15 +116,20 @@ public override void OnActionReceived(ActionBuffers actionBuffers)
                 rocket.fireInput = false;
             }
 
+            // this is the only reward that matters, and in fact I just want to indicate a hit for 1.0 and
+            // NOT scale this reward depending on what was hit.
+
             if (rocket.points > lastPoints)
             {
                 // big reward if we hit something
                 reward = 1.0f; // 3.0f * (rocket.points - lastPoints);
                 SetReward(reward);
-                Debug.Log("Reward for points " + rocket.points + " last points " + lastPoints + " reward " + reward);
+                Debug.Log("Reward for points " /* + rocket.points + " last points " + lastPoints + " reward " */ + reward);
                 lastPoints = rocket.points;
             }
 
+            // More little rewards that weren't really helpful
+
             /*
             if (rocket.fireInput)
             {
@@ -126,6 +148,11 @@ public override void OnActionReceived(ActionBuffers actionBuffers)
             */
         }
 
+        // I tried to keep the AI from sitting and spinning, which was an artifact of feeding the AI
+        // too much info above. This seemed to result in a depressed AI that would sit and spin even when I
+        // made this negative reward grow exponentially. A consistent negative reward seems to be
+        // preferred by the AI over trying to figure out too much input and not getting enough positive rewards.
+
  /*
         if (rocket.countRotations != lastCountRotations)
         {
@@ -140,6 +167,9 @@ public override void OnActionReceived(ActionBuffers actionBuffers)
             lastCountRotations = rocket.countRotations;
         }
  */
+
+        // This reward resulted in slower and more controllable rockets, but it is not advantageous when
+        // you need to move faster to get out of the way or find a new rock when there are not many left.
  /*
         if (rb)
         {
@@ -175,6 +205,10 @@ public override void OnActionReceived(ActionBuffers actionBuffers)
             }
         }
  */
+
+        // This set of rewards was actually my first glimpse of something that looked reasonable, but
+        // ultimately removing all of this resulted in an even more impressive AI.
+
         if (raySensor)
         {
             if (raySensor.RaySensor != null)
@@ -250,6 +284,9 @@ public override void OnActionReceived(ActionBuffers actionBuffers)
                                 }
                             }
                             */
+
+                            // This reward was not as good as I had hoped, and I did not use it much.
+
                             /*
                             if (raySensor.RaySensor.RayPerceptionOutput.RayOutputs[0].HasHit 
                                 ||  raySensor.RaySensor.RayPerceptionOutput.RayOutputs[1].HasHit
@@ -287,7 +324,8 @@ public override void OnEpisodeBegin()
     {
         base.OnEpisodeBegin();
 
-        // make sure everyone is on a different team, i.e. every man for himself
+        // Set the team ID to a random value so AIs don't avoid shooting each other to avoid a team loss.
+        // Not sure whether this can be done in the other function or not, so I'll put it in both places just to be sure.
         GetComponent<BehaviorParameters>().TeamId = (int)Random.Range(0.0f, 100.0f);
 
         raySensor = transform.gameObject.GetComponentInChildren<RayPerceptionSensorComponent3D>();
@@ -304,20 +342,26 @@ public override void OnEpisodeBegin()
         }
     }
 
+    // I added this to try to end early and reward a long-lived AI, but it may have pushed some
+    // AIs to avoid taking chances and sit around waiting for this reward.
     public void EpisodeEndGood()
     {
         Debug.Log("reward survived for lifetime " + 0.2f);
         //SetReward(0.2f);
         EndEpisode();
     }
 
+    // this is the lose reward, AI rocket got hit with a rock or a shot
     public void EpisodeEndBad()
     {
         Debug.Log("Negative reward for dying " + -1.0f);
         SetReward(-1.0f);
         EndEpisode();
     }
 
+    // Used this to create demos for the GAIL learning module. It needs quite a few recordings to be helpful; however, it
+    // seemed to result in better-behaved AIs when not using other rewards.
+
     public override void Heuristic(in ActionBuffers actionsOut)
     {
         base.Heuristic(actionsOut);

diff --git a/Assets/RocketSphereAI2.prefab b/Assets/RocketSphereAI2.prefab
diff --git a/Assets/Rocks, Rockets And Rayguns.mlagents.settings.asset b/Assets/Rocks, Rockets And Rayguns.mlagents.settings.asset
diff --git a/Assets/ShotSphere.cs b/Assets/ShotSphere.cs
@@ -43,8 +43,8 @@ void Start()
     [ServerCallback]
     void OnTriggerEnter(Collider other)
     {
-        // check if we are running into our own shots
-        if (other.transform.gameObject == transform.gameObject)
+        // check if rockets are running into their own shots or if this is also a shot
+        if ((other.transform.gameObject == transform.gameObject) || other.CompareTag("Shot"))
         {
             // ignore our own shots
             return;

diff --git a/ProjectSettings/GraphicsSettings.asset b/ProjectSettings/GraphicsSettings.asset
diff --git a/ProjectSettings/ProjectSettings.asset b/ProjectSettings/ProjectSettings.asset