{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":745311984,"defaultBranch":"main","name":"self-rewarding-lm-pytorch","ownerLogin":"lucidrains","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2024-01-19T04:02:02.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/108653?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1712797782.0","currentOid":""},"activityList":{"items":[{"before":"d4755a2b4f6ccbd958a3e9880fa54a1ce895094f","after":"ebeca908cb355dff7ffbdbff54a200e3abdf47ab","ref":"refs/heads/main","pushedAt":"2024-04-11T01:09:39.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"0.2.12","shortMessageHtmlLink":"0.2.12"}},{"before":"2db4fed1a4c2a66e93b936352cd37b3807153936","after":"d4755a2b4f6ccbd958a3e9880fa54a1ce895094f","ref":"refs/heads/main","pushedAt":"2024-04-11T01:09:08.000Z","pushType":"pr_merge","commitsCount":3,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"Merge pull request #29 from Control-derek/derek\n\nFixed deep copy, shallow copy error and label mask error.","shortMessageHtmlLink":"Merge pull request #29 from Control-derek/derek"}},{"before":"35016d08a3faaefe76c45dd7da28f85bc9152cc9","after":"2db4fed1a4c2a66e93b936352cd37b3807153936","ref":"refs/heads/main","pushedAt":"2024-03-29T17:17:05.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"0.2.11","shortMessageHtmlLink":"0.2.11"}},{"before":"41cb177221fa849b9494c56113e19183cb1776cd","after":"35016d08a3faaefe76c45dd7da28f85bc9152cc9","ref":"refs/heads/main","pushedAt":"2024-03-29T17:16:46.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"Merge pull request #28 from Control-derek/derek\n\nSolves the problem that some variables are not declared","shortMessageHtmlLink":"Merge pull request #28 from Control-derek/derek"}},{"before":"8fb57636667f720d8da2bd5607ffc7307f3fb707","after":"41cb177221fa849b9494c56113e19183cb1776cd","ref":"refs/heads/main","pushedAt":"2024-03-29T13:42:10.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"0.2.10","shortMessageHtmlLink":"0.2.10"}},{"before":"f8b26fbf08bbd806af50b586efcce1eb54e53d16","after":"8fb57636667f720d8da2bd5607ffc7307f3fb707","ref":"refs/heads/main","pushedAt":"2024-03-29T13:41:51.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"Merge pull request #27 from Control-derek/derek\n\nSolves the problem that some variables are not declared","shortMessageHtmlLink":"Merge pull request #27 from Control-derek/derek"}},{"before":"72291ff9c271487cfbf6d9bb3d0893016d81d6e6","after":"f8b26fbf08bbd806af50b586efcce1eb54e53d16","ref":"refs/heads/main","pushedAt":"2024-03-27T01:03:31.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"patch","shortMessageHtmlLink":"patch"}},{"before":"0649dfb9cbd4e214aa1041f46bdb5aaee4e5300d","after":"72291ff9c271487cfbf6d9bb3d0893016d81d6e6","ref":"refs/heads/main","pushedAt":"2024-03-27T01:03:14.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"Merge pull request #26 from Control-derek/derek\n\nadd self.","shortMessageHtmlLink":"Merge pull request #26 from Control-derek/derek"}},{"before":"5934686ce19a3a6a5042abed0234645e24246b24","after":"0649dfb9cbd4e214aa1041f46bdb5aaee4e5300d","ref":"refs/heads/main","pushedAt":"2024-02-19T02:20:33.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"fix type error","shortMessageHtmlLink":"fix type error"}},{"before":"81fc3df92e3bff77b737a3428f49ff7de4dd0057","after":"5934686ce19a3a6a5042abed0234645e24246b24","ref":"refs/heads/main","pushedAt":"2024-02-19T02:20:07.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"Merge pull request #19 from ViswanathaReddyGajjala/patch-1\n\nFix TypeError for is_valid_reward in SelfRewardDPOConfig","shortMessageHtmlLink":"Merge pull request #19 from ViswanathaReddyGajjala/patch-1"}},{"before":"fd5bc41e3a9bddd22c35fdf6243f81022402277b","after":"81fc3df92e3bff77b737a3428f49ff7de4dd0057","ref":"refs/heads/main","pushedAt":"2024-02-09T03:08:28.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"patch","shortMessageHtmlLink":"patch"}},{"before":"b4e17bae361fff8e6af696c0b98cec4c257941e1","after":"fd5bc41e3a9bddd22c35fdf6243f81022402277b","ref":"refs/heads/main","pushedAt":"2024-02-09T03:07:55.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"Merge pull request #17 from unaidedelf8777/patch-1\n\nUpdate self_rewarding_lm_pytorch.py","shortMessageHtmlLink":"Merge pull request #17 from unaidedelf8777/patch-1"}},{"before":"04ec0667f8eaaaf7844dfed141a2abdc37828846","after":"b4e17bae361fff8e6af696c0b98cec4c257941e1","ref":"refs/heads/main","pushedAt":"2024-02-08T14:55:42.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"allow for an external LLM to play as reward model, as in OAIF","shortMessageHtmlLink":"allow for an external LLM to play as reward model, as in OAIF"}},{"before":"ec8b9112d4ced084ae7cacfe776e1ec01fa1f950","after":"04ec0667f8eaaaf7844dfed141a2abdc37828846","ref":"refs/heads/main","pushedAt":"2024-02-08T14:19:18.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"allow for an external LLM to play as reward model, as in DAP","shortMessageHtmlLink":"allow for an external LLM to play as reward model, as in DAP"}},{"before":"34c99a6f1a7b6472087925abb9ee8e9cf4e04bf2","after":"ec8b9112d4ced084ae7cacfe776e1ec01fa1f950","ref":"refs/heads/main","pushedAt":"2024-02-03T15:23:41.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"address https://github.com/lucidrains/self-rewarding-lm-pytorch/issues/15","shortMessageHtmlLink":"address #15"}},{"before":"bf546cdd61b3d85bcfc7a22641e925744d73e262","after":"34c99a6f1a7b6472087925abb9ee8e9cf4e04bf2","ref":"refs/heads/main","pushedAt":"2024-02-02T16:21:18.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"readme","shortMessageHtmlLink":"readme"}},{"before":"2ee0c4d6ddcd9521e2a390b379436894ba6ee7a3","after":"bf546cdd61b3d85bcfc7a22641e925744d73e262","ref":"refs/heads/main","pushedAt":"2024-02-01T19:29:55.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"fix misnamed hyperparameter, and add validation function for parsed reward, project management","shortMessageHtmlLink":"fix misnamed hyperparameter, and add validation function for parsed r…"}},{"before":"51b991c46798acb5981d4bd6c1cf28a358ca14b8","after":"2ee0c4d6ddcd9521e2a390b379436894ba6ee7a3","ref":"refs/heads/main","pushedAt":"2024-02-01T18:28:58.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"todo","shortMessageHtmlLink":"todo"}},{"before":"bded2ccf5783abec2734088781078240316f13db","after":"51b991c46798acb5981d4bd6c1cf28a358ca14b8","ref":"refs/heads/main","pushedAt":"2024-02-01T18:04:51.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"make sure nucleus sampling and its threshold is customizable","shortMessageHtmlLink":"make sure nucleus sampling and its threshold is customizable"}},{"before":"e9a582c52c0391319368dda60b002ff2a259c7bb","after":"bded2ccf5783abec2734088781078240316f13db","ref":"refs/heads/main","pushedAt":"2024-02-01T17:37:39.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"sft trainer auto concats multiple datasets","shortMessageHtmlLink":"sft trainer auto concats multiple datasets"}},{"before":"7c4ba1a86190b57561d8a1b7ed6e3dbf69e23bec","after":"e9a582c52c0391319368dda60b002ff2a259c7bb","ref":"refs/heads/main","pushedAt":"2024-02-01T17:35:04.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"save an import for researcher","shortMessageHtmlLink":"save an import for researcher"}},{"before":"465de3e3fc74b2230b6ac576e8b4c6135478549e","after":"7c4ba1a86190b57561d8a1b7ed6e3dbf69e23bec","ref":"refs/heads/main","pushedAt":"2024-02-01T17:29:18.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"generalize the system","shortMessageHtmlLink":"generalize the system"}},{"before":"0dcd7f2eb49bb4b6b24a32a3926f104bf463578a","after":"465de3e3fc74b2230b6ac576e8b4c6135478549e","ref":"refs/heads/main","pushedAt":"2024-02-01T15:46:41.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"take one more tiny step towards goal","shortMessageHtmlLink":"take one more tiny step towards goal"}},{"before":"f33215b7b4b1038ac469287447bf0963f3688646","after":"0dcd7f2eb49bb4b6b24a32a3926f104bf463578a","ref":"refs/heads/main","pushedAt":"2024-02-01T15:16:44.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"allow the creation of the self-reward dpo dataset to be called within dpo trainer, also move dropout into the trainers, so it can be customized per fine tuning stage","shortMessageHtmlLink":"allow the creation of the self-reward dpo dataset to be called within…"}},{"before":"1acf9f320f1777d55dacfa737e38162c44ac91cf","after":"f33215b7b4b1038ac469287447bf0963f3688646","ref":"refs/heads/main","pushedAt":"2024-01-31T18:45:36.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"gradient accumulation","shortMessageHtmlLink":"gradient accumulation"}},{"before":"602049deaf07b0a5661b8be7477b48f9e66b81c5","after":"1acf9f320f1777d55dacfa737e38162c44ac91cf","ref":"refs/heads/main","pushedAt":"2024-01-31T17:46:13.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"del memmap","shortMessageHtmlLink":"del memmap"}},{"before":"113c0ce20620430d608f30bd1a85e14d1ff4a52e","after":"602049deaf07b0a5661b8be7477b48f9e66b81c5","ref":"refs/heads/main","pushedAt":"2024-01-31T17:08:37.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"use separate folders for each iteration of dpo w/ early stopping","shortMessageHtmlLink":"use separate folders for each iteration of dpo w/ early stopping"}},{"before":"acdc5b6ffcf43158341a22750a673f139621679c","after":"113c0ce20620430d608f30bd1a85e14d1ff4a52e","ref":"refs/heads/main","pushedAt":"2024-01-31T16:56:24.000Z","pushType":"force_push","commitsCount":0,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"one more step towards arbitrary ordering of fine-tuning","shortMessageHtmlLink":"one more step towards arbitrary ordering of fine-tuning"}},{"before":"9673d9b9be9c61f9dacef7a27bffd6d8ef6430ba","after":"acdc5b6ffcf43158341a22750a673f139621679c","ref":"refs/heads/main","pushedAt":"2024-01-31T16:56:11.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"one more step towards arbitrary ordering of fine-tuning","shortMessageHtmlLink":"one more step towards arbitrary ordering of fine-tuning"}},{"before":"a83249f8b8163f70608dbcf99640e48c792b605a","after":"9673d9b9be9c61f9dacef7a27bffd6d8ef6430ba","ref":"refs/heads/main","pushedAt":"2024-01-31T16:47:02.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lucidrains","name":"Phil Wang","path":"/lucidrains","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/108653?s=80&v=4"},"commit":{"message":"update reference model with policy within the spin and dpo trainers themselves, to ready for arbitrary ordering of fine tuning steps","shortMessageHtmlLink":"update reference model with policy within the spin and dpo trainers t…"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"Y3Vyc29yOnYyOpK7MjAyNC0wNC0xMVQwMTowOTozOS4wMDAwMDBazwAAAAQtm9uI","startCursor":"Y3Vyc29yOnYyOpK7MjAyNC0wNC0xMVQwMTowOTozOS4wMDAwMDBazwAAAAQtm9uI","endCursor":"Y3Vyc29yOnYyOpK7MjAyNC0wMS0zMVQxNjo0NzowMi4wMDAwMDBazwAAAAPuQ53J"}},"title":"Activity · lucidrains/self-rewarding-lm-pytorch"}