{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":683573931,"defaultBranch":"main","name":"Awesome-LLM-Inference","ownerLogin":"DefTruth","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-08-27T02:32:15.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/31974251?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1718845922.0","currentOid":""},"activityList":{"items":[{"before":"cdb678cacfe26435f12d6a84def170cd098011b5","after":"240e82a11da6fd06f88a06db241af33e312efce6","ref":"refs/heads/main","pushedAt":"2024-07-03T01:11:55.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"llama.cpp: Inference of Meta's LLaMA model (and others) in pure C/C++(@ggerganov)","shortMessageHtmlLink":"llama.cpp: Inference of Meta's LLaMA model (and others) in pure C/C++(@…"}},{"before":"0a5d08278e7766b97210385df62293f0632180b3","after":"cdb678cacfe26435f12d6a84def170cd098011b5","ref":"refs/heads/main","pushedAt":"2024-07-03T01:02:00.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Mooncake: A KVCache-centric Disaggregated Architecture for LLM Serving(@Moonshot AI)","shortMessageHtmlLink":"Mooncake: A KVCache-centric Disaggregated Architecture for LLM Serving(…"}},{"before":"e15fb1d792c3a9b53b895b1fd45d4890ca4778ba","after":"0a5d08278e7766b97210385df62293f0632180b3","ref":"refs/heads/main","pushedAt":"2024-06-30T09:20:17.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"πŸ”₯[OutlierTune] OutlierTune: Efficient Channel-Wise Quantization for Large Language Models(@Beijing University)","shortMessageHtmlLink":"πŸ”₯[OutlierTune] OutlierTune: Efficient Channel-Wise Quantization for L…"}},{"before":"73711a36497fe91dec0de21543a650c3db19c54b","after":"e15fb1d792c3a9b53b895b1fd45d4890ca4778ba","ref":"refs/heads/main","pushedAt":"2024-06-30T08:40:41.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"πŸ”₯[MLC-LLM]Universal LLM Deployment Engine with ML Compilation(@mlc-ai)","shortMessageHtmlLink":"πŸ”₯[MLC-LLM]Universal LLM Deployment Engine with ML Compilation(@mlc-ai)"}},{"before":"83c7e51dc7acdaae0fa6a602bb1d3972bbf07f60","after":"73711a36497fe91dec0de21543a650c3db19c54b","ref":"refs/heads/main","pushedAt":"2024-06-30T03:15:21.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"9b60efb01a3fe7635371aff534fc47a941abea09","after":"83c7e51dc7acdaae0fa6a602bb1d3972bbf07f60","ref":"refs/heads/main","pushedAt":"2024-06-30T02:14:47.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"2d3e71a527527f3d1a424300190645dfb957a8bb","after":"9b60efb01a3fe7635371aff534fc47a941abea09","ref":"refs/heads/main","pushedAt":"2024-06-30T02:14:10.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"b4d176b85ac5806f2b5d7e7ead1c164babb29b32","after":"2d3e71a527527f3d1a424300190645dfb957a8bb","ref":"refs/heads/main","pushedAt":"2024-06-29T07:39:46.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"abb8accc2d934bf89138ff68197d1b73f1815e9e","after":"b4d176b85ac5806f2b5d7e7ead1c164babb29b32","ref":"refs/heads/main","pushedAt":"2024-06-29T06:36:57.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"[Mooncake] Mooncake: A KVCache-centric Disaggregated Architecture for LLM Serving(@Moonshot AI)","shortMessageHtmlLink":"[Mooncake] Mooncake: A KVCache-centric Disaggregated Architecture for…"}},{"before":"d8dd0c168d6cd2b621c9286bafc56d8aacaf12a8","after":"abb8accc2d934bf89138ff68197d1b73f1815e9e","ref":"refs/heads/main","pushedAt":"2024-06-29T03:00:59.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"143b8584a2211830953fa23c814ae7d5f41521ea","after":"d8dd0c168d6cd2b621c9286bafc56d8aacaf12a8","ref":"refs/heads/main","pushedAt":"2024-06-28T01:58:12.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"πŸ”₯[LOOK-M] LOOK-M: Look-Once Optimization in KV Cache for Efficient Multimodal Long-Context Inference(@osu.edu etc)","shortMessageHtmlLink":"πŸ”₯[LOOK-M] LOOK-M: Look-Once Optimization in KV Cache for Efficient Mu…"}},{"before":"33833fb7f550eae9f1fe5d34a27c09de4a4aa540","after":"143b8584a2211830953fa23c814ae7d5f41521ea","ref":"refs/heads/main","pushedAt":"2024-06-24T14:21:01.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"liyucheng09","name":null,"path":"/liyucheng09","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/27999909?s=80&v=4"},"commit":{"message":"Merge pull request #19 from liyucheng09/main\n\n[MoA] MoA: Mixture of Sparse Attention for Automatic LLM Compression","shortMessageHtmlLink":"Merge pull request #19 from liyucheng09/main"}},{"before":"d86bf96191b306b7466990dc635ca11c6913d99b","after":"33833fb7f550eae9f1fe5d34a27c09de4a4aa540","ref":"refs/heads/main","pushedAt":"2024-06-20T01:11:32.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"96192e8a8d972cd9622af441824eca757edbef44","after":"d86bf96191b306b7466990dc635ca11c6913d99b","ref":"refs/heads/main","pushedAt":"2024-06-15T05:48:01.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"πŸ”₯πŸ”₯[**RWKV-CLIP**] RWKV-CLIP: A Robust Vision-Language Representation Learner(@DeepGlint etc)","shortMessageHtmlLink":"πŸ”₯πŸ”₯[**RWKV-CLIP**] RWKV-CLIP: A Robust Vision-Language Representation …"}},{"before":"1760508eb841c3fd4b0b8e3cb08791a958cdfac0","after":"96192e8a8d972cd9622af441824eca757edbef44","ref":"refs/heads/main","pushedAt":"2024-06-13T06:17:27.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"πŸ”₯[CompressKV] Effectively Compress KV Heads for LLM(@alibaba etc)","shortMessageHtmlLink":"πŸ”₯[CompressKV] Effectively Compress KV Heads for LLM(@alibaba etc)"}},{"before":"edfe64e36b9efe73024f3652d5c121816065d90b","after":"1760508eb841c3fd4b0b8e3cb08791a958cdfac0","ref":"refs/heads/main","pushedAt":"2024-06-12T02:05:01.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Merge pull request #18 from Kthyeon/main\n\nUpdate README.md","shortMessageHtmlLink":"Merge pull request #18 from Kthyeon/main"}},{"before":"71977627283fda306696e39738fa9ee93de37451","after":"edfe64e36b9efe73024f3652d5c121816065d90b","ref":"refs/heads/main","pushedAt":"2024-06-12T01:10:46.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"49ade184939e4fe62bddf2d7275dc71d6c7cd46a","after":"71977627283fda306696e39738fa9ee93de37451","ref":"refs/heads/main","pushedAt":"2024-06-05T01:45:17.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"πŸ”₯[I-LLM] I-LLM: Efficient Integer-Only Inference for Fully-Quantized Low-Bit Large Language Models(@Houmo AI)","shortMessageHtmlLink":"πŸ”₯[I-LLM] I-LLM: Efficient Integer-Only Inference for Fully-Quantized …"}},{"before":"14576cae786bedfe0bc0e975c3352de0137cc23d","after":"49ade184939e4fe62bddf2d7275dc71d6c7cd46a","ref":"refs/heads/main","pushedAt":"2024-06-03T01:44:34.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"πŸ”₯πŸ”₯[DeFT] DeFT: Decoding with Flash Tree-Attention for Efficient Tree-structured LLM Inference(@Westlake University etc)","shortMessageHtmlLink":"πŸ”₯πŸ”₯[DeFT] DeFT: Decoding with Flash Tree-Attention for Efficient Tree-…"}},{"before":"296260510379cc95a6f302b80177b52ac2c4c308","after":"14576cae786bedfe0bc0e975c3352de0137cc23d","ref":"refs/heads/main","pushedAt":"2024-06-01T09:25:00.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"422672eb0ee8b925e3bfbbdff1708a035ee26e0d","after":"296260510379cc95a6f302b80177b52ac2c4c308","ref":"refs/heads/main","pushedAt":"2024-06-01T09:20:13.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"25fef5ccd1af170a22c1a339e89e8c8b7498f4c3","after":"422672eb0ee8b925e3bfbbdff1708a035ee26e0d","ref":"refs/heads/main","pushedAt":"2024-06-01T08:47:39.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Add many LLM Inference papers","shortMessageHtmlLink":"Add many LLM Inference papers"}},{"before":"ab989bab9b39ffc3deb12b4ddb5e5611dc492441","after":"25fef5ccd1af170a22c1a339e89e8c8b7498f4c3","ref":"refs/heads/main","pushedAt":"2024-05-30T02:09:18.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"πŸ”₯[Instructive Decoding] INSTRUCTIVE DECODING: INSTRUCTION-TUNED LARGE LANGUAGE MODELS ARE SELF-REFINER FROM NOISY INSTRUCTIONS(@KAIST AI)","shortMessageHtmlLink":"πŸ”₯[Instructive Decoding] INSTRUCTIVE DECODING: INSTRUCTION-TUNED LARGE…"}},{"before":"790d27f317f68b1d953c89cc8779cf094a66c540","after":"ab989bab9b39ffc3deb12b4ddb5e5611dc492441","ref":"refs/heads/main","pushedAt":"2024-05-27T02:19:00.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"πŸ”₯[ZipCache] ZipCache: Accurate and Efficient KV Cache Quantization with Salient Token Identification(@Zhejiang University etc)","shortMessageHtmlLink":"πŸ”₯[ZipCache] ZipCache: Accurate and Efficient KV Cache Quantization wi…"}},{"before":"9a4e04d04bb74c4a13cc3ff7a3ced8a9c7f4f7b7","after":"790d27f317f68b1d953c89cc8779cf094a66c540","ref":"refs/heads/main","pushedAt":"2024-05-27T02:15:20.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"32680bee00de2a59811091648b9a6dcf63103ddd","after":"9a4e04d04bb74c4a13cc3ff7a3ced8a9c7f4f7b7","ref":"refs/heads/main","pushedAt":"2024-05-25T06:13:55.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"52f1dbcc2adc5246208c46ef77d91d9111da1c0a","after":"32680bee00de2a59811091648b9a6dcf63103ddd","ref":"refs/heads/main","pushedAt":"2024-05-25T05:58:05.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Update README.md","shortMessageHtmlLink":"Update README.md"}},{"before":"271bd694f8a836d8c2cbffed8346a069247d2770","after":"52f1dbcc2adc5246208c46ef77d91d9111da1c0a","ref":"refs/heads/main","pushedAt":"2024-05-20T09:10:13.000Z","pushType":"pr_merge","commitsCount":2,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"Merge pull request #16 from KylinC/main\n\nupdate [Decoding Speculative Decoding] github repo","shortMessageHtmlLink":"Merge pull request #16 from KylinC/main"}},{"before":"a31b3c7f6a06ced107d6bcf0895d88b829292067","after":"271bd694f8a836d8c2cbffed8346a069247d2770","ref":"refs/heads/main","pushedAt":"2024-05-20T06:09:23.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"πŸ”₯πŸ”₯[SQKV] SKVQ: Sliding-window Key and Value Cache Quantization for Large Language Models(@Shanghai AI Laboratory)","shortMessageHtmlLink":"πŸ”₯πŸ”₯[SQKV] SKVQ: Sliding-window Key and Value Cache Quantization for La…"}},{"before":"f6a6c0dc57b882e7ef62820e993341a08a3a606f","after":"a31b3c7f6a06ced107d6bcf0895d88b829292067","ref":"refs/heads/main","pushedAt":"2024-05-15T03:52:08.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"DefTruth","name":"DefTruth","path":"/DefTruth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/31974251?s=80&v=4"},"commit":{"message":"πŸ”₯πŸ”₯[YOCO] You Only Cache Once: Decoder-Decoder Architectures for Language Models(@Microsoft)","shortMessageHtmlLink":"πŸ”₯πŸ”₯[YOCO] You Only Cache Once: Decoder-Decoder Architectures for Langu…"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEdV3cdAA","startCursor":null,"endCursor":null}},"title":"Activity Β· DefTruth/Awesome-LLM-Inference"}