<?xml version="1.0" encoding="UTF-8"?><rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:googleplay="http://www.google.com/schemas/play-podcasts/1.0"><channel><title><![CDATA[Into AI]]></title><description><![CDATA[Helping you become a 100X AI Engineer]]></description><link>https://www.intoai.pub</link><image><url>https://substackcdn.com/image/fetch/$s_!xBa1!,w_256,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fad0f7ec6-837c-4c2b-9b4d-5365d1a9e668_1080x1080.png</url><title>Into AI</title><link>https://www.intoai.pub</link></image><generator>Substack</generator><lastBuildDate>Sun, 05 Jul 2026 10:03:03 GMT</lastBuildDate><atom:link href="https://www.intoai.pub/feed" rel="self" type="application/rss+xml"/><copyright><![CDATA[Dr. Ashish Bamania]]></copyright><language><![CDATA[en]]></language><webMaster><![CDATA[intoai@substack.com]]></webMaster><itunes:owner><itunes:email><![CDATA[intoai@substack.com]]></itunes:email><itunes:name><![CDATA[Dr. Ashish Bamania]]></itunes:name></itunes:owner><itunes:author><![CDATA[Dr. Ashish Bamania]]></itunes:author><googleplay:owner><![CDATA[intoai@substack.com]]></googleplay:owner><googleplay:email><![CDATA[intoai@substack.com]]></googleplay:email><googleplay:author><![CDATA[Dr. Ashish Bamania]]></googleplay:author><itunes:block><![CDATA[Yes]]></itunes:block><item><title><![CDATA[Your company can pay for Into AI (here's how)]]></title><description><![CDATA[A ready-to-send email template to expense your subscription.]]></description><link>https://www.intoai.pub/p/your-company-can-pay-for-into-ai</link><guid isPermaLink="false">https://www.intoai.pub/p/your-company-can-pay-for-into-ai</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Sat, 04 Jul 2026 13:12:53 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!4eOW!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F23c77f59-f2fc-4dd3-a2c1-82e5a23b7997_1740x1160.avif" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!4eOW!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F23c77f59-f2fc-4dd3-a2c1-82e5a23b7997_1740x1160.avif" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!4eOW!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F23c77f59-f2fc-4dd3-a2c1-82e5a23b7997_1740x1160.avif 424w, https://substackcdn.com/image/fetch/$s_!4eOW!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F23c77f59-f2fc-4dd3-a2c1-82e5a23b7997_1740x1160.avif 848w, https://substackcdn.com/image/fetch/$s_!4eOW!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F23c77f59-f2fc-4dd3-a2c1-82e5a23b7997_1740x1160.avif 1272w, https://substackcdn.com/image/fetch/$s_!4eOW!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F23c77f59-f2fc-4dd3-a2c1-82e5a23b7997_1740x1160.avif 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!4eOW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F23c77f59-f2fc-4dd3-a2c1-82e5a23b7997_1740x1160.avif" width="1456" height="971" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/23c77f59-f2fc-4dd3-a2c1-82e5a23b7997_1740x1160.avif&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:971,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:211754,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/avif&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/205046443?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F23c77f59-f2fc-4dd3-a2c1-82e5a23b7997_1740x1160.avif&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!4eOW!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F23c77f59-f2fc-4dd3-a2c1-82e5a23b7997_1740x1160.avif 424w, https://substackcdn.com/image/fetch/$s_!4eOW!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F23c77f59-f2fc-4dd3-a2c1-82e5a23b7997_1740x1160.avif 848w, https://substackcdn.com/image/fetch/$s_!4eOW!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F23c77f59-f2fc-4dd3-a2c1-82e5a23b7997_1740x1160.avif 1272w, https://substackcdn.com/image/fetch/$s_!4eOW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F23c77f59-f2fc-4dd3-a2c1-82e5a23b7997_1740x1160.avif 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"><a href="https://unsplash.com/photos/man-in-blue-crew-neck-t-shirt-standing-beside-woman-in-orange-tank-top-H4ClLKv3pqw">Source</a></figcaption></figure></div><p>&#128075;&#127995; Hey friend!</p><p>Nearly every company sets aside a learning &amp; development budget for books, courses, conferences, and technical subscriptions, and every year a large chunk of it goes unspent.</p><p>Into AI fits perfectly into this category.</p><p>Reading lessons in AI foundations, staying up to date with the latest research, and knowing how to implement it aren't side hobbies; they make you and your team better at your job.</p><p>If you've been paying for Into AI yourself, or if you've been meaning to upgrade to a paid plan, there's a good chance you don't have to spend your own money at all.</p><p>You could follow either of 
these two approaches when you become a paid member:</p><p>1. You&#8217;ll get a payment receipt, which you can forward straight to your manager for reimbursement.</p><p>2. Use the email template below, edit the details, and send it to your manager for approval.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://sprout-night-cc6.notion.site/Email-template-for-Expenses-Reimbursement-393e94035b7b806baedcc5ea80e18fe0&quot;,&quot;text&quot;:&quot;Email template&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://sprout-night-cc6.notion.site/Email-template-for-Expenses-Reimbursement-393e94035b7b806baedcc5ea80e18fe0"><span>Email template</span></a></p><p>I also offer a 30% discount on group subscriptions for your team, which you can grab below.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?group=true&quot;,&quot;text&quot;:&quot;30% group subscription discount &#8594;&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?group=true"><span>30% group subscription discount &#8594;</span></a></p><p>Reply to this email directly if you have any questions, and I will be happy to answer them for you! &#128522;</p>]]></content:encoded></item><item><title><![CDATA[This Week In AI Research (21-30 June 26) 🗓️]]></title><description><![CDATA[The top 10 research papers of this week: GPT&#8209;5.6, Sonnet 5, Meta's real&#8209;time brain&#8209;to&#8209;text decoder, a 35B model that beats trillion&#8209;parameter LLMs & more!]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-21-30-june</link><guid isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-21-30-june</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Fri, 03 Jul 2026 02:04:12 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/a484bd61-b51b-496d-8163-a2081766b48b_1863x1048.png" length="0" type="image/png"/><content:encoded><![CDATA[<p>&#10024; Before we begin, I want to introduce you to a wonderful book called <strong>&#8220;30 Agents Every AI Engineer Must Build&#8221;.</strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!DsLh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F231205e7-cfe9-4cf3-9ea9-d1938dfe8987_2172x1000.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!DsLh!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F231205e7-cfe9-4cf3-9ea9-d1938dfe8987_2172x1000.png 424w, https://substackcdn.com/image/fetch/$s_!DsLh!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F231205e7-cfe9-4cf3-9ea9-d1938dfe8987_2172x1000.png 848w, https://substackcdn.com/image/fetch/$s_!DsLh!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F231205e7-cfe9-4cf3-9ea9-d1938dfe8987_2172x1000.png 1272w, https://substackcdn.com/image/fetch/$s_!DsLh!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F231205e7-cfe9-4cf3-9ea9-d1938dfe8987_2172x1000.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!DsLh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F231205e7-cfe9-4cf3-9ea9-d1938dfe8987_2172x1000.png" width="1456" height="670" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/231205e7-cfe9-4cf3-9ea9-d1938dfe8987_2172x1000.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:670,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:690341,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/204110303?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F231205e7-cfe9-4cf3-9ea9-d1938dfe8987_2172x1000.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!DsLh!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F231205e7-cfe9-4cf3-9ea9-d1938dfe8987_2172x1000.png 424w, https://substackcdn.com/image/fetch/$s_!DsLh!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F231205e7-cfe9-4cf3-9ea9-d1938dfe8987_2172x1000.png 848w, https://substackcdn.com/image/fetch/$s_!DsLh!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F231205e7-cfe9-4cf3-9ea9-d1938dfe8987_2172x1000.png 1272w, 
https://substackcdn.com/image/fetch/$s_!DsLh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F231205e7-cfe9-4cf3-9ea9-d1938dfe8987_2172x1000.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>This book guides you through 30 real-world agent architectures, covering the core building blocks of perception, memory, reasoning, and planning, and teaches you LangChain and LangGraph to create agents across finance, legal, healthcare, and more.</p><p>It also helps you learn how to deploy, evaluate, and guard your agents to ensure that they perform 
well in production. Grab your copy today using the link below.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://packt.link/e0kRc&quot;,&quot;text&quot;:&quot;Build production ready AI agents &#8594;&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://packt.link/e0kRc"><span>Build production ready AI agents &#8594;</span></a></p><div><hr></div><h3>10. Accurate Decoding of Natural Sentences from Non-Invasive Brain Recordings</h3><p>This research introduces <strong>Brain2Qwerty v2</strong>, an AI model that can decode natural sentences a person is typing from their magnetoencephalography (MEG) recordings in real time.<br><br>The model has an average word error rate of 39%, and for the best participant, it can accurately decode half of the sentences with one word error or less.</p><p>The model&#8217;s accuracy also improves log-linearly with more data. This means that increasing the number of recordings could close the gap with surgically implanted brain-computer interfaces, reaching accuracy levels that were once believed to be possible only with implants.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!75as!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fefe5ddc8-22d7-44fc-a2d0-401d555b9765_2510x1220.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!75as!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fefe5ddc8-22d7-44fc-a2d0-401d555b9765_2510x1220.png 424w, 
https://substackcdn.com/image/fetch/$s_!75as!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fefe5ddc8-22d7-44fc-a2d0-401d555b9765_2510x1220.png 848w, https://substackcdn.com/image/fetch/$s_!75as!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fefe5ddc8-22d7-44fc-a2d0-401d555b9765_2510x1220.png 1272w, https://substackcdn.com/image/fetch/$s_!75as!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fefe5ddc8-22d7-44fc-a2d0-401d555b9765_2510x1220.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!75as!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fefe5ddc8-22d7-44fc-a2d0-401d555b9765_2510x1220.png" width="1456" height="708" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/efe5ddc8-22d7-44fc-a2d0-401d555b9765_2510x1220.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:708,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:466676,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/204110303?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fefe5ddc8-22d7-44fc-a2d0-401d555b9765_2510x1220.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" 
srcset="https://substackcdn.com/image/fetch/$s_!75as!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fefe5ddc8-22d7-44fc-a2d0-401d555b9765_2510x1220.png 424w, https://substackcdn.com/image/fetch/$s_!75as!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fefe5ddc8-22d7-44fc-a2d0-401d555b9765_2510x1220.png 848w, https://substackcdn.com/image/fetch/$s_!75as!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fefe5ddc8-22d7-44fc-a2d0-401d555b9765_2510x1220.png 1272w, https://substackcdn.com/image/fetch/$s_!75as!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fefe5ddc8-22d7-44fc-a2d0-401d555b9765_2510x1220.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://facebookresearch.github.io/brain2qwerty/assets/brain2qwerty_v2.pdf">using this link</a>.</p><div><hr></div><h3>9. GPT&#8209;5.6 Sol</h3><p>OpenAI released its GPT&#8209;5.6 series with three models:</p><ul><li><p>Sol (flagship model)</p></li><li><p>Terra (balanced model for everyday work)</p></li><li><p>Luna (a fast and affordable model)</p></li></ul><p>Among these, GPT&#8209;5.6 Sol is the strongest model for tough agentic work such as coding, scientific analysis, biological workflows, cybersecurity, and long-horizon tool use.</p><p><span>For coding workflows, GPT&#8209;5.6 Sol sets a new state of the art on Terminal&#8209;Bench 2.1.</span></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!IX--!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2943163b-43aa-4519-8aac-3ac09d2f6705_2058x1108.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!IX--!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2943163b-43aa-4519-8aac-3ac09d2f6705_2058x1108.png 424w, https://substackcdn.com/image/fetch/$s_!IX--!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2943163b-43aa-4519-8aac-3ac09d2f6705_2058x1108.png 848w, 
https://substackcdn.com/image/fetch/$s_!IX--!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2943163b-43aa-4519-8aac-3ac09d2f6705_2058x1108.png 1272w, https://substackcdn.com/image/fetch/$s_!IX--!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2943163b-43aa-4519-8aac-3ac09d2f6705_2058x1108.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!IX--!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2943163b-43aa-4519-8aac-3ac09d2f6705_2058x1108.png" width="1456" height="784" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2943163b-43aa-4519-8aac-3ac09d2f6705_2058x1108.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:784,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:171861,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/204110303?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2943163b-43aa-4519-8aac-3ac09d2f6705_2058x1108.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!IX--!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2943163b-43aa-4519-8aac-3ac09d2f6705_2058x1108.png 424w, 
https://substackcdn.com/image/fetch/$s_!IX--!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2943163b-43aa-4519-8aac-3ac09d2f6705_2058x1108.png 848w, https://substackcdn.com/image/fetch/$s_!IX--!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2943163b-43aa-4519-8aac-3ac09d2f6705_2058x1108.png 1272w, https://substackcdn.com/image/fetch/$s_!IX--!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2943163b-43aa-4519-8aac-3ac09d2f6705_2058x1108.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><span>It also performs competitively with Mythos Preview using only a third of the output tokens </span>on <a href="https://exploitbench.ai/">ExploitBench</a>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!0L7N!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd998908f-f123-4cf7-9768-a86d7fb97b5b_2054x1284.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!0L7N!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd998908f-f123-4cf7-9768-a86d7fb97b5b_2054x1284.png 424w, https://substackcdn.com/image/fetch/$s_!0L7N!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd998908f-f123-4cf7-9768-a86d7fb97b5b_2054x1284.png 848w, https://substackcdn.com/image/fetch/$s_!0L7N!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd998908f-f123-4cf7-9768-a86d7fb97b5b_2054x1284.png 1272w, https://substackcdn.com/image/fetch/$s_!0L7N!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd998908f-f123-4cf7-9768-a86d7fb97b5b_2054x1284.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!0L7N!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd998908f-f123-4cf7-9768-a86d7fb97b5b_2054x1284.png" width="1456" height="910" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d998908f-f123-4cf7-9768-a86d7fb97b5b_2054x1284.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:910,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:149877,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/204110303?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd998908f-f123-4cf7-9768-a86d7fb97b5b_2054x1284.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!0L7N!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd998908f-f123-4cf7-9768-a86d7fb97b5b_2054x1284.png 424w, https://substackcdn.com/image/fetch/$s_!0L7N!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd998908f-f123-4cf7-9768-a86d7fb97b5b_2054x1284.png 848w, https://substackcdn.com/image/fetch/$s_!0L7N!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd998908f-f123-4cf7-9768-a86d7fb97b5b_2054x1284.png 1272w, https://substackcdn.com/image/fetch/$s_!0L7N!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd998908f-f123-4cf7-9768-a86d7fb97b5b_2054x1284.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this release <a href="https://openai.com/index/previewing-gpt-5-6-sol/">using this link</a>.</p><div><hr></div><h3>8. 
Sakana Fugu</h3><p>This research paper introduces <strong>Sakana</strong> <strong>Fugu</strong>, a family of orchestrator LLMs trained to understand user queries and dynamically create agentic scaffolds to solve them.</p><p>The two models in the Sakana Fugu family are: </p><ul><li><p>Fugu (balances performance with latency for everyday use)</p></li><li><p>Fugu-Ultra (prioritizes answer quality on the most difficult problems)</p></li></ul><p>Using adaptive scaffolds, these models achieve performance beyond that of any individual LLM agent, and reach SOTA results compared to other publicly accessible models across a wide range of challenging benchmarks (SWE-Bench Pro, Terminal Bench, LiveCodeBench, GPQA-Diamond, Humanity&#8217;s Last Exam, and CharXiv Reasoning).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!baYW!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9625ee70-d1bc-4d43-99bc-f8d3a656a096_2182x1292.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!baYW!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9625ee70-d1bc-4d43-99bc-f8d3a656a096_2182x1292.png 424w, https://substackcdn.com/image/fetch/$s_!baYW!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9625ee70-d1bc-4d43-99bc-f8d3a656a096_2182x1292.png 848w, https://substackcdn.com/image/fetch/$s_!baYW!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9625ee70-d1bc-4d43-99bc-f8d3a656a096_2182x1292.png 1272w, 
https://substackcdn.com/image/fetch/$s_!baYW!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9625ee70-d1bc-4d43-99bc-f8d3a656a096_2182x1292.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!baYW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9625ee70-d1bc-4d43-99bc-f8d3a656a096_2182x1292.png" width="1456" height="862" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9625ee70-d1bc-4d43-99bc-f8d3a656a096_2182x1292.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:862,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:329815,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/204110303?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9625ee70-d1bc-4d43-99bc-f8d3a656a096_2182x1292.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!baYW!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9625ee70-d1bc-4d43-99bc-f8d3a656a096_2182x1292.png 424w, https://substackcdn.com/image/fetch/$s_!baYW!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9625ee70-d1bc-4d43-99bc-f8d3a656a096_2182x1292.png 848w, 
https://substackcdn.com/image/fetch/$s_!baYW!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9625ee70-d1bc-4d43-99bc-f8d3a656a096_2182x1292.png 1272w, https://substackcdn.com/image/fetch/$s_!baYW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9625ee70-d1bc-4d43-99bc-f8d3a656a096_2182x1292.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2606.21228v2">using this link</a>.</p><div><hr></div><h3>7. 
The Red Queen G&#246;del Machine</h3><p>This research paper presents the <strong>Red Queen G&#246;del Machine (RQGM)</strong>, which is based on the evolutionary insight that species do not optimize against a static environment but adapt as their environments change with them.</p><p>Red Queen G&#246;del Machine (RQGM) is a framework for recursively self-improving agents in which both the agent and its evaluator evolve together rather than relying on a fixed benchmark.</p><p>It uses &#8220;Controlled utility evolution&#8221;, a technique that keeps evaluation constant within each epoch but allows it to change between epochs.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!7Utc!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F327c785a-46ed-46d9-997b-9912ee0a1ea6_2676x1282.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!7Utc!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F327c785a-46ed-46d9-997b-9912ee0a1ea6_2676x1282.png 424w, https://substackcdn.com/image/fetch/$s_!7Utc!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F327c785a-46ed-46d9-997b-9912ee0a1ea6_2676x1282.png 848w, https://substackcdn.com/image/fetch/$s_!7Utc!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F327c785a-46ed-46d9-997b-9912ee0a1ea6_2676x1282.png 1272w, https://substackcdn.com/image/fetch/$s_!7Utc!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F327c785a-46ed-46d9-997b-9912ee0a1ea6_2676x1282.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!7Utc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F327c785a-46ed-46d9-997b-9912ee0a1ea6_2676x1282.png" width="1456" height="698" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/327c785a-46ed-46d9-997b-9912ee0a1ea6_2676x1282.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:698,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:567158,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/204110303?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F327c785a-46ed-46d9-997b-9912ee0a1ea6_2676x1282.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!7Utc!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F327c785a-46ed-46d9-997b-9912ee0a1ea6_2676x1282.png 424w, https://substackcdn.com/image/fetch/$s_!7Utc!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F327c785a-46ed-46d9-997b-9912ee0a1ea6_2676x1282.png 848w, https://substackcdn.com/image/fetch/$s_!7Utc!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F327c785a-46ed-46d9-997b-9912ee0a1ea6_2676x1282.png 1272w, 
https://substackcdn.com/image/fetch/$s_!7Utc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F327c785a-46ed-46d9-997b-9912ee0a1ea6_2676x1282.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>RQGM improves test pass rates compared to previous SOTA by adding an agent-as-a-judge code-review signal while using 1.35&#215;&#8211;1.72&#215; fewer tokens because the reviewer is queried only once.</p><p>With RQGM, co-evolved scientific paper writing and reviewing agents reach 1.78&#215;&#8211;1.86&#215; higher acceptance rates under a diverse agent-as-a-judge 
panel, while co-evolved Olympiad-level proof writing and grading agents reach 9% higher ground-truth accuracy.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!GiDc!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c2173e7-197e-4aaf-9be8-85b326adda26_2546x1320.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!GiDc!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c2173e7-197e-4aaf-9be8-85b326adda26_2546x1320.png 424w, https://substackcdn.com/image/fetch/$s_!GiDc!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c2173e7-197e-4aaf-9be8-85b326adda26_2546x1320.png 848w, https://substackcdn.com/image/fetch/$s_!GiDc!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c2173e7-197e-4aaf-9be8-85b326adda26_2546x1320.png 1272w, https://substackcdn.com/image/fetch/$s_!GiDc!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c2173e7-197e-4aaf-9be8-85b326adda26_2546x1320.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!GiDc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c2173e7-197e-4aaf-9be8-85b326adda26_2546x1320.png" width="1456" height="755" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3c2173e7-197e-4aaf-9be8-85b326adda26_2546x1320.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:755,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:612512,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/204110303?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c2173e7-197e-4aaf-9be8-85b326adda26_2546x1320.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!GiDc!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c2173e7-197e-4aaf-9be8-85b326adda26_2546x1320.png 424w, https://substackcdn.com/image/fetch/$s_!GiDc!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c2173e7-197e-4aaf-9be8-85b326adda26_2546x1320.png 848w, https://substackcdn.com/image/fetch/$s_!GiDc!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c2173e7-197e-4aaf-9be8-85b326adda26_2546x1320.png 1272w, https://substackcdn.com/image/fetch/$s_!GiDc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c2173e7-197e-4aaf-9be8-85b326adda26_2546x1320.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2606.26294">using this link</a>.</p><div><hr></div><h3>6. OCR 4</h3><p>Mistral released OCR 4, which they claim to be the best OCR model to date. 
</p><p><span>Instead of just turning a document into text, OCR 4:</span></p><ul><li><p><span>Returns bounding boxes around text items</span></p></li><li><p><span>Classifies each box by type (title, table, equation, signature, and more)</span></p></li><li><p><span>Tells how sure it is about each extraction (inline confidence scores)</span></p></li></ul><p>The model also:</p><ul><li><p><span>Supports 170 languages across 10 language groups</span></p></li><li><p><span>Is small enough to run in a single container for fully self-hosted deployments</span></p></li><li><p><span>Costs $2&#8211;5 per 1,000 processed pages</span></p></li></ul><p>OCR 4 has a 72% average win rate against all leading OCR systems in blind human evaluations and achieves top scores on OlmOCRBench (85.20) and OmniDocBench (93.07).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!iqSX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83d0eb01-cebb-4a05-a421-9f6faca3d95e_1920x724.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!iqSX!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83d0eb01-cebb-4a05-a421-9f6faca3d95e_1920x724.webp 424w, https://substackcdn.com/image/fetch/$s_!iqSX!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83d0eb01-cebb-4a05-a421-9f6faca3d95e_1920x724.webp 848w, https://substackcdn.com/image/fetch/$s_!iqSX!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83d0eb01-cebb-4a05-a421-9f6faca3d95e_1920x724.webp 1272w, 
https://substackcdn.com/image/fetch/$s_!iqSX!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83d0eb01-cebb-4a05-a421-9f6faca3d95e_1920x724.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!iqSX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83d0eb01-cebb-4a05-a421-9f6faca3d95e_1920x724.webp" width="1456" height="549" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/83d0eb01-cebb-4a05-a421-9f6faca3d95e_1920x724.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:549,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!iqSX!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83d0eb01-cebb-4a05-a421-9f6faca3d95e_1920x724.webp 424w, https://substackcdn.com/image/fetch/$s_!iqSX!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83d0eb01-cebb-4a05-a421-9f6faca3d95e_1920x724.webp 848w, https://substackcdn.com/image/fetch/$s_!iqSX!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83d0eb01-cebb-4a05-a421-9f6faca3d95e_1920x724.webp 1272w, 
https://substackcdn.com/image/fetch/$s_!iqSX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83d0eb01-cebb-4a05-a421-9f6faca3d95e_1920x724.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!um9r!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e395935-2836-43d6-99aa-a0ee04f89d3f_1920x1097.webp" data-component-name="Image2ToDOM"><div 
class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!um9r!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e395935-2836-43d6-99aa-a0ee04f89d3f_1920x1097.webp 424w, https://substackcdn.com/image/fetch/$s_!um9r!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e395935-2836-43d6-99aa-a0ee04f89d3f_1920x1097.webp 848w, https://substackcdn.com/image/fetch/$s_!um9r!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e395935-2836-43d6-99aa-a0ee04f89d3f_1920x1097.webp 1272w, https://substackcdn.com/image/fetch/$s_!um9r!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e395935-2836-43d6-99aa-a0ee04f89d3f_1920x1097.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!um9r!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e395935-2836-43d6-99aa-a0ee04f89d3f_1920x1097.webp" width="1456" height="832" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1e395935-2836-43d6-99aa-a0ee04f89d3f_1920x1097.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:832,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!um9r!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e395935-2836-43d6-99aa-a0ee04f89d3f_1920x1097.webp 424w, https://substackcdn.com/image/fetch/$s_!um9r!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e395935-2836-43d6-99aa-a0ee04f89d3f_1920x1097.webp 848w, https://substackcdn.com/image/fetch/$s_!um9r!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e395935-2836-43d6-99aa-a0ee04f89d3f_1920x1097.webp 1272w, https://substackcdn.com/image/fetch/$s_!um9r!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e395935-2836-43d6-99aa-a0ee04f89d3f_1920x1097.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this release <a href="https://mistral.ai/news/ocr-4/">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>5. MOPD: Multi-Teacher On-Policy Distillation for Capability Integration in LLM Post-Training</h3><p>This research presents <strong>Multi-Teacher On-Policy Distillation (MOPD)</strong>, a post-training method that combines the capabilities of multiple domain-specific, RL-trained teacher models into one student model.</p><p>The method first trains separate domain-expert (teacher) models with RL. Then, it distills them into the student using the student&#8217;s own on-policy rollouts. This eliminates exposure bias and gives a denser optimization signal than off-policy finetuning.</p><p>On Qwen3-30B-A3B, MOPD outperforms Mix-RL, Cascade RL, Off-Policy Finetune, and Param-Merge baselines, while inheriting nearly all of each teacher&#8217;s specialized capability. 
</p><p>It also enables parallel training of domain teachers, followed by merging their strengths into a single deployable model.</p><p>MOPD has been used in the post-training of <a href="https://mimo.xiaomi.com/blog/mimo-v2-flash">MiMo-V2-Flash</a>, a powerful, efficient, and ultra-fast foundation language model that particularly excels in reasoning, coding, and agentic scenarios.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!LLIt!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa21b7367-be7d-4540-9600-6f5c51dd7934_2346x1266.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!LLIt!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa21b7367-be7d-4540-9600-6f5c51dd7934_2346x1266.png 424w, https://substackcdn.com/image/fetch/$s_!LLIt!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa21b7367-be7d-4540-9600-6f5c51dd7934_2346x1266.png 848w, https://substackcdn.com/image/fetch/$s_!LLIt!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa21b7367-be7d-4540-9600-6f5c51dd7934_2346x1266.png 1272w, https://substackcdn.com/image/fetch/$s_!LLIt!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa21b7367-be7d-4540-9600-6f5c51dd7934_2346x1266.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!LLIt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa21b7367-be7d-4540-9600-6f5c51dd7934_2346x1266.png" width="1456" 
height="786" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a21b7367-be7d-4540-9600-6f5c51dd7934_2346x1266.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:786,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:565495,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/204110303?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa21b7367-be7d-4540-9600-6f5c51dd7934_2346x1266.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!LLIt!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa21b7367-be7d-4540-9600-6f5c51dd7934_2346x1266.png 424w, https://substackcdn.com/image/fetch/$s_!LLIt!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa21b7367-be7d-4540-9600-6f5c51dd7934_2346x1266.png 848w, https://substackcdn.com/image/fetch/$s_!LLIt!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa21b7367-be7d-4540-9600-6f5c51dd7934_2346x1266.png 1272w, https://substackcdn.com/image/fetch/$s_!LLIt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa21b7367-be7d-4540-9600-6f5c51dd7934_2346x1266.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2606.30406">using this link</a>.</p><div><hr></div><h3>4. Ask, Don&#8217;t Judge: Binary Questions for Interpretable LLM Evaluation and Self-Improvement </h3><p>This research paper presents <strong>BINEVAL</strong>, an alternative to &#8220;LLM-as-a-judge&#8221; scoring, which often produces opaque scores that are hard to debug.</p><p>Instead of asking a model to provide one overall score, BINEVAL divides each evaluation criterion into multiple simple yes/no (binary) questions. </p><p>Given a task prompt, a meta-prompt generates fine-grained evaluation questions, and an LLM answers them independently for each output. 
It then scores each of these answers independently and combines them into interpretable, multi-dimensional scores.</p><p>This makes it easier to identify which checks failed, such as factual consistency, missing information, relevance, or redundancy.</p><p>This question-level feedback can also be used to iteratively improve evaluator prompts for summarization and generation.</p><p>Across multiple benchmarks such as SummEval, Topical-Chat, and QAGS, BINEVAL performs as well as or better than strong baselines such as UniEval and G-Eval, with particularly strong results on factual consistency.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B0R-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbce60a36-33d7-46b6-8af9-1e4051c6a82f_1848x1324.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!B0R-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbce60a36-33d7-46b6-8af9-1e4051c6a82f_1848x1324.png 424w, https://substackcdn.com/image/fetch/$s_!B0R-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbce60a36-33d7-46b6-8af9-1e4051c6a82f_1848x1324.png 848w, https://substackcdn.com/image/fetch/$s_!B0R-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbce60a36-33d7-46b6-8af9-1e4051c6a82f_1848x1324.png 1272w, https://substackcdn.com/image/fetch/$s_!B0R-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbce60a36-33d7-46b6-8af9-1e4051c6a82f_1848x1324.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!B0R-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbce60a36-33d7-46b6-8af9-1e4051c6a82f_1848x1324.png" width="1456" height="1043" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/bce60a36-33d7-46b6-8af9-1e4051c6a82f_1848x1324.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1043,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:485958,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/204110303?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbce60a36-33d7-46b6-8af9-1e4051c6a82f_1848x1324.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!B0R-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbce60a36-33d7-46b6-8af9-1e4051c6a82f_1848x1324.png 424w, https://substackcdn.com/image/fetch/$s_!B0R-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbce60a36-33d7-46b6-8af9-1e4051c6a82f_1848x1324.png 848w, https://substackcdn.com/image/fetch/$s_!B0R-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbce60a36-33d7-46b6-8af9-1e4051c6a82f_1848x1324.png 1272w, https://substackcdn.com/image/fetch/$s_!B0R-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbce60a36-33d7-46b6-8af9-1e4051c6a82f_1848x1324.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2606.27226">using this link</a>.</p><div><hr></div><h3>3. DSpark: Confidence-Scheduled Speculative Decoding with Semi-Autoregressive Generation</h3><p><a href="https://www.intoai.pub/p/speculative-decoding-simply-explained?utm_source=publication-search">Speculative Decoding</a> speeds up LLM inference by using a small draft model that suggests tokens while the larger LLM checks them. </p><p>While the draft model is fast, it often loses token consistency due to a lack of inter-token dependencies. 
Also, indiscriminately verifying long drafts wastes compute, severely reducing throughput in high-concurrency serving systems.</p><p>This research paper proposes <strong>DSpark</strong>, a speculative decoding framework that combines high-throughput parallel generation with adaptive, load-aware verification.</p><p>DSpark uses:</p><ul><li><p>A semi-autoregressive architecture that adds a lightweight sequential dependency module to introduce intra-block dependency modeling and reduce suffix decay.</p></li><li><p>Confidence-scheduled verification to dynamically decide how many draft tokens to verify for each request based on the chance that the prefix will survive and the current throughput profile of the serving engine.</p></li></ul><p>DSpark substantially improves the accepted length compared to SOTA autoregressive and parallel drafters across multiple offline benchmarks. </p><p>When deployed within the DeepSeek-V4 serving system under live user traffic, DSpark successfully reduces verification waste. It improves per-user generation speeds by 60-85% at matched throughput levels compared to the established production baseline (MTP-1). 
</p><p>By preventing severe throughput degradation under strict interactivity constraints, it also enables performance tiers that were previously unattainable, shifting the Pareto frontier of the DeepSeek-V4 serving system.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!0L-4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc8ccdca-6980-4554-9515-bc2d02dcf382_1526x1266.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!0L-4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc8ccdca-6980-4554-9515-bc2d02dcf382_1526x1266.png 424w, https://substackcdn.com/image/fetch/$s_!0L-4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc8ccdca-6980-4554-9515-bc2d02dcf382_1526x1266.png 848w, https://substackcdn.com/image/fetch/$s_!0L-4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc8ccdca-6980-4554-9515-bc2d02dcf382_1526x1266.png 1272w, https://substackcdn.com/image/fetch/$s_!0L-4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc8ccdca-6980-4554-9515-bc2d02dcf382_1526x1266.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!0L-4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc8ccdca-6980-4554-9515-bc2d02dcf382_1526x1266.png" width="1456" height="1208" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/cc8ccdca-6980-4554-9515-bc2d02dcf382_1526x1266.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1208,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:332056,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/204110303?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc8ccdca-6980-4554-9515-bc2d02dcf382_1526x1266.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!0L-4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc8ccdca-6980-4554-9515-bc2d02dcf382_1526x1266.png 424w, https://substackcdn.com/image/fetch/$s_!0L-4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc8ccdca-6980-4554-9515-bc2d02dcf382_1526x1266.png 848w, https://substackcdn.com/image/fetch/$s_!0L-4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc8ccdca-6980-4554-9515-bc2d02dcf382_1526x1266.png 1272w, https://substackcdn.com/image/fetch/$s_!0L-4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcc8ccdca-6980-4554-9515-bc2d02dcf382_1526x1266.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://github.com/deepseek-ai/DeepSpec/blob/main/DSpark_paper.pdf">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>2. Claude Sonnet 5</h3><p>Anthropic released <strong>Claude Sonnet 5</strong>, their most agentic Sonnet model yet. The model is well suited for coding, tool use, browser and terminal workflows, long-running agentic workflows, and professional knowledge work. 
</p><p>The model substantially outperforms Sonnet 4.6 across multiple domains, and its performance is close to that of Claude Opus 4.8 on many benchmarks, but at a lower price.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Nyq_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9bb5045-67b5-4958-89bf-341951acecd9_2600x1234.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Nyq_!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9bb5045-67b5-4958-89bf-341951acecd9_2600x1234.webp 424w, https://substackcdn.com/image/fetch/$s_!Nyq_!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9bb5045-67b5-4958-89bf-341951acecd9_2600x1234.webp 848w, https://substackcdn.com/image/fetch/$s_!Nyq_!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9bb5045-67b5-4958-89bf-341951acecd9_2600x1234.webp 1272w, https://substackcdn.com/image/fetch/$s_!Nyq_!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9bb5045-67b5-4958-89bf-341951acecd9_2600x1234.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Nyq_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9bb5045-67b5-4958-89bf-341951acecd9_2600x1234.webp" width="1456" height="691" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f9bb5045-67b5-4958-89bf-341951acecd9_2600x1234.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:691,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Claude Sonnet 5 benchmark table&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Claude Sonnet 5 benchmark table" title="Claude Sonnet 5 benchmark table" srcset="https://substackcdn.com/image/fetch/$s_!Nyq_!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9bb5045-67b5-4958-89bf-341951acecd9_2600x1234.webp 424w, https://substackcdn.com/image/fetch/$s_!Nyq_!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9bb5045-67b5-4958-89bf-341951acecd9_2600x1234.webp 848w, https://substackcdn.com/image/fetch/$s_!Nyq_!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9bb5045-67b5-4958-89bf-341951acecd9_2600x1234.webp 1272w, https://substackcdn.com/image/fetch/$s_!Nyq_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9bb5045-67b5-4958-89bf-341951acecd9_2600x1234.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" 
stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>It also has lower rates of hallucination, sycophancy, and unwanted behaviors compared to Sonnet 4.6, has better resistance to prompt injection, and comes with cyber safeguards that detect and block dangerous cyber activity in real time.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!y-ni!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b09ab8d-9ce5-4ad0-98e5-fb1b28836430_3840x2160.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!y-ni!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b09ab8d-9ce5-4ad0-98e5-fb1b28836430_3840x2160.webp 424w, 
https://substackcdn.com/image/fetch/$s_!y-ni!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b09ab8d-9ce5-4ad0-98e5-fb1b28836430_3840x2160.webp 848w, https://substackcdn.com/image/fetch/$s_!y-ni!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b09ab8d-9ce5-4ad0-98e5-fb1b28836430_3840x2160.webp 1272w, https://substackcdn.com/image/fetch/$s_!y-ni!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b09ab8d-9ce5-4ad0-98e5-fb1b28836430_3840x2160.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!y-ni!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b09ab8d-9ce5-4ad0-98e5-fb1b28836430_3840x2160.webp" width="1456" height="819" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0b09ab8d-9ce5-4ad0-98e5-fb1b28836430_3840x2160.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Rates of misaligned behavior across Claude models&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Rates of misaligned behavior across Claude models" title="Rates of misaligned behavior across Claude models" srcset="https://substackcdn.com/image/fetch/$s_!y-ni!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b09ab8d-9ce5-4ad0-98e5-fb1b28836430_3840x2160.webp 424w, 
https://substackcdn.com/image/fetch/$s_!y-ni!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b09ab8d-9ce5-4ad0-98e5-fb1b28836430_3840x2160.webp 848w, https://substackcdn.com/image/fetch/$s_!y-ni!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b09ab8d-9ce5-4ad0-98e5-fb1b28836430_3840x2160.webp 1272w, https://substackcdn.com/image/fetch/$s_!y-ni!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0b09ab8d-9ce5-4ad0-98e5-fb1b28836430_3840x2160.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this release <a href="https://www.anthropic.com/news/claude-sonnet-5">using this link</a>.</p><div><hr></div><h3>1. Reaching Trillion-Parameter Performance with a 35B Agent</h3><p>This research introduces <strong>Agents-A1</strong>, a 35B <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Mixture-of-Experts</a> agentic model that achieves performance comparable to trillion-parameter models by expanding the agent horizon rather than model size/parameters. </p><p>The authors build a long-horizon knowledge-action infrastructure that connects external knowledge, actions, observations, and verifier outcomes to produce agentic trajectories averaging 45K tokens.</p><p>Based on these trajectories, Agents-A1 is trained in three stages: </p><ul><li><p>Full-domain supervised fine-tuning to align the base model with broad agentic behaviors</p></li><li><p>Domain-level teacher models to capture specialized skills in each domain</p></li><li><p>Multi-teacher domain-routed on-policy distillation to improve knowledge transfer efficiency across different domains and combine six different domains into a single deployable student model</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!xu_w!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e998e13-9dd9-4589-b068-02a79cdc5da1_1904x1366.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!xu_w!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e998e13-9dd9-4589-b068-02a79cdc5da1_1904x1366.png 424w, 
https://substackcdn.com/image/fetch/$s_!xu_w!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e998e13-9dd9-4589-b068-02a79cdc5da1_1904x1366.png 848w, https://substackcdn.com/image/fetch/$s_!xu_w!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e998e13-9dd9-4589-b068-02a79cdc5da1_1904x1366.png 1272w, https://substackcdn.com/image/fetch/$s_!xu_w!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e998e13-9dd9-4589-b068-02a79cdc5da1_1904x1366.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!xu_w!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e998e13-9dd9-4589-b068-02a79cdc5da1_1904x1366.png" width="1456" height="1045" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1e998e13-9dd9-4589-b068-02a79cdc5da1_1904x1366.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1045,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:550539,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/204110303?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e998e13-9dd9-4589-b068-02a79cdc5da1_1904x1366.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!xu_w!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e998e13-9dd9-4589-b068-02a79cdc5da1_1904x1366.png 424w, https://substackcdn.com/image/fetch/$s_!xu_w!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e998e13-9dd9-4589-b068-02a79cdc5da1_1904x1366.png 848w, https://substackcdn.com/image/fetch/$s_!xu_w!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e998e13-9dd9-4589-b068-02a79cdc5da1_1904x1366.png 1272w, https://substackcdn.com/image/fetch/$s_!xu_w!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1e998e13-9dd9-4589-b068-02a79cdc5da1_1904x1366.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The resulting model achieves strong performance on long-horizon agent benchmarks compared with 1T-parameter models such as Kimi-K2.6 and DeepSeek-V4-Pro.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!wZGw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2af4e86d-5e31-4e54-b926-10b5d68e8f21_1144x564.svg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!wZGw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2af4e86d-5e31-4e54-b926-10b5d68e8f21_1144x564.svg 424w, https://substackcdn.com/image/fetch/$s_!wZGw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2af4e86d-5e31-4e54-b926-10b5d68e8f21_1144x564.svg 848w, https://substackcdn.com/image/fetch/$s_!wZGw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2af4e86d-5e31-4e54-b926-10b5d68e8f21_1144x564.svg 1272w, https://substackcdn.com/image/fetch/$s_!wZGw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2af4e86d-5e31-4e54-b926-10b5d68e8f21_1144x564.svg 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!wZGw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2af4e86d-5e31-4e54-b926-10b5d68e8f21_1144x564.svg" width="1456" height="718" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2af4e86d-5e31-4e54-b926-10b5d68e8f21_1144x564.svg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:718,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;a1_benchmarks_altair_grid.svg&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="a1_benchmarks_altair_grid.svg" title="a1_benchmarks_altair_grid.svg" srcset="https://substackcdn.com/image/fetch/$s_!wZGw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2af4e86d-5e31-4e54-b926-10b5d68e8f21_1144x564.svg 424w, https://substackcdn.com/image/fetch/$s_!wZGw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2af4e86d-5e31-4e54-b926-10b5d68e8f21_1144x564.svg 848w, https://substackcdn.com/image/fetch/$s_!wZGw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2af4e86d-5e31-4e54-b926-10b5d68e8f21_1144x564.svg 1272w, https://substackcdn.com/image/fetch/$s_!wZGw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2af4e86d-5e31-4e54-b926-10b5d68e8f21_1144x564.svg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft 
pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2606.30616">using this link</a>.</p><div><hr></div><p>This newsletter edition is completely free to read. Show your love by liking it, restacking it, and sharing it with others! 
&#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/this-week-in-ai-research-21-30-june?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/p/this-week-in-ai-research-21-30-june?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p>]]></content:encoded></item><item><title><![CDATA[NVIDIA GPU Ecosystem, Simply Explained]]></title><description><![CDATA[A guide to NVIDIA GPU architecture, interconnects, and scaling in plain English.]]></description><link>https://www.intoai.pub/p/what-every-ai-engineer-must-know-about-nvidia-gpus</link><guid isPermaLink="false">https://www.intoai.pub/p/what-every-ai-engineer-must-know-about-nvidia-gpus</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Tue, 30 Jun 2026 11:37:56 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/7bda3fd8-2e3e-43dc-9282-f16ca4d4bd86_2400x1260.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>&#10024; Before we begin, I want to introduce you to my book &#8220;<strong>LLMs In 100 Images</strong>&#8221;. &#10024; </p><p>Are you struggling to keep up with LLMs and developments around them? 
I wrote this book to exactly solve this for you!</p><p>&#8220;LLMs in 100 Images&#8221; turns the difficult parts of modern LLM systems (Attention variants, prompting techniques, sampling techniques, post-training algorithms, and more) into visuals, making them simple to understand.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!OFYu!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1c4ce38-b3d9-48a5-8ec9-c89e61cc5917_6912x3456.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!OFYu!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1c4ce38-b3d9-48a5-8ec9-c89e61cc5917_6912x3456.png 424w, https://substackcdn.com/image/fetch/$s_!OFYu!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1c4ce38-b3d9-48a5-8ec9-c89e61cc5917_6912x3456.png 848w, https://substackcdn.com/image/fetch/$s_!OFYu!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1c4ce38-b3d9-48a5-8ec9-c89e61cc5917_6912x3456.png 1272w, https://substackcdn.com/image/fetch/$s_!OFYu!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1c4ce38-b3d9-48a5-8ec9-c89e61cc5917_6912x3456.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!OFYu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1c4ce38-b3d9-48a5-8ec9-c89e61cc5917_6912x3456.png" width="1456" height="728" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f1c4ce38-b3d9-48a5-8ec9-c89e61cc5917_6912x3456.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:728,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:834004,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201598182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1c4ce38-b3d9-48a5-8ec9-c89e61cc5917_6912x3456.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!OFYu!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1c4ce38-b3d9-48a5-8ec9-c89e61cc5917_6912x3456.png 424w, https://substackcdn.com/image/fetch/$s_!OFYu!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1c4ce38-b3d9-48a5-8ec9-c89e61cc5917_6912x3456.png 848w, https://substackcdn.com/image/fetch/$s_!OFYu!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1c4ce38-b3d9-48a5-8ec9-c89e61cc5917_6912x3456.png 1272w, https://substackcdn.com/image/fetch/$s_!OFYu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff1c4ce38-b3d9-48a5-8ec9-c89e61cc5917_6912x3456.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!1Uwq!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e494190-e1b7-4a7f-a1b7-c811db0887e8_6912x3456.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!1Uwq!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e494190-e1b7-4a7f-a1b7-c811db0887e8_6912x3456.png 424w, 
https://substackcdn.com/image/fetch/$s_!1Uwq!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e494190-e1b7-4a7f-a1b7-c811db0887e8_6912x3456.png 848w, https://substackcdn.com/image/fetch/$s_!1Uwq!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e494190-e1b7-4a7f-a1b7-c811db0887e8_6912x3456.png 1272w, https://substackcdn.com/image/fetch/$s_!1Uwq!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e494190-e1b7-4a7f-a1b7-c811db0887e8_6912x3456.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!1Uwq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e494190-e1b7-4a7f-a1b7-c811db0887e8_6912x3456.png" width="1456" height="728" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2e494190-e1b7-4a7f-a1b7-c811db0887e8_6912x3456.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:728,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!1Uwq!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e494190-e1b7-4a7f-a1b7-c811db0887e8_6912x3456.png 424w, 
https://substackcdn.com/image/fetch/$s_!1Uwq!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e494190-e1b7-4a7f-a1b7-c811db0887e8_6912x3456.png 848w, https://substackcdn.com/image/fetch/$s_!1Uwq!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e494190-e1b7-4a7f-a1b7-c811db0887e8_6912x3456.png 1272w, https://substackcdn.com/image/fetch/$s_!1Uwq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2e494190-e1b7-4a7f-a1b7-c811db0887e8_6912x3456.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" 
x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>I&#8217;m running a <strong>flash sale</strong> on the book for a very limited time, and you can grab your copy today at a <strong>30% discount</strong>!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://bamaniaashish.gumroad.com/l/llmbook/LLMFLASH30&quot;,&quot;text&quot;:&quot;Grab your 30% discount &#8594;&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://bamaniaashish.gumroad.com/l/llmbook/LLMFLASH30"><span>Grab your 30% discount &#8594;</span></a></p><p>It&#8217;s now time for the lesson.</p><div><hr></div><p>GPUs are driving the current AI revolution, and understanding them well will make you invaluable in the AI ecosystem. In this lesson, we will learn about NVIDIA GPUs and the interconnects that scale them to massive data centers used for training and serving LLMs.</p><p>Let&#8217;s begin!</p><div><hr></div><h3>But first, what is a GPU and why is it needed?</h3><p>A GPU, or Graphics Processing Unit, is a specialized chip originally designed to render 3D graphics rapidly and efficiently.</p><p>But it soon became clear that the same mathematical operations (matrix addition and multiplication) used in graphics could also be used to train and serve AI. This made NVIDIA, a company that started as a chipmaker for video games, the leading company building GPUs for AI today.</p><p>GPUs are quite different from CPUs, which are the chips used for general processing in the computer. 
While CPUs contain a few powerful cores that are perfect for executing tasks with sequential and branching logic at very low latency, GPUs are the masters of parallel computing.</p><p><span>They have thousands of cores, each slower than a CPU core, but together they produce massive throughput for parallel computations, especially matrix or tensor operations. (</span><em><span>A matrix is a 2D tensor</span></em><span>).</span></p><p>A GPU has units called <strong>Streaming Multiprocessors (SMs)</strong>, where calculations actually take place. Each SM contains smaller specialized components:</p><ul><li><p><strong>CUDA cores</strong>, which perform fast general mathematical operations</p></li><li><p><strong>Tensor cores</strong>, which perform fast matrix operations</p></li></ul><p>Alongside these, a GPU has two types of memory:</p><ol><li><p><strong>On-chip memory (L2 cache, L1 cache, and registers)</strong> that is physically etched on the GPU die</p></li><li><p><strong>High Bandwidth Memory (HBM)</strong>, generally called global memory or VRAM, that is mounted alongside the die</p></li></ol><p>HBM is a specialized type of <a href="https://en.wikipedia.org/wiki/Dynamic_random-access_memory"><span>Dynamic random-access memory (DRAM)</span></a> designed for massive parallel data throughput.</p><p><span>On-chip memory is built from </span><a href="https://en.wikipedia.org/wiki/Static_random-access_memory"><span>Static random-access memory (SRAM)</span></a><span>, which is extremely fast but much smaller and more expensive than the GPU HBM.</span></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!mIsv!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png" data-component-name="Image2ToDOM"><div 
class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!mIsv!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 424w, https://substackcdn.com/image/fetch/$s_!mIsv!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 848w, https://substackcdn.com/image/fetch/$s_!mIsv!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 1272w, https://substackcdn.com/image/fetch/$s_!mIsv!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!mIsv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png" width="1456" height="659" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:659,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!mIsv!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 424w, https://substackcdn.com/image/fetch/$s_!mIsv!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 848w, https://substackcdn.com/image/fetch/$s_!mIsv!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 1272w, https://substackcdn.com/image/fetch/$s_!mIsv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">A simplified architectural overview of a GPU (<a href="https://www.intoai.pub/p/a-hardware-level-tour-of-llm-inference">Source</a>)</figcaption></figure></div><p>If you&#8217;re interested in reading about how data flows through CPU and GPU during LLM inference, here is a lesson that will help:</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;b053e6a9-0e01-41a5-878b-35ecfb5836ea&quot;,&quot;caption&quot;:&quot;&#10024; Today&#8217;s newsletter edition is sponsored by Backplanes.&#10024;&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;A hardware-level tour of how LLMs generate text&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. 
Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2026-06-22T23:51:37.217Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!4DwK!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/a-hardware-level-tour-of-llm-inference&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:201900247,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:64,&quot;comment_count&quot;:2,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!xBa1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fad0f7ec6-837c-4c2b-9b4d-5365d1a9e668_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><h3>The NVIDIA ecosystem of GPUs</h3><p>NVIDIA GPUs come in different architectures, each named after a famous scientist or mathematician.</p><p><span>The company produces consumer-grade GPUs for graphics and gaming in its </span><a href="https://blogs.nvidia.com/blog/whats-the-difference-between-nvidia-rtx-and-gtx/"><span>GeForce GTX (older) and RTX series</span></a><span>. 
It also produces the </span><a href="https://www.nvidia.com/en-gb/products/workstations/professional-desktop-gpus/"><span>RTX PRO</span></a><span> (previously known as Quadro) series for commercial, scientific, and creative workloads on workstations.</span></p><p><span>We won&#8217;t be discussing these consumer and workstation GPUs in this lesson. Instead, we&#8217;ll focus on data center-grade GPUs designed for deep learning/AI.</span></p><p><span>In 2017, NVIDIA introduced Tensor Cores and </span>FP16 mixed-precision training <span>for deep learning in its&nbsp;</span><a href="https://www.nvidia.com/en-gb/data-center/tesla-v100/"><span>Volta series</span></a><span>&nbsp;of GPUs (Tesla V100). </span>OpenAI used a cluster of these GPUs <a href="https://arxiv.org/pdf/2005.14165">to train GPT-3</a>.</p><p><span>Since then, NVIDIA has made its GPUs more performant. Some popular architectures, along with their flagship GPU models, are:</span></p><ul><li><p><strong>Turing</strong> series in 2018 (Tesla T4): Optimized for lower-precision integer (INT8/INT4) inference pipelines</p></li><li><p><strong>Ampere</strong> series in 2020 (A100): Used to train early LLMs and serve many GPT-3-scale models.</p></li><li><p><strong>Hopper</strong> series in 2022 (H100, H200): Introduced the <a href="https://github.com/NVIDIA/TransformerEngine">Transformer Engine</a> and FP8 precision. H100 GPUs were used to train <a href="https://www.intoai.pub/p/distributed-training-of-llama-explained">Llama 3</a> and similar models</p></li><li><p><strong>Ada Lovelace</strong> series in 2022 (L40, L40S): Designed for high-performance AI inference and graphics rendering tasks</p></li><li><p><strong>Blackwell</strong> series in 2024 (B100, B200): Built for training and serving trillion-parameter reasoning models with support for <a href="https://developer.nvidia.com/blog/introducing-nvfp4-for-efficient-and-accurate-low-precision-inference/">FP4 (4-bit floating-point) 
precision</a></p></li><li><p><strong>Blackwell Ultra</strong> series in 2025 (B300): Improved Blackwell generation with more memory and higher performance (~50% higher FP4 compute)</p></li><li><p><strong>Rubin</strong> series in 2026 (Rubin GPU): Designed to provide roughly 2.5&#215; the FP4 compute (50 vs 20 PFLOPS) and double the GPU-to-GPU bandwidth (3.6 TB/s vs 1.8 TB/s) compared to the Blackwell series GPUs.</p></li><li><p><strong>Rubin Ultra</strong> (announced for release in 2027)</p></li><li><p><strong>Feynman</strong> (announced for release in 2028)</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!kYeO!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff09a9175-2570-457e-a850-de9d1370ed95_1638x1130.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!kYeO!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff09a9175-2570-457e-a850-de9d1370ed95_1638x1130.png 424w, https://substackcdn.com/image/fetch/$s_!kYeO!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff09a9175-2570-457e-a850-de9d1370ed95_1638x1130.png 848w, https://substackcdn.com/image/fetch/$s_!kYeO!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff09a9175-2570-457e-a850-de9d1370ed95_1638x1130.png 1272w, https://substackcdn.com/image/fetch/$s_!kYeO!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff09a9175-2570-457e-a850-de9d1370ed95_1638x1130.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!kYeO!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff09a9175-2570-457e-a850-de9d1370ed95_1638x1130.png" width="1456" height="1004" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f09a9175-2570-457e-a850-de9d1370ed95_1638x1130.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1004,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:146809,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201598182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff09a9175-2570-457e-a850-de9d1370ed95_1638x1130.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!kYeO!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff09a9175-2570-457e-a850-de9d1370ed95_1638x1130.png 424w, https://substackcdn.com/image/fetch/$s_!kYeO!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff09a9175-2570-457e-a850-de9d1370ed95_1638x1130.png 848w, https://substackcdn.com/image/fetch/$s_!kYeO!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff09a9175-2570-457e-a850-de9d1370ed95_1638x1130.png 1272w, https://substackcdn.com/image/fetch/$s_!kYeO!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff09a9175-2570-457e-a850-de9d1370ed95_1638x1130.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>Understanding inter-GPU connections</h3><p>A popular GPU like H100 from the Hopper series has 80GB of HBM. 
A single such GPU is too small to hold today&#8217;s trillion-parameter large reasoning models, whose weights alone take up hundreds of GBs of memory or more. This is why multiple GPUs are connected to form a large server (also called a Node). This approach is called <strong>Vertical scaling</strong> or the &#8220;scaling-up&#8221; approach.</p><p>There are different ways GPUs can be connected in a server:</p><ul><li><p><strong>PCIe (Peripheral Component Interconnect Express): </strong>This is the general-purpose connection used inside servers to connect GPUs, network cards, SSDs, and other devices to the CPU. The 6th-generation PCIe offers 256 GB/s of GPU-to-GPU bandwidth. With PCIe, GPU-to-GPU data takes a slower path through the <a href="https://en.wikipedia.org/wiki/Root_complex">CPU&#8217;s root complex</a>, which increases latency and reduces bandwidth.</p></li></ul><ul><li><p><strong>NVLink: </strong>This is NVIDIA&#8217;s proprietary communication channel that gives a dedicated high-speed data path between GPUs. 
The bidirectional bandwidth between two GPUs provided by different generations of NVLinks is as follows:</p><ul><li><p>900 GB/s on H100 (NVLink 4)</p></li><li><p>1.8 TB/s on B200 (NVLink 5)</p></li><li><p>3.6 TB/s on Rubin (NVLink 6)</p></li></ul></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!JvhB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F03ee51e9-8dc9-4145-9d0c-6251d97bdc17_1920x1080.svg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!JvhB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F03ee51e9-8dc9-4145-9d0c-6251d97bdc17_1920x1080.svg 424w, https://substackcdn.com/image/fetch/$s_!JvhB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F03ee51e9-8dc9-4145-9d0c-6251d97bdc17_1920x1080.svg 848w, https://substackcdn.com/image/fetch/$s_!JvhB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F03ee51e9-8dc9-4145-9d0c-6251d97bdc17_1920x1080.svg 1272w, https://substackcdn.com/image/fetch/$s_!JvhB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F03ee51e9-8dc9-4145-9d0c-6251d97bdc17_1920x1080.svg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!JvhB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F03ee51e9-8dc9-4145-9d0c-6251d97bdc17_1920x1080.svg" width="1456" height="819" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/03ee51e9-8dc9-4145-9d0c-6251d97bdc17_1920x1080.svg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Fifth Generation NVLink&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Fifth Generation NVLink" title="Fifth Generation NVLink" srcset="https://substackcdn.com/image/fetch/$s_!JvhB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F03ee51e9-8dc9-4145-9d0c-6251d97bdc17_1920x1080.svg 424w, https://substackcdn.com/image/fetch/$s_!JvhB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F03ee51e9-8dc9-4145-9d0c-6251d97bdc17_1920x1080.svg 848w, https://substackcdn.com/image/fetch/$s_!JvhB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F03ee51e9-8dc9-4145-9d0c-6251d97bdc17_1920x1080.svg 1272w, https://substackcdn.com/image/fetch/$s_!JvhB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F03ee51e9-8dc9-4145-9d0c-6251d97bdc17_1920x1080.svg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" 
stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Inter-GPU bandwidth offered by various generations of NVIDIA NVLink (<a href="https://www.nvidia.com/en-gb/data-center/nvlink/">Source</a>)</figcaption></figure></div><p>This is massive compared to the 256 GB/s offered by the 6th-generation PCIe (a 14&#215; increase with NVLink 6)!</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!utK6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2821e9d7-dd9f-4d3c-b920-6a5f84af288e_1674x764.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!utK6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2821e9d7-dd9f-4d3c-b920-6a5f84af288e_1674x764.png 424w, https://substackcdn.com/image/fetch/$s_!utK6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2821e9d7-dd9f-4d3c-b920-6a5f84af288e_1674x764.png 848w, https://substackcdn.com/image/fetch/$s_!utK6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2821e9d7-dd9f-4d3c-b920-6a5f84af288e_1674x764.png 1272w, https://substackcdn.com/image/fetch/$s_!utK6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2821e9d7-dd9f-4d3c-b920-6a5f84af288e_1674x764.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!utK6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2821e9d7-dd9f-4d3c-b920-6a5f84af288e_1674x764.png" width="1456" height="665" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2821e9d7-dd9f-4d3c-b920-6a5f84af288e_1674x764.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:665,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:118591,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201598182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2821e9d7-dd9f-4d3c-b920-6a5f84af288e_1674x764.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" 
alt="" srcset="https://substackcdn.com/image/fetch/$s_!utK6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2821e9d7-dd9f-4d3c-b920-6a5f84af288e_1674x764.png 424w, https://substackcdn.com/image/fetch/$s_!utK6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2821e9d7-dd9f-4d3c-b920-6a5f84af288e_1674x764.png 848w, https://substackcdn.com/image/fetch/$s_!utK6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2821e9d7-dd9f-4d3c-b920-6a5f84af288e_1674x764.png 1272w, https://substackcdn.com/image/fetch/$s_!utK6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2821e9d7-dd9f-4d3c-b920-6a5f84af288e_1674x764.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>NVLink has an issue, though, in that it splits each GPU&#8217;s total bandwidth among the GPUs it connects. Check out the following example, where each Rubin GPU splits its 3.6 TB/s bandwidth among the other three, resulting in 1.2 TB/s per connection.</p><p>The general formula for the effective inter-GPU bandwidth with NVLink is <code>B/N</code>, where</p><ul><li><p><code>B</code> is a GPU&#8217;s total NVLink bandwidth</p></li><li><p><code>N</code> is the number of GPUs it is connected to</p></li></ul><p>The solution in this case is an NVSwitch.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!QEay!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa122cab7-e697-4e01-bf44-0e2b9697737d_1674x764.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!QEay!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa122cab7-e697-4e01-bf44-0e2b9697737d_1674x764.png 424w, https://substackcdn.com/image/fetch/$s_!QEay!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa122cab7-e697-4e01-bf44-0e2b9697737d_1674x764.png 848w, 
https://substackcdn.com/image/fetch/$s_!QEay!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa122cab7-e697-4e01-bf44-0e2b9697737d_1674x764.png 1272w, https://substackcdn.com/image/fetch/$s_!QEay!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa122cab7-e697-4e01-bf44-0e2b9697737d_1674x764.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!QEay!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa122cab7-e697-4e01-bf44-0e2b9697737d_1674x764.png" width="1456" height="665" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a122cab7-e697-4e01-bf44-0e2b9697737d_1674x764.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:665,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:121567,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201598182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa122cab7-e697-4e01-bf44-0e2b9697737d_1674x764.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!QEay!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa122cab7-e697-4e01-bf44-0e2b9697737d_1674x764.png 424w, https://substackcdn.com/image/fetch/$s_!QEay!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa122cab7-e697-4e01-bf44-0e2b9697737d_1674x764.png 
848w, https://substackcdn.com/image/fetch/$s_!QEay!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa122cab7-e697-4e01-bf44-0e2b9697737d_1674x764.png 1272w, https://substackcdn.com/image/fetch/$s_!QEay!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa122cab7-e697-4e01-bf44-0e2b9697737d_1674x764.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><ul><li><p><strong><span>NVSwitch:&nbsp;</span></strong><span>This is a high-bandwidth, low-latency fabric that connects multiple 
GPUs within a system, ensuring GPUs can communicate with one another at full bandwidth simultaneously, regardless of how many are connected. T</span>his lets all the GPUs work together and act like a single massive GPU in a server.<br><br>It must be noted that NVSwitches are more expensive than other connections and might be unnecessary if you&#8217;re working with smaller (a few billion parameters) LLMs.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!dMUy!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d346837-7882-4197-adab-0754f47e1fb7_1674x764.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!dMUy!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d346837-7882-4197-adab-0754f47e1fb7_1674x764.png 424w, https://substackcdn.com/image/fetch/$s_!dMUy!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d346837-7882-4197-adab-0754f47e1fb7_1674x764.png 848w, https://substackcdn.com/image/fetch/$s_!dMUy!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d346837-7882-4197-adab-0754f47e1fb7_1674x764.png 1272w, https://substackcdn.com/image/fetch/$s_!dMUy!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d346837-7882-4197-adab-0754f47e1fb7_1674x764.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!dMUy!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d346837-7882-4197-adab-0754f47e1fb7_1674x764.png" width="1456" height="665" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3d346837-7882-4197-adab-0754f47e1fb7_1674x764.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:665,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:126391,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201598182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d346837-7882-4197-adab-0754f47e1fb7_1674x764.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!dMUy!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d346837-7882-4197-adab-0754f47e1fb7_1674x764.png 424w, https://substackcdn.com/image/fetch/$s_!dMUy!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d346837-7882-4197-adab-0754f47e1fb7_1674x764.png 848w, https://substackcdn.com/image/fetch/$s_!dMUy!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d346837-7882-4197-adab-0754f47e1fb7_1674x764.png 1272w, https://substackcdn.com/image/fetch/$s_!dMUy!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3d346837-7882-4197-adab-0754f47e1fb7_1674x764.png 1456w" sizes="100vw" 
loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>GPUs aren&#8217;t all that we need</h3><p>While GPUs are used for fast parallel operations during LLM training and inference, CPUs are needed to:</p><ul><li><p>preprocess and load/ unload data into GPUs</p></li><li><p>orchestrate jobs between GPUs</p></li><li><p>manage storage and networking</p></li><li><p>run other serial/ non-parallel operations</p></li></ul><p>If the CPUs used in the process are too slow, the expensive GPUs would sit idle most of the time. 
This is why NVIDIA has launched its own <a href="https://en.wikipedia.org/wiki/Arm_architecture_family">Arm</a>-based CPU models, namely:</p><ul><li><p><strong><a href="https://www.nvidia.com/en-gb/data-center/grace-cpu-superchip/">Grace</a></strong></p></li><li><p><strong><a href="https://www.nvidia.com/en-gb/data-center/vera-cpu/">Vera</a></strong></p></li></ul><p>These are used alongside GPUs and connected to them <span>using&nbsp;</span><strong><a href="https://www.nvidia.com/en-gb/data-center/nvlink-c2c/"><span>NVLink-C2C</span></a><span>&nbsp;</span></strong><span>(C2C stands for chip-to-chip</span>). This is a superfast CPU-to-GPU connection that replaces the slower PCIe connection and provides both the GPU and CPU with a unified memory space, allowing them to access each other&#8217;s memory directly without manual copying.</p><p>NVIDIA combines its GPUs and CPUs together in a server rack, with three popular ones being:</p><ul><li><p><strong>GB200 NVL72: </strong>Combines 72 Blackwell GPUs and 36 Grace CPUs</p></li><li><p><strong>GB300 NVL72:</strong> Combines 72 Blackwell Ultra GPUs and 36 Grace CPUs </p></li><li><p><strong>Vera Rubin NVL72: </strong>Combines 72 Rubin GPUs and 36 Vera CPUs</p></li></ul><p>(<strong>GB</strong> stands for Grace-Blackwell, and <strong>NVL</strong> tells how many GPUs are connected using NVLink/NVSwitch.)</p><p>But these racks aren&#8217;t the only way NVIDIA packages these components. 
NVIDIA also makes multi-GPU (no CPU included) baseboards in its <strong>HGX series</strong>, with the popular ones being: </p><ul><li><p><strong>HGX A100: </strong>Combines 4, 8, or 16 A100 GPUs</p></li><li><p><strong>HGX H100: </strong>Combines 4 or 8 H100 GPUs</p></li><li><p><strong>HGX Rubin NVL8: </strong>Combines 8 Rubin GPUs</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!DtWp!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!DtWp!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 424w, https://substackcdn.com/image/fetch/$s_!DtWp!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 848w, https://substackcdn.com/image/fetch/$s_!DtWp!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 1272w, https://substackcdn.com/image/fetch/$s_!DtWp!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!DtWp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png" width="1456" height="465" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:465,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!DtWp!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 424w, https://substackcdn.com/image/fetch/$s_!DtWp!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 848w, https://substackcdn.com/image/fetch/$s_!DtWp!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 1272w, https://substackcdn.com/image/fetch/$s_!DtWp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path 
d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">NVIDIA HGX H100 server with GPUs connected using NVSwitch/ NVLink (<a href="https://www.intoai.pub/p/a-hardware-level-tour-of-llm-inference">Source</a>)</figcaption></figure></div><p>In the <strong>DGX</strong> <strong>series</strong>, NVIDIA combines GPUs, CPUs, memory, networking, storage, cooling, and software into a complete AI system (for example, the DGX H100 server).</p><p><span>Its&nbsp;</span><strong><span>MGX</span></strong><span>&nbsp;</span><strong><span>series</span></strong><span> is a modular server architecture in which the above-described components are swappable, letting server makers combine different options to build custom server configurations.</span></p><div><hr></div><h3>Connecting servers &amp; scaling horizontally</h3><p>There&#8217;s a limit to how much we can vertically scale GPUs. This is due to the chip's physical constraints, as well as power and cooling requirements.
In such a case, multiple servers are connected in an approach called <strong>Horizontal scaling</strong> or the &#8220;scaling-out&#8221; approach.</p><p>Although data transfer speed is lower than with vertically scaled GPUs, the benefit of horizontal scaling is that one can theoretically connect any number of GPUs together. </p><p>NVIDIA offers two high-throughput connections for horizontal scaling, <span>both of which use&nbsp;</span><strong><a href="https://developer.nvidia.com/gpudirect"><span>GPUDirect</span></a></strong><a href="https://developer.nvidia.com/gpudirect"><span>&nbsp;</span></a><strong><a href="https://developer.nvidia.com/gpudirect"><span>RDMA (Remote Direct Memory Access)</span></a></strong><span>, which lets GPUs exchange data directly from&nbsp;</span>memory while bypassing the CPU and OS.</p><ol><li><p><strong>Spectrum-X Ethernet:</strong> This is the standard AI-tuned Ethernet connection in a data center that offers high performance in AI workflows. It uses <strong><a href="https://en.wikipedia.org/wiki/RDMA_over_Converged_Ethernet">RoCE (RDMA over Converged Ethernet)</a></strong>, a protocol that helps run RDMA over an Ethernet network.</p></li><li><p><strong>Quantum InfiniBand:</strong>&nbsp;This is a specialized connection that delivers ultra-low latency for high-end LLM training data centers. It uses&nbsp;RDMA natively without using RoCE.</p></li></ol><p>Both have a throughput of about 100 GB/s per connection, which is far lower than that of NVLink with NVSwitch (3.6 TB/s per connection). This means that workloads that require super-fast data transfer must be kept between vertically scaled GPUs, while the others could be directed to GPUs spread across servers.</p><p>For example, during training, Tensor parallelism (TP) is implemented across GPUs within a single server.
On the other hand, Data parallelism (DP) and Pipeline parallelism (PP) are implemented across servers as they exchange data less often.</p><p><a href="https://developer.nvidia.com/nccl">NVIDIA Collective Communications Library (NCCL)</a> handles and coordinates data exchange between GPUs, automatically routing each workload to the fastest available link.</p><p>If you&#8217;re new to the terms TP, DP, and PP, the following lesson would help:</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;d6820aa4-0bdd-43bf-af85-a5336aec2527&quot;,&quot;caption&quot;:&quot;Understanding distributed setups for LLM training and inference is one of the biggest advantages that you can have as an engineer today. This is what we will work towards in this lesson by studying how Meta&#8217;s Llama 3 models were trained in a distributed setting.&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Distributed Training of Llama, Explained Simply&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. 
Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2026-06-05T11:27:54.327Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!k_v2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/distributed-training-of-llama-explained&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:200488145,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:15,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!xBa1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fad0f7ec6-837c-4c2b-9b4d-5365d1a9e668_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><h3>The hierarchy of GPU connections</h3><p>Revisiting what we discussed before, GPUs are arranged hierarchically in large data centers as follows:</p><ul><li><p>Server or Node (4 to 8 GPUs)</p></li><li><p>Server rack (10s to 100s GPUs)</p></li><li><p>Cluster or Pod (100s to 1000s GPUs)</p></li><li><p>Data center (10,000 to 100,000 GPUs)</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 
is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!wCP6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ed18331-e59a-40ba-b78b-5fa07491b793_2722x752.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!wCP6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ed18331-e59a-40ba-b78b-5fa07491b793_2722x752.png 424w, https://substackcdn.com/image/fetch/$s_!wCP6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ed18331-e59a-40ba-b78b-5fa07491b793_2722x752.png 848w, https://substackcdn.com/image/fetch/$s_!wCP6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ed18331-e59a-40ba-b78b-5fa07491b793_2722x752.png 1272w, https://substackcdn.com/image/fetch/$s_!wCP6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ed18331-e59a-40ba-b78b-5fa07491b793_2722x752.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!wCP6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ed18331-e59a-40ba-b78b-5fa07491b793_2722x752.png" width="1456" height="402" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7ed18331-e59a-40ba-b78b-5fa07491b793_2722x752.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:402,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:147510,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201598182?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ed18331-e59a-40ba-b78b-5fa07491b793_2722x752.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!wCP6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ed18331-e59a-40ba-b78b-5fa07491b793_2722x752.png 424w, https://substackcdn.com/image/fetch/$s_!wCP6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ed18331-e59a-40ba-b78b-5fa07491b793_2722x752.png 848w, https://substackcdn.com/image/fetch/$s_!wCP6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ed18331-e59a-40ba-b78b-5fa07491b793_2722x752.png 1272w, https://substackcdn.com/image/fetch/$s_!wCP6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ed18331-e59a-40ba-b78b-5fa07491b793_2722x752.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>An example of a massive data center is <strong><a href="https://x.ai/colossus">Colossus</a></strong><span>, developed by </span>xAI<span> primarily for training Grok. Today, this data center also powers LLMs and research labs at Anthropic, Google, and </span>Reflection AI. 
Colossus consists of 200,000 H100 GPUs and is intended to be scaled to 1 million GPUs (including H100, H200, and GB200).</p><div><hr></div><h3><strong>TL;DR</strong></h3><p>To summarise:</p><ul><li><p>GPUs, initially designed for graphics rendering, are now the backbone of the current AI ecosystem, thanks to their ability to perform large numbers of matrix operations in parallel.</p></li><li><p>While CPUs have few cores that can perform sequential work faster than GPUs, GPUs have thousands of slower cores that, overall, deliver massive parallel throughput.</p></li><li><p>A GPU is made up of Streaming Multiprocessors (SMs) containing CUDA cores (for general math operations) and Tensor cores (for matrix math operations).</p></li><li><p>A GPU has smaller but faster on-chip SRAM (L1 cache/L2 cache/registers) and large off-chip HBM/VRAM, which is a type of DRAM.</p></li><li><p>The most commonly used NVIDIA GPU series today are Ampere, Hopper, Blackwell, and Rubin.</p></li><li><p>GPUs are connected in a single server using PCIe, NVLink, or NVSwitch. This approach is called Vertical scaling.</p></li><li><p>NVIDIA pairs its GPUs with its own Arm-based CPUs (Grace and Vera) using NVLink-C2C, a high-speed connection that provides both chips with a unified memory space.</p></li><li><p>Multiple servers are connected together using Spectrum-X Ethernet or Quantum InfiniBand to build massive data centers. This approach is called Horizontal scaling.</p></li></ul><div><hr></div><p>This article is completely free to read. Show your love by liking it, restacking it, and sharing it with others! 
&#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/what-every-ai-engineer-must-know-about-nvidia-gpus?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/p/what-every-ai-engineer-must-know-about-nvidia-gpus?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p><p>Also, don&#8217;t forget to grab your copy of &#8220;<strong>LLMs In 100 Images</strong>&#8221; at a <strong>30% discount</strong>! &#10024; </p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://bamaniaashish.gumroad.com/l/llmbook/LLMFLASH30&quot;,&quot;text&quot;:&quot;Grab your 30% discount &#8594;&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://bamaniaashish.gumroad.com/l/llmbook/LLMFLASH30"><span>Grab your 30% discount &#8594;</span></a></p>]]></content:encoded></item><item><title><![CDATA[This Week In AI Research (14-20 June 26) 🗓️]]></title><description><![CDATA[The top 10 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-14-20-june</link><guid isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-14-20-june</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Wed, 24 Jun 2026 23:27:49 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/27264d55-cd49-436d-a834-53632a08e2b4_1448x1086.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>&#10024; Before we begin, I want to introduce you to a wonderful book called &#8216;<strong>RAG from First Principles&#8217;</strong>.</p><p>While most developers can spin up a RAG pipeline in an afternoon using LangChain or LlamaIndex, very few understand its internals well and know how to fix it when things go wrong. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!tkbp!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe03a781-519b-4f76-b6a4-b68140d6bc99_1956x1108.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!tkbp!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe03a781-519b-4f76-b6a4-b68140d6bc99_1956x1108.png 424w, https://substackcdn.com/image/fetch/$s_!tkbp!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe03a781-519b-4f76-b6a4-b68140d6bc99_1956x1108.png 848w, https://substackcdn.com/image/fetch/$s_!tkbp!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe03a781-519b-4f76-b6a4-b68140d6bc99_1956x1108.png 1272w, https://substackcdn.com/image/fetch/$s_!tkbp!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe03a781-519b-4f76-b6a4-b68140d6bc99_1956x1108.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!tkbp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe03a781-519b-4f76-b6a4-b68140d6bc99_1956x1108.png" width="725.46875" height="411.06574089972526" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fe03a781-519b-4f76-b6a4-b68140d6bc99_1956x1108.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:825,&quot;width&quot;:1456,&quot;resizeWidth&quot;:725.46875,&quot;bytes&quot;:293940,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/202885851?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe03a781-519b-4f76-b6a4-b68140d6bc99_1956x1108.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!tkbp!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe03a781-519b-4f76-b6a4-b68140d6bc99_1956x1108.png 424w, https://substackcdn.com/image/fetch/$s_!tkbp!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe03a781-519b-4f76-b6a4-b68140d6bc99_1956x1108.png 848w, https://substackcdn.com/image/fetch/$s_!tkbp!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe03a781-519b-4f76-b6a4-b68140d6bc99_1956x1108.png 1272w, 
https://substackcdn.com/image/fetch/$s_!tkbp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffe03a781-519b-4f76-b6a4-b68140d6bc99_1956x1108.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>&#8216;RAG from First Principles&#8217;</strong> takes the RAG stack apart layer by layer and teaches you about ingestion, chunking, embeddings, vector indexes, hybrid search, reranking, and evaluation. 
</p><p>Each chapter answers the questions practitioners actually hit in production, building from data import all the way to GraphRAG, Agentic RAG, and Modular RAG.</p><p>By the end, you&#8217;ll be able to optimize, debug, and extend your RAG systems with confidence and not guesswork.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://packt.link/T2CIA&quot;,&quot;text&quot;:&quot;Master RAG today &#8594;&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://packt.link/T2CIA"><span>Master RAG today &#8594;</span></a></p><div><hr></div><h3>10. <span>ENPIRE: Agentic Robot Policy Self-Improvement in the Real World</span></h3><p>This research from NVIDIA introduces <strong>ENPIRE</strong>, a framework for coding agents that enables them to autonomously improve robot policies in real-world settings. </p><p>ENPIRE has four core modules:</p><ol><li><p>Environment module (<strong>EN</strong>) that automatically resets the scene and checks whether a task succeeded</p></li><li><p>Policy Improvement module (<strong>PI</strong>) that launches policy refinement</p></li><li><p>Rollout module (<strong>R</strong>) that evaluates policies with single or multiple physical robots operating in parallel</p></li><li><p>Evolution module (<strong>E</strong>) that lets coding agents analyze logs, consult literature, and improve training infrastructure and algorithm code to fix failures</p></li></ol><p>Using ENPIRE, frontier coding agents can autonomously build a policy that achieves a 99% success rate on challenging dexterous manipulation tasks such as <a href="https://github.com/huggingface/gym-pusht">PushT</a>, organizing pins into a pin box, and using a cutter to cut a zip tie.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!8P8D!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb4c41e42-f17d-49d7-876f-eabd212553b5_1468x1340.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!8P8D!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb4c41e42-f17d-49d7-876f-eabd212553b5_1468x1340.png 424w, https://substackcdn.com/image/fetch/$s_!8P8D!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb4c41e42-f17d-49d7-876f-eabd212553b5_1468x1340.png 848w, https://substackcdn.com/image/fetch/$s_!8P8D!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb4c41e42-f17d-49d7-876f-eabd212553b5_1468x1340.png 1272w, https://substackcdn.com/image/fetch/$s_!8P8D!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb4c41e42-f17d-49d7-876f-eabd212553b5_1468x1340.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!8P8D!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb4c41e42-f17d-49d7-876f-eabd212553b5_1468x1340.png" width="1456" height="1329" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b4c41e42-f17d-49d7-876f-eabd212553b5_1468x1340.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1329,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:933772,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/202885851?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb4c41e42-f17d-49d7-876f-eabd212553b5_1468x1340.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!8P8D!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb4c41e42-f17d-49d7-876f-eabd212553b5_1468x1340.png 424w, https://substackcdn.com/image/fetch/$s_!8P8D!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb4c41e42-f17d-49d7-876f-eabd212553b5_1468x1340.png 848w, https://substackcdn.com/image/fetch/$s_!8P8D!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb4c41e42-f17d-49d7-876f-eabd212553b5_1468x1340.png 1272w, https://substackcdn.com/image/fetch/$s_!8P8D!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb4c41e42-f17d-49d7-876f-eabd212553b5_1468x1340.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2606.19980">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>9. Looped World Models</h3><p>World models today have significant computational requirements to run faithful long-horizon simulations. 
This makes them expensive to deploy and prone to compounding errors.</p><p>This research addresses this by introducing <strong>Looped World Models (LoopWM)</strong>, which use looped, parameter-shared transformer blocks to refine latent environment states through repeated internal iterations, rather than adding multiple separate layers.</p><p>This leads to ~100&#215; parameter efficiency over conventional approaches with adaptive computation that automatically scales depth to match the complexity of each prediction step.</p><p>LoopWM introduces iterative latent depth as a new scaling dimension for world simulation, rather than increasing model size or training data.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!n0jK!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2db60d2d-f270-47f5-9ea2-4c26e84a7ca8_2426x1270.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!n0jK!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2db60d2d-f270-47f5-9ea2-4c26e84a7ca8_2426x1270.png 424w, https://substackcdn.com/image/fetch/$s_!n0jK!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2db60d2d-f270-47f5-9ea2-4c26e84a7ca8_2426x1270.png 848w, https://substackcdn.com/image/fetch/$s_!n0jK!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2db60d2d-f270-47f5-9ea2-4c26e84a7ca8_2426x1270.png 1272w, 
https://substackcdn.com/image/fetch/$s_!n0jK!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2db60d2d-f270-47f5-9ea2-4c26e84a7ca8_2426x1270.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!n0jK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2db60d2d-f270-47f5-9ea2-4c26e84a7ca8_2426x1270.png" width="1456" height="762" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2db60d2d-f270-47f5-9ea2-4c26e84a7ca8_2426x1270.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:762,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2861419,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/202885851?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2db60d2d-f270-47f5-9ea2-4c26e84a7ca8_2426x1270.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!n0jK!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2db60d2d-f270-47f5-9ea2-4c26e84a7ca8_2426x1270.png 424w, https://substackcdn.com/image/fetch/$s_!n0jK!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2db60d2d-f270-47f5-9ea2-4c26e84a7ca8_2426x1270.png 848w, 
https://substackcdn.com/image/fetch/$s_!n0jK!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2db60d2d-f270-47f5-9ea2-4c26e84a7ca8_2426x1270.png 1272w, https://substackcdn.com/image/fetch/$s_!n0jK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2db60d2d-f270-47f5-9ea2-4c26e84a7ca8_2426x1270.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2606.18208">using this link</a>.</p><div><hr></div><h3>8. 
Surpassing Frontier Performance with Fusion</h3><p><span>This research from OpenRouter introduces&nbsp;</span><strong><span>Fusion</span></strong><span>, an approach&nbsp;that allows selecting a panel of participant models alongside a judge model. It then sends a prompt to multiple participant models in parallel and uses the judge model to combine their answers into a single stronger response.</span></p><p><span>The results show </span>that:</p><ol><li><p>Panels of models consistently outperform individual models</p></li><li><p>Frontier panels can achieve beyond-frontier performance</p></li><li><p>Panels of budget models can beat frontier models and get close to frontier panel performance</p></li></ol><p>Two notable examples from the results are:</p><ul><li><p><span>Fable 5 + GPT-5.5 scores 69% on OpenRouter&#8217;s&nbsp;</span><a href="https://arxiv.org/abs/2602.11685"><span>DRACO deep-research benchmark,</span></a><span>&nbsp;while Fable 5 alone scores 65.3% on this benchmark.</span></p></li><li><p>A budget panel of Gemini 3 Flash, Kimi K2.6, and DeepSeek V4 Pro outperforms GPT-5.5 and Opus 4.8. 
It also scores within 1% of Fable 5&#8217;s score while costing half as much.</p></li></ul><p>Although highly performant, it must be noted that this method is slower, costlier, and not a drop-in replacement for coding or long-horizon agents.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!pcSB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38f49fe6-a76b-443e-a857-a552296fcb72_1024x714.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!pcSB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38f49fe6-a76b-443e-a857-a552296fcb72_1024x714.png 424w, https://substackcdn.com/image/fetch/$s_!pcSB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38f49fe6-a76b-443e-a857-a552296fcb72_1024x714.png 848w, https://substackcdn.com/image/fetch/$s_!pcSB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38f49fe6-a76b-443e-a857-a552296fcb72_1024x714.png 1272w, https://substackcdn.com/image/fetch/$s_!pcSB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38f49fe6-a76b-443e-a857-a552296fcb72_1024x714.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!pcSB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38f49fe6-a76b-443e-a857-a552296fcb72_1024x714.png" width="1024" height="714" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/38f49fe6-a76b-443e-a857-a552296fcb72_1024x714.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:714,&quot;width&quot;:1024,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;DRACO benchmark scores for Fusion and solo configurations&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="DRACO benchmark scores for Fusion and solo configurations" title="DRACO benchmark scores for Fusion and solo configurations" srcset="https://substackcdn.com/image/fetch/$s_!pcSB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38f49fe6-a76b-443e-a857-a552296fcb72_1024x714.png 424w, https://substackcdn.com/image/fetch/$s_!pcSB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38f49fe6-a76b-443e-a857-a552296fcb72_1024x714.png 848w, https://substackcdn.com/image/fetch/$s_!pcSB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38f49fe6-a76b-443e-a857-a552296fcb72_1024x714.png 1272w, https://substackcdn.com/image/fetch/$s_!pcSB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38f49fe6-a76b-443e-a857-a552296fcb72_1024x714.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this method <a href="https://openrouter.ai/blog/announcements/fusion-beats-frontier/">using this link</a>.</p><div><hr></div><h3>7. 
Next-Latent Prediction Transformers Learn Compact World Models</h3><p><span>This research paper introduces&nbsp;</span><strong><span>Next-Latent Prediction (NextLat)</span></strong><span>, which adds a self-supervised&nbsp;</span><strong><span>next-latent prediction</span></strong><span>&nbsp;loss to transformers, helping them learn latent representations that predict the next latent state given the next token.</span></p><p>These latent representations form &#8220;belief states&#8221;, which are compressed information about the history necessary to predict the future (compact internal world models).</p><p>Across benchmarks in world modeling, reasoning, planning, and language modeling, NextLat leads to significant gains over standard next-token prediction and other baselines in downstream accuracy, representation compression, and lookahead planning. </p><p>It also enables variable-length self-speculative decoding, improving inference by up to 3.3&#215; in language modeling.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!MWUe!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa59532a9-3248-4d0f-9817-a16319354fd8_2478x1014.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!MWUe!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa59532a9-3248-4d0f-9817-a16319354fd8_2478x1014.png 424w, https://substackcdn.com/image/fetch/$s_!MWUe!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa59532a9-3248-4d0f-9817-a16319354fd8_2478x1014.png 848w, 
https://substackcdn.com/image/fetch/$s_!MWUe!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa59532a9-3248-4d0f-9817-a16319354fd8_2478x1014.png 1272w, https://substackcdn.com/image/fetch/$s_!MWUe!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa59532a9-3248-4d0f-9817-a16319354fd8_2478x1014.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!MWUe!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa59532a9-3248-4d0f-9817-a16319354fd8_2478x1014.png" width="1456" height="596" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a59532a9-3248-4d0f-9817-a16319354fd8_2478x1014.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:596,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:334777,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/202885851?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa59532a9-3248-4d0f-9817-a16319354fd8_2478x1014.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!MWUe!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa59532a9-3248-4d0f-9817-a16319354fd8_2478x1014.png 424w, 
https://substackcdn.com/image/fetch/$s_!MWUe!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa59532a9-3248-4d0f-9817-a16319354fd8_2478x1014.png 848w, https://substackcdn.com/image/fetch/$s_!MWUe!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa59532a9-3248-4d0f-9817-a16319354fd8_2478x1014.png 1272w, https://substackcdn.com/image/fetch/$s_!MWUe!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa59532a9-3248-4d0f-9817-a16319354fd8_2478x1014.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2511.05963">using this link</a>.</p><div><hr></div><h3>6. HumanScale: Egocentric Human Video Can Outperform Real-Robot Data for Embodied Pretraining</h3><p>This research paper presents a systematic study that compares egocentric human video with teleoperated real-robot trajectories as pretraining data for embodied robot foundation models.</p><p>Human egocentric data is not only scalable, substantially lower-cost, and more diverse than teleoperated real-robot data, but under fixed pretraining, post-training, and validation protocols, it also leads to superior performance.</p><p>With the same amount of pretraining data, models pretrained on egocentric data achieve:</p><ul><li><p>24% lower validation loss on real-robot action prediction</p></li><li><p>52.5% higher success rates on in-distribution real-robot task execution</p></li><li><p>90% higher success rates on out-of-distribution real-robot task execution</p></li></ul><p>This suggests that the best approach to training an embodied foundation model is to pretrain on egocentric human video to learn diverse world representations, then adapt using a small amount of labeled real-robot data for action-space alignment.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!5-zm!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09276905-3d95-4ce3-9052-67086cc1e0a6_1854x1280.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!5-zm!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09276905-3d95-4ce3-9052-67086cc1e0a6_1854x1280.png 424w, https://substackcdn.com/image/fetch/$s_!5-zm!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09276905-3d95-4ce3-9052-67086cc1e0a6_1854x1280.png 848w, https://substackcdn.com/image/fetch/$s_!5-zm!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09276905-3d95-4ce3-9052-67086cc1e0a6_1854x1280.png 1272w, https://substackcdn.com/image/fetch/$s_!5-zm!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09276905-3d95-4ce3-9052-67086cc1e0a6_1854x1280.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!5-zm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09276905-3d95-4ce3-9052-67086cc1e0a6_1854x1280.png" width="1456" height="1005" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/09276905-3d95-4ce3-9052-67086cc1e0a6_1854x1280.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1005,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:805676,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/202885851?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09276905-3d95-4ce3-9052-67086cc1e0a6_1854x1280.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!5-zm!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09276905-3d95-4ce3-9052-67086cc1e0a6_1854x1280.png 424w, https://substackcdn.com/image/fetch/$s_!5-zm!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09276905-3d95-4ce3-9052-67086cc1e0a6_1854x1280.png 848w, https://substackcdn.com/image/fetch/$s_!5-zm!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09276905-3d95-4ce3-9052-67086cc1e0a6_1854x1280.png 1272w, https://substackcdn.com/image/fetch/$s_!5-zm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F09276905-3d95-4ce3-9052-67086cc1e0a6_1854x1280.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2606.20521">using this link</a>.</p><div><hr></div><h3>5. Variable-Width Transformers</h3><p>Standard Transformers use the same width for all layers. This means that each layer has the same number of parameters and compute budget, even though they might have different functions and computational needs in language modeling. </p><p>This research paper from MIT changes this by introducing an X-shaped Transformer called the &#8220;<strong>&gt;&lt; former</strong>&#8221;, in which the early and late layers remain wide, while the middle layers are narrower.</p><p>This approach works surprisingly well and outperforms parameter-matched standard Transformers (ranging from 200M to 3B parameters) in terms of language modeling loss and on most downstream tasks. 
</p><p>This architecture also requires fewer overall FLOPs (a 22% reduction) and smaller KV cache memory and I/O costs (a 15% reduction).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!3oUc!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9c21ecd-4416-4ab6-9186-2842de45502f_2354x928.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!3oUc!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9c21ecd-4416-4ab6-9186-2842de45502f_2354x928.png 424w, https://substackcdn.com/image/fetch/$s_!3oUc!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9c21ecd-4416-4ab6-9186-2842de45502f_2354x928.png 848w, https://substackcdn.com/image/fetch/$s_!3oUc!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9c21ecd-4416-4ab6-9186-2842de45502f_2354x928.png 1272w, https://substackcdn.com/image/fetch/$s_!3oUc!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9c21ecd-4416-4ab6-9186-2842de45502f_2354x928.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!3oUc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9c21ecd-4416-4ab6-9186-2842de45502f_2354x928.png" width="1456" height="574" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b9c21ecd-4416-4ab6-9186-2842de45502f_2354x928.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:574,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:145056,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/202885851?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9c21ecd-4416-4ab6-9186-2842de45502f_2354x928.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!3oUc!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9c21ecd-4416-4ab6-9186-2842de45502f_2354x928.png 424w, https://substackcdn.com/image/fetch/$s_!3oUc!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9c21ecd-4416-4ab6-9186-2842de45502f_2354x928.png 848w, https://substackcdn.com/image/fetch/$s_!3oUc!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9c21ecd-4416-4ab6-9186-2842de45502f_2354x928.png 1272w, https://substackcdn.com/image/fetch/$s_!3oUc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9c21ecd-4416-4ab6-9186-2842de45502f_2354x928.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2606.18246v1">using this link</a>.</p><div><hr></div><h3>4. 
Kimi K2.7 Code</h3><p>Moonshot AI released <strong>Kimi K2.7 Code</strong>, its new open-source agentic model that is optimized for long-horizon software engineering tasks.</p><p>It is a 1T-parameter MoE model with 32B active parameters and uses <a href="https://www.intoai.pub/p/multi-head-latent-attention-is-the?utm_source=publication-search">MLA attention</a> and the MoonViT vision encoder.</p><p>It improves over K2.6 on multiple coding and agent benchmarks, has better instruction-following capabilities in long contexts, uses about 30% fewer thinking tokens, and supports a 256K context window.</p><p>Alongside this, its performance is close to that of GPT-5.5 and Claude Opus 4.8 on many benchmarks while it is roughly 5-7x cheaper to run.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!AbPl!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62b46fbb-ee77-419b-bfd9-e75302dd996f_1280x720.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!AbPl!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62b46fbb-ee77-419b-bfd9-e75302dd996f_1280x720.png 424w, https://substackcdn.com/image/fetch/$s_!AbPl!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62b46fbb-ee77-419b-bfd9-e75302dd996f_1280x720.png 848w, https://substackcdn.com/image/fetch/$s_!AbPl!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62b46fbb-ee77-419b-bfd9-e75302dd996f_1280x720.png 1272w, 
https://substackcdn.com/image/fetch/$s_!AbPl!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62b46fbb-ee77-419b-bfd9-e75302dd996f_1280x720.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!AbPl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62b46fbb-ee77-419b-bfd9-e75302dd996f_1280x720.png" width="1280" height="720" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/62b46fbb-ee77-419b-bfd9-e75302dd996f_1280x720.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:720,&quot;width&quot;:1280,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Benchmark comparison of Kimi K2.7 Code, Kimi K2.6, GPT-5.5, and Claude Opus 4.8 across six coding and agentic benchmarks&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Benchmark comparison of Kimi K2.7 Code, Kimi K2.6, GPT-5.5, and Claude Opus 4.8 across six coding and agentic benchmarks" title="Benchmark comparison of Kimi K2.7 Code, Kimi K2.6, GPT-5.5, and Claude Opus 4.8 across six coding and agentic benchmarks" srcset="https://substackcdn.com/image/fetch/$s_!AbPl!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62b46fbb-ee77-419b-bfd9-e75302dd996f_1280x720.png 424w, https://substackcdn.com/image/fetch/$s_!AbPl!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62b46fbb-ee77-419b-bfd9-e75302dd996f_1280x720.png 848w, 
https://substackcdn.com/image/fetch/$s_!AbPl!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62b46fbb-ee77-419b-bfd9-e75302dd996f_1280x720.png 1272w, https://substackcdn.com/image/fetch/$s_!AbPl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62b46fbb-ee77-419b-bfd9-e75302dd996f_1280x720.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this release <a href="https://www.kimi.com/resources/kimi-k2-7-code">using this link</a>.</p><div><hr></div><h3>3. 
VibeThinker-3B</h3><p>This research paper from Weibo introduces <strong>VibeThinker-3B</strong>, a 3B-parameter dense reasoning model that achieves performance comparable to that of large frontier models on math and coding tasks. </p><p>The model is post-trained from the Qwen2.5-Coder-3B base model using several techniques, including:</p><ul><li><p><strong>Curriculum-based two-stage SFT:</strong> Training first on broad reasoning/dialogue data, then on progressively harder long-horizon math/code/STEM examples </p></li><li><p><strong>Multi-domain RLVR using MGPO:</strong> Training across math, coding, and STEM examples using the MGPO algorithm. MaxEnt-Guided Policy Optimization (MGPO) helps the model explore different reasoning paths near its current capability, then amplifies the paths that produce verifiably correct answers.</p></li><li><p><strong>Offline Self-Distillation:</strong> Collecting the best reasoning traces from the model&#8217;s own RL-specialized checkpoints and distilling them back into a single 3B model </p></li><li><p><strong>Instruct RL:</strong> Training the model to follow user instructions and stick to output formats and user constraints reliably without losing reasoning gains</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!gzqI!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13ca5176-9e4b-4578-8b6a-409b6d4d6528_2720x906.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!gzqI!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13ca5176-9e4b-4578-8b6a-409b6d4d6528_2720x906.png 424w, 
https://substackcdn.com/image/fetch/$s_!gzqI!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13ca5176-9e4b-4578-8b6a-409b6d4d6528_2720x906.png 848w, https://substackcdn.com/image/fetch/$s_!gzqI!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13ca5176-9e4b-4578-8b6a-409b6d4d6528_2720x906.png 1272w, https://substackcdn.com/image/fetch/$s_!gzqI!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13ca5176-9e4b-4578-8b6a-409b6d4d6528_2720x906.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!gzqI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13ca5176-9e4b-4578-8b6a-409b6d4d6528_2720x906.png" width="1456" height="485" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/13ca5176-9e4b-4578-8b6a-409b6d4d6528_2720x906.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:485,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:356201,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/202885851?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13ca5176-9e4b-4578-8b6a-409b6d4d6528_2720x906.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!gzqI!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13ca5176-9e4b-4578-8b6a-409b6d4d6528_2720x906.png 
424w, https://substackcdn.com/image/fetch/$s_!gzqI!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13ca5176-9e4b-4578-8b6a-409b6d4d6528_2720x906.png 848w, https://substackcdn.com/image/fetch/$s_!gzqI!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13ca5176-9e4b-4578-8b6a-409b6d4d6528_2720x906.png 1272w, https://substackcdn.com/image/fetch/$s_!gzqI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F13ca5176-9e4b-4578-8b6a-409b6d4d6528_2720x906.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The model scores 94.3% on AIME26 (97.1% with Claim-level Reliability), 80.2% Pass@1 on LiveCodeBench v6, and achieves a 96.1% acceptance rate on recent unseen LeetCode contests.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!dyj4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc5f91c7-7864-48dc-90cb-e4f74a443b5e_2132x1156.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!dyj4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc5f91c7-7864-48dc-90cb-e4f74a443b5e_2132x1156.png 424w, https://substackcdn.com/image/fetch/$s_!dyj4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc5f91c7-7864-48dc-90cb-e4f74a443b5e_2132x1156.png 848w, https://substackcdn.com/image/fetch/$s_!dyj4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc5f91c7-7864-48dc-90cb-e4f74a443b5e_2132x1156.png 1272w, https://substackcdn.com/image/fetch/$s_!dyj4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc5f91c7-7864-48dc-90cb-e4f74a443b5e_2132x1156.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!dyj4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc5f91c7-7864-48dc-90cb-e4f74a443b5e_2132x1156.png" width="1456" height="789" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/dc5f91c7-7864-48dc-90cb-e4f74a443b5e_2132x1156.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:789,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:421730,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/202885851?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc5f91c7-7864-48dc-90cb-e4f74a443b5e_2132x1156.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!dyj4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc5f91c7-7864-48dc-90cb-e4f74a443b5e_2132x1156.png 424w, https://substackcdn.com/image/fetch/$s_!dyj4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc5f91c7-7864-48dc-90cb-e4f74a443b5e_2132x1156.png 848w, https://substackcdn.com/image/fetch/$s_!dyj4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc5f91c7-7864-48dc-90cb-e4f74a443b5e_2132x1156.png 1272w, https://substackcdn.com/image/fetch/$s_!dyj4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc5f91c7-7864-48dc-90cb-e4f74a443b5e_2132x1156.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2606.16140">using this link</a>.</p><div><hr></div><h3>2. GLM-5.2</h3><p>Z.ai introduced <strong>GLM-5.2</strong>, its open MIT-licensed flagship model for long-horizon coding and agent tasks.</p><p>The model:</p><ul><li><p>Has a 1M-token context window</p></li><li><p>Has stronger coding capabilities with multiple thinking effort levels to balance performance and latency</p></li><li><p>Uses <a href="https://arxiv.org/abs/2603.12201">IndexShare</a><span>, which reuses the same indexer across every four sparse attention layers, reducing per-token FLOPs by 2.9&#215; at a 1M context length. 
</span></p></li><li><p>Has an MTP layer that is better suited for speculative decoding, increasing the acceptance length by up to 20%.</p></li></ul><p>GLM-5.2 is the strongest open model on several coding and agentic benchmarks, with performance close to that of frontier models on long-horizon tasks.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!q4pt!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0babc4ae-5e66-4742-b8d0-c4c59f0721a2_6006x3894.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!q4pt!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0babc4ae-5e66-4742-b8d0-c4c59f0721a2_6006x3894.png 424w, https://substackcdn.com/image/fetch/$s_!q4pt!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0babc4ae-5e66-4742-b8d0-c4c59f0721a2_6006x3894.png 848w, https://substackcdn.com/image/fetch/$s_!q4pt!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0babc4ae-5e66-4742-b8d0-c4c59f0721a2_6006x3894.png 1272w, https://substackcdn.com/image/fetch/$s_!q4pt!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0babc4ae-5e66-4742-b8d0-c4c59f0721a2_6006x3894.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!q4pt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0babc4ae-5e66-4742-b8d0-c4c59f0721a2_6006x3894.png" width="1456" height="944" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0babc4ae-5e66-4742-b8d0-c4c59f0721a2_6006x3894.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:944,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;img_v3_0212n_dd3e6c79-bb10-4959-9080-56eb8525b92g&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="img_v3_0212n_dd3e6c79-bb10-4959-9080-56eb8525b92g" title="img_v3_0212n_dd3e6c79-bb10-4959-9080-56eb8525b92g" srcset="https://substackcdn.com/image/fetch/$s_!q4pt!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0babc4ae-5e66-4742-b8d0-c4c59f0721a2_6006x3894.png 424w, https://substackcdn.com/image/fetch/$s_!q4pt!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0babc4ae-5e66-4742-b8d0-c4c59f0721a2_6006x3894.png 848w, https://substackcdn.com/image/fetch/$s_!q4pt!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0babc4ae-5e66-4742-b8d0-c4c59f0721a2_6006x3894.png 1272w, https://substackcdn.com/image/fetch/$s_!q4pt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0babc4ae-5e66-4742-b8d0-c4c59f0721a2_6006x3894.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" 
fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!JYJP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F64f6e814-19dc-4d48-8079-6c2ed35b46d8_4239x2799.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!JYJP!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F64f6e814-19dc-4d48-8079-6c2ed35b46d8_4239x2799.png 424w, https://substackcdn.com/image/fetch/$s_!JYJP!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F64f6e814-19dc-4d48-8079-6c2ed35b46d8_4239x2799.png 
848w, https://substackcdn.com/image/fetch/$s_!JYJP!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F64f6e814-19dc-4d48-8079-6c2ed35b46d8_4239x2799.png 1272w, https://substackcdn.com/image/fetch/$s_!JYJP!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F64f6e814-19dc-4d48-8079-6c2ed35b46d8_4239x2799.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!JYJP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F64f6e814-19dc-4d48-8079-6c2ed35b46d8_4239x2799.png" width="1456" height="961" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/64f6e814-19dc-4d48-8079-6c2ed35b46d8_4239x2799.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:961,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;img_v3_0212o_51684a16-c33f-4429-aea5-9f5f7cdfc30g&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="img_v3_0212o_51684a16-c33f-4429-aea5-9f5f7cdfc30g" title="img_v3_0212o_51684a16-c33f-4429-aea5-9f5f7cdfc30g" srcset="https://substackcdn.com/image/fetch/$s_!JYJP!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F64f6e814-19dc-4d48-8079-6c2ed35b46d8_4239x2799.png 424w, https://substackcdn.com/image/fetch/$s_!JYJP!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F64f6e814-19dc-4d48-8079-6c2ed35b46d8_4239x2799.png 848w, 
https://substackcdn.com/image/fetch/$s_!JYJP!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F64f6e814-19dc-4d48-8079-6c2ed35b46d8_4239x2799.png 1272w, https://substackcdn.com/image/fetch/$s_!JYJP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F64f6e814-19dc-4d48-8079-6c2ed35b46d8_4239x2799.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this release <a href="https://z.ai/blog/glm-5.2">using this link</a>.</p><div><hr></div><h3><strong>1. 
Towards autonomous medical artificial intelligence agents</strong></h3><p>This research paper introduces <strong>MIRA (Medical Intelligence for Reasoning and Action)</strong>, an autonomous AI agent capable of operating within a sandboxed EHR (Electronic Health Record) environment.</p><p>It can:</p><ul><li><p>Take histories</p></li><li><p>Order and interpret laboratory, imaging, and microbiology tests</p></li><li><p>Generate differential diagnoses</p></li><li><p>Formulate treatment plans, including prescribing medications, scheduling surgical procedures, and planning admissions. </p></li></ul><p>On 574 real MIMIC-IV cases across 8 diseases, MIRA achieved 88.9% diagnostic accuracy and, in head-to-head testing, outperformed board-certified physicians (87.8% vs 78.1% accuracy), while showing strong medication safety and alignment with medical guidelines.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Qr2X!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1f6cd9f3-4776-44f3-899c-0ad51d5e1280_1254x1368.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Qr2X!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1f6cd9f3-4776-44f3-899c-0ad51d5e1280_1254x1368.png 424w, https://substackcdn.com/image/fetch/$s_!Qr2X!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1f6cd9f3-4776-44f3-899c-0ad51d5e1280_1254x1368.png 848w, https://substackcdn.com/image/fetch/$s_!Qr2X!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1f6cd9f3-4776-44f3-899c-0ad51d5e1280_1254x1368.png 1272w, 
https://substackcdn.com/image/fetch/$s_!Qr2X!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1f6cd9f3-4776-44f3-899c-0ad51d5e1280_1254x1368.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Qr2X!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1f6cd9f3-4776-44f3-899c-0ad51d5e1280_1254x1368.png" width="1254" height="1368" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1f6cd9f3-4776-44f3-899c-0ad51d5e1280_1254x1368.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1368,&quot;width&quot;:1254,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:951356,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/202885851?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1f6cd9f3-4776-44f3-899c-0ad51d5e1280_1254x1368.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Qr2X!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1f6cd9f3-4776-44f3-899c-0ad51d5e1280_1254x1368.png 424w, https://substackcdn.com/image/fetch/$s_!Qr2X!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1f6cd9f3-4776-44f3-899c-0ad51d5e1280_1254x1368.png 848w, 
https://substackcdn.com/image/fetch/$s_!Qr2X!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1f6cd9f3-4776-44f3-899c-0ad51d5e1280_1254x1368.png 1272w, https://substackcdn.com/image/fetch/$s_!Qr2X!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1f6cd9f3-4776-44f3-899c-0ad51d5e1280_1254x1368.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://www.nature.com/articles/s41586-026-10675-5.pdf">using this 
link</a>.</p><div><hr></div><p>This newsletter edition is completely free to read. Show your love by liking it, restacking it, and sharing it with others! &#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/this-week-in-ai-research-14-20-june?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/p/this-week-in-ai-research-14-20-june?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p><p>Join the <strong>paid tier today</strong> to get access to all posts in this newsletter:</p><ul><li><p>&#127752; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p>&#129489;&#127995;&#8205;&#128187; <a href="https://www.intoai.pub/p/building-your-first-ai-agent">Building Your First AI Agent</a></p></li><li><p>&#128126; <a href="https://www.intoai.pub/p/tiny-recursive-model">Tiny Recursive Model (TRM): A Deep Dive</a></p></li><li><p>&#128119;&#127996;&#8205;&#9794;&#65039; <a href="https://www.intoai.pub/p/build-a-vector-database-from-scratch">Build A Vector Database From Scratch To Understand RAG In Depth</a></p></li><li><p>&#128640; <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Build a Mixture-of-Experts (MoE) Layer from Scratch</a></p></li></ul><p>and so many more!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join 'Into AI' premium today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join 'Into AI' premium 
today</span></a></p>]]></content:encoded></item><item><title><![CDATA[A hardware-level tour of how LLMs generate text]]></title><description><![CDATA[Understand in depth how LLM Inference actually works at the CPU and GPU level.]]></description><link>https://www.intoai.pub/p/a-hardware-level-tour-of-llm-inference</link><guid isPermaLink="false">https://www.intoai.pub/p/a-hardware-level-tour-of-llm-inference</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Mon, 22 Jun 2026 23:51:37 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!4DwK!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p><strong>&#10024; Today&#8217;s newsletter edition is sponsored by <a href="https://www.backplanes.com/">Backplanes</a>.&#10024;</strong></p><p>Your agent just ran for an hour. It changed multiple files, called multiple tools, followed leads, hit dead ends, made decisions, and maybe touched something you wish it hadn&#8217;t. Most of this is invisible or too big for you to go through.</p><p><strong><a href="https://www.backplanes.com/">Spotlight by Backplanes</a></strong> turns your Claude Code and Codex sessions into valuable reports, so you can understand the agent run without digging through logs.</p><p>Spotlight is <strong>free</strong> for individual developers and the teams they work with (no credit card required). 
They also remove sensitive info, encrypt your data, use providers that do not store it, never sell it, and delete it when you delete your sessions, projects, or account.</p><p><em>Btw, I personally used it and, embarrassingly, found out that Claude Code was reading my API keys during my coding sessions.</em></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9ShS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F164cab30-0648-4865-b5a6-74d774114d52_1362x1014.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9ShS!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F164cab30-0648-4865-b5a6-74d774114d52_1362x1014.jpeg 424w, https://substackcdn.com/image/fetch/$s_!9ShS!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F164cab30-0648-4865-b5a6-74d774114d52_1362x1014.jpeg 848w, https://substackcdn.com/image/fetch/$s_!9ShS!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F164cab30-0648-4865-b5a6-74d774114d52_1362x1014.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!9ShS!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F164cab30-0648-4865-b5a6-74d774114d52_1362x1014.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9ShS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F164cab30-0648-4865-b5a6-74d774114d52_1362x1014.jpeg" width="1362" height="1014" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/164cab30-0648-4865-b5a6-74d774114d52_1362x1014.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1014,&quot;width&quot;:1362,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9ShS!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F164cab30-0648-4865-b5a6-74d774114d52_1362x1014.jpeg 424w, https://substackcdn.com/image/fetch/$s_!9ShS!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F164cab30-0648-4865-b5a6-74d774114d52_1362x1014.jpeg 848w, https://substackcdn.com/image/fetch/$s_!9ShS!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F164cab30-0648-4865-b5a6-74d774114d52_1362x1014.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!9ShS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F164cab30-0648-4865-b5a6-74d774114d52_1362x1014.jpeg 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.backplanes.com/&quot;,&quot;text&quot;:&quot;Try Spotlight today &#8594;&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.backplanes.com/"><span>Try Spotlight today &#8594;</span></a></p><div><hr></div><p>LLM Inference is the process of running a forward pass through a trained model to produce text. Understanding what happens at inference time at the CPU/GPU level will help you optimize this process more effectively. 
Here is a lesson where we discuss exactly this.</p><div><hr></div><h3>The process starts with loading parameters into the CPU memory</h3><p>To begin with, a trained model&#8217;s parameters are stored on disk (preferably an <a href="https://www.ibm.com/think/topics/ssd-vs-nvme">NVMe SSD</a>) and can have different formats such as:</p><ul><li><p><a href="https://huggingface.co/docs/safetensors/index">Safetensors</a></p></li><li><p><a href="https://docs.pytorch.org/tutorials/beginner/saving_loading_models.html">bin or pt</a> when working with PyTorch</p></li><li><p><a href="https://huggingface.co/docs/hub/en/gguf">GGUF</a></p></li></ul><p>These come alongside a <code>config.json</code> file that describes the model architecture, hyperparameters, and data type.</p><p>(Check out the <a href="https://huggingface.co/deepseek-ai/DeepSeek-V4-Flash/blob/main/config.json">config.json</a> and <a href="https://huggingface.co/deepseek-ai/DeepSeek-V4-Flash/tree/main">model parameter files</a> for the DeepSeek-V4-Flash model to understand this better.)</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2l3O!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6ce4d0a-7cc4-4a9a-89b2-b28bf5d2e1b8_1262x1102.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!2l3O!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6ce4d0a-7cc4-4a9a-89b2-b28bf5d2e1b8_1262x1102.png 424w, https://substackcdn.com/image/fetch/$s_!2l3O!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6ce4d0a-7cc4-4a9a-89b2-b28bf5d2e1b8_1262x1102.png 848w, 
https://substackcdn.com/image/fetch/$s_!2l3O!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6ce4d0a-7cc4-4a9a-89b2-b28bf5d2e1b8_1262x1102.png 1272w, https://substackcdn.com/image/fetch/$s_!2l3O!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6ce4d0a-7cc4-4a9a-89b2-b28bf5d2e1b8_1262x1102.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!2l3O!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6ce4d0a-7cc4-4a9a-89b2-b28bf5d2e1b8_1262x1102.png" width="1262" height="1102" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b6ce4d0a-7cc4-4a9a-89b2-b28bf5d2e1b8_1262x1102.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1102,&quot;width&quot;:1262,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:331963,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201900247?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6ce4d0a-7cc4-4a9a-89b2-b28bf5d2e1b8_1262x1102.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!2l3O!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6ce4d0a-7cc4-4a9a-89b2-b28bf5d2e1b8_1262x1102.png 424w, 
https://substackcdn.com/image/fetch/$s_!2l3O!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6ce4d0a-7cc4-4a9a-89b2-b28bf5d2e1b8_1262x1102.png 848w, https://substackcdn.com/image/fetch/$s_!2l3O!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6ce4d0a-7cc4-4a9a-89b2-b28bf5d2e1b8_1262x1102.png 1272w, https://substackcdn.com/image/fetch/$s_!2l3O!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6ce4d0a-7cc4-4a9a-89b2-b28bf5d2e1b8_1262x1102.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Model parameters for DeepSeek-V4-Flash in the Safetensors format (<a href="https://huggingface.co/deepseek-ai/DeepSeek-V4-Flash/tree/main">Source</a>)</figcaption></figure></div><p>A model loader uses the <code>config.json</code> file to build the model skeleton and then loads the parameters into the CPU memory (also called system memory). <span>This memory&nbsp;</span><span data-color="rgb(61, 59, 73)" style="color: rgb(61, 59, 73);">is typically&nbsp;</span><a href="https://en.wikipedia.org/wiki/Dynamic_random-access_memory">Dynamic random-access memory&nbsp;<span data-color="rgb(61, 59, 73)" style="color: rgb(61, 59, 73);">(DRAM)</span></a><span>,</span><span data-color="rgb(61, 59, 73)" style="color: rgb(61, 59, 73);">&nbsp;which offers larger capacity and is cheaper than the GPU memory.</span></p><p><span data-color="rgb(61, 59, 73)" style="color: rgb(61, 59, 73);">The model parameters are next transferred to the GPU's HBM (High Bandwidth Memory), also known as global memory (or, generally, VRAM). This transfer takes place over&nbsp;</span><a href="https://en.wikipedia.org/wiki/PCI_Express"><span data-color="rgb(61, 59, 73)" style="color: rgb(61, 59, 73);">PCIe</span></a><span data-color="rgb(61, 59, 73)" style="color: rgb(61, 59, 73);">,&nbsp;a high-speed connection between the CPU and GPU. </span></p><p><span data-color="rgb(61, 59, 73)" style="color: rgb(61, 59, 73);">Seen the </span><code>tensor.to('cuda')</code> method while <a href="https://www.intoai.pub/i/182314468/preparing-to-train-our-model">training models</a>? This is what happens under the hood when you call this method.</p><p>HBM is a specialized type of DRAM designed for massive parallel data throughput. While CPU memory has a throughput of<span data-color="rgb(61, 59, 73)" style="color: rgb(61, 59, 73);"> 50 to a few hundred GB/s, HBM can deliver a throughput of a few TB/s. 
Although fast, HBM is smaller and much more expensive than CPU memory.</span></p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!eviC!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4769da7e-dead-41ab-a66b-b9684550da23_2318x392.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!eviC!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4769da7e-dead-41ab-a66b-b9684550da23_2318x392.png 424w, https://substackcdn.com/image/fetch/$s_!eviC!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4769da7e-dead-41ab-a66b-b9684550da23_2318x392.png 848w, https://substackcdn.com/image/fetch/$s_!eviC!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4769da7e-dead-41ab-a66b-b9684550da23_2318x392.png 1272w, https://substackcdn.com/image/fetch/$s_!eviC!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4769da7e-dead-41ab-a66b-b9684550da23_2318x392.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!eviC!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4769da7e-dead-41ab-a66b-b9684550da23_2318x392.png" width="728" height="123" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4769da7e-dead-41ab-a66b-b9684550da23_2318x392.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:246,&quot;width&quot;:1456,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:48621,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201900247?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4769da7e-dead-41ab-a66b-b9684550da23_2318x392.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!eviC!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4769da7e-dead-41ab-a66b-b9684550da23_2318x392.png 424w, https://substackcdn.com/image/fetch/$s_!eviC!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4769da7e-dead-41ab-a66b-b9684550da23_2318x392.png 848w, https://substackcdn.com/image/fetch/$s_!eviC!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4769da7e-dead-41ab-a66b-b9684550da23_2318x392.png 1272w, https://substackcdn.com/image/fetch/$s_!eviC!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4769da7e-dead-41ab-a66b-b9684550da23_2318x392.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a><figcaption class="image-caption">Flow of model parameters from SSD to GPU HBM</figcaption></figure></div><div><hr></div><h3>What if the LLM is too big for the GPU 
memory?</h3><p>Let&#8217;s go back to the <a href="https://huggingface.co/deepseek-ai/DeepSeek-V4-Flash/blob/main/config.json">config.json</a> file for DeepSeek-V4-Flash, which is a <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Mixture-of-Experts model</a> with 284 billion total parameters.</p><p>If each parameter of this model is loaded in <a href="https://grokipedia.com/page/Minifloat">FP8 (8-bit floating-point) precision</a>, which means that 1 parameter is represented in 8 bits or 1 byte, the total memory requirement of this model for parameter storage will be:</p><blockquote><p>284B parameters &#215; 1 byte/parameter = 284 GB</p></blockquote><p>But this is not the only thing that the GPU HBM needs to store. We also need space for the <a href="https://en.wikipedia.org/wiki/Transformer_(deep_learning)#KV_caching">KV cache</a>, activations, and other overheads.</p><p>The widely used <a href="https://www.nvidia.com/en-gb/data-center/h100/">NVIDIA H100 GPU</a> comes with 80 GB of HBM. This is nowhere near enough to store 284 GB of parameters, let alone the KV cache and other overheads. This means that the parameters must be distributed across multiple GPUs.</p><p>A standard architecture for hosting a model is an <span>8-GPU </span><a href="https://developer.nvidia.com/blog/introducing-nvidia-hgx-h100-an-accelerated-server-platform-for-ai-and-high-performance-computing/"><span>NVIDIA HGX H100</span></a><span> server. 
This server has 8 H100 GPUs, each with 80 GB of HBM, for a total of 640 GB of memory.</span></p><p>The GPUs in the server are linked <span>using an </span><a href="https://docs.nvidia.com/ai-enterprise/release-8/latest/infra-software/vgpu/features/nvswitch.html"><span>NVSwitch</span></a><span>, which provides each GPU with a full-bandwidth path to every other GPU, enabling them to transfer data fast enough (900 GB/s) to behave as</span> a single large accelerator.</p><p>The term &#8220;full-bandwidth&#8221; is important here because if the GPUs are connected with NVLinks alone (without an NVSwitch), each GPU&#8217;s bandwidth is split across its links to the other GPUs. In our 8-GPU setup, any single GPU-to-GPU pair would get only about 128 GB/s of bandwidth, compared to 900 GB/s (full bandwidth) with NVSwitch.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!DtWp!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!DtWp!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 424w, https://substackcdn.com/image/fetch/$s_!DtWp!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 848w, https://substackcdn.com/image/fetch/$s_!DtWp!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 1272w, 
https://substackcdn.com/image/fetch/$s_!DtWp!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!DtWp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png" width="1456" height="465" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:465,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:112022,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201900247?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!DtWp!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 424w, https://substackcdn.com/image/fetch/$s_!DtWp!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 848w, https://substackcdn.com/image/fetch/$s_!DtWp!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 
1272w, https://substackcdn.com/image/fetch/$s_!DtWp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F201fb07d-a2f4-427e-ba4f-f5b1c5d2a575_2286x730.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"><a href="https://developer.nvidia.com/blog/introducing-nvidia-hgx-h100-an-accelerated-server-platform-for-ai-and-high-performance-computing/">NVIDIA HGX H100</a> server with GPUs connected using NVSwitch/ NVLink</figcaption></figure></div><p>There are many techniques that are used to distribute the inference load across these GPUs. 
Some techniques split the model itself, while others split the input workload. </p><p>These are described as follows:</p><ul><li><p><strong><span>Tensor parallelism (TP):</span></strong><span>&nbsp;Splitting parameter/weight matrices across GPUs, with each GPU performing part of the matrix multiplication and then syncing the results.</span></p></li><li><p><strong>Pipeline parallelism (PP):</strong> Dividing the model&#8217;s layers across GPUs, with each GPU handling a consecutive block of layers and passing the activations to the next.</p></li><li><p><strong>Context parallelism (CP):</strong> Splitting a single sequence and its KV cache across GPUs, with each GPU managing a part of the context to support very long sequence lengths.</p></li><li><p><strong><span>Expert parallelism (EP):</span></strong><span>&nbsp;Distributing the experts of a&nbsp;</span><a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch"><span>Mixture-of-Experts (MoE) model</span></a><span>&nbsp;across GPUs and routing each token to the GPUs that hold its chosen experts.</span></p></li><li><p><strong>Data parallelism (DP):</strong> Replicating the entire model on each GPU when it is small enough to fit, or replicating a fully sharded model across multiple GPU groups when the model is too large for a single GPU. Each replica then handles different user requests.</p></li><li><p><strong>Hybrid parallelism:</strong> Combining several of the techniques discussed above. For example, TP, PP, CP, and EP are combined to create a complete sharded model replica. 
DP is then added by creating multiple such replicas, each serving different user requests in parallel.</p></li></ul><p>If you&#8217;re completely new to these techniques, the following lesson will help.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;8fe83905-5f6d-4241-a162-f782cc1f7faa&quot;,&quot;caption&quot;:&quot;Understanding distributed setups for LLM training and inference is one of the biggest advantages that you can have as an engineer today. This is what we will work towards in this lesson by studying how Meta&#8217;s Llama 3 models were trained in a distributed setting.&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Distributed Training of Llama, Explained Simply&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | 
Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2026-06-05T11:27:54.327Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!k_v2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/distributed-training-of-llama-explained&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:200488145,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:14,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!xBa1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fad0f7ec6-837c-4c2b-9b4d-5365d1a9e668_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><h3>What's next after loading parameters into GPU memory?</h3><p>Once the parameters are loaded into GPU HBM, the model is ready to process incoming user requests.</p><p>Let&#8217;s talk a bit about the main components of a GPU like the H100. These are: </p><ul><li><p>Streaming Multiprocessors (SMs)</p></li><li><p>High-bandwidth memory (HBM)</p></li><li><p>On-chip memory</p></li></ul><p>Streaming Multiprocessors, or SMs, are the processing units where calculations happen in a GPU. 
They contain hundreds of smaller components called:</p><ul><li><p>CUDA cores, which perform fast general mathematical operations</p></li><li><p>Tensor cores, which perform fast matrix operations (matrix multiplications)</p></li></ul><p>The on-chip memory is <span>etched directly on the&nbsp;</span><a href="https://en.wikipedia.org/wiki/Die_(integrated_circuit)"><span>GPU die</span></a><span>, unlike HBM, which is mounted alongside</span> the die. This memory further consists of the following (arranged in ascending order of speed and descending order of capacity):</p><ul><li><p>L2 memory/cache</p></li><li><p>L1 or shared memory/cache (shared across components of an SM)</p></li><li><p>Registers</p></li></ul><p><span>On-chip memory&nbsp;components </span><span data-color="rgb(61, 59, 73)" style="color: rgb(61, 59, 73);">are&nbsp;</span><a href="https://en.wikipedia.org/wiki/Static_random-access_memory"><span>Static random-access memory&nbsp;</span><span data-color="rgb(61, 59, 73)" style="color: rgb(61, 59, 73);">(SRAM)</span></a><span>,</span><span data-color="rgb(61, 59, 73)" style="color: rgb(61, 59, 73);">&nbsp;which is extremely fast but much smaller than the DRAM-based memories (CPU memory and GPU HBM) we discussed earlier.</span></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!mIsv!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!mIsv!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 424w, 
https://substackcdn.com/image/fetch/$s_!mIsv!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 848w, https://substackcdn.com/image/fetch/$s_!mIsv!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 1272w, https://substackcdn.com/image/fetch/$s_!mIsv!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!mIsv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png" width="1456" height="659" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:659,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:108763,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201900247?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!mIsv!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 
424w, https://substackcdn.com/image/fetch/$s_!mIsv!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 848w, https://substackcdn.com/image/fetch/$s_!mIsv!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 1272w, https://substackcdn.com/image/fetch/$s_!mIsv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a7d8ea-3216-47fa-a7f3-1fd86378e780_1984x898.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">A simplified architectural overview of a GPU</figcaption></figure></div><p>For every user request/prompt, the model performs forward passes through its parameters in two phases: <strong>Prefill</strong> and <strong>Decode</strong>.</p><p>In each forward pass, every layer&#8217;s parameters are streamed from HBM to on-chip memory and then to the CUDA/Tensor cores, where the calculations actually occur. The on-chip copies of the weights are discarded once a token is produced (the parameters themselves stay in HBM), and this process repeats until the full sequence is generated or the maximum token-generation limit is reached.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!T5JU!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4a2f01f-b5d6-4e21-b8ee-f94efcdbae7a_2252x642.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!T5JU!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4a2f01f-b5d6-4e21-b8ee-f94efcdbae7a_2252x642.png 424w, https://substackcdn.com/image/fetch/$s_!T5JU!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4a2f01f-b5d6-4e21-b8ee-f94efcdbae7a_2252x642.png 848w, https://substackcdn.com/image/fetch/$s_!T5JU!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4a2f01f-b5d6-4e21-b8ee-f94efcdbae7a_2252x642.png 1272w, 
https://substackcdn.com/image/fetch/$s_!T5JU!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4a2f01f-b5d6-4e21-b8ee-f94efcdbae7a_2252x642.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!T5JU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4a2f01f-b5d6-4e21-b8ee-f94efcdbae7a_2252x642.png" width="1456" height="415" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f4a2f01f-b5d6-4e21-b8ee-f94efcdbae7a_2252x642.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:415,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:77616,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201900247?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4a2f01f-b5d6-4e21-b8ee-f94efcdbae7a_2252x642.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!T5JU!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4a2f01f-b5d6-4e21-b8ee-f94efcdbae7a_2252x642.png 424w, https://substackcdn.com/image/fetch/$s_!T5JU!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4a2f01f-b5d6-4e21-b8ee-f94efcdbae7a_2252x642.png 848w, https://substackcdn.com/image/fetch/$s_!T5JU!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4a2f01f-b5d6-4e21-b8ee-f94efcdbae7a_2252x642.png 
1272w, https://substackcdn.com/image/fetch/$s_!T5JU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff4a2f01f-b5d6-4e21-b8ee-f94efcdbae7a_2252x642.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Flow of model parameters in the GPU</figcaption></figure></div><p>If you want to better understand what calculations occur at the Transformer level, we have covered this in the previous lessons that you can find here:</p><div class="digest-post-embed" 
data-attrs="{&quot;nodeId&quot;:&quot;10c3c926-0638-44ea-80b8-4eea2239c6ae&quot;,&quot;caption&quot;:&quot;In the previous lesson on &#8216;Into AI&#8217;, we learned how to implement the Causal Multi-Head Self-Attention.&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build a Decoder-only Transformer from Scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2025-12-18T14:24:35.561Z&quot;,&quot;cover_image&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1c6156df-dbda-43ed-8a45-69ab67b23092_6912x3072.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/build-a-decoder-only-transformer&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:181774222,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:19,&quot;comment_count&quot;:2,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!xBa1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fad0f7ec6-837c-4c2b-9b4d-5365d1a9e668_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;adb9956e-b5ef-4871-b0b1-1a708e0e00af&quot;,&quot;caption&quot;:&quot;Into AI thrives thanks to the support of paid subscribers. If you want to access exclusive analysis, in-depth guides, and help this work continue, consider becoming a paid member today. Your support truly makes a difference!&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Top 4 Decoding Strategies In LLMs Explained Simply&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. 
Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2025-10-17T12:04:56.382Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!6KOw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe731fdea-885b-463b-8bce-5d32ac1d5ef0_2400x1067.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/decoding-strategies-in-llms&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:176405190,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:5,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!xBa1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fad0f7ec6-837c-4c2b-9b4d-5365d1a9e668_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><h3>What are Prefill and Decode?</h3><p><strong>Prefill</strong> is the first phase, in which all tokens in a user&#8217;s prompt are processed together in a single forward pass through the LLM.</p><p>During this phase, a key (K) and a value (V) vector are computed for each token, and these are cached to build a <strong>KV cache</strong> for the entire initial user prompt. 
This KV cache is stored alongside the model parameters and intermediate activations in the HBM.</p><p>Prefill relies heavily on the GPU's tensor cores and is <strong>compute-bound</strong>. This is because all tokens of the initial user prompt are being processed in parallel.</p><p>A faster prefill means a shorter Time to First Token (TTFT), which is the delay before the model begins responding to the user's prompt.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!BlFY!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef657480-9c1a-4b08-ae6a-1cee501c0fe8_2076x650.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!BlFY!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef657480-9c1a-4b08-ae6a-1cee501c0fe8_2076x650.png 424w, https://substackcdn.com/image/fetch/$s_!BlFY!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef657480-9c1a-4b08-ae6a-1cee501c0fe8_2076x650.png 848w, https://substackcdn.com/image/fetch/$s_!BlFY!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef657480-9c1a-4b08-ae6a-1cee501c0fe8_2076x650.png 1272w, https://substackcdn.com/image/fetch/$s_!BlFY!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef657480-9c1a-4b08-ae6a-1cee501c0fe8_2076x650.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!BlFY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef657480-9c1a-4b08-ae6a-1cee501c0fe8_2076x650.png" width="1456" height="456" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ef657480-9c1a-4b08-ae6a-1cee501c0fe8_2076x650.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:84304,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201900247?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef657480-9c1a-4b08-ae6a-1cee501c0fe8_2076x650.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!BlFY!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef657480-9c1a-4b08-ae6a-1cee501c0fe8_2076x650.png 424w, https://substackcdn.com/image/fetch/$s_!BlFY!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef657480-9c1a-4b08-ae6a-1cee501c0fe8_2076x650.png 848w, https://substackcdn.com/image/fetch/$s_!BlFY!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef657480-9c1a-4b08-ae6a-1cee501c0fe8_2076x650.png 1272w, https://substackcdn.com/image/fetch/$s_!BlFY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef657480-9c1a-4b08-ae6a-1cee501c0fe8_2076x650.png 1456w" sizes="100vw" 
loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Prefill (with KV caching not shown in the illustration)</figcaption></figure></div><p>Next comes <strong>Decode</strong>, the second phase, where the LLM generates one token at a time, with a single forward pass through the model for each token.</p><p>Instead of recomputing the key (K) and value (V) vectors for every previous token at each step, the model reuses these from the KV cache and only computes them for the new token. 
The newly computed K and V values are added to the KV cache for use in the next step of Decode.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!LNhV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44fd7774-d53d-4ff6-b668-532bc09bb672_1464x1238.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!LNhV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44fd7774-d53d-4ff6-b668-532bc09bb672_1464x1238.png 424w, https://substackcdn.com/image/fetch/$s_!LNhV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44fd7774-d53d-4ff6-b668-532bc09bb672_1464x1238.png 848w, https://substackcdn.com/image/fetch/$s_!LNhV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44fd7774-d53d-4ff6-b668-532bc09bb672_1464x1238.png 1272w, https://substackcdn.com/image/fetch/$s_!LNhV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44fd7774-d53d-4ff6-b668-532bc09bb672_1464x1238.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!LNhV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44fd7774-d53d-4ff6-b668-532bc09bb672_1464x1238.png" width="1456" height="1231" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/44fd7774-d53d-4ff6-b668-532bc09bb672_1464x1238.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1231,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:131575,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201900247?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44fd7774-d53d-4ff6-b668-532bc09bb672_1464x1238.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!LNhV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44fd7774-d53d-4ff6-b668-532bc09bb672_1464x1238.png 424w, https://substackcdn.com/image/fetch/$s_!LNhV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44fd7774-d53d-4ff6-b668-532bc09bb672_1464x1238.png 848w, https://substackcdn.com/image/fetch/$s_!LNhV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44fd7774-d53d-4ff6-b668-532bc09bb672_1464x1238.png 1272w, https://substackcdn.com/image/fetch/$s_!LNhV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44fd7774-d53d-4ff6-b668-532bc09bb672_1464x1238.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Decode (with KV caching not shown in the illustration)</figcaption></figure></div><p><strong>Decode</strong> is <strong>memory-bound </strong>and<strong> </strong>does not utilize the full capacity of the GPU&#8217;s Tensor cores. This is because, for every generated token, all model parameters must be streamed from HBM to the Tensor cores for a small amount of computation, discarded when the token is generated, and then re-read in full for the next token.</p><p>In this process, the calculations performed per token relative to the volume of weights moved are so small that the cores spend most of their time waiting on memory rather than computing.</p><div><hr></div><h3>Why is Decode memory-bound?</h3><p>Let&#8217;s understand this better using an example. 
</p><p>Let&#8217;s say that we are using a 30B dense model in 16-bit precision on a single H100 (with 80 GB of HBM). This is 60 GB of parameters (30B &#215; 2 bytes).</p><p>To produce one token, all 60 GB are streamed from HBM to the Tensor cores. At the higher end of the <a href="https://www.colfax-intl.com/nvidia/nvidia-h100">H100&#8217;s memory bandwidth</a> of 3.35 TB/s, it takes about 18 ms (60 GB / 3.35 TB/s) to stream the weights needed to generate one token. This gives a throughput of at most around 56 tokens per second.</p><p>Now let&#8217;s compare this with the computation involved. </p><p>A forward pass costs roughly 2 FLOPs per parameter per token (one multiply and one add operation), so the computational cost of processing a token through a 30B dense model is 60 GFLOPs (2 FLOPs &#215; 30B).</p><p>The H100's Tensor cores can perform 990 TFLOP/s of FP16 operations. This means that 60 GFLOPs of operations will take around 0.06 ms (60 GFLOPs / 990,000 GFLOP/s &#8776; 0.00006 s). </p><p>Taken together, the computation takes roughly 0.06 ms, while streaming the 60 GB of weights takes 18 ms.</p><p>The cores only compute for about 0.3% of the time required for the memory transfer. For the rest, they sit idle, waiting for the next set of parameters to arrive, making Decode memory-bound.</p><div class="pullquote"><p>Over the past 20 years, peak server hardware FLOPS has scaled by a factor of 3 every 2 years, outpacing the growth of DRAM and interconnect bandwidth, which have scaled by factors of 1.6&#215; and 1.4&#215; every 2 years, respectively. This has made memory (and not compute) the primary bottleneck in LLM inference. 
(<a href="https://arxiv.org/pdf/2403.14123">Source</a>)</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!zHk2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcac7af18-0cf7-4943-8c3a-59d659ca6355_2252x912.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!zHk2!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcac7af18-0cf7-4943-8c3a-59d659ca6355_2252x912.png 424w, https://substackcdn.com/image/fetch/$s_!zHk2!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcac7af18-0cf7-4943-8c3a-59d659ca6355_2252x912.png 848w, https://substackcdn.com/image/fetch/$s_!zHk2!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcac7af18-0cf7-4943-8c3a-59d659ca6355_2252x912.png 1272w, https://substackcdn.com/image/fetch/$s_!zHk2!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcac7af18-0cf7-4943-8c3a-59d659ca6355_2252x912.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!zHk2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcac7af18-0cf7-4943-8c3a-59d659ca6355_2252x912.png" width="1456" height="590" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/cac7af18-0cf7-4943-8c3a-59d659ca6355_2252x912.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:590,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:311951,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201900247?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcac7af18-0cf7-4943-8c3a-59d659ca6355_2252x912.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!zHk2!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcac7af18-0cf7-4943-8c3a-59d659ca6355_2252x912.png 424w, https://substackcdn.com/image/fetch/$s_!zHk2!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcac7af18-0cf7-4943-8c3a-59d659ca6355_2252x912.png 848w, https://substackcdn.com/image/fetch/$s_!zHk2!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcac7af18-0cf7-4943-8c3a-59d659ca6355_2252x912.png 1272w, https://substackcdn.com/image/fetch/$s_!zHk2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcac7af18-0cf7-4943-8c3a-59d659ca6355_2252x912.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div></div><div><hr></div><h3>Why is the KV Cache so important for Decode? </h3><p>The KV cache ensures that the keys (K) and values (V) for past tokens are computed once and stored, so each Decode step computes only one new token's K and V and reads the rest from cache. 
</p><p>This slows the increase in the cost of generating a single token as the sequence length grows, making Decode faster and more efficient.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!4DwK!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!4DwK!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png 424w, https://substackcdn.com/image/fetch/$s_!4DwK!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png 848w, https://substackcdn.com/image/fetch/$s_!4DwK!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png 1272w, https://substackcdn.com/image/fetch/$s_!4DwK!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!4DwK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png" width="728" height="297.5" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:595,&quot;width&quot;:1456,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:142404,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201900247?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!4DwK!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png 424w, https://substackcdn.com/image/fetch/$s_!4DwK!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png 848w, https://substackcdn.com/image/fetch/$s_!4DwK!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png 1272w, https://substackcdn.com/image/fetch/$s_!4DwK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8aa99349-6dd1-4c14-8dd6-b765d23ded36_2696x1102.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>You must remember that using a KV cache is not always a &#8220;free lunch&#8221; since it also takes space in the HBM. As its size grows (with increasing context length and the number of batched requests), it could consume as much memory as the parameters themselves. Managing its size is something that one needs to keep in mind when serving LLMs.</p><div><hr></div><h3>TL;DR</h3><p>To summarise:</p><ul><li><p>During inference, model parameters are moved from SSD to CPU memory, GPU HBM, on-chip memory, and finally to the computation cores.</p></li><li><p>Models that are too large for a single GPU are split across multiple GPUs using multiple parallelism techniques. 
</p></li><li><p>The multi-GPU server uses NVSwitch to connect the GPUs, resulting in full communication bandwidth between them.</p></li><li><p>Each inference request is processed in two phases: Prefill and Decode.</p></li><li><p>Prefill processes all prompt tokens at once. It is a compute-bound process, and the Time to First Token (TTFT) depends on it.</p></li><li><p>Decode generates one token at a time. It is memory-bound and acts as the bottleneck of inference. This is because generating each token involves moving a large number of parameters while performing very little computation, leaving the GPU's computation cores mostly idle while waiting on memory.</p></li><li><p>The KV cache helps keep token generation costs from growing rapidly as sequence length increases.</p></li><li><p>The KV cache also consumes HBM alongside model parameters and must be managed for efficient LLM serving.</p></li></ul><div><hr></div><p><strong>&#10024; </strong>Courtesy of <strong><a href="https://www.backplanes.com/">Backplanes</a>, </strong>this newsletter edition is completely free to read. <strong>&#10024;</strong></p><p>Show your love by liking it, restacking it, and sharing it with others! 
&#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/a-hardware-level-tour-of-llm-inference?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/p/a-hardware-level-tour-of-llm-inference?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p>]]></content:encoded></item><item><title><![CDATA[This Week In AI Research (7-13 June 26) 🗓️]]></title><description><![CDATA[The top 10 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-7-13-june</link><guid isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-7-13-june</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Mon, 15 Jun 2026 09:14:04 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!Srj9!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ad8741e-91a7-4a6c-8938-7965360ed317_1672x941.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Srj9!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ad8741e-91a7-4a6c-8938-7965360ed317_1672x941.png" data-component-name="Image2ToDOM"><div class="image2-inset image2-full-screen"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Srj9!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ad8741e-91a7-4a6c-8938-7965360ed317_1672x941.png 424w, 
https://substackcdn.com/image/fetch/$s_!Srj9!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ad8741e-91a7-4a6c-8938-7965360ed317_1672x941.png 848w, https://substackcdn.com/image/fetch/$s_!Srj9!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ad8741e-91a7-4a6c-8938-7965360ed317_1672x941.png 1272w, https://substackcdn.com/image/fetch/$s_!Srj9!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ad8741e-91a7-4a6c-8938-7965360ed317_1672x941.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Srj9!,w_5760,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ad8741e-91a7-4a6c-8938-7965360ed317_1672x941.png" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0ad8741e-91a7-4a6c-8938-7965360ed317_1672x941.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;full&quot;,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2211611,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201787804?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ad8741e-91a7-4a6c-8938-7965360ed317_1672x941.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-fullscreen" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!Srj9!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ad8741e-91a7-4a6c-8938-7965360ed317_1672x941.png 424w, https://substackcdn.com/image/fetch/$s_!Srj9!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ad8741e-91a7-4a6c-8938-7965360ed317_1672x941.png 848w, https://substackcdn.com/image/fetch/$s_!Srj9!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ad8741e-91a7-4a6c-8938-7965360ed317_1672x941.png 1272w, https://substackcdn.com/image/fetch/$s_!Srj9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ad8741e-91a7-4a6c-8938-7965360ed317_1672x941.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h3>10. DiffusionGemma</h3><p>Google released <strong>DiffusionGemma</strong>, an experimental open model that uses <a href="https://www.intoai.pub/p/diffusion-llms-explained-simply">Diffusion</a> to generate entire blocks of text simultaneously, leading to 4x faster text generation on dedicated GPUs.</p><p>DiffusionGemma is a 26B <a href="https://www.intoai.pub/p/build-and-train-a-mixture-of-experts">Mixture-of-Experts (MoE) model</a> that activates only 3.8 billion parameters during inference. It uses bidirectional attention to&nbsp;generate 256 tokens in parallel, with each forward pass allowing every token to attend to all others.</p><p>The model comes with native support for NVIDIA&#8217;s new <a href="https://developer.nvidia.com/blog/introducing-nvfp4-for-efficient-and-accurate-low-precision-inference/">NVFP4 (4-bit floating-point) format</a> on Blackwell GPUs, which dramatically increases compute throughput, enabling it to run at faster speeds with near-lossless accuracy.</p><p>The model&#8217;s impressive capabilities make it particularly helpful for speed-critical local workflows such as inline editing, rapid iteration, code infilling, non-linear text structures, amino acid sequences, mathematical graphs, and tasks like Sudoku.</p><p>However, it must also be noted that the standard Gemma 4 still produces higher-quality outputs than DiffusionGemma.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!DtnG!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4bca72-5447-4c6d-93ab-183b30e52bab_1000x562.bin" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!DtnG!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4bca72-5447-4c6d-93ab-183b30e52bab_1000x562.bin 424w, https://substackcdn.com/image/fetch/$s_!DtnG!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4bca72-5447-4c6d-93ab-183b30e52bab_1000x562.bin 848w, https://substackcdn.com/image/fetch/$s_!DtnG!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4bca72-5447-4c6d-93ab-183b30e52bab_1000x562.bin 1272w, https://substackcdn.com/image/fetch/$s_!DtnG!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4bca72-5447-4c6d-93ab-183b30e52bab_1000x562.bin 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!DtnG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4bca72-5447-4c6d-93ab-183b30e52bab_1000x562.bin" width="1000" height="562" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1b4bca72-5447-4c6d-93ab-183b30e52bab_1000x562.bin&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:562,&quot;width&quot;:1000,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;DiffusionGemma 
Benchmark&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="DiffusionGemma Benchmark" title="DiffusionGemma Benchmark" srcset="https://substackcdn.com/image/fetch/$s_!DtnG!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4bca72-5447-4c6d-93ab-183b30e52bab_1000x562.bin 424w, https://substackcdn.com/image/fetch/$s_!DtnG!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4bca72-5447-4c6d-93ab-183b30e52bab_1000x562.bin 848w, https://substackcdn.com/image/fetch/$s_!DtnG!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4bca72-5447-4c6d-93ab-183b30e52bab_1000x562.bin 1272w, https://substackcdn.com/image/fetch/$s_!DtnG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4bca72-5447-4c6d-93ab-183b30e52bab_1000x562.bin 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 
11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"><a href="https://blog.google/innovation-and-ai/technology/developers-tools/diffusion-gemma-faster-text-generation/">Source</a></figcaption></figure></div><p>Read more about this release <a href="https://blog.google/innovation-and-ai/technology/developers-tools/diffusion-gemma-faster-text-generation/">using this link</a>.</p><p>If you&#8217;re new to Diffusion LLMs, you can read more about them using the following lessons.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;c6b52666-5b50-48ee-b1f8-86b2be5ea89a&quot;,&quot;caption&quot;:&quot;LLM-based chatbots are all around us. They reply by producing their responses sequentially. This means that they generate their output token by token, one at a time.&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Diffusion LLMs, Explained Simply&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. 
Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2026-04-23T18:19:46.246Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!4OWT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8461e51e-f9d6-485f-ac77-552a448ec9e7_1922x948.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/diffusion-llms-explained-simply&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:194697013,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:12,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!xBa1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fad0f7ec6-837c-4c2b-9b4d-5365d1a9e668_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;893729d5-0356-4a6f-870e-073cda267580&quot;,&quot;caption&quot;:&quot;In the previous lessons on &#8216;Into AI&#8217;, we learned how to build and train an LLM from scratch.&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build 
and train a Diffusion LLM from scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2026-06-14T10:32:11.502Z&quot;,&quot;cover_image&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/95122e3b-af92-47a5-99b3-c861decf844a_1446x1330.webp&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/build-and-train-a-diffusion-llm&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:194059235,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:9,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!xBa1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fad0f7ec6-837c-4c2b-9b4d-5365d1a9e668_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><h3>9. Frontier Code</h3><p>Cognition introduced <strong>FrontierCode</strong>, a benchmark to test whether AI coding agents can produce production-quality, mergeable code rather than just passing tests. </p><p>Built with over 20 open-source maintainers across 36 major repositories, it includes 150 tasks divided into three tiers: Extended, Main, and the hardest Diamond tier. 
</p><p>These tasks are graded on correctness, regression safety, test quality, scope control, style, and codebase conventions. Its evaluation combines unit tests, rubrics, command checks, LLM review, and Cognition&#8217;s &#8220;mutagent&#8221; (an LLM-based tool to surgically patch the test environment/application code and align it with the agent&#8217;s implementation details), all with strong quality control.</p><p>Cognition claims that FrontierCode has an 81% lower false-positive rate than SWE-Bench Pro and that it is the first-ever benchmark measuring code quality and subtle human preferences.</p><p>Claude Opus 4.8 leads the benchmark with a score of 13.4%, GPT-5.5 scores 6.3% while using fewer tokens, and Kimi K2.6 is the top open-source model with a 3.8% score, all on the Diamond tier.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!w6Aw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9cf61af7-d86f-48a0-a516-8134e0539f2b_2638x1452.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!w6Aw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9cf61af7-d86f-48a0-a516-8134e0539f2b_2638x1452.png 424w, https://substackcdn.com/image/fetch/$s_!w6Aw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9cf61af7-d86f-48a0-a516-8134e0539f2b_2638x1452.png 848w, https://substackcdn.com/image/fetch/$s_!w6Aw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9cf61af7-d86f-48a0-a516-8134e0539f2b_2638x1452.png 1272w, 
https://substackcdn.com/image/fetch/$s_!w6Aw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9cf61af7-d86f-48a0-a516-8134e0539f2b_2638x1452.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!w6Aw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9cf61af7-d86f-48a0-a516-8134e0539f2b_2638x1452.png" width="1456" height="801" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9cf61af7-d86f-48a0-a516-8134e0539f2b_2638x1452.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:801,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:233500,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201787804?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9cf61af7-d86f-48a0-a516-8134e0539f2b_2638x1452.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!w6Aw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9cf61af7-d86f-48a0-a516-8134e0539f2b_2638x1452.png 424w, https://substackcdn.com/image/fetch/$s_!w6Aw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9cf61af7-d86f-48a0-a516-8134e0539f2b_2638x1452.png 848w, 
https://substackcdn.com/image/fetch/$s_!w6Aw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9cf61af7-d86f-48a0-a516-8134e0539f2b_2638x1452.png 1272w, https://substackcdn.com/image/fetch/$s_!w6Aw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9cf61af7-d86f-48a0-a516-8134e0539f2b_2638x1452.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this release <a href="https://cognition.ai/blog/frontier-code">using this link</a>.</p><div><hr></div><p>Before we 
move forward, I want to introduce you to my book called &#8216;<strong>LLMs In 100 Images</strong>&#8217;.</p><p>It is a collection of 100 easy-to-follow visuals that describe the most important concepts you need to master LLMs today.</p><p><strong><a href="https://bamaniaashish.gumroad.com/l/llmbook/EARLYBIRD">Grab your copy today at a special discount using this link.</a></strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_ysS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!_ysS!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 424w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 848w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 1272w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!_ysS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png" width="1456" 
height="728" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/aac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:728,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!_ysS!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 424w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 848w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 1272w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>8. Claude Fable 5 &amp; Mythos 5</h3><p>Anthropic released <strong>Claude Fable 5</strong>, a public <a href="https://www.anthropic.com/claude/mythos">Mythos-class model</a>, and <strong>Claude Mythos 5</strong>, the same underlying model with safeguards lifted for trusted cyberdefenders and infrastructure providers.</p><p>This model outperforms all of Anthropic&#8217;s previous models on nearly all tested benchmarks and is especially strong on long and complex tasks. 
</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!hPLG!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4b4241a2-7568-4081-b229-13f73a287685_2600x2870.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!hPLG!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4b4241a2-7568-4081-b229-13f73a287685_2600x2870.webp 424w, https://substackcdn.com/image/fetch/$s_!hPLG!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4b4241a2-7568-4081-b229-13f73a287685_2600x2870.webp 848w, https://substackcdn.com/image/fetch/$s_!hPLG!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4b4241a2-7568-4081-b229-13f73a287685_2600x2870.webp 1272w, https://substackcdn.com/image/fetch/$s_!hPLG!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4b4241a2-7568-4081-b229-13f73a287685_2600x2870.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!hPLG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4b4241a2-7568-4081-b229-13f73a287685_2600x2870.webp" width="1456" height="1607" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4b4241a2-7568-4081-b229-13f73a287685_2600x2870.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1607,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Benchmark 
table showing Claude Fable and Mythos compared to other leading models&quot;,&quot;title&quot;:&quot;Benchmark table showing Claude Fable and Mythos compared to other leading models&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Benchmark table showing Claude Fable and Mythos compared to other leading models" title="Benchmark table showing Claude Fable and Mythos compared to other leading models" srcset="https://substackcdn.com/image/fetch/$s_!hPLG!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4b4241a2-7568-4081-b229-13f73a287685_2600x2870.webp 424w, https://substackcdn.com/image/fetch/$s_!hPLG!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4b4241a2-7568-4081-b229-13f73a287685_2600x2870.webp 848w, https://substackcdn.com/image/fetch/$s_!hPLG!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4b4241a2-7568-4081-b229-13f73a287685_2600x2870.webp 1272w, https://substackcdn.com/image/fetch/$s_!hPLG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4b4241a2-7568-4081-b229-13f73a287685_2600x2870.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 
7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>It also has major cybersecurity and scientific research capabilities, including faster drug-design workflows and the generation of novel biological hypotheses.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!cYZ_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F542691be-1967-4cba-98b3-59e858073422_1920x1080.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!cYZ_!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F542691be-1967-4cba-98b3-59e858073422_1920x1080.webp 424w, https://substackcdn.com/image/fetch/$s_!cYZ_!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F542691be-1967-4cba-98b3-59e858073422_1920x1080.webp 848w, 
https://substackcdn.com/image/fetch/$s_!cYZ_!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F542691be-1967-4cba-98b3-59e858073422_1920x1080.webp 1272w, https://substackcdn.com/image/fetch/$s_!cYZ_!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F542691be-1967-4cba-98b3-59e858073422_1920x1080.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!cYZ_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F542691be-1967-4cba-98b3-59e858073422_1920x1080.webp" width="1456" height="819" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/542691be-1967-4cba-98b3-59e858073422_1920x1080.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!cYZ_!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F542691be-1967-4cba-98b3-59e858073422_1920x1080.webp 424w, https://substackcdn.com/image/fetch/$s_!cYZ_!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F542691be-1967-4cba-98b3-59e858073422_1920x1080.webp 848w, 
https://substackcdn.com/image/fetch/$s_!cYZ_!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F542691be-1967-4cba-98b3-59e858073422_1920x1080.webp 1272w, https://substackcdn.com/image/fetch/$s_!cYZ_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F542691be-1967-4cba-98b3-59e858073422_1920x1080.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Protein complexes designed by Mythos 5. 
Targets include immune checkpoints, growth factor and receptor signaling, neurodegeneration, muscle disease, and more complex structural targets.</figcaption></figure></div><p>The publicly released Fable 5 includes safeguards for cybersecurity, biology/chemistry, and distillation-related requests and routes these to the previous model, Claude Opus 4.8.</p><p>As of 12th June 2026, access to Claude Fable 5 and Claude Mythos 5 has been <a href="https://www.anthropic.com/news/fable-mythos-access">suspended</a> due to national security concerns raised by the US government.</p><p>Read more about this release <a href="https://www.anthropic.com/news/claude-fable-5-mythos-5">using this link</a>.</p><div><hr></div><h3>7. FlashMemory-DeepSeek-V4</h3><p>This research paper introduces <strong>Lookahead Sparse Attention (LSA)</strong>, which makes ultra-long-context LLM inference more memory-efficient.</p><p>Instead of passively attending to all previous tokens, LSA proactively predicts which parts of the context it'll need in the future and keeps only those KV chunks in GPU memory. 
</p><p>It uses a small "Neural Memory Indexer" built on the DeepSeek-V4 architecture to decide which parts of the context are worth keeping.</p><p>Across long-context benchmarks (LongBench-v2, LongMemEval, and RULER), LSA uses just 13.5% of the memory on average while maintaining the same accuracy.<br><br>And at 500K-token lengths, it reduces memory overhead by more than 90% without compromising the model's reasoning ability.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!mbFJ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0840aba9-6e2d-463f-9960-3a2fbe5f4ec0_2806x902.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!mbFJ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0840aba9-6e2d-463f-9960-3a2fbe5f4ec0_2806x902.png 424w, https://substackcdn.com/image/fetch/$s_!mbFJ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0840aba9-6e2d-463f-9960-3a2fbe5f4ec0_2806x902.png 848w, https://substackcdn.com/image/fetch/$s_!mbFJ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0840aba9-6e2d-463f-9960-3a2fbe5f4ec0_2806x902.png 1272w, https://substackcdn.com/image/fetch/$s_!mbFJ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0840aba9-6e2d-463f-9960-3a2fbe5f4ec0_2806x902.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!mbFJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0840aba9-6e2d-463f-9960-3a2fbe5f4ec0_2806x902.png" width="1456" height="468" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0840aba9-6e2d-463f-9960-3a2fbe5f4ec0_2806x902.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:468,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:303066,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201787804?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0840aba9-6e2d-463f-9960-3a2fbe5f4ec0_2806x902.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!mbFJ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0840aba9-6e2d-463f-9960-3a2fbe5f4ec0_2806x902.png 424w, https://substackcdn.com/image/fetch/$s_!mbFJ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0840aba9-6e2d-463f-9960-3a2fbe5f4ec0_2806x902.png 848w, https://substackcdn.com/image/fetch/$s_!mbFJ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0840aba9-6e2d-463f-9960-3a2fbe5f4ec0_2806x902.png 1272w, https://substackcdn.com/image/fetch/$s_!mbFJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0840aba9-6e2d-463f-9960-3a2fbe5f4ec0_2806x902.png 1456w" sizes="100vw" 
loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2606.09079">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>6. 
<strong>Latent Spatial Memory for Video World Models</strong></h3><p>This research paper introduces <strong>Mirage</strong>, a latent-space spatial memory framework for making generated videos more 3D-consistent over long camera trajectories.</p><p>Rather than storing scene memory as an RGB point cloud that requires repeated rendering and re-encoding, Mirage stores static scene information as 3D latent tokens within the diffusion model&#8217;s latent space. </p><p>It builds this memory by lifting latent tokens into 3D with depth-guided back-projection, reads it through direct latent-space warping, and updates it chunk by chunk while filtering dynamic objects.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!mAJb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61d4766c-5976-4b1e-8679-d2267e14622d_1908x1228.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!mAJb!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61d4766c-5976-4b1e-8679-d2267e14622d_1908x1228.png 424w, https://substackcdn.com/image/fetch/$s_!mAJb!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61d4766c-5976-4b1e-8679-d2267e14622d_1908x1228.png 848w, https://substackcdn.com/image/fetch/$s_!mAJb!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61d4766c-5976-4b1e-8679-d2267e14622d_1908x1228.png 1272w, 
https://substackcdn.com/image/fetch/$s_!mAJb!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61d4766c-5976-4b1e-8679-d2267e14622d_1908x1228.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!mAJb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61d4766c-5976-4b1e-8679-d2267e14622d_1908x1228.png" width="1456" height="937" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/61d4766c-5976-4b1e-8679-d2267e14622d_1908x1228.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:937,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1052312,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201787804?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61d4766c-5976-4b1e-8679-d2267e14622d_1908x1228.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!mAJb!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61d4766c-5976-4b1e-8679-d2267e14622d_1908x1228.png 424w, https://substackcdn.com/image/fetch/$s_!mAJb!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61d4766c-5976-4b1e-8679-d2267e14622d_1908x1228.png 848w, 
https://substackcdn.com/image/fetch/$s_!mAJb!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61d4766c-5976-4b1e-8679-d2267e14622d_1908x1228.png 1272w, https://substackcdn.com/image/fetch/$s_!mAJb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F61d4766c-5976-4b1e-8679-d2267e14622d_1908x1228.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Experiments show that Mirage achieves up to 10.57&#215; faster end-to-end video generation and a 55&#215; reduction in memory 
footprint relative to explicit 3D baselines. It also attains SOTA performance on <a href="https://github.com/haoyi-duan/WorldScore">WorldScore</a> and strong reconstruction quality on <a href="https://google.github.io/realestate10k/">RealEstate10K</a>.</p><p>Read more about this research <a href="https://arxiv.org/pdf/2606.09828">using this link</a>.</p><div><hr></div><h3>5. MiniMax Sparse Attention (MSA)</h3><p>This research paper introduces <strong>MiniMax Sparse Attention (MSA)</strong>, a blockwise sparse attention built upon <a href="https://www.intoai.pub/p/grouped-query-attention">Grouped Query Attention (GQA)</a>.</p><p>Traditional attention has a cost that grows quadratically with the number of tokens it attends to. This becomes far too expensive for LLMs with ultra-long contexts.<br><br>MSA uses a lightweight &#8220;Index Branch&#8221; to score KV blocks and select a Top-k subset for each query group, while a &#8220;Main Branch&#8221; performs exact sparse attention only over those selected blocks.</p><p>On a <a href="https://www.minimax.io/blog/minimax-m3">109B-parameter model with native multimodal training</a>, MSA performs on par with GQA while reducing per-token attention compute by 28.4&#215; at 1M context. 
With an efficient kernel, it also achieves 14.2&#215; prefill and 7.6&#215; decoding speedups on the H800 GPU.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!UA9r!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F246657f4-6523-4343-b656-b242783a1324_2170x1318.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!UA9r!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F246657f4-6523-4343-b656-b242783a1324_2170x1318.png 424w, https://substackcdn.com/image/fetch/$s_!UA9r!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F246657f4-6523-4343-b656-b242783a1324_2170x1318.png 848w, https://substackcdn.com/image/fetch/$s_!UA9r!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F246657f4-6523-4343-b656-b242783a1324_2170x1318.png 1272w, https://substackcdn.com/image/fetch/$s_!UA9r!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F246657f4-6523-4343-b656-b242783a1324_2170x1318.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!UA9r!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F246657f4-6523-4343-b656-b242783a1324_2170x1318.png" width="1456" height="884" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/246657f4-6523-4343-b656-b242783a1324_2170x1318.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:884,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:373923,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201787804?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F246657f4-6523-4343-b656-b242783a1324_2170x1318.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!UA9r!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F246657f4-6523-4343-b656-b242783a1324_2170x1318.png 424w, https://substackcdn.com/image/fetch/$s_!UA9r!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F246657f4-6523-4343-b656-b242783a1324_2170x1318.png 848w, https://substackcdn.com/image/fetch/$s_!UA9r!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F246657f4-6523-4343-b656-b242783a1324_2170x1318.png 1272w, https://substackcdn.com/image/fetch/$s_!UA9r!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F246657f4-6523-4343-b656-b242783a1324_2170x1318.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2606.13392v2">using this link</a>.</p><div><hr></div><h3>4. 
First Steps Toward Automated AI Research</h3><p>This blog post from Recursive introduces their automated AI research system, which achieves SOTA results across three benchmarks (fixed-budget language model training, small-model training speed, and GPU kernel optimization).</p><p>The system runs as an end-to-end research loop that includes proposing ideas, implementing them, running experiments, validating results, and using what it learns to choose future experiments.</p><p>On NanoChat Autoresearch, it improved fixed-budget language model training from 0.9372 to 0.9109 validation BPB.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!c8Dr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ae38d8a-d9c8-4fc4-b233-b555e9759fcf_1229x691.svg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!c8Dr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ae38d8a-d9c8-4fc4-b233-b555e9759fcf_1229x691.svg 424w, https://substackcdn.com/image/fetch/$s_!c8Dr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ae38d8a-d9c8-4fc4-b233-b555e9759fcf_1229x691.svg 848w, https://substackcdn.com/image/fetch/$s_!c8Dr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ae38d8a-d9c8-4fc4-b233-b555e9759fcf_1229x691.svg 1272w, https://substackcdn.com/image/fetch/$s_!c8Dr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ae38d8a-d9c8-4fc4-b233-b555e9759fcf_1229x691.svg 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!c8Dr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ae38d8a-d9c8-4fc4-b233-b555e9759fcf_1229x691.svg" width="1456" height="818" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4ae38d8a-d9c8-4fc4-b233-b555e9759fcf_1229x691.svg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:818,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;NanoChat Autoresearch: final validation BPB by solution&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="NanoChat Autoresearch: final validation BPB by solution" title="NanoChat Autoresearch: final validation BPB by solution" srcset="https://substackcdn.com/image/fetch/$s_!c8Dr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ae38d8a-d9c8-4fc4-b233-b555e9759fcf_1229x691.svg 424w, https://substackcdn.com/image/fetch/$s_!c8Dr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ae38d8a-d9c8-4fc4-b233-b555e9759fcf_1229x691.svg 848w, https://substackcdn.com/image/fetch/$s_!c8Dr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ae38d8a-d9c8-4fc4-b233-b555e9759fcf_1229x691.svg 1272w, https://substackcdn.com/image/fetch/$s_!c8Dr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ae38d8a-d9c8-4fc4-b233-b555e9759fcf_1229x691.svg 1456w" sizes="100vw" 
loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>On NanoGPT Speedrun, it reduced training time from 79.7s to 77.5s.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!rbn8!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbe261c1-1467-47fe-8dbf-e2f84bd65201_1229x691.svg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!rbn8!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbe261c1-1467-47fe-8dbf-e2f84bd65201_1229x691.svg 424w, https://substackcdn.com/image/fetch/$s_!rbn8!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbe261c1-1467-47fe-8dbf-e2f84bd65201_1229x691.svg 848w, https://substackcdn.com/image/fetch/$s_!rbn8!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbe261c1-1467-47fe-8dbf-e2f84bd65201_1229x691.svg 1272w, https://substackcdn.com/image/fetch/$s_!rbn8!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbe261c1-1467-47fe-8dbf-e2f84bd65201_1229x691.svg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!rbn8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbe261c1-1467-47fe-8dbf-e2f84bd65201_1229x691.svg" width="1456" height="818" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/dbe261c1-1467-47fe-8dbf-e2f84bd65201_1229x691.svg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:818,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;NanoChat Autoresearch: training loss over wall-clock time&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="NanoChat Autoresearch: training loss over wall-clock time" title="NanoChat Autoresearch: training loss over wall-clock 
time" srcset="https://substackcdn.com/image/fetch/$s_!rbn8!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbe261c1-1467-47fe-8dbf-e2f84bd65201_1229x691.svg 424w, https://substackcdn.com/image/fetch/$s_!rbn8!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbe261c1-1467-47fe-8dbf-e2f84bd65201_1229x691.svg 848w, https://substackcdn.com/image/fetch/$s_!rbn8!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbe261c1-1467-47fe-8dbf-e2f84bd65201_1229x691.svg 1272w, https://substackcdn.com/image/fetch/$s_!rbn8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbe261c1-1467-47fe-8dbf-e2f84bd65201_1229x691.svg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>And on SOL-ExecBench, it raised the mean GPU-kernel optimization score from 0.699 to 0.754, which is an 18% reduction in the gap to the estimated hardware optimum.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!aWx6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31d71f59-d7fe-4a85-a53a-6bc94efcf788_1229x691.svg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!aWx6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31d71f59-d7fe-4a85-a53a-6bc94efcf788_1229x691.svg 424w, https://substackcdn.com/image/fetch/$s_!aWx6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31d71f59-d7fe-4a85-a53a-6bc94efcf788_1229x691.svg 848w, https://substackcdn.com/image/fetch/$s_!aWx6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31d71f59-d7fe-4a85-a53a-6bc94efcf788_1229x691.svg 1272w, https://substackcdn.com/image/fetch/$s_!aWx6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31d71f59-d7fe-4a85-a53a-6bc94efcf788_1229x691.svg 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!aWx6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31d71f59-d7fe-4a85-a53a-6bc94efcf788_1229x691.svg" width="1456" height="818" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/31d71f59-d7fe-4a85-a53a-6bc94efcf788_1229x691.svg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:818,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;SOL-ExecBench: mean SOL score by kernel category&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="SOL-ExecBench: mean SOL score by kernel category" title="SOL-ExecBench: mean SOL score by kernel category" srcset="https://substackcdn.com/image/fetch/$s_!aWx6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31d71f59-d7fe-4a85-a53a-6bc94efcf788_1229x691.svg 424w, https://substackcdn.com/image/fetch/$s_!aWx6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31d71f59-d7fe-4a85-a53a-6bc94efcf788_1229x691.svg 848w, https://substackcdn.com/image/fetch/$s_!aWx6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31d71f59-d7fe-4a85-a53a-6bc94efcf788_1229x691.svg 1272w, https://substackcdn.com/image/fetch/$s_!aWx6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31d71f59-d7fe-4a85-a53a-6bc94efcf788_1229x691.svg 1456w" sizes="100vw" 
loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://www.recursive.com/articles/first-steps-toward-automated-ai-research">using this link</a>.</p><div><hr></div><h3>3. End-to-End Context Compression at Scale</h3><p>This research paper introduces <strong>Latent Context Language Models (LCLMs)</strong>, a family of encoder-decoder compressors that improve the efficiency of long-context LLM inference compared to using full KV caches.</p><p>LCLMs compress long token sequences into shorter latent embeddings, which the decoder can use directly. 
This improves the trade-off among task performance, compression speed, and peak memory usage while avoiding common limitations of KV compression (quality loss, high compression costs, constraints on target context windows, and poor compatibility with production inference systems).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!gKeY!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a55536b-676a-48b5-9560-14d5952a7153_2288x1298.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!gKeY!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a55536b-676a-48b5-9560-14d5952a7153_2288x1298.png 424w, https://substackcdn.com/image/fetch/$s_!gKeY!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a55536b-676a-48b5-9560-14d5952a7153_2288x1298.png 848w, https://substackcdn.com/image/fetch/$s_!gKeY!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a55536b-676a-48b5-9560-14d5952a7153_2288x1298.png 1272w, https://substackcdn.com/image/fetch/$s_!gKeY!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a55536b-676a-48b5-9560-14d5952a7153_2288x1298.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!gKeY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a55536b-676a-48b5-9560-14d5952a7153_2288x1298.png" width="1456" height="826" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6a55536b-676a-48b5-9560-14d5952a7153_2288x1298.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:826,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:419392,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201787804?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a55536b-676a-48b5-9560-14d5952a7153_2288x1298.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!gKeY!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a55536b-676a-48b5-9560-14d5952a7153_2288x1298.png 424w, https://substackcdn.com/image/fetch/$s_!gKeY!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a55536b-676a-48b5-9560-14d5952a7153_2288x1298.png 848w, https://substackcdn.com/image/fetch/$s_!gKeY!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a55536b-676a-48b5-9560-14d5952a7153_2288x1298.png 1272w, https://substackcdn.com/image/fetch/$s_!gKeY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a55536b-676a-48b5-9560-14d5952a7153_2288x1298.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2606.09659">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>2. 
<strong>General-purpose large language models outperform specialized clinical AI tools on medical benchmarks</strong></h3><p>This research paper evaluates two clinical AI tools, OpenEvidence and UpToDate Expert AI, against three frontier LLMs (GPT-5.2, Gemini 3.1 Pro, and Claude Opus 4.6) across:</p><ol><li><p>500 MedQA questions testing medical knowledge</p></li><li><p>500 HealthBench items measuring alignment with clinicians</p></li><li><p>Real clinical queries (RCQ) benchmark, built from 100 de-identified queries from physicians to a general-purpose language model in a live clinical environment</p></li></ol><p>The results show that frontier LLMs outperform clinical AI tools in all three evaluations. </p><p>On the RCQ benchmark, Clinical AI tools perform comparably to the auto-enabled Google Search AI Overview.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lRbF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febe7bb47-88c8-4722-8144-7c9f82ee451b_2770x1212.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!lRbF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febe7bb47-88c8-4722-8144-7c9f82ee451b_2770x1212.png 424w, https://substackcdn.com/image/fetch/$s_!lRbF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febe7bb47-88c8-4722-8144-7c9f82ee451b_2770x1212.png 848w, https://substackcdn.com/image/fetch/$s_!lRbF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febe7bb47-88c8-4722-8144-7c9f82ee451b_2770x1212.png 1272w, 
https://substackcdn.com/image/fetch/$s_!lRbF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febe7bb47-88c8-4722-8144-7c9f82ee451b_2770x1212.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!lRbF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febe7bb47-88c8-4722-8144-7c9f82ee451b_2770x1212.png" width="1456" height="637" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ebe7bb47-88c8-4722-8144-7c9f82ee451b_2770x1212.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:637,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:571778,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201787804?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febe7bb47-88c8-4722-8144-7c9f82ee451b_2770x1212.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!lRbF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febe7bb47-88c8-4722-8144-7c9f82ee451b_2770x1212.png 424w, https://substackcdn.com/image/fetch/$s_!lRbF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febe7bb47-88c8-4722-8144-7c9f82ee451b_2770x1212.png 848w, 
https://substackcdn.com/image/fetch/$s_!lRbF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febe7bb47-88c8-4722-8144-7c9f82ee451b_2770x1212.png 1272w, https://substackcdn.com/image/fetch/$s_!lRbF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febe7bb47-88c8-4722-8144-7c9f82ee451b_2770x1212.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://www.nature.com/articles/s41591-026-04431-5">using this 
link</a>.</p><div><hr></div><h3>1. Self-Harness: Harnesses That Improve Themselves</h3><p>This research paper introduces <strong>Self-Harness</strong>, which lets LLM-based agents improve their own operating harness without relying on human engineers or stronger external agents.</p><p>Self-Harness works iteratively in three stages:</p><ol><li><p>It first checks the execution traces and finds which model-specific patterns led to failure (Weakness Mining)</p></li><li><p>It then generates harness modifications tied to these failures (Harness Proposal)</p></li><li><p>Finally, it accepts the modifications after successful regression testing (Proposal Validation)</p></li></ol><p>When tested on Terminal-Bench-2.0 using three models (MiniMax M2.5, Qwen3.5-35B-A3B, and GLM-5) initialized with a minimal harness, Self-Harness consistently improves held-out pass rates by 21.4%, 14.3%, and 14.2%, respectively.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!j6yT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ce20d60-6d7b-4ad1-9432-31f0d13d5083_1734x1350.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!j6yT!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ce20d60-6d7b-4ad1-9432-31f0d13d5083_1734x1350.png 424w, https://substackcdn.com/image/fetch/$s_!j6yT!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ce20d60-6d7b-4ad1-9432-31f0d13d5083_1734x1350.png 848w, 
https://substackcdn.com/image/fetch/$s_!j6yT!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ce20d60-6d7b-4ad1-9432-31f0d13d5083_1734x1350.png 1272w, https://substackcdn.com/image/fetch/$s_!j6yT!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ce20d60-6d7b-4ad1-9432-31f0d13d5083_1734x1350.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!j6yT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ce20d60-6d7b-4ad1-9432-31f0d13d5083_1734x1350.png" width="1456" height="1134" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6ce20d60-6d7b-4ad1-9432-31f0d13d5083_1734x1350.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1134,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:423012,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201787804?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ce20d60-6d7b-4ad1-9432-31f0d13d5083_1734x1350.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!j6yT!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ce20d60-6d7b-4ad1-9432-31f0d13d5083_1734x1350.png 424w, 
https://substackcdn.com/image/fetch/$s_!j6yT!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ce20d60-6d7b-4ad1-9432-31f0d13d5083_1734x1350.png 848w, https://substackcdn.com/image/fetch/$s_!j6yT!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ce20d60-6d7b-4ad1-9432-31f0d13d5083_1734x1350.png 1272w, https://substackcdn.com/image/fetch/$s_!j6yT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ce20d60-6d7b-4ad1-9432-31f0d13d5083_1734x1350.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2606.09498">using this link</a>.</p><div><hr></div><p>This newsletter edition is completely free to read.</p><p>If you found it valuable, click the like button &#10084;&#65039; and consider subscribing for more such content every week.</p><p>If you have any questions or suggestions, feel free to leave a comment below.</p><p><strong>Into AI is a reader-supported newsletter. Gain access to deeper, members-only content by becoming a paid subscriber today.</strong></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join 'Into AI' premium today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join 'Into AI' premium today</span></a></p>]]></content:encoded></item><item><title><![CDATA[Build and train a Diffusion LLM from scratch]]></title><description><![CDATA[An end-to-end guide to training a LLaDA-style Diffusion LLM and using it to generate text.]]></description><link>https://www.intoai.pub/p/build-and-train-a-diffusion-llm</link><guid isPermaLink="false">https://www.intoai.pub/p/build-and-train-a-diffusion-llm</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Sun, 14 Jun 2026 10:32:11 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/95122e3b-af92-47a5-99b3-c861decf844a_1446x1330.webp" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>In the previous lessons on &#8216;Into AI&#8217;, we learned how to build and train an LLM from scratch. 
</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;210dde25-b62d-43de-8a76-a7297118abfe&quot;,&quot;caption&quot;:&quot;&#127873; Become a paid subscriber to &#8216;Into AI&#8217; today at a special 25% discount on the annual subscription.&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build and train an LLM from scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2025-12-31T11:47:16.904Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!MRfV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851366c5-6f74-479a-a06a-41e63fc79f6c_2480x1074.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/build-and-train-an-llm-from-scratch&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:182314468,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:24,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>We then deepened our understanding by building and training a&nbsp;Mixture-of-Experts (MoE) LLM from scratch.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;1cca3d05-8235-4f8b-8cf6-d7ea6550d642&quot;,&quot;caption&quot;:&quot;Most modern-day LLMs use the Mixture of Experts (MoE) architecture. This includes Grok-1, DeepSeekMoE, gpt-oss, and Mixtral (and many other proprietary LLMs whose architectural details aren&#8217;t publicly available).&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build and Train a Mixture-of-Experts (MoE) LLM from Scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. 
Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2026-03-20T11:51:20.529Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Jr5y!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/build-and-train-a-mixture-of-experts&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:190610837,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:11,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>Both these models were trained using the <strong>next-token prediction objective</strong> and generate tokens one at a time, left to right, autoregressively.</p><p>But this is not the only way that a model can be used to generate text. </p><p>We have Diffusion LLMs that can generate tokens in parallel using a process called <a href="https://www.intoai.pub/i/194697013/how-to-apply-diffusion-to-text">Diffusion</a>. 
One of the most successful examples of this type of LLM is <strong><a href="https://arxiv.org/abs/2502.09992">LLaDA (Large Language Diffusion with mAsking)</a></strong>, which we discussed in depth in the following lesson.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;f1b733ed-1502-4d1c-89aa-91d569b7191b&quot;,&quot;caption&quot;:&quot;LLM-based chatbots are all around us. They reply by producing their responses sequentially. This means that they generate their output token by token, one at a time.&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Diffusion LLMs, Explained Simply&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | 
Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2026-04-23T18:19:46.246Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!4OWT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8461e51e-f9d6-485f-ac77-552a448ec9e7_1922x948.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/diffusion-llms-explained-simply&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:194697013,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:11,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>Google also released its experimental open-source model, <strong><a href="https://huggingface.co/google/diffusiongemma-26B-A4B-it">DiffusionGemma</a></strong>, this week, which operates on the same principle.</p><p>In this lesson, we will take our understanding to the next level and learn to:</p><ul><li><p>Implement a 13-million-parameter Diffusion LLM from scratch</p></li><li><p>Train it on a publicly available pre-training dataset using a free GPU</p></li><li><p>Generate text using it</p></li></ul><p>Let&#8217;s begin!</p><div><hr></div><h3>Setting up the environment</h3><p>We will code in PyTorch, 
use the Hugging Face <code>datasets</code> library for the training dataset, and the <code>transformers</code> library to obtain the tokenizer.</p><p>The code is meant to run on Google Colaboratory and uses the free NVIDIA T4 GPU to train our model.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;5bb19bf8-f224-46d6-a3f2-b512b90d7c2b&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Install packages
!uv pip install torch datasets transformers tqdm</code></pre></div><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;bcdbebbd-25f7-4c0f-911a-488eb7c4815e&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># PyTorch core imports
import torch
import torch.nn as nn
import torch.nn.functional as F

# For numerical operations
import math

# For data processing
from torch.utils.data import DataLoader, Dataset

# Tokenizer
from transformers import AutoTokenizer

# Optimizer
import torch.optim as optim

# For mixed-precision training 
from torch import amp
from torch.nn.utils import clip_grad_norm_

# To visualise progress bar
from tqdm import tqdm

# Hide deprecation warnings
import warnings
warnings.filterwarnings('ignore')

# Set the device (GPU or CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")</code></pre></div><div><hr></div><h3>Setting up the tokenizer</h3><p>Instead of building the <a href="https://www.intoai.pub/p/build-an-llm-tokenizer?utm_source=publication-search">tokenizer from scratch</a>, we will use the <a href="https://en.wikipedia.org/wiki/Byte-pair_encoding">BPE tokenizer</a> for GPT-2. This gives us a 50,257-token vocabulary made up of subwords.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;02d926d5-dc17-4c17-b947-cdfda4f8ba2a&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">tokenizer = AutoTokenizer.from_pretrained("gpt2")

print(f"Original vocabulary size: {tokenizer.vocab_size}")
# Original vocabulary size: 50257</code></pre></div><p>The end-of-sequence (EOS) token is at the last index in the vocabulary.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;f2748219-bf0f-4200-aaab-b349b931d92b&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">print(tokenizer.eos_token)
# &lt;|endoftext|&gt;

EOS_ID = tokenizer.eos_token_id 

print(EOS_ID)
# 50256</code></pre></div><p>To train a diffusion model, we need a special <code>&lt;MASK&gt;</code> token. This token acts as a placeholder for the model to identify the input positions it should fill. We simply append it as a new ID at the end of the vocabulary.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;0ca6bcf3-c03e-4c43-aa5e-44ea605c7628&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">MASK_ID = tokenizer.vocab_size

print(MASK_ID)
# 50257

# Increase the vocabulary size by 1 for the newly added &lt;MASK&gt; token
VOCAB_SIZE = tokenizer.vocab_size + 1 

print(VOCAB_SIZE)
# 50258</code></pre></div><p>Next, we create two helper functions to encode and decode text using the tokenizer.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;1772c8c6-06f2-4ce6-9744-c6dae5f20d17&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Convert input text into a list of token IDs using the tokenizer
def encode(text):
    """Return the list of token IDs for `text`, without adding any special tokens."""
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    return token_ids

# Remove any MASK_ID tokens from the sequence and convert the list of token IDs back into readable text
def decode(ids):
    ids = [i for i in ids if i != MASK_ID]
    return tokenizer.decode(ids, skip_special_tokens=True)</code></pre></div><div><hr></div><h3>Getting our data ready</h3><p>We will train our diffusion LLM on the <a href="https://huggingface.co/datasets/roneneldan/TinyStories">TinyStories dataset</a>. It is a synthetic dataset of short stories that contains the vocabulary used by a 3-year-old, generated by GPT-3.5 and GPT-4.</p><p>We will use a subset of this dataset that is small enough to train our model on a free-tier GPU, yet rich enough for it to learn semantic details. It is downloaded as follows.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;3a3b473f-f8d7-4aaa-83e5-8a3b3e0f05b1&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from datasets import load_dataset

# Download the 5000 stories from the 'train' split of TinyStories
dataset = load_dataset("roneneldan/TinyStories", split="train[:5000]")</code></pre></div><p>Each row in the dataset is one complete story. Check out an example of one of them.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;87a34476-6a84-410a-9591-369fd123466c&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">print(f"Example Story: \n\n{dataset[0]["text"]}")

"""
Example Story: 

One day, a little girl named Lily found a needle in her room. She knew it was difficult to play with it because it was sharp. Lily wanted to share the needle with her mom, so she could sew a button on her shirt.
Lily went to her mom and said, "Mom, I found this needle. Can you share it with me and sew my shirt?" Her mom smiled and said, "Yes, Lily, we can share the needle and fix your shirt."
Together, they shared the needle and sewed the button on Lily's shirt. It was not difficult for them because they were sharing and helping each other. After they finished, Lily thanked her mom for sharing the needle and fixing her shirt. They both felt happy because they had shared and worked together.
"""</code></pre></div><p>We pre-process this dataset of stories by:</p><ul><li><p>Tokenizing each story (converting sub-words into token IDs)</p></li><li><p>Joining them into a single list of token IDs</p></li><li><p>Inserting an <code>EOS</code> token between stories to help the model learn the boundaries between different stories</p></li></ul><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;65e45883-abee-46c9-b0ee-741065944af5&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">def clean_and_tokenize(dataset):
    token_ids = []

    for text in dataset["text"]:
        # Remove leading/trailing whitespace from each story
        story = text.strip()

        # Skip empty entries
        if not story:
            continue

        # Encode the story into token IDs and append to the sequence
        token_ids.extend(encode(story))

        # Add an EOS token to separate stories
        token_ids.append(EOS_ID)

    return token_ids</code></pre></div><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;9ddeb71a-14d5-4d42-b47e-5f9783d9b484&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">print("Preprocessing dataset...")

token_ids = clean_and_tokenize(dataset)

print(f"Total training tokens: {len(token_ids):,}")
# Total training tokens: 1,033,087</code></pre></div><p>If you&#8217;ve previously trained an autoregressive LLM from scratch, you must be familiar with the standard approach of shifting the input sequence by one token and using it as the target during training. </p><p><a href="https://www.intoai.pub/i/194697013/forward-masking-process-model-training">Diffusion LLMs</a> <strong>aren&#8217;t</strong> trained this way. </p><p>Instead of next-token prediction, they use fixed-length, probabilistically masked sequences, and the model is trained to predict the original clean input from these masked versions.</p>
      <p>
          <a href="https://www.intoai.pub/p/build-and-train-a-diffusion-llm">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[Latent Mixture-of-Experts (Latent MoE), Clearly Explained]]></title><description><![CDATA[A lesson on NVIDIA's Latent Mixture-of-Experts (MoE) architecture that powers the Nemotron-3 Super and Ultra models.]]></description><link>https://www.intoai.pub/p/latent-mixture-of-experts</link><guid isPermaLink="false">https://www.intoai.pub/p/latent-mixture-of-experts</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Fri, 12 Jun 2026 15:21:10 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!eVeU!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb80381f3-fb1d-4fc8-8114-93f96148defd_2428x1248.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Latent Mixture-of-Experts is increasingly becoming the preferred architecture for powerful LLMs.</p><p>It was adopted in NVIDIA&#8217;s <a href="https://arxiv.org/pdf/2512.20856">Nemotron-3 Super and Ultra models</a>, and now Microsoft has built its first in-house reasoning model, called <a href="https://microsoft.ai/pdf/mai-thinking-1.pdf">MAI-Thinking-1</a>, using it. 
All of these models have reported meaningful gains in accuracy without sacrificing inference throughput or latency using this architecture.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!TTfe!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0b3d4be-336e-44b3-ba8f-a000e44c9e36_1564x1118.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!TTfe!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0b3d4be-336e-44b3-ba8f-a000e44c9e36_1564x1118.png 424w, https://substackcdn.com/image/fetch/$s_!TTfe!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0b3d4be-336e-44b3-ba8f-a000e44c9e36_1564x1118.png 848w, https://substackcdn.com/image/fetch/$s_!TTfe!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0b3d4be-336e-44b3-ba8f-a000e44c9e36_1564x1118.png 1272w, https://substackcdn.com/image/fetch/$s_!TTfe!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0b3d4be-336e-44b3-ba8f-a000e44c9e36_1564x1118.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!TTfe!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0b3d4be-336e-44b3-ba8f-a000e44c9e36_1564x1118.png" width="1456" height="1041" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c0b3d4be-336e-44b3-ba8f-a000e44c9e36_1564x1118.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1041,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:169432,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201275982?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0b3d4be-336e-44b3-ba8f-a000e44c9e36_1564x1118.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!TTfe!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0b3d4be-336e-44b3-ba8f-a000e44c9e36_1564x1118.png 424w, https://substackcdn.com/image/fetch/$s_!TTfe!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0b3d4be-336e-44b3-ba8f-a000e44c9e36_1564x1118.png 848w, https://substackcdn.com/image/fetch/$s_!TTfe!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0b3d4be-336e-44b3-ba8f-a000e44c9e36_1564x1118.png 1272w, https://substackcdn.com/image/fetch/$s_!TTfe!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0b3d4be-336e-44b3-ba8f-a000e44c9e36_1564x1118.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Architecture of Microsoft&#8217;s MAI-Base-1, where the blocks labeled &#8216;Sparse MoE&#8217; represent a Latent MoE layer in which 8 of 512 experts are activated per token in a compressed latent space. (<a href="https://microsoft.ai/pdf/mai-thinking-1.pdf">Source</a>)</figcaption></figure></div><p>Latent Mixture-of-Experts is an improvement over the popular <strong><a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Mixture-of-Experts (MoE) architecture</a>. 
</strong> Let&#8217;s build our foundations by understanding the MoE architecture in depth before we learn about Latent MoE.</p><div><hr></div><h3>What is the MoE <strong>architecture?</strong></h3><p>LLMs based on the Mixture-of-Experts (MoE) architecture<strong> </strong>contain multiple small feed-forward networks (called&nbsp;<strong>Experts</strong>) instead of a conventional large feed-forward network in their Transformers. These experts handle different tokens by using another network, called a&nbsp;<strong>Router</strong>, that selects which expert to use for each token. </p><p>MoE enables LLMs to scale their parameter count while keeping the compute cost, or the number of <a href="https://en.wikipedia.org/wiki/Floating_point_operations_per_second">Floating-point Operations (FLOPs)</a> per token, fixed. This is because each token is not processed by all experts in the model as in conventional dense LLMs.</p><p>In an MoE LLM with &#8216;N&#8217; experts, the router directs each token towards only the top-K selected experts, so the active parameter count (which determines FLOPs) stays fixed while the total parameter count of the model (which stores knowledge) can grow enormously.</p><p>A great example of an MoE LLM is <a href="https://www.intoai.pub/p/what-makes-deekseek-v4-so-good">DeepSeek-V4-Pro</a>, which has 1.6 trillion parameters, but only 49 billion are activated per token during inference.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!j1yn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!j1yn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 424w, https://substackcdn.com/image/fetch/$s_!j1yn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 848w, https://substackcdn.com/image/fetch/$s_!j1yn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 1272w, https://substackcdn.com/image/fetch/$s_!j1yn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!j1yn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png" width="728" height="379.5" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:759,&quot;width&quot;:1456,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!j1yn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 424w, https://substackcdn.com/image/fetch/$s_!j1yn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 848w, https://substackcdn.com/image/fetch/$s_!j1yn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 1272w, https://substackcdn.com/image/fetch/$s_!j1yn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Architecture of the MoE transformer with 8 experts and a router that selects 2 experts for each token (Top-K = 2) (<a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Source</a>)</figcaption></figure></div><div><hr></div><h3>What works best for Mixture-of-Experts models</h3><h4>1. Memory bandwidth is the real bottleneck for small batches</h4><p>Consider the following <a href="https://en.wikipedia.org/wiki/Roofline_model">Roofline plot</a>, which shows the maximum performance a workload can achieve as a function of its arithmetic intensity (how much computation is performed per byte of data moved in memory). This plot tells us whether the workload is limited by memory bandwidth or by compute.</p><p>It is built using <a href="https://huggingface.co/Qwen/Qwen3-235B-A22B">Qwen3-235B-A22B</a> served on NVIDIA GB200 GPUs connected over <a href="https://en.wikipedia.org/wiki/NVLink">NVLink</a>. 
</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!idNd!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5910ccc0-9508-4915-bc86-2a18e26de698_1994x1172.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!idNd!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5910ccc0-9508-4915-bc86-2a18e26de698_1994x1172.png 424w, https://substackcdn.com/image/fetch/$s_!idNd!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5910ccc0-9508-4915-bc86-2a18e26de698_1994x1172.png 848w, https://substackcdn.com/image/fetch/$s_!idNd!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5910ccc0-9508-4915-bc86-2a18e26de698_1994x1172.png 1272w, https://substackcdn.com/image/fetch/$s_!idNd!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5910ccc0-9508-4915-bc86-2a18e26de698_1994x1172.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!idNd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5910ccc0-9508-4915-bc86-2a18e26de698_1994x1172.png" width="1456" height="856" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5910ccc0-9508-4915-bc86-2a18e26de698_1994x1172.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:856,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:161787,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201275982?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5910ccc0-9508-4915-bc86-2a18e26de698_1994x1172.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!idNd!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5910ccc0-9508-4915-bc86-2a18e26de698_1994x1172.png 424w, https://substackcdn.com/image/fetch/$s_!idNd!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5910ccc0-9508-4915-bc86-2a18e26de698_1994x1172.png 848w, https://substackcdn.com/image/fetch/$s_!idNd!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5910ccc0-9508-4915-bc86-2a18e26de698_1994x1172.png 1272w, https://substackcdn.com/image/fetch/$s_!idNd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5910ccc0-9508-4915-bc86-2a18e26de698_1994x1172.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>When served in a <strong>low-latency setting</strong>, such as responding to a small number of requests, often from just one user at a time, the real bottleneck for MoE LLM inference is <strong>memory bandwidth</strong> rather than compute.</p><p>This means that when only a few tokens are being processed by the MoE model, the GPU spends almost all its time loading model parameters from memory rather than performing computations. The GPU compute units sit idle most of the time, waiting for parameters to be loaded.</p>
      <p>
          <a href="https://www.intoai.pub/p/latent-mixture-of-experts">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[I'm not writing another cringe "Fable 5" post]]></title><description><![CDATA[And instead, teaching other "uncool" and useful concepts that actually matter.]]></description><link>https://www.intoai.pub/p/fable</link><guid isPermaLink="false">https://www.intoai.pub/p/fable</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Thu, 11 Jun 2026 14:42:14 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!8Yke!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed6f4afd-8409-44d5-ae97-5137441556a1_1650x926.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!8Yke!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed6f4afd-8409-44d5-ae97-5137441556a1_1650x926.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!8Yke!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed6f4afd-8409-44d5-ae97-5137441556a1_1650x926.png 424w, https://substackcdn.com/image/fetch/$s_!8Yke!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed6f4afd-8409-44d5-ae97-5137441556a1_1650x926.png 848w, https://substackcdn.com/image/fetch/$s_!8Yke!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed6f4afd-8409-44d5-ae97-5137441556a1_1650x926.png 1272w, 
https://substackcdn.com/image/fetch/$s_!8Yke!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed6f4afd-8409-44d5-ae97-5137441556a1_1650x926.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!8Yke!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed6f4afd-8409-44d5-ae97-5137441556a1_1650x926.png" width="1456" height="817" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ed6f4afd-8409-44d5-ae97-5137441556a1_1650x926.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:817,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1278111,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/201603241?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed6f4afd-8409-44d5-ae97-5137441556a1_1650x926.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!8Yke!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed6f4afd-8409-44d5-ae97-5137441556a1_1650x926.png 424w, https://substackcdn.com/image/fetch/$s_!8Yke!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed6f4afd-8409-44d5-ae97-5137441556a1_1650x926.png 848w, https://substackcdn.com/image/fetch/$s_!8Yke!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed6f4afd-8409-44d5-ae97-5137441556a1_1650x926.png 
1272w, https://substackcdn.com/image/fetch/$s_!8Yke!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed6f4afd-8409-44d5-ae97-5137441556a1_1650x926.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Anthropic released Fable 5 a few days ago, and everyone is going crazy over it.</p><p><strong>Except me. </strong></p><p>I have an unpopular opinion about this release. 
</p><p>Reading its <a href="https://www-cdn.anthropic.com/d00db56fa754a1b115b6dd7cb2e3c342ee809620.pdf">system card</a> taught me <strong>nothing</strong> about AI engineering or development.</p><p>There are some interesting observational findings that might scare or excite you depending on your perspective (I feel neutral about them), but they make no real difference to your knowledge or skills.</p><p>Here is what the &#8220;Training data and process section&#8221; says:</p><blockquote><p><em>Mythos 5 and Fable 5 were trained on a proprietary mix of publicly available information from the internet, public and private datasets, and synthetic data generated by other models. </em></p><p><em>Throughout the training process, we used several data cleaning and filtering methods, including deduplication and classification.</em></p><p><em>&#8230;</em></p><p><em>After the pretraining process, the model underwent substantial post-training and fine-tuning, with the goal of making it an assistant whose behavior aligns with the values described in Claude&#8217;s constitution.</em></p><p><em>Claude is multilingual and will typically respond in the same language as the user&#8217;s input. Output quality varies by language. The model outputs text only.</em></p></blockquote><p>This is the most generic text I&#8217;ve ever seen around model training. The team basically reveals nothing about how the model was developed. 
</p><p>You could read the following instead to learn about how LLMs are trained.</p><div class="embedded-post-wrap" data-attrs="{&quot;id&quot;:201470936,&quot;url&quot;:&quot;https://blog.algomaster.io/p/how-llms-are-actually-trained&quot;,&quot;publication_id&quot;:2202268,&quot;publication_name&quot;:&quot;AlgoMaster Newsletter&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Byzl!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F662bfc37-956c-4898-b9ee-62676ecfafa1_256x256.png&quot;,&quot;title&quot;:&quot;How LLMs are Actually Trained&quot;,&quot;truncated_body_text&quot;:&quot;In the last lesson, we learned how the Transformer architecture powers an LLM. In this one, we will discuss how LLMs are trained from scratch to become helpful assistants.&quot;,&quot;date&quot;:&quot;2026-06-11T04:50:19.518Z&quot;,&quot;like_count&quot;:34,&quot;comment_count&quot;:1,&quot;bylines&quot;:[{&quot;id&quot;:83602743,&quot;name&quot;:&quot;Ashish Pratap Singh&quot;,&quot;handle&quot;:&quot;ashishps&quot;,&quot;previous_name&quot;:null,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/bec4e97e-29d0-4080-b529-db1f5fc4d1d2_1536x1536.jpeg&quot;,&quot;bio&quot;:&quot;Software Engineer | Prev @ Amazon. Write about Coding, System Design, Interviews and Software Engineering. 
LinkedIn: 225k+, Youtube: 225k+, GitHub: 65k+ stars&quot;,&quot;profile_set_up_at&quot;:&quot;2023-09-06T15:43:22.327Z&quot;,&quot;reader_installed_at&quot;:&quot;2024-02-25T09:01:41.030Z&quot;,&quot;publicationUsers&quot;:[{&quot;id&quot;:2217309,&quot;user_id&quot;:83602743,&quot;publication_id&quot;:2202268,&quot;role&quot;:&quot;admin&quot;,&quot;public&quot;:true,&quot;is_primary&quot;:true,&quot;publication&quot;:{&quot;id&quot;:2202268,&quot;name&quot;:&quot;AlgoMaster Newsletter&quot;,&quot;subdomain&quot;:&quot;algomaster&quot;,&quot;custom_domain&quot;:&quot;blog.algomaster.io&quot;,&quot;custom_domain_optional&quot;:false,&quot;hero_text&quot;:&quot;Master Coding and System Design Interviews. Level up your Software Engineering career. Subscribe and get a FREE System Design Interview Handbook in your inbox.&quot;,&quot;logo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/662bfc37-956c-4898-b9ee-62676ecfafa1_256x256.png&quot;,&quot;author_id&quot;:83602743,&quot;primary_user_id&quot;:83602743,&quot;theme_var_background_pop&quot;:&quot;#9A6600&quot;,&quot;created_at&quot;:&quot;2023-12-25T16:53:36.646Z&quot;,&quot;email_from_name&quot;:&quot;Ashish Pratap Singh&quot;,&quot;copyright&quot;:&quot;Ashish Pratap Singh&quot;,&quot;founding_plan_name&quot;:&quot;Lifetime Access&quot;,&quot;community_enabled&quot;:true,&quot;invite_only&quot;:false,&quot;payments_state&quot;:&quot;enabled&quot;,&quot;language&quot;:null,&quot;explicit&quot;:false,&quot;homepage_type&quot;:&quot;newspaper&quot;,&quot;is_personal_mode&quot;:false,&quot;logo_url_wide&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/433be119-d483-45ad-a105-5927579ec14a_1352x264.png&quot;}},{&quot;id&quot;:2799315,&quot;user_id&quot;:83602743,&quot;publication_id&quot;:2757289,&quot;role&quot;:&quot;admin&quot;,&quot;public&quot;:true,&quot;is_primary&quot;:false,&quot;publication&quot;:{&quot;id&quot;:2757289,&quot;name&quot;:&quot;AI 
Minded&quot;,&quot;subdomain&quot;:&quot;aiminded&quot;,&quot;custom_domain&quot;:&quot;blog.aiminded.io&quot;,&quot;custom_domain_optional&quot;:false,&quot;hero_text&quot;:&quot;AI Minded is a weekly newsletter that breaks down AI concepts, tools, and trends &#8212; in a way that&#8217;s simple, practical, and easy to follow.&quot;,&quot;logo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b7bf6a50-0504-4bf7-abd6-2871f65aed79_1024x1024.png&quot;,&quot;author_id&quot;:83602743,&quot;primary_user_id&quot;:null,&quot;theme_var_background_pop&quot;:&quot;#FF5CD7&quot;,&quot;created_at&quot;:&quot;2024-07-03T06:52:47.515Z&quot;,&quot;email_from_name&quot;:&quot;Ashish from AI Minded&quot;,&quot;copyright&quot;:&quot;Ashish Pratap Singh&quot;,&quot;founding_plan_name&quot;:null,&quot;community_enabled&quot;:true,&quot;invite_only&quot;:false,&quot;payments_state&quot;:&quot;disabled&quot;,&quot;language&quot;:null,&quot;explicit&quot;:false,&quot;homepage_type&quot;:&quot;newspaper&quot;,&quot;is_personal_mode&quot;:false,&quot;logo_url_wide&quot;:null}}],&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100,&quot;status&quot;:{&quot;bestsellerTier&quot;:100,&quot;subscriberTier&quot;:null,&quot;leaderboard&quot;:null,&quot;vip&quot;:false,&quot;badge&quot;:{&quot;type&quot;:&quot;bestseller&quot;,&quot;tier&quot;:100},&quot;subscriber&quot;:null}},{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. 
Ashish Bamania&quot;,&quot;handle&quot;:&quot;drashishbamania&quot;,&quot;previous_name&quot;:&quot;Ashish Bamania&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;profile_set_up_at&quot;:&quot;2023-07-05T19:46:05.031Z&quot;,&quot;reader_installed_at&quot;:&quot;2023-07-05T21:22:43.807Z&quot;,&quot;is_guest&quot;:true,&quot;bestseller_tier&quot;:100,&quot;status&quot;:{&quot;bestsellerTier&quot;:100,&quot;subscriberTier&quot;:1,&quot;leaderboard&quot;:null,&quot;vip&quot;:false,&quot;badge&quot;:{&quot;type&quot;:&quot;bestseller&quot;,&quot;tier&quot;:100},&quot;subscriber&quot;:null},&quot;primaryPublicationId&quot;:1813260,&quot;primaryPublicationName&quot;:&quot;Into AI&quot;,&quot;primaryPublicationUrl&quot;:&quot;https://www.intoai.pub&quot;,&quot;primaryPublicationSubscribeUrl&quot;:&quot;https://www.intoai.pub/subscribe?&quot;}],&quot;utm_campaign&quot;:null,&quot;belowTheFold&quot;:true,&quot;type&quot;:&quot;newsletter&quot;,&quot;language&quot;:&quot;en&quot;,&quot;source&quot;:null}" data-component-name="EmbeddedPostToDOM"><a class="embedded-post" native="true" href="https://blog.algomaster.io/p/how-llms-are-actually-trained?utm_source=substack&amp;utm_campaign=post_embed&amp;utm_medium=web"><div class="embedded-post-header"><img class="embedded-post-publication-logo" src="https://substackcdn.com/image/fetch/$s_!Byzl!,w_56,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F662bfc37-956c-4898-b9ee-62676ecfafa1_256x256.png" loading="lazy"><span class="embedded-post-publication-name">AlgoMaster Newsletter</span></div><div 
class="embedded-post-title-wrapper"><div class="embedded-post-title">How LLMs are Actually Trained</div></div><div class="embedded-post-body">In the last lesson, we learned how the Transformer architecture powers an LLM. In this one, we will discuss how LLMs are trained from scratch to become helpful assistants&#8230;</div><div class="embedded-post-cta-wrapper"><span class="embedded-post-cta">Read more</span></div><div class="embedded-post-meta">24 days ago &#183; 34 likes &#183; 1 comment &#183; Ashish Pratap Singh and Dr. Ashish Bamania</div></a></div><p>In <a href="https://www.anthropic.com/institute/recursive-self-improvement">another blog post</a>, they vaguely talk about self-improving systems but never share any meaningful algorithmic or systems-level details, <a href="https://www.intoai.pub/i/199462318/3-sia-self-improving-ai-with-harness-and-weight-updates">unlike other recent open-source research papers</a>. </p><p>Instead, they seem more interested in <a href="https://www.reuters.com/business/anthropic-says-ai-labs-need-coordinated-plan-halt-development-if-risks-rise-2026-06-04/">slowing AI progress</a> (except for themselves) and limiting open-source growth.</p><p>So I have decided not to write about this model and its release, even though I might lose the chance to go viral and gain a few thousand subscribers.</p><div class="pullquote"><p><em>&#8216;Into AI&#8217; is, and will always be, about substance over salesmanship.</em></p></div><p>I would rather write and tell you about something unexciting yet deeply useful than give in to the hype.</p><p><strong>I really appreciate you being here and value your time.</strong></p><p><em>Cheers!</em></p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!USc3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3211f577-61ef-4c17-903c-86c2e124ab1d_1344x202.png" 
data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!USc3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3211f577-61ef-4c17-903c-86c2e124ab1d_1344x202.png 424w, https://substackcdn.com/image/fetch/$s_!USc3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3211f577-61ef-4c17-903c-86c2e124ab1d_1344x202.png 848w, https://substackcdn.com/image/fetch/$s_!USc3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3211f577-61ef-4c17-903c-86c2e124ab1d_1344x202.png 1272w, https://substackcdn.com/image/fetch/$s_!USc3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3211f577-61ef-4c17-903c-86c2e124ab1d_1344x202.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!USc3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3211f577-61ef-4c17-903c-86c2e124ab1d_1344x202.png" width="1344" height="202" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3211f577-61ef-4c17-903c-86c2e124ab1d_1344x202.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:202,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!USc3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3211f577-61ef-4c17-903c-86c2e124ab1d_1344x202.png 424w, https://substackcdn.com/image/fetch/$s_!USc3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3211f577-61ef-4c17-903c-86c2e124ab1d_1344x202.png 848w, https://substackcdn.com/image/fetch/$s_!USc3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3211f577-61ef-4c17-903c-86c2e124ab1d_1344x202.png 1272w, https://substackcdn.com/image/fetch/$s_!USc3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3211f577-61ef-4c17-903c-86c2e124ab1d_1344x202.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[This Week In AI Research (1-6 June 26) 🗓️]]></title><description><![CDATA[The top 10 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-1-6-june</link><guid isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-1-6-june</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Mon, 08 Jun 2026 11:45:09 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!0pUz!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F96fca390-1e79-41ff-b1d6-dd2dc3e0008b_1863x1048.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!0pUz!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F96fca390-1e79-41ff-b1d6-dd2dc3e0008b_1863x1048.png" data-component-name="Image2ToDOM"><div class="image2-inset image2-full-screen"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!0pUz!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F96fca390-1e79-41ff-b1d6-dd2dc3e0008b_1863x1048.png 424w, https://substackcdn.com/image/fetch/$s_!0pUz!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F96fca390-1e79-41ff-b1d6-dd2dc3e0008b_1863x1048.png 848w, https://substackcdn.com/image/fetch/$s_!0pUz!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F96fca390-1e79-41ff-b1d6-dd2dc3e0008b_1863x1048.png 1272w, https://substackcdn.com/image/fetch/$s_!0pUz!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F96fca390-1e79-41ff-b1d6-dd2dc3e0008b_1863x1048.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!0pUz!,w_5760,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F96fca390-1e79-41ff-b1d6-dd2dc3e0008b_1863x1048.png" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/96fca390-1e79-41ff-b1d6-dd2dc3e0008b_1863x1048.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;full&quot;,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-fullscreen" alt="" srcset="https://substackcdn.com/image/fetch/$s_!0pUz!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F96fca390-1e79-41ff-b1d6-dd2dc3e0008b_1863x1048.png 424w, https://substackcdn.com/image/fetch/$s_!0pUz!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F96fca390-1e79-41ff-b1d6-dd2dc3e0008b_1863x1048.png 848w, https://substackcdn.com/image/fetch/$s_!0pUz!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F96fca390-1e79-41ff-b1d6-dd2dc3e0008b_1863x1048.png 1272w, https://substackcdn.com/image/fetch/$s_!0pUz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F96fca390-1e79-41ff-b1d6-dd2dc3e0008b_1863x1048.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h3>10. 
<strong>Dreaming: Better memory for a more helpful ChatGPT</strong></h3><p>OpenAI released its more capable and scalable system for synthesizing memory in ChatGPT, called <strong>Dreaming.</strong></p><p>Dreaming is a method used by ChatGPT to automatically curate memories in the background by referencing chat history.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!f1Kp!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fade2502f-8369-4724-ae9b-56a6dae1e609_2554x1376.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!f1Kp!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fade2502f-8369-4724-ae9b-56a6dae1e609_2554x1376.png 424w, https://substackcdn.com/image/fetch/$s_!f1Kp!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fade2502f-8369-4724-ae9b-56a6dae1e609_2554x1376.png 848w, https://substackcdn.com/image/fetch/$s_!f1Kp!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fade2502f-8369-4724-ae9b-56a6dae1e609_2554x1376.png 1272w, https://substackcdn.com/image/fetch/$s_!f1Kp!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fade2502f-8369-4724-ae9b-56a6dae1e609_2554x1376.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!f1Kp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fade2502f-8369-4724-ae9b-56a6dae1e609_2554x1376.png" width="1456" height="784" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ade2502f-8369-4724-ae9b-56a6dae1e609_2554x1376.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:784,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1037102,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200990571?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fade2502f-8369-4724-ae9b-56a6dae1e609_2554x1376.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!f1Kp!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fade2502f-8369-4724-ae9b-56a6dae1e609_2554x1376.png 424w, https://substackcdn.com/image/fetch/$s_!f1Kp!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fade2502f-8369-4724-ae9b-56a6dae1e609_2554x1376.png 848w, https://substackcdn.com/image/fetch/$s_!f1Kp!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fade2502f-8369-4724-ae9b-56a6dae1e609_2554x1376.png 1272w, https://substackcdn.com/image/fetch/$s_!f1Kp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fade2502f-8369-4724-ae9b-56a6dae1e609_2554x1376.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"><a href="https://openai.com/index/chatgpt-memory-dreaming/">Source</a></figcaption></figure></div><p>The newly released Dreaming system:</p><ul><li><p>Retains useful user context more effectively</p></li><li><p>Helps ChatGPT better follow user preferences and constraints</p></li><li><p>Stays up-to-date as situations change</p></li></ul><p>Users can also review their memories through a summary page, where they can see, update, and correct what ChatGPT remembers about them.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!rBEc!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b267159-3246-4e92-9bae-7c3d6cc7e8f7_2640x1760.webp" 
data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!rBEc!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b267159-3246-4e92-9bae-7c3d6cc7e8f7_2640x1760.webp 424w, https://substackcdn.com/image/fetch/$s_!rBEc!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b267159-3246-4e92-9bae-7c3d6cc7e8f7_2640x1760.webp 848w, https://substackcdn.com/image/fetch/$s_!rBEc!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b267159-3246-4e92-9bae-7c3d6cc7e8f7_2640x1760.webp 1272w, https://substackcdn.com/image/fetch/$s_!rBEc!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b267159-3246-4e92-9bae-7c3d6cc7e8f7_2640x1760.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!rBEc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b267159-3246-4e92-9bae-7c3d6cc7e8f7_2640x1760.webp" width="1456" height="971" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7b267159-3246-4e92-9bae-7c3d6cc7e8f7_2640x1760.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:971,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Memory summary modal showing a personalized overview of a user&#8217;s work, hobbies, travel interests, and community involvement, with options to correct or dismiss specific 
details.&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Memory summary modal showing a personalized overview of a user&#8217;s work, hobbies, travel interests, and community involvement, with options to correct or dismiss specific details." title="Memory summary modal showing a personalized overview of a user&#8217;s work, hobbies, travel interests, and community involvement, with options to correct or dismiss specific details." srcset="https://substackcdn.com/image/fetch/$s_!rBEc!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b267159-3246-4e92-9bae-7c3d6cc7e8f7_2640x1760.webp 424w, https://substackcdn.com/image/fetch/$s_!rBEc!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b267159-3246-4e92-9bae-7c3d6cc7e8f7_2640x1760.webp 848w, https://substackcdn.com/image/fetch/$s_!rBEc!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b267159-3246-4e92-9bae-7c3d6cc7e8f7_2640x1760.webp 1272w, https://substackcdn.com/image/fetch/$s_!rBEc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b267159-3246-4e92-9bae-7c3d6cc7e8f7_2640x1760.webp 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"><a href="https://openai.com/index/chatgpt-memory-dreaming/">Source</a></figcaption></figure></div><p>Read more about this release <a href="https://openai.com/index/chatgpt-memory-dreaming/">using this link</a>.</p><div><hr></div><p><strong>Into AI is a reader-supported newsletter. Gain access to deeper, members-only content by becoming a paid subscriber today.</strong></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join 'Into AI' premium today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join 'Into AI' premium today</span></a></p><div><hr></div><h3>9. 
MAI-Thinking-1: Building a Hill-Climbing Machine</h3><p>Microsoft introduced <strong>MAI-Thinking-1</strong>, a reasoning model built from scratch based on the <a href="https://arxiv.org/pdf/2512.20856">Latent</a> <a href="https://www.intoai.pub/p/build-and-train-a-mixture-of-experts">Mixture-of-Experts</a> architecture with 1T parameters (35B active).</p><p>It is the first model developed using the <strong>Hill-climbing machine</strong>. This is an iterative system for building and improving data pipelines, training infrastructure, RL environments and rewards, evaluation suites, and safety tests, which allows advancing AI while grounding progress around human needs from the ground up.</p><p>MAI-Thinking-1 performs strongly among models of similar size on STEM reasoning and coding tasks (52.8% on SWE-Bench Pro, 97.0% on AIME 2025, and 87.7% on LiveCodeBench v6).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!GfCE!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41463685-857c-4905-8e03-3824906c6ffa_1784x1354.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!GfCE!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41463685-857c-4905-8e03-3824906c6ffa_1784x1354.png 424w, https://substackcdn.com/image/fetch/$s_!GfCE!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41463685-857c-4905-8e03-3824906c6ffa_1784x1354.png 848w, https://substackcdn.com/image/fetch/$s_!GfCE!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41463685-857c-4905-8e03-3824906c6ffa_1784x1354.png 1272w, 
https://substackcdn.com/image/fetch/$s_!GfCE!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41463685-857c-4905-8e03-3824906c6ffa_1784x1354.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!GfCE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41463685-857c-4905-8e03-3824906c6ffa_1784x1354.png" width="1456" height="1105" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/41463685-857c-4905-8e03-3824906c6ffa_1784x1354.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1105,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:255197,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200990571?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41463685-857c-4905-8e03-3824906c6ffa_1784x1354.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!GfCE!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41463685-857c-4905-8e03-3824906c6ffa_1784x1354.png 424w, https://substackcdn.com/image/fetch/$s_!GfCE!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41463685-857c-4905-8e03-3824906c6ffa_1784x1354.png 848w, 
https://substackcdn.com/image/fetch/$s_!GfCE!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41463685-857c-4905-8e03-3824906c6ffa_1784x1354.png 1272w, https://substackcdn.com/image/fetch/$s_!GfCE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41463685-857c-4905-8e03-3824906c6ffa_1784x1354.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://microsoft.ai/pdf/mai-thinking-1.pdf">using this 
link</a>.</p><div><hr></div><h3>8. Benchmark Everything Everywhere All at Once</h3><p>This paper introduces the <strong>Benchmark Agent</strong>, a fully autonomous agentic system for building benchmarks. This system can orchestrate a complete benchmark construction pipeline, from user query analysis and subtask design to data annotation and quality control.</p><p>Evaluations with multiple experiments (human evaluation, LLM-as-a-judge assessment, and consistency checks) show that Benchmark Agent can generate high-quality benchmarks across text, multimodal, audio, image, and domain-specific reasoning tasks with minimal human involvement.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!JzkX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F796f004f-6605-4e58-a67e-14fe9ae5cd4b_2264x1318.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!JzkX!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F796f004f-6605-4e58-a67e-14fe9ae5cd4b_2264x1318.png 424w, https://substackcdn.com/image/fetch/$s_!JzkX!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F796f004f-6605-4e58-a67e-14fe9ae5cd4b_2264x1318.png 848w, https://substackcdn.com/image/fetch/$s_!JzkX!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F796f004f-6605-4e58-a67e-14fe9ae5cd4b_2264x1318.png 1272w, https://substackcdn.com/image/fetch/$s_!JzkX!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F796f004f-6605-4e58-a67e-14fe9ae5cd4b_2264x1318.png 1456w" 
sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!JzkX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F796f004f-6605-4e58-a67e-14fe9ae5cd4b_2264x1318.png" width="1456" height="848" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/796f004f-6605-4e58-a67e-14fe9ae5cd4b_2264x1318.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:848,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1462792,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200990571?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F796f004f-6605-4e58-a67e-14fe9ae5cd4b_2264x1318.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!JzkX!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F796f004f-6605-4e58-a67e-14fe9ae5cd4b_2264x1318.png 424w, https://substackcdn.com/image/fetch/$s_!JzkX!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F796f004f-6605-4e58-a67e-14fe9ae5cd4b_2264x1318.png 848w, https://substackcdn.com/image/fetch/$s_!JzkX!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F796f004f-6605-4e58-a67e-14fe9ae5cd4b_2264x1318.png 1272w, 
https://substackcdn.com/image/fetch/$s_!JzkX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F796f004f-6605-4e58-a67e-14fe9ae5cd4b_2264x1318.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2606.06462">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" 
href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>7. Audio Interaction Model</h3><p>This research paper introduces <strong>Audio-Interaction</strong>, a single model that unifies offline audio tasks (such as speech recognition, translation, and audio Q&amp;A) with real-time streaming tasks (such as live voice chat and proactive response).</p><p>The model operates using an always-on &#8216;perceive-decide-respond&#8217; loop, continuously listening to sound, the environment, and instructions, and deciding when to respond based on streaming semantics. The model can manage real-time ASR, streaming audio instruction following, voice dialogue, and proactive help.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!-oIu!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F80601aa5-1a69-4982-bf30-aa39fded0ecf_2746x1120.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!-oIu!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F80601aa5-1a69-4982-bf30-aa39fded0ecf_2746x1120.png 424w, https://substackcdn.com/image/fetch/$s_!-oIu!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F80601aa5-1a69-4982-bf30-aa39fded0ecf_2746x1120.png 848w, https://substackcdn.com/image/fetch/$s_!-oIu!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F80601aa5-1a69-4982-bf30-aa39fded0ecf_2746x1120.png 1272w, 
https://substackcdn.com/image/fetch/$s_!-oIu!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F80601aa5-1a69-4982-bf30-aa39fded0ecf_2746x1120.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!-oIu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F80601aa5-1a69-4982-bf30-aa39fded0ecf_2746x1120.png" width="1456" height="594" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/80601aa5-1a69-4982-bf30-aa39fded0ecf_2746x1120.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:594,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1388446,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200990571?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F80601aa5-1a69-4982-bf30-aa39fded0ecf_2746x1120.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!-oIu!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F80601aa5-1a69-4982-bf30-aa39fded0ecf_2746x1120.png 424w, https://substackcdn.com/image/fetch/$s_!-oIu!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F80601aa5-1a69-4982-bf30-aa39fded0ecf_2746x1120.png 848w, 
https://substackcdn.com/image/fetch/$s_!-oIu!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F80601aa5-1a69-4982-bf30-aa39fded0ecf_2746x1120.png 1272w, https://substackcdn.com/image/fetch/$s_!-oIu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F80601aa5-1a69-4982-bf30-aa39fded0ecf_2746x1120.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>To make this Audio Interaction model possible, researchers use <strong>Soundflow</strong>, an end-to-end framework for 
streaming-native data construction, comprehension-aware training, and asynchronous low-latency inference.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!rrEb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3ac6a645-8274-407e-8037-e9928588500d_2132x1184.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!rrEb!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3ac6a645-8274-407e-8037-e9928588500d_2132x1184.png 424w, https://substackcdn.com/image/fetch/$s_!rrEb!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3ac6a645-8274-407e-8037-e9928588500d_2132x1184.png 848w, https://substackcdn.com/image/fetch/$s_!rrEb!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3ac6a645-8274-407e-8037-e9928588500d_2132x1184.png 1272w, https://substackcdn.com/image/fetch/$s_!rrEb!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3ac6a645-8274-407e-8037-e9928588500d_2132x1184.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!rrEb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3ac6a645-8274-407e-8037-e9928588500d_2132x1184.png" width="1456" height="809" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3ac6a645-8274-407e-8037-e9928588500d_2132x1184.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:809,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:608550,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200990571?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3ac6a645-8274-407e-8037-e9928588500d_2132x1184.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!rrEb!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3ac6a645-8274-407e-8037-e9928588500d_2132x1184.png 424w, https://substackcdn.com/image/fetch/$s_!rrEb!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3ac6a645-8274-407e-8037-e9928588500d_2132x1184.png 848w, https://substackcdn.com/image/fetch/$s_!rrEb!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3ac6a645-8274-407e-8037-e9928588500d_2132x1184.png 1272w, https://substackcdn.com/image/fetch/$s_!rrEb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3ac6a645-8274-407e-8037-e9928588500d_2132x1184.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Across 8 benchmarks, Audio-Interaction has competitive performance on mainstream audio tasks while offering capabilities previously inaccessible to Large Audio Language Models (LALMs).</p><p>Read more about this research paper <a href="https://arxiv.org/pdf/2606.05121">using this link</a>.</p><div><hr></div><h3>6. 
Reasoning Structure of Large Language Models</h3><p>LLMs are usually evaluated using metrics such as final-answer accuracy or token count, but even with identical scores, these models can have fundamentally different reasoning structures.</p><p>This research paper addresses this problem by introducing a scalable logic-puzzle benchmark and a pipeline that converts unstructured reasoning traces into verifiable reasoning graphs of claims and dependencies, whose topology can be quantitatively analyzed.</p><p>Building on this, a reasoning-efficiency metric is defined to quantify the concentration of the model&#8217;s logical flow. This acts as a practical tool for diagnosing failure modes and comparing how reasoning scales with puzzle difficulty.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!4-j2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7eab1f01-55a6-470b-80ad-c0a2f92229a0_2738x634.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!4-j2!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7eab1f01-55a6-470b-80ad-c0a2f92229a0_2738x634.png 424w, https://substackcdn.com/image/fetch/$s_!4-j2!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7eab1f01-55a6-470b-80ad-c0a2f92229a0_2738x634.png 848w, https://substackcdn.com/image/fetch/$s_!4-j2!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7eab1f01-55a6-470b-80ad-c0a2f92229a0_2738x634.png 1272w, 
https://substackcdn.com/image/fetch/$s_!4-j2!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7eab1f01-55a6-470b-80ad-c0a2f92229a0_2738x634.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!4-j2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7eab1f01-55a6-470b-80ad-c0a2f92229a0_2738x634.png" width="1456" height="337" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7eab1f01-55a6-470b-80ad-c0a2f92229a0_2738x634.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:337,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:267594,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200990571?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7eab1f01-55a6-470b-80ad-c0a2f92229a0_2738x634.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!4-j2!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7eab1f01-55a6-470b-80ad-c0a2f92229a0_2738x634.png 424w, https://substackcdn.com/image/fetch/$s_!4-j2!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7eab1f01-55a6-470b-80ad-c0a2f92229a0_2738x634.png 848w, https://substackcdn.com/image/fetch/$s_!4-j2!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7eab1f01-55a6-470b-80ad-c0a2f92229a0_2738x634.png 
1272w, https://substackcdn.com/image/fetch/$s_!4-j2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7eab1f01-55a6-470b-80ad-c0a2f92229a0_2738x634.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2606.03883">using this link</a>.</p><div><hr></div><h3>5. Do Transformers Need Three Projections?</h3><p>This research paper examines whether standard Transformer attention really needs separate query, key, and value projections.</p><p>This is done by comparing three types of shared projections:</p><ul><li><p>Q-K=V (Shared key-value)</p></li><li><p>Q=K-V (Shared query-key)</p></li><li><p>Q=K=V (Fully shared single projection)</p></li></ul><p>Experiments across synthetic tasks, vision, and language modeling show that Transformers with shared projections perform on par with, or occasionally better than, the standard Transformer.</p><p>In language modeling, shared key-value projection (Q-K=V) achieves a 50% KV cache reduction with only a 3.1% increase in perplexity.</p><p>These Transformers also preserve attention directionality because keys and values can share similar representation spaces and attention operates in a low-rank regime.</p><p>Since projection sharing is complementary to head-sharing (<a href="https://www.intoai.pub/p/grouped-query-attention">GQA</a>/<a href="https://www.intoai.pub/p/multi-query-attention">MQA</a>), combining Q-K=V with&nbsp;GQA-4&nbsp;and MQA leads to cache reductions&nbsp;of 87.5%&nbsp;and&nbsp;96.9%, respectively. 
This has direct, quantifiable benefits for inference memory, which are especially valuable for edge deployment of LLMs.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!8M5R!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F768b4898-4edd-4e41-8fff-ffbc653cb465_2872x784.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!8M5R!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F768b4898-4edd-4e41-8fff-ffbc653cb465_2872x784.png 424w, https://substackcdn.com/image/fetch/$s_!8M5R!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F768b4898-4edd-4e41-8fff-ffbc653cb465_2872x784.png 848w, https://substackcdn.com/image/fetch/$s_!8M5R!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F768b4898-4edd-4e41-8fff-ffbc653cb465_2872x784.png 1272w, https://substackcdn.com/image/fetch/$s_!8M5R!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F768b4898-4edd-4e41-8fff-ffbc653cb465_2872x784.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!8M5R!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F768b4898-4edd-4e41-8fff-ffbc653cb465_2872x784.png" width="1456" height="397" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/768b4898-4edd-4e41-8fff-ffbc653cb465_2872x784.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:397,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:609591,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200990571?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F768b4898-4edd-4e41-8fff-ffbc653cb465_2872x784.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!8M5R!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F768b4898-4edd-4e41-8fff-ffbc653cb465_2872x784.png 424w, https://substackcdn.com/image/fetch/$s_!8M5R!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F768b4898-4edd-4e41-8fff-ffbc653cb465_2872x784.png 848w, https://substackcdn.com/image/fetch/$s_!8M5R!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F768b4898-4edd-4e41-8fff-ffbc653cb465_2872x784.png 1272w, https://substackcdn.com/image/fetch/$s_!8M5R!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F768b4898-4edd-4e41-8fff-ffbc653cb465_2872x784.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2606.04032">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>4. Nemotron 3 Ultra</h3><p>NVIDIA introduced its most capable model yet, called <strong>Nemotron 3 Ultra</strong>. 
This is a 550B-parameter (55 billion active) Mixture-of-Experts Hybrid Mamba-Attention language model with a 1M-token context window.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!FKVr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc51940a-f379-4b90-823a-addceaa091a5_2640x642.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!FKVr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc51940a-f379-4b90-823a-addceaa091a5_2640x642.png 424w, https://substackcdn.com/image/fetch/$s_!FKVr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc51940a-f379-4b90-823a-addceaa091a5_2640x642.png 848w, https://substackcdn.com/image/fetch/$s_!FKVr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc51940a-f379-4b90-823a-addceaa091a5_2640x642.png 1272w, https://substackcdn.com/image/fetch/$s_!FKVr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc51940a-f379-4b90-823a-addceaa091a5_2640x642.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!FKVr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc51940a-f379-4b90-823a-addceaa091a5_2640x642.png" width="1456" height="354" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fc51940a-f379-4b90-823a-addceaa091a5_2640x642.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:354,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:171824,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200990571?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc51940a-f379-4b90-823a-addceaa091a5_2640x642.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!FKVr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc51940a-f379-4b90-823a-addceaa091a5_2640x642.png 424w, https://substackcdn.com/image/fetch/$s_!FKVr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc51940a-f379-4b90-823a-addceaa091a5_2640x642.png 848w, https://substackcdn.com/image/fetch/$s_!FKVr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc51940a-f379-4b90-823a-addceaa091a5_2640x642.png 1272w, https://substackcdn.com/image/fetch/$s_!FKVr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc51940a-f379-4b90-823a-addceaa091a5_2640x642.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Nemotron 3 Ultra combines multiple key technologies such as:</p><ul><li><p>Latent-MoE architecture </p></li><li><p>Multi Token Prediction (MTP)</p></li><li><p>NVFP4 
pre-training</p></li><li><p>Multi-environment RLVR</p></li><li><p><a href="https://www.emergentmind.com/topics/multi-teacher-on-policy-distillation-mopd">Multi-teacher On-Policy Distillation (MOPD)</a> for post-training</p></li><li><p>Reasoning budget control</p></li></ul><p>Nemotron 3 Ultra delivers up to 6&#215; higher inference throughput than SOTA publicly available LLMs while achieving similar accuracy, which makes it ideal for long-running autonomous agentic tasks. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ZC9w!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F493e4e18-37da-4412-a996-abbd0c35d009_1818x1310.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ZC9w!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F493e4e18-37da-4412-a996-abbd0c35d009_1818x1310.png 424w, https://substackcdn.com/image/fetch/$s_!ZC9w!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F493e4e18-37da-4412-a996-abbd0c35d009_1818x1310.png 848w, https://substackcdn.com/image/fetch/$s_!ZC9w!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F493e4e18-37da-4412-a996-abbd0c35d009_1818x1310.png 1272w, https://substackcdn.com/image/fetch/$s_!ZC9w!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F493e4e18-37da-4412-a996-abbd0c35d009_1818x1310.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!ZC9w!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F493e4e18-37da-4412-a996-abbd0c35d009_1818x1310.png" width="1456" height="1049" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/493e4e18-37da-4412-a996-abbd0c35d009_1818x1310.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1049,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:410696,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200990571?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F493e4e18-37da-4412-a996-abbd0c35d009_1818x1310.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ZC9w!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F493e4e18-37da-4412-a996-abbd0c35d009_1818x1310.png 424w, https://substackcdn.com/image/fetch/$s_!ZC9w!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F493e4e18-37da-4412-a996-abbd0c35d009_1818x1310.png 848w, https://substackcdn.com/image/fetch/$s_!ZC9w!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F493e4e18-37da-4412-a996-abbd0c35d009_1818x1310.png 1272w, https://substackcdn.com/image/fetch/$s_!ZC9w!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F493e4e18-37da-4412-a996-abbd0c35d009_1818x1310.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://research.nvidia.com/labs/nemotron/files/NVIDIA-Nemotron-3-Ultra-Technical-Report.pdf">using this link</a>.</p><div><hr></div><h3>3. Cosmos 3</h3><p>This research paper introduces <strong>Cosmos 3</strong>, a family of omnimodal world models by NVIDIA. 
These models are designed to jointly process and generate language, image, video, audio, and action sequences using a single Mixture-of-Transformers architecture.</p><p>The goal of the model is to serve as a general-purpose backbone for physical AI, combining capabilities that are usually split across vision-language models, video generators, world simulators, and robot policy/action models.</p><p>Cosmos 3 achieves SOTA results across multiple understanding and generation tasks, and the post-trained Cosmos 3 models are ranked as the best open-source Text-to-Image and Image-to-Video models by <a href="https://artificialanalysis.ai/">Artificial Analysis</a> and the best policy model by <a href="https://robo-arena.github.io/">RoboArena</a>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!GUAU!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33489f99-a731-4d04-ab0a-00250db283d3_2620x1190.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!GUAU!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33489f99-a731-4d04-ab0a-00250db283d3_2620x1190.png 424w, https://substackcdn.com/image/fetch/$s_!GUAU!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33489f99-a731-4d04-ab0a-00250db283d3_2620x1190.png 848w, https://substackcdn.com/image/fetch/$s_!GUAU!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33489f99-a731-4d04-ab0a-00250db283d3_2620x1190.png 1272w, 
https://substackcdn.com/image/fetch/$s_!GUAU!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33489f99-a731-4d04-ab0a-00250db283d3_2620x1190.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!GUAU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33489f99-a731-4d04-ab0a-00250db283d3_2620x1190.png" width="1456" height="661" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/33489f99-a731-4d04-ab0a-00250db283d3_2620x1190.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:661,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:326568,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200990571?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33489f99-a731-4d04-ab0a-00250db283d3_2620x1190.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!GUAU!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33489f99-a731-4d04-ab0a-00250db283d3_2620x1190.png 424w, https://substackcdn.com/image/fetch/$s_!GUAU!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33489f99-a731-4d04-ab0a-00250db283d3_2620x1190.png 848w, 
https://substackcdn.com/image/fetch/$s_!GUAU!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33489f99-a731-4d04-ab0a-00250db283d3_2620x1190.png 1272w, https://substackcdn.com/image/fetch/$s_!GUAU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F33489f99-a731-4d04-ab0a-00250db283d3_2620x1190.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2606.02800">using this link</a>.</p><div><hr></div><h3>2. 
Agents&#8217; Last Exam</h3><p>This research paper introduces&nbsp;<strong>Agents&#8217; Last Exam (ALE)</strong>, a benchmark designed to evaluate AI agents on long-horizon, economically valuable, real-world tasks with verifiable outcomes.</p><p>Built by 250+ industry experts, it spans 55 targeted sub-industries and includes 1,500+ tasks covering most major fields of professional work performed on a computer.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!RQrB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06c19ef3-fd85-49f6-b1d1-c124d1aec28a_2374x1114.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!RQrB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06c19ef3-fd85-49f6-b1d1-c124d1aec28a_2374x1114.png 424w, https://substackcdn.com/image/fetch/$s_!RQrB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06c19ef3-fd85-49f6-b1d1-c124d1aec28a_2374x1114.png 848w, https://substackcdn.com/image/fetch/$s_!RQrB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06c19ef3-fd85-49f6-b1d1-c124d1aec28a_2374x1114.png 1272w, https://substackcdn.com/image/fetch/$s_!RQrB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06c19ef3-fd85-49f6-b1d1-c124d1aec28a_2374x1114.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!RQrB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06c19ef3-fd85-49f6-b1d1-c124d1aec28a_2374x1114.png" width="1456" height="683" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/06c19ef3-fd85-49f6-b1d1-c124d1aec28a_2374x1114.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:683,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1860817,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200990571?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06c19ef3-fd85-49f6-b1d1-c124d1aec28a_2374x1114.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!RQrB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06c19ef3-fd85-49f6-b1d1-c124d1aec28a_2374x1114.png 424w, https://substackcdn.com/image/fetch/$s_!RQrB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06c19ef3-fd85-49f6-b1d1-c124d1aec28a_2374x1114.png 848w, https://substackcdn.com/image/fetch/$s_!RQrB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06c19ef3-fd85-49f6-b1d1-c124d1aec28a_2374x1114.png 1272w, https://substackcdn.com/image/fetch/$s_!RQrB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06c19ef3-fd85-49f6-b1d1-c124d1aec28a_2374x1114.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Results show that today&#8217;s mainstream agentic models are still far from solving the toughest tasks on this benchmark, with an average full pass rate of only 2.6%. 
</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Gawd!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe645dce1-fe08-4208-8c4b-ddf4987ada90_2882x1322.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Gawd!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe645dce1-fe08-4208-8c4b-ddf4987ada90_2882x1322.png 424w, https://substackcdn.com/image/fetch/$s_!Gawd!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe645dce1-fe08-4208-8c4b-ddf4987ada90_2882x1322.png 848w, https://substackcdn.com/image/fetch/$s_!Gawd!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe645dce1-fe08-4208-8c4b-ddf4987ada90_2882x1322.png 1272w, https://substackcdn.com/image/fetch/$s_!Gawd!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe645dce1-fe08-4208-8c4b-ddf4987ada90_2882x1322.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Gawd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe645dce1-fe08-4208-8c4b-ddf4987ada90_2882x1322.png" width="1456" height="668" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e645dce1-fe08-4208-8c4b-ddf4987ada90_2882x1322.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:668,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:272714,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200990571?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe645dce1-fe08-4208-8c4b-ddf4987ada90_2882x1322.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Gawd!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe645dce1-fe08-4208-8c4b-ddf4987ada90_2882x1322.png 424w, https://substackcdn.com/image/fetch/$s_!Gawd!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe645dce1-fe08-4208-8c4b-ddf4987ada90_2882x1322.png 848w, https://substackcdn.com/image/fetch/$s_!Gawd!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe645dce1-fe08-4208-8c4b-ddf4987ada90_2882x1322.png 1272w, https://substackcdn.com/image/fetch/$s_!Gawd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe645dce1-fe08-4208-8c4b-ddf4987ada90_2882x1322.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Rather than serving as a static benchmark, ALE is intended to grow continuously as new workflows and industries are onboarded, making it a tool for measuring real-world agent capability relevant to GDP impact.</p><p>Read more about this research paper <a href="https://arxiv.org/pdf/2606.05405">using this link</a>.</p><div><hr></div><h3>1. 
<strong>MiniMax M3</strong></h3><p>This blog post introduces <strong>MiniMax M3</strong>, currently the first and only open-weight model that brings the following three capabilities together:</p><ol><li><p>Frontier-level performance on specialized tasks such as coding and agentic work</p></li><li><p>Ultra-long context windows of up to 1M tokens</p></li><li><p>Native multimodality: the model supports image and video input and can operate on a desktop computer</p></li></ol><p>The model uses <strong>MiniMax Sparse Attention (MSA)</strong>, a sparse attention mechanism that enables long-context processing at a much lower per-token compute cost, with large speedups in prefill and decoding.</p><ul><li><p>MiniMax M3 surpasses GPT-5.5 and Gemini 3.1 Pro and approaches Opus 4.7 on <a href="https://arxiv.org/abs/2509.16941">SWE-Bench Pro</a>.</p></li><li><p>On SVG-Bench, an internal benchmark that comprehensively evaluates SVG generation performance, MiniMax M3 surpasses Opus 4.7. </p></li><li><p>M3 also achieves top scores on <a href="https://arxiv.org/abs/2604.06132">Claw-Eval</a>, an end-to-end evaluation framework for autonomous agents, and on OmniDocBench, a multimodal benchmark.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Zhkp!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f0e17c6-a49b-4cd8-b64a-2e21c288439a_2338x1082.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Zhkp!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f0e17c6-a49b-4cd8-b64a-2e21c288439a_2338x1082.jpeg 424w, 
https://substackcdn.com/image/fetch/$s_!Zhkp!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f0e17c6-a49b-4cd8-b64a-2e21c288439a_2338x1082.jpeg 848w, https://substackcdn.com/image/fetch/$s_!Zhkp!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f0e17c6-a49b-4cd8-b64a-2e21c288439a_2338x1082.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!Zhkp!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f0e17c6-a49b-4cd8-b64a-2e21c288439a_2338x1082.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Zhkp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f0e17c6-a49b-4cd8-b64a-2e21c288439a_2338x1082.jpeg" width="1456" height="674" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9f0e17c6-a49b-4cd8-b64a-2e21c288439a_2338x1082.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:674,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Zhkp!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f0e17c6-a49b-4cd8-b64a-2e21c288439a_2338x1082.jpeg 424w, 
https://substackcdn.com/image/fetch/$s_!Zhkp!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f0e17c6-a49b-4cd8-b64a-2e21c288439a_2338x1082.jpeg 848w, https://substackcdn.com/image/fetch/$s_!Zhkp!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f0e17c6-a49b-4cd8-b64a-2e21c288439a_2338x1082.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!Zhkp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f0e17c6-a49b-4cd8-b64a-2e21c288439a_2338x1082.jpeg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this release <a href="https://www.minimax.io/blog/minimax-m3">using this link</a>.</p><div><hr></div><p>This newsletter edition is completely free to read. </p><p>If you found it valuable, click the like button &#10084;&#65039; and consider subscribing for more such content every week. </p><p>If you have any questions or suggestions, feel free to leave a comment below.</p><p><strong>Into AI is a reader-supported newsletter. Gain access to deeper, members-only content by becoming a paid subscriber today.</strong></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join 'Into AI' premium today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join 'Into AI' premium today</span></a></p>]]></content:encoded></item><item><title><![CDATA[Distributed Training of Llama, Explained Simply]]></title><description><![CDATA[A short and simple lesson on the techniques used to train LLMs like Meta Llama on massive GPU clusters.]]></description><link>https://www.intoai.pub/p/distributed-training-of-llama-explained</link><guid isPermaLink="false">https://www.intoai.pub/p/distributed-training-of-llama-explained</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Fri, 05 Jun 2026 11:27:54 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!k_v2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!k_v2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!k_v2!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png 424w, https://substackcdn.com/image/fetch/$s_!k_v2!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png 848w, https://substackcdn.com/image/fetch/$s_!k_v2!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png 1272w, https://substackcdn.com/image/fetch/$s_!k_v2!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!k_v2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png" width="1456" height="645" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:645,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:334522,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200488145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!k_v2!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png 424w, https://substackcdn.com/image/fetch/$s_!k_v2!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png 848w, https://substackcdn.com/image/fetch/$s_!k_v2!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png 1272w, https://substackcdn.com/image/fetch/$s_!k_v2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff90de4a8-8269-4b0c-bd3a-7dec14c14305_2216x982.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Understanding distributed setups for LLM training and inference is one of the biggest advantages that you can have as an engineer today. This is what we will work towards in this lesson by studying how <a href="https://arxiv.org/pdf/2407.21783">Meta&#8217;s Llama 3 models</a> were trained in a distributed setting.</p><div class="comment" data-attrs="{&quot;url&quot;:&quot;https://open.substack.com/&quot;,&quot;commentId&quot;:262493695,&quot;comment&quot;:{&quot;id&quot;:262493695,&quot;date&quot;:&quot;2026-05-21T00:03:03.335Z&quot;,&quot;edited_at&quot;:&quot;2026-05-21T00:45:27.948Z&quot;,&quot;body&quot;:&quot;&#128640; You have a great chance of getting hired at a frontier AI company/ lab if you're amazing at Distributed AI/ML. \n\nHere are 5 resources to get started. 
\n\n\n\n\n\nA Systems View of LLMs on TPUs (JAX / Google scaling book) &#8594; https://jax-ml.github.io/scaling-book/\n\n\n\nThe Ultra-Scale Playbook: Training LLMs on GPU Clusters (Hugging Face) &#8594; https://huggingface.co/spaces/nanotron/ultrascale-playbook\n\n\n\nTraining Deep Learning Models on Multiple GPUs &#8594; https://www.intoai.pub/p/distributed-data-parallel\n\n\n\nDistributed Machine Learning Patterns by Terry Tang &#8594; https://www.manning.com/books/distributed-machine-learning-patterns\n\n\n\nDeep Learning at Scale by Suneeta Mall &#8594; https://suneeta-mall.github.io/projects/oreilly_deep_learning_at_scale/\n\n(Bookmark them and share them with others. &#9851;&#65039;)&quot;,&quot;body_json&quot;:{&quot;attrs&quot;:{&quot;schemaVersion&quot;:&quot;v1&quot;},&quot;type&quot;:&quot;doc&quot;,&quot;content&quot;:[{&quot;type&quot;:&quot;paragraph&quot;,&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;&#128640; You have a great chance of getting hired at a frontier AI company/ lab if you're amazing at Distributed AI/ML. &quot;}]},{&quot;type&quot;:&quot;paragraph&quot;,&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;Here are 5 resources to get started. 
&quot;}]},{&quot;attrs&quot;:{&quot;start&quot;:1},&quot;type&quot;:&quot;orderedList&quot;,&quot;content&quot;:[{&quot;type&quot;:&quot;listItem&quot;,&quot;content&quot;:[{&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;A Systems View of LLMs on TPUs (JAX / Google scaling book) &#8594; &quot;},{&quot;type&quot;:&quot;text&quot;,&quot;marks&quot;:[{&quot;type&quot;:&quot;link&quot;,&quot;attrs&quot;:{&quot;class&quot;:&quot;note-link&quot;,&quot;rel&quot;:&quot;nofollow ugc noopener&quot;,&quot;target&quot;:&quot;_blank&quot;,&quot;href&quot;:&quot;https://jax-ml.github.io/scaling-book/&quot;}}],&quot;text&quot;:&quot;https://jax-ml.github.io/scaling-book/&quot;}],&quot;type&quot;:&quot;paragraph&quot;}]},{&quot;type&quot;:&quot;listItem&quot;,&quot;content&quot;:[{&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;The Ultra-Scale Playbook: Training LLMs on GPU Clusters (Hugging Face) &#8594; &quot;},{&quot;marks&quot;:[{&quot;type&quot;:&quot;link&quot;,&quot;attrs&quot;:{&quot;href&quot;:&quot;https://huggingface.co/spaces/nanotron/ultrascale-playbook&quot;,&quot;target&quot;:&quot;_blank&quot;,&quot;rel&quot;:&quot;nofollow ugc noopener&quot;,&quot;class&quot;:&quot;note-link&quot;}}],&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;https://huggingface.co/spaces/nanotron/ultrascale-playbook&quot;}],&quot;type&quot;:&quot;paragraph&quot;}]},{&quot;type&quot;:&quot;listItem&quot;,&quot;content&quot;:[{&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;Training Deep Learning Models on Multiple GPUs &#8594; 
&quot;},{&quot;text&quot;:&quot;https://www.intoai.pub/p/distributed-data-parallel&quot;,&quot;type&quot;:&quot;text&quot;,&quot;marks&quot;:[{&quot;type&quot;:&quot;link&quot;,&quot;attrs&quot;:{&quot;href&quot;:&quot;https://www.intoai.pub/p/distributed-data-parallel&quot;,&quot;target&quot;:&quot;_blank&quot;,&quot;class&quot;:&quot;note-link&quot;,&quot;rel&quot;:&quot;nofollow ugc noopener&quot;}}]}],&quot;type&quot;:&quot;paragraph&quot;}]},{&quot;type&quot;:&quot;listItem&quot;,&quot;content&quot;:[{&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;Distributed Machine Learning Patterns by Terry Tang &#8594; &quot;},{&quot;marks&quot;:[{&quot;attrs&quot;:{&quot;href&quot;:&quot;https://www.manning.com/books/distributed-machine-learning-patterns&quot;,&quot;rel&quot;:&quot;nofollow ugc noopener&quot;,&quot;class&quot;:&quot;note-link&quot;,&quot;target&quot;:&quot;_blank&quot;},&quot;type&quot;:&quot;link&quot;}],&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;https://www.manning.com/books/distributed-machine-learning-patterns&quot;}],&quot;type&quot;:&quot;paragraph&quot;}]},{&quot;type&quot;:&quot;listItem&quot;,&quot;content&quot;:[{&quot;content&quot;:[{&quot;text&quot;:&quot;Deep Learning at Scale by Suneeta Mall &#8594; &quot;,&quot;type&quot;:&quot;text&quot;},{&quot;text&quot;:&quot;https://suneeta-mall.github.io/projects/oreilly_deep_learning_at_scale/&quot;,&quot;marks&quot;:[{&quot;attrs&quot;:{&quot;class&quot;:&quot;note-link&quot;,&quot;target&quot;:&quot;_blank&quot;,&quot;href&quot;:&quot;https://suneeta-mall.github.io/projects/oreilly_deep_learning_at_scale/&quot;,&quot;rel&quot;:&quot;nofollow ugc noopener&quot;},&quot;type&quot;:&quot;link&quot;}],&quot;type&quot;:&quot;text&quot;}],&quot;type&quot;:&quot;paragraph&quot;}]}]},{&quot;type&quot;:&quot;paragraph&quot;,&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;(Bookmark them and share them with others. 
&#9851;&#65039;)&quot;}]}]},&quot;restacks&quot;:1,&quot;reaction_count&quot;:7,&quot;children_count&quot;:0,&quot;attachments&quot;:[{&quot;id&quot;:&quot;6acafb25-72bf-4118-8ff2-1d00b57d9139&quot;,&quot;type&quot;:&quot;image&quot;,&quot;imageUrl&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/17c3952a-6fc4-48c3-9fad-58d47278fb53_2200x968.png&quot;,&quot;imageWidth&quot;:2200,&quot;imageHeight&quot;:968,&quot;explicit&quot;:false}],&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;user_id&quot;:155457308,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;user_bestseller_tier&quot;:100,&quot;userStatus&quot;:{&quot;bestsellerTier&quot;:100,&quot;subscriberTier&quot;:1,&quot;leaderboard&quot;:null,&quot;vip&quot;:false,&quot;badge&quot;:{&quot;type&quot;:&quot;bestseller&quot;,&quot;tier&quot;:100},&quot;paidPublicationIds&quot;:[1815472,2833541,63213],&quot;subscriber&quot;:null}},&quot;source&quot;:null,&quot;forumChannel&quot;:null}" data-component-name="CommentPlaceholder"></div><p>Let&#8217;s begin!</p><div><hr></div><h3>How big are modern-day GPU clusters?</h3><p>Building an LLM requires thousands of GPUs working together in sync and months of training time. This is because no single GPU has enough memory to store the model and optimizer parameters, nor does it have the compute to train a billion-parameter model on trillions of tokens in any reasonable amount of time.</p><p>The Llama 3-405B model was trained on a cluster of 16,000 GPUs (each with 80GB of <a href="https://www.synopsys.com/glossary/what-is-high-bandwitdth-memory-3.html">HBM3</a>) across multiple servers. Each server contained 8 GPUs and 2 CPUs, with the GPUs connected via&nbsp;<a href="https://www.nvidia.com/en-gb/data-center/nvlink/">NVLink</a>. 
It took 54 days and <a href="https://github.com/meta-llama/llama-models/blob/main/models/llama3_1/MODEL_CARD.md">30.84M GPU hours</a> to train this model.</p><div><hr></div><h3>How does LLM training occur on GPU clusters?</h3><p>Four different types of parallelism were used to train the Llama 3-405B model. These are combined into a technique called <strong>4D parallelism</strong>.</p><p>This technique efficiently distributes computation across multiple GPUs and ensures that the following fit within each GPU&#8217;s HBM (memory):</p><ul><li><p>Model parameters</p></li><li><p>Optimizer states</p></li><li><p>Gradients</p></li><li><p>Activations</p></li></ul><p>The four parallelism techniques used in 4D parallelism are:</p><ol><li><p>Data parallelism (DP)</p></li><li><p>Context parallelism (CP)</p></li><li><p>Tensor parallelism (TP)</p></li><li><p>Pipeline parallelism (PP)</p></li></ol><p>The last two (TP and PP) are also known as&nbsp;<strong>Model parallelism</strong> techniques as they split the model across GPUs.</p><p>Let&#8217;s discuss them one by one.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>1. Data Parallelism (DP)</h4><p>The idea behind <strong><a href="https://www.intoai.pub/p/distributed-data-parallel">Data parallelism (DP)</a></strong> is to replicate the model across multiple GPUs and run the forward and backward passes in parallel, with each GPU processing a different small batch of training data. This is then followed by synchronizing the gradients between all GPUs.</p><p>If this were a small model being trained, DP would have worked well. 
But a 405B model can&#8217;t be replicated on each GPU with a mere 80 GB of memory.</p><p>This is why Llama&#8217;s 4D parallelism uses a type of Data parallelism called <strong>FSDP (Fully Sharded Data Parallelism)</strong>, where, <strong>instead of replicating the model</strong> across GPUs (like in DP), FSDP <strong>shards</strong> the model parameters, optimizer states, and gradients across GPUs and temporarily gathers them on demand when they're needed for computation.</p><div><hr></div><h4>2. Context Parallelism (CP)</h4><p>Training LLMs on long sequences (128K+ tokens per sequence) can exceed the memory that is available per GPU. This is because of the <a href="https://www.intoai.pub/p/causal-mha">attention mechanism</a> in LLMs, which scales quadratically with sequence length, as every token attends to every other token in the sequence.</p><p>This bottleneck is solved by Context parallelism (CP), which <strong>divides the input sequence into smaller segments</strong> and splits them across multiple GPUs.</p><p>You can read more about CP in the <a href="https://arxiv.org/abs/2310.01889">Ring Attention paper</a> listed under Further Reading.</p><div><hr></div><h4>3. Tensor Parallelism (TP)</h4><p>Tensor parallelism (TP) <strong>splits individual weight tensors</strong> along the model&#8217;s hidden dimension into multiple chunks and distributes them across GPUs. </p><p>This means that each GPU stores and computes only a fraction of matrix multiplication operations. This matrix splitting can occur column-wise or row-wise depending on which layer of the transformer block the weights belong to. Partial calculations are followed by synchronization to combine the results across all GPUs.</p><div><hr></div><h4>4. Pipeline Parallelism (PP)</h4><p>Pipeline parallelism (PP) <strong>splits the layers</strong> of the LLM into partitions called <strong>stages</strong> (groups of model layers) across multiple GPUs and trains them with small batches of data (<strong>micro-batches</strong>) as in an assembly line / pipeline. 
In this way, each GPU only needs to store and process a portion of the model, which significantly reduces the memory requirements per GPU.</p><div><hr></div><h3>How do these come together in 4D parallelism?</h3><p>For training Llama 3-405B, GPUs in a cluster are divided into groups and labeled using a vector as <code>[TP, CP, PP, DP]</code>, where <code>DP</code> is actually FSDP.</p><p>The following example shows a cluster with 16 GPUs, each of which is assigned a unique label.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!17y_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a0049b5-5233-49a8-b9dd-f2bdbe293d6f_2640x1198.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!17y_!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a0049b5-5233-49a8-b9dd-f2bdbe293d6f_2640x1198.png 424w, https://substackcdn.com/image/fetch/$s_!17y_!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a0049b5-5233-49a8-b9dd-f2bdbe293d6f_2640x1198.png 848w, https://substackcdn.com/image/fetch/$s_!17y_!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a0049b5-5233-49a8-b9dd-f2bdbe293d6f_2640x1198.png 1272w, https://substackcdn.com/image/fetch/$s_!17y_!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a0049b5-5233-49a8-b9dd-f2bdbe293d6f_2640x1198.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!17y_!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a0049b5-5233-49a8-b9dd-f2bdbe293d6f_2640x1198.png" width="1200" height="544.7802197802198" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8a0049b5-5233-49a8-b9dd-f2bdbe293d6f_2640x1198.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:661,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:242069,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200488145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a0049b5-5233-49a8-b9dd-f2bdbe293d6f_2640x1198.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!17y_!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a0049b5-5233-49a8-b9dd-f2bdbe293d6f_2640x1198.png 424w, https://substackcdn.com/image/fetch/$s_!17y_!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a0049b5-5233-49a8-b9dd-f2bdbe293d6f_2640x1198.png 848w, https://substackcdn.com/image/fetch/$s_!17y_!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a0049b5-5233-49a8-b9dd-f2bdbe293d6f_2640x1198.png 1272w, 
https://substackcdn.com/image/fetch/$s_!17y_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a0049b5-5233-49a8-b9dd-f2bdbe293d6f_2640x1198.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">A GPU&#8217;s position in 4D parallelism is represented as a vector, [D1, D2, D3, D4], where D(i) is the index on the i-th parallelism dimension. 
In this example, GPU0 [TP0, CP0, PP0, DP0] and GPU1 [TP1, CP0, PP0, DP0] are in the same TP group, GPU0 and GPU2 are in the same CP group, GPU0 and GPU4 are in the same PP group, and GPU0 and GPU8 are in the same DP group. (<a href="https://arxiv.org/abs/2407.21783">Source</a>)</figcaption></figure></div><p>The vector label intentionally arranges the four dimensions of parallelism from the highest to the lowest communication bandwidth each requires (TP &gt; CP &gt; PP &gt; DP/FSDP).</p><p>TP requires a high-bandwidth, low-latency connection between GPUs to function well. This is why it is implemented within a single server where <a href="https://www.nvidia.com/en-gb/data-center/nvlink/">NVLink</a> connects the 8 GPUs together.</p><p>On the other hand, PP and FSDP have lower bandwidth requirements and tolerate communication over a <a href="https://en.wikipedia.org/wiki/Multi-hop_routing">multi-hop network</a> with higher latency than NVLink.</p><p>The team used Meta&#8217;s <a href="https://arxiv.org/abs/2510.20171">NCCLX collective communication framework</a> to optimize this 4D parallelized large-scale LLM training.</p><div><hr></div><h3>Are there more dimensions to parallelism?</h3><p>Yes, there are! </p><p>Llama 3 models are dense models. This means they contain fully connected (dense) feed-forward layers in their Transformer architecture.</p><p>But if they were <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Mixture-of-Experts (MoE) models</a>, there&#8217;s another parallelism technique that could be applied. 
It&#8217;s called <strong>Expert Parallelism (EP)</strong>.</p><p>A MoE model contains multiple small feed-forward networks, called Experts, that handle different tokens, using another network, called a Router, that selects which Expert to use for each token.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!j1yn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!j1yn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 424w, https://substackcdn.com/image/fetch/$s_!j1yn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 848w, https://substackcdn.com/image/fetch/$s_!j1yn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 1272w, https://substackcdn.com/image/fetch/$s_!j1yn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!j1yn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png" width="1456" height="759" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:759,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:161622,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/200488145?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!j1yn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 424w, https://substackcdn.com/image/fetch/$s_!j1yn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 848w, https://substackcdn.com/image/fetch/$s_!j1yn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 1272w, https://substackcdn.com/image/fetch/$s_!j1yn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd2edc797-e726-4aa8-9158-da9dbfef55c0_2192x1142.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Architecture of the MoE transformer with 8 experts and a router (<a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Source</a>)</figcaption></figure></div><p>Because an MoE model can have hundreds of Experts, all of them won't fit on a single GPU. This is the bottleneck that Expert Parallelism (EP) works around. EP spreads the experts across GPUs, so each GPU holds the weights of a subset of experts at a time.</p><p>When a token is processed by the MoE layer and a router selects the expert(s) best suited to it, the token is routed to the GPU containing those experts. 
These experts perform the computation and send the result back to the token's original GPU.</p><p>Expert Parallelism (EP), when combined with other parallelism techniques, is referred to as <strong>5D parallelism</strong>.</p><div><hr></div><h3>TL;DR</h3><p>To summarise:</p><ul><li><p>Meta used 4D parallelism to train Llama 3 models</p></li><li><p>4D parallelism used in this case consists of:</p><ul><li><p>Fully Sharded Data Parallelism (FSDP)</p></li><li><p>Context Parallelism (CP)</p></li><li><p>Two Model Parallelism techniques called Tensor Parallelism (TP) and Pipeline Parallelism (PP)</p></li></ul></li><li><p>Fully Sharded Data Parallelism (FSDP) shards model parameters, gradients, and optimizer states across GPUs.</p></li><li><p>Context Parallelism (CP) divides the input data along the sequence length dimension to handle very long text inputs.</p></li><li><p>Tensor Parallelism (TP) splits individual weight tensors and activations within a single layer along the model&#8217;s hidden dimension.</p></li><li><p>Pipeline Parallelism (PP) splits the model's layers sequentially into groups called stages.</p></li><li><p><strong>Expert Parallelism (EP)</strong> is used to train <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Mixture-of-Experts (MoE) models</a>. 
EP splits different experts across GPUs.</p></li><li><p>EP combined with 4D parallelism is referred to as 5D parallelism.</p></li></ul><div><hr></div><h3>Further Reading</h3><ul><li><p><a href="https://arxiv.org/abs/2304.11277">PyTorch FSDP: Experiences on Scaling Fully Sharded Data Parallel</a></p></li><li><p><a href="https://arxiv.org/abs/2310.01889">Ring Attention with Blockwise Transformers for Near-Infinite Context</a></p></li><li><p><a href="https://arxiv.org/abs/1811.06965">GPipe: Efficient Training of Giant Neural Networks using Pipeline Parallelism</a></p></li><li><p><a href="https://arxiv.org/abs/1909.08053">Megatron-LM: Training Multi-Billion Parameter Language Models Using Model Parallelism</a></p></li><li><p><a href="https://arxiv.org/abs/2504.14960">MoE Parallel Folding: Heterogeneous Parallelism Mappings for Efficient Large-Scale MoE Model Training with Megatron Core</a></p></li><li><p><a href="https://arxiv.org/abs/2407.21783">The Llama 3 Herd of Models</a></p></li><li><p>More articles on distributed training in the newsletter:</p></li></ul><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;898be2d9-dfd6-44e2-9d6a-bff6428dd61f&quot;,&quot;caption&quot;:&quot;We have long ago moved away from training production-grade deep learning models on a single machine.&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Learn to train deep learning models on multiple GPUs&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. 
Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2026-02-26T13:06:02.566Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!YyVB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/distributed-data-parallel&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:189120446,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:2,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;93e2a8a1-af10-44eb-b8ff-14a7565d8c2a&quot;,&quot;caption&quot;:&quot;&#128075;&#127995; Hey there!&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Learn to train a deep learning model on multiple GPUs with Distributed 
PyTorch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2026-03-08T11:33:47.617Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!xxxe!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc2633d6-a9d4-46df-9d14-32bf82af1c0c_480x480.gif&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/train-a-cnn-with-pytorch-ddp&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:189239741,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:7,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><p><strong>Support my writing</strong> and join the <strong>paid tier today</strong> to get access to all posts in this newsletter:</p><ul><li><p>&#128126; <a href="https://www.intoai.pub/p/what-makes-deekseek-v4-so-good">What makes DeekSeek-V4 so good?</a></p></li><li><p>&#128104;&#8205;&#128300; <a 
href="https://www.intoai.pub/p/build-and-train-a-mixture-of-experts?utm_source=publication-search">Build and Train a Mixture-of-Experts (MoE) LLM from scratch</a></p></li><li><p>&#127752; <a href="https://www.intoai.pub/p/diffusion-llms-explained-simply">Diffusion LLMs, Explained Simply</a></p></li><li><p>&#128218; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p>&#128640; <a href="https://www.intoai.pub/p/build-and-train-a-diffusion-llm">Train a Diffusion LLM from scratch</a></p></li></ul><p>and so many more!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join 'Into AI' premium today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join 'Into AI' premium today</span></a></p>]]></content:encoded></item><item><title><![CDATA[This Week In AI Research (24-31 May 26) 🗓️]]></title><description><![CDATA[The top 10 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-24-31-may</link><guid isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-24-31-may</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Mon, 01 Jun 2026 11:58:55 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!obta!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0253d238-1383-4ed8-be2e-c66abbca099a_1863x1048.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!obta!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0253d238-1383-4ed8-be2e-c66abbca099a_1863x1048.png" data-component-name="Image2ToDOM"><div class="image2-inset image2-full-screen"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!obta!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0253d238-1383-4ed8-be2e-c66abbca099a_1863x1048.png 424w, https://substackcdn.com/image/fetch/$s_!obta!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0253d238-1383-4ed8-be2e-c66abbca099a_1863x1048.png 848w, https://substackcdn.com/image/fetch/$s_!obta!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0253d238-1383-4ed8-be2e-c66abbca099a_1863x1048.png 1272w, https://substackcdn.com/image/fetch/$s_!obta!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0253d238-1383-4ed8-be2e-c66abbca099a_1863x1048.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!obta!,w_5760,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0253d238-1383-4ed8-be2e-c66abbca099a_1863x1048.png" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0253d238-1383-4ed8-be2e-c66abbca099a_1863x1048.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;full&quot;,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-fullscreen" alt="" srcset="https://substackcdn.com/image/fetch/$s_!obta!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0253d238-1383-4ed8-be2e-c66abbca099a_1863x1048.png 424w, https://substackcdn.com/image/fetch/$s_!obta!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0253d238-1383-4ed8-be2e-c66abbca099a_1863x1048.png 848w, https://substackcdn.com/image/fetch/$s_!obta!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0253d238-1383-4ed8-be2e-c66abbca099a_1863x1048.png 1272w, https://substackcdn.com/image/fetch/$s_!obta!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0253d238-1383-4ed8-be2e-c66abbca099a_1863x1048.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h3>10. Language Models Need Sleep</h3><p>The <a href="https://www.intoai.pub/p/self-attention?utm_source=publication-search">attention mechanism</a> in Transformer-based LLMs scales poorly with increasing context length due to its quadratic complexity.</p><p>This research paper proposes a sleep-like mechanism to address this problem, in which an LLM periodically converts recent context into persistent fast weights before clearing its KV cache.</p><p>These fast weights are implemented as State-space model (SSM) blocks, and during sleep, <code>N</code> offline recurrent passes over the accumulated context update these weights.</p><p>During inference, this shifts extra computation to sleep while preserving the latency of wake-time prediction.</p><p>When tested on synthetic tasks such as cellular automata, multi-hop graph retrieval, and a realistic math reasoning task, both regular transformers and vanilla SSM-attention hybrid models fail. 
</p><p>In contrast, this is not true for the sleep-augmented SSM-attention hybrid model, where increasing sleep duration (number of recurrent passes <code>N</code>) improves performance, with the largest gains on tasks that require deeper reasoning.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!MQIA!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4761ed1f-8c2e-48c9-945d-0561d4a6f55b_2374x1188.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!MQIA!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4761ed1f-8c2e-48c9-945d-0561d4a6f55b_2374x1188.png 424w, https://substackcdn.com/image/fetch/$s_!MQIA!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4761ed1f-8c2e-48c9-945d-0561d4a6f55b_2374x1188.png 848w, https://substackcdn.com/image/fetch/$s_!MQIA!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4761ed1f-8c2e-48c9-945d-0561d4a6f55b_2374x1188.png 1272w, https://substackcdn.com/image/fetch/$s_!MQIA!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4761ed1f-8c2e-48c9-945d-0561d4a6f55b_2374x1188.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!MQIA!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4761ed1f-8c2e-48c9-945d-0561d4a6f55b_2374x1188.png" width="1456" height="729" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4761ed1f-8c2e-48c9-945d-0561d4a6f55b_2374x1188.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:729,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:256830,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199462318?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4761ed1f-8c2e-48c9-945d-0561d4a6f55b_2374x1188.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!MQIA!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4761ed1f-8c2e-48c9-945d-0561d4a6f55b_2374x1188.png 424w, https://substackcdn.com/image/fetch/$s_!MQIA!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4761ed1f-8c2e-48c9-945d-0561d4a6f55b_2374x1188.png 848w, https://substackcdn.com/image/fetch/$s_!MQIA!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4761ed1f-8c2e-48c9-945d-0561d4a6f55b_2374x1188.png 1272w, https://substackcdn.com/image/fetch/$s_!MQIA!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4761ed1f-8c2e-48c9-945d-0561d4a6f55b_2374x1188.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2605.26099">using this link</a>.</p><div><hr></div><p>Join the <strong>paid tier today</strong> to get access to all posts in this newsletter:</p><ul><li><p>&#128126; <a href="https://www.intoai.pub/p/what-makes-deekseek-v4-so-good">What makes DeepSeek-V4 so good?</a></p></li><li><p>&#127752; <a href="https://www.intoai.pub/p/diffusion-llms-explained-simply">Diffusion LLMs, Explained Simply</a></p></li><li><p>&#128218; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p>&#128104;&#8205;&#128300; <a href="https://www.intoai.pub/p/build-and-train-a-mixture-of-experts?utm_source=publication-search">Build and Train a Mixture-of-Experts (MoE) LLM from scratch</a></p></li><li><p>&#9000;&#65039; <a 
href="https://www.intoai.pub/p/distributed-data-parallel">Learn to train deep learning models on multiple GPUs</a></p></li><li><p>&#128640; Train a Diffusion LLM from scratch (out this week)</p></li></ul><p>and so many more!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join 'Into AI' premium today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join 'Into AI' premium today</span></a></p><div><hr></div><h3>9. LocateAnything: Fast and High-Quality Vision-Language Grounding with Parallel Box Decoding</h3><p>This research paper presents <strong>LocateAnything</strong>, a vision-language grounding and detection framework that replaces slow token-by-token coordinate generation with fast Parallel Box Decoding.</p><p>This method treats boxes or points as atomic geometric units and decodes them in one step, improving both decoding throughput and localization consistency.</p><p>Researchers also developed a scalable data engine and curated <strong>LocateAnything-Data</strong>, a large-scale dataset with more than 138M training samples, which substantially increases data diversity for high-precision localization. 
</p><p>Evals show that LocateAnything advances the speed-accuracy frontier, achieving significantly higher decoding throughput while improving high-IoU (Intersection over Union) localization quality across different benchmarks.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_jB6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1933395a-a181-4161-b1b6-93fcee57c8ac_2012x1366.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!_jB6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1933395a-a181-4161-b1b6-93fcee57c8ac_2012x1366.png 424w, https://substackcdn.com/image/fetch/$s_!_jB6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1933395a-a181-4161-b1b6-93fcee57c8ac_2012x1366.png 848w, https://substackcdn.com/image/fetch/$s_!_jB6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1933395a-a181-4161-b1b6-93fcee57c8ac_2012x1366.png 1272w, https://substackcdn.com/image/fetch/$s_!_jB6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1933395a-a181-4161-b1b6-93fcee57c8ac_2012x1366.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!_jB6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1933395a-a181-4161-b1b6-93fcee57c8ac_2012x1366.png" width="1456" height="989" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1933395a-a181-4161-b1b6-93fcee57c8ac_2012x1366.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:989,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2617051,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199462318?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1933395a-a181-4161-b1b6-93fcee57c8ac_2012x1366.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!_jB6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1933395a-a181-4161-b1b6-93fcee57c8ac_2012x1366.png 424w, https://substackcdn.com/image/fetch/$s_!_jB6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1933395a-a181-4161-b1b6-93fcee57c8ac_2012x1366.png 848w, https://substackcdn.com/image/fetch/$s_!_jB6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1933395a-a181-4161-b1b6-93fcee57c8ac_2012x1366.png 1272w, https://substackcdn.com/image/fetch/$s_!_jB6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1933395a-a181-4161-b1b6-93fcee57c8ac_2012x1366.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://www.arxiv.org/pdf/2605.27365">using this link</a>.</p><div><hr></div><h3>8. 
The MiniMax-M2 Series</h3><p>This research paper presents the <strong>MiniMax-M2 series</strong> of highly sparse <a href="https://www.intoai.pub/p/build-and-train-a-mixture-of-experts?utm_source=publication-search">Mixture-of-Experts LLMs</a> designed for efficient agentic work.</p><p>The flagship M2.7 model has:</p><ul><li><p>229.9B total parameters</p></li><li><p><strong>Only 9.8B</strong> activated parameters per token</p></li><li><p>A 192K-token context and multi-token prediction for faster inference</p></li></ul><p>This series uses three important components:</p><ul><li><p>An agent-driven data pipeline that produces large-scale, verifiable trajectories across agentic coding and agentic co-work</p></li><li><p>Forge, a scalable agent-native RL system for long-horizon agentic tasks</p></li><li><p>A self-evolution mechanism in the M2.7 model, where the model autonomously debugs training runs and modifies its own scaffold.</p></li></ul><p>Evals show that this series of models produces frontier-tier performance on agentic coding, deep search, office-task, and reasoning benchmarks, despite activating so few parameters per token.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!AYIE!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c422737-f1c9-4105-a30a-071458184d74_2306x1330.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!AYIE!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c422737-f1c9-4105-a30a-071458184d74_2306x1330.png 424w, 
https://substackcdn.com/image/fetch/$s_!AYIE!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c422737-f1c9-4105-a30a-071458184d74_2306x1330.png 848w, https://substackcdn.com/image/fetch/$s_!AYIE!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c422737-f1c9-4105-a30a-071458184d74_2306x1330.png 1272w, https://substackcdn.com/image/fetch/$s_!AYIE!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c422737-f1c9-4105-a30a-071458184d74_2306x1330.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!AYIE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c422737-f1c9-4105-a30a-071458184d74_2306x1330.png" width="1456" height="840" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5c422737-f1c9-4105-a30a-071458184d74_2306x1330.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:840,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:484979,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199462318?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c422737-f1c9-4105-a30a-071458184d74_2306x1330.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!AYIE!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c422737-f1c9-4105-a30a-071458184d74_2306x1330.png 424w, https://substackcdn.com/image/fetch/$s_!AYIE!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c422737-f1c9-4105-a30a-071458184d74_2306x1330.png 848w, https://substackcdn.com/image/fetch/$s_!AYIE!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c422737-f1c9-4105-a30a-071458184d74_2306x1330.png 1272w, https://substackcdn.com/image/fetch/$s_!AYIE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c422737-f1c9-4105-a30a-071458184d74_2306x1330.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The <a href="https://www.minimax.io/blog/minimax-m3">MiniMax M3 model is also out today</a>, and we will discuss it in the next edition of the newsletter.</p><p>Read more about this research paper <a href="https://arxiv.org/pdf/2605.26494">using this link</a>.</p><div><hr></div><h3>7. InstructSAM: Segment Any Instance with Any Instructions</h3><p>This research paper presents <strong>InstructSAM</strong>, a 2B vision-language segmentation model that enables users to segment objects using free-form instructions that include attributes, relations, counting, exclusion, or implicit intent, rather than only simple object names.</p><p>It connects a VLM to <a href="https://ai.meta.com/research/sam3/">SAM3</a> using parallel learnable instance queries, enabling it to reason about the instruction and produce multiple instance masks in&nbsp;a single forward pass. 
</p><p>Researchers also introduce&nbsp;<strong>Inst2Seg</strong>, a large instruction-based segmentation dataset and benchmark consisting of free-form instructions with instance-level masks.</p><p>On this benchmark, InstructSAM outperforms previous end-to-end methods and SAM3&#8217;s agentic pipeline while being much faster (1.1 seconds inference time compared to 29.6 seconds for SAM3-Agent-Qwen3-VL-2B).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!AN84!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c2f7936-16d9-44c1-aa75-6a4f2cc70718_2498x1240.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!AN84!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c2f7936-16d9-44c1-aa75-6a4f2cc70718_2498x1240.png 424w, https://substackcdn.com/image/fetch/$s_!AN84!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c2f7936-16d9-44c1-aa75-6a4f2cc70718_2498x1240.png 848w, https://substackcdn.com/image/fetch/$s_!AN84!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c2f7936-16d9-44c1-aa75-6a4f2cc70718_2498x1240.png 1272w, https://substackcdn.com/image/fetch/$s_!AN84!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c2f7936-16d9-44c1-aa75-6a4f2cc70718_2498x1240.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!AN84!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c2f7936-16d9-44c1-aa75-6a4f2cc70718_2498x1240.png" width="1456" height="723" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4c2f7936-16d9-44c1-aa75-6a4f2cc70718_2498x1240.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:723,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1127619,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199462318?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c2f7936-16d9-44c1-aa75-6a4f2cc70718_2498x1240.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!AN84!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c2f7936-16d9-44c1-aa75-6a4f2cc70718_2498x1240.png 424w, https://substackcdn.com/image/fetch/$s_!AN84!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c2f7936-16d9-44c1-aa75-6a4f2cc70718_2498x1240.png 848w, https://substackcdn.com/image/fetch/$s_!AN84!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c2f7936-16d9-44c1-aa75-6a4f2cc70718_2498x1240.png 1272w, https://substackcdn.com/image/fetch/$s_!AN84!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c2f7936-16d9-44c1-aa75-6a4f2cc70718_2498x1240.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2605.26102">using this link</a>.</p><div><hr></div><h3>6. PowLU: An Activation Function for Stable Pre-Training of LLM</h3><p>SwiGLU is a widely used activation function in LLMs today. For large positive inputs, SwiGLU behaves roughly like the quadratic function x&#178;, which gives it strong nonlinearity and expressive capacity. 
</p><p>However, this property also leads to numerical instability as the input or model scale increases, especially in low-precision LLM training, because it widens the output range and worsens outliers.</p><p>To address this, this research paper proposes a new activation function called the <strong>Power Linear Unit (PowLU)</strong> for large-scale LLM pre-training.</p><p>PowLU uses a rational power function to achieve adaptive nonlinearity while reducing extreme amplification, making training more stable.</p><p>Scaling-law experiments and experiments on the Ling 7.9B and 124B&nbsp;models show that PowLU achieves results competitive with SwiGLU and SwiGLU-Clip, while improving the stability and scalability of large-scale LLM training.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!OBl3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57dd53b-9e68-416b-bbe2-c26196ed83ca_2478x1122.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!OBl3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57dd53b-9e68-416b-bbe2-c26196ed83ca_2478x1122.png 424w, https://substackcdn.com/image/fetch/$s_!OBl3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57dd53b-9e68-416b-bbe2-c26196ed83ca_2478x1122.png 848w, https://substackcdn.com/image/fetch/$s_!OBl3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57dd53b-9e68-416b-bbe2-c26196ed83ca_2478x1122.png 1272w, 
https://substackcdn.com/image/fetch/$s_!OBl3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57dd53b-9e68-416b-bbe2-c26196ed83ca_2478x1122.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!OBl3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57dd53b-9e68-416b-bbe2-c26196ed83ca_2478x1122.png" width="1456" height="659" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b57dd53b-9e68-416b-bbe2-c26196ed83ca_2478x1122.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:659,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:351494,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199462318?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57dd53b-9e68-416b-bbe2-c26196ed83ca_2478x1122.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!OBl3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57dd53b-9e68-416b-bbe2-c26196ed83ca_2478x1122.png 424w, https://substackcdn.com/image/fetch/$s_!OBl3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57dd53b-9e68-416b-bbe2-c26196ed83ca_2478x1122.png 848w, 
https://substackcdn.com/image/fetch/$s_!OBl3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57dd53b-9e68-416b-bbe2-c26196ed83ca_2478x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!OBl3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb57dd53b-9e68-416b-bbe2-c26196ed83ca_2478x1122.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2605.25704">using this link</a>.</p><p class="button-wrapper" 
data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>5. When Does LeJEPA Learn a World Model?</h3><p><a href="https://arxiv.org/abs/2511.08544">LeJEPA</a> is a self-supervised learning method that combines a predictive alignment loss with Gaussian regularization (SIGReg).</p><p>This research paper presents a theory of when LeJEPA captures a representation that truly becomes a world model or a faithful map of the world&#8217;s latent structure.</p><p>The paper proves that, under Gaussian latent variables, stationary additive-noise transitions, alignment loss, and Gaussian regularization, LeJEPA can identify the true latent variables up to a linear rotation, making the representation useful for planning. </p><p>Experiments in toy settings, high-dimensional latent spaces, and pixel-based robotic control support the theory.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!FdVb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F354bc30b-c946-4f8c-a168-f5e5109a1c83_2536x928.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!FdVb!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F354bc30b-c946-4f8c-a168-f5e5109a1c83_2536x928.png 424w, https://substackcdn.com/image/fetch/$s_!FdVb!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F354bc30b-c946-4f8c-a168-f5e5109a1c83_2536x928.png 848w, 
https://substackcdn.com/image/fetch/$s_!FdVb!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F354bc30b-c946-4f8c-a168-f5e5109a1c83_2536x928.png 1272w, https://substackcdn.com/image/fetch/$s_!FdVb!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F354bc30b-c946-4f8c-a168-f5e5109a1c83_2536x928.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!FdVb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F354bc30b-c946-4f8c-a168-f5e5109a1c83_2536x928.png" width="1456" height="533" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/354bc30b-c946-4f8c-a168-f5e5109a1c83_2536x928.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:533,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1670995,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199462318?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F354bc30b-c946-4f8c-a168-f5e5109a1c83_2536x928.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!FdVb!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F354bc30b-c946-4f8c-a168-f5e5109a1c83_2536x928.png 424w, https://substackcdn.com/image/fetch/$s_!FdVb!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F354bc30b-c946-4f8c-a168-f5e5109a1c83_2536x928.png 
848w, https://substackcdn.com/image/fetch/$s_!FdVb!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F354bc30b-c946-4f8c-a168-f5e5109a1c83_2536x928.png 1272w, https://substackcdn.com/image/fetch/$s_!FdVb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F354bc30b-c946-4f8c-a168-f5e5109a1c83_2536x928.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!LMeF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe32b5556-f2b6-45ae-918e-a002c7ff0482_2632x1108.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!LMeF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe32b5556-f2b6-45ae-918e-a002c7ff0482_2632x1108.png 424w, https://substackcdn.com/image/fetch/$s_!LMeF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe32b5556-f2b6-45ae-918e-a002c7ff0482_2632x1108.png 848w, https://substackcdn.com/image/fetch/$s_!LMeF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe32b5556-f2b6-45ae-918e-a002c7ff0482_2632x1108.png 1272w, https://substackcdn.com/image/fetch/$s_!LMeF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe32b5556-f2b6-45ae-918e-a002c7ff0482_2632x1108.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!LMeF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe32b5556-f2b6-45ae-918e-a002c7ff0482_2632x1108.png" width="1456" height="613" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e32b5556-f2b6-45ae-918e-a002c7ff0482_2632x1108.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:613,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:320630,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199462318?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe32b5556-f2b6-45ae-918e-a002c7ff0482_2632x1108.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!LMeF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe32b5556-f2b6-45ae-918e-a002c7ff0482_2632x1108.png 424w, https://substackcdn.com/image/fetch/$s_!LMeF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe32b5556-f2b6-45ae-918e-a002c7ff0482_2632x1108.png 848w, https://substackcdn.com/image/fetch/$s_!LMeF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe32b5556-f2b6-45ae-918e-a002c7ff0482_2632x1108.png 1272w, https://substackcdn.com/image/fetch/$s_!LMeF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe32b5556-f2b6-45ae-918e-a002c7ff0482_2632x1108.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2605.26379">using this link</a>.</p><div><hr></div><h3>4. <strong>AutoScientists: Self-Organizing Agent Teams for Long-Running Scientific Experimentation</strong></h3><p>This research paper presents <strong>AutoScientists</strong>, a decentralized multi-agent system designed for long-running scientific experimentation. 
</p><p>In this system, AI agents share the experimental state, self-organize to form teams around promising hypotheses, critique proposals before using experimental compute, share successes and track failures to prevent repetition, and reorganize as the evidence changes.</p><p>This contrasts with existing agentic approaches that either follow a single research trajectory or coordinate through a central planner with fixed objectives.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!zEZ9!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10a7d885-8791-4d77-84eb-7c0c3fe41e4a_2452x1166.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!zEZ9!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10a7d885-8791-4d77-84eb-7c0c3fe41e4a_2452x1166.png 424w, https://substackcdn.com/image/fetch/$s_!zEZ9!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10a7d885-8791-4d77-84eb-7c0c3fe41e4a_2452x1166.png 848w, https://substackcdn.com/image/fetch/$s_!zEZ9!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10a7d885-8791-4d77-84eb-7c0c3fe41e4a_2452x1166.png 1272w, https://substackcdn.com/image/fetch/$s_!zEZ9!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10a7d885-8791-4d77-84eb-7c0c3fe41e4a_2452x1166.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!zEZ9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10a7d885-8791-4d77-84eb-7c0c3fe41e4a_2452x1166.png" width="1456" height="692" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/10a7d885-8791-4d77-84eb-7c0c3fe41e4a_2452x1166.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:692,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:341422,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199462318?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10a7d885-8791-4d77-84eb-7c0c3fe41e4a_2452x1166.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!zEZ9!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10a7d885-8791-4d77-84eb-7c0c3fe41e4a_2452x1166.png 424w, https://substackcdn.com/image/fetch/$s_!zEZ9!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10a7d885-8791-4d77-84eb-7c0c3fe41e4a_2452x1166.png 848w, https://substackcdn.com/image/fetch/$s_!zEZ9!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10a7d885-8791-4d77-84eb-7c0c3fe41e4a_2452x1166.png 1272w, https://substackcdn.com/image/fetch/$s_!zEZ9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10a7d885-8791-4d77-84eb-7c0c3fe41e4a_2452x1166.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Evals show AutoScientists achieves a mean leaderboard percentile of 74.4% on <a href="https://www.biorxiv.org/content/10.1101/2025.09.01.673319v2">BioML-Bench</a>, improving over the strongest AI agent by +8.33%.</p><p>For GPT training optimization, it reaches a target validation bits-per-byte 1.9x faster than <a href="https://github.com/karpathy/autoresearch">Autoresearch</a>. 
</p><p>It also discovers a method for ACE2-Spike binding that improves over the current SOTA model by +12.5% in Spearman correlation.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!P_IF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d8ba86e-e4f5-483b-8699-848634a18477_2778x1048.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!P_IF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d8ba86e-e4f5-483b-8699-848634a18477_2778x1048.png 424w, https://substackcdn.com/image/fetch/$s_!P_IF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d8ba86e-e4f5-483b-8699-848634a18477_2778x1048.png 848w, https://substackcdn.com/image/fetch/$s_!P_IF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d8ba86e-e4f5-483b-8699-848634a18477_2778x1048.png 1272w, https://substackcdn.com/image/fetch/$s_!P_IF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d8ba86e-e4f5-483b-8699-848634a18477_2778x1048.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!P_IF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d8ba86e-e4f5-483b-8699-848634a18477_2778x1048.png" width="1456" height="549" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2d8ba86e-e4f5-483b-8699-848634a18477_2778x1048.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:549,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:301038,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199462318?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d8ba86e-e4f5-483b-8699-848634a18477_2778x1048.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!P_IF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d8ba86e-e4f5-483b-8699-848634a18477_2778x1048.png 424w, https://substackcdn.com/image/fetch/$s_!P_IF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d8ba86e-e4f5-483b-8699-848634a18477_2778x1048.png 848w, https://substackcdn.com/image/fetch/$s_!P_IF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d8ba86e-e4f5-483b-8699-848634a18477_2778x1048.png 1272w, https://substackcdn.com/image/fetch/$s_!P_IF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d8ba86e-e4f5-483b-8699-848634a18477_2778x1048.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2605.28655">using this link</a>.</p><div><hr></div><h3>3. 
SIA: Self-Improving AI with Harness &amp; Weight Updates</h3><p>This research paper introduces <strong>SIA</strong>, a self-improving AI framework in which a language-model agent (the Feedback-Agent) improves both the&nbsp;agent harness&nbsp;and the model&#8217;s&nbsp;weights&nbsp;using LoRA/RL updates.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!QCiI!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a8fc38a-7714-4647-ba66-0e03e34ed1e8_1860x1190.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!QCiI!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a8fc38a-7714-4647-ba66-0e03e34ed1e8_1860x1190.png 424w, https://substackcdn.com/image/fetch/$s_!QCiI!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a8fc38a-7714-4647-ba66-0e03e34ed1e8_1860x1190.png 848w, https://substackcdn.com/image/fetch/$s_!QCiI!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a8fc38a-7714-4647-ba66-0e03e34ed1e8_1860x1190.png 1272w, https://substackcdn.com/image/fetch/$s_!QCiI!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a8fc38a-7714-4647-ba66-0e03e34ed1e8_1860x1190.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!QCiI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a8fc38a-7714-4647-ba66-0e03e34ed1e8_1860x1190.png" width="1456" height="932" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5a8fc38a-7714-4647-ba66-0e03e34ed1e8_1860x1190.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:932,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:192902,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199462318?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a8fc38a-7714-4647-ba66-0e03e34ed1e8_1860x1190.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!QCiI!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a8fc38a-7714-4647-ba66-0e03e34ed1e8_1860x1190.png 424w, https://substackcdn.com/image/fetch/$s_!QCiI!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a8fc38a-7714-4647-ba66-0e03e34ed1e8_1860x1190.png 848w, https://substackcdn.com/image/fetch/$s_!QCiI!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a8fc38a-7714-4647-ba66-0e03e34ed1e8_1860x1190.png 1272w, https://substackcdn.com/image/fetch/$s_!QCiI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a8fc38a-7714-4647-ba66-0e03e34ed1e8_1860x1190.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In Chinese legal charge classification, low-level GPU kernel optimization, and single-cell RNA denoising tasks, SIA-W+H (combined weight and harness updates) works better than the approach that only improves the harness (SIA-H).</p><p>SIA-W+H achieves a 25.1% improvement over prior SOTA on <a href="https://arxiv.org/abs/2309.16289">LawBench</a>, 12.4% faster GPU kernels than prior SOTA, and a 20.4% improvement over prior SOTA on denoising.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!-_gp!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F587e1114-934d-4670-942e-c6400adb9104_2554x1200.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!-_gp!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F587e1114-934d-4670-942e-c6400adb9104_2554x1200.png 424w, https://substackcdn.com/image/fetch/$s_!-_gp!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F587e1114-934d-4670-942e-c6400adb9104_2554x1200.png 848w, https://substackcdn.com/image/fetch/$s_!-_gp!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F587e1114-934d-4670-942e-c6400adb9104_2554x1200.png 1272w, https://substackcdn.com/image/fetch/$s_!-_gp!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F587e1114-934d-4670-942e-c6400adb9104_2554x1200.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!-_gp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F587e1114-934d-4670-942e-c6400adb9104_2554x1200.png" width="1456" height="684" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/587e1114-934d-4670-942e-c6400adb9104_2554x1200.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:684,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:573161,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199462318?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F587e1114-934d-4670-942e-c6400adb9104_2554x1200.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!-_gp!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F587e1114-934d-4670-942e-c6400adb9104_2554x1200.png 424w, https://substackcdn.com/image/fetch/$s_!-_gp!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F587e1114-934d-4670-942e-c6400adb9104_2554x1200.png 848w, https://substackcdn.com/image/fetch/$s_!-_gp!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F587e1114-934d-4670-942e-c6400adb9104_2554x1200.png 1272w, https://substackcdn.com/image/fetch/$s_!-_gp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F587e1114-934d-4670-942e-c6400adb9104_2554x1200.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2605.27276">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join 'Into AI' premium today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join 'Into AI' premium today</span></a></p><div><hr></div><h3>2. Self-Improving Language Models with Bidirectional Evolutionary Search</h3><p>This research paper introduces <strong>Bidirectional Evolutionary Search (BES)</strong>, a search framework for self-improving LLMs and agentic systems, used for post-training sample generation and inference.</p><p>Commonly used methods such as best-of-N sampling and tree search have limitations because they rely on sparse verification signals and primarily explore high-probability candidates constructed through autoregressive expansion.</p><p>BES addresses this issue using:</p><ul><li><p><strong>Forward evolutionary search</strong>, which augments standard expansion with evolution operators that recombine partial trajectories to generate candidates that are difficult to obtain from a single model rollout</p></li><li><p><strong>Backward goal decomposition</strong>, which divides tasks into checkable subgoals, producing dense intermediate feedback that guides forward search</p></li></ul><p>Theoretically, this approach increases exploration and can exponentially reduce the number of required samples to find a correct 
answer, leading to consistent improvements on tough post-training tasks and open problem-solving benchmarks.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!-EXT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F47720d55-0d8a-472c-9239-3226a281ff83_2084x1226.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!-EXT!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F47720d55-0d8a-472c-9239-3226a281ff83_2084x1226.png 424w, https://substackcdn.com/image/fetch/$s_!-EXT!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F47720d55-0d8a-472c-9239-3226a281ff83_2084x1226.png 848w, https://substackcdn.com/image/fetch/$s_!-EXT!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F47720d55-0d8a-472c-9239-3226a281ff83_2084x1226.png 1272w, https://substackcdn.com/image/fetch/$s_!-EXT!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F47720d55-0d8a-472c-9239-3226a281ff83_2084x1226.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!-EXT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F47720d55-0d8a-472c-9239-3226a281ff83_2084x1226.png" width="1456" height="857" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/47720d55-0d8a-472c-9239-3226a281ff83_2084x1226.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:857,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:482183,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199462318?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F47720d55-0d8a-472c-9239-3226a281ff83_2084x1226.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!-EXT!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F47720d55-0d8a-472c-9239-3226a281ff83_2084x1226.png 424w, https://substackcdn.com/image/fetch/$s_!-EXT!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F47720d55-0d8a-472c-9239-3226a281ff83_2084x1226.png 848w, https://substackcdn.com/image/fetch/$s_!-EXT!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F47720d55-0d8a-472c-9239-3226a281ff83_2084x1226.png 1272w, https://substackcdn.com/image/fetch/$s_!-EXT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F47720d55-0d8a-472c-9239-3226a281ff83_2084x1226.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/pdf/2605.28814">using this link</a>.</p><div><hr></div><h3>1. 
<strong>Introducing Claude Opus 4.8</strong></h3><p>Anthropic released Claude Opus 4.8, their new flagship model, with improvements across benchmarks compared to their previous models and competitors.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!6SbT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8fbefd7b-1245-46fa-a862-a9f0043ca3c2_2600x1392.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!6SbT!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8fbefd7b-1245-46fa-a862-a9f0043ca3c2_2600x1392.webp 424w, https://substackcdn.com/image/fetch/$s_!6SbT!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8fbefd7b-1245-46fa-a862-a9f0043ca3c2_2600x1392.webp 848w, https://substackcdn.com/image/fetch/$s_!6SbT!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8fbefd7b-1245-46fa-a862-a9f0043ca3c2_2600x1392.webp 1272w, https://substackcdn.com/image/fetch/$s_!6SbT!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8fbefd7b-1245-46fa-a862-a9f0043ca3c2_2600x1392.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!6SbT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8fbefd7b-1245-46fa-a862-a9f0043ca3c2_2600x1392.webp" width="1456" height="780" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8fbefd7b-1245-46fa-a862-a9f0043ca3c2_2600x1392.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:780,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!6SbT!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8fbefd7b-1245-46fa-a862-a9f0043ca3c2_2600x1392.webp 424w, https://substackcdn.com/image/fetch/$s_!6SbT!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8fbefd7b-1245-46fa-a862-a9f0043ca3c2_2600x1392.webp 848w, https://substackcdn.com/image/fetch/$s_!6SbT!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8fbefd7b-1245-46fa-a862-a9f0043ca3c2_2600x1392.webp 1272w, https://substackcdn.com/image/fetch/$s_!6SbT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8fbefd7b-1245-46fa-a862-a9f0043ca3c2_2600x1392.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"><a href="https://www.anthropic.com/news/claude-opus-4-8">Source</a></figcaption></figure></div><p>One of the most prominent improvements in Opus 4.8 is its honesty. 
Opus 4.8 is less likely to make unsupported claims and is about 4X less likely than its predecessor to let flaws in code it has written go unremarked.</p><p>Opus 4.8&#8217;s rates of misaligned behavior (such as deception or cooperation with misuse) are substantially lower than Opus 4.7's and are similar to those of Anthropic&#8217;s best-aligned model, <a href="https://red.anthropic.com/2026/mythos-preview/">Claude Mythos Preview</a>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!3p9I!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe661aaf8-da8c-458f-b43b-47337eaedb7b_3840x2160.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!3p9I!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe661aaf8-da8c-458f-b43b-47337eaedb7b_3840x2160.webp 424w, https://substackcdn.com/image/fetch/$s_!3p9I!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe661aaf8-da8c-458f-b43b-47337eaedb7b_3840x2160.webp 848w, https://substackcdn.com/image/fetch/$s_!3p9I!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe661aaf8-da8c-458f-b43b-47337eaedb7b_3840x2160.webp 1272w, https://substackcdn.com/image/fetch/$s_!3p9I!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe661aaf8-da8c-458f-b43b-47337eaedb7b_3840x2160.webp 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!3p9I!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe661aaf8-da8c-458f-b43b-47337eaedb7b_3840x2160.webp" width="1456" height="819" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e661aaf8-da8c-458f-b43b-47337eaedb7b_3840x2160.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!3p9I!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe661aaf8-da8c-458f-b43b-47337eaedb7b_3840x2160.webp 424w, https://substackcdn.com/image/fetch/$s_!3p9I!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe661aaf8-da8c-458f-b43b-47337eaedb7b_3840x2160.webp 848w, https://substackcdn.com/image/fetch/$s_!3p9I!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe661aaf8-da8c-458f-b43b-47337eaedb7b_3840x2160.webp 1272w, https://substackcdn.com/image/fetch/$s_!3p9I!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe661aaf8-da8c-458f-b43b-47337eaedb7b_3840x2160.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset 
pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"><a href="https://www.anthropic.com/news/claude-opus-4-8">Source</a></figcaption></figure></div><p>It also comes with a fast mode (where the model can work at 2.5&#215; the speed), which is  3X cheaper than it was for previous models.</p><p>Read more about this release <a href="https://www.anthropic.com/news/claude-opus-4-8">using this link</a>.</p><div><hr></div><p>This newsletter edition is completely free to read. Show your love by liking it, restacking it, and sharing it with others! 
&#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/this-week-in-ai-research-24-31-may?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/p/this-week-in-ai-research-24-31-may?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p>]]></content:encoded></item><item><title><![CDATA['Tensorwise' goes live!]]></title><description><![CDATA[Ready to give it a try? You'll love it!]]></description><link>https://www.intoai.pub/p/tensorwise-goes-live</link><guid isPermaLink="false">https://www.intoai.pub/p/tensorwise-goes-live</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Sun, 31 May 2026 10:10:58 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!ZvaB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4aaa71c1-0f29-4f5f-b107-af57c57bf784_1852x774.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ZvaB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4aaa71c1-0f29-4f5f-b107-af57c57bf784_1852x774.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ZvaB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4aaa71c1-0f29-4f5f-b107-af57c57bf784_1852x774.png 424w, 
https://substackcdn.com/image/fetch/$s_!ZvaB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4aaa71c1-0f29-4f5f-b107-af57c57bf784_1852x774.png 848w, https://substackcdn.com/image/fetch/$s_!ZvaB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4aaa71c1-0f29-4f5f-b107-af57c57bf784_1852x774.png 1272w, https://substackcdn.com/image/fetch/$s_!ZvaB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4aaa71c1-0f29-4f5f-b107-af57c57bf784_1852x774.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ZvaB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4aaa71c1-0f29-4f5f-b107-af57c57bf784_1852x774.png" width="1456" height="609" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4aaa71c1-0f29-4f5f-b107-af57c57bf784_1852x774.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:609,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:91637,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199959817?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4aaa71c1-0f29-4f5f-b107-af57c57bf784_1852x774.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ZvaB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4aaa71c1-0f29-4f5f-b107-af57c57bf784_1852x774.png 
424w, https://substackcdn.com/image/fetch/$s_!ZvaB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4aaa71c1-0f29-4f5f-b107-af57c57bf784_1852x774.png 848w, https://substackcdn.com/image/fetch/$s_!ZvaB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4aaa71c1-0f29-4f5f-b107-af57c57bf784_1852x774.png 1272w, https://substackcdn.com/image/fetch/$s_!ZvaB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4aaa71c1-0f29-4f5f-b107-af57c57bf784_1852x774.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>&#128075;&#127995; Hi everyone!</p><p>I&#8217;ve been quietly building something on the side for the last few months, and it&#8217;s finally ready for you to try out.</p><p>I really appreciate your subscribing to the publication, and I am personally inviting you to try it &#129303;</p><div><hr></div><h3><strong>Tensorwise</strong> is a crossword puzzle game for AI enthusiasts just like you!</h3><p>It keeps you up to date with the latest in AI tech and research without overwhelming you with new terminology.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!dxPZ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae44bd5-ca97-4c00-b6cf-254fc99c6ee7_1178x1156.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!dxPZ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae44bd5-ca97-4c00-b6cf-254fc99c6ee7_1178x1156.png 424w, https://substackcdn.com/image/fetch/$s_!dxPZ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae44bd5-ca97-4c00-b6cf-254fc99c6ee7_1178x1156.png 848w, https://substackcdn.com/image/fetch/$s_!dxPZ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae44bd5-ca97-4c00-b6cf-254fc99c6ee7_1178x1156.png 1272w, https://substackcdn.com/image/fetch/$s_!dxPZ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae44bd5-ca97-4c00-b6cf-254fc99c6ee7_1178x1156.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!dxPZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae44bd5-ca97-4c00-b6cf-254fc99c6ee7_1178x1156.png" width="1178" height="1156" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/bae44bd5-ca97-4c00-b6cf-254fc99c6ee7_1178x1156.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1156,&quot;width&quot;:1178,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:138836,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/198681103?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae44bd5-ca97-4c00-b6cf-254fc99c6ee7_1178x1156.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!dxPZ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae44bd5-ca97-4c00-b6cf-254fc99c6ee7_1178x1156.png 424w, https://substackcdn.com/image/fetch/$s_!dxPZ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae44bd5-ca97-4c00-b6cf-254fc99c6ee7_1178x1156.png 848w, https://substackcdn.com/image/fetch/$s_!dxPZ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae44bd5-ca97-4c00-b6cf-254fc99c6ee7_1178x1156.png 1272w, 
https://substackcdn.com/image/fetch/$s_!dxPZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae44bd5-ca97-4c00-b6cf-254fc99c6ee7_1178x1156.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>One of the coolest features is that you can learn more about each term and better understand them with the attached resource.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!Zx49!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaa3b844-ec84-4ac1-b1a9-cf2b0d658c2f_1170x538.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Zx49!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaa3b844-ec84-4ac1-b1a9-cf2b0d658c2f_1170x538.png 424w, https://substackcdn.com/image/fetch/$s_!Zx49!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaa3b844-ec84-4ac1-b1a9-cf2b0d658c2f_1170x538.png 848w, https://substackcdn.com/image/fetch/$s_!Zx49!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaa3b844-ec84-4ac1-b1a9-cf2b0d658c2f_1170x538.png 1272w, https://substackcdn.com/image/fetch/$s_!Zx49!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaa3b844-ec84-4ac1-b1a9-cf2b0d658c2f_1170x538.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Zx49!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaa3b844-ec84-4ac1-b1a9-cf2b0d658c2f_1170x538.png" width="1170" height="538" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/caa3b844-ec84-4ac1-b1a9-cf2b0d658c2f_1170x538.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:538,&quot;width&quot;:1170,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:102082,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/198681103?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaa3b844-ec84-4ac1-b1a9-cf2b0d658c2f_1170x538.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!Zx49!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaa3b844-ec84-4ac1-b1a9-cf2b0d658c2f_1170x538.png 424w, https://substackcdn.com/image/fetch/$s_!Zx49!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaa3b844-ec84-4ac1-b1a9-cf2b0d658c2f_1170x538.png 848w, https://substackcdn.com/image/fetch/$s_!Zx49!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaa3b844-ec84-4ac1-b1a9-cf2b0d658c2f_1170x538.png 1272w, https://substackcdn.com/image/fetch/$s_!Zx49!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaa3b844-ec84-4ac1-b1a9-cf2b0d658c2f_1170x538.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Tensorwise also lets you compete with other players for a spot on the leaderboard! 
<br>(<em>And, I might bring in prizes for you all in the future for scoring the best.</em>)</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9Y3u!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F339622b2-7992-4bf9-b93e-a3d6a3272bf7_1581x815.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9Y3u!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F339622b2-7992-4bf9-b93e-a3d6a3272bf7_1581x815.png 424w, https://substackcdn.com/image/fetch/$s_!9Y3u!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F339622b2-7992-4bf9-b93e-a3d6a3272bf7_1581x815.png 848w, https://substackcdn.com/image/fetch/$s_!9Y3u!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F339622b2-7992-4bf9-b93e-a3d6a3272bf7_1581x815.png 1272w, https://substackcdn.com/image/fetch/$s_!9Y3u!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F339622b2-7992-4bf9-b93e-a3d6a3272bf7_1581x815.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9Y3u!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F339622b2-7992-4bf9-b93e-a3d6a3272bf7_1581x815.png" width="1456" height="751" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/339622b2-7992-4bf9-b93e-a3d6a3272bf7_1581x815.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:751,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:513097,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/198681103?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F47843193-0fb9-48a6-b8e7-328546d2b601_2032x968.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!9Y3u!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F339622b2-7992-4bf9-b93e-a3d6a3272bf7_1581x815.png 424w, https://substackcdn.com/image/fetch/$s_!9Y3u!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F339622b2-7992-4bf9-b93e-a3d6a3272bf7_1581x815.png 848w, https://substackcdn.com/image/fetch/$s_!9Y3u!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F339622b2-7992-4bf9-b93e-a3d6a3272bf7_1581x815.png 1272w, https://substackcdn.com/image/fetch/$s_!9Y3u!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F339622b2-7992-4bf9-b93e-a3d6a3272bf7_1581x815.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>A quick note:  <strong>All crosswords are handmade by me and not AI-generated</strong>. I can&#8217;t provide enough evidence for this because <a href="https://sudoku.logicalintelligence.com/">current frontier LLMs are terrible at grid-like environments</a> and cannot generate reliable technical crosswords. 
So if you doubt me, try getting an LLM to make a decent puzzle yourself.</p><p>Excited to give it a try and become more <em><strong>Tensor-wiser</strong></em>?</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.tensorwise.dev/&quot;,&quot;text&quot;:&quot;Try Tensorwise&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.tensorwise.dev/"><span>Try Tensorwise</span></a></p><p>&#127808; I&#8217;d love to hear how well you did, what you enjoyed, and which clue made you swear at your screen. (Also, if something broke.)</p><p>Cheers!</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Nt4r!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0003a7a7-a693-4633-bff8-aafbdb669ca5_1344x202.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Nt4r!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0003a7a7-a693-4633-bff8-aafbdb669ca5_1344x202.webp 424w, https://substackcdn.com/image/fetch/$s_!Nt4r!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0003a7a7-a693-4633-bff8-aafbdb669ca5_1344x202.webp 848w, https://substackcdn.com/image/fetch/$s_!Nt4r!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0003a7a7-a693-4633-bff8-aafbdb669ca5_1344x202.webp 1272w, 
https://substackcdn.com/image/fetch/$s_!Nt4r!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0003a7a7-a693-4633-bff8-aafbdb669ca5_1344x202.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Nt4r!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0003a7a7-a693-4633-bff8-aafbdb669ca5_1344x202.webp" width="1344" height="202" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0003a7a7-a693-4633-bff8-aafbdb669ca5_1344x202.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:202,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:5632,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/198681103?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0003a7a7-a693-4633-bff8-aafbdb669ca5_1344x202.webp&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!Nt4r!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0003a7a7-a693-4633-bff8-aafbdb669ca5_1344x202.webp 424w, https://substackcdn.com/image/fetch/$s_!Nt4r!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0003a7a7-a693-4633-bff8-aafbdb669ca5_1344x202.webp 848w, 
https://substackcdn.com/image/fetch/$s_!Nt4r!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0003a7a7-a693-4633-bff8-aafbdb669ca5_1344x202.webp 1272w, https://substackcdn.com/image/fetch/$s_!Nt4r!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0003a7a7-a693-4633-bff8-aafbdb669ca5_1344x202.webp 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[10 Confusing LLM Concepts, Explained Simply]]></title><description><![CDATA[The role of CPU/ GPU/ TPU in LLM workflows, Pruning, Quantization, and more]]></description><link>https://www.intoai.pub/p/10-confusing-llm-concepts-explained</link><guid isPermaLink="false">https://www.intoai.pub/p/10-confusing-llm-concepts-explained</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Sat, 30 May 2026 12:15:55 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!ELt1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa3de93a4-efd4-4f7c-9829-718acdc4e156_2308x1536.jpeg" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ELt1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa3de93a4-efd4-4f7c-9829-718acdc4e156_2308x1536.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset image2-full-screen"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ELt1!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa3de93a4-efd4-4f7c-9829-718acdc4e156_2308x1536.jpeg 424w, 
https://substackcdn.com/image/fetch/$s_!ELt1!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa3de93a4-efd4-4f7c-9829-718acdc4e156_2308x1536.jpeg 848w, https://substackcdn.com/image/fetch/$s_!ELt1!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa3de93a4-efd4-4f7c-9829-718acdc4e156_2308x1536.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!ELt1!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa3de93a4-efd4-4f7c-9829-718acdc4e156_2308x1536.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ELt1!,w_5760,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa3de93a4-efd4-4f7c-9829-718acdc4e156_2308x1536.jpeg" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a3de93a4-efd4-4f7c-9829-718acdc4e156_2308x1536.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;full&quot;,&quot;height&quot;:1536,&quot;width&quot;:2308,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:586081,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199514132?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aa3764a-8383-4813-a72a-6e5d7cb6323b_2752x1536.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-fullscreen" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!ELt1!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa3de93a4-efd4-4f7c-9829-718acdc4e156_2308x1536.jpeg 424w, https://substackcdn.com/image/fetch/$s_!ELt1!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa3de93a4-efd4-4f7c-9829-718acdc4e156_2308x1536.jpeg 848w, https://substackcdn.com/image/fetch/$s_!ELt1!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa3de93a4-efd4-4f7c-9829-718acdc4e156_2308x1536.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!ELt1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa3de93a4-efd4-4f7c-9829-718acdc4e156_2308x1536.jpeg 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h3>10. On-policy vs. Off-policy learning</h3><p>A Policy is the strategy by which an AI agent chooses actions in a given state. For an LLM, the model is the policy itself.</p><p>With on-policy learning for an LLM, the model learns from its own responses/ outputs. </p><p>A commonly used approach to train LLMs to improve them in math/ coding tasks goes like this:</p><ul><li><p>Given a query or prompt, a model produces a group of responses </p></li><li><p>The responses are scored using a verifier or a reward model</p></li><li><p>An algorithm like <a href="https://www.intoai.pub/i/174013730/from-ppo-to-grpo">GRPO (Group Relative Policy Optimization)</a> is used to train the model to produce responses that score above average while pushing down below-average responses.</p></li></ul><p>In contrast to the above, with off-policy learning, an LLM learns from the responses/outputs that are not generated by itself. These could come from a stronger model (the teacher model), a different version of the same model, or a dataset.</p><p>A commonly used approach for training LLMs to improve their performance in specific domains is called <a href="https://en.wikipedia.org/wiki/Knowledge_distillation">Distillation</a>. In this approach:</p><ul><li><p>Responses or reasoning traces are generated using a strong (teacher) model</p></li><li><p>A weaker (student) model is trained on those to imitate the teacher's responses</p></li></ul><p>There&#8217;s also a recently introduced learning approach that's gaining popularity and combines on-policy and off-policy learning. 
It is called <a href="https://thinkingmachines.ai/blog/on-policy-distillation/">On-policy distillation</a>. Curious readers are encouraged to learn more about it.</p><div><hr></div><p>The images used in this lesson come from my book <strong>LLMs In 100 Images</strong>, which is a collection of 100 easy-to-follow visuals that explain the most important concepts you need to master to understand LLMs today.</p><p><strong><a href="https://bamaniaashish.gumroad.com/l/llmbook/LLMFLASH30">Grab your copy today at a 30% discount using this link.</a></strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_ysS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!_ysS!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 424w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 848w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 1272w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!_ysS!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png" width="1200" height="600" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/aac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:728,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!_ysS!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 424w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 848w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 1272w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button 
tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>9. Pretraining vs. Mid-training vs. Post-training</h3><p>All three are phases of training LLMs before they are released to end users. </p><p><strong>Pretraining</strong> is the initial training phase that teaches a model the structure of a language and gives it basic factual knowledge of the world.</p><p>Pretraining occurs on massive datasets such as&nbsp;<a href="https://commoncrawl.org/">Common Crawl</a>/ <a href="https://huggingface.co/spaces/HuggingFaceFW/blogpost-fineweb-v1">FineWeb</a>,&nbsp;which consist of trillions of tokens from the web. 
This process is usually highly compute-intensive, and a pre-trained model is called the &#8220;Base model&#8221;.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!feP6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe882ddc2-f11a-4385-8b73-afcbce54e906_1080x1080.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!feP6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe882ddc2-f11a-4385-8b73-afcbce54e906_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!feP6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe882ddc2-f11a-4385-8b73-afcbce54e906_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!feP6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe882ddc2-f11a-4385-8b73-afcbce54e906_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!feP6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe882ddc2-f11a-4385-8b73-afcbce54e906_1080x1080.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!feP6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe882ddc2-f11a-4385-8b73-afcbce54e906_1080x1080.png" width="1080" height="1080" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e882ddc2-f11a-4385-8b73-afcbce54e906_1080x1080.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1080,&quot;width&quot;:1080,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:74410,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199514132?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe882ddc2-f11a-4385-8b73-afcbce54e906_1080x1080.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!feP6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe882ddc2-f11a-4385-8b73-afcbce54e906_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!feP6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe882ddc2-f11a-4385-8b73-afcbce54e906_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!feP6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe882ddc2-f11a-4385-8b73-afcbce54e906_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!feP6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe882ddc2-f11a-4385-8b73-afcbce54e906_1080x1080.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>Mid-training</strong> is the next phase, which involves further training on the base model using higher-quality, domain-specific data to improve its capabilities. This data might focus on domains like health, law, math, or code, or on improving reasoning, extending context length, or adding a new language.</p><p><strong>Post-training</strong> is the final phase that teaches a model to become a useful and human-value-aligned assistant, rather than one that just produces the next token. 
Some common post-training techniques include:</p><ul><li><p><strong>Supervised fine-tuning (SFT)</strong> on datasets consisting of instruction-response pairs</p></li><li><p>RL training methods like <strong><a href="https://www.intoai.pub/p/rlhf">RLHF</a></strong> to teach a model to produce human-value-aligned responses, or <strong><a href="https://www.intoai.pub/i/175171526/lets-understand-how-llms-are-conventionally-pre-trained">RLVR</a></strong> to teach a model to reason through math- and code-related problems and solve them correctly</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Gla4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F988078cf-ab84-417c-bfdb-473ed71b56af_1080x1080.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Gla4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F988078cf-ab84-417c-bfdb-473ed71b56af_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!Gla4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F988078cf-ab84-417c-bfdb-473ed71b56af_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!Gla4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F988078cf-ab84-417c-bfdb-473ed71b56af_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!Gla4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F988078cf-ab84-417c-bfdb-473ed71b56af_1080x1080.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!Gla4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F988078cf-ab84-417c-bfdb-473ed71b56af_1080x1080.png" width="1080" height="1080" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/988078cf-ab84-417c-bfdb-473ed71b56af_1080x1080.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1080,&quot;width&quot;:1080,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:59516,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199514132?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F988078cf-ab84-417c-bfdb-473ed71b56af_1080x1080.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Gla4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F988078cf-ab84-417c-bfdb-473ed71b56af_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!Gla4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F988078cf-ab84-417c-bfdb-473ed71b56af_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!Gla4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F988078cf-ab84-417c-bfdb-473ed71b56af_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!Gla4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F988078cf-ab84-417c-bfdb-473ed71b56af_1080x1080.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>8. Zero-shot vs. One-shot vs. 
Few-shot prompting</h3><p>All three are approaches for prompting an LLM to complete a task well.</p><p><strong>Zero-shot prompting</strong> is when a user describes a task in the prompt and the model uses what it has already learned during its training to complete it.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!5oo3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08e58526-0e3b-4790-9887-91279505bc34_1080x1080.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!5oo3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08e58526-0e3b-4790-9887-91279505bc34_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!5oo3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08e58526-0e3b-4790-9887-91279505bc34_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!5oo3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08e58526-0e3b-4790-9887-91279505bc34_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!5oo3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08e58526-0e3b-4790-9887-91279505bc34_1080x1080.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!5oo3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08e58526-0e3b-4790-9887-91279505bc34_1080x1080.png" width="1080" height="1080" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/08e58526-0e3b-4790-9887-91279505bc34_1080x1080.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1080,&quot;width&quot;:1080,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:41192,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199514132?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08e58526-0e3b-4790-9887-91279505bc34_1080x1080.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!5oo3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08e58526-0e3b-4790-9887-91279505bc34_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!5oo3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08e58526-0e3b-4790-9887-91279505bc34_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!5oo3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08e58526-0e3b-4790-9887-91279505bc34_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!5oo3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F08e58526-0e3b-4790-9887-91279505bc34_1080x1080.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>One-shot prompting</strong> is when a user provides a single example of how to complete a task in the prompt.</p><p><strong>Few-shot prompting</strong> is when a user describes how to complete a task with several examples in the prompt.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!-rtV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde4f0d41-5da9-43c0-835e-e965ee256ec9_1080x1080.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!-rtV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde4f0d41-5da9-43c0-835e-e965ee256ec9_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!-rtV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde4f0d41-5da9-43c0-835e-e965ee256ec9_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!-rtV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde4f0d41-5da9-43c0-835e-e965ee256ec9_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!-rtV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde4f0d41-5da9-43c0-835e-e965ee256ec9_1080x1080.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!-rtV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde4f0d41-5da9-43c0-835e-e965ee256ec9_1080x1080.png" width="1080" height="1080" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/de4f0d41-5da9-43c0-835e-e965ee256ec9_1080x1080.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1080,&quot;width&quot;:1080,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:43950,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199514132?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde4f0d41-5da9-43c0-835e-e965ee256ec9_1080x1080.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!-rtV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde4f0d41-5da9-43c0-835e-e965ee256ec9_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!-rtV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde4f0d41-5da9-43c0-835e-e965ee256ec9_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!-rtV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde4f0d41-5da9-43c0-835e-e965ee256ec9_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!-rtV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde4f0d41-5da9-43c0-835e-e965ee256ec9_1080x1080.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>7. CPU vs. GPU vs. TPU in LLM workflows</h3><p>The roles of different semiconductor chips in LLM training and inference are frequently confused.</p><p><strong>CPU (Central Processing Unit)</strong> is the standard chip that acts as the brain of every modern computer. It contains a few powerful cores that are perfect for executing tasks with sequential and branching logic at very low latency.</p><p>However, a CPU is not optimized for the parallel matrix multiplication operations that take place during LLM training or inference. Its role is in orchestration, which involves data loading and preprocessing/postprocessing, tokenization/detokenization, scheduling work onto GPUs/TPUs, running the training loop's control logic, and serving infrastructure.</p><p>CPUs have <a href="https://www.amd.com/en/blogs/2026/agentic-ai-brings-new-attention-to-cpus-in-the-ai-data.html">recently found a great use case</a> in agentic workflows. This is because these workflows are dominated by tasks such as tool calls, API requests, file or database reads, and parsing outputs rather than heavy parallel computations. This is exactly where CPUs shine.</p><p><strong>GPUs (Graphics Processing Units)</strong> are the masters of parallel computing. They have thousands of cores that are individually slower than CPU cores, but together produce massive throughput on parallel computations, especially matrix or tensor operations, which are core to LLM training and inference.</p><p>Modern GPUs also come with dedicated cores and memory for matrix multiplications. 
NVIDIA GPUs can be programmed with&nbsp;<a href="https://en.wikipedia.org/wiki/CUDA">CUDA</a>&nbsp;to implement highly efficient kernels (programs written for the GPU) for deep learning algorithms (for example, <a href="https://arxiv.org/abs/2205.14135">FlashAttention</a>).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!jgsc!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F810f8f4d-a194-4301-8fdd-8eea4709ce3c_1874x930.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!jgsc!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F810f8f4d-a194-4301-8fdd-8eea4709ce3c_1874x930.png 424w, https://substackcdn.com/image/fetch/$s_!jgsc!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F810f8f4d-a194-4301-8fdd-8eea4709ce3c_1874x930.png 848w, https://substackcdn.com/image/fetch/$s_!jgsc!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F810f8f4d-a194-4301-8fdd-8eea4709ce3c_1874x930.png 1272w, https://substackcdn.com/image/fetch/$s_!jgsc!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F810f8f4d-a194-4301-8fdd-8eea4709ce3c_1874x930.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!jgsc!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F810f8f4d-a194-4301-8fdd-8eea4709ce3c_1874x930.png" width="1200" height="595.5176093916756" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/810f8f4d-a194-4301-8fdd-8eea4709ce3c_1874x930.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:930,&quot;width&quot;:1874,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:314164,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199514132?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66a930e7-2d18-4d80-ab3c-faeaffc21006_1874x954.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!jgsc!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F810f8f4d-a194-4301-8fdd-8eea4709ce3c_1874x930.png 424w, https://substackcdn.com/image/fetch/$s_!jgsc!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F810f8f4d-a194-4301-8fdd-8eea4709ce3c_1874x930.png 848w, https://substackcdn.com/image/fetch/$s_!jgsc!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F810f8f4d-a194-4301-8fdd-8eea4709ce3c_1874x930.png 1272w, https://substackcdn.com/image/fetch/$s_!jgsc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F810f8f4d-a194-4301-8fdd-8eea4709ce3c_1874x930.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>Tensor Processing Units (TPUs)</strong> are specialized chips designed by Google, optimized for fast and efficient tensor/matrix multiplication operations. 
These chips are particularly well-suited to <a href="https://www.intoai.pub/p/google-jax-ai-stack">Google's JAX ecosystem</a>, giving it an advantage in large-scale training and inference.</p><p>A great book to learn more about TPUs <a href="https://jax-ml.github.io/scaling-book/">can be found here</a>.</p><p>5 resources to get started with programming LLMs at scale using GPUs/TPUs are listed below.</p><div class="comment" data-attrs="{&quot;url&quot;:&quot;https://open.substack.com/&quot;,&quot;commentId&quot;:262493695,&quot;comment&quot;:{&quot;id&quot;:262493695,&quot;date&quot;:&quot;2026-05-21T00:03:03.335Z&quot;,&quot;edited_at&quot;:&quot;2026-05-21T00:45:27.948Z&quot;,&quot;body&quot;:&quot;&#128640; You have a great chance of getting hired at a frontier AI company/ lab if you're amazing at Distributed AI/ML. \n\nHere are 5 resources to get started. \n\n\n\n\n\nA Systems View of LLMs on TPUs (JAX / Google scaling book) &#8594; https://jax-ml.github.io/scaling-book/\n\n\n\nThe Ultra-Scale Playbook: Training LLMs on GPU Clusters (Hugging Face) &#8594; https://huggingface.co/spaces/nanotron/ultrascale-playbook\n\n\n\nTraining Deep Learning Models on Multiple GPUs &#8594; https://www.intoai.pub/p/distributed-data-parallel\n\n\n\nDistributed Machine Learning Patterns by Terry Tang &#8594; https://www.manning.com/books/distributed-machine-learning-patterns\n\n\n\nDeep Learning at Scale by Suneeta Mall &#8594; https://suneeta-mall.github.io/projects/oreilly_deep_learning_at_scale/\n\n(Bookmark them and share them with others. &#9851;&#65039;)&quot;,&quot;body_json&quot;:{&quot;attrs&quot;:{&quot;schemaVersion&quot;:&quot;v1&quot;},&quot;type&quot;:&quot;doc&quot;,&quot;content&quot;:[{&quot;type&quot;:&quot;paragraph&quot;,&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;&#128640; You have a great chance of getting hired at a frontier AI company/ lab if you're amazing at Distributed AI/ML. 
&quot;}]},{&quot;type&quot;:&quot;paragraph&quot;,&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;Here are 5 resources to get started. &quot;}]},{&quot;attrs&quot;:{&quot;start&quot;:1},&quot;type&quot;:&quot;orderedList&quot;,&quot;content&quot;:[{&quot;type&quot;:&quot;listItem&quot;,&quot;content&quot;:[{&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;A Systems View of LLMs on TPUs (JAX / Google scaling book) &#8594; &quot;},{&quot;type&quot;:&quot;text&quot;,&quot;marks&quot;:[{&quot;type&quot;:&quot;link&quot;,&quot;attrs&quot;:{&quot;class&quot;:&quot;note-link&quot;,&quot;rel&quot;:&quot;nofollow ugc noopener&quot;,&quot;target&quot;:&quot;_blank&quot;,&quot;href&quot;:&quot;https://jax-ml.github.io/scaling-book/&quot;}}],&quot;text&quot;:&quot;https://jax-ml.github.io/scaling-book/&quot;}],&quot;type&quot;:&quot;paragraph&quot;}]},{&quot;type&quot;:&quot;listItem&quot;,&quot;content&quot;:[{&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;The Ultra-Scale Playbook: Training LLMs on GPU Clusters (Hugging Face) &#8594; &quot;},{&quot;marks&quot;:[{&quot;type&quot;:&quot;link&quot;,&quot;attrs&quot;:{&quot;href&quot;:&quot;https://huggingface.co/spaces/nanotron/ultrascale-playbook&quot;,&quot;target&quot;:&quot;_blank&quot;,&quot;rel&quot;:&quot;nofollow ugc noopener&quot;,&quot;class&quot;:&quot;note-link&quot;}}],&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;https://huggingface.co/spaces/nanotron/ultrascale-playbook&quot;}],&quot;type&quot;:&quot;paragraph&quot;}]},{&quot;type&quot;:&quot;listItem&quot;,&quot;content&quot;:[{&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;Training Deep Learning Models on Multiple GPUs &#8594; 
&quot;},{&quot;text&quot;:&quot;https://www.intoai.pub/p/distributed-data-parallel&quot;,&quot;type&quot;:&quot;text&quot;,&quot;marks&quot;:[{&quot;type&quot;:&quot;link&quot;,&quot;attrs&quot;:{&quot;href&quot;:&quot;https://www.intoai.pub/p/distributed-data-parallel&quot;,&quot;target&quot;:&quot;_blank&quot;,&quot;class&quot;:&quot;note-link&quot;,&quot;rel&quot;:&quot;nofollow ugc noopener&quot;}}]}],&quot;type&quot;:&quot;paragraph&quot;}]},{&quot;type&quot;:&quot;listItem&quot;,&quot;content&quot;:[{&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;Distributed Machine Learning Patterns by Terry Tang &#8594; &quot;},{&quot;marks&quot;:[{&quot;attrs&quot;:{&quot;href&quot;:&quot;https://www.manning.com/books/distributed-machine-learning-patterns&quot;,&quot;rel&quot;:&quot;nofollow ugc noopener&quot;,&quot;class&quot;:&quot;note-link&quot;,&quot;target&quot;:&quot;_blank&quot;},&quot;type&quot;:&quot;link&quot;}],&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;https://www.manning.com/books/distributed-machine-learning-patterns&quot;}],&quot;type&quot;:&quot;paragraph&quot;}]},{&quot;type&quot;:&quot;listItem&quot;,&quot;content&quot;:[{&quot;content&quot;:[{&quot;text&quot;:&quot;Deep Learning at Scale by Suneeta Mall &#8594; &quot;,&quot;type&quot;:&quot;text&quot;},{&quot;text&quot;:&quot;https://suneeta-mall.github.io/projects/oreilly_deep_learning_at_scale/&quot;,&quot;marks&quot;:[{&quot;attrs&quot;:{&quot;class&quot;:&quot;note-link&quot;,&quot;target&quot;:&quot;_blank&quot;,&quot;href&quot;:&quot;https://suneeta-mall.github.io/projects/oreilly_deep_learning_at_scale/&quot;,&quot;rel&quot;:&quot;nofollow ugc noopener&quot;},&quot;type&quot;:&quot;link&quot;}],&quot;type&quot;:&quot;text&quot;}],&quot;type&quot;:&quot;paragraph&quot;}]}]},{&quot;type&quot;:&quot;paragraph&quot;,&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;(Bookmark them and share them with others. 
&#9851;&#65039;)&quot;}]}]},&quot;restacks&quot;:1,&quot;reaction_count&quot;:7,&quot;children_count&quot;:0,&quot;attachments&quot;:[{&quot;id&quot;:&quot;6acafb25-72bf-4118-8ff2-1d00b57d9139&quot;,&quot;type&quot;:&quot;image&quot;,&quot;imageUrl&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/17c3952a-6fc4-48c3-9fad-58d47278fb53_2200x968.png&quot;,&quot;imageWidth&quot;:2200,&quot;imageHeight&quot;:968,&quot;explicit&quot;:false}],&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;user_id&quot;:155457308,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;user_bestseller_tier&quot;:100,&quot;userStatus&quot;:{&quot;bestsellerTier&quot;:100,&quot;subscriberTier&quot;:1,&quot;leaderboard&quot;:null,&quot;vip&quot;:false,&quot;badge&quot;:{&quot;type&quot;:&quot;bestseller&quot;,&quot;tier&quot;:100},&quot;paidPublicationIds&quot;:[1815472,2833541,63213],&quot;subscriber&quot;:null}},&quot;source&quot;:null,&quot;forumChannel&quot;:null}" data-component-name="CommentPlaceholder"></div><div><hr></div><h3>6. Words vs. Tokens</h3><p>Words are units of language that humans understand. Tokens, on the other hand, are units of language that LLMs work with.</p><p>Tokens are created by a <strong><a href="https://www.intoai.pub/p/build-an-llm-tokenizer">Tokenizer</a></strong> and can be whole words, subwords, or single characters. 
The complete set of all tokens that an LLM is familiar with is called its Vocabulary.</p><p>OpenAI models use tokenizers based on the <a href="https://en.wikipedia.org/wiki/Byte-pair_encoding">Byte Pair Encoding (BPE) algorithm</a> to convert human language into subwords for an LLM to work with.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Ii5m!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83492380-73e7-46cb-8829-39e018ca517b_1548x568.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Ii5m!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83492380-73e7-46cb-8829-39e018ca517b_1548x568.png 424w, https://substackcdn.com/image/fetch/$s_!Ii5m!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83492380-73e7-46cb-8829-39e018ca517b_1548x568.png 848w, https://substackcdn.com/image/fetch/$s_!Ii5m!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83492380-73e7-46cb-8829-39e018ca517b_1548x568.png 1272w, https://substackcdn.com/image/fetch/$s_!Ii5m!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83492380-73e7-46cb-8829-39e018ca517b_1548x568.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Ii5m!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83492380-73e7-46cb-8829-39e018ca517b_1548x568.png" width="1456" height="534" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/83492380-73e7-46cb-8829-39e018ca517b_1548x568.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:534,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:55067,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199514132?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83492380-73e7-46cb-8829-39e018ca517b_1548x568.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Ii5m!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83492380-73e7-46cb-8829-39e018ca517b_1548x568.png 424w, https://substackcdn.com/image/fetch/$s_!Ii5m!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83492380-73e7-46cb-8829-39e018ca517b_1548x568.png 848w, https://substackcdn.com/image/fetch/$s_!Ii5m!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83492380-73e7-46cb-8829-39e018ca517b_1548x568.png 1272w, https://substackcdn.com/image/fetch/$s_!Ii5m!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F83492380-73e7-46cb-8829-39e018ca517b_1548x568.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Human language is broken down into subwords that LLMs work with, using a BPE-based tokenizer (<a href="https://platform.openai.com/tokenizer">Source</a>)</figcaption></figure></div><div><hr></div><h3>5. Logits vs. Probabilities</h3><p>LLMs produce raw scores called logits for each token in their vocabulary at each step of text generation. These scores are real numbers that can be negative, zero, or positive.</p><p>Logits are converted to probabilities using the&nbsp;<a href="https://en.wikipedia.org/wiki/Softmax_function">softmax function</a>, which exponentiates each logit and then divides by the sum of those exponentials.</p><p>The resulting probabilities are normalized, which means that they range between 0 and 1 and sum to 1. 
Each probability is the model&#8217;s estimated chance of the corresponding token being the next token in the sequence.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Czto!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9cbbb6d-98d9-459b-a3bd-50aea7d580ce_1080x1080.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Czto!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9cbbb6d-98d9-459b-a3bd-50aea7d580ce_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!Czto!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9cbbb6d-98d9-459b-a3bd-50aea7d580ce_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!Czto!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9cbbb6d-98d9-459b-a3bd-50aea7d580ce_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!Czto!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9cbbb6d-98d9-459b-a3bd-50aea7d580ce_1080x1080.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Czto!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9cbbb6d-98d9-459b-a3bd-50aea7d580ce_1080x1080.png" width="1080" height="1080" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f9cbbb6d-98d9-459b-a3bd-50aea7d580ce_1080x1080.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1080,&quot;width&quot;:1080,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:48229,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199514132?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9cbbb6d-98d9-459b-a3bd-50aea7d580ce_1080x1080.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Czto!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9cbbb6d-98d9-459b-a3bd-50aea7d580ce_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!Czto!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9cbbb6d-98d9-459b-a3bd-50aea7d580ce_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!Czto!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9cbbb6d-98d9-459b-a3bd-50aea7d580ce_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!Czto!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff9cbbb6d-98d9-459b-a3bd-50aea7d580ce_1080x1080.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>4. Context window vs. Memory</h3><p>Memory is a broad term, while the context window is a specific type of memory that LLMs use by default.</p><p>The context window is the maximum number of tokens (both input and generated) an LLM can process at once. It is the working memory of an LLM that the model attends to during use. 
Once a chat session ends, this information stored in the context window is discarded.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!aEZw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8caa2968-2ea9-49aa-8d88-656765857a5e_1080x1080.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!aEZw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8caa2968-2ea9-49aa-8d88-656765857a5e_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!aEZw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8caa2968-2ea9-49aa-8d88-656765857a5e_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!aEZw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8caa2968-2ea9-49aa-8d88-656765857a5e_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!aEZw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8caa2968-2ea9-49aa-8d88-656765857a5e_1080x1080.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!aEZw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8caa2968-2ea9-49aa-8d88-656765857a5e_1080x1080.png" width="1080" height="1080" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8caa2968-2ea9-49aa-8d88-656765857a5e_1080x1080.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1080,&quot;width&quot;:1080,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:50789,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199514132?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8caa2968-2ea9-49aa-8d88-656765857a5e_1080x1080.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!aEZw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8caa2968-2ea9-49aa-8d88-656765857a5e_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!aEZw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8caa2968-2ea9-49aa-8d88-656765857a5e_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!aEZw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8caa2968-2ea9-49aa-8d88-656765857a5e_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!aEZw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8caa2968-2ea9-49aa-8d88-656765857a5e_1080x1080.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Memory, on the other hand, is a broad term that includes:</p><ol><li><p><strong><a href="https://en.wikipedia.org/wiki/Baddeley%27s_model_of_working_memory">Working memory</a></strong>, or <strong>Short-term memory</strong>, which stores information about what the LLM is thinking about during ongoing problem-solving. 
This is the LLM&#8217;s context window.</p></li><li><p><strong>Long-term memory</strong>, which is further divided into three:</p><ul><li><p><strong><a href="https://en.wikipedia.org/wiki/Procedural_memory">Procedural memory</a></strong>: This stores rules or skills that can be applied to working memory to determine the LLM&#8217;s behavior.</p></li><li><p><strong><a href="https://en.wikipedia.org/wiki/Semantic_memory">Semantic memory</a></strong>: This stores general facts and knowledge about the world.</p></li><li><p><strong><a href="https://en.wikipedia.org/wiki/Episodic_memory">Episodic memory</a></strong>: This stores sequences of the LLM&#8217;s past behaviors and interactions.</p></li></ul></li></ol><p>A few examples of long-term memory systems are <a href="https://arxiv.org/abs/2501.13956">Zep</a>, <a href="https://www.intoai.pub/p/mem0">Mem0</a>, and&nbsp;<a href="https://langchain-ai.github.io/langmem/">LangMem</a>.</p><p>You can read more about LLM memory using the following links.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;6fc7ff0f-276f-47c9-8e64-7d2b1899f470&quot;,&quot;caption&quot;:&quot;&#127873; Become a paid subscriber to &#8216;Into AI&#8217; today at a special 25% discount on the annual subscription.&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Memory For AI Agents: Everything That You Need To Know (Part-1)&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. 
Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2026-02-04T10:30:36.271Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!UGCo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/ai-memory&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:186643994,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:10,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;66a6f8b9-2895-49a5-88cc-5da6da270de3&quot;,&quot;caption&quot;:&quot;In the last part of this series, we learned about what memory is and why modern AI systems need it.&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;What is Mem0 and how does it 
work&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2026-02-10T11:25:54.373Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!UGCo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/mem0&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:186846978,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:10,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><h3>3. LLM vs. AI agents</h3><p>An LLM is the model that generates output/ response for a given input/ prompt. It does not remember its past actions beyond its <a href="https://en.wikipedia.org/wiki/Context_window">context window</a> and cannot fetch external data.</p><p>An AI agent, on the other hand, has agency. 
It can take actions to work towards a goal by using an LLM (the &#8220;brain&#8221;) in a loop.</p><p>An AI agent:</p><ul><li><p>Can call tools (web search, code execution, etc.) to access new data</p></li><li><p>Has memory to remember its past actions</p></li><li><p>Uses a <a href="https://arxiv.org/pdf/2210.03629">control loop</a> to plan (using an LLM), act, observe, and decide the next action until its goal is achieved</p></li><li><p>Can work with other AI agents to achieve a goal</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!XIvx!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F05c5f668-c365-4c94-925c-bc1702ba7340_1080x1080.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!XIvx!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F05c5f668-c365-4c94-925c-bc1702ba7340_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!XIvx!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F05c5f668-c365-4c94-925c-bc1702ba7340_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!XIvx!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F05c5f668-c365-4c94-925c-bc1702ba7340_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!XIvx!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F05c5f668-c365-4c94-925c-bc1702ba7340_1080x1080.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!XIvx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F05c5f668-c365-4c94-925c-bc1702ba7340_1080x1080.png" width="1080" height="1080" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/05c5f668-c365-4c94-925c-bc1702ba7340_1080x1080.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1080,&quot;width&quot;:1080,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:62692,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199514132?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F05c5f668-c365-4c94-925c-bc1702ba7340_1080x1080.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!XIvx!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F05c5f668-c365-4c94-925c-bc1702ba7340_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!XIvx!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F05c5f668-c365-4c94-925c-bc1702ba7340_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!XIvx!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F05c5f668-c365-4c94-925c-bc1702ba7340_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!XIvx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F05c5f668-c365-4c94-925c-bc1702ba7340_1080x1080.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>2. Inference vs. Decoding</h3><p>Inference is the process of running an LLM to produce outputs. It involves processing the given prompt, running a forward pass, and generating the output tokens. 
Modern LLM inference also involves batching multiple user requests, processing them efficiently on available hardware to achieve higher throughput, and streaming responses back token by token.</p><p>LLM inference takes place in two stages:</p><ul><li><p><strong>Prefill</strong>: Processing the given prompt to build the <a href="https://www.intoai.pub/p/llm-optimizations">KV cache</a> for the LLM to speed up text generation in the subsequent steps</p></li><li><p><strong>Decode/Decoding</strong>: Text generation by producing tokens one at a time</p></li></ul><p>Decoding is the part of inference that determines how each output token is chosen.</p><p>At every step of text generation, an LLM outputs a probability distribution over all the tokens in its vocabulary, and the decoding strategy determines which token to pick from it as the next token.</p><p>Some commonly used decoding strategies are:</p><ul><li><p>Greedy</p></li><li><p>Beam search</p></li><li><p>Top-P sampling</p></li><li><p>Top-K sampling</p></li></ul><p>A hyperparameter called <a href="https://www.ibm.com/think/topics/llm-temperature">Temperature</a> can be used with any sampling-based decoding strategy to control the randomness or creativity of the generation process.</p><p>You can read about these in detail using the link below.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;ce3c07e2-cd26-45f9-bd80-2857d6a0cff6&quot;,&quot;caption&quot;:&quot;Into AI thrives thanks to the support of paid subscribers. If you want to access exclusive analysis, in-depth guides, and help this work continue, consider becoming a paid member today. 
Your support truly makes a difference!&quot;,&quot;cta&quot;:null,&quot;showBylines&quot;:true,&quot;showDescription&quot;:true,&quot;showImage&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Top 4 Decoding Strategies In LLMs Explained Simply&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;Author of &#8216;Into AI&#8217; &#8594; a bestselling newsletter helping engineers become 100&#215; better in AI | Ex-CTO&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:100}],&quot;post_date&quot;:&quot;2025-10-17T12:04:56.382Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!6KOw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe731fdea-885b-463b-8bce-5d32ac1d5ef0_2400x1067.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/decoding-strategies-in-llms&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:176405190,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:5,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><h3>1. Pruning vs. 
Quantization</h3><p>Pruning and Quantization are both <a href="https://en.wikipedia.org/wiki/Model_compression">model compression</a> techniques that reduce an LLM's memory and compute requirements while preserving accuracy.</p><p><a href="https://en.wikipedia.org/wiki/Model_compression#Pruning">Pruning</a> means reducing the number of parameters in a model by setting their values to zero or by removing entire neurons, attention heads, or full layers. The removed ones are usually parameters that contribute little to the model's output and are not very important for accuracy.</p><p><a href="https://huggingface.co/docs/optimum/en/concept_guides/quantization">Quantization</a>, on the other hand, keeps all the parameters but reduces their numerical precision. For example, instead of representing weights as 32- or 16-bit floating-point numbers, they are converted to lower-precision numbers, such as 8- or 4-bit integers. In this way, each parameter is represented using fewer bits, requiring less memory and compute to run.</p><div><hr></div><p>Join the <strong>paid tier today</strong> to get access to all posts in this newsletter:</p><ul><li><p>&#128126; <a href="https://www.intoai.pub/p/what-makes-deekseek-v4-so-good">What makes DeepSeek-V4 so good?</a></p></li><li><p>&#127752; <a href="https://www.intoai.pub/p/diffusion-llms-explained-simply">Diffusion LLMs, Explained Simply</a></p></li><li><p>&#128218; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p>&#128104;&#8205;&#128300; <a href="https://www.intoai.pub/p/build-and-train-a-mixture-of-experts?utm_source=publication-search">Build and Train a Mixture-of-Experts (MoE) LLM from scratch</a></p></li><li><p>&#9000;&#65039; <a href="https://www.intoai.pub/p/distributed-data-parallel">Learn to train deep learning models on multiple GPUs</a></p></li><li><p>&#128640; Train a Diffusion LLM from scratch (out next week)</p></li></ul><p>and so many more!</p><p 
class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join 'Into AI' premium today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join 'Into AI' premium today</span></a></p>]]></content:encoded></item><item><title><![CDATA[This Week In AI Research (17-23 May 26) 🗓️]]></title><description><![CDATA[The top 10 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-17-23-may</link><guid isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-17-23-may</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Tue, 26 May 2026 15:12:11 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!bHPM!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36ee404-38f6-4d1e-9c4d-00564764997c_1672x941.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!bHPM!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36ee404-38f6-4d1e-9c4d-00564764997c_1672x941.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!bHPM!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36ee404-38f6-4d1e-9c4d-00564764997c_1672x941.png 424w, 
https://substackcdn.com/image/fetch/$s_!bHPM!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36ee404-38f6-4d1e-9c4d-00564764997c_1672x941.png 848w, https://substackcdn.com/image/fetch/$s_!bHPM!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36ee404-38f6-4d1e-9c4d-00564764997c_1672x941.png 1272w, https://substackcdn.com/image/fetch/$s_!bHPM!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36ee404-38f6-4d1e-9c4d-00564764997c_1672x941.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!bHPM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36ee404-38f6-4d1e-9c4d-00564764997c_1672x941.png" width="1456" height="819" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c36ee404-38f6-4d1e-9c4d-00564764997c_1672x941.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2528105,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199211863?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36ee404-38f6-4d1e-9c4d-00564764997c_1672x941.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!bHPM!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36ee404-38f6-4d1e-9c4d-00564764997c_1672x941.png 
424w, https://substackcdn.com/image/fetch/$s_!bHPM!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36ee404-38f6-4d1e-9c4d-00564764997c_1672x941.png 848w, https://substackcdn.com/image/fetch/$s_!bHPM!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36ee404-38f6-4d1e-9c4d-00564764997c_1672x941.png 1272w, https://substackcdn.com/image/fetch/$s_!bHPM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc36ee404-38f6-4d1e-9c4d-00564764997c_1672x941.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><h3>10. Vector Policy Optimization</h3><p>This research paper introduces <strong>Vector Policy Optimization (VPO)</strong>, an RL post-training method for LLMs. VPO improves test-time search by preserving diversity rather than narrowing to a single high-reward answer. </p><p>Unlike GRPO, which focuses on optimizing a single scalar reward, VPO employs vector-valued rewards (such as per-test-case correctness, multiple user preferences, or sub-task scores) and trains the model to produce sets of solutions that cater to different reward trade-offs across the Pareto frontier. </p><p>VPO matches or exceeds the performance of strong scalar RL baselines on test-time search (best@k and pass@k metrics) across multi-hop question answering, logic reasoning, navigation, tool use, and coding. These improvements become more significant as the search budget increases. </p><p>For evolutionary search, VPO models unlock problems that GRPO models cannot solve at all.</p><p>According to the authors, as test-time search becomes more standardized, optimizing for diversity may need to become the default post-training objective.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!GBkc!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97cd82c3-32c1-4e7c-8644-af5920ad2234_2264x1138.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!GBkc!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97cd82c3-32c1-4e7c-8644-af5920ad2234_2264x1138.png 424w, 
https://substackcdn.com/image/fetch/$s_!GBkc!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97cd82c3-32c1-4e7c-8644-af5920ad2234_2264x1138.png 848w, https://substackcdn.com/image/fetch/$s_!GBkc!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97cd82c3-32c1-4e7c-8644-af5920ad2234_2264x1138.png 1272w, https://substackcdn.com/image/fetch/$s_!GBkc!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97cd82c3-32c1-4e7c-8644-af5920ad2234_2264x1138.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!GBkc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97cd82c3-32c1-4e7c-8644-af5920ad2234_2264x1138.png" width="1456" height="732" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/97cd82c3-32c1-4e7c-8644-af5920ad2234_2264x1138.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:732,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:422122,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199211863?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97cd82c3-32c1-4e7c-8644-af5920ad2234_2264x1138.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" 
srcset="https://substackcdn.com/image/fetch/$s_!GBkc!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97cd82c3-32c1-4e7c-8644-af5920ad2234_2264x1138.png 424w, https://substackcdn.com/image/fetch/$s_!GBkc!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97cd82c3-32c1-4e7c-8644-af5920ad2234_2264x1138.png 848w, https://substackcdn.com/image/fetch/$s_!GBkc!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97cd82c3-32c1-4e7c-8644-af5920ad2234_2264x1138.png 1272w, https://substackcdn.com/image/fetch/$s_!GBkc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97cd82c3-32c1-4e7c-8644-af5920ad2234_2264x1138.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.22817">using this link</a>.</p><div><hr></div><p>Join the <strong>paid tier today</strong> to get access to all posts in this newsletter:</p><ul><li><p>&#128126; <a href="https://www.intoai.pub/p/what-makes-deekseek-v4-so-good">What makes DeepSeek-V4 so good?</a></p></li><li><p>&#127752; <a href="https://www.intoai.pub/p/diffusion-llms-explained-simply">Diffusion LLMs, Explained Simply</a></p></li><li><p>&#128218; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p>&#128104;&#8205;&#128300; <a href="https://www.intoai.pub/p/build-and-train-a-mixture-of-experts?utm_source=publication-search">Build and Train a Mixture-of-Experts (MoE) LLM from scratch</a></p></li><li><p>&#9000;&#65039; <a href="https://www.intoai.pub/p/distributed-data-parallel">Learn to train deep learning models on multiple GPUs</a></p></li><li><p>&#128640; Train a Diffusion LLM from scratch (out next week)</p></li></ul><p>and so many more!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join 'Into AI' premium today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join 'Into AI' premium today</span></a></p><div><hr></div><h3>9. Code as Agent Harness</h3><p>This survey argues that in modern agentic AI systems, code is no longer just an output produced by LLMs. 
Instead, it is the executable, inspectable, and stateful substrate that allows agents to reason, act, model environments, receive feedback, and verify their progress. </p><p>To systematically study this perspective, the authors present a three-layer taxonomy that includes: </p><ul><li><p>Harness interfaces, where code connects agents to reasoning, action, and environment modeling</p></li><li><p>Harness mechanisms, which include planning, memory, and tool use for long-horizon execution, together with feedback-driven control and optimization that make the harness reliable and adaptive</p></li><li><p>Scaling the harness, from single-agent systems to multi-agent settings, where shared code artifacts support multi-agent coordination, review, and verification</p></li></ul><p>Across these layers, the authors summarize representative methods and practical applications of <em>Code as agent harness</em>, and outline open challenges for harness engineering.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lEmW!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6d5a81e-2de5-44dc-b010-9332f7bd02e0_1918x1288.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!lEmW!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6d5a81e-2de5-44dc-b010-9332f7bd02e0_1918x1288.png 424w, https://substackcdn.com/image/fetch/$s_!lEmW!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6d5a81e-2de5-44dc-b010-9332f7bd02e0_1918x1288.png 848w, 
https://substackcdn.com/image/fetch/$s_!lEmW!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6d5a81e-2de5-44dc-b010-9332f7bd02e0_1918x1288.png 1272w, https://substackcdn.com/image/fetch/$s_!lEmW!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6d5a81e-2de5-44dc-b010-9332f7bd02e0_1918x1288.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!lEmW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6d5a81e-2de5-44dc-b010-9332f7bd02e0_1918x1288.png" width="1456" height="978" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b6d5a81e-2de5-44dc-b010-9332f7bd02e0_1918x1288.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:978,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1121813,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199211863?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6d5a81e-2de5-44dc-b010-9332f7bd02e0_1918x1288.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!lEmW!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6d5a81e-2de5-44dc-b010-9332f7bd02e0_1918x1288.png 424w, 
https://substackcdn.com/image/fetch/$s_!lEmW!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6d5a81e-2de5-44dc-b010-9332f7bd02e0_1918x1288.png 848w, https://substackcdn.com/image/fetch/$s_!lEmW!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6d5a81e-2de5-44dc-b010-9332f7bd02e0_1918x1288.png 1272w, https://substackcdn.com/image/fetch/$s_!lEmW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6d5a81e-2de5-44dc-b010-9332f7bd02e0_1918x1288.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.18747">using this link</a>.</p><div><hr></div><h3>8. Tokenization via Convex Relaxations</h3><p>This research paper introduces a new tokenization algorithm called <strong>ConvexTok</strong>. </p><p>Current tokenization algorithms, such as BPE and Unigram, are greedy algorithms that make locally optimal decisions without considering the resulting vocabulary as a whole.</p><p>ConvexTok, instead, formulates the construction of a tokenizer as a linear program and solves it using a convex optimization tool.</p><p>It improves intrinsic tokenization metrics, bits-per-byte (BpB), and downstream task performance of LLMs. It also allows users to certify how close their tokenizer is to the global optimum. Empirically, ConvexTok tokenizers are within 1% of optimal for common vocabulary sizes.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!F3Zm!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0519bdf2-0678-410a-b291-6283beadd1e7_2532x1234.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!F3Zm!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0519bdf2-0678-410a-b291-6283beadd1e7_2532x1234.png 424w, https://substackcdn.com/image/fetch/$s_!F3Zm!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0519bdf2-0678-410a-b291-6283beadd1e7_2532x1234.png 848w, 
https://substackcdn.com/image/fetch/$s_!F3Zm!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0519bdf2-0678-410a-b291-6283beadd1e7_2532x1234.png 1272w, https://substackcdn.com/image/fetch/$s_!F3Zm!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0519bdf2-0678-410a-b291-6283beadd1e7_2532x1234.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!F3Zm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0519bdf2-0678-410a-b291-6283beadd1e7_2532x1234.png" width="1456" height="710" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0519bdf2-0678-410a-b291-6283beadd1e7_2532x1234.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:710,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:381055,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199211863?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0519bdf2-0678-410a-b291-6283beadd1e7_2532x1234.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!F3Zm!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0519bdf2-0678-410a-b291-6283beadd1e7_2532x1234.png 424w, 
https://substackcdn.com/image/fetch/$s_!F3Zm!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0519bdf2-0678-410a-b291-6283beadd1e7_2532x1234.png 848w, https://substackcdn.com/image/fetch/$s_!F3Zm!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0519bdf2-0678-410a-b291-6283beadd1e7_2532x1234.png 1272w, https://substackcdn.com/image/fetch/$s_!F3Zm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0519bdf2-0678-410a-b291-6283beadd1e7_2532x1234.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.22821">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>7. Probabilistic Tiny Recursive Model</h3><p>This research paper introduces <strong>Probabilistic TRM (PTRM)</strong>, which improves <a href="https://www.intoai.pub/p/tiny-recursive-model">Tiny Recursive Models</a> by adding Gaussian noise during inference to create multiple stochastic latent rollouts rather than a single deterministic trajectory.</p><p>The model then uses its existing Q head to select the most promising answer, avoiding bad solutions without retraining or task-specific augmentations. 
</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!sULF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcfc5c818-bb83-426a-8ecd-292f68a35154_2590x1122.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!sULF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcfc5c818-bb83-426a-8ecd-292f68a35154_2590x1122.png 424w, https://substackcdn.com/image/fetch/$s_!sULF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcfc5c818-bb83-426a-8ecd-292f68a35154_2590x1122.png 848w, https://substackcdn.com/image/fetch/$s_!sULF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcfc5c818-bb83-426a-8ecd-292f68a35154_2590x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!sULF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcfc5c818-bb83-426a-8ecd-292f68a35154_2590x1122.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!sULF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcfc5c818-bb83-426a-8ecd-292f68a35154_2590x1122.png" width="1456" height="631" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/cfc5c818-bb83-426a-8ecd-292f68a35154_2590x1122.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:631,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:371883,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199211863?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcfc5c818-bb83-426a-8ecd-292f68a35154_2590x1122.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!sULF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcfc5c818-bb83-426a-8ecd-292f68a35154_2590x1122.png 424w, https://substackcdn.com/image/fetch/$s_!sULF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcfc5c818-bb83-426a-8ecd-292f68a35154_2590x1122.png 848w, https://substackcdn.com/image/fetch/$s_!sULF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcfc5c818-bb83-426a-8ecd-292f68a35154_2590x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!sULF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcfc5c818-bb83-426a-8ecd-292f68a35154_2590x1122.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>PTRM improves accuracy across multiple benchmarks, including Sudoku-Extreme (87.4% to 98.75%) and Pencil Puzzle Bench (62.6% to 91.2%). </p><p>On Pencil Puzzle Bench, PTRM achieves nearly double the accuracy of frontier LLMs (91.2% vs. 
55.1%) at less than 0.0001x the cost, using only 7M parameters!</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!kSe-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F381005bf-73f1-4b10-b857-0b631f77f9ab_1860x1100.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!kSe-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F381005bf-73f1-4b10-b857-0b631f77f9ab_1860x1100.png 424w, https://substackcdn.com/image/fetch/$s_!kSe-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F381005bf-73f1-4b10-b857-0b631f77f9ab_1860x1100.png 848w, https://substackcdn.com/image/fetch/$s_!kSe-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F381005bf-73f1-4b10-b857-0b631f77f9ab_1860x1100.png 1272w, https://substackcdn.com/image/fetch/$s_!kSe-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F381005bf-73f1-4b10-b857-0b631f77f9ab_1860x1100.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!kSe-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F381005bf-73f1-4b10-b857-0b631f77f9ab_1860x1100.png" width="1456" height="861" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/381005bf-73f1-4b10-b857-0b631f77f9ab_1860x1100.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:861,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:271202,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199211863?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F381005bf-73f1-4b10-b857-0b631f77f9ab_1860x1100.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!kSe-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F381005bf-73f1-4b10-b857-0b631f77f9ab_1860x1100.png 424w, https://substackcdn.com/image/fetch/$s_!kSe-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F381005bf-73f1-4b10-b857-0b631f77f9ab_1860x1100.png 848w, https://substackcdn.com/image/fetch/$s_!kSe-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F381005bf-73f1-4b10-b857-0b631f77f9ab_1860x1100.png 1272w, https://substackcdn.com/image/fetch/$s_!kSe-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F381005bf-73f1-4b10-b857-0b631f77f9ab_1860x1100.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.19943">using this link</a>.</p><div><hr></div><h3>6. A Bitter Lesson for Data Filtering</h3><p>This research paper studies data filtering for LLM pretraining in high-compute, data-scarce settings and challenges the common belief that filtering data to include only high-quality information is essential.</p><p>Experiments show that with enough compute, the best data filter is no data filter. 
Sufficiently trained large-parameter LLMs not only tolerate low-quality data but also benefit from it.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!tZcW!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8eb16c7-71d4-4379-9394-ffa581f1a7d2_2556x1068.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!tZcW!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8eb16c7-71d4-4379-9394-ffa581f1a7d2_2556x1068.png 424w, https://substackcdn.com/image/fetch/$s_!tZcW!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8eb16c7-71d4-4379-9394-ffa581f1a7d2_2556x1068.png 848w, https://substackcdn.com/image/fetch/$s_!tZcW!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8eb16c7-71d4-4379-9394-ffa581f1a7d2_2556x1068.png 1272w, https://substackcdn.com/image/fetch/$s_!tZcW!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8eb16c7-71d4-4379-9394-ffa581f1a7d2_2556x1068.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!tZcW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8eb16c7-71d4-4379-9394-ffa581f1a7d2_2556x1068.png" width="1456" height="608" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e8eb16c7-71d4-4379-9394-ffa581f1a7d2_2556x1068.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:608,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:497222,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199211863?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8eb16c7-71d4-4379-9394-ffa581f1a7d2_2556x1068.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!tZcW!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8eb16c7-71d4-4379-9394-ffa581f1a7d2_2556x1068.png 424w, https://substackcdn.com/image/fetch/$s_!tZcW!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8eb16c7-71d4-4379-9394-ffa581f1a7d2_2556x1068.png 848w, https://substackcdn.com/image/fetch/$s_!tZcW!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8eb16c7-71d4-4379-9394-ffa581f1a7d2_2556x1068.png 1272w, https://substackcdn.com/image/fetch/$s_!tZcW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8eb16c7-71d4-4379-9394-ffa581f1a7d2_2556x1068.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.19407">using this link</a>.</p><div><hr></div><h3>5. Strong Teacher Not Needed? 
On Distillation in LLM Pretraining</h3><p>In this research paper, the authors challenge a common assumption in knowledge distillation, namely that stronger teachers lead to better students.</p><p>Their findings show that with proper mixing of the language modeling and knowledge distillation losses, even small and undertrained teachers improve larger students.</p><p>Pushing a strong teacher further, through more parameters or more training tokens, can saturate or even reverse the distillation gains.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!PKS3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f8b3c9c-a97d-46a5-99d1-efdba308e843_2128x988.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!PKS3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f8b3c9c-a97d-46a5-99d1-efdba308e843_2128x988.png 424w, https://substackcdn.com/image/fetch/$s_!PKS3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f8b3c9c-a97d-46a5-99d1-efdba308e843_2128x988.png 848w, https://substackcdn.com/image/fetch/$s_!PKS3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f8b3c9c-a97d-46a5-99d1-efdba308e843_2128x988.png 1272w, https://substackcdn.com/image/fetch/$s_!PKS3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f8b3c9c-a97d-46a5-99d1-efdba308e843_2128x988.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!PKS3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f8b3c9c-a97d-46a5-99d1-efdba308e843_2128x988.png" width="1456" height="676" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9f8b3c9c-a97d-46a5-99d1-efdba308e843_2128x988.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:676,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:762247,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199211863?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f8b3c9c-a97d-46a5-99d1-efdba308e843_2128x988.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!PKS3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f8b3c9c-a97d-46a5-99d1-efdba308e843_2128x988.png 424w, https://substackcdn.com/image/fetch/$s_!PKS3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f8b3c9c-a97d-46a5-99d1-efdba308e843_2128x988.png 848w, https://substackcdn.com/image/fetch/$s_!PKS3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f8b3c9c-a97d-46a5-99d1-efdba308e843_2128x988.png 1272w, https://substackcdn.com/image/fetch/$s_!PKS3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9f8b3c9c-a97d-46a5-99d1-efdba308e843_2128x988.png 1456w" sizes="100vw" 
loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>It is further observed that distillation helps the student model generalize more (perform better on out-of-distribution and downstream tasks) than it helps improve on the training data (in-domain tasks).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!wFmT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0a2ef962-a141-4f27-9858-1996154fdeb2_2678x1246.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source 
type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!wFmT!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0a2ef962-a141-4f27-9858-1996154fdeb2_2678x1246.png 424w, https://substackcdn.com/image/fetch/$s_!wFmT!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0a2ef962-a141-4f27-9858-1996154fdeb2_2678x1246.png 848w, https://substackcdn.com/image/fetch/$s_!wFmT!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0a2ef962-a141-4f27-9858-1996154fdeb2_2678x1246.png 1272w, https://substackcdn.com/image/fetch/$s_!wFmT!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0a2ef962-a141-4f27-9858-1996154fdeb2_2678x1246.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!wFmT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0a2ef962-a141-4f27-9858-1996154fdeb2_2678x1246.png" width="1456" height="677" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0a2ef962-a141-4f27-9858-1996154fdeb2_2678x1246.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:677,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:580279,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199211863?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0a2ef962-a141-4f27-9858-1996154fdeb2_2678x1246.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!wFmT!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0a2ef962-a141-4f27-9858-1996154fdeb2_2678x1246.png 424w, https://substackcdn.com/image/fetch/$s_!wFmT!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0a2ef962-a141-4f27-9858-1996154fdeb2_2678x1246.png 848w, https://substackcdn.com/image/fetch/$s_!wFmT!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0a2ef962-a141-4f27-9858-1996154fdeb2_2678x1246.png 1272w, https://substackcdn.com/image/fetch/$s_!wFmT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0a2ef962-a141-4f27-9858-1996154fdeb2_2678x1246.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.23857">using this link</a>.</p><div><hr></div><h3>4. HRM-Text: Efficient Pretraining Beyond Scaling</h3><p>This research paper introduces <strong>HRM-Text</strong>, a language model that replaces standard Transformers with a <a href="https://www.intoai.pub/p/hierarchical-reasoning-model">Hierarchical Recurrent Model (HRM)</a>, an architecture inspired by the human brain that uses slow (strategic) and fast (execution) layers.</p><p>To stabilize training for language modeling, the authors introduce <em>MagicNorm</em> and warmup deep credit assignment. </p><p>Alongside this, rather than standard raw-text pretraining, the model is trained exclusively on instruction-response pairs, using a task-completion objective and PrefixLM masking.</p><p>A 1B parameter HRM-Text model trained from scratch with a $1,500 budget achieves 60.7% on MMLU, 81.9% on ARC-C, 82.2% on DROP, 84.5% on GSM8K, and 56.2% on MATH. 
</p><p>Despite using ~100-900x fewer training tokens and ~96-432x less compute than standard baselines, HRM-Text performs competitively with 2&#8211;7B parameter open models.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!IXLZ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38a4945e-7dc8-44fa-9aaa-b3e2a8bb97be_1878x1078.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!IXLZ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38a4945e-7dc8-44fa-9aaa-b3e2a8bb97be_1878x1078.png 424w, https://substackcdn.com/image/fetch/$s_!IXLZ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38a4945e-7dc8-44fa-9aaa-b3e2a8bb97be_1878x1078.png 848w, https://substackcdn.com/image/fetch/$s_!IXLZ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38a4945e-7dc8-44fa-9aaa-b3e2a8bb97be_1878x1078.png 1272w, https://substackcdn.com/image/fetch/$s_!IXLZ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38a4945e-7dc8-44fa-9aaa-b3e2a8bb97be_1878x1078.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!IXLZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38a4945e-7dc8-44fa-9aaa-b3e2a8bb97be_1878x1078.png" width="1456" height="836" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/38a4945e-7dc8-44fa-9aaa-b3e2a8bb97be_1878x1078.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:836,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:428416,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199211863?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38a4945e-7dc8-44fa-9aaa-b3e2a8bb97be_1878x1078.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!IXLZ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38a4945e-7dc8-44fa-9aaa-b3e2a8bb97be_1878x1078.png 424w, https://substackcdn.com/image/fetch/$s_!IXLZ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38a4945e-7dc8-44fa-9aaa-b3e2a8bb97be_1878x1078.png 848w, https://substackcdn.com/image/fetch/$s_!IXLZ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38a4945e-7dc8-44fa-9aaa-b3e2a8bb97be_1878x1078.png 1272w, https://substackcdn.com/image/fetch/$s_!IXLZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F38a4945e-7dc8-44fa-9aaa-b3e2a8bb97be_1878x1078.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.20613">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>3. 
Gated DeltaNet-2</h3><p>This research paper introduces <strong>Gated DeltaNet-2</strong>, which improves linear-attention models by separating two memory operations, erasing old information and writing new information, that the previously introduced <a href="https://arxiv.org/abs/2412.06464">Gated Delta Networks</a> and <a href="https://arxiv.org/abs/2510.26692">Kimi Delta Attention (KDA)</a> tied together.</p><p>It uses separate channel-wise erase and write gates while supporting efficient recurrent and parallel training. This allows the model to manage compressed long-context memory better without the high cost of standard attention. </p><p>With 1.3B parameters, Gated DeltaNet-2 outperforms Mamba-2, Gated DeltaNet, KDA, and Mamba-3 variants in language modeling, commonsense reasoning, and retrieval, particularly in long-context needle-in-a-haystack tasks.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!vesE!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febccc1fb-7848-4c26-8d8f-613843623a5d_2284x1302.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!vesE!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febccc1fb-7848-4c26-8d8f-613843623a5d_2284x1302.png 424w, https://substackcdn.com/image/fetch/$s_!vesE!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febccc1fb-7848-4c26-8d8f-613843623a5d_2284x1302.png 848w, https://substackcdn.com/image/fetch/$s_!vesE!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febccc1fb-7848-4c26-8d8f-613843623a5d_2284x1302.png 1272w, 
https://substackcdn.com/image/fetch/$s_!vesE!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febccc1fb-7848-4c26-8d8f-613843623a5d_2284x1302.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!vesE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febccc1fb-7848-4c26-8d8f-613843623a5d_2284x1302.png" width="1456" height="830" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ebccc1fb-7848-4c26-8d8f-613843623a5d_2284x1302.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:830,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:352019,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199211863?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febccc1fb-7848-4c26-8d8f-613843623a5d_2284x1302.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!vesE!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febccc1fb-7848-4c26-8d8f-613843623a5d_2284x1302.png 424w, https://substackcdn.com/image/fetch/$s_!vesE!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febccc1fb-7848-4c26-8d8f-613843623a5d_2284x1302.png 848w, 
https://substackcdn.com/image/fetch/$s_!vesE!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febccc1fb-7848-4c26-8d8f-613843623a5d_2284x1302.png 1272w, https://substackcdn.com/image/fetch/$s_!vesE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Febccc1fb-7848-4c26-8d8f-613843623a5d_2284x1302.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.22791">using this link</a>.</p><div><hr></div><h3>2. 
Advancing Mathematics Research with AI-Driven Formal Proof Search</h3><p>This research paper presents AlphaProof Nexus, a framework that uses LLM-based agents to generate and iteratively improve Lean formal proofs using compiler feedback, optional <a href="https://www.nature.com/articles/s41586-025-09833-y">AlphaProof</a> calls, and evolutionary search.</p><p>In a large-scale test on open research problems, the strongest agent solved 9 of 353 Erd&#337;s problems, proved 44 of 492 OEIS conjectures, and contributed to multiple other open mathematical problems.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!q7lv!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1eb3173c-01c4-490e-94c5-7982e567ab6c_1878x1138.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!q7lv!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1eb3173c-01c4-490e-94c5-7982e567ab6c_1878x1138.png 424w, https://substackcdn.com/image/fetch/$s_!q7lv!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1eb3173c-01c4-490e-94c5-7982e567ab6c_1878x1138.png 848w, https://substackcdn.com/image/fetch/$s_!q7lv!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1eb3173c-01c4-490e-94c5-7982e567ab6c_1878x1138.png 1272w, https://substackcdn.com/image/fetch/$s_!q7lv!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1eb3173c-01c4-490e-94c5-7982e567ab6c_1878x1138.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!q7lv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1eb3173c-01c4-490e-94c5-7982e567ab6c_1878x1138.png" width="1456" height="882" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1eb3173c-01c4-490e-94c5-7982e567ab6c_1878x1138.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:882,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:272548,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199211863?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1eb3173c-01c4-490e-94c5-7982e567ab6c_1878x1138.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!q7lv!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1eb3173c-01c4-490e-94c5-7982e567ab6c_1878x1138.png 424w, https://substackcdn.com/image/fetch/$s_!q7lv!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1eb3173c-01c4-490e-94c5-7982e567ab6c_1878x1138.png 848w, https://substackcdn.com/image/fetch/$s_!q7lv!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1eb3173c-01c4-490e-94c5-7982e567ab6c_1878x1138.png 1272w, https://substackcdn.com/image/fetch/$s_!q7lv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1eb3173c-01c4-490e-94c5-7982e567ab6c_1878x1138.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.22763v1">using this link</a>.</p><div><hr></div><h3>1. 
<strong>An OpenAI model has disproved a central conjecture in discrete geometry</strong></h3><p>This blog post reports that an internal reasoning model at OpenAI has disproved <a href="https://en.wikipedia.org/wiki/Unit_distance_graph">Erd&#337;s&#8217;s long-standing unit distance conjecture</a> in discrete geometry, that the maximum number of unit-distance pairs among <code>n</code> points in the plane is bounded by n&#185;&#8314;&#7506;&#8317;&#185;&#8318;.</p><p>The model produced an infinite set of point configurations with at least n&#185;&#8314;&#948; unit distances for some fixed &#948; greater than 0, using unexpected tools from algebraic number theory, such as class field towers and Golod&#8211;Shafarevich theory.</p><p>The proof has been verified by external mathematicians, and this is a major milestone where an AI system has autonomously resolved an important open mathematical problem, revealing a surprising connection between algebraic number theory and discrete geometry.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!s9rV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc466b805-4ba7-40e8-89e4-908a565546dd_2228x1194.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!s9rV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc466b805-4ba7-40e8-89e4-908a565546dd_2228x1194.png 424w, https://substackcdn.com/image/fetch/$s_!s9rV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc466b805-4ba7-40e8-89e4-908a565546dd_2228x1194.png 848w, 
https://substackcdn.com/image/fetch/$s_!s9rV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc466b805-4ba7-40e8-89e4-908a565546dd_2228x1194.png 1272w, https://substackcdn.com/image/fetch/$s_!s9rV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc466b805-4ba7-40e8-89e4-908a565546dd_2228x1194.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!s9rV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc466b805-4ba7-40e8-89e4-908a565546dd_2228x1194.png" width="1456" height="780" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c466b805-4ba7-40e8-89e4-908a565546dd_2228x1194.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:780,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:281699,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/199211863?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc466b805-4ba7-40e8-89e4-908a565546dd_2228x1194.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!s9rV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc466b805-4ba7-40e8-89e4-908a565546dd_2228x1194.png 424w, 
https://substackcdn.com/image/fetch/$s_!s9rV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc466b805-4ba7-40e8-89e4-908a565546dd_2228x1194.png 848w, https://substackcdn.com/image/fetch/$s_!s9rV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc466b805-4ba7-40e8-89e4-908a565546dd_2228x1194.png 1272w, https://substackcdn.com/image/fetch/$s_!s9rV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc466b805-4ba7-40e8-89e4-908a565546dd_2228x1194.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this <a href="https://cdn.openai.com/pdf/74c24085-19b0-4534-9c90-465b8e29ad73/unit-distance-proof.pdf">using this link</a>.</p><div><hr></div><p>This newsletter edition is completely free to read. Show your love by liking it, restacking it, and sharing it with others! &#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/this-week-in-ai-research-17-23-may?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/p/this-week-in-ai-research-17-23-may?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p>]]></content:encoded></item><item><title><![CDATA[Tensorwise: Early release for paid subscribers 🥳]]></title><description><![CDATA[You're getting to try this first!]]></description><link>https://www.intoai.pub/p/tensorwise-early-release-for-paid</link><guid isPermaLink="false">https://www.intoai.pub/p/tensorwise-early-release-for-paid</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Thu, 21 May 2026 12:02:50 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!l3tU!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c00ce95-d895-4ebb-bcc6-9e075afb6812_2148x884.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!l3tU!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c00ce95-d895-4ebb-bcc6-9e075afb6812_2148x884.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!l3tU!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c00ce95-d895-4ebb-bcc6-9e075afb6812_2148x884.png 424w, https://substackcdn.com/image/fetch/$s_!l3tU!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c00ce95-d895-4ebb-bcc6-9e075afb6812_2148x884.png 848w, https://substackcdn.com/image/fetch/$s_!l3tU!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c00ce95-d895-4ebb-bcc6-9e075afb6812_2148x884.png 1272w, https://substackcdn.com/image/fetch/$s_!l3tU!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c00ce95-d895-4ebb-bcc6-9e075afb6812_2148x884.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!l3tU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c00ce95-d895-4ebb-bcc6-9e075afb6812_2148x884.png" width="2148" height="884" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5c00ce95-d895-4ebb-bcc6-9e075afb6812_2148x884.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:884,&quot;width&quot;:2148,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:105422,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/198681103?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cd75149-fee4-4036-8364-cb4386a9ef24_2148x908.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!l3tU!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c00ce95-d895-4ebb-bcc6-9e075afb6812_2148x884.png 424w, https://substackcdn.com/image/fetch/$s_!l3tU!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c00ce95-d895-4ebb-bcc6-9e075afb6812_2148x884.png 848w, https://substackcdn.com/image/fetch/$s_!l3tU!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c00ce95-d895-4ebb-bcc6-9e075afb6812_2148x884.png 1272w, https://substackcdn.com/image/fetch/$s_!l3tU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c00ce95-d895-4ebb-bcc6-9e075afb6812_2148x884.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>&#128075;&#127995; Hi everyone!</p><p>I've been quietly building something on the side for the last few months, and it's finally ready for you to try out. </p>
      <p>
          <a href="https://www.intoai.pub/p/tensorwise-early-release-for-paid">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[Cross-Entropy Loss in LLMs, Explained Visually]]></title><description><![CDATA[A visual guide to understand how LLMs are trained using the cross-entropy loss, step by step.]]></description><link>https://www.intoai.pub/p/cross-entropy-loss-in-llms-explained</link><guid isPermaLink="false">https://www.intoai.pub/p/cross-entropy-loss-in-llms-explained</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Wed, 20 May 2026 11:43:17 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!J7Df!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7020d920-024f-4c8b-b53c-065e3a714b5a_1672x941.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!J7Df!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7020d920-024f-4c8b-b53c-065e3a714b5a_1672x941.png" data-component-name="Image2ToDOM"><div class="image2-inset image2-full-screen"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!J7Df!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7020d920-024f-4c8b-b53c-065e3a714b5a_1672x941.png 424w, https://substackcdn.com/image/fetch/$s_!J7Df!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7020d920-024f-4c8b-b53c-065e3a714b5a_1672x941.png 848w, https://substackcdn.com/image/fetch/$s_!J7Df!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7020d920-024f-4c8b-b53c-065e3a714b5a_1672x941.png 1272w, 
https://substackcdn.com/image/fetch/$s_!J7Df!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7020d920-024f-4c8b-b53c-065e3a714b5a_1672x941.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!J7Df!,w_5760,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7020d920-024f-4c8b-b53c-065e3a714b5a_1672x941.png" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7020d920-024f-4c8b-b53c-065e3a714b5a_1672x941.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;full&quot;,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2939462,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/198384175?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7020d920-024f-4c8b-b53c-065e3a714b5a_1672x941.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-fullscreen" alt="" srcset="https://substackcdn.com/image/fetch/$s_!J7Df!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7020d920-024f-4c8b-b53c-065e3a714b5a_1672x941.png 424w, https://substackcdn.com/image/fetch/$s_!J7Df!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7020d920-024f-4c8b-b53c-065e3a714b5a_1672x941.png 848w, 
https://substackcdn.com/image/fetch/$s_!J7Df!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7020d920-024f-4c8b-b53c-065e3a714b5a_1672x941.png 1272w, https://substackcdn.com/image/fetch/$s_!J7Df!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7020d920-024f-4c8b-b53c-065e3a714b5a_1672x941.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>LLMs are trained to predict next tokens well. 
</p><p>Given all the classes/tokens in the vocabulary, an LLM is trained to pick the right one at each training step. This is essentially a <strong><a href="https://en.wikipedia.org/wiki/Multiclass_classification">Multi-class classification problem</a></strong> in machine learning.</p><p>The <strong>Categorical Cross-entropy loss</strong> (or simply the <strong>Cross-entropy loss</strong>) is used to train an LLM to solve this multi-class classification problem.</p>
      <p>
          <a href="https://www.intoai.pub/p/cross-entropy-loss-in-llms-explained">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[This Week In AI Research (10-16 May 26) 🗓️]]></title><description><![CDATA[The top 10 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-10-16-may</link><guid isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-10-16-may</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Mon, 18 May 2026 09:52:01 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!X9D4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F761f6ce9-b092-45ae-b601-f9eab4699ade_1672x941.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!X9D4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F761f6ce9-b092-45ae-b601-f9eab4699ade_1672x941.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!X9D4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F761f6ce9-b092-45ae-b601-f9eab4699ade_1672x941.png 424w, https://substackcdn.com/image/fetch/$s_!X9D4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F761f6ce9-b092-45ae-b601-f9eab4699ade_1672x941.png 848w, https://substackcdn.com/image/fetch/$s_!X9D4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F761f6ce9-b092-45ae-b601-f9eab4699ade_1672x941.png 1272w, 
https://substackcdn.com/image/fetch/$s_!X9D4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F761f6ce9-b092-45ae-b601-f9eab4699ade_1672x941.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!X9D4!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F761f6ce9-b092-45ae-b601-f9eab4699ade_1672x941.png" width="1200" height="675" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/761f6ce9-b092-45ae-b601-f9eab4699ade_1672x941.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:2845198,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197974392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F761f6ce9-b092-45ae-b601-f9eab4699ade_1672x941.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!X9D4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F761f6ce9-b092-45ae-b601-f9eab4699ade_1672x941.png 424w, https://substackcdn.com/image/fetch/$s_!X9D4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F761f6ce9-b092-45ae-b601-f9eab4699ade_1672x941.png 848w, 
https://substackcdn.com/image/fetch/$s_!X9D4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F761f6ce9-b092-45ae-b601-f9eab4699ade_1672x941.png 1272w, https://substackcdn.com/image/fetch/$s_!X9D4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F761f6ce9-b092-45ae-b601-f9eab4699ade_1672x941.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h3>10. 
Achieving Gold-Medal-Level Olympiad Reasoning via Simple and Unified Scaling</h3><p>This research paper introduces a simple method for converting a post-trained reasoning LLM into a rigorous Olympiad-level solver.</p><p>The method first uses a reverse-perplexity curriculum for SFT to instill rigorous proof-search and self-checking behaviors, then scales these behaviors through a two-stage RL pipeline that progresses from RL with verifiable rewards to more delicate proof-level RL, and finally boosts solving performance with test-time scaling.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!y8JH!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b4d9c5a-a717-474f-8280-b1462883f24c_2792x1274.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!y8JH!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b4d9c5a-a717-474f-8280-b1462883f24c_2792x1274.png 424w, https://substackcdn.com/image/fetch/$s_!y8JH!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b4d9c5a-a717-474f-8280-b1462883f24c_2792x1274.png 848w, https://substackcdn.com/image/fetch/$s_!y8JH!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b4d9c5a-a717-474f-8280-b1462883f24c_2792x1274.png 1272w, https://substackcdn.com/image/fetch/$s_!y8JH!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b4d9c5a-a717-474f-8280-b1462883f24c_2792x1274.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!y8JH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b4d9c5a-a717-474f-8280-b1462883f24c_2792x1274.png" width="1456" height="664" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8b4d9c5a-a717-474f-8280-b1462883f24c_2792x1274.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:664,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:699746,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197974392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b4d9c5a-a717-474f-8280-b1462883f24c_2792x1274.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!y8JH!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b4d9c5a-a717-474f-8280-b1462883f24c_2792x1274.png 424w, https://substackcdn.com/image/fetch/$s_!y8JH!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b4d9c5a-a717-474f-8280-b1462883f24c_2792x1274.png 848w, https://substackcdn.com/image/fetch/$s_!y8JH!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b4d9c5a-a717-474f-8280-b1462883f24c_2792x1274.png 1272w, https://substackcdn.com/image/fetch/$s_!y8JH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b4d9c5a-a717-474f-8280-b1462883f24c_2792x1274.png 1456w" 
sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The authors train a 30B model, <strong>SU-01</strong>, using this method, which reasons on difficult problems with trajectories exceeding 100K tokens, achieving gold-medal-level scores on IMO 2025 and USAMO 2026.</p><p>This model also shows strong generalization of scientific reasoning to domains beyond mathematics and physics.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!URAK!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F411ca401-f340-4313-90dd-9d10921df6a6_2286x1158.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!URAK!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F411ca401-f340-4313-90dd-9d10921df6a6_2286x1158.png 424w, https://substackcdn.com/image/fetch/$s_!URAK!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F411ca401-f340-4313-90dd-9d10921df6a6_2286x1158.png 848w, https://substackcdn.com/image/fetch/$s_!URAK!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F411ca401-f340-4313-90dd-9d10921df6a6_2286x1158.png 1272w, https://substackcdn.com/image/fetch/$s_!URAK!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F411ca401-f340-4313-90dd-9d10921df6a6_2286x1158.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!URAK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F411ca401-f340-4313-90dd-9d10921df6a6_2286x1158.png" width="1456" height="738" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/411ca401-f340-4313-90dd-9d10921df6a6_2286x1158.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:738,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:289867,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197974392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F411ca401-f340-4313-90dd-9d10921df6a6_2286x1158.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!URAK!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F411ca401-f340-4313-90dd-9d10921df6a6_2286x1158.png 424w, https://substackcdn.com/image/fetch/$s_!URAK!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F411ca401-f340-4313-90dd-9d10921df6a6_2286x1158.png 848w, https://substackcdn.com/image/fetch/$s_!URAK!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F411ca401-f340-4313-90dd-9d10921df6a6_2286x1158.png 1272w, https://substackcdn.com/image/fetch/$s_!URAK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F411ca401-f340-4313-90dd-9d10921df6a6_2286x1158.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 
20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.13301">using this link</a>.</p><div><hr></div><p>Join the <strong>paid tier today</strong> to get access to all posts on this newsletter:</p><ul><li><p>&#127828; <a href="https://www.intoai.pub/p/what-makes-deekseek-v4-so-good">What makes DeepSeek-V4 so good?</a></p></li><li><p>&#127829; <a href="https://www.intoai.pub/p/diffusion-llms-explained-simply">Diffusion LLMs, Explained Simply</a></p></li><li><p>&#127839; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p>&#129391; <a href="https://www.intoai.pub/p/build-and-train-a-mixture-of-experts?utm_source=publication-search">Build and Train a Mixture-of-Experts (MoE) LLM from scratch</a></p></li><li><p>&#127849; <a href="https://www.intoai.pub/p/building-your-first-ai-agent">Building 
Your First AI Agent</a></p></li><li><p>&#127827; <a href="https://www.intoai.pub/p/distributed-data-parallel">Learn to train deep learning models on multiple GPUs</a></p></li></ul><p>and so many more!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join 'Into AI' premium today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join 'Into AI' premium today</span></a></p><div><hr></div><h3>9. Is Grep All You Need? How Agent Harnesses Reshape Agentic Search</h3><p>This research paper examines how different search tools impact LLM agents when handling retrieval-heavy tasks. </p><p>The authors conducted two experiments:</p><ul><li><p>Compare grep and vector retrieval on a 116-question sample from LongMemEval, using a custom agent harness (Chronos) and provider-native CLI harnesses (Claude Code, Codex, and Gemini CLI)</p></li><li><p>Compare grep-only and vector-only retrieval while progressively mixing in additional unrelated conversation history, so that each query is embedded in more distracting material alongside the passages that matter. </p></li></ul><p>Across Chronos and the provider CLIs, grep generally leads to higher accuracy than vector retrieval in the first experiment. 
</p><p>At the same time, overall scores still depend heavily on the agent harness and how the tool results are presented to the model, even when the underlying conversation data are the same.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!i3-I!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdad60a93-a91c-46b2-a514-f7affe0585a7_2020x1180.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!i3-I!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdad60a93-a91c-46b2-a514-f7affe0585a7_2020x1180.png 424w, https://substackcdn.com/image/fetch/$s_!i3-I!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdad60a93-a91c-46b2-a514-f7affe0585a7_2020x1180.png 848w, https://substackcdn.com/image/fetch/$s_!i3-I!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdad60a93-a91c-46b2-a514-f7affe0585a7_2020x1180.png 1272w, https://substackcdn.com/image/fetch/$s_!i3-I!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdad60a93-a91c-46b2-a514-f7affe0585a7_2020x1180.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!i3-I!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdad60a93-a91c-46b2-a514-f7affe0585a7_2020x1180.png" width="1456" height="851" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/dad60a93-a91c-46b2-a514-f7affe0585a7_2020x1180.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:851,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:409967,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197974392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdad60a93-a91c-46b2-a514-f7affe0585a7_2020x1180.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!i3-I!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdad60a93-a91c-46b2-a514-f7affe0585a7_2020x1180.png 424w, https://substackcdn.com/image/fetch/$s_!i3-I!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdad60a93-a91c-46b2-a514-f7affe0585a7_2020x1180.png 848w, https://substackcdn.com/image/fetch/$s_!i3-I!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdad60a93-a91c-46b2-a514-f7affe0585a7_2020x1180.png 1272w, https://substackcdn.com/image/fetch/$s_!i3-I!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdad60a93-a91c-46b2-a514-f7affe0585a7_2020x1180.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.15184">using this link</a>.</p><div><hr></div><h3>8. &#948;-mem: Efficient Online Memory for Large Language Models</h3><p>&#948;-mem is a lightweight online memory system for LLMs that lets them reuse past information without expanding the context window or retraining the entire model. </p><p>&#948;-mem compresses past information into a fixed-size state matrix updated by delta-rule learning, and uses its readout to generate low-rank corrections to the LLM&#8217;s attention computation during generation. 
</p><p>With only an 8 &#215; 8 online memory state, &#948;-mem improves the average score to 1.10&#215; that of the LLM backbone and 1.15&#215; that of the strongest non-&#948;-mem memory baseline.</p><p>It achieves larger gains on memory-heavy benchmarks, reaching 1.31&#215; on <a href="https://github.com/HUST-AI-HYZ/MemoryAgentBench">MemoryAgentBench</a> and 1.20&#215; on <a href="https://snap-research.github.io/locomo/">LoCoMo</a>, while largely preserving general capabilities.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!e_-z!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c7f3a3-7b9f-45f0-8f7d-06f748d73635_2316x1278.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!e_-z!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c7f3a3-7b9f-45f0-8f7d-06f748d73635_2316x1278.png 424w, https://substackcdn.com/image/fetch/$s_!e_-z!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c7f3a3-7b9f-45f0-8f7d-06f748d73635_2316x1278.png 848w, https://substackcdn.com/image/fetch/$s_!e_-z!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c7f3a3-7b9f-45f0-8f7d-06f748d73635_2316x1278.png 1272w, https://substackcdn.com/image/fetch/$s_!e_-z!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c7f3a3-7b9f-45f0-8f7d-06f748d73635_2316x1278.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!e_-z!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c7f3a3-7b9f-45f0-8f7d-06f748d73635_2316x1278.png" width="1456" height="803" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e3c7f3a3-7b9f-45f0-8f7d-06f748d73635_2316x1278.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:803,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:455520,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197974392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c7f3a3-7b9f-45f0-8f7d-06f748d73635_2316x1278.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!e_-z!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c7f3a3-7b9f-45f0-8f7d-06f748d73635_2316x1278.png 424w, https://substackcdn.com/image/fetch/$s_!e_-z!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c7f3a3-7b9f-45f0-8f7d-06f748d73635_2316x1278.png 848w, https://substackcdn.com/image/fetch/$s_!e_-z!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c7f3a3-7b9f-45f0-8f7d-06f748d73635_2316x1278.png 1272w, https://substackcdn.com/image/fetch/$s_!e_-z!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe3c7f3a3-7b9f-45f0-8f7d-06f748d73635_2316x1278.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.12357">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>7. 
<strong>FutureSim: Replaying World Events to Evaluate Adaptive Agents</strong></h3><p>This research paper introduces <strong>FutureSim</strong>, a benchmark for testing whether AI agents can learn over time by replaying real-world news events in order and predicting future outcomes beyond their knowledge limits.</p><p>Agents can only use the news available up to each simulated date, update predictions as new information comes in, and are scored based on accuracy and Brier skill score. </p><p>The results show that the best agent&#8217;s accuracy is 25%, and many have worse Brier skill scores than making no prediction at all.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!TqP_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa4602660-1c98-4a72-9f6c-d26510ddf871_2418x1172.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!TqP_!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa4602660-1c98-4a72-9f6c-d26510ddf871_2418x1172.png 424w, https://substackcdn.com/image/fetch/$s_!TqP_!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa4602660-1c98-4a72-9f6c-d26510ddf871_2418x1172.png 848w, https://substackcdn.com/image/fetch/$s_!TqP_!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa4602660-1c98-4a72-9f6c-d26510ddf871_2418x1172.png 1272w, https://substackcdn.com/image/fetch/$s_!TqP_!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa4602660-1c98-4a72-9f6c-d26510ddf871_2418x1172.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!TqP_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa4602660-1c98-4a72-9f6c-d26510ddf871_2418x1172.png" width="1456" height="706" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a4602660-1c98-4a72-9f6c-d26510ddf871_2418x1172.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:706,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:505197,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197974392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa4602660-1c98-4a72-9f6c-d26510ddf871_2418x1172.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!TqP_!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa4602660-1c98-4a72-9f6c-d26510ddf871_2418x1172.png 424w, https://substackcdn.com/image/fetch/$s_!TqP_!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa4602660-1c98-4a72-9f6c-d26510ddf871_2418x1172.png 848w, https://substackcdn.com/image/fetch/$s_!TqP_!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa4602660-1c98-4a72-9f6c-d26510ddf871_2418x1172.png 1272w, https://substackcdn.com/image/fetch/$s_!TqP_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa4602660-1c98-4a72-9f6c-d26510ddf871_2418x1172.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.15188">using this link</a>.</p><div><hr></div><h3>6. 
World Action Models</h3><p>World Action Models (WAMs) are the next step beyond Vision-Language-Action robot policies: instead of directly mapping observations and language to actions, they model future world states and actions together, giving embodied agents a form of predictive physical foresight.</p><p>This survey is the first systematic account of the WAMs landscape, clarifying key architectural paradigms and their trade-offs, and identifying open challenges and future opportunities for this rapidly evolving field.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Y_4L!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdf386ad-c347-4879-9b97-9658a58d4f9b_2218x1312.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Y_4L!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdf386ad-c347-4879-9b97-9658a58d4f9b_2218x1312.png 424w, https://substackcdn.com/image/fetch/$s_!Y_4L!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdf386ad-c347-4879-9b97-9658a58d4f9b_2218x1312.png 848w, https://substackcdn.com/image/fetch/$s_!Y_4L!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdf386ad-c347-4879-9b97-9658a58d4f9b_2218x1312.png 1272w, https://substackcdn.com/image/fetch/$s_!Y_4L!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdf386ad-c347-4879-9b97-9658a58d4f9b_2218x1312.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!Y_4L!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdf386ad-c347-4879-9b97-9658a58d4f9b_2218x1312.png" width="1456" height="861" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/cdf386ad-c347-4879-9b97-9658a58d4f9b_2218x1312.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:861,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:683743,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197974392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdf386ad-c347-4879-9b97-9658a58d4f9b_2218x1312.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Y_4L!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdf386ad-c347-4879-9b97-9658a58d4f9b_2218x1312.png 424w, https://substackcdn.com/image/fetch/$s_!Y_4L!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdf386ad-c347-4879-9b97-9658a58d4f9b_2218x1312.png 848w, https://substackcdn.com/image/fetch/$s_!Y_4L!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdf386ad-c347-4879-9b97-9658a58d4f9b_2218x1312.png 1272w, https://substackcdn.com/image/fetch/$s_!Y_4L!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdf386ad-c347-4879-9b97-9658a58d4f9b_2218x1312.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.12090">using this link</a>.</p><div><hr></div><h3>5. 
The Truth Lies Somewhere in the Middle (of the Generated Tokens)</h3><p>The research paper suggests that, for autoregressive LLMs, the best semantic representation is often not the final generated token or a prompt token, but the mean-pooled hidden states across the generated tokens.</p><p>This finding is quantified through kernel alignment to reference spaces in language, vision, and protein domains, where results indicate that improvements from mean pooling are consistent with information being distributed across generated tokens rather than localized to a single position.</p><p>Furthermore, representations coming from generated tokens outperform those from prompt tokens.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!N37C!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe101f071-7449-409a-a143-0ae903ef3c50_1354x816.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!N37C!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe101f071-7449-409a-a143-0ae903ef3c50_1354x816.png 424w, https://substackcdn.com/image/fetch/$s_!N37C!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe101f071-7449-409a-a143-0ae903ef3c50_1354x816.png 848w, https://substackcdn.com/image/fetch/$s_!N37C!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe101f071-7449-409a-a143-0ae903ef3c50_1354x816.png 1272w, 
https://substackcdn.com/image/fetch/$s_!N37C!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe101f071-7449-409a-a143-0ae903ef3c50_1354x816.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!N37C!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe101f071-7449-409a-a143-0ae903ef3c50_1354x816.png" width="1354" height="816" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e101f071-7449-409a-a143-0ae903ef3c50_1354x816.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:816,&quot;width&quot;:1354,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:393363,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197974392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe101f071-7449-409a-a143-0ae903ef3c50_1354x816.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!N37C!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe101f071-7449-409a-a143-0ae903ef3c50_1354x816.png 424w, https://substackcdn.com/image/fetch/$s_!N37C!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe101f071-7449-409a-a143-0ae903ef3c50_1354x816.png 848w, https://substackcdn.com/image/fetch/$s_!N37C!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe101f071-7449-409a-a143-0ae903ef3c50_1354x816.png 
1272w, https://substackcdn.com/image/fetch/$s_!N37C!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe101f071-7449-409a-a143-0ae903ef3c50_1354x816.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.09969">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" 
href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>4. MetaBackdoor: Exploiting Positional Encoding as a Backdoor Attack Surface in LLMs</h3><p>The research paper presents <strong>MetaBackdoor</strong>, a new type of LLM backdoor attack that relies on positional information, particularly input or conversation length, as the trigger instead of suspicious words or hidden characters. </p><p>This means a model might act normally with ordinary text but switch to a harmful mode once the prompt or chat history reaches a particular length. This switch could lead to leaking system prompts, private context, or making harmful tool calls. </p><p>While current backdoor defenses typically search for odd input content, this attack shows that even clean, semantically normal inputs can trigger hidden behavior via the model&#8217;s positional encoding.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!jrP4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F830305d4-0c2b-48ae-ba0d-a5c2808ce613_1294x780.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!jrP4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F830305d4-0c2b-48ae-ba0d-a5c2808ce613_1294x780.png 424w, https://substackcdn.com/image/fetch/$s_!jrP4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F830305d4-0c2b-48ae-ba0d-a5c2808ce613_1294x780.png 848w, https://substackcdn.com/image/fetch/$s_!jrP4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F830305d4-0c2b-48ae-ba0d-a5c2808ce613_1294x780.png 1272w, 
https://substackcdn.com/image/fetch/$s_!jrP4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F830305d4-0c2b-48ae-ba0d-a5c2808ce613_1294x780.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!jrP4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F830305d4-0c2b-48ae-ba0d-a5c2808ce613_1294x780.png" width="1294" height="780" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/830305d4-0c2b-48ae-ba0d-a5c2808ce613_1294x780.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:780,&quot;width&quot;:1294,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:255759,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197974392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F830305d4-0c2b-48ae-ba0d-a5c2808ce613_1294x780.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!jrP4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F830305d4-0c2b-48ae-ba0d-a5c2808ce613_1294x780.png 424w, https://substackcdn.com/image/fetch/$s_!jrP4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F830305d4-0c2b-48ae-ba0d-a5c2808ce613_1294x780.png 848w, https://substackcdn.com/image/fetch/$s_!jrP4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F830305d4-0c2b-48ae-ba0d-a5c2808ce613_1294x780.png 
1272w, https://substackcdn.com/image/fetch/$s_!jrP4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F830305d4-0c2b-48ae-ba0d-a5c2808ce613_1294x780.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.15172">using this link</a>.</p><div><hr></div><h3>3. LLMs Improving LLMs: Agentic Discovery for Test-Time Scaling</h3><p>This research paper presents <strong>AutoTTS</strong>, a framework in which LLM agents autonomously discover better test-time scaling strategies, rather than relying on 
humans. </p><p>AutoTTS views test-time scaling as a controller search problem based on pre-collected reasoning traces. In this way, strategies can be tested cheaply without needing to call the LLM repeatedly. </p><p>Experiments on mathematical reasoning benchmarks show that the discovered strategies improve the overall accuracy&#8211;cost tradeoff over strong manually designed baselines. </p><p>The discovered strategies generalize to held-out benchmarks and model scales, while the entire discovery costs only $39.9 and 160 minutes.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!54u7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91a276c8-e8c3-455e-af44-647072dda009_2526x1082.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!54u7!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91a276c8-e8c3-455e-af44-647072dda009_2526x1082.png 424w, https://substackcdn.com/image/fetch/$s_!54u7!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91a276c8-e8c3-455e-af44-647072dda009_2526x1082.png 848w, https://substackcdn.com/image/fetch/$s_!54u7!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91a276c8-e8c3-455e-af44-647072dda009_2526x1082.png 1272w, https://substackcdn.com/image/fetch/$s_!54u7!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91a276c8-e8c3-455e-af44-647072dda009_2526x1082.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!54u7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91a276c8-e8c3-455e-af44-647072dda009_2526x1082.png" width="1456" height="624" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/91a276c8-e8c3-455e-af44-647072dda009_2526x1082.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:624,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:635635,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197974392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91a276c8-e8c3-455e-af44-647072dda009_2526x1082.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!54u7!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91a276c8-e8c3-455e-af44-647072dda009_2526x1082.png 424w, https://substackcdn.com/image/fetch/$s_!54u7!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91a276c8-e8c3-455e-af44-647072dda009_2526x1082.png 848w, https://substackcdn.com/image/fetch/$s_!54u7!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91a276c8-e8c3-455e-af44-647072dda009_2526x1082.png 1272w, https://substackcdn.com/image/fetch/$s_!54u7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91a276c8-e8c3-455e-af44-647072dda009_2526x1082.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.08083">using this link</a>.</p><div><hr></div><h3>2. VGGT-&#937;</h3><p>The research paper presents <strong>VGGT-&#937;</strong>, a larger and more efficient 3D reconstruction model for static and dynamic scenes. </p><p>It builds on the <a href="https://arxiv.org/abs/2503.11651">Visual Geometry Grounded Transformer (VGGT)</a> by simplifying the design, using compact registers to share scene information across frames, and reducing training memory to about 30% of the previous model. 
</p><p>This allows it to train on significantly more data and achieve better reconstruction and camera estimation, including a reported 77% improvement in camera accuracy on <a href="https://sintel.is.tue.mpg.de/">Sintel</a>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!eSHi!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef7dbbe3-e8f3-45c4-b0c2-e4dd171b59be_2460x1236.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!eSHi!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef7dbbe3-e8f3-45c4-b0c2-e4dd171b59be_2460x1236.png 424w, https://substackcdn.com/image/fetch/$s_!eSHi!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef7dbbe3-e8f3-45c4-b0c2-e4dd171b59be_2460x1236.png 848w, https://substackcdn.com/image/fetch/$s_!eSHi!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef7dbbe3-e8f3-45c4-b0c2-e4dd171b59be_2460x1236.png 1272w, https://substackcdn.com/image/fetch/$s_!eSHi!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef7dbbe3-e8f3-45c4-b0c2-e4dd171b59be_2460x1236.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!eSHi!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef7dbbe3-e8f3-45c4-b0c2-e4dd171b59be_2460x1236.png" width="1456" height="732" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ef7dbbe3-e8f3-45c4-b0c2-e4dd171b59be_2460x1236.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:732,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1191672,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197974392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef7dbbe3-e8f3-45c4-b0c2-e4dd171b59be_2460x1236.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!eSHi!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef7dbbe3-e8f3-45c4-b0c2-e4dd171b59be_2460x1236.png 424w, https://substackcdn.com/image/fetch/$s_!eSHi!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef7dbbe3-e8f3-45c4-b0c2-e4dd171b59be_2460x1236.png 848w, https://substackcdn.com/image/fetch/$s_!eSHi!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef7dbbe3-e8f3-45c4-b0c2-e4dd171b59be_2460x1236.png 1272w, https://substackcdn.com/image/fetch/$s_!eSHi!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef7dbbe3-e8f3-45c4-b0c2-e4dd171b59be_2460x1236.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.15195">using this link</a>.</p><div><hr></div><h3>1. ELF: Embedded Language Flows</h3><p>The paper introduces <strong>ELF (Embedded Language Flows)</strong>, a new class of diffusion-based language model that generates text primarily in continuous embedding space instead of directly using discrete tokens. </p><p>It employs Flow Matching to clean up embeddings, removing noise and turning them into clear language representations, only converting them back into tokens at the final step. 
</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!k_BV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa01185ff-bc51-48e2-8144-bef788d4a869_2280x560.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!k_BV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa01185ff-bc51-48e2-8144-bef788d4a869_2280x560.png 424w, https://substackcdn.com/image/fetch/$s_!k_BV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa01185ff-bc51-48e2-8144-bef788d4a869_2280x560.png 848w, https://substackcdn.com/image/fetch/$s_!k_BV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa01185ff-bc51-48e2-8144-bef788d4a869_2280x560.png 1272w, https://substackcdn.com/image/fetch/$s_!k_BV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa01185ff-bc51-48e2-8144-bef788d4a869_2280x560.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!k_BV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa01185ff-bc51-48e2-8144-bef788d4a869_2280x560.png" width="1456" height="358" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a01185ff-bc51-48e2-8144-bef788d4a869_2280x560.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:358,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:368234,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197974392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa01185ff-bc51-48e2-8144-bef788d4a869_2280x560.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!k_BV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa01185ff-bc51-48e2-8144-bef788d4a869_2280x560.png 424w, https://substackcdn.com/image/fetch/$s_!k_BV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa01185ff-bc51-48e2-8144-bef788d4a869_2280x560.png 848w, https://substackcdn.com/image/fetch/$s_!k_BV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa01185ff-bc51-48e2-8144-bef788d4a869_2280x560.png 1272w, https://substackcdn.com/image/fetch/$s_!k_BV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa01185ff-bc51-48e2-8144-bef788d4a869_2280x560.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>ELF surpasses previous discrete and continuous diffusion language models, achieving better generation quality with fewer sampling steps and using 10X fewer training tokens.</p><div 
class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Iesj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1be4afdc-4046-40e4-9cb7-514a91d4b4b9_2228x406.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Iesj!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1be4afdc-4046-40e4-9cb7-514a91d4b4b9_2228x406.png 424w, https://substackcdn.com/image/fetch/$s_!Iesj!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1be4afdc-4046-40e4-9cb7-514a91d4b4b9_2228x406.png 848w, https://substackcdn.com/image/fetch/$s_!Iesj!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1be4afdc-4046-40e4-9cb7-514a91d4b4b9_2228x406.png 1272w, https://substackcdn.com/image/fetch/$s_!Iesj!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1be4afdc-4046-40e4-9cb7-514a91d4b4b9_2228x406.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Iesj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1be4afdc-4046-40e4-9cb7-514a91d4b4b9_2228x406.png" width="1456" height="265" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1be4afdc-4046-40e4-9cb7-514a91d4b4b9_2228x406.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:265,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:159720,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197974392?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1be4afdc-4046-40e4-9cb7-514a91d4b4b9_2228x406.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Iesj!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1be4afdc-4046-40e4-9cb7-514a91d4b4b9_2228x406.png 424w, https://substackcdn.com/image/fetch/$s_!Iesj!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1be4afdc-4046-40e4-9cb7-514a91d4b4b9_2228x406.png 848w, https://substackcdn.com/image/fetch/$s_!Iesj!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1be4afdc-4046-40e4-9cb7-514a91d4b4b9_2228x406.png 1272w, https://substackcdn.com/image/fetch/$s_!Iesj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1be4afdc-4046-40e4-9cb7-514a91d4b4b9_2228x406.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2605.10938">using this link</a>.</p><div><hr></div><p>Join the <strong>paid tier today</strong> to get access to all posts on this 
newsletter:</p><ul><li><p>&#127828; <a href="https://www.intoai.pub/p/what-makes-deekseek-v4-so-good">What makes DeepSeek-V4 so good?</a></p></li><li><p>&#127829; <a href="https://www.intoai.pub/p/diffusion-llms-explained-simply">Diffusion LLMs, Explained Simply</a></p></li><li><p>&#127839; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p>&#129391; <a href="https://www.intoai.pub/p/build-and-train-a-mixture-of-experts?utm_source=publication-search">Build and Train a Mixture-of-Experts (MoE) LLM from scratch</a></p></li><li><p>&#127849; <a href="https://www.intoai.pub/p/building-your-first-ai-agent">Building Your First AI Agent</a></p></li><li><p>&#127827; <a href="https://www.intoai.pub/p/distributed-data-parallel">Learn to train deep learning models on multiple GPUs</a></p></li></ul><p>and so many more!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join 'Into AI' premium today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join 'Into AI' premium today</span></a></p>]]></content:encoded></item><item><title><![CDATA[What makes DeepSeek-V4 so good?]]></title><description><![CDATA[If Uber had used DeepSeek-V4 instead of Claude, their 2026 AI budget would have lasted 7 years rather than only 4 months!]]></description><link>https://www.intoai.pub/p/what-makes-deekseek-v4-so-good</link><guid isPermaLink="false">https://www.intoai.pub/p/what-makes-deekseek-v4-so-good</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Fri, 15 May 2026 17:02:06 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!83mq!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8f8a877-47be-477c-8325-5e4dfe62c2c1_2376x1024.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!83mq!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8f8a877-47be-477c-8325-5e4dfe62c2c1_2376x1024.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!83mq!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8f8a877-47be-477c-8325-5e4dfe62c2c1_2376x1024.png 424w, https://substackcdn.com/image/fetch/$s_!83mq!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8f8a877-47be-477c-8325-5e4dfe62c2c1_2376x1024.png 848w, https://substackcdn.com/image/fetch/$s_!83mq!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8f8a877-47be-477c-8325-5e4dfe62c2c1_2376x1024.png 1272w, https://substackcdn.com/image/fetch/$s_!83mq!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8f8a877-47be-477c-8325-5e4dfe62c2c1_2376x1024.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!83mq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8f8a877-47be-477c-8325-5e4dfe62c2c1_2376x1024.png" width="1456" height="628" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c8f8a877-47be-477c-8325-5e4dfe62c2c1_2376x1024.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:628,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:288596,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/196789511?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8f8a877-47be-477c-8325-5e4dfe62c2c1_2376x1024.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!83mq!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8f8a877-47be-477c-8325-5e4dfe62c2c1_2376x1024.png 424w, https://substackcdn.com/image/fetch/$s_!83mq!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8f8a877-47be-477c-8325-5e4dfe62c2c1_2376x1024.png 848w, https://substackcdn.com/image/fetch/$s_!83mq!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8f8a877-47be-477c-8325-5e4dfe62c2c1_2376x1024.png 1272w, https://substackcdn.com/image/fetch/$s_!83mq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8f8a877-47be-477c-8325-5e4dfe62c2c1_2376x1024.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>DeepSeek recently released the preview of two models in its V4 series:</p><ul><li><p><strong>DeepSeek-V4-Pro</strong> with 1.6T parameters (49B activated during inference)</p></li><li><p><strong>DeepSeek-V4-Flash</strong> with 284B parameters (13B activated during inference)</p></li></ul><p>Both models are <a href="https://www.intoai.pub/p/build-and-train-a-mixture-of-experts">Mixtures-of-Experts (MoE) models</a> and support a context length of&nbsp;1 million tokens.</p><p>These models use multiple architectural and optimization updates, with the four main ones being:</p><ol><li><p>A new hybrid Attention architecture that uses <strong>Compressed Sparse Attention (CSA)</strong> and <strong>Heavily Compressed Attention (HCA)</strong> to reduce the memory bottleneck</p></li><li><p><strong><a 
href="https://arxiv.org/abs/2512.24880">Manifold-Constrained Hyper-Connections (mHC)</a> </strong>to<strong> </strong>strengthen conventional residual connections, which improves the stability of signal propagation across the layers while preserving model expressivity</p></li><li><p><strong><a href="https://kellerjordan.github.io/posts/muon/">Muon Optimizer</a> </strong>for faster convergence and greater training stability</p></li></ol><p>These major updates (alongside many others) make these models highly efficient. DeepSeek-V4-Pro requires only 27% of the single-token inference FLOPs&nbsp;and&nbsp;10% of the KV cache&nbsp;compared with&nbsp;<a href="https://arxiv.org/abs/2512.02556">DeepSeek-V3.2</a>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!is-6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54db41a0-5ff4-4645-846b-16c691cfda00_2692x1400.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!is-6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54db41a0-5ff4-4645-846b-16c691cfda00_2692x1400.png 424w, https://substackcdn.com/image/fetch/$s_!is-6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54db41a0-5ff4-4645-846b-16c691cfda00_2692x1400.png 848w, https://substackcdn.com/image/fetch/$s_!is-6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54db41a0-5ff4-4645-846b-16c691cfda00_2692x1400.png 1272w, 
https://substackcdn.com/image/fetch/$s_!is-6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54db41a0-5ff4-4645-846b-16c691cfda00_2692x1400.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!is-6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54db41a0-5ff4-4645-846b-16c691cfda00_2692x1400.png" width="1456" height="757" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/54db41a0-5ff4-4645-846b-16c691cfda00_2692x1400.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:757,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:928315,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/196789511?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54db41a0-5ff4-4645-846b-16c691cfda00_2692x1400.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!is-6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54db41a0-5ff4-4645-846b-16c691cfda00_2692x1400.png 424w, https://substackcdn.com/image/fetch/$s_!is-6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54db41a0-5ff4-4645-846b-16c691cfda00_2692x1400.png 848w, 
https://substackcdn.com/image/fetch/$s_!is-6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54db41a0-5ff4-4645-846b-16c691cfda00_2692x1400.png 1272w, https://substackcdn.com/image/fetch/$s_!is-6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54db41a0-5ff4-4645-846b-16c691cfda00_2692x1400.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Benchmark performance, inference FLOPs, and KV cache size of DeepSeek-V4 and baselines (<a 
href="https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/blob/main/DeepSeek_V4.pdf">Source</a>)</figcaption></figure></div><p>And according to the <a href="https://x.com/sdrzn/status/2047537442719428969">following post on X</a>, if Uber had used DeepSeek instead of Claude, their 2026 AI budget would have lasted 7 years rather than only 4 months. That&#8217;s incredibly efficient!</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!uox_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2345084-0925-4408-a70b-ca5c2b32c468_1626x950.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!uox_!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2345084-0925-4408-a70b-ca5c2b32c468_1626x950.png 424w, https://substackcdn.com/image/fetch/$s_!uox_!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2345084-0925-4408-a70b-ca5c2b32c468_1626x950.png 848w, https://substackcdn.com/image/fetch/$s_!uox_!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2345084-0925-4408-a70b-ca5c2b32c468_1626x950.png 1272w, https://substackcdn.com/image/fetch/$s_!uox_!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2345084-0925-4408-a70b-ca5c2b32c468_1626x950.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!uox_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2345084-0925-4408-a70b-ca5c2b32c468_1626x950.png" width="1456" height="851" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c2345084-0925-4408-a70b-ca5c2b32c468_1626x950.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:851,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:386715,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/196789511?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2345084-0925-4408-a70b-ca5c2b32c468_1626x950.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!uox_!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2345084-0925-4408-a70b-ca5c2b32c468_1626x950.png 424w, https://substackcdn.com/image/fetch/$s_!uox_!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2345084-0925-4408-a70b-ca5c2b32c468_1626x950.png 848w, https://substackcdn.com/image/fetch/$s_!uox_!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2345084-0925-4408-a70b-ca5c2b32c468_1626x950.png 1272w, https://substackcdn.com/image/fetch/$s_!uox_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc2345084-0925-4408-a70b-ca5c2b32c468_1626x950.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>The magic of Mixture-of-Experts</h3><p>In an LLM, the feed-forward network (FFN) in the Transformer block is often the most computationally demanding and holds the largest number of parameters.</p><p>This is why a dense feed-forward network is frequently replaced by a Mixture-of-Experts (MoE) feed-forward network. A Mixture-of-Experts network improves computational efficiency by activating only a subset of expert modules during inference.</p><p>DeepSeek models <a href="https://arxiv.org/abs/2412.19437">from V2 onwards</a> (including V4) use <strong>DeepSeekMoE</strong>, which splits the FFN into many smaller experts and routes each token to only a few of them (Routed Experts), while keeping some experts active for all tokens (Shared Experts). 
This significantly reduces its computational cost.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!1_gX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314cc92c-f266-41e4-8eee-11f8350864e7_1966x644.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!1_gX!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314cc92c-f266-41e4-8eee-11f8350864e7_1966x644.png 424w, https://substackcdn.com/image/fetch/$s_!1_gX!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314cc92c-f266-41e4-8eee-11f8350864e7_1966x644.png 848w, https://substackcdn.com/image/fetch/$s_!1_gX!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314cc92c-f266-41e4-8eee-11f8350864e7_1966x644.png 1272w, https://substackcdn.com/image/fetch/$s_!1_gX!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314cc92c-f266-41e4-8eee-11f8350864e7_1966x644.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!1_gX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314cc92c-f266-41e4-8eee-11f8350864e7_1966x644.png" width="1456" height="477" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/314cc92c-f266-41e4-8eee-11f8350864e7_1966x644.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:477,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:173949,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/196789511?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314cc92c-f266-41e4-8eee-11f8350864e7_1966x644.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!1_gX!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314cc92c-f266-41e4-8eee-11f8350864e7_1966x644.png 424w, https://substackcdn.com/image/fetch/$s_!1_gX!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314cc92c-f266-41e4-8eee-11f8350864e7_1966x644.png 848w, https://substackcdn.com/image/fetch/$s_!1_gX!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314cc92c-f266-41e4-8eee-11f8350864e7_1966x644.png 1272w, https://substackcdn.com/image/fetch/$s_!1_gX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F314cc92c-f266-41e4-8eee-11f8350864e7_1966x644.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Architecture of DeepSeekMoE (<a href="https://arxiv.org/abs/2412.19437">Source</a>)</figcaption></figure></div><p>But given that V4 supports a 1M-token context window (compared to V3.2&#8217;s 128K), the attention mechanism becomes a significant bottleneck at inference.</p><p>This is where the new attention architecture innovated by DeepSeek-V4 shines! V4 models use a hybrid attention mechanism combining:</p><ul><li><p>Compressed Sparse Attention (CSA)</p></li><li><p>Heavily Compressed Attention (HCA)</p></li></ul><p>To understand how these attention mechanisms work and improve V4's efficiency so well, we will have to understand them from the ground up. 
Let&#8217;s start with the very basics and build from there, step by step.</p><div><hr></div><h3>What is Attention?</h3><p>The <a href="https://www.intoai.pub/p/self-attention">(Self) Attention mechanism</a> in a Transformer-based LLM helps the model understand how each word/token relates to every other word/token in a text sequence.</p><p>Given the embedding of each token:</p><ul><li><p>It is first projected into Query (Q), Key (K), and Value (V) vectors.</p></li><li><p>The query vector is compared (technically via the dot product) with keys generated from itself and from previous tokens. This comparison produces attention scores that measure how closely each of those tokens is related to the current token.</p></li><li><p>The scores are scaled, <a href="https://www.intoai.pub/p/causal-mha">masked to block a token from attending to future tokens</a>, passed through softmax, and then used to compute a weighted sum of the value vectors.</p></li><li><p>This weighted sum is the token&#8217;s context-aware representation. The overall operation is called <strong>Masked and Scaled Dot-Product Self-Attention</strong>.</p></li></ul><p>Since every token is attending to itself and all the previous tokens, for an input sequence of length <code>N</code>, the attention mechanism processing it has an O(<code>N</code><sup>2</sup>) computational complexity. 
This quadratic complexity isn&#8217;t ideal when processing long text sequences.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Ih4R!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1ab64779-95e1-4f62-9e31-93492fd702ab_2522x942.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Ih4R!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1ab64779-95e1-4f62-9e31-93492fd702ab_2522x942.png 424w, https://substackcdn.com/image/fetch/$s_!Ih4R!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1ab64779-95e1-4f62-9e31-93492fd702ab_2522x942.png 848w, https://substackcdn.com/image/fetch/$s_!Ih4R!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1ab64779-95e1-4f62-9e31-93492fd702ab_2522x942.png 1272w, https://substackcdn.com/image/fetch/$s_!Ih4R!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1ab64779-95e1-4f62-9e31-93492fd702ab_2522x942.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Ih4R!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1ab64779-95e1-4f62-9e31-93492fd702ab_2522x942.png" width="728" height="272" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1ab64779-95e1-4f62-9e31-93492fd702ab_2522x942.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:544,&quot;width&quot;:1456,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Ih4R!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1ab64779-95e1-4f62-9e31-93492fd702ab_2522x942.png 424w, https://substackcdn.com/image/fetch/$s_!Ih4R!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1ab64779-95e1-4f62-9e31-93492fd702ab_2522x942.png 848w, https://substackcdn.com/image/fetch/$s_!Ih4R!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1ab64779-95e1-4f62-9e31-93492fd702ab_2522x942.png 1272w, https://substackcdn.com/image/fetch/$s_!Ih4R!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1ab64779-95e1-4f62-9e31-93492fd702ab_2522x942.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Calculations taking place in Self-attention</figcaption></figure></div><div><hr></div><h3>What is <strong>Multi-head </strong>Attention?</h3><p>To improve performance, instead of performing attention operations once, these are done several times in parallel using different learned projections of the same token embeddings. </p><p>Each unit of attention calculation in this case is called a <strong>Head</strong>, and the mechanism is called <strong><a href="https://www.intoai.pub/p/mha?utm_source=publication-search">Multi-head Attention (MHA)</a></strong>.</p><p>Each attention head has its own Query, Key, and Value projections, and each head computes masked self-attention independently. The outputs of all heads are then concatenated into a single representation. 
</p><p>The use of multiple heads helps capture different kinds of semantic relationships between the tokens.</p><p>There are two efficient types of MHA that reduce the number of&nbsp;Key/Value&nbsp;projections stored and used during inference. These are:</p><ul><li><p><strong><a href="https://www.intoai.pub/p/multi-query-attention">Multi-Query Attention (MQA)</a></strong>, where each attention head has its own Query, but all Query heads share the same Key and Value.</p></li><li><p><strong><a href="https://www.intoai.pub/p/grouped-query-attention">Grouped-Query Attention (GQA)</a></strong>, where Queries are split into groups, and each group of Query heads shares one Key and Value head.</p></li></ul><p>Both MQA and GQA make inference faster and cheaper, but at the expense of generation quality.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!b-yB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb5a9467c-1c69-4905-a210-0497fdd7dfb2_2476x942.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!b-yB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb5a9467c-1c69-4905-a210-0497fdd7dfb2_2476x942.png 424w, https://substackcdn.com/image/fetch/$s_!b-yB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb5a9467c-1c69-4905-a210-0497fdd7dfb2_2476x942.png 848w, https://substackcdn.com/image/fetch/$s_!b-yB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb5a9467c-1c69-4905-a210-0497fdd7dfb2_2476x942.png 1272w, 
https://substackcdn.com/image/fetch/$s_!b-yB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb5a9467c-1c69-4905-a210-0497fdd7dfb2_2476x942.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!b-yB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb5a9467c-1c69-4905-a210-0497fdd7dfb2_2476x942.png" width="1456" height="554" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b5a9467c-1c69-4905-a210-0497fdd7dfb2_2476x942.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:554,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:233353,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/196789511?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb5a9467c-1c69-4905-a210-0497fdd7dfb2_2476x942.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!b-yB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb5a9467c-1c69-4905-a210-0497fdd7dfb2_2476x942.png 424w, https://substackcdn.com/image/fetch/$s_!b-yB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb5a9467c-1c69-4905-a210-0497fdd7dfb2_2476x942.png 848w, https://substackcdn.com/image/fetch/$s_!b-yB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb5a9467c-1c69-4905-a210-0497fdd7dfb2_2476x942.png 
1272w, https://substackcdn.com/image/fetch/$s_!b-yB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb5a9467c-1c69-4905-a210-0497fdd7dfb2_2476x942.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">MHA vs. 
GQA vs MQA (<a href="https://arxiv.org/abs/2305.13245">Source</a>)</figcaption></figure></div><p>Attention mechanisms are made much more efficient using a KV cache.</p><div><hr></div><h3>KV cache to the rescue</h3><p>Note that without any caching, at each step of text generation/inference, we would need to recalculate the K and V vectors for all previous tokens. </p><p>How about we store the&nbsp;Key&nbsp;and&nbsp;Value&nbsp;vectors from previous tokens, and reuse them later? That&#8217;s exactly what a KV cache does.</p><p>When using a KV cache:</p><ul><li><p>For each generated token, we store its K and V vectors in the cache.</p></li><li><p>Then, in the next token generation step, we compute only the new token&#8217;s Q, K, and V.</p></li><li><p>Next, we reuse the cached KV of previous tokens, with the new token&#8217;s Q attending to both the cached KV and its newly calculated KV. This saves a lot of compute during inference.</p></li></ul><div><hr></div><h3>Towards Multi-head Latent Attention</h3><p>Released in 2024, <a href="https://arxiv.org/abs/2405.04434">DeepSeek-V2</a> introduced an efficient version of attention called <strong><a href="https://www.intoai.pub/p/multi-head-latent-attention-is-the">Multi-head Latent Attention (MLA)</a></strong> that improves on the KV cache by not caching full Keys and Values. Instead, it stores a much smaller latent representation (one latent KV entry per token) and reconstructs the full KV information from it when needed.</p><p>This reduces the memory required to store the KV cache, making inference much faster. 
MLA is therefore also used in the later model, <a href="https://arxiv.org/abs/2412.19437v2">DeepSeek-V3</a>.</p><p>Check out the following illustration to see how MLA compares to the other attention variants.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2Fvr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd5e15a3e-859f-45f3-beb0-8aebbb88a0b0_2734x778.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!2Fvr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd5e15a3e-859f-45f3-beb0-8aebbb88a0b0_2734x778.png 424w, https://substackcdn.com/image/fetch/$s_!2Fvr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd5e15a3e-859f-45f3-beb0-8aebbb88a0b0_2734x778.png 848w, https://substackcdn.com/image/fetch/$s_!2Fvr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd5e15a3e-859f-45f3-beb0-8aebbb88a0b0_2734x778.png 1272w, https://substackcdn.com/image/fetch/$s_!2Fvr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd5e15a3e-859f-45f3-beb0-8aebbb88a0b0_2734x778.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!2Fvr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd5e15a3e-859f-45f3-beb0-8aebbb88a0b0_2734x778.png" width="728" height="207" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d5e15a3e-859f-45f3-beb0-8aebbb88a0b0_2734x778.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:414,&quot;width&quot;:1456,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:277329,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/196789511?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd5e15a3e-859f-45f3-beb0-8aebbb88a0b0_2734x778.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!2Fvr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd5e15a3e-859f-45f3-beb0-8aebbb88a0b0_2734x778.png 424w, https://substackcdn.com/image/fetch/$s_!2Fvr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd5e15a3e-859f-45f3-beb0-8aebbb88a0b0_2734x778.png 848w, https://substackcdn.com/image/fetch/$s_!2Fvr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd5e15a3e-859f-45f3-beb0-8aebbb88a0b0_2734x778.png 1272w, https://substackcdn.com/image/fetch/$s_!2Fvr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd5e15a3e-859f-45f3-beb0-8aebbb88a0b0_2734x778.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a><figcaption class="image-caption">MHA vs GQA vs MQA vs MLA (<a href="https://arxiv.org/abs/2405.04434">Source</a>)</figcaption></figure></div><p>The complete 
architecture of MLA with the latent KV representation is shown below. I have also written a deep dive post on how it works, which <a href="https://www.intoai.pub/p/multi-head-latent-attention-is-the">you can read here</a>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!RHbk!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facbd720a-a38c-4e95-8f7a-4211bf3c8278_1594x834.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!RHbk!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facbd720a-a38c-4e95-8f7a-4211bf3c8278_1594x834.png 424w, https://substackcdn.com/image/fetch/$s_!RHbk!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facbd720a-a38c-4e95-8f7a-4211bf3c8278_1594x834.png 848w, https://substackcdn.com/image/fetch/$s_!RHbk!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facbd720a-a38c-4e95-8f7a-4211bf3c8278_1594x834.png 1272w, https://substackcdn.com/image/fetch/$s_!RHbk!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facbd720a-a38c-4e95-8f7a-4211bf3c8278_1594x834.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!RHbk!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facbd720a-a38c-4e95-8f7a-4211bf3c8278_1594x834.png" width="1456" height="762" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/acbd720a-a38c-4e95-8f7a-4211bf3c8278_1594x834.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:762,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:189780,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/196789511?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facbd720a-a38c-4e95-8f7a-4211bf3c8278_1594x834.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!RHbk!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facbd720a-a38c-4e95-8f7a-4211bf3c8278_1594x834.png 424w, https://substackcdn.com/image/fetch/$s_!RHbk!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facbd720a-a38c-4e95-8f7a-4211bf3c8278_1594x834.png 848w, https://substackcdn.com/image/fetch/$s_!RHbk!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facbd720a-a38c-4e95-8f7a-4211bf3c8278_1594x834.png 1272w, https://substackcdn.com/image/fetch/$s_!RHbk!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Facbd720a-a38c-4e95-8f7a-4211bf3c8278_1594x834.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Architecture of Multi-head Latent Attention (MLA) (<a href="https://arxiv.org/abs/2405.04434">Source</a>)</figcaption></figure></div><div><hr></div><h3>Sliding Window to improve Attention further</h3><p>So far, we have learned how:</p><ul><li><p>KV caching helps reduce the compute requirements for attention at inference, at the cost of extra memory to store the cache</p></li><li><p>Building on this, using a latent representation of the KV cache in MLA helps reduce that memory requirement</p></li></ul><p>There&#8217;s another way to reduce the compute and memory requirements of attention. 
This is done by reducing the number of previous tokens that a token needs to attend to, which brings us to <strong>Sliding Window Attention (SWA)</strong>.</p><p>It was <a href="https://arxiv.org/abs/2004.05150">introduced in the &#8216;</a><strong><a href="https://arxiv.org/abs/2004.05150">Longformer</a></strong><a href="https://arxiv.org/abs/2004.05150">&#8217; paper</a> in 2020. In SWA, each token attends only to a fixed-size window of previous tokens, rather than all previous ones.</p><p>We previously discussed how full attention has an O(<code>N</code><sup>2</sup>) computational complexity, as every token attends to itself and all the previous tokens, for an input sequence of length <code>N</code>.</p><p>With SWA, for an input sequence of length <code>N</code> and a window size (the fixed-size window of previous tokens that a token attends to) of <code>w</code>, the computational complexity goes down to O(<code>Nw</code>).</p><p>Although SWA is efficient, some long-range information is lost because each token attends only to a window of nearby tokens. Hence, it is often used in combination with:</p><ul><li><p>Full attention layers</p></li><li><p>Global window (where some tokens are allowed to attend to the entire sequence of previous tokens)</p></li></ul><p>SWA can also use a dilated window pattern, where each token attends to nearby previous tokens but skips some positions in a regular pattern. 
This helps a token attend to more distant previous tokens at roughly the same compute cost.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!azAY!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F637ee5cf-ee88-4ade-9d46-1bd8fc1fb3af_2746x628.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!azAY!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F637ee5cf-ee88-4ade-9d46-1bd8fc1fb3af_2746x628.png 424w, https://substackcdn.com/image/fetch/$s_!azAY!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F637ee5cf-ee88-4ade-9d46-1bd8fc1fb3af_2746x628.png 848w, https://substackcdn.com/image/fetch/$s_!azAY!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F637ee5cf-ee88-4ade-9d46-1bd8fc1fb3af_2746x628.png 1272w, https://substackcdn.com/image/fetch/$s_!azAY!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F637ee5cf-ee88-4ade-9d46-1bd8fc1fb3af_2746x628.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!azAY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F637ee5cf-ee88-4ade-9d46-1bd8fc1fb3af_2746x628.png" width="1456" height="333" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/637ee5cf-ee88-4ade-9d46-1bd8fc1fb3af_2746x628.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:333,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:192637,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/196789511?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F637ee5cf-ee88-4ade-9d46-1bd8fc1fb3af_2746x628.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!azAY!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F637ee5cf-ee88-4ade-9d46-1bd8fc1fb3af_2746x628.png 424w, https://substackcdn.com/image/fetch/$s_!azAY!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F637ee5cf-ee88-4ade-9d46-1bd8fc1fb3af_2746x628.png 848w, https://substackcdn.com/image/fetch/$s_!azAY!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F637ee5cf-ee88-4ade-9d46-1bd8fc1fb3af_2746x628.png 1272w, https://substackcdn.com/image/fetch/$s_!azAY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F637ee5cf-ee88-4ade-9d46-1bd8fc1fb3af_2746x628.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a><figcaption class="image-caption">Full attention vs Sliding window attention and its variants (<a href="https://arxiv.org/abs/2004.05150">Source</a>)</figcaption></figure></div><p>A popular example is the <a 
href="https://arxiv.org/pdf/2508.10925">GPT-oss family of models</a>, which uses SWA with a window size of 128 tokens in alternating layers with full attention (GQA with a configuration of 64 query heads and 8 KV heads).</p><div><hr></div><h3>Moving to DeepSeek Sparse Attention</h3>
      <p>
          <a href="https://www.intoai.pub/p/what-makes-deekseek-v4-so-good">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[This Week In AI Research (3-9 May 26) 🗓️]]></title><description><![CDATA[The top 10 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-3-9-may</link><guid isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-3-9-may</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Mon, 11 May 2026 12:44:43 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!AkcT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F670d459b-4039-4108-ad76-6f57fc2f65ea_2692x1182.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!AkcT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F670d459b-4039-4108-ad76-6f57fc2f65ea_2692x1182.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!AkcT!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F670d459b-4039-4108-ad76-6f57fc2f65ea_2692x1182.png 424w, https://substackcdn.com/image/fetch/$s_!AkcT!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F670d459b-4039-4108-ad76-6f57fc2f65ea_2692x1182.png 848w, https://substackcdn.com/image/fetch/$s_!AkcT!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F670d459b-4039-4108-ad76-6f57fc2f65ea_2692x1182.png 1272w, 
https://substackcdn.com/image/fetch/$s_!AkcT!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F670d459b-4039-4108-ad76-6f57fc2f65ea_2692x1182.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!AkcT!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F670d459b-4039-4108-ad76-6f57fc2f65ea_2692x1182.png" width="1200" height="526.6483516483516" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/670d459b-4039-4108-ad76-6f57fc2f65ea_2692x1182.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:639,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:801536,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197085688?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F670d459b-4039-4108-ad76-6f57fc2f65ea_2692x1182.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!AkcT!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F670d459b-4039-4108-ad76-6f57fc2f65ea_2692x1182.png 424w, https://substackcdn.com/image/fetch/$s_!AkcT!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F670d459b-4039-4108-ad76-6f57fc2f65ea_2692x1182.png 848w, 
https://substackcdn.com/image/fetch/$s_!AkcT!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F670d459b-4039-4108-ad76-6f57fc2f65ea_2692x1182.png 1272w, https://substackcdn.com/image/fetch/$s_!AkcT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F670d459b-4039-4108-ad76-6f57fc2f65ea_2692x1182.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h3>1. 
ProgramBench: Can Language Models Rebuild Programs From Scratch?</h3><p>ProgramBench is a benchmark by Meta researchers that measures the ability of software engineering agents to develop software holistically.</p><p>Given only a program and its documentation, agents must architect and implement a codebase that matches the reference executable&#8217;s behavior. </p><p>Evaluation of 9 frontier LLMs across 200 tasks, ranging from compact CLI tools to widely used software such as FFmpeg, SQLite, and the PHP interpreter, shows that none fully resolve any task, and the best model passes 95% of tests on only 3% of tasks. </p><p>Models favor monolithic, single-file implementations, which are very different from how humans write code.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!cRUT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01469ca2-1fe9-4b4b-ad95-324d1c74f33f_2650x1164.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!cRUT!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01469ca2-1fe9-4b4b-ad95-324d1c74f33f_2650x1164.png 424w, https://substackcdn.com/image/fetch/$s_!cRUT!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01469ca2-1fe9-4b4b-ad95-324d1c74f33f_2650x1164.png 848w, https://substackcdn.com/image/fetch/$s_!cRUT!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01469ca2-1fe9-4b4b-ad95-324d1c74f33f_2650x1164.png 1272w, 
https://substackcdn.com/image/fetch/$s_!cRUT!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01469ca2-1fe9-4b4b-ad95-324d1c74f33f_2650x1164.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!cRUT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01469ca2-1fe9-4b4b-ad95-324d1c74f33f_2650x1164.png" width="1456" height="640" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/01469ca2-1fe9-4b4b-ad95-324d1c74f33f_2650x1164.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:640,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:524643,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197085688?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01469ca2-1fe9-4b4b-ad95-324d1c74f33f_2650x1164.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!cRUT!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01469ca2-1fe9-4b4b-ad95-324d1c74f33f_2650x1164.png 424w, https://substackcdn.com/image/fetch/$s_!cRUT!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01469ca2-1fe9-4b4b-ad95-324d1c74f33f_2650x1164.png 848w, 
https://substackcdn.com/image/fetch/$s_!cRUT!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01469ca2-1fe9-4b4b-ad95-324d1c74f33f_2650x1164.png 1272w, https://substackcdn.com/image/fetch/$s_!cRUT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F01469ca2-1fe9-4b4b-ad95-324d1c74f33f_2650x1164.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2605.03546">using this link</a>.</p><p class="button-wrapper" 
data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>2. AI Co-Mathematician</h3><p>This research from Google DeepMind introduces the AI co-mathematician, a workbench that helps mathematicians use AI agents for open-ended research.</p><p>The AI co-mathematician is optimized for mathematical workflows, ranging from idea generation and literature searches to computational exploration, theorem proving, and theory building. </p><p>Early tests show the AI co-mathematician helped researchers solve open mathematical problems, find new research directions, and discover overlooked literature references.</p><p>Alongside this, the AI co-mathematician scored 48% on <a href="https://epoch.ai/benchmarks/frontiermath-tier-4">FrontierMath Tier 4</a>, a hard problem-solving benchmark. 
This is the highest score of all AI systems evaluated.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!IQuK!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9437ed18-c740-4985-a756-699a72d7d83b_2562x1316.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!IQuK!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9437ed18-c740-4985-a756-699a72d7d83b_2562x1316.png 424w, https://substackcdn.com/image/fetch/$s_!IQuK!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9437ed18-c740-4985-a756-699a72d7d83b_2562x1316.png 848w, https://substackcdn.com/image/fetch/$s_!IQuK!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9437ed18-c740-4985-a756-699a72d7d83b_2562x1316.png 1272w, https://substackcdn.com/image/fetch/$s_!IQuK!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9437ed18-c740-4985-a756-699a72d7d83b_2562x1316.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!IQuK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9437ed18-c740-4985-a756-699a72d7d83b_2562x1316.png" width="1456" height="748" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9437ed18-c740-4985-a756-699a72d7d83b_2562x1316.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:748,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:483088,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197085688?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9437ed18-c740-4985-a756-699a72d7d83b_2562x1316.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!IQuK!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9437ed18-c740-4985-a756-699a72d7d83b_2562x1316.png 424w, https://substackcdn.com/image/fetch/$s_!IQuK!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9437ed18-c740-4985-a756-699a72d7d83b_2562x1316.png 848w, https://substackcdn.com/image/fetch/$s_!IQuK!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9437ed18-c740-4985-a756-699a72d7d83b_2562x1316.png 1272w, https://substackcdn.com/image/fetch/$s_!IQuK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9437ed18-c740-4985-a756-699a72d7d83b_2562x1316.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2605.06651">using this link</a>.</p><div><hr></div><h3>3. 
Continuous Latent Diffusion Language Model</h3><p>This research paper presents <strong>Cola DLM</strong>, a continuous latent diffusion language model that generates text by compressing global semantic structure into a continuous latent space (using a Text VAE) and decoding it into text (using a block-causal DiT).</p><p>Cola DLM proves effective and shows strong scaling behavior for text generation across 4 research questions, 8 benchmarks, strictly matched ~2B-parameter autoregressive and LLaDA baselines, and scaling curves up to about 2000 EFLOPs.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!yorq!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F11195352-aaa1-41dc-882d-251c6e353043_2680x1298.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!yorq!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F11195352-aaa1-41dc-882d-251c6e353043_2680x1298.png 424w, https://substackcdn.com/image/fetch/$s_!yorq!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F11195352-aaa1-41dc-882d-251c6e353043_2680x1298.png 848w, https://substackcdn.com/image/fetch/$s_!yorq!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F11195352-aaa1-41dc-882d-251c6e353043_2680x1298.png 1272w, https://substackcdn.com/image/fetch/$s_!yorq!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F11195352-aaa1-41dc-882d-251c6e353043_2680x1298.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!yorq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F11195352-aaa1-41dc-882d-251c6e353043_2680x1298.png" width="1456" height="705" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/11195352-aaa1-41dc-882d-251c6e353043_2680x1298.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:705,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:939944,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197085688?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F11195352-aaa1-41dc-882d-251c6e353043_2680x1298.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!yorq!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F11195352-aaa1-41dc-882d-251c6e353043_2680x1298.png 424w, https://substackcdn.com/image/fetch/$s_!yorq!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F11195352-aaa1-41dc-882d-251c6e353043_2680x1298.png 848w, https://substackcdn.com/image/fetch/$s_!yorq!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F11195352-aaa1-41dc-882d-251c6e353043_2680x1298.png 1272w, https://substackcdn.com/image/fetch/$s_!yorq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F11195352-aaa1-41dc-882d-251c6e353043_2680x1298.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2605.06548">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>4. 
Manifold Steering Reveals the Shared Geometry of Neural Network Representation and Behavior</h3><p>This research paper finds that the geometry of a model&#8217;s internal activations causally shapes its outputs rather than merely correlating with them. </p><p>The authors introduce <strong>Manifold steering</strong>, where interventions move along the curved manifold of natural activations rather than in a straight Euclidean direction.</p><p>Across language-model reasoning tasks, in-context learning tasks, and a video world model, steering along the activation manifold produces output behaviors that stay on the model&#8217;s natural &#8220;behavior manifold.&#8221; In contrast, ordinary linear steering often moves through unnatural off-manifold areas, resulting in distorted outputs.</p><p>Researchers also show that this relationship works in reverse. Optimizing for desired behavioral trajectories recovers curved activation paths that align with the internal manifold. </p><p>This means that reliable model control should focus less on finding a single steering direction and more on understanding and following the model&#8217;s underlying representational geometry.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!5F2N!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aa47e7d-0dd5-4892-81cd-810a7bce33e5_1768x1156.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!5F2N!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aa47e7d-0dd5-4892-81cd-810a7bce33e5_1768x1156.png 424w, 
https://substackcdn.com/image/fetch/$s_!5F2N!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aa47e7d-0dd5-4892-81cd-810a7bce33e5_1768x1156.png 848w, https://substackcdn.com/image/fetch/$s_!5F2N!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aa47e7d-0dd5-4892-81cd-810a7bce33e5_1768x1156.png 1272w, https://substackcdn.com/image/fetch/$s_!5F2N!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aa47e7d-0dd5-4892-81cd-810a7bce33e5_1768x1156.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!5F2N!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aa47e7d-0dd5-4892-81cd-810a7bce33e5_1768x1156.png" width="1456" height="952" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3aa47e7d-0dd5-4892-81cd-810a7bce33e5_1768x1156.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:952,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:477007,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197085688?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aa47e7d-0dd5-4892-81cd-810a7bce33e5_1768x1156.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!5F2N!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aa47e7d-0dd5-4892-81cd-810a7bce33e5_1768x1156.png 424w, https://substackcdn.com/image/fetch/$s_!5F2N!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aa47e7d-0dd5-4892-81cd-810a7bce33e5_1768x1156.png 848w, https://substackcdn.com/image/fetch/$s_!5F2N!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aa47e7d-0dd5-4892-81cd-810a7bce33e5_1768x1156.png 1272w, https://substackcdn.com/image/fetch/$s_!5F2N!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3aa47e7d-0dd5-4892-81cd-810a7bce33e5_1768x1156.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2605.05115">using this link</a>.</p><div><hr></div><h3>5. MolmoAct2</h3><p>This research paper presents <strong>MolmoAct2</strong>, a fully open VLA robot control model designed for real-world deployment and use.</p><p>It builds on <strong>MolmoAct</strong> with a new embodied-reasoning backbone, <strong>Molmo2-ER</strong>, which is trained on 3.3 million samples with a specialize-then-rehearse recipe.</p><p>The release also includes:</p><ul><li><p>Three new robotics datasets</p></li><li><p><strong>MolmoAct2-FAST Tokenizer</strong>, an open-weight, open-data action tokenizer trained on millions of trajectories across five embodiments</p></li><li><p>A new KV-conditioned VLA architecture </p></li><li><p><strong>MolmoAct2-Think</strong>, an adaptive-depth reasoning variant that maintains spatial reasoning while massively reducing latency</p></li></ul><p>MolmoAct2 outperforms strong baselines, including &#960;0.5, while Molmo2-ER surpasses GPT-5 and Gemini Robotics ER-1.5 across 13 embodied-reasoning benchmarks.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!oGLI!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F15dd33cc-e651-4a5d-9ed9-d7bf7e472592_2682x1290.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!oGLI!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F15dd33cc-e651-4a5d-9ed9-d7bf7e472592_2682x1290.png 424w, https://substackcdn.com/image/fetch/$s_!oGLI!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F15dd33cc-e651-4a5d-9ed9-d7bf7e472592_2682x1290.png 848w, https://substackcdn.com/image/fetch/$s_!oGLI!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F15dd33cc-e651-4a5d-9ed9-d7bf7e472592_2682x1290.png 1272w, https://substackcdn.com/image/fetch/$s_!oGLI!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F15dd33cc-e651-4a5d-9ed9-d7bf7e472592_2682x1290.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!oGLI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F15dd33cc-e651-4a5d-9ed9-d7bf7e472592_2682x1290.png" width="1456" height="700" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/15dd33cc-e651-4a5d-9ed9-d7bf7e472592_2682x1290.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:700,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1908226,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197085688?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F15dd33cc-e651-4a5d-9ed9-d7bf7e472592_2682x1290.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!oGLI!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F15dd33cc-e651-4a5d-9ed9-d7bf7e472592_2682x1290.png 424w, https://substackcdn.com/image/fetch/$s_!oGLI!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F15dd33cc-e651-4a5d-9ed9-d7bf7e472592_2682x1290.png 848w, https://substackcdn.com/image/fetch/$s_!oGLI!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F15dd33cc-e651-4a5d-9ed9-d7bf7e472592_2682x1290.png 1272w, https://substackcdn.com/image/fetch/$s_!oGLI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F15dd33cc-e651-4a5d-9ed9-d7bf7e472592_2682x1290.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2605.02881v2">using this link</a>.</p><div><hr></div><h3>6. Model Spec Midtraining</h3><p>The research paper presents <strong>Model Spec Midtraining (MSM)</strong>, a method that improves how alignment training generalizes by teaching a model its intended behavior rules before fine-tuning. </p><p>Instead of relying only on examples of aligned behavior, MSM trains models on synthetic documents that explain the Model Spec or constitution. This approach allows later demonstrations to be interpreted through that framework. </p><p>The authors demonstrate that the same fine-tuning data can generalize in very different ways based on the specific spec taught during mid-training. They show that MSM can reduce safety-related failures. 
For instance, agentic misalignment in Qwen3-32B decreases from 54% to 7%, which surpasses a deliberative alignment baseline of 14%.</p><p>The authors also show that specs perform better when they clarify the values behind the rules and provide specific guidance rather than vague suggestions.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!XX_3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586b09ac-a7ee-480a-90ec-18af8403c32a_2622x1206.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!XX_3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586b09ac-a7ee-480a-90ec-18af8403c32a_2622x1206.png 424w, https://substackcdn.com/image/fetch/$s_!XX_3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586b09ac-a7ee-480a-90ec-18af8403c32a_2622x1206.png 848w, https://substackcdn.com/image/fetch/$s_!XX_3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586b09ac-a7ee-480a-90ec-18af8403c32a_2622x1206.png 1272w, https://substackcdn.com/image/fetch/$s_!XX_3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586b09ac-a7ee-480a-90ec-18af8403c32a_2622x1206.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!XX_3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586b09ac-a7ee-480a-90ec-18af8403c32a_2622x1206.png" width="1456" height="670" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/586b09ac-a7ee-480a-90ec-18af8403c32a_2622x1206.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:670,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:510635,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197085688?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586b09ac-a7ee-480a-90ec-18af8403c32a_2622x1206.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!XX_3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586b09ac-a7ee-480a-90ec-18af8403c32a_2622x1206.png 424w, https://substackcdn.com/image/fetch/$s_!XX_3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586b09ac-a7ee-480a-90ec-18af8403c32a_2622x1206.png 848w, https://substackcdn.com/image/fetch/$s_!XX_3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586b09ac-a7ee-480a-90ec-18af8403c32a_2622x1206.png 1272w, https://substackcdn.com/image/fetch/$s_!XX_3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586b09ac-a7ee-480a-90ec-18af8403c32a_2622x1206.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2605.02087">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>7. 
Mamoda2.5</h3><p>The research paper presents <strong>Mamoda2.5</strong>, a unified AR&#8211;Diffusion framework that integrates multimodal understanding and generation within a single architecture.</p><p>The model&#8217;s Diffusion Transformer backbone uses a fine-grained Mixture-of-Experts (MoE) architecture (128 experts, Top-8 routing), resulting in a 25B-parameter model that activates only 3B parameters, significantly reducing training costs while scaling up model capacity.</p><p>Mamoda2.5 achieves top-tier generation performance on VBench 2.0 and sets a new record in video editing quality, beating the evaluated open-source models and matching the performance of current top-tier proprietary models, including Kling O1, on OpenVE-Bench.</p><p>Compared to open-source baselines, Mamoda2.5 achieves up to 95.9&#215; faster video editing inference. This is due to a joint few-step distillation and reinforcement learning framework that compresses the 30-step editing model into a 4-step model, greatly accelerating model inference.</p><p>In real-world applications, Mamoda2.5 has been successfully deployed for content moderation and creative restoration tasks in advertising, achieving a 98% success rate in an internal advertising video editing scenario.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!DhY2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d5b7e-7827-434c-bdef-eda7a104b8bf_1894x1134.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!DhY2!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d5b7e-7827-434c-bdef-eda7a104b8bf_1894x1134.png 424w, 
https://substackcdn.com/image/fetch/$s_!DhY2!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d5b7e-7827-434c-bdef-eda7a104b8bf_1894x1134.png 848w, https://substackcdn.com/image/fetch/$s_!DhY2!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d5b7e-7827-434c-bdef-eda7a104b8bf_1894x1134.png 1272w, https://substackcdn.com/image/fetch/$s_!DhY2!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d5b7e-7827-434c-bdef-eda7a104b8bf_1894x1134.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!DhY2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d5b7e-7827-434c-bdef-eda7a104b8bf_1894x1134.png" width="1456" height="872" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fb5d5b7e-7827-434c-bdef-eda7a104b8bf_1894x1134.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:872,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:446460,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197085688?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d5b7e-7827-434c-bdef-eda7a104b8bf_1894x1134.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!DhY2!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d5b7e-7827-434c-bdef-eda7a104b8bf_1894x1134.png 424w, https://substackcdn.com/image/fetch/$s_!DhY2!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d5b7e-7827-434c-bdef-eda7a104b8bf_1894x1134.png 848w, https://substackcdn.com/image/fetch/$s_!DhY2!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d5b7e-7827-434c-bdef-eda7a104b8bf_1894x1134.png 1272w, https://substackcdn.com/image/fetch/$s_!DhY2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d5b7e-7827-434c-bdef-eda7a104b8bf_1894x1134.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2605.02641v1">using this link</a>.</p><div><hr></div><h3>8. Superintelligent Retrieval Agent</h3><p>This research paper introduces <strong>SuperIntelligent Retrieval Agent (SIRA)</strong>, a retrieval agent that replaces slow multi-round searches with one effective, corpus-aware lexical query. </p><p>Instead of simply adding related words to a user query, SIRA combines an LLM with document-frequency statistics to select terms that help differentiate likely evidence from misleading corpus matches. </p><p>It improves documents offline by adding missing vocabulary, predicts terms that could serve as evidence but are not included in the query, filters out weak or overly common terms, and then performs a single weighted BM25 retrieval call combining the original query with the validated expansion.</p><p>In tests across 10 BEIR and QA benchmarks, SIRA outperformed dense retrievers and multi-round retrieval systems while remaining training-free, efficient, and interpretable.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2Mye!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57440ba4-12f4-476d-9e9c-74073c8a216d_1932x1176.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!2Mye!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57440ba4-12f4-476d-9e9c-74073c8a216d_1932x1176.png 424w, https://substackcdn.com/image/fetch/$s_!2Mye!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57440ba4-12f4-476d-9e9c-74073c8a216d_1932x1176.png 848w, https://substackcdn.com/image/fetch/$s_!2Mye!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57440ba4-12f4-476d-9e9c-74073c8a216d_1932x1176.png 1272w, https://substackcdn.com/image/fetch/$s_!2Mye!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57440ba4-12f4-476d-9e9c-74073c8a216d_1932x1176.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!2Mye!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57440ba4-12f4-476d-9e9c-74073c8a216d_1932x1176.png" width="1456" height="886" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/57440ba4-12f4-476d-9e9c-74073c8a216d_1932x1176.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:886,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:384151,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197085688?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57440ba4-12f4-476d-9e9c-74073c8a216d_1932x1176.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!2Mye!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57440ba4-12f4-476d-9e9c-74073c8a216d_1932x1176.png 424w, https://substackcdn.com/image/fetch/$s_!2Mye!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57440ba4-12f4-476d-9e9c-74073c8a216d_1932x1176.png 848w, https://substackcdn.com/image/fetch/$s_!2Mye!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57440ba4-12f4-476d-9e9c-74073c8a216d_1932x1176.png 1272w, https://substackcdn.com/image/fetch/$s_!2Mye!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57440ba4-12f4-476d-9e9c-74073c8a216d_1932x1176.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2605.06647">using this link</a>.</p><div><hr></div><h3>9. Nonsense Helps</h3><p>This research paper presents <strong>Lorem Perturbation for Exploration (LoPE)</strong>, a simple RL training technique for reasoning models. When GRPO gets stuck because all the sampled answers to a difficult question fail, LoPE adds random Lorem Ipsum-style &#8220;nonsense&#8221; text to the prompt and resamples. </p><p>These irrelevant changes to the prompt can lead the model down different reasoning paths and improve exploration more than just resampling the original prompt. </p><p>Experiments across 1.7B, 4B, and 7B models show that LoPE significantly outperforms resampling with the original prompts. 
Further analysis reveals that other Latin-based random sequences with low perplexity are also effective perturbations.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!qRST!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f7c410-55f5-4ef3-a498-7433f85ec8f8_2434x1268.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!qRST!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f7c410-55f5-4ef3-a498-7433f85ec8f8_2434x1268.png 424w, https://substackcdn.com/image/fetch/$s_!qRST!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f7c410-55f5-4ef3-a498-7433f85ec8f8_2434x1268.png 848w, https://substackcdn.com/image/fetch/$s_!qRST!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f7c410-55f5-4ef3-a498-7433f85ec8f8_2434x1268.png 1272w, https://substackcdn.com/image/fetch/$s_!qRST!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f7c410-55f5-4ef3-a498-7433f85ec8f8_2434x1268.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!qRST!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f7c410-55f5-4ef3-a498-7433f85ec8f8_2434x1268.png" width="1456" height="759" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/16f7c410-55f5-4ef3-a498-7433f85ec8f8_2434x1268.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:759,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:856361,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197085688?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f7c410-55f5-4ef3-a498-7433f85ec8f8_2434x1268.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!qRST!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f7c410-55f5-4ef3-a498-7433f85ec8f8_2434x1268.png 424w, https://substackcdn.com/image/fetch/$s_!qRST!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f7c410-55f5-4ef3-a498-7433f85ec8f8_2434x1268.png 848w, https://substackcdn.com/image/fetch/$s_!qRST!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f7c410-55f5-4ef3-a498-7433f85ec8f8_2434x1268.png 1272w, https://substackcdn.com/image/fetch/$s_!qRST!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F16f7c410-55f5-4ef3-a498-7433f85ec8f8_2434x1268.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2605.05566">using this link</a>.</p><div><hr></div><h3>10. Retrieval from Within</h3><p>This research presents <strong>INTRA (INTrinsic Retrieval via Attention)</strong>, a RAG-style approach in which an attention-based encoder-decoder retrieves evidence from its precomputed internal representations rather than relying on a separate external retriever. </p><p>The decoder attention queries score the encoded evidence chunks, which are then reused directly as context for generation. </p><p>INTRA outperforms strong engineered retrieval pipelines on both evidence recall and end-to-end answer quality on question-answering benchmarks. 
</p><p>This shows that attention-based models already possess a retrieval mechanism that can be elicited, rather than added as an external module.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!BEIZ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F740e04e9-d793-4f72-bd71-10ed6dd28909_2540x1288.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!BEIZ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F740e04e9-d793-4f72-bd71-10ed6dd28909_2540x1288.png 424w, https://substackcdn.com/image/fetch/$s_!BEIZ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F740e04e9-d793-4f72-bd71-10ed6dd28909_2540x1288.png 848w, https://substackcdn.com/image/fetch/$s_!BEIZ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F740e04e9-d793-4f72-bd71-10ed6dd28909_2540x1288.png 1272w, https://substackcdn.com/image/fetch/$s_!BEIZ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F740e04e9-d793-4f72-bd71-10ed6dd28909_2540x1288.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!BEIZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F740e04e9-d793-4f72-bd71-10ed6dd28909_2540x1288.png" width="1456" height="738" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/740e04e9-d793-4f72-bd71-10ed6dd28909_2540x1288.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:738,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:452902,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/197085688?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F740e04e9-d793-4f72-bd71-10ed6dd28909_2540x1288.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!BEIZ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F740e04e9-d793-4f72-bd71-10ed6dd28909_2540x1288.png 424w, https://substackcdn.com/image/fetch/$s_!BEIZ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F740e04e9-d793-4f72-bd71-10ed6dd28909_2540x1288.png 848w, https://substackcdn.com/image/fetch/$s_!BEIZ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F740e04e9-d793-4f72-bd71-10ed6dd28909_2540x1288.png 1272w, https://substackcdn.com/image/fetch/$s_!BEIZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F740e04e9-d793-4f72-bd71-10ed6dd28909_2540x1288.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2605.05806">using this link</a>.</p><div><hr></div><p>Join the <strong>paid tier today</strong> to get access to all posts on this newsletter and 100x your AI engineering skills.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join 'Into AI' premium today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join 'Into AI' premium today</span></a></p><p>You can also read my books on <strong><a href="https://bamaniaashish.gumroad.com/">Gumroad</a></strong> and connect with me on <strong><a href="https://www.linkedin.com/in/ashishbamania/">LinkedIn</a></strong> 
to stay in touch.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!gK6Y!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f7b9d6b-e2f6-446a-8954-ad0ce3efc1a6_1600x298.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!gK6Y!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f7b9d6b-e2f6-446a-8954-ad0ce3efc1a6_1600x298.png 424w, https://substackcdn.com/image/fetch/$s_!gK6Y!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f7b9d6b-e2f6-446a-8954-ad0ce3efc1a6_1600x298.png 848w, https://substackcdn.com/image/fetch/$s_!gK6Y!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f7b9d6b-e2f6-446a-8954-ad0ce3efc1a6_1600x298.png 1272w, https://substackcdn.com/image/fetch/$s_!gK6Y!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f7b9d6b-e2f6-446a-8954-ad0ce3efc1a6_1600x298.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!gK6Y!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f7b9d6b-e2f6-446a-8954-ad0ce3efc1a6_1600x298.png" width="1456" height="271" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7f7b9d6b-e2f6-446a-8954-ad0ce3efc1a6_1600x298.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:271,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!gK6Y!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f7b9d6b-e2f6-446a-8954-ad0ce3efc1a6_1600x298.png 424w, https://substackcdn.com/image/fetch/$s_!gK6Y!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f7b9d6b-e2f6-446a-8954-ad0ce3efc1a6_1600x298.png 848w, https://substackcdn.com/image/fetch/$s_!gK6Y!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f7b9d6b-e2f6-446a-8954-ad0ce3efc1a6_1600x298.png 1272w, https://substackcdn.com/image/fetch/$s_!gK6Y!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f7b9d6b-e2f6-446a-8954-ad0ce3efc1a6_1600x298.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item></channel></rss>