<?xml version="1.0" encoding="UTF-8"?><rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:googleplay="http://www.google.com/schemas/play-podcasts/1.0"><channel><title><![CDATA[Into AI]]></title><description><![CDATA[Helping you become a 100X AI Engineer]]></description><link>https://www.intoai.pub</link><image><url>https://substackcdn.com/image/fetch/$s_!Ea4T!,w_256,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png</url><title>Into AI</title><link>https://www.intoai.pub</link></image><generator>Substack</generator><lastBuildDate>Sun, 05 Apr 2026 21:32:56 GMT</lastBuildDate><atom:link href="https://www.intoai.pub/feed" rel="self" type="application/rss+xml"/><copyright><![CDATA[Dr. Ashish Bamania]]></copyright><language><![CDATA[en]]></language><webMaster><![CDATA[intoai@substack.com]]></webMaster><itunes:owner><itunes:email><![CDATA[intoai@substack.com]]></itunes:email><itunes:name><![CDATA[Dr. Ashish Bamania]]></itunes:name></itunes:owner><itunes:author><![CDATA[Dr. Ashish Bamania]]></itunes:author><googleplay:owner><![CDATA[intoai@substack.com]]></googleplay:owner><googleplay:email><![CDATA[intoai@substack.com]]></googleplay:email><googleplay:author><![CDATA[Dr. Ashish Bamania]]></googleplay:author><itunes:block><![CDATA[Yes]]></itunes:block><item><title><![CDATA[🗓️ This Week In AI Research (22-28 March 26)]]></title><description><![CDATA[The top 10 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-22-28-march</link><guid isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-22-28-march</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Tue, 31 Mar 2026 13:42:20 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!xUmn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8d60a8f4-efe0-4bcf-b63b-aec49f0f55a3_2734x1194.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!xUmn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8d60a8f4-efe0-4bcf-b63b-aec49f0f55a3_2734x1194.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!xUmn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8d60a8f4-efe0-4bcf-b63b-aec49f0f55a3_2734x1194.png 424w, https://substackcdn.com/image/fetch/$s_!xUmn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8d60a8f4-efe0-4bcf-b63b-aec49f0f55a3_2734x1194.png 848w, https://substackcdn.com/image/fetch/$s_!xUmn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8d60a8f4-efe0-4bcf-b63b-aec49f0f55a3_2734x1194.png 1272w, https://substackcdn.com/image/fetch/$s_!xUmn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8d60a8f4-efe0-4bcf-b63b-aec49f0f55a3_2734x1194.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!xUmn!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8d60a8f4-efe0-4bcf-b63b-aec49f0f55a3_2734x1194.png" width="1200" height="524.1758241758242" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8d60a8f4-efe0-4bcf-b63b-aec49f0f55a3_2734x1194.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:636,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:1525869,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/192396162?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8d60a8f4-efe0-4bcf-b63b-aec49f0f55a3_2734x1194.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!xUmn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8d60a8f4-efe0-4bcf-b63b-aec49f0f55a3_2734x1194.png 424w, https://substackcdn.com/image/fetch/$s_!xUmn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8d60a8f4-efe0-4bcf-b63b-aec49f0f55a3_2734x1194.png 848w, https://substackcdn.com/image/fetch/$s_!xUmn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8d60a8f4-efe0-4bcf-b63b-aec49f0f55a3_2734x1194.png 1272w, https://substackcdn.com/image/fetch/$s_!xUmn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8d60a8f4-efe0-4bcf-b63b-aec49f0f55a3_2734x1194.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h3>1. <strong>TRIBE v2</strong></h3><p>This research paper from Meta introduces <strong>TRIBE v2</strong>, a tri-modal (video, audio, and language) foundation model capable of predicting human brain activity across a variety of naturalistic and experimental conditions. </p><p>Trained on a unified dataset of over 1,000 hours of fMRI across 720 subjects, the model accurately predicts high-resolution brain responses to new stimuli, tasks, and subjects, exceeding the performance of traditional linear encoding models and delivering multi-fold improvements in accuracy. </p><p>Additionally, TRIBE v2 enables in silico experimentation. Tested on seminal visual and neuro-linguistic paradigms, it recovers a variety of results established by decades of empirical research. 
</p><p>Finally, by extracting interpretable latent features, TRIBE v2 reveals the fine-grained topography of multisensory integration. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!XFXC!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56cf5fb0-7706-4f2f-a1a5-fbcac3e22c5b_1946x1346.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!XFXC!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56cf5fb0-7706-4f2f-a1a5-fbcac3e22c5b_1946x1346.png 424w, https://substackcdn.com/image/fetch/$s_!XFXC!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56cf5fb0-7706-4f2f-a1a5-fbcac3e22c5b_1946x1346.png 848w, https://substackcdn.com/image/fetch/$s_!XFXC!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56cf5fb0-7706-4f2f-a1a5-fbcac3e22c5b_1946x1346.png 1272w, https://substackcdn.com/image/fetch/$s_!XFXC!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56cf5fb0-7706-4f2f-a1a5-fbcac3e22c5b_1946x1346.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!XFXC!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56cf5fb0-7706-4f2f-a1a5-fbcac3e22c5b_1946x1346.png" width="1456" height="1007" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/56cf5fb0-7706-4f2f-a1a5-fbcac3e22c5b_1946x1346.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1007,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1966435,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/192396162?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56cf5fb0-7706-4f2f-a1a5-fbcac3e22c5b_1946x1346.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!XFXC!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56cf5fb0-7706-4f2f-a1a5-fbcac3e22c5b_1946x1346.png 424w, https://substackcdn.com/image/fetch/$s_!XFXC!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56cf5fb0-7706-4f2f-a1a5-fbcac3e22c5b_1946x1346.png 848w, https://substackcdn.com/image/fetch/$s_!XFXC!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56cf5fb0-7706-4f2f-a1a5-fbcac3e22c5b_1946x1346.png 1272w, https://substackcdn.com/image/fetch/$s_!XFXC!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56cf5fb0-7706-4f2f-a1a5-fbcac3e22c5b_1946x1346.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 
0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://scontent-lhr8-1.xx.fbcdn.net/v/t39.2365-6/657045057_4517522795185706_3804826277082824742_n.pdf?_nc_cat=107&amp;ccb=1-7&amp;_nc_sid=3c67a6&amp;_nc_ohc=EuCY89GgEVoQ7kNvwHlcKCj&amp;_nc_oc=Adp1xRgJ9l8TGnVR6eSP8AKOcfLA3yVKqO_FNLl61zPbzrqpq4Z2ysgCrlGKqfDc1Qw&amp;_nc_zt=14&amp;_nc_ht=scontent-lhr8-1.xx&amp;_nc_gid=xumAXL0x8BiYK-KcECPKkA&amp;_nc_ss=7a30f&amp;oh=00_AfxS3r99kJiWCeqPFMFtcEbqY2BHkWGAtMi7ym6j8OvC-A&amp;oe=69CECBF2">using this link</a>.<strong> </strong></p><div><hr></div><p><strong>Join the paid tier today</strong> to get access to all posts on this newsletter, including:</p><ul><li><p>&#129438; Build OpenClaw from Scratch (upcoming)</p></li><li><p>&#127853; <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Build a Mixture-of-Experts (MoE) LLM from 
Scratch</a></p></li><li><p>&#129412; <a href="https://www.intoai.pub/p/building-your-first-ai-agent">Building Your First AI Agent</a></p></li><li><p>&#127880; <a href="https://www.intoai.pub/p/distributed-data-parallel">Learn to train deep learning models on multiple GPUs</a></p></li></ul><p>and hundreds more!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join 'Into AI' premium today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join 'Into AI' premium today</span></a></p><div><hr></div><h3><strong>2. MolmoWeb</strong></h3><p>This research from Allen AI introduces <strong>MolmoWeb</strong>, two fully open-source visual web agents built on <strong><a href="https://allenai.org/molmo">Molmo 2</a></strong>, along with the weights, training data (MolmoWebMix), code, and evaluation tools used to build them. </p><p>Given a task instruction and a live webpage, MolmoWeb observes the page through screenshots (without relying on HTML, accessibility trees, or specialized APIs), predicts the next step, and executes browser actions such as clicking, typing, or scrolling.</p><p>The 4B and 8B MolmoWeb models achieve state-of-the-art results among open-weight web agents, such as <a href="https://www.microsoft.com/en-us/research/blog/fara-7b-an-efficient-agentic-model-for-computer-use/">Fara-7B</a>. 
They also outperform agents built on much larger proprietary models like GPT-4o that rely on both annotated screenshots and structured page data.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!To0q!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b83f572-bc4b-4b5b-98d7-646311ad3ab7_2136x1230.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!To0q!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b83f572-bc4b-4b5b-98d7-646311ad3ab7_2136x1230.png 424w, https://substackcdn.com/image/fetch/$s_!To0q!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b83f572-bc4b-4b5b-98d7-646311ad3ab7_2136x1230.png 848w, https://substackcdn.com/image/fetch/$s_!To0q!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b83f572-bc4b-4b5b-98d7-646311ad3ab7_2136x1230.png 1272w, https://substackcdn.com/image/fetch/$s_!To0q!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b83f572-bc4b-4b5b-98d7-646311ad3ab7_2136x1230.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!To0q!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b83f572-bc4b-4b5b-98d7-646311ad3ab7_2136x1230.png" width="1456" height="838" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7b83f572-bc4b-4b5b-98d7-646311ad3ab7_2136x1230.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:838,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:437948,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/192396162?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b83f572-bc4b-4b5b-98d7-646311ad3ab7_2136x1230.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!To0q!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b83f572-bc4b-4b5b-98d7-646311ad3ab7_2136x1230.png 424w, https://substackcdn.com/image/fetch/$s_!To0q!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b83f572-bc4b-4b5b-98d7-646311ad3ab7_2136x1230.png 848w, https://substackcdn.com/image/fetch/$s_!To0q!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b83f572-bc4b-4b5b-98d7-646311ad3ab7_2136x1230.png 1272w, https://substackcdn.com/image/fetch/$s_!To0q!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7b83f572-bc4b-4b5b-98d7-646311ad3ab7_2136x1230.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://allenai.org/papers/molmoweb">using this link</a>.</p><div><hr></div><h3><strong>3. SAM 3.1</strong></h3>
      <p>
          <a href="https://www.intoai.pub/p/this-week-in-ai-research-22-28-march">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[OpenClaw deletes the entire mailbox of the Director of Safety and Alignment at Meta Superintelligence Labs 🦞]]></title><description><![CDATA[Why OpenClaw went rogue on her mailbox and 10 ways to ensure this never happens to you.]]></description><link>https://www.intoai.pub/p/openclaw-deletes-the-entire-mailbox</link><guid isPermaLink="false">https://www.intoai.pub/p/openclaw-deletes-the-entire-mailbox</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Sat, 28 Mar 2026 01:23:08 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!l32x!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48fe0e31-fa31-4797-bbc1-08a42a7f4ddf_1740x1192.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>&#9728;&#65039; It&#8217;s a beautiful February day in 2026, and everything seems to be working well for Summer Yue.</p><p>Then, suddenly, OpenClaw <a href="https://x.com/summeryue0/status/2025774069124399363">starts deleting important emails</a> from her main mailbox!</p><p>For context, <a href="https://www.linkedin.com/in/summeryyue/">Summer Yue</a> is the Director of Safety and Alignment at Meta Superintelligence Labs.</p><p>And, this was despite her instructing OpenClaw to &#8220;always confirm before acting&#8221;.</p><p>She panicked and ordered OpenClaw to stop, but the bot was completely out of control.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!l32x!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48fe0e31-fa31-4797-bbc1-08a42a7f4ddf_1740x1192.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!l32x!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48fe0e31-fa31-4797-bbc1-08a42a7f4ddf_1740x1192.png 424w, https://substackcdn.com/image/fetch/$s_!l32x!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48fe0e31-fa31-4797-bbc1-08a42a7f4ddf_1740x1192.png 848w, https://substackcdn.com/image/fetch/$s_!l32x!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48fe0e31-fa31-4797-bbc1-08a42a7f4ddf_1740x1192.png 1272w, https://substackcdn.com/image/fetch/$s_!l32x!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48fe0e31-fa31-4797-bbc1-08a42a7f4ddf_1740x1192.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!l32x!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48fe0e31-fa31-4797-bbc1-08a42a7f4ddf_1740x1192.png" width="1456" height="997" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/48fe0e31-fa31-4797-bbc1-08a42a7f4ddf_1740x1192.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:997,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1406965,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/192264782?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48fe0e31-fa31-4797-bbc1-08a42a7f4ddf_1740x1192.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!l32x!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48fe0e31-fa31-4797-bbc1-08a42a7f4ddf_1740x1192.png 424w, https://substackcdn.com/image/fetch/$s_!l32x!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48fe0e31-fa31-4797-bbc1-08a42a7f4ddf_1740x1192.png 848w, https://substackcdn.com/image/fetch/$s_!l32x!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48fe0e31-fa31-4797-bbc1-08a42a7f4ddf_1740x1192.png 1272w, https://substackcdn.com/image/fetch/$s_!l32x!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F48fe0e31-fa31-4797-bbc1-08a42a7f4ddf_1740x1192.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"></figcaption></figure></div><p>When the bot didn&#8217;t listen, and she couldn&#8217;t stop the ongoing mailbox deletion using her phone, she ran to her Mac mini, where her OpenClaw lived, and terminated all the processes.</p><p>She describes it in her own words: </p><blockquote><p>&#8220;<em>I couldn&#8217;t stop it from my phone. I had to RUN to my Mac mini like I was defusing a bomb.</em>&#8221;</p></blockquote><p>Summer had previously been working with OpenClaw using a toy inbox, and the bot functioned really well. But on that unfortunate day, she decided to use OpenClaw on her real mailbox with the following prompt: </p><blockquote><p> <em>&#8220;Check this inbox too and suggest what you would archive or delete, don&#8217;t action until I tell you to.&#8221;</em></p></blockquote><p>And that&#8217;s what made the bot go completely haywire!</p><p><strong>But why? &#129300;</strong></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>A Little Intro to Context Management</h3><p>To understand what went wrong, we need to understand how the LLM that acts as OpenClaw&#8217;s brain manages its context.</p><p>An LLM uses its context window to store instructions and previous messages from an ongoing conversation. 
The context window functions as its <a href="https://www.intoai.pub/p/ai-memory">short-term or working memory</a>.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Pgvn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90df23b0-c0b9-40f0-9659-fc93229e8d6b_2380x740.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Pgvn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90df23b0-c0b9-40f0-9659-fc93229e8d6b_2380x740.png 424w, https://substackcdn.com/image/fetch/$s_!Pgvn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90df23b0-c0b9-40f0-9659-fc93229e8d6b_2380x740.png 848w, https://substackcdn.com/image/fetch/$s_!Pgvn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90df23b0-c0b9-40f0-9659-fc93229e8d6b_2380x740.png 1272w, https://substackcdn.com/image/fetch/$s_!Pgvn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90df23b0-c0b9-40f0-9659-fc93229e8d6b_2380x740.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Pgvn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90df23b0-c0b9-40f0-9659-fc93229e8d6b_2380x740.png" width="725.2578125" height="225.64683314732142" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/90df23b0-c0b9-40f0-9659-fc93229e8d6b_2380x740.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:453,&quot;width&quot;:1456,&quot;resizeWidth&quot;:725.2578125,&quot;bytes&quot;:105429,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/192264782?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90df23b0-c0b9-40f0-9659-fc93229e8d6b_2380x740.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Pgvn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90df23b0-c0b9-40f0-9659-fc93229e8d6b_2380x740.png 424w, https://substackcdn.com/image/fetch/$s_!Pgvn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90df23b0-c0b9-40f0-9659-fc93229e8d6b_2380x740.png 848w, https://substackcdn.com/image/fetch/$s_!Pgvn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90df23b0-c0b9-40f0-9659-fc93229e8d6b_2380x740.png 1272w, https://substackcdn.com/image/fetch/$s_!Pgvn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90df23b0-c0b9-40f0-9659-fc93229e8d6b_2380x740.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Although massive, these context windows are limited spaces that can fill up very quickly as the conversation grows.</p><div 
class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!8sqd!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!8sqd!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!8sqd!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!8sqd!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!8sqd!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!8sqd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png" width="1080" height="1080" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1080,&quot;width&quot;:1080,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!8sqd!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!8sqd!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!8sqd!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!8sqd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Image from the author&#8217;s book titled &#8216;<strong><a href="https://bamaniaashish.gumroad.com/l/llmbook/">LLMs In 100 Images</a>.</strong>&#8217;</figcaption></figure></div><p>As the context window approaches its end, response quality declines because the large amount of stored information makes it harder for the LLM to attend to it all. </p><p>An LLM tends to remember the beginning and end well, but starts <a href="https://arxiv.org/abs/2307.03172">forgetting the information buried in the middle</a> of its context window. 
This shows up as hallucinations or the LLM ignoring instructions.</p><p>Once the context window is fully filled, the LLM stops answering user queries.</p><p>To ensure that such context window exhaustion is delayed as much as possible, previous conversation history goes through either:</p><ul><li><p><strong>Trimming: </strong>Where only a certain number of the most recent conversations are kept, and the previous ones are discarded before passing to an LLM at every turn.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ZrCE!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b603ad5-5b53-478f-88d8-01d80a299444_2638x862.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ZrCE!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b603ad5-5b53-478f-88d8-01d80a299444_2638x862.png 424w, https://substackcdn.com/image/fetch/$s_!ZrCE!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b603ad5-5b53-478f-88d8-01d80a299444_2638x862.png 848w, https://substackcdn.com/image/fetch/$s_!ZrCE!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b603ad5-5b53-478f-88d8-01d80a299444_2638x862.png 1272w, https://substackcdn.com/image/fetch/$s_!ZrCE!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b603ad5-5b53-478f-88d8-01d80a299444_2638x862.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!ZrCE!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b603ad5-5b53-478f-88d8-01d80a299444_2638x862.png" width="1200" height="392.3076923076923" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9b603ad5-5b53-478f-88d8-01d80a299444_2638x862.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:476,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:137969,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/192264782?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b603ad5-5b53-478f-88d8-01d80a299444_2638x862.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ZrCE!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b603ad5-5b53-478f-88d8-01d80a299444_2638x862.png 424w, https://substackcdn.com/image/fetch/$s_!ZrCE!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b603ad5-5b53-478f-88d8-01d80a299444_2638x862.png 848w, https://substackcdn.com/image/fetch/$s_!ZrCE!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b603ad5-5b53-478f-88d8-01d80a299444_2638x862.png 1272w, 
https://substackcdn.com/image/fetch/$s_!ZrCE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b603ad5-5b53-478f-88d8-01d80a299444_2638x862.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Trimming conversations to keep only the last 2 recent ones and discarding the previous. 
(Other context for the LLM is omitted from the image for clarity.)</figcaption></figure></div><ul><li><p><strong>Summarization/ Compaction</strong>: Where the previous conversations are summarized and passed to the LLM at each turn, rather than the complete conversation history.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!peyW!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5be0f9c7-042b-4486-9567-1b9d113cef14_2784x836.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!peyW!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5be0f9c7-042b-4486-9567-1b9d113cef14_2784x836.png 424w, https://substackcdn.com/image/fetch/$s_!peyW!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5be0f9c7-042b-4486-9567-1b9d113cef14_2784x836.png 848w, https://substackcdn.com/image/fetch/$s_!peyW!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5be0f9c7-042b-4486-9567-1b9d113cef14_2784x836.png 1272w, https://substackcdn.com/image/fetch/$s_!peyW!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5be0f9c7-042b-4486-9567-1b9d113cef14_2784x836.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!peyW!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5be0f9c7-042b-4486-9567-1b9d113cef14_2784x836.png" width="1200" height="360.16483516483515" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5be0f9c7-042b-4486-9567-1b9d113cef14_2784x836.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:437,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:155759,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/192264782?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5be0f9c7-042b-4486-9567-1b9d113cef14_2784x836.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!peyW!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5be0f9c7-042b-4486-9567-1b9d113cef14_2784x836.png 424w, https://substackcdn.com/image/fetch/$s_!peyW!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5be0f9c7-042b-4486-9567-1b9d113cef14_2784x836.png 848w, https://substackcdn.com/image/fetch/$s_!peyW!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5be0f9c7-042b-4486-9567-1b9d113cef14_2784x836.png 1272w, https://substackcdn.com/image/fetch/$s_!peyW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5be0f9c7-042b-4486-9567-1b9d113cef14_2784x836.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Summarise all conversations using the same LLM or another one, and give this summary to the LLM at each turn rather than the complete conversation history. (Other context for the LLM is omitted from the image for clarity.)</figcaption></figure></div><ul><li><p><strong>Pruning: </strong>Where old tool results (file reads, API responses, browser results) are selectively removed from the context while leaving all user queries and LLM responses untouched.</p></li></ul><div><hr></div><h3>So What Really Went Wrong With OpenClaw?</h3><p>Since Summer was previously using a small toy mailbox with OpenClaw, things worked pretty well. 
But when she moved the bot to use her actual mailbox, which was quite large, the context window filled up quickly, triggering <strong>Context compaction</strong>.</p><p>This led the bot to lose its original safety instruction on not taking action without confirmation, resulting in the deletion of hundreds of important emails.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Oca1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F82af6a76-7d88-46e3-a1ad-c29a6b550497_1740x992.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Oca1!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F82af6a76-7d88-46e3-a1ad-c29a6b550497_1740x992.png 424w, https://substackcdn.com/image/fetch/$s_!Oca1!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F82af6a76-7d88-46e3-a1ad-c29a6b550497_1740x992.png 848w, https://substackcdn.com/image/fetch/$s_!Oca1!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F82af6a76-7d88-46e3-a1ad-c29a6b550497_1740x992.png 1272w, https://substackcdn.com/image/fetch/$s_!Oca1!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F82af6a76-7d88-46e3-a1ad-c29a6b550497_1740x992.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Oca1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F82af6a76-7d88-46e3-a1ad-c29a6b550497_1740x992.png" width="1456" height="830" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/82af6a76-7d88-46e3-a1ad-c29a6b550497_1740x992.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:830,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:717860,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/192264782?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F82af6a76-7d88-46e3-a1ad-c29a6b550497_1740x992.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Oca1!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F82af6a76-7d88-46e3-a1ad-c29a6b550497_1740x992.png 424w, https://substackcdn.com/image/fetch/$s_!Oca1!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F82af6a76-7d88-46e3-a1ad-c29a6b550497_1740x992.png 848w, https://substackcdn.com/image/fetch/$s_!Oca1!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F82af6a76-7d88-46e3-a1ad-c29a6b550497_1740x992.png 1272w, https://substackcdn.com/image/fetch/$s_!Oca1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F82af6a76-7d88-46e3-a1ad-c29a6b550497_1740x992.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Not really! It will still happen again if you don&#8217;t do the following.</figcaption></figure></div><div><hr></div><h3>10 Tips to Not End Up in the Same Place</h3><p>If the director of Safety and Alignment at Meta Superintelligence Labs can make this mistake, this might happen to you as well. But there are ways to make sure that you do not end up in the same place. Here are 10 of them.</p><ol><li><p>Put your security instructions in the <code>MEMORY.md</code> file rather than just in the conversation. This file is not compacted and acts as a persistent memory for the OpenClaw.<br></p></li><li><p>Then, you have the <code>AGENTS.md</code> file that is the standard operating and procedural rulebook that defines operational rules, security policies, and scope boundaries for OpenClaw to follow. 
Like <code>MEMORY.md</code>, it is loaded into context at the start of every session and survives compaction. Add an instruction here for the bot to check memory and get explicit confirmation before performing any destructive action.<br></p></li><li><p>OpenClaw can build a small vector index over <code>MEMORY.md</code> and <code>memory/*.md</code> so that semantic queries can find related notes even when the wording differs. Make sure that this is enabled in the <code>~/.openclaw/openclaw.json</code> file.</p></li></ol><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;json&quot;,&quot;nodeId&quot;:&quot;944e7b97-4d1e-4877-a40b-67570eaeb433&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-json">{
  "agents": {
    "defaults": {
      "memorySearch": {
        "enabled": true,
        "provider": "local"
      }
    }
  }
}</code></pre></div><p></p><ol start="4"><li><p>OpenClaw has a mechanism called <strong><a href="https://docs.openclaw.ai/concepts/memory#automatic-memory-flush-pre-compaction-ping">Pre-compaction memory flush</a></strong>&nbsp;that is automatically triggered in the background as the context window is about to fill up. This saves all the important details from context to disk before compaction starts. <br><br>To enable this, go to the <code>~/.openclaw/openclaw.json</code> file and make sure it contains the following:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;json&quot;,&quot;nodeId&quot;:&quot;88749993-e21c-495c-8d85-07cc99e6fc76&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-json">{
  "agents": {
    "defaults": {
      "compaction": {
        "reserveTokensFloor": 20000, // Tokens reserved as buffer before compaction triggers
        "memoryFlush": {
          "enabled": true, 
          "softThresholdTokens": 4000, // How far before the reserve floor the flush triggers
          "systemPrompt": "Session nearing compaction. Store durable memories now.", 
          "prompt": "Write any lasting notes to memory/YYYY-MM-DD.md; reply with NO_REPLY if nothing to store."  
        }
      }
    }
  }
}</code></pre></div><p></p></li><li><p>Enable <strong><a href="https://docs.openclaw.ai/gateway/configuration-reference#agents-defaults-contextpruning">Context pruning</a></strong>, which removes old tool results from the context before sending to the bot. Add the following to the <code>~/.openclaw/openclaw.json</code> file:</p></li></ol><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;json&quot;,&quot;nodeId&quot;:&quot;3a9b385e-7d80-4493-8129-b2d8caacb950&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-json">{
  "agents": {
    "defaults": {
      "contextPruning": {
        "mode": "cache-ttl", // Automatically remove tool results after TTL
        "ttl": "6h" // Keep tool results for 6 hours
      }
    }
  }
}</code></pre></div><p></p><ol start="6"><li><p>Use the <code>/context list</code> or <code>/context detail</code> commands in chat regularly to learn <a href="https://docs.openclaw.ai/concepts/context">what is in the bot's context window</a>&nbsp;and whether any contextual files are truncated or missing.<br></p></li><li><p>Always save important instructions manually to the <code>MEMORY.md</code> file by typing &#8220;Save this to <code>MEMORY.md</code>&#8221; in your chat with OpenClaw.<br></p></li><li><p>Don&#8217;t wait for compaction to happen automatically. Instead, perform it manually. Use the <code>/compact</code> command in chat when sessions feel stale, or context is getting bloated, and then give your new instructions to the bot.<br></p></li><li><p>If you still end up with OpenClaw misbehaving, sending <code>/stop</code> in the chat <a href="https://docs.openclaw.ai/tools/subagents#stopping">aborts the session</a> and stops any active sub-agent runs spawned from it, including the nested children.<br></p></li><li><p>Last but not least, if none of the above works, rush to the machine running OpenClaw and pull the power plug. This should most definitely stop it. <br></p></li></ol><p>A very helpful lesson on OpenClaw memory <a href="https://velvetshark.com/openclaw-memory-masterclass">can be found here</a> if you&#8217;re interested in learning about it in more detail.</p><div><hr></div><p>P.S. <strong>This post isn't meant to mock Summer Yue; quite the opposite.</strong> She is one of the very few people who openly admitted a mistake in public that most would hide, given her job role. 
Thanks to her, we're all better engineers because she chose transparency over ego.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!BvN1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F27bf7977-511d-4b8e-8023-74c0cf4b21fb_2168x922.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!BvN1!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F27bf7977-511d-4b8e-8023-74c0cf4b21fb_2168x922.png 424w, https://substackcdn.com/image/fetch/$s_!BvN1!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F27bf7977-511d-4b8e-8023-74c0cf4b21fb_2168x922.png 848w, https://substackcdn.com/image/fetch/$s_!BvN1!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F27bf7977-511d-4b8e-8023-74c0cf4b21fb_2168x922.png 1272w, https://substackcdn.com/image/fetch/$s_!BvN1!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F27bf7977-511d-4b8e-8023-74c0cf4b21fb_2168x922.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!BvN1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F27bf7977-511d-4b8e-8023-74c0cf4b21fb_2168x922.png" width="1456" height="619" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27bf7977-511d-4b8e-8023-74c0cf4b21fb_2168x922.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:619,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:576852,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/192264782?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F27bf7977-511d-4b8e-8023-74c0cf4b21fb_2168x922.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!BvN1!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F27bf7977-511d-4b8e-8023-74c0cf4b21fb_2168x922.png 424w, https://substackcdn.com/image/fetch/$s_!BvN1!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F27bf7977-511d-4b8e-8023-74c0cf4b21fb_2168x922.png 848w, https://substackcdn.com/image/fetch/$s_!BvN1!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F27bf7977-511d-4b8e-8023-74c0cf4b21fb_2168x922.png 1272w, https://substackcdn.com/image/fetch/$s_!BvN1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F27bf7977-511d-4b8e-8023-74c0cf4b21fb_2168x922.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><p><strong>Join the paid tier today to get access to all posts on this newsletter:</strong></p><ul><li><p>&#127752; <a href="https://www.intoai.pub/p/build-and-train-an-llm-from-scratch">Build and train an LLM from scratch</a></p></li><li><p>&#127853; <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Build a Mixture-of-Experts (MoE) LLM from Scratch</a></p></li><li><p>&#128126; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p><strong>&#127856;</strong> <a href="https://www.intoai.pub/p/hierarchical-reasoning-model">Hierarchical Reasoning Model: A Deep Dive</a></p></li><li><p>&#129412; <a href="https://www.intoai.pub/p/building-your-first-ai-agent">Building Your First AI Agent</a></p></li><li><p>&#127880; <a 
href="https://www.intoai.pub/p/distributed-data-parallel">Learn to train deep learning models on multiple GPUs</a></p></li></ul><p>and so many more!</p><div><hr></div><p>You can also read my books on <strong><a href="https://bamaniaashish.gumroad.com/">Gumroad</a></strong> and connect with me on <strong><a href="https://www.linkedin.com/in/ashishbamania/">LinkedIn</a></strong> to stay in touch.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B_dw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" width="1344" height="256" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:256,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[🗓️ This Week In AI Research (15-21 March 26)]]></title><description><![CDATA[The top 10 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-15-21-march</link><guid 
isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-15-21-march</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Wed, 25 Mar 2026 10:27:50 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!UfeV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06af8a6b-fe81-4b2d-8fc8-1ed6ce6dff3d_6912x3072.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!UfeV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06af8a6b-fe81-4b2d-8fc8-1ed6ce6dff3d_6912x3072.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!UfeV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06af8a6b-fe81-4b2d-8fc8-1ed6ce6dff3d_6912x3072.png 424w, https://substackcdn.com/image/fetch/$s_!UfeV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06af8a6b-fe81-4b2d-8fc8-1ed6ce6dff3d_6912x3072.png 848w, https://substackcdn.com/image/fetch/$s_!UfeV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06af8a6b-fe81-4b2d-8fc8-1ed6ce6dff3d_6912x3072.png 1272w, https://substackcdn.com/image/fetch/$s_!UfeV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06af8a6b-fe81-4b2d-8fc8-1ed6ce6dff3d_6912x3072.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!UfeV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06af8a6b-fe81-4b2d-8fc8-1ed6ce6dff3d_6912x3072.png" width="728" height="323.5" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/06af8a6b-fe81-4b2d-8fc8-1ed6ce6dff3d_6912x3072.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:647,&quot;width&quot;:1456,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:2050508,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191360817?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06af8a6b-fe81-4b2d-8fc8-1ed6ce6dff3d_6912x3072.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!UfeV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06af8a6b-fe81-4b2d-8fc8-1ed6ce6dff3d_6912x3072.png 424w, https://substackcdn.com/image/fetch/$s_!UfeV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06af8a6b-fe81-4b2d-8fc8-1ed6ce6dff3d_6912x3072.png 848w, https://substackcdn.com/image/fetch/$s_!UfeV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06af8a6b-fe81-4b2d-8fc8-1ed6ce6dff3d_6912x3072.png 1272w, 
https://substackcdn.com/image/fetch/$s_!UfeV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06af8a6b-fe81-4b2d-8fc8-1ed6ce6dff3d_6912x3072.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h3>1. Mixture-of-Depths Attention</h3><p>As LLMs become deeper, they often suffer from signal degradation. 
This means that important features formed in shallow layers are gradually diluted by repeated residual updates, making them harder to recover in deeper layers.</p><p>This research paper introduces <strong>Mixture-of-Depths Attention (MoDA)</strong>, which allows each attention head to attend to sequence KV pairs at the current layer and to depth KV pairs from preceding layers. </p><p>The authors also describe a hardware-efficient algorithm for MoDA that resolves non-contiguous memory-access patterns, achieving 97.3% of FlashAttention-2&#8217;s efficiency at a sequence length of 64K. </p><p>Experiments on 1.5B-parameter models show that MoDA consistently outperforms strong baselines, and combining MoDA with post-norm yields better performance than using it with pre-norm.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!r5jb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd4045a73-d876-4bce-b173-7ad1d310c8c6_1526x856.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!r5jb!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd4045a73-d876-4bce-b173-7ad1d310c8c6_1526x856.png 424w, https://substackcdn.com/image/fetch/$s_!r5jb!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd4045a73-d876-4bce-b173-7ad1d310c8c6_1526x856.png 848w, https://substackcdn.com/image/fetch/$s_!r5jb!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd4045a73-d876-4bce-b173-7ad1d310c8c6_1526x856.png 1272w, 
https://substackcdn.com/image/fetch/$s_!r5jb!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd4045a73-d876-4bce-b173-7ad1d310c8c6_1526x856.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!r5jb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd4045a73-d876-4bce-b173-7ad1d310c8c6_1526x856.png" width="1456" height="817" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d4045a73-d876-4bce-b173-7ad1d310c8c6_1526x856.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:817,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:192408,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191360817?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd4045a73-d876-4bce-b173-7ad1d310c8c6_1526x856.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!r5jb!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd4045a73-d876-4bce-b173-7ad1d310c8c6_1526x856.png 424w, https://substackcdn.com/image/fetch/$s_!r5jb!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd4045a73-d876-4bce-b173-7ad1d310c8c6_1526x856.png 848w, https://substackcdn.com/image/fetch/$s_!r5jb!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd4045a73-d876-4bce-b173-7ad1d310c8c6_1526x856.png 
1272w, https://substackcdn.com/image/fetch/$s_!r5jb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd4045a73-d876-4bce-b173-7ad1d310c8c6_1526x856.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/abs/2603.15619">using this link</a>.</p><div><hr></div><p>&#127873; Join the <strong>paid tier today</strong> to get access to all posts on this newsletter.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;4dfe90bb-bef0-4c8b-9676-026e3da15513&quot;,&quot;caption&quot;:&quot;&#127873; 
Become a paid subscriber to &#8216;Into AI&#8217; today at a special 25% discount on the annual subscription.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build a Mixture-of-Experts (MoE) Layer from Scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-01-10T17:33:16.673Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!WsMN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79861f59-020d-4802-ac7e-5d253704d639_2892x1118.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:183456424,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:8,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" 
data-attrs="{&quot;nodeId&quot;:&quot;5d826bf3-a499-4bf8-84d6-aba0907fb340&quot;,&quot;caption&quot;:&quot;&#127873; Become a paid subscriber to &#8216;Into AI&#8217; today at a special 25% discount on the annual subscription.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build and train an LLM from Scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-12-31T11:47:16.904Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!MRfV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851366c5-6f74-479a-a06a-41e63fc79f6c_2480x1074.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/build-and-train-an-llm-from-scratch&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:182314468,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:19,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;701ac23b-af68-4540-b6fc-5d2b83f5d678&quot;,&quot;caption&quot;:&quot;We have long ago moved away from training production-grade deep learning models on a single machine.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Learn to train deep learning models on multiple GPUs&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing 
&#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-02-26T13:06:02.566Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!YyVB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/distributed-data-parallel&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:189120446,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:1,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>and so many more!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>2. Attention Residuals</h3><p>This research paper proposes <strong>Attention Residuals (AttnRes)</strong>, an alternative to standard residual connections in transformers. 
</p><p>Typically, models add all outputs from previous layers equally, which can dilute valuable information as the model deepens. <em>AttnRes</em> instead allows each layer to use attention to selectively merge earlier layers, assigning learned, input-dependent weights to focus on the most relevant past information.</p><p>To increase efficiency, <em>Block AttnRes</em> groups layers into blocks and attends to summaries of these blocks rather than every layer, reducing memory costs while preserving most benefits of full <em>AttnRes</em>.</p><p>Overall, this enhances training stability, maintains stronger signals across depth, and improves performance, particularly in large models.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2Txi!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F601d067a-d954-49c9-bb50-7b864973ca14_2226x1316.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!2Txi!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F601d067a-d954-49c9-bb50-7b864973ca14_2226x1316.png 424w, https://substackcdn.com/image/fetch/$s_!2Txi!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F601d067a-d954-49c9-bb50-7b864973ca14_2226x1316.png 848w, https://substackcdn.com/image/fetch/$s_!2Txi!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F601d067a-d954-49c9-bb50-7b864973ca14_2226x1316.png 1272w, 
https://substackcdn.com/image/fetch/$s_!2Txi!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F601d067a-d954-49c9-bb50-7b864973ca14_2226x1316.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!2Txi!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F601d067a-d954-49c9-bb50-7b864973ca14_2226x1316.png" width="1456" height="861" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/601d067a-d954-49c9-bb50-7b864973ca14_2226x1316.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:861,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:381130,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191360817?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F601d067a-d954-49c9-bb50-7b864973ca14_2226x1316.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!2Txi!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F601d067a-d954-49c9-bb50-7b864973ca14_2226x1316.png 424w, https://substackcdn.com/image/fetch/$s_!2Txi!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F601d067a-d954-49c9-bb50-7b864973ca14_2226x1316.png 848w, 
https://substackcdn.com/image/fetch/$s_!2Txi!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F601d067a-d954-49c9-bb50-7b864973ca14_2226x1316.png 1272w, https://substackcdn.com/image/fetch/$s_!2Txi!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F601d067a-d954-49c9-bb50-7b864973ca14_2226x1316.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/abs/2603.15031">using this link</a>.</p><div><hr></div><h3>3. 
Mamba-3</h3><p>This research paper introduces&nbsp;<strong>Mamba-3</strong>, a new sequence model that makes inference much more efficient than transformers (which have quadratic compute and high memory costs).</p><p>The key idea is an inference-first redesign of state space models (SSMs) where Mamba-3 uses a:</p><ul><li><p>More expressive recurrence</p></li><li><p>Complex-valued state update to better track information over time</p></li><li><p>Multi-input multi-output (MIMO) setup that improves performance without slowing down decoding</p></li></ul><p>These enable it to handle difficult tasks such as state tracking and long-range dependencies, areas where previous linear models struggled, while maintaining near-linear efficiency. As a result, Mamba-3 achieves better accuracy with similar or lower memory use than previous models. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!hOh7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc026eded-67ed-4e93-a43d-ee42724f27d7_2380x1252.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!hOh7!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc026eded-67ed-4e93-a43d-ee42724f27d7_2380x1252.png 424w, https://substackcdn.com/image/fetch/$s_!hOh7!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc026eded-67ed-4e93-a43d-ee42724f27d7_2380x1252.png 848w, https://substackcdn.com/image/fetch/$s_!hOh7!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc026eded-67ed-4e93-a43d-ee42724f27d7_2380x1252.png 1272w, 
https://substackcdn.com/image/fetch/$s_!hOh7!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc026eded-67ed-4e93-a43d-ee42724f27d7_2380x1252.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!hOh7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc026eded-67ed-4e93-a43d-ee42724f27d7_2380x1252.png" width="1456" height="766" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c026eded-67ed-4e93-a43d-ee42724f27d7_2380x1252.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:766,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:256160,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191360817?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc026eded-67ed-4e93-a43d-ee42724f27d7_2380x1252.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!hOh7!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc026eded-67ed-4e93-a43d-ee42724f27d7_2380x1252.png 424w, https://substackcdn.com/image/fetch/$s_!hOh7!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc026eded-67ed-4e93-a43d-ee42724f27d7_2380x1252.png 848w, 
https://substackcdn.com/image/fetch/$s_!hOh7!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc026eded-67ed-4e93-a43d-ee42724f27d7_2380x1252.png 1272w, https://substackcdn.com/image/fetch/$s_!hOh7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc026eded-67ed-4e93-a43d-ee42724f27d7_2380x1252.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/abs/2603.15569">using this link</a>.</p><p class="button-wrapper" 
data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>4. V-JEPA 2.1</h3><p>This research paper presents <strong>V-JEPA 2.1</strong>, a family of self-supervised models that learn dense, high-quality visual representations for both images and videos while retaining strong global scene understanding. </p><p>The approach combines four key components:</p><ol><li><p>A dense predictive loss that uses a masking-based objective in which both visible and masked tokens contribute to the training signal, encouraging explicit spatial and temporal grounding. </p></li><li><p>Deep self-supervision that applies the self-supervised objective hierarchically across multiple intermediate encoder layers to improve representation quality. </p></li><li><p>Multi-modal tokenizers that enable unified training across images and videos. </p></li><li><p>Effective scaling in both model capacity and training data.</p></li></ol><p>Empirically, V-JEPA 2.1 achieves state-of-the-art performance on several challenging benchmarks for short-term object-interaction anticipation and high-level action anticipation. 
</p><p>The model also shows strong performance in robotic navigation, depth estimation, and global recognition.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!nPhC!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc80e0918-2d15-472e-9cd4-a7f485ea7519_2392x1186.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!nPhC!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc80e0918-2d15-472e-9cd4-a7f485ea7519_2392x1186.png 424w, https://substackcdn.com/image/fetch/$s_!nPhC!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc80e0918-2d15-472e-9cd4-a7f485ea7519_2392x1186.png 848w, https://substackcdn.com/image/fetch/$s_!nPhC!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc80e0918-2d15-472e-9cd4-a7f485ea7519_2392x1186.png 1272w, https://substackcdn.com/image/fetch/$s_!nPhC!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc80e0918-2d15-472e-9cd4-a7f485ea7519_2392x1186.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!nPhC!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc80e0918-2d15-472e-9cd4-a7f485ea7519_2392x1186.png" width="1456" height="722" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c80e0918-2d15-472e-9cd4-a7f485ea7519_2392x1186.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:722,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2633502,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191360817?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc80e0918-2d15-472e-9cd4-a7f485ea7519_2392x1186.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!nPhC!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc80e0918-2d15-472e-9cd4-a7f485ea7519_2392x1186.png 424w, https://substackcdn.com/image/fetch/$s_!nPhC!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc80e0918-2d15-472e-9cd4-a7f485ea7519_2392x1186.png 848w, https://substackcdn.com/image/fetch/$s_!nPhC!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc80e0918-2d15-472e-9cd4-a7f485ea7519_2392x1186.png 1272w, https://substackcdn.com/image/fetch/$s_!nPhC!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc80e0918-2d15-472e-9cd4-a7f485ea7519_2392x1186.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/abs/2603.14482">using this link</a>.</p><div><hr></div><h3>5. <strong>GPT&#8209;5.4 mini and nano</strong></h3><p>OpenAI released <strong>GPT&#8209;5.4 mini</strong> and <strong>nano</strong>, which are two small, fast, and economical versions of GPT-5.4.</p><p>GPT-5.4 mini is a strong, lightweight general-purpose model, especially for coding, reasoning, multimodal understanding, and tool use, and runs over 2&#215; faster than GPT-5 mini. 
It also approaches the performance of the larger GPT&#8209;5.4 model on several evaluations, including SWE-Bench Pro and OSWorld-Verified.</p><p>GPT&#8209;5.4 nano is the smallest, cheapest version of GPT&#8209;5.4 for tasks where speed and cost matter most, such as classification, data extraction, ranking, and coding subagents that handle simpler supporting tasks.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!J0GI!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1e5d68f-51f5-4540-a0d8-a675613962be_2594x642.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!J0GI!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1e5d68f-51f5-4540-a0d8-a675613962be_2594x642.png 424w, https://substackcdn.com/image/fetch/$s_!J0GI!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1e5d68f-51f5-4540-a0d8-a675613962be_2594x642.png 848w, https://substackcdn.com/image/fetch/$s_!J0GI!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1e5d68f-51f5-4540-a0d8-a675613962be_2594x642.png 1272w, https://substackcdn.com/image/fetch/$s_!J0GI!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1e5d68f-51f5-4540-a0d8-a675613962be_2594x642.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!J0GI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1e5d68f-51f5-4540-a0d8-a675613962be_2594x642.png" width="1456" height="360" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a1e5d68f-51f5-4540-a0d8-a675613962be_2594x642.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:360,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:154449,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191360817?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1e5d68f-51f5-4540-a0d8-a675613962be_2594x642.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!J0GI!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1e5d68f-51f5-4540-a0d8-a675613962be_2594x642.png 424w, https://substackcdn.com/image/fetch/$s_!J0GI!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1e5d68f-51f5-4540-a0d8-a675613962be_2594x642.png 848w, https://substackcdn.com/image/fetch/$s_!J0GI!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1e5d68f-51f5-4540-a0d8-a675613962be_2594x642.png 1272w, https://substackcdn.com/image/fetch/$s_!J0GI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa1e5d68f-51f5-4540-a0d8-a675613962be_2594x642.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Read more about this release <a href="https://openai.com/index/introducing-gpt-5-4-mini-and-nano/">using this link</a>. </p><div><hr></div><h3>6. 
Composer 2</h3><p>Composer 2 is Cursor's frontier-level coding model that offers a new, optimal combination of intelligence and cost, delivering impressive performance on hard benchmarks, including Terminal-Bench 2.0 and SWE-bench Multilingual.</p><p>The model is the result of a continued pretraining run (<a href="https://techcrunch.com/2026/03/22/cursor-admits-its-new-coding-model-was-built-on-top-of-moonshot-ais-kimi/">built on top of</a> the open-source Kimi K2.5 model from Moonshot AI) and further training on long-horizon coding tasks through reinforcement learning, enabling it to solve challenging tasks requiring hundreds of actions.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!QJ_y!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5ea045-3cdf-4d5c-b1f3-9af243992e21_1920x1440.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!QJ_y!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5ea045-3cdf-4d5c-b1f3-9af243992e21_1920x1440.png 424w, https://substackcdn.com/image/fetch/$s_!QJ_y!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5ea045-3cdf-4d5c-b1f3-9af243992e21_1920x1440.png 848w, https://substackcdn.com/image/fetch/$s_!QJ_y!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5ea045-3cdf-4d5c-b1f3-9af243992e21_1920x1440.png 1272w, https://substackcdn.com/image/fetch/$s_!QJ_y!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5ea045-3cdf-4d5c-b1f3-9af243992e21_1920x1440.png 1456w" 
sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!QJ_y!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5ea045-3cdf-4d5c-b1f3-9af243992e21_1920x1440.png" width="1456" height="1092" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fb5ea045-3cdf-4d5c-b1f3-9af243992e21_1920x1440.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1092,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Composer 2 efficiency and quality on CursorBench&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Composer 2 efficiency and quality on CursorBench" title="Composer 2 efficiency and quality on CursorBench" srcset="https://substackcdn.com/image/fetch/$s_!QJ_y!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5ea045-3cdf-4d5c-b1f3-9af243992e21_1920x1440.png 424w, https://substackcdn.com/image/fetch/$s_!QJ_y!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5ea045-3cdf-4d5c-b1f3-9af243992e21_1920x1440.png 848w, https://substackcdn.com/image/fetch/$s_!QJ_y!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5ea045-3cdf-4d5c-b1f3-9af243992e21_1920x1440.png 1272w, https://substackcdn.com/image/fetch/$s_!QJ_y!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5ea045-3cdf-4d5c-b1f3-9af243992e21_1920x1440.png 1456w" sizes="100vw" 
loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this release <a href="https://cursor.com/blog/composer-2">using this link</a>.</p><p>A technical report can be found <a href="https://cursor.com/resources/Composer2.pdf">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>7. 
<strong>BioReason-Pro</strong></h3><p>This research paper introduces <strong>BioReason-Pro</strong>, a new multimodal LLM for predicting protein functions that goes beyond traditional methods that rely on sequence similarity or classification.</p><p>Instead, it explicitly models biological reasoning across sequence, structure, domains, and interaction data. </p><p>BioReason-Pro combines a new autoregressive Gene Ontology predictor (GO-GPT) with a reasoning LLM trained on large-scale synthetic reasoning traces and further improved using reinforcement learning, enabling it to generate clear, step-by-step functional annotations. </p><p>Empirically, BioReason-Pro achieves strong performance and produces higher-quality functional summaries than previous methods. Human experts prefer its annotations over curated UniProt entries about 79% of the time. </p><p>Notably, the model shows emergent capabilities such as:</p><ul><li><p>accurately predicting protein binding partners</p></li><li><p>identifying interaction sites at the residue level, consistent with experimental structures</p></li><li><p>correcting misleading annotations by using contextual reasoning</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B6GT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79366e22-5943-495c-893c-5a068af95346_2724x1094.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!B6GT!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79366e22-5943-495c-893c-5a068af95346_2724x1094.png 424w, 
https://substackcdn.com/image/fetch/$s_!B6GT!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79366e22-5943-495c-893c-5a068af95346_2724x1094.png 848w, https://substackcdn.com/image/fetch/$s_!B6GT!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79366e22-5943-495c-893c-5a068af95346_2724x1094.png 1272w, https://substackcdn.com/image/fetch/$s_!B6GT!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79366e22-5943-495c-893c-5a068af95346_2724x1094.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!B6GT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79366e22-5943-495c-893c-5a068af95346_2724x1094.png" width="1456" height="585" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/79366e22-5943-495c-893c-5a068af95346_2724x1094.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:585,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1169975,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191360817?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79366e22-5943-495c-893c-5a068af95346_2724x1094.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!B6GT!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79366e22-5943-495c-893c-5a068af95346_2724x1094.png 424w, https://substackcdn.com/image/fetch/$s_!B6GT!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79366e22-5943-495c-893c-5a068af95346_2724x1094.png 848w, https://substackcdn.com/image/fetch/$s_!B6GT!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79366e22-5943-495c-893c-5a068af95346_2724x1094.png 1272w, https://substackcdn.com/image/fetch/$s_!B6GT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79366e22-5943-495c-893c-5a068af95346_2724x1094.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://www.biorxiv.org/content/10.64898/2026.03.19.712954v1">using this link</a>.</p><div><hr></div><h3>8. <strong>Hyperagents</strong></h3><p>This research paper introduces <strong>Hyperagents</strong>, a framework that combines a task agent, which solves problems, and a meta agent, which improves the system, into a single editable program. </p><p>Importantly, the improvement process is also editable, allowing metacognitive self-modification: the system can improve the very procedure it uses to improve itself. </p><p>The research builds on the <a href="https://www.intoai.pub/p/darwin-godel-machine-the-first-self?utm_source=publication-search">Darwin G&#246;del Machine (DGM)</a> to create DGM-Hyperagents (DGM-H), which removes DGM's assumption that self-improvement only works within coding. DGM-H outperforms baselines across coding, paper review, robotics reward design, and Olympiad-level math-solution grading, showing meta-level improvements that transfer across domains.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!JkkQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5686150e-10f8-4cfc-99af-6883d755c7c0_2754x1162.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!JkkQ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5686150e-10f8-4cfc-99af-6883d755c7c0_2754x1162.png 424w, https://substackcdn.com/image/fetch/$s_!JkkQ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5686150e-10f8-4cfc-99af-6883d755c7c0_2754x1162.png 848w, https://substackcdn.com/image/fetch/$s_!JkkQ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5686150e-10f8-4cfc-99af-6883d755c7c0_2754x1162.png 1272w, https://substackcdn.com/image/fetch/$s_!JkkQ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5686150e-10f8-4cfc-99af-6883d755c7c0_2754x1162.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!JkkQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5686150e-10f8-4cfc-99af-6883d755c7c0_2754x1162.png" width="1456" height="614" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5686150e-10f8-4cfc-99af-6883d755c7c0_2754x1162.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:614,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:496220,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191360817?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5686150e-10f8-4cfc-99af-6883d755c7c0_2754x1162.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!JkkQ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5686150e-10f8-4cfc-99af-6883d755c7c0_2754x1162.png 424w, https://substackcdn.com/image/fetch/$s_!JkkQ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5686150e-10f8-4cfc-99af-6883d755c7c0_2754x1162.png 848w, https://substackcdn.com/image/fetch/$s_!JkkQ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5686150e-10f8-4cfc-99af-6883d755c7c0_2754x1162.png 1272w, https://substackcdn.com/image/fetch/$s_!JkkQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5686150e-10f8-4cfc-99af-6883d755c7c0_2754x1162.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2603.19461">using this link</a>.</p><div><hr></div><h3>9. LeWorldModel</h3><p>This research paper introduces <strong>LeWorldModel (LeWM)</strong>, which is the first Joint Embedding Predictive Architecture (JEPA) that trains stably end-to-end from raw pixels using only two loss terms:</p><ul><li><p>a next-embedding prediction loss </p></li><li><p>a regularizer enforcing Gaussian-distributed latent embeddings</p></li></ul><p>This reduces the number of tunable loss hyperparameters from 6 to 1 compared to the only existing end-to-end alternative. </p><p>With approximately 15M parameters, trainable on a single GPU in a few hours, LeWM plans up to 48x faster than foundation-model-based world models while remaining competitive across diverse 2D and 3D control tasks. </p><p>Beyond control, the researchers show that LeWM's latent space encodes meaningful physical structure through probing of physical quantities. 
Surprise evaluation confirms that the model reliably detects physically implausible events.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!s6Cn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F644a0420-388a-472b-90a7-2e458792408a_2116x1138.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!s6Cn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F644a0420-388a-472b-90a7-2e458792408a_2116x1138.png 424w, https://substackcdn.com/image/fetch/$s_!s6Cn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F644a0420-388a-472b-90a7-2e458792408a_2116x1138.png 848w, https://substackcdn.com/image/fetch/$s_!s6Cn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F644a0420-388a-472b-90a7-2e458792408a_2116x1138.png 1272w, https://substackcdn.com/image/fetch/$s_!s6Cn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F644a0420-388a-472b-90a7-2e458792408a_2116x1138.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!s6Cn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F644a0420-388a-472b-90a7-2e458792408a_2116x1138.png" width="1456" height="783" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/644a0420-388a-472b-90a7-2e458792408a_2116x1138.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:783,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:433691,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191360817?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F644a0420-388a-472b-90a7-2e458792408a_2116x1138.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!s6Cn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F644a0420-388a-472b-90a7-2e458792408a_2116x1138.png 424w, https://substackcdn.com/image/fetch/$s_!s6Cn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F644a0420-388a-472b-90a7-2e458792408a_2116x1138.png 848w, https://substackcdn.com/image/fetch/$s_!s6Cn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F644a0420-388a-472b-90a7-2e458792408a_2116x1138.png 1272w, https://substackcdn.com/image/fetch/$s_!s6Cn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F644a0420-388a-472b-90a7-2e458792408a_2116x1138.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2603.19312v1">using this link</a>.</p><div><hr></div><h3>10. Nemotron-Cascade 2</h3><p>This research paper from NVIDIA introduces <strong>Nemotron-Cascade 2</strong>, an open 30B <a href="https://www.intoai.pub/p/build-and-train-a-mixture-of-experts?utm_source=publication-search">MoE model</a> with 3B activated parameters that delivers best-in-class reasoning and strong agentic capabilities. </p><p>Despite its compact size, its mathematical and coding reasoning performance approaches that of frontier open models. 
It is the second open-weight LLM, after <em><a href="https://arxiv.org/abs/2512.02556">DeepSeekV3.2-Speciale-671B-A37B</a></em>, to achieve Gold Medal-level performance in the 2025 International Mathematical Olympiad (IMO), the International Olympiad in Informatics (IOI), and the ICPC World Finals, demonstrating remarkably high intelligence density with 20x fewer parameters. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!LWah!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb878a95a-41a0-46e0-be93-a3919faa50aa_2380x1202.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!LWah!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb878a95a-41a0-46e0-be93-a3919faa50aa_2380x1202.png 424w, https://substackcdn.com/image/fetch/$s_!LWah!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb878a95a-41a0-46e0-be93-a3919faa50aa_2380x1202.png 848w, https://substackcdn.com/image/fetch/$s_!LWah!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb878a95a-41a0-46e0-be93-a3919faa50aa_2380x1202.png 1272w, https://substackcdn.com/image/fetch/$s_!LWah!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb878a95a-41a0-46e0-be93-a3919faa50aa_2380x1202.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!LWah!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb878a95a-41a0-46e0-be93-a3919faa50aa_2380x1202.png" width="1456" height="735" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b878a95a-41a0-46e0-be93-a3919faa50aa_2380x1202.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:735,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:360216,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191360817?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb878a95a-41a0-46e0-be93-a3919faa50aa_2380x1202.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!LWah!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb878a95a-41a0-46e0-be93-a3919faa50aa_2380x1202.png 424w, https://substackcdn.com/image/fetch/$s_!LWah!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb878a95a-41a0-46e0-be93-a3919faa50aa_2380x1202.png 848w, https://substackcdn.com/image/fetch/$s_!LWah!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb878a95a-41a0-46e0-be93-a3919faa50aa_2380x1202.png 1272w, https://substackcdn.com/image/fetch/$s_!LWah!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb878a95a-41a0-46e0-be93-a3919faa50aa_2380x1202.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2603.19220">using this link</a>. </p><div><hr></div><p>This article is entirely free to read. 
If you loved reading it, restack and share it with others to <strong>earn referral rewards.</strong> &#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/this-week-in-ai-research-15-21-march?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/p/this-week-in-ai-research-15-21-march?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p><div><hr></div><p>Join the <strong>paid tier today</strong> to get access to all posts on this newsletter:</p><ul><li><p>&#127752; <a href="https://www.intoai.pub/p/build-and-train-an-llm-from-scratch">Build and train an LLM from scratch</a></p></li><li><p>&#127853; <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Build a Mixture-of-Experts (MoE) LLM from Scratch</a></p></li><li><p>&#128126; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p><strong>&#127856;</strong> <a href="https://www.intoai.pub/p/hierarchical-reasoning-model">Hierarchical Reasoning Model: A Deep Dive</a></p></li><li><p>&#129412; <a href="https://www.intoai.pub/p/building-your-first-ai-agent">Building Your First AI Agent</a></p></li><li><p>&#127880; <a href="https://www.intoai.pub/p/distributed-data-parallel">Learn to train deep learning models on multiple GPUs</a></p></li></ul><p>and so many more!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><p>You can also read my books on 
<strong><a href="https://bamaniaashish.gumroad.com/">Gumroad</a></strong> and connect with me on <strong><a href="https://www.linkedin.com/in/ashishbamania/">LinkedIn</a></strong> to stay in touch.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B_dw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" width="1344" height="256" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:256,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[Build and Train a Mixture-of-Experts (MoE) LLM from Scratch]]></title><description><![CDATA[An end-to-end guide to training a Mixture-of-Experts (MoE) LLM from scratch.]]></description><link>https://www.intoai.pub/p/build-and-train-a-mixture-of-experts</link><guid 
isPermaLink="false">https://www.intoai.pub/p/build-and-train-a-mixture-of-experts</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Fri, 20 Mar 2026 11:51:20 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!Jr5y!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Jr5y!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Jr5y!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png 424w, https://substackcdn.com/image/fetch/$s_!Jr5y!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png 848w, https://substackcdn.com/image/fetch/$s_!Jr5y!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png 1272w, https://substackcdn.com/image/fetch/$s_!Jr5y!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!Jr5y!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png" width="1200" height="533.2417582417582" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:647,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:1390858,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/190610837?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Jr5y!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png 424w, https://substackcdn.com/image/fetch/$s_!Jr5y!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png 848w, https://substackcdn.com/image/fetch/$s_!Jr5y!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png 1272w, 
https://substackcdn.com/image/fetch/$s_!Jr5y!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc51b1f7f-35ae-4b36-84ce-8aa3cf7c04f6_7776x3456.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Most modern-day LLMs use the <strong>Mixture of Experts (MoE)</strong> architecture. 
This includes <a href="https://x.ai/news/grok-os">Grok-1</a>, <a href="https://arxiv.org/abs/2401.06066">DeepSeekMoE</a>, <a href="https://openai.com/index/introducing-gpt-oss/">gpt-oss</a>, and <a href="https://arxiv.org/abs/2401.04088">Mixtral</a> (and many other proprietary LLMs whose architectural details aren&#8217;t publicly available).</p><p>In the previous lessons on <em>Into AI</em>, we learned:</p><ul><li><p>&#127752; <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">What the Mixture-of-Experts (MoE) architecture is and how it works</a></p></li><li><p>&#127880; <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-transformer">How to build the Mixture-of-Experts (MoE) decoder-only transformer from scratch</a></p></li></ul><p>In this lesson, we will:</p><ol><li><p>Build an LLM using the MoE transformer with <a href="https://www.intoai.pub/p/grouped-query-attention">Grouped Query Attention (GQA)</a></p></li><li><p>Train it on a publicly available dataset from Wikipedia</p></li><li><p>Generate text from the trained LLM</p></li></ol><p>The following are the steps that we will implement.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!MVtt!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F50625d27-6058-4f43-a0c3-bba935522aa1_2866x574.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!MVtt!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F50625d27-6058-4f43-a0c3-bba935522aa1_2866x574.png 424w, 
https://substackcdn.com/image/fetch/$s_!MVtt!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F50625d27-6058-4f43-a0c3-bba935522aa1_2866x574.png 848w, https://substackcdn.com/image/fetch/$s_!MVtt!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F50625d27-6058-4f43-a0c3-bba935522aa1_2866x574.png 1272w, https://substackcdn.com/image/fetch/$s_!MVtt!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F50625d27-6058-4f43-a0c3-bba935522aa1_2866x574.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!MVtt!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F50625d27-6058-4f43-a0c3-bba935522aa1_2866x574.png" width="1200" height="240.65934065934067" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/50625d27-6058-4f43-a0c3-bba935522aa1_2866x574.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:292,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:167287,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/190610837?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F50625d27-6058-4f43-a0c3-bba935522aa1_2866x574.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!MVtt!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F50625d27-6058-4f43-a0c3-bba935522aa1_2866x574.png 424w, https://substackcdn.com/image/fetch/$s_!MVtt!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F50625d27-6058-4f43-a0c3-bba935522aa1_2866x574.png 848w, https://substackcdn.com/image/fetch/$s_!MVtt!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F50625d27-6058-4f43-a0c3-bba935522aa1_2866x574.png 1272w, https://substackcdn.com/image/fetch/$s_!MVtt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F50625d27-6058-4f43-a0c3-bba935522aa1_2866x574.png 1456w" sizes="100vw"></picture><div></div></div></a></figure></div><p>Let&#8217;s begin!</p><div><hr></div><h3>Import Necessary Packages</h3><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;plaintext&quot;,&quot;nodeId&quot;:&quot;326ad129-8a05-4ecf-be72-a2381cb373dc&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-plaintext"># Standard libraries
import math
import random

# Hide all warnings (not only deprecation warnings)
import warnings
warnings.filterwarnings('ignore')

# PyTorch core
import torch
import torch.nn as nn
import torch.optim as optim

# For processing data
from datasets import load_dataset
from torch.utils.data import Dataset, DataLoader

# BPE tokenizer
import tiktoken

# Training utils
from torch.nn.utils import clip_grad_norm_
from torch.optim.lr_scheduler import CosineAnnealingLR

# Progress bar
from tqdm import tqdm</code></pre></div><div><hr></div><p>Before we move forward, I want to introduce you to the <strong><a href="https://bamaniaashish.gumroad.com/l/visualtech">Visual Tech Bundle</a>.</strong></p><p>It is a collection of visual guides that explain core AI, LLM, Systems design, and Computer science concepts via image-first lessons.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Cc9Q!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3773516-037c-48af-a288-d6d9d3cab751_2560x640.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Cc9Q!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3773516-037c-48af-a288-d6d9d3cab751_2560x640.png 424w, https://substackcdn.com/image/fetch/$s_!Cc9Q!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3773516-037c-48af-a288-d6d9d3cab751_2560x640.png 848w, https://substackcdn.com/image/fetch/$s_!Cc9Q!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3773516-037c-48af-a288-d6d9d3cab751_2560x640.png 1272w, https://substackcdn.com/image/fetch/$s_!Cc9Q!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3773516-037c-48af-a288-d6d9d3cab751_2560x640.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Cc9Q!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3773516-037c-48af-a288-d6d9d3cab751_2560x640.png" width="1200" height="300" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f3773516-037c-48af-a288-d6d9d3cab751_2560x640.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:364,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!Cc9Q!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3773516-037c-48af-a288-d6d9d3cab751_2560x640.png 424w, https://substackcdn.com/image/fetch/$s_!Cc9Q!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3773516-037c-48af-a288-d6d9d3cab751_2560x640.png 848w, https://substackcdn.com/image/fetch/$s_!Cc9Q!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3773516-037c-48af-a288-d6d9d3cab751_2560x640.png 1272w, https://substackcdn.com/image/fetch/$s_!Cc9Q!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff3773516-037c-48af-a288-d6d9d3cab751_2560x640.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Others are already loving these books.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!ZH6L!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ff9ec7c-0279-4ede-a46b-fd9797508ad7_2376x1872.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ZH6L!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ff9ec7c-0279-4ede-a46b-fd9797508ad7_2376x1872.png 424w, https://substackcdn.com/image/fetch/$s_!ZH6L!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ff9ec7c-0279-4ede-a46b-fd9797508ad7_2376x1872.png 848w, https://substackcdn.com/image/fetch/$s_!ZH6L!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ff9ec7c-0279-4ede-a46b-fd9797508ad7_2376x1872.png 1272w, https://substackcdn.com/image/fetch/$s_!ZH6L!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ff9ec7c-0279-4ede-a46b-fd9797508ad7_2376x1872.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ZH6L!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ff9ec7c-0279-4ede-a46b-fd9797508ad7_2376x1872.png" width="1456" height="1147" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7ff9ec7c-0279-4ede-a46b-fd9797508ad7_2376x1872.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1147,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2124317,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://intoai.pub/i/178965288?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ff9ec7c-0279-4ede-a46b-fd9797508ad7_2376x1872.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!ZH6L!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ff9ec7c-0279-4ede-a46b-fd9797508ad7_2376x1872.png 424w, https://substackcdn.com/image/fetch/$s_!ZH6L!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ff9ec7c-0279-4ede-a46b-fd9797508ad7_2376x1872.png 848w, https://substackcdn.com/image/fetch/$s_!ZH6L!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ff9ec7c-0279-4ede-a46b-fd9797508ad7_2376x1872.png 1272w, https://substackcdn.com/image/fetch/$s_!ZH6L!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7ff9ec7c-0279-4ede-a46b-fd9797508ad7_2376x1872.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>This includes <a href="https://www.hubspot.com/company/management/dharmesh-shah">Dharmesh Shah</a>, the co-founder and CEO of HubSpot.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!AoQ3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19f2ca2a-57c5-4057-84e7-82d853963e00_1082x346.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!AoQ3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19f2ca2a-57c5-4057-84e7-82d853963e00_1082x346.png 424w, 
https://substackcdn.com/image/fetch/$s_!AoQ3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19f2ca2a-57c5-4057-84e7-82d853963e00_1082x346.png 848w, https://substackcdn.com/image/fetch/$s_!AoQ3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19f2ca2a-57c5-4057-84e7-82d853963e00_1082x346.png 1272w, https://substackcdn.com/image/fetch/$s_!AoQ3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19f2ca2a-57c5-4057-84e7-82d853963e00_1082x346.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!AoQ3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19f2ca2a-57c5-4057-84e7-82d853963e00_1082x346.png" width="1082" height="346" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/19f2ca2a-57c5-4057-84e7-82d853963e00_1082x346.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:346,&quot;width&quot;:1082,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:71287,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/182081753?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19f2ca2a-57c5-4057-84e7-82d853963e00_1082x346.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" 
srcset="https://substackcdn.com/image/fetch/$s_!AoQ3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19f2ca2a-57c5-4057-84e7-82d853963e00_1082x346.png 424w, https://substackcdn.com/image/fetch/$s_!AoQ3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19f2ca2a-57c5-4057-84e7-82d853963e00_1082x346.png 848w, https://substackcdn.com/image/fetch/$s_!AoQ3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19f2ca2a-57c5-4057-84e7-82d853963e00_1082x346.png 1272w, https://substackcdn.com/image/fetch/$s_!AoQ3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19f2ca2a-57c5-4057-84e7-82d853963e00_1082x346.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Dharmesh&#8217;s reply to my LinkedIn post. (<a href="https://www.linkedin.com/feed/update/urn:li:activity:7408092938876133377/">Source</a>)</figcaption></figure></div><p><strong>&#10084;&#65039; I&#8217;m sure that you will love these too:</strong></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://bamaniaashish.gumroad.com/l/visualtech&quot;,&quot;text&quot;:&quot;Link to the Visual Tech Bundle&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://bamaniaashish.gumroad.com/l/visualtech"><span>Link to the Visual Tech Bundle</span></a></p><p>Now back to our lesson!</p><div><hr></div><h3><strong>Getting our data ready</strong></h3><h4><strong>1. Download the dataset</strong></h4><p>We will use the <a href="https://huggingface.co/datasets/Salesforce/wikitext">WikiText dataset</a> from Hugging Face to train our model. This dataset is derived from verified Wikipedia articles and contains approximately 103 million words.</p><p>It is downloaded as follows.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;6d73618f-1667-4d5f-bed6-ccdd923a98a7&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Load WikiText dataset 
train_data = load_dataset("wikitext", "wikitext-103-v1", split="train") # Train subset
val_data   = load_dataset("wikitext", "wikitext-103-v1", split="validation") # Validation subset</code></pre></div><p>Let&#8217;s check out a training example from this dataset.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;2ab8ebef-cf0d-458c-94fe-9e4e78bfd01c&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Training subset example
print(train_data['text'][807])

"""
Output:
The International Civil Aviation Organization ( ICAO ) defines general aviation ( GA ) 
as " an aircraft operation other than a commercial air transport operation 
or an aerial work operation . " It defines commercial air transport ( CAT ) 
as " an aircraft operation involving the transport of passengers , cargo 
or mail for remuneration or hire " , and aerial work as " an aircraft operation 
in which an aircraft is used for specialized services such as agriculture , 
construction , photography , surveying , observation and patrol , search and 
rescue , aerial advertisement , etc . " 

"""</code></pre></div><p>There&#8217;s a small problem with this dataset: some text examples are blank, and headings are formatted as &#8220; = = HEADING = = &#8221;, as shown below.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;d3ad0889-9c2f-4cc7-aa0a-285f00ef37c2&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Unwanted training subset example
print(train_data['text'][804:807])

"""
Output:
['', ' = = Definitions = = \n', '']
"""</code></pre></div><p>Alongside this, WikiText-103 was built with a vocabulary consisting of words (rather than sub-words/ characters), and any word that appeared fewer than three times was replaced with <code>&lt;unk&gt;</code>.</p><p>This is shown below.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;1671305b-893a-4392-ac1b-507e0c9a6f40&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># UNK marker
print(train_data['text'][80])

"""
Output:
 &lt;unk&gt; &lt;unk&gt; 2 
"""</code></pre></div><p>We need to get rid of these to clean up the dataset. </p><p>We also need an <strong>End-of-Text (EOS) token</strong> called <code>&lt;|endoftext|&gt; </code>between separate sections and articles, so that the model learns natural stopping boundaries and doesn't try to connect unrelated content.</p><p>(<em>We will come back to re-using the EOS token when discussing the helper function for text generation/inference.</em>)</p><p>Both of these changes are made using a helper function as follows.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;c80bc179-1b78-4aa1-9f40-9a0a1318af3d&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">def clean_text(dataset):
    # EOS token
    EOS = "&lt;|endoftext|&gt;"

    cleaned = []
    for text in dataset["text"]:
        # Strip surrounding whitespace
        line = text.strip()

        # Keep only non-empty lines that aren't headings (such as "=== HEADING ===")
        if line and not line.startswith("="):
            # Remove &lt;unk&gt; markers and normalize whitespace
            cleaned.append(" ".join(line.replace("&lt;unk&gt;", "").split()))

    # Join all entries into a string, separated by EOS token
    return EOS.join(cleaned) + EOS
 
# Clean training and validation text
training_text = clean_text(train_data)
validation_text = clean_text(val_data)</code></pre></div><p>A short subset of <code>training_text </code>is shown below.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;7bb98981-91e1-4e87-a881-42b47ee50360&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Print a subset of 'training_text'
print(training_text[3457:3700])

"""
Output:
Each character has a field and distance of movement limited by their 
Action Gauge . Up to nine characters can be assigned to a single mission . 
During gameplay , characters will call out if something happens to them , 
such as their health points ( HP ) getting low or being knocked out
"""</code></pre></div><div><hr></div><h4>2. Tokenize the dataset</h4><p>We previously learned how to build a character-level tokenizer from scratch.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;9690bd15-285a-4839-a238-3b91db0f3144&quot;,&quot;caption&quot;:&quot;We are on a journey to implement a GPT-like LLM from scratch in PyTorch.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build an LLM Tokenizer From Scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-12-23T11:56:28.847Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!UBl3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe0b259a-6a3f-4754-a32a-48d349ae488e_1846x686.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/build-an-llm-tokenizer&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:182161682,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:10,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>We have also used it to <strong><a href="https://www.intoai.pub/p/build-and-train-an-llm-from-scratch">train an LLM from scratch</a></strong>.</p><p>But because it is character-based, training an LLM using it makes the model learn the meaning of text from individual letters rather than semantic units. This makes training painfully difficult.</p><p>For this tutorial, we will instead use&nbsp;<strong><a href="https://github.com/openai/tiktoken">Tiktoken</a>, </strong>a fast BPE tokeniser used in OpenAI's models. It is a sub-word tokenizer (breaks down words into sub-words) that better captures language structure than a character-level tokenizer, enabling faster LLM training.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!VOLh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2d827d2-355f-406c-8c83-73a2de16bbcd_1080x1080.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!VOLh!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2d827d2-355f-406c-8c83-73a2de16bbcd_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!VOLh!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2d827d2-355f-406c-8c83-73a2de16bbcd_1080x1080.png 848w, 
https://substackcdn.com/image/fetch/$s_!VOLh!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2d827d2-355f-406c-8c83-73a2de16bbcd_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!VOLh!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2d827d2-355f-406c-8c83-73a2de16bbcd_1080x1080.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!VOLh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2d827d2-355f-406c-8c83-73a2de16bbcd_1080x1080.png" width="1080" height="1080" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a2d827d2-355f-406c-8c83-73a2de16bbcd_1080x1080.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1080,&quot;width&quot;:1080,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:46398,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/190610837?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2d827d2-355f-406c-8c83-73a2de16bbcd_1080x1080.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!VOLh!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2d827d2-355f-406c-8c83-73a2de16bbcd_1080x1080.png 424w, 
https://substackcdn.com/image/fetch/$s_!VOLh!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2d827d2-355f-406c-8c83-73a2de16bbcd_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!VOLh!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2d827d2-355f-406c-8c83-73a2de16bbcd_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!VOLh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa2d827d2-355f-406c-8c83-73a2de16bbcd_1080x1080.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Image from the author&#8217;s book titled &#8216;<strong><a href="https://bamaniaashish.gumroad.com/l/llmbook/">LLMs In 100 Images</a></strong>.&#8217;</figcaption></figure></div><p>We start by creating an instance of Tiktoken as follows.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;eee7481a-0836-41fa-8ec5-957b2a89a627&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Create an instance of Tokenizer used by GPT-2
tokenizer = tiktoken.get_encoding("gpt2")</code></pre></div><p>The vocabulary size when using the Tiktoken tokenizer for GPT-2 is 50,257. This means that the tokenizer recognizes 50,257 unique subwords.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;0d75f68f-6f10-46c5-b089-e4002d2a716a&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Check vocabulary size
vocab_size = tokenizer.n_vocab

print(f"Vocabulary size: {vocab_size}")

# Output: Vocabulary size: 50257</code></pre></div><div><hr></div><h4><strong>3. Create a dataset required for language modeling</strong></h4><p>Once we have tokenized our dataset, we need to load and serve the data during training. This is where the <code>TextDataset</code> class comes in, which inherits from PyTorch&#8217;s <a href="https://docs.pytorch.org/docs/stable/data.html#torch.utils.data.Dataset">Dataset</a> class, and has the following methods:</p><ul><li><p><code>__init__</code>: Tokenizes given text (tokens to token IDs) and sets the maximum sequence length (<code>max_seq_length</code>)</p></li><li><p><code>__len__</code>:<code> </code>Returns the number of training sequences available</p></li><li><p><code>__getitem__</code>: Returns a training sequence and its targets (tokens shifted forward by one position) at a given index</p></li></ul><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;85dc9b01-6289-4146-b896-62b78e2a2d9a&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Dataset for language modeling
class TextDataset(Dataset):
    def __init__(self, text, tokenizer, max_seq_length):
        # Convert text to token IDs
        self.tokens = tokenizer.encode(text, allowed_special="all")

        # Maximum length of each training sequence
        self.max_seq_length = max_seq_length

    # Get number of valid training sequences
    def __len__(self):
        num_sequences = (len(self.tokens) - 1) // self.max_seq_length
        return num_sequences

    # Get an input sequence and targets
    def __getitem__(self, idx):
        # Start index of the sequence
        start = idx * self.max_seq_length

        # End index of the sequence
        end = start + self.max_seq_length

        # Input token sequence
        input_ids = torch.tensor(self.tokens[start:end], dtype=torch.long)

        # Next-token targets/labels (shifted by one token)
        target_ids = torch.tensor(self.tokens[start+1:end+1], dtype=torch.long)

        return input_ids, target_ids</code></pre></div><p>Let&#8217;s use the <code>TextDataset</code> class to create the training and validation datasets.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;40a91ad4-87cf-4c9c-a23b-57dd88d9d5a1&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Define maximum sequence length
MAX_SEQ_LENGTH = 128

# Create training and validation datasets
train_dataset = TextDataset(training_text, tokenizer, MAX_SEQ_LENGTH)
val_dataset = TextDataset(validation_text, tokenizer, MAX_SEQ_LENGTH)</code></pre></div><p>The number of training sequences in the training and validation datasets is as follows.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;de5651aa-73c7-4395-bffe-b5de8d0f63ab&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">print(f"Number of training sequences: {len(train_dataset):,}")
print(f"Number of validation sequences: {len(val_dataset):,}")

"""
Output:
Number of training sequences: 876,162
Number of validation sequences: 1,828
"""</code></pre></div><p>And this is how a training sequence and its target look.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;4f7cffee-65db-412a-98fa-20024c618830&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Example of a training sequence and its target
# Fetch one (input, target) pair; the names avoid shadowing the builtin input()
input_ids, target_ids = train_dataset[16]

# Raw token IDs
print("Input IDs:\n", input_ids)
print("\nTarget IDs:\n", target_ids)

# Same sequences decoded back to text
print("\nDecoded Input:\n", tokenizer.decode(input_ids.tolist()))
print("\nDecoded Target:\n", tokenizer.decode(target_ids.tolist()))

"""
Output:
Input IDs:
 tensor([  262,  1271,   286, 22469,  4991,   583,  4365,   764,   317,   636,
          286,   428,  8515,  2950,  4441,  3748,  7514, 14520,  4981,   329,
         1123,  2095,   705,    82,  1767,   764,   554,  1502,   284,  4620,
          428,   837,   262, 22849,  4847, 16560,   656,   262,  1218,   983,
          547,  4615,   837,   355,   484,  1718,   510,   257,  1588,  6903,
          286,  4088,  2272,  2622,   329,   262,  8561,   764,  1119,   635,
        12328,   262,  8722,  6460,   290, 10152,   286,   711,   523,   484,
          714,  5198,   284,   649,  1938,   981, 26645,   262,  6393,  6805,
          286,   262,  2168,   705, 11327,   764,   383, 15064,  3341,   547,
         3066,  2402,  1903,   287,  2478,   764,   383,  2095,  9824,   547,
         1760,   416,  8835,    73,   280,   837,   508,   550,  3111,   319,
          262,  2180,   569, 18354,  7496, 17740,  1830,   764,  1649,  4441,
          262, 17871,  5321, 11630,   837,  8835,    73,   280])

Target IDs:
 tensor([ 1271,   286, 22469,  4991,   583,  4365,   764,   317,   636,   286,
          428,  8515,  2950,  4441,  3748,  7514, 14520,  4981,   329,  1123,
         2095,   705,    82,  1767,   764,   554,  1502,   284,  4620,   428,
          837,   262, 22849,  4847, 16560,   656,   262,  1218,   983,   547,
         4615,   837,   355,   484,  1718,   510,   257,  1588,  6903,   286,
         4088,  2272,  2622,   329,   262,  8561,   764,  1119,   635, 12328,
          262,  8722,  6460,   290, 10152,   286,   711,   523,   484,   714,
         5198,   284,   649,  1938,   981, 26645,   262,  6393,  6805,   286,
          262,  2168,   705, 11327,   764,   383, 15064,  3341,   547,  3066,
         2402,  1903,   287,  2478,   764,   383,  2095,  9824,   547,  1760,
          416,  8835,    73,   280,   837,   508,   550,  3111,   319,   262,
         2180,   569, 18354,  7496, 17740,  1830,   764,  1649,  4441,   262,
        17871,  5321, 11630,   837,  8835,    73,   280,   373])

Decoded Input:
  the number of playable units per mission . A part of this upgrade involved creating 
  unique polygon models for each character 's body . In order to achieve this , 
  the cooperative elements incorporated into the second game were removed , 
  as they took up a large portion of memory space needed for the improvements . 
  They also adjusted the difficulty settings and ease of play so they could appeal 
  to new players while retaining the essential components of the series ' gameplay . 
  The newer systems were decided upon early in development . The character designs 
  were done by Honjou , who had worked on the previous Valkyria Chronicles games . 
  When creating the Nameless Squad , Honjou

Decoded Target:
  number of playable units per mission . A part of this upgrade involved creating 
  unique polygon models for each character 's body . In order to achieve this , 
  the cooperative elements incorporated into the second game were removed , 
  as they took up a large portion of memory space needed for the improvements . 
  They also adjusted the difficulty settings and ease of play so they could appeal 
  to new players while retaining the essential components of the series ' gameplay . 
  The newer systems were decided upon early in development . The character designs 
  were done by Honjou , who had worked on the previous Valkyria Chronicles games . 
  When creating the Nameless Squad , Honjou was
"""</code></pre></div><p>Note how the input and target sequences are shifted by one sub-word. This is to help the model learn to predict each next sub-word given the previous ones.</p><div><hr></div><h3><strong>Setting up DataLoader for Batch training</strong></h3><p>Next, we use PyTorch <a href="https://docs.pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader">DataLoader</a> to create iterables that load batches of <code>BATCH_SIZE</code> samples from <code>train_dataset</code> (reshuffled every epoch) and <code>val_dataset</code> (kept in its original order) for training and validation.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;6e0eefa8-efe3-4db7-95e6-3d10a5c808c3&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Define batch size
BATCH_SIZE = 32

# Create training and validation DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)</code></pre></div><p>This creates the following number of batches of training and validation data.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;76481d80-c9c8-4393-b5c6-f3d2153bf697&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">print(f"Number of training batches: {len(train_loader):,}") # Number of training sequences / batch_size
print(f"Number of validation batches: {len(val_loader):,}") # Number of validation sequences / batch_size

"""
Output:
Number of training batches: 27,381
Number of validation batches: 58
"""</code></pre></div><p>Our data is now ready for model training! </p><div><hr></div><h3>Building the MoE Transformer model</h3><p>We will now set up the MoE transformer model that we built in a <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-transformer?utm_source=publication-search">previous lesson</a> with a few changes (based on what modern LLMs follow):</p><ol><li><p>We replace the <a href="https://www.intoai.pub/p/causal-mha?utm_source=publication-search">Causal Multi-head Self-attention</a> module with <strong><a href="https://www.intoai.pub/p/grouped-query-attention?utm_source=publication-search">Causal Grouped-Query Attention</a></strong>.</p></li><li><p>We introduce <strong>Weight tying</strong>, which means we reuse the same weights for both the token embedding matrix and the output projection matrix, rather than learning two separate ones. This works because the two matrices perform reciprocal functions (tokens to embeddings, and embeddings back to token scores) and have the same shape. This decreases the number of learnable parameters in the model by a few million.</p></li><li><p>We <strong>remove the bias term from all linear layers</strong>, following the Llama models.</p></li></ol><p>Let&#8217;s implement these one by one.<br></p><h4>1. Set up Causal Grouped Query Attention</h4><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;7e31e8e8-cef2-4bf8-9afa-bf59b6e339df&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Grouped Query Attention (GQA) with Causal Masking
class GroupedQueryAttention(nn.Module):
    """
    Causal grouped-query self-attention.

    Queries are projected into `num_heads` heads, while keys and values are
    projected into only `num_groups` heads. Each key/value head is repeated so
    that it is shared by `num_heads // num_groups` consecutive query heads.
    None of the linear projections has a bias term.

    Args:
        embedding_dim: Size of the input and output embeddings
        num_heads: Number of query heads (must divide embedding_dim)
        num_groups: Number of key/value heads (must divide num_heads)
        max_seq_length: Side length of the causal mask built at init

    Input shape: [batch_size, sequence_length, embedding_dim]
    """

    def __init__(self, embedding_dim, num_heads, num_groups, max_seq_length):
        super().__init__()

        # Heads must split the embedding evenly, and every KV group
        # must serve the same number of query heads
        assert embedding_dim % num_heads == 0, "embedding_dim must be divisible by num_heads"
        assert num_heads % num_groups == 0, "num_heads must be divisible by num_groups"

        # Sizes supplied by the caller
        self.embedding_dim = embedding_dim
        self.num_heads = num_heads
        self.num_groups = num_groups

        # Derived sizes: width of one head, and query heads per KV group
        self.head_dim = embedding_dim // num_heads
        self.group_size = num_heads // num_groups

        # K and V are narrower than Q because they only have num_groups heads
        kv_dim = num_groups * self.head_dim

        # Projections for query, key, value and the final output (all bias-free)
        self.W_q = nn.Linear(embedding_dim, embedding_dim, bias=False)
        self.W_k = nn.Linear(embedding_dim, kv_dim, bias=False)
        self.W_v = nn.Linear(embedding_dim, kv_dim, bias=False)
        self.W_o = nn.Linear(embedding_dim, embedding_dim, bias=False)

        # Lower-triangular mask of ones, built once here rather than in every forward pass
        lower_triangle = torch.ones(max_seq_length, max_seq_length).tril()

        # Stored with leading batch and head axes of size 1: [1, 1, max_seq_length, max_seq_length].
        # As a registered buffer it is part of the module's state and follows it across devices.
        self.register_buffer("causal_mask", lower_triangle.unsqueeze(0).unsqueeze(0))

    def _to_heads(self, x, head_count):
        """
        Reshapes [batch_size, sequence_length, head_count * head_dim]
        into [batch_size, head_count, sequence_length, head_dim]
        """
        batch_size, sequence_length, _ = x.shape
        per_head = x.reshape(batch_size, sequence_length, head_count, self.head_dim)

        # Put the head axis in front of the sequence axis
        return per_head.transpose(1, 2)

    def _split_heads(self, x):
        """
        Splits Q from
        [batch_size, sequence_length, embedding_dim]
        into
        [batch_size, num_heads, sequence_length, head_dim]
        """
        return self._to_heads(x, self.num_heads)

    def _split_groups(self, x):
        """
        Splits K or V from
        [batch_size, sequence_length, num_groups * head_dim]
        into
        [batch_size, num_groups, sequence_length, head_dim]
        """
        return self._to_heads(x, self.num_groups)

    def _merge_heads(self, x):
        """
        Inverse of the head split: turns
        [batch_size, num_heads, sequence_length, head_dim]
        back into
        [batch_size, sequence_length, num_heads * head_dim]
        """
        batch_size, num_heads, sequence_length, head_dim = x.shape

        # Bring the sequence axis forward, then flatten (num_heads, head_dim) into one axis
        return x.transpose(1, 2).reshape(batch_size, sequence_length, num_heads * head_dim)

    def forward(self, x):
        # x: [batch_size, sequence_length, embedding_dim]
        _, sequence_length, _ = x.shape

        # Project and split: Q gets num_heads heads, K and V get num_groups heads
        queries = self._split_heads(self.W_q(x))   # [batch_size, num_heads, sequence_length, head_dim]
        keys = self._split_groups(self.W_k(x))     # [batch_size, num_groups, sequence_length, head_dim]
        values = self._split_groups(self.W_v(x))   # [batch_size, num_groups, sequence_length, head_dim]

        # Repeat each KV head group_size times so it lines up with its query heads
        keys = keys.repeat_interleave(self.group_size, dim=1)      # [batch_size, num_heads, sequence_length, head_dim]
        values = values.repeat_interleave(self.group_size, dim=1)  # [batch_size, num_heads, sequence_length, head_dim]

        # Scaled dot-product scores
        scores = torch.matmul(queries, keys.transpose(-2, -1)) / math.sqrt(self.head_dim)

        # Crop the stored mask to this sequence length and set the
        # scores of future positions (mask value 0) to -inf
        visible = self.causal_mask[:, :, :sequence_length, :sequence_length]
        scores = scores.masked_fill(visible == 0, float('-inf'))

        # Softmax over the key axis gives the attention weights
        attention = scores.softmax(dim=-1)

        # Weighted sum of values, with the heads merged back into one axis
        context = self._merge_heads(torch.matmul(attention, values))

        # Obtain final output
        output = self.W_o(context)

        return output</code></pre></div><div><hr></div><h4>2. Set up Expert and Router</h4><p>An <strong>Expert</strong> is simply a small feed-forward network that specializes in a particular skill, which may involve processing different token types (numerical/code/language-related tokens), reasoning patterns, context patterns, and so on.</p>
      <p>
          <a href="https://www.intoai.pub/p/build-and-train-a-mixture-of-experts">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[🗓️ This Week In AI Research (8-14 March 26)]]></title><description><![CDATA[The top 10 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-8-14-march</link><guid isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-8-14-march</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Thu, 19 Mar 2026 11:14:13 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!z4We!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66b69714-e5e1-4546-9e97-a31ba168bf8e_2742x1208.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!z4We!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66b69714-e5e1-4546-9e97-a31ba168bf8e_2742x1208.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!z4We!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66b69714-e5e1-4546-9e97-a31ba168bf8e_2742x1208.png 424w, https://substackcdn.com/image/fetch/$s_!z4We!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66b69714-e5e1-4546-9e97-a31ba168bf8e_2742x1208.png 848w, https://substackcdn.com/image/fetch/$s_!z4We!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66b69714-e5e1-4546-9e97-a31ba168bf8e_2742x1208.png 1272w, 
https://substackcdn.com/image/fetch/$s_!z4We!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66b69714-e5e1-4546-9e97-a31ba168bf8e_2742x1208.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!z4We!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66b69714-e5e1-4546-9e97-a31ba168bf8e_2742x1208.png" width="724.859375" height="319.11734847184067" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/66b69714-e5e1-4546-9e97-a31ba168bf8e_2742x1208.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:641,&quot;width&quot;:1456,&quot;resizeWidth&quot;:724.859375,&quot;bytes&quot;:949021,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191119957?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66b69714-e5e1-4546-9e97-a31ba168bf8e_2742x1208.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!z4We!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66b69714-e5e1-4546-9e97-a31ba168bf8e_2742x1208.png 424w, https://substackcdn.com/image/fetch/$s_!z4We!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66b69714-e5e1-4546-9e97-a31ba168bf8e_2742x1208.png 848w, 
https://substackcdn.com/image/fetch/$s_!z4We!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66b69714-e5e1-4546-9e97-a31ba168bf8e_2742x1208.png 1272w, https://substackcdn.com/image/fetch/$s_!z4We!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F66b69714-e5e1-4546-9e97-a31ba168bf8e_2742x1208.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h3>1. 
<strong>Covenant-72B</strong></h3><p>This research paper presents <strong>Covenant-72B</strong>, an LLM produced by the largest globally distributed collaborative pre-training run to date, in terms of both compute and model scale.</p><p>This pre-training involved open, permissionless participation, supported by a live blockchain protocol, <strong><a href="https://bittensor.com/about">Bittensor</a></strong>, and a state-of-the-art, communication-efficient optimizer called <strong>SparseLoCo</strong>, which enabled dynamic participation with peers joining and leaving freely.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!j9Ek!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ffa3367-fc5e-4d54-81be-83e4571c91e2_2014x950.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!j9Ek!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ffa3367-fc5e-4d54-81be-83e4571c91e2_2014x950.png 424w, https://substackcdn.com/image/fetch/$s_!j9Ek!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ffa3367-fc5e-4d54-81be-83e4571c91e2_2014x950.png 848w, https://substackcdn.com/image/fetch/$s_!j9Ek!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ffa3367-fc5e-4d54-81be-83e4571c91e2_2014x950.png 1272w, https://substackcdn.com/image/fetch/$s_!j9Ek!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ffa3367-fc5e-4d54-81be-83e4571c91e2_2014x950.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!j9Ek!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ffa3367-fc5e-4d54-81be-83e4571c91e2_2014x950.png" width="1456" height="687" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0ffa3367-fc5e-4d54-81be-83e4571c91e2_2014x950.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:687,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:368610,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191119957?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ffa3367-fc5e-4d54-81be-83e4571c91e2_2014x950.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!j9Ek!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ffa3367-fc5e-4d54-81be-83e4571c91e2_2014x950.png 424w, https://substackcdn.com/image/fetch/$s_!j9Ek!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ffa3367-fc5e-4d54-81be-83e4571c91e2_2014x950.png 848w, https://substackcdn.com/image/fetch/$s_!j9Ek!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ffa3367-fc5e-4d54-81be-83e4571c91e2_2014x950.png 1272w, https://substackcdn.com/image/fetch/$s_!j9Ek!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ffa3367-fc5e-4d54-81be-83e4571c91e2_2014x950.png 
1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Covenant-72B was pre-trained on approximately 1.1T tokens and performs at par with fully centralized models pre-trained on similar or higher compute budgets.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!gTO_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4df4f3a8-fe66-4426-b732-012dc603053e_2076x738.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!gTO_!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4df4f3a8-fe66-4426-b732-012dc603053e_2076x738.png 424w, https://substackcdn.com/image/fetch/$s_!gTO_!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4df4f3a8-fe66-4426-b732-012dc603053e_2076x738.png 848w, https://substackcdn.com/image/fetch/$s_!gTO_!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4df4f3a8-fe66-4426-b732-012dc603053e_2076x738.png 1272w, https://substackcdn.com/image/fetch/$s_!gTO_!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4df4f3a8-fe66-4426-b732-012dc603053e_2076x738.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!gTO_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4df4f3a8-fe66-4426-b732-012dc603053e_2076x738.png" width="1456" height="518" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4df4f3a8-fe66-4426-b732-012dc603053e_2076x738.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:518,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:178517,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191119957?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4df4f3a8-fe66-4426-b732-012dc603053e_2076x738.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" 
alt="" srcset="https://substackcdn.com/image/fetch/$s_!gTO_!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4df4f3a8-fe66-4426-b732-012dc603053e_2076x738.png 424w, https://substackcdn.com/image/fetch/$s_!gTO_!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4df4f3a8-fe66-4426-b732-012dc603053e_2076x738.png 848w, https://substackcdn.com/image/fetch/$s_!gTO_!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4df4f3a8-fe66-4426-b732-012dc603053e_2076x738.png 1272w, https://substackcdn.com/image/fetch/$s_!gTO_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4df4f3a8-fe66-4426-b732-012dc603053e_2076x738.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>This shows that open and fully democratized participation is possible on an unprecedented scale for a globally distributed LLM pre-training run.</p><p><em>Read more about this research <a href="https://arxiv.org/abs/2603.08163">using this link</a>.</em></p><div><hr></div><p>&#127873; Unlock every lesson in the newsletter and accelerate your AI engineering career with a&nbsp;<strong>25% discount on the annual membership</strong>&nbsp;of &#8216;Into AI&#8217; today!</p><ul><li><p>&#127752; <a href="https://www.intoai.pub/p/build-and-train-an-llm-from-scratch">Build and train an LLM from scratch</a></p></li><li><p>&#127853; <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Build a Mixture-of-Experts (MoE) LLM from Scratch</a></p></li><li><p>&#128126; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p><strong>&#127856;</strong> <a href="https://www.intoai.pub/p/hierarchical-reasoning-model">Hierarchical Reasoning Model: A Deep Dive</a></p></li><li><p>&#129412; <a href="https://www.intoai.pub/p/building-your-first-ai-agent">Building Your First AI Agent</a></p></li><li><p>&#127880;<a href="https://www.intoai.pub/p/distributed-data-parallel">Learn to train deep learning models on multiple GPUs</a></p></li></ul><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Grab your 25% discount today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" 
href="https://www.intoai.pub/741bb486"><span>Grab your 25% discount today</span></a></p><div><hr></div><h3>2. Training Language Models via Neural Cellular Automata</h3><p>Natural LLM pre-training data has its flaws, as it is finite, contains human biases, and mixes knowledge with reasoning.</p><p>This research paper proposes using neural cellular automata (NCA) to generate synthetic, non-linguistic data for <strong>pre-pre-training LLMs</strong>, which means training on synthetic data first, followed by natural language.</p><p>This is because NCA data has a rich spatiotemporal structure and statistics that resemble natural language, while being controllable and cheap to generate at scale.</p><p>Pre-pre-training is surprisingly effective, and training on only 164M NCA tokens improves downstream language modeling by up to 6% and accelerates convergence by up to 1.6x. </p><p>This approach of using NCA even outperforms pre-pre-training on 1.6B tokens of natural language from Common Crawl with more compute, and the gains transfer to reasoning benchmarks, such as GSM8K, HumanEval, and BigBench-Lite. 
</p><p>Further experiments show that attention layers transfer best, and that optimal NCA complexity depends on the task, with simpler dynamics working better for code, while more complex ones benefit math and web text more.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!FBd0!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8d6b625-3279-4556-b3c8-c3732cf895e1_1742x1130.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!FBd0!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8d6b625-3279-4556-b3c8-c3732cf895e1_1742x1130.png 424w, https://substackcdn.com/image/fetch/$s_!FBd0!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8d6b625-3279-4556-b3c8-c3732cf895e1_1742x1130.png 848w, https://substackcdn.com/image/fetch/$s_!FBd0!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8d6b625-3279-4556-b3c8-c3732cf895e1_1742x1130.png 1272w, https://substackcdn.com/image/fetch/$s_!FBd0!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8d6b625-3279-4556-b3c8-c3732cf895e1_1742x1130.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!FBd0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8d6b625-3279-4556-b3c8-c3732cf895e1_1742x1130.png" width="1456" height="944" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c8d6b625-3279-4556-b3c8-c3732cf895e1_1742x1130.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:944,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:547559,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191119957?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8d6b625-3279-4556-b3c8-c3732cf895e1_1742x1130.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!FBd0!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8d6b625-3279-4556-b3c8-c3732cf895e1_1742x1130.png 424w, https://substackcdn.com/image/fetch/$s_!FBd0!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8d6b625-3279-4556-b3c8-c3732cf895e1_1742x1130.png 848w, https://substackcdn.com/image/fetch/$s_!FBd0!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8d6b625-3279-4556-b3c8-c3732cf895e1_1742x1130.png 1272w, https://substackcdn.com/image/fetch/$s_!FBd0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc8d6b625-3279-4556-b3c8-c3732cf895e1_1742x1130.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><em>Read more about this research <a href="https://www.arxiv.org/abs/2603.10055">using this link</a>.</em></p><div><hr></div><h3>3. Lost in Backpropagation: The LM Head is a Gradient Bottleneck</h3><p>The final layer of LLMs, called the LM head, maps the hidden dimension (<code>D</code>) to the vocabulary dimension (<code>V</code>), where <code>D</code> is usually much smaller than <code>V</code>. This creates an expressivity limitation called the&nbsp;<strong>softmax bottleneck</strong>. 
</p><p>This research paper shows that the softmax bottleneck is not only an expressivity bottleneck but also an optimization bottleneck where high-dimensional gradients are compressed into a much smaller space, leading to most of the training signal (around 95&#8211;99% of the gradient norm) getting lost.</p><p>This leads to poor update directions and makes even simple patterns harder to learn, slowing down training and harming efficiency across models.</p><p>The research suggests that current LLM architectures have a fundamental flaw in how they learn and that better LM head designs are needed.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!bLtL!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8da8128a-2e57-4523-9888-9e7557bb80f8_1864x956.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!bLtL!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8da8128a-2e57-4523-9888-9e7557bb80f8_1864x956.png 424w, https://substackcdn.com/image/fetch/$s_!bLtL!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8da8128a-2e57-4523-9888-9e7557bb80f8_1864x956.png 848w, https://substackcdn.com/image/fetch/$s_!bLtL!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8da8128a-2e57-4523-9888-9e7557bb80f8_1864x956.png 1272w, https://substackcdn.com/image/fetch/$s_!bLtL!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8da8128a-2e57-4523-9888-9e7557bb80f8_1864x956.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!bLtL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8da8128a-2e57-4523-9888-9e7557bb80f8_1864x956.png" width="1456" height="747" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8da8128a-2e57-4523-9888-9e7557bb80f8_1864x956.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:747,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:370313,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191119957?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8da8128a-2e57-4523-9888-9e7557bb80f8_1864x956.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!bLtL!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8da8128a-2e57-4523-9888-9e7557bb80f8_1864x956.png 424w, https://substackcdn.com/image/fetch/$s_!bLtL!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8da8128a-2e57-4523-9888-9e7557bb80f8_1864x956.png 848w, https://substackcdn.com/image/fetch/$s_!bLtL!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8da8128a-2e57-4523-9888-9e7557bb80f8_1864x956.png 1272w, https://substackcdn.com/image/fetch/$s_!bLtL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8da8128a-2e57-4523-9888-9e7557bb80f8_1864x956.png 1456w" sizes="100vw" 
loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><em>Read more about this research <a href="https://arxiv.org/abs/2603.10145">using this link</a>.</em></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Grab your 25% discount today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/741bb486"><span>Grab your 25% discount today</span></a></p><div><hr></div><h3>4. 
OpenClaw-RL: Train Any Agent Simply by Talking</h3><p>This research paper presents <strong>OpenClaw-RL</strong>, a reinforcement learning framework in which AI agents learn continuously from real-time interactions, using next-state signals (such as user replies, tool outputs, or environment changes) as training data. </p><p>These signals offer evaluative feedback (how well an action is performed, expressed as scalar rewards via a PRM judge) and directive feedback (how the action should be improved, expressed as textual hints and used for token-level supervision via Hindsight-Guided On-Policy Distillation, or OPD).</p><p>By blending these, OpenClaw-RL supports more effective learning than traditional methods. </p><p>Its asynchronous, decoupled architecture also allows agents to improve during live use in various environments, such as chat, tools, and software tasks, without requiring offline datasets or manual labeling.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!OfpP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc467a5d7-4c4f-4fe8-82cc-4ad9fba8410c_1618x1054.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!OfpP!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc467a5d7-4c4f-4fe8-82cc-4ad9fba8410c_1618x1054.png 424w, https://substackcdn.com/image/fetch/$s_!OfpP!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc467a5d7-4c4f-4fe8-82cc-4ad9fba8410c_1618x1054.png 848w, 
https://substackcdn.com/image/fetch/$s_!OfpP!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc467a5d7-4c4f-4fe8-82cc-4ad9fba8410c_1618x1054.png 1272w, https://substackcdn.com/image/fetch/$s_!OfpP!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc467a5d7-4c4f-4fe8-82cc-4ad9fba8410c_1618x1054.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!OfpP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc467a5d7-4c4f-4fe8-82cc-4ad9fba8410c_1618x1054.png" width="1456" height="948" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c467a5d7-4c4f-4fe8-82cc-4ad9fba8410c_1618x1054.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:948,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:486644,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191119957?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc467a5d7-4c4f-4fe8-82cc-4ad9fba8410c_1618x1054.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!OfpP!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc467a5d7-4c4f-4fe8-82cc-4ad9fba8410c_1618x1054.png 424w, 
https://substackcdn.com/image/fetch/$s_!OfpP!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc467a5d7-4c4f-4fe8-82cc-4ad9fba8410c_1618x1054.png 848w, https://substackcdn.com/image/fetch/$s_!OfpP!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc467a5d7-4c4f-4fe8-82cc-4ad9fba8410c_1618x1054.png 1272w, https://substackcdn.com/image/fetch/$s_!OfpP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc467a5d7-4c4f-4fe8-82cc-4ad9fba8410c_1618x1054.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><em>Read more about this research <a href="https://arxiv.org/abs/2603.10165">using this link</a>.</em></p><div><hr></div><h3>5. How Far Can Unsupervised RLVR Scale LLM Training?</h3><p>This research paper studies a training approach called <strong>Unsupervised Reinforcement Learning with Verifiable Rewards (unsupervised RLVR)</strong>, in which models learn without labeled data by generating their own rewards from signals that can be automatically verified.</p><p>The paper categorises unsupervised RLVR methods into intrinsic (self-generated rewards) and external (externally verified rewards), and shows that intrinsic methods mainly strengthen a model&#8217;s existing beliefs and work only if those beliefs are already correct, but can lead to collapse if they are wrong.</p><p>Intrinsic rewards still help with test-time training on small datasets, and the authors propose the Model Collapse Step, which acts as a practical indicator for RL trainability (predicting when training will fail). 
</p><p>They also present early evidence that external reward methods may overcome these limitations, suggesting more scalable approaches.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!e85t!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f0bb518-f34e-4d1b-a801-5db5c9802d20_2260x1340.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!e85t!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f0bb518-f34e-4d1b-a801-5db5c9802d20_2260x1340.png 424w, https://substackcdn.com/image/fetch/$s_!e85t!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f0bb518-f34e-4d1b-a801-5db5c9802d20_2260x1340.png 848w, https://substackcdn.com/image/fetch/$s_!e85t!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f0bb518-f34e-4d1b-a801-5db5c9802d20_2260x1340.png 1272w, https://substackcdn.com/image/fetch/$s_!e85t!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f0bb518-f34e-4d1b-a801-5db5c9802d20_2260x1340.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!e85t!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f0bb518-f34e-4d1b-a801-5db5c9802d20_2260x1340.png" width="1456" height="863" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3f0bb518-f34e-4d1b-a801-5db5c9802d20_2260x1340.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:863,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:951001,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191119957?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f0bb518-f34e-4d1b-a801-5db5c9802d20_2260x1340.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!e85t!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f0bb518-f34e-4d1b-a801-5db5c9802d20_2260x1340.png 424w, https://substackcdn.com/image/fetch/$s_!e85t!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f0bb518-f34e-4d1b-a801-5db5c9802d20_2260x1340.png 848w, https://substackcdn.com/image/fetch/$s_!e85t!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f0bb518-f34e-4d1b-a801-5db5c9802d20_2260x1340.png 1272w, https://substackcdn.com/image/fetch/$s_!e85t!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f0bb518-f34e-4d1b-a801-5db5c9802d20_2260x1340.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><em>Read more about this research <a href="https://arxiv.org/abs/2603.08660">using this link</a>.</em></p><div><hr></div><h3>6. Attend Before Attention</h3><p>This research paper introduces <strong>AutoGaze</strong> for multimodal LLMs (MLLMs), an efficient method for processing videos by selectively focusing on important regions (like human gaze) before applying full attention, rather than analyzing every pixel equally.</p><p>Such gazing lets the model ignore unnecessary or static information, enabling faster, easier-to-scale video understanding while maintaining high performance. 
</p><p>Experiments show that this approach greatly increases efficiency (higher processing FPS) and allows for real-time video processing, offering a practical way to scale multimodal models to longer or streaming video inputs.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!nIM5!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1d0e72cb-50c5-46c0-8781-dd0ff9d6e30e_2482x1106.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!nIM5!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1d0e72cb-50c5-46c0-8781-dd0ff9d6e30e_2482x1106.png 424w, https://substackcdn.com/image/fetch/$s_!nIM5!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1d0e72cb-50c5-46c0-8781-dd0ff9d6e30e_2482x1106.png 848w, https://substackcdn.com/image/fetch/$s_!nIM5!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1d0e72cb-50c5-46c0-8781-dd0ff9d6e30e_2482x1106.png 1272w, https://substackcdn.com/image/fetch/$s_!nIM5!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1d0e72cb-50c5-46c0-8781-dd0ff9d6e30e_2482x1106.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!nIM5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1d0e72cb-50c5-46c0-8781-dd0ff9d6e30e_2482x1106.png" width="1456" height="649" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1d0e72cb-50c5-46c0-8781-dd0ff9d6e30e_2482x1106.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:649,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:725586,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191119957?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1d0e72cb-50c5-46c0-8781-dd0ff9d6e30e_2482x1106.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!nIM5!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1d0e72cb-50c5-46c0-8781-dd0ff9d6e30e_2482x1106.png 424w, https://substackcdn.com/image/fetch/$s_!nIM5!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1d0e72cb-50c5-46c0-8781-dd0ff9d6e30e_2482x1106.png 848w, https://substackcdn.com/image/fetch/$s_!nIM5!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1d0e72cb-50c5-46c0-8781-dd0ff9d6e30e_2482x1106.png 1272w, https://substackcdn.com/image/fetch/$s_!nIM5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1d0e72cb-50c5-46c0-8781-dd0ff9d6e30e_2482x1106.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><em>Read more about this research <a href="https://arxiv.org/abs/2603.12254">using this link</a>.</em></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Grab your 25% discount today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/741bb486"><span>Grab your 25% discount today</span></a></p><div><hr></div><h3>7. 
Geometry-Grounded Point Transformer</h3><p>Feed-forward networks suffer from geometric inconsistencies and achieve only limited fine-grained accuracy in 3D reconstruction (i.e., predicting dense point maps directly from RGB images).</p><p>This research paper introduces the <strong>Geometry-Grounded Point Transformer (GGPT)</strong>, a method for improving 3D reconstruction from images by combining fast neural predictions with explicit geometric guidance.</p><p>It uses an improved Structure-from-Motion pipeline to estimate accurate camera poses and partial 3D structure from sparse input views, and then uses a geometry-guided 3D point transformer to refine dense point maps under explicit partial-geometry supervision using an optimised guidance encoding.</p><p>Experiments show that GGPT produces reconstructions that are both geometrically consistent and spatially complete. </p><p>Despite being trained solely on <a href="https://arxiv.org/abs/2308.11417">ScanNet++</a> with VGGT predictions, it generalises across architectures and datasets, significantly outperforming SOTA feed-forward 3D reconstruction models in both in-domain and out-of-domain settings.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!1Icv!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a77289b-e628-4e9a-9ca6-0b5e9220d409_2646x1124.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!1Icv!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a77289b-e628-4e9a-9ca6-0b5e9220d409_2646x1124.png 424w, 
https://substackcdn.com/image/fetch/$s_!1Icv!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a77289b-e628-4e9a-9ca6-0b5e9220d409_2646x1124.png 848w, https://substackcdn.com/image/fetch/$s_!1Icv!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a77289b-e628-4e9a-9ca6-0b5e9220d409_2646x1124.png 1272w, https://substackcdn.com/image/fetch/$s_!1Icv!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a77289b-e628-4e9a-9ca6-0b5e9220d409_2646x1124.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!1Icv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a77289b-e628-4e9a-9ca6-0b5e9220d409_2646x1124.png" width="1456" height="618" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5a77289b-e628-4e9a-9ca6-0b5e9220d409_2646x1124.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:618,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3472856,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191119957?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a77289b-e628-4e9a-9ca6-0b5e9220d409_2646x1124.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!1Icv!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a77289b-e628-4e9a-9ca6-0b5e9220d409_2646x1124.png 424w, https://substackcdn.com/image/fetch/$s_!1Icv!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a77289b-e628-4e9a-9ca6-0b5e9220d409_2646x1124.png 848w, https://substackcdn.com/image/fetch/$s_!1Icv!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a77289b-e628-4e9a-9ca6-0b5e9220d409_2646x1124.png 1272w, https://substackcdn.com/image/fetch/$s_!1Icv!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5a77289b-e628-4e9a-9ca6-0b5e9220d409_2646x1124.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><em>Read more about this research <a href="https://arxiv.org/abs/2603.11174">using this link</a>.</em></p><div><hr></div><h3>8. The Attack and Defense Landscape of Agentic AI</h3><p>This research paper is a comprehensive survey of security risks in AI agent systems that examines how design choices, such as tool use, memory, and autonomy, create new attack surfaces.</p><p>It uses case studies to highlight existing gaps in securing agentic AI systems and identifies open challenges in this domain.</p><p>It also introduces the first systematic framework for understanding the security risks and defense strategies of AI agents, serving as a foundation for building secure agentic systems and advancing research.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!fm0C!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b63e377-2642-4d14-b204-cb870631ca28_2144x1076.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!fm0C!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b63e377-2642-4d14-b204-cb870631ca28_2144x1076.png 424w, https://substackcdn.com/image/fetch/$s_!fm0C!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b63e377-2642-4d14-b204-cb870631ca28_2144x1076.png 848w, 
https://substackcdn.com/image/fetch/$s_!fm0C!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b63e377-2642-4d14-b204-cb870631ca28_2144x1076.png 1272w, https://substackcdn.com/image/fetch/$s_!fm0C!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b63e377-2642-4d14-b204-cb870631ca28_2144x1076.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!fm0C!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b63e377-2642-4d14-b204-cb870631ca28_2144x1076.png" width="1456" height="731" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2b63e377-2642-4d14-b204-cb870631ca28_2144x1076.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:731,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:467991,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191119957?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b63e377-2642-4d14-b204-cb870631ca28_2144x1076.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!fm0C!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b63e377-2642-4d14-b204-cb870631ca28_2144x1076.png 424w, 
https://substackcdn.com/image/fetch/$s_!fm0C!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b63e377-2642-4d14-b204-cb870631ca28_2144x1076.png 848w, https://substackcdn.com/image/fetch/$s_!fm0C!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b63e377-2642-4d14-b204-cb870631ca28_2144x1076.png 1272w, https://substackcdn.com/image/fetch/$s_!fm0C!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2b63e377-2642-4d14-b204-cb870631ca28_2144x1076.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><em>Read more about this research <a href="https://arxiv.org/abs/2603.11088">using this link</a>.</em></p><div><hr></div><h3>9. IndexCache</h3><p>This research paper presents <strong>IndexCache</strong>, a method that improves sparse attention in LLMs by reusing attention indices across layers rather than recalculating them each time. </p><p>In a standard sparse attention mechanism, models repeatedly select the most relevant tokens at each layer, which is computationally expensive. This research shows that these selections are often similar across layers and can be cached and reused.</p><p>By adding this cross-layer index reuse method, the approach reduces computation and latency while maintaining model performance, making long-context and agent-style workloads more efficient and scalable.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!3mMr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6fce7334-3e23-4869-ac21-3b61c9bc3261_2666x950.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!3mMr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6fce7334-3e23-4869-ac21-3b61c9bc3261_2666x950.png 424w, https://substackcdn.com/image/fetch/$s_!3mMr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6fce7334-3e23-4869-ac21-3b61c9bc3261_2666x950.png 848w, 
https://substackcdn.com/image/fetch/$s_!3mMr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6fce7334-3e23-4869-ac21-3b61c9bc3261_2666x950.png 1272w, https://substackcdn.com/image/fetch/$s_!3mMr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6fce7334-3e23-4869-ac21-3b61c9bc3261_2666x950.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!3mMr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6fce7334-3e23-4869-ac21-3b61c9bc3261_2666x950.png" width="1456" height="519" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6fce7334-3e23-4869-ac21-3b61c9bc3261_2666x950.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:519,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:222025,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191119957?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6fce7334-3e23-4869-ac21-3b61c9bc3261_2666x950.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!3mMr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6fce7334-3e23-4869-ac21-3b61c9bc3261_2666x950.png 424w, https://substackcdn.com/image/fetch/$s_!3mMr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6fce7334-3e23-4869-ac21-3b61c9bc3261_2666x950.png 
848w, https://substackcdn.com/image/fetch/$s_!3mMr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6fce7334-3e23-4869-ac21-3b61c9bc3261_2666x950.png 1272w, https://substackcdn.com/image/fetch/$s_!3mMr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6fce7334-3e23-4869-ac21-3b61c9bc3261_2666x950.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><em>Read more about this research <a href="https://arxiv.org/abs/2603.12201">using this link</a>.</em></p><div><hr></div><h3>10. 
Neural Thickets</h3><p>This research paper suggests that large pre-trained LLMs already have many good task-specific expert solutions in the neighborhood of their weights, and that uncovering them does not require extensive training or optimization. </p><p>Instead of using complex optimization methods, the authors propose a simple, fully parallel post-training approach where they make small random changes to the model parameters, select the top-performing variants, and combine their predictions using majority voting. </p><p>This straightforward approach is surprisingly effective and works almost as well as standard post-training methods such as PPO, GRPO, and ES for contemporary large-scale models.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!qr2d!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9eda28d-95e6-42bd-9378-7a4282d29ae2_2120x1268.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!qr2d!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9eda28d-95e6-42bd-9378-7a4282d29ae2_2120x1268.png 424w, https://substackcdn.com/image/fetch/$s_!qr2d!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9eda28d-95e6-42bd-9378-7a4282d29ae2_2120x1268.png 848w, https://substackcdn.com/image/fetch/$s_!qr2d!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9eda28d-95e6-42bd-9378-7a4282d29ae2_2120x1268.png 1272w, 
https://substackcdn.com/image/fetch/$s_!qr2d!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9eda28d-95e6-42bd-9378-7a4282d29ae2_2120x1268.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!qr2d!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9eda28d-95e6-42bd-9378-7a4282d29ae2_2120x1268.png" width="1456" height="871" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e9eda28d-95e6-42bd-9378-7a4282d29ae2_2120x1268.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:871,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:600797,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/191119957?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9eda28d-95e6-42bd-9378-7a4282d29ae2_2120x1268.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!qr2d!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9eda28d-95e6-42bd-9378-7a4282d29ae2_2120x1268.png 424w, https://substackcdn.com/image/fetch/$s_!qr2d!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9eda28d-95e6-42bd-9378-7a4282d29ae2_2120x1268.png 848w, 
https://substackcdn.com/image/fetch/$s_!qr2d!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9eda28d-95e6-42bd-9378-7a4282d29ae2_2120x1268.png 1272w, https://substackcdn.com/image/fetch/$s_!qr2d!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9eda28d-95e6-42bd-9378-7a4282d29ae2_2120x1268.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><em>Read more about this research <a href="https://arxiv.org/abs/2603.12228">using this link</a>.</em></p><div><hr></div><p>This 
article is entirely free to read. If you loved reading it, restack and share it with others to <strong>earn referral rewards.</strong> &#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/this-week-in-ai-research-8-14-march?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/p/this-week-in-ai-research-8-14-march?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p><div><hr></div><p>Join the <strong>paid tier today</strong> <strong>at a 25% discount</strong> to get access to all posts on this newsletter!</p><ul><li><p>&#127752; <a href="https://www.intoai.pub/p/build-and-train-an-llm-from-scratch">Build and train an LLM from scratch</a></p></li><li><p>&#127853; <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Build a Mixture-of-Experts (MoE) LLM from Scratch</a></p></li><li><p>&#128126; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p><strong>&#127856;</strong> <a href="https://www.intoai.pub/p/hierarchical-reasoning-model">Hierarchical Reasoning Model: A Deep Dive</a></p></li><li><p>&#129412; <a href="https://www.intoai.pub/p/building-your-first-ai-agent">Building Your First AI Agent</a></p></li><li><p>&#127880; <a href="https://www.intoai.pub/p/distributed-data-parallel">Learn to train deep learning models on multiple GPUs</a></p></li></ul><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Grab your 25% discount today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" 
href="https://www.intoai.pub/741bb486"><span>Grab your 25% discount today</span></a></p><div><hr></div><p>You can also check out my books on <strong><a href="https://bamaniaashish.gumroad.com/">Gumroad</a></strong> and connect with me on <strong><a href="https://www.linkedin.com/in/ashishbamania/">LinkedIn</a></strong> to stay in touch.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B_dw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" width="1344" height="256" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:256,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[🗓️ This Week In AI Research (1-7 March 26)]]></title><description><![CDATA[The top 10 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-1-7-march</link><guid 
isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-1-7-march</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Tue, 10 Mar 2026 12:38:02 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!3tcx!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c85f129-a65a-40b4-b3b9-54964749befd_6912x3072.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!3tcx!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c85f129-a65a-40b4-b3b9-54964749befd_6912x3072.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!3tcx!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c85f129-a65a-40b4-b3b9-54964749befd_6912x3072.png 424w, https://substackcdn.com/image/fetch/$s_!3tcx!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c85f129-a65a-40b4-b3b9-54964749befd_6912x3072.png 848w, https://substackcdn.com/image/fetch/$s_!3tcx!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c85f129-a65a-40b4-b3b9-54964749befd_6912x3072.png 1272w, https://substackcdn.com/image/fetch/$s_!3tcx!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c85f129-a65a-40b4-b3b9-54964749befd_6912x3072.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!3tcx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c85f129-a65a-40b4-b3b9-54964749befd_6912x3072.png" width="725.203125" height="322.25715788118134" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7c85f129-a65a-40b4-b3b9-54964749befd_6912x3072.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:647,&quot;width&quot;:1456,&quot;resizeWidth&quot;:725.203125,&quot;bytes&quot;:5006735,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/190199560?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c85f129-a65a-40b4-b3b9-54964749befd_6912x3072.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!3tcx!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c85f129-a65a-40b4-b3b9-54964749befd_6912x3072.png 424w, https://substackcdn.com/image/fetch/$s_!3tcx!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c85f129-a65a-40b4-b3b9-54964749befd_6912x3072.png 848w, https://substackcdn.com/image/fetch/$s_!3tcx!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c85f129-a65a-40b4-b3b9-54964749befd_6912x3072.png 1272w, 
https://substackcdn.com/image/fetch/$s_!3tcx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c85f129-a65a-40b4-b3b9-54964749befd_6912x3072.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h3>1. 
Introducing GPT&#8209;5.4</h3><p>OpenAI released <strong>GPT-5.4</strong>, with improved reasoning, coding, and professional knowledge capabilities, enabling the development of more dependable AI agents.</p><ul><li><p>GPT-5.4 can perform hard tasks across documents, spreadsheets, presentations, and software tools, producing higher-quality outputs and helping teams complete workflows faster.</p></li><li><p>It is the first general-purpose model with native, state-of-the-art computer-use capabilities. </p></li><li><p>It supports up to 1M tokens of context, allowing agents to plan, execute, and verify tasks across long horizons.</p></li></ul><p>It is available across ChatGPT, the OpenAI API, and Codex, with variants such as GPT-5.4 Thinking for deeper reasoning and GPT-5.4 Pro for more demanding workloads.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!n0nP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ca35322-cfe7-4688-b4cd-f5dda787f008_1598x1148.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!n0nP!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ca35322-cfe7-4688-b4cd-f5dda787f008_1598x1148.png 424w, https://substackcdn.com/image/fetch/$s_!n0nP!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ca35322-cfe7-4688-b4cd-f5dda787f008_1598x1148.png 848w, https://substackcdn.com/image/fetch/$s_!n0nP!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ca35322-cfe7-4688-b4cd-f5dda787f008_1598x1148.png 1272w, 
https://substackcdn.com/image/fetch/$s_!n0nP!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ca35322-cfe7-4688-b4cd-f5dda787f008_1598x1148.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!n0nP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ca35322-cfe7-4688-b4cd-f5dda787f008_1598x1148.png" width="1456" height="1046" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5ca35322-cfe7-4688-b4cd-f5dda787f008_1598x1148.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1046,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:86668,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/190199560?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ca35322-cfe7-4688-b4cd-f5dda787f008_1598x1148.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!n0nP!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ca35322-cfe7-4688-b4cd-f5dda787f008_1598x1148.png 424w, https://substackcdn.com/image/fetch/$s_!n0nP!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ca35322-cfe7-4688-b4cd-f5dda787f008_1598x1148.png 848w, 
https://substackcdn.com/image/fetch/$s_!n0nP!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ca35322-cfe7-4688-b4cd-f5dda787f008_1598x1148.png 1272w, https://substackcdn.com/image/fetch/$s_!n0nP!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ca35322-cfe7-4688-b4cd-f5dda787f008_1598x1148.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this release <a href="https://openai.com/index/introducing-gpt-5-4/">using this link</a>.</p><div><hr></div><h3><strong>&#128680; A 
Special Offer</strong></h3><p><strong>Grab a 25% discount on the annual subscription</strong> to the newsletter and get access to articles like these, and so much more:</p><ul><li><p>&#127752; <a href="https://www.intoai.pub/p/build-and-train-an-llm-from-scratch">Build and train an LLM from scratch</a></p></li><li><p>&#127853; <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Build a Mixture-of-Experts (MoE) LLM from Scratch</a></p></li><li><p>&#127851; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p><strong>&#127856;</strong> <a href="https://www.intoai.pub/p/hierarchical-reasoning-model">Hierarchical Reasoning Model: A Deep Dive</a></p></li><li><p>&#127846; <a href="https://www.intoai.pub/p/train-a-cnn-with-pytorch-ddp">Learn to train a deep learning model on multiple GPUs with Distributed PyTorch</a></p></li></ul><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Your 25% off&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/741bb486"><span>Your 25% off</span></a></p><div><hr></div><h3>2. <strong>FlashAttention-4</strong></h3><p>This research paper introduces the latest version, <strong>FlashAttention-4</strong>, which makes transformer attention faster and more efficient on modern GPUs.</p><p>FlashAttention-3 optimized attention for Hopper GPUs, specifically targeting the H100 architecture. 
But the industry has moved to Blackwell-based systems such as the B200 and GB200, which come with fundamentally different performance characteristics.</p><p>FlashAttention-4 optimizes attention on Blackwell GPUs by:</p><ul><li><p>Redesigning pipelines that take advantage of fully asynchronous Matrix Multiply-Accumulate (MMA) operations and larger tile sizes</p></li><li><p>Software-based implementations of exponential and conditional softmax rescaling, which reduce the number of non-matrix operations and improve overall speed</p></li><li><p>Better use of GPU tensor memory and the 2-CTA MMA mode (which allows two Cooperative Thread Arrays to jointly execute a single Matrix Multiply-Accumulate operation), which reduces shared-memory traffic and atomic adds required during the backward pass</p></li></ul><p>FlashAttention-4 achieves up to:</p><ul><li><p>a 1.3&#215; speedup over cuDNN 9.13</p></li><li><p>a 2.7&#215; speedup over Triton on B200 GPUs with BF16, reaching up to 1613 TFLOPs/s (71% utilization)</p></li></ul><p>The researchers also implement FlashAttention-4 entirely in CuTe-DSL embedded in Python, resulting in 20-30&#215; faster compile times compared to traditional C++ template-based approaches while maintaining full expressivity.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!72Nk!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6472f19d-3442-4286-89ab-a14c00ec5c28_2068x1274.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!72Nk!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6472f19d-3442-4286-89ab-a14c00ec5c28_2068x1274.png 424w, 
https://substackcdn.com/image/fetch/$s_!72Nk!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6472f19d-3442-4286-89ab-a14c00ec5c28_2068x1274.png 848w, https://substackcdn.com/image/fetch/$s_!72Nk!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6472f19d-3442-4286-89ab-a14c00ec5c28_2068x1274.png 1272w, https://substackcdn.com/image/fetch/$s_!72Nk!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6472f19d-3442-4286-89ab-a14c00ec5c28_2068x1274.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!72Nk!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6472f19d-3442-4286-89ab-a14c00ec5c28_2068x1274.png" width="1456" height="897" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6472f19d-3442-4286-89ab-a14c00ec5c28_2068x1274.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:897,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1636886,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/190199560?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6472f19d-3442-4286-89ab-a14c00ec5c28_2068x1274.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!72Nk!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6472f19d-3442-4286-89ab-a14c00ec5c28_2068x1274.png 424w, https://substackcdn.com/image/fetch/$s_!72Nk!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6472f19d-3442-4286-89ab-a14c00ec5c28_2068x1274.png 848w, https://substackcdn.com/image/fetch/$s_!72Nk!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6472f19d-3442-4286-89ab-a14c00ec5c28_2068x1274.png 1272w, https://substackcdn.com/image/fetch/$s_!72Nk!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6472f19d-3442-4286-89ab-a14c00ec5c28_2068x1274.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2603.05451">using this link</a>.</p><div><hr></div><h3><strong>3. Solving an Open Problem in Theoretical Physics using AI-Assisted Discovery</strong></h3><p>This research paper describes a neuro-symbolic system that autonomously solved an open problem in theoretical physics. </p><p>This system combines <strong>Gemini Deep Think</strong> with a systematic Tree Search (TS) algorithm and an automated numerical feedback system to explore mathematical ideas and verify them computationally.</p><p>The system successfully derived new and exact analytical solutions for the power spectrum of gravitational radiation emitted by cosmic strings. 
</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Oe9Z!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F941da312-3a4b-48c7-b9f8-73d0ff51b1f1_2296x1068.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Oe9Z!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F941da312-3a4b-48c7-b9f8-73d0ff51b1f1_2296x1068.png 424w, https://substackcdn.com/image/fetch/$s_!Oe9Z!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F941da312-3a4b-48c7-b9f8-73d0ff51b1f1_2296x1068.png 848w, https://substackcdn.com/image/fetch/$s_!Oe9Z!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F941da312-3a4b-48c7-b9f8-73d0ff51b1f1_2296x1068.png 1272w, https://substackcdn.com/image/fetch/$s_!Oe9Z!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F941da312-3a4b-48c7-b9f8-73d0ff51b1f1_2296x1068.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Oe9Z!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F941da312-3a4b-48c7-b9f8-73d0ff51b1f1_2296x1068.png" width="1456" height="677" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/941da312-3a4b-48c7-b9f8-73d0ff51b1f1_2296x1068.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:677,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:254369,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/190199560?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F941da312-3a4b-48c7-b9f8-73d0ff51b1f1_2296x1068.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Oe9Z!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F941da312-3a4b-48c7-b9f8-73d0ff51b1f1_2296x1068.png 424w, https://substackcdn.com/image/fetch/$s_!Oe9Z!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F941da312-3a4b-48c7-b9f8-73d0ff51b1f1_2296x1068.png 848w, https://substackcdn.com/image/fetch/$s_!Oe9Z!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F941da312-3a4b-48c7-b9f8-73d0ff51b1f1_2296x1068.png 1272w, https://substackcdn.com/image/fetch/$s_!Oe9Z!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F941da312-3a4b-48c7-b9f8-73d0ff51b1f1_2296x1068.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">The research system identified 6 different analytical methods, the most elegant of which uses the <a href="https://en.wikipedia.org/wiki/Gegenbauer_polynomials">Gegenbauer expansion</a>.</figcaption></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2603.04735">using this link</a>.</p><div><hr></div><h3>4. 
<strong>The Spike, the Sparse, and the Sink</strong> </h3><p>This research paper studies two phenomena that usually co-occur and often involve the same tokens in Transformer-based LLMs:</p><ol><li><p><strong>Massive activations</strong>: where some tokens produce extremely large activations (&#8220;Spikes&#8221;)</p></li><li><p><strong>Attention sinks</strong>: where some tokens consistently attract attention disproportionate to their semantic importance (&#8220;Sinks&#8221;)</p></li></ol><p>Experiments show that:</p><ul><li><p>The co-occurrence of the two is an architectural artifact of modern Transformer design.</p></li><li><p>Massive activations work globally to create nearly constant hidden representations that persist across many layers, effectively acting like implicit parameters of the model.</p></li><li><p>Attention sinks work locally by influencing attention outputs across heads, pushing individual heads to focus more on short-range dependencies.</p></li><li><p><strong>Pre-norm</strong> is the main architectural choice that allows these two phenomena to appear together, and removing pre-norm causes them to decouple.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!b1vm!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547f63e9-b6b7-4ebb-8943-bafb61f79797_2524x916.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!b1vm!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547f63e9-b6b7-4ebb-8943-bafb61f79797_2524x916.png 424w, 
https://substackcdn.com/image/fetch/$s_!b1vm!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547f63e9-b6b7-4ebb-8943-bafb61f79797_2524x916.png 848w, https://substackcdn.com/image/fetch/$s_!b1vm!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547f63e9-b6b7-4ebb-8943-bafb61f79797_2524x916.png 1272w, https://substackcdn.com/image/fetch/$s_!b1vm!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547f63e9-b6b7-4ebb-8943-bafb61f79797_2524x916.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!b1vm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547f63e9-b6b7-4ebb-8943-bafb61f79797_2524x916.png" width="1456" height="528" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/547f63e9-b6b7-4ebb-8943-bafb61f79797_2524x916.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:528,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:378103,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/190199560?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547f63e9-b6b7-4ebb-8943-bafb61f79797_2524x916.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!b1vm!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547f63e9-b6b7-4ebb-8943-bafb61f79797_2524x916.png 
424w, https://substackcdn.com/image/fetch/$s_!b1vm!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547f63e9-b6b7-4ebb-8943-bafb61f79797_2524x916.png 848w, https://substackcdn.com/image/fetch/$s_!b1vm!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547f63e9-b6b7-4ebb-8943-bafb61f79797_2524x916.png 1272w, https://substackcdn.com/image/fetch/$s_!b1vm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547f63e9-b6b7-4ebb-8943-bafb61f79797_2524x916.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2603.05498">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>5. Speculative Speculative Decoding</h3><p><a href="https://pytorch.org/blog/hitchhikers-guide-speculative-decoding/">Speculative decoding</a> has become a standard way to accelerate autoregressive inference in LLMs.</p><p>It uses a fast draft model to predict upcoming tokens from a slower, larger target model, and then verifies them in parallel with a single forward pass of the target model.</p><p>However, the method is still partly sequential because the draft must wait for verification before making the next speculation.</p><p>This research paper introduces <strong>Speculative speculative decoding (SSD)</strong>, which parallelizes the speculation and verification operations.</p><p>While verification is ongoing, the draft model predicts likely verification outcomes and prepares pre-emptive speculations for them. 
If the actual verification outcome is then in the predicted set, a speculation is returned immediately, removing drafting delays entirely.</p><p>This method is further improved, leading to <strong>Saguaro</strong>, an optimized SSD algorithm that is up to 5x faster than autoregressive decoding on open-source inference engines.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_VBO!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9b9ca8e-0e0f-4bc9-a5e4-e72261568cae_2172x1122.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!_VBO!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9b9ca8e-0e0f-4bc9-a5e4-e72261568cae_2172x1122.png 424w, https://substackcdn.com/image/fetch/$s_!_VBO!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9b9ca8e-0e0f-4bc9-a5e4-e72261568cae_2172x1122.png 848w, https://substackcdn.com/image/fetch/$s_!_VBO!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9b9ca8e-0e0f-4bc9-a5e4-e72261568cae_2172x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!_VBO!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9b9ca8e-0e0f-4bc9-a5e4-e72261568cae_2172x1122.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!_VBO!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9b9ca8e-0e0f-4bc9-a5e4-e72261568cae_2172x1122.png" width="1456" height="752" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d9b9ca8e-0e0f-4bc9-a5e4-e72261568cae_2172x1122.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:752,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:425451,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/190199560?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9b9ca8e-0e0f-4bc9-a5e4-e72261568cae_2172x1122.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!_VBO!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9b9ca8e-0e0f-4bc9-a5e4-e72261568cae_2172x1122.png 424w, https://substackcdn.com/image/fetch/$s_!_VBO!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9b9ca8e-0e0f-4bc9-a5e4-e72261568cae_2172x1122.png 848w, https://substackcdn.com/image/fetch/$s_!_VBO!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9b9ca8e-0e0f-4bc9-a5e4-e72261568cae_2172x1122.png 1272w, https://substackcdn.com/image/fetch/$s_!_VBO!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd9b9ca8e-0e0f-4bc9-a5e4-e72261568cae_2172x1122.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2603.03251">using this link</a>.</p><div><hr></div><h3>6. Olmo Hybrid</h3><p>Hybrid LLMs use an architecture that combines transformer attention and linear RNN layers.</p><p>Despite previous releases such as Nemotron-H, Qwen3-Next, Kimi Linear, and Qwen 3.5, it was unclear if the purported benefits of hybrid architectures justify the cost of scaling them up.</p><p>This debate has been settled by Ai2 researchers, who released the fully open-source&nbsp;<strong>Olmo Hybrid</strong>, a 7B-parameter model largely comparable to Olmo 3 7B, but with its sliding-window layers replaced by Gated DeltaNet layers.</p><p>Olmo Hybrid is trained using 49% fewer training tokens and outperforms Olmo 3 across standard pre-training and mid-training evaluations. 
</p><p>This is empirical proof that hybrid architectures not only reduce memory usage during inference but also yield more expressive models that scale better during pretraining.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Ayym!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f23d489-135b-4a08-9187-1a409d7b7883_930x810.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Ayym!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f23d489-135b-4a08-9187-1a409d7b7883_930x810.png 424w, https://substackcdn.com/image/fetch/$s_!Ayym!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f23d489-135b-4a08-9187-1a409d7b7883_930x810.png 848w, https://substackcdn.com/image/fetch/$s_!Ayym!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f23d489-135b-4a08-9187-1a409d7b7883_930x810.png 1272w, https://substackcdn.com/image/fetch/$s_!Ayym!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f23d489-135b-4a08-9187-1a409d7b7883_930x810.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Ayym!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f23d489-135b-4a08-9187-1a409d7b7883_930x810.png" width="930" height="810" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7f23d489-135b-4a08-9187-1a409d7b7883_930x810.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:810,&quot;width&quot;:930,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Ayym!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f23d489-135b-4a08-9187-1a409d7b7883_930x810.png 424w, https://substackcdn.com/image/fetch/$s_!Ayym!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f23d489-135b-4a08-9187-1a409d7b7883_930x810.png 848w, https://substackcdn.com/image/fetch/$s_!Ayym!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f23d489-135b-4a08-9187-1a409d7b7883_930x810.png 1272w, https://substackcdn.com/image/fetch/$s_!Ayym!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f23d489-135b-4a08-9187-1a409d7b7883_930x810.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path 
d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Architecture comparison for Olmo 3 and Olmo Hybrid. Olmo Hybrid replaces the sliding-window attention layers from Olmo 3 with Gated DeltaNet.</figcaption></figure></div><p>Read more about this research <a href="https://allenai.org/papers/olmo-hybrid">using this link</a>.</p><div><hr></div><h3>7. <strong>Reasoning Models Struggle to Control their Chains of Thought</strong></h3><p>Chain-of-thought (CoT) monitoring is a promising method for understanding the intent of modern reasoning LLMs and detecting their misbehavior.</p><p>This research paper examines whether CoT monitoring is a reliable safety tool or whether reasoning models can control what they reveal in their chain-of-thought (CoT), a phenomenon known as CoT controllability.</p><p>For this, the researchers introduce an evaluation suite called <strong>CoT-Control</strong>, which includes tasks that require reasoning models to solve problems while adhering to CoT instructions. 
(For example, reasoning about a genetics question without using the word &#8220;chromosome&#8221;.)</p><p>Experiments show that:</p><ul><li><p>Reasoning models have significantly lower CoT controllability than output controllability. For example, Claude Sonnet 4.5 can control its CoT only 2.7% of the time, compared with 61.9% of the time for its final output. </p></li><li><p>CoT controllability is higher for larger models and decreases with more RL training, test-time compute, and increased problem difficulty. </p></li><li><p>CoT controllability failures extend even to situations in which models are given incentives (as opposed to direct requests) to evade CoT monitors.</p></li><li><p>Models exhibit slightly higher controllability when they are told they are being monitored. </p></li><li><p>Eliciting controllability by adversarially optimizing prompts does not meaningfully increase controllability. </p></li></ul><p>The overall results indicate that CoT controllability is currently unlikely to be a failure mode of CoT monitorability, but it should still be tracked by frontier labs in future models.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!6Y9w!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb139b2ba-577b-4232-b77f-ccafb57827f0_1778x1356.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!6Y9w!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb139b2ba-577b-4232-b77f-ccafb57827f0_1778x1356.png 424w, https://substackcdn.com/image/fetch/$s_!6Y9w!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb139b2ba-577b-4232-b77f-ccafb57827f0_1778x1356.png 848w, 
https://substackcdn.com/image/fetch/$s_!6Y9w!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb139b2ba-577b-4232-b77f-ccafb57827f0_1778x1356.png 1272w, https://substackcdn.com/image/fetch/$s_!6Y9w!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb139b2ba-577b-4232-b77f-ccafb57827f0_1778x1356.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!6Y9w!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb139b2ba-577b-4232-b77f-ccafb57827f0_1778x1356.png" width="1456" height="1110" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b139b2ba-577b-4232-b77f-ccafb57827f0_1778x1356.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1110,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:382348,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/190199560?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb139b2ba-577b-4232-b77f-ccafb57827f0_1778x1356.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!6Y9w!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb139b2ba-577b-4232-b77f-ccafb57827f0_1778x1356.png 424w, 
https://substackcdn.com/image/fetch/$s_!6Y9w!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb139b2ba-577b-4232-b77f-ccafb57827f0_1778x1356.png 848w, https://substackcdn.com/image/fetch/$s_!6Y9w!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb139b2ba-577b-4232-b77f-ccafb57827f0_1778x1356.png 1272w, https://substackcdn.com/image/fetch/$s_!6Y9w!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb139b2ba-577b-4232-b77f-ccafb57827f0_1778x1356.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2603.05706">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>8. Phi-4-reasoning-vision-15B </h3><p>This research paper from Microsoft introduces <strong>Phi-4-reasoning-vision-15B</strong>, a 15B-parameter multimodal reasoning model that can process both text and images. </p><p>The model combines the Phi-4-Reasoning backbone with a SigLIP-2 vision encoder in a mid-fusion architecture that converts images into visual tokens for reasoning. </p><p>It performs efficient multimodal reasoning across tasks such as math, science, OCR, document understanding, and GUI interaction. This is made possible by the use of a high-resolution visual encoder and by training the model on a mix of reasoning and non-reasoning data with explicit mode tokens. This setup allows the model to provide fast, direct answers for simple tasks and chain-of-thought reasoning for more complex ones. </p><p>The paper shows that the greatest performance improvements come from systematic data filtering, error correction, and augmentation with synthetic data. 
This tells us that data quality is the main factor behind how well the model performs.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!7YQ7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d6233-1e86-4563-ab45-9490f73de576_1882x948.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!7YQ7!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d6233-1e86-4563-ab45-9490f73de576_1882x948.png 424w, https://substackcdn.com/image/fetch/$s_!7YQ7!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d6233-1e86-4563-ab45-9490f73de576_1882x948.png 848w, https://substackcdn.com/image/fetch/$s_!7YQ7!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d6233-1e86-4563-ab45-9490f73de576_1882x948.png 1272w, https://substackcdn.com/image/fetch/$s_!7YQ7!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d6233-1e86-4563-ab45-9490f73de576_1882x948.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!7YQ7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d6233-1e86-4563-ab45-9490f73de576_1882x948.png" width="1456" height="733" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fb5d6233-1e86-4563-ab45-9490f73de576_1882x948.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:733,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:203648,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/190199560?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d6233-1e86-4563-ab45-9490f73de576_1882x948.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!7YQ7!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d6233-1e86-4563-ab45-9490f73de576_1882x948.png 424w, https://substackcdn.com/image/fetch/$s_!7YQ7!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d6233-1e86-4563-ab45-9490f73de576_1882x948.png 848w, https://substackcdn.com/image/fetch/$s_!7YQ7!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d6233-1e86-4563-ab45-9490f73de576_1882x948.png 1272w, https://substackcdn.com/image/fetch/$s_!7YQ7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffb5d6233-1e86-4563-ab45-9490f73de576_1882x948.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2603.03975">using this link</a>.</p><div><hr></div><h3>9. 
Helios</h3><p>This research paper presents <strong>Helios</strong>, the first 14B autoregressive diffusion model that can create high-quality minute-scale videos in real time at about 19.5 FPS on a single NVIDIA H100 GPU.</p><p>Helios also supports text-to-video, image-to-video, and video-to-video generation within a single framework, and extensive experiments show that it consistently outperforms prior methods on both short and long-video generation.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_BPh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feedd1887-c82a-4798-a3ea-19adf1d20c5d_1458x1294.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!_BPh!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feedd1887-c82a-4798-a3ea-19adf1d20c5d_1458x1294.png 424w, https://substackcdn.com/image/fetch/$s_!_BPh!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feedd1887-c82a-4798-a3ea-19adf1d20c5d_1458x1294.png 848w, https://substackcdn.com/image/fetch/$s_!_BPh!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feedd1887-c82a-4798-a3ea-19adf1d20c5d_1458x1294.png 1272w, https://substackcdn.com/image/fetch/$s_!_BPh!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feedd1887-c82a-4798-a3ea-19adf1d20c5d_1458x1294.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!_BPh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feedd1887-c82a-4798-a3ea-19adf1d20c5d_1458x1294.png" width="1456" height="1292" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/eedd1887-c82a-4798-a3ea-19adf1d20c5d_1458x1294.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1292,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2911938,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/190199560?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feedd1887-c82a-4798-a3ea-19adf1d20c5d_1458x1294.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!_BPh!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feedd1887-c82a-4798-a3ea-19adf1d20c5d_1458x1294.png 424w, https://substackcdn.com/image/fetch/$s_!_BPh!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feedd1887-c82a-4798-a3ea-19adf1d20c5d_1458x1294.png 848w, https://substackcdn.com/image/fetch/$s_!_BPh!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feedd1887-c82a-4798-a3ea-19adf1d20c5d_1458x1294.png 1272w, https://substackcdn.com/image/fetch/$s_!_BPh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feedd1887-c82a-4798-a3ea-19adf1d20c5d_1458x1294.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2603.04379">using this link</a>.</p><div><hr></div><h3>10. Symbol-Equivariant Recurrent Reasoning Models</h3><p>This research paper introduces <strong>Symbol-Equivariant Recurrent Reasoning Models (SE-RRMs)</strong>, a new addition to the family of Recurrent Reasoning Models (RRMs).</p><p>SE-RRMs extend Recurrent Reasoning Models by explicitly encoding symbol-permutation symmetry in the architecture using symbol-equivariant layers. 
</p><p>This change ensures that the model provides the same solution even when symbols, such as numbers or colors, are permuted. This reduces the need for extensive data augmentation and improves generalization.</p><p>Experiments show that SE-RRMs outperform previous RRM models on Sudoku and can adapt to different Sudoku grid sizes, which existing RRMs cannot.</p><p>SE-RRMs also achieve competitive performance on ARC-AGI-1 and ARC-AGI-2 with substantially less data augmentation and only 2M parameters.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!h2fn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe2e4f09f-f7e6-4a16-a6e4-b0587b2a6055_1550x1192.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!h2fn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe2e4f09f-f7e6-4a16-a6e4-b0587b2a6055_1550x1192.png 424w, https://substackcdn.com/image/fetch/$s_!h2fn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe2e4f09f-f7e6-4a16-a6e4-b0587b2a6055_1550x1192.png 848w, https://substackcdn.com/image/fetch/$s_!h2fn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe2e4f09f-f7e6-4a16-a6e4-b0587b2a6055_1550x1192.png 1272w, https://substackcdn.com/image/fetch/$s_!h2fn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe2e4f09f-f7e6-4a16-a6e4-b0587b2a6055_1550x1192.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!h2fn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe2e4f09f-f7e6-4a16-a6e4-b0587b2a6055_1550x1192.png" width="1456" height="1120" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e2e4f09f-f7e6-4a16-a6e4-b0587b2a6055_1550x1192.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1120,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:358084,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/190199560?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe2e4f09f-f7e6-4a16-a6e4-b0587b2a6055_1550x1192.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!h2fn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe2e4f09f-f7e6-4a16-a6e4-b0587b2a6055_1550x1192.png 424w, https://substackcdn.com/image/fetch/$s_!h2fn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe2e4f09f-f7e6-4a16-a6e4-b0587b2a6055_1550x1192.png 848w, https://substackcdn.com/image/fetch/$s_!h2fn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe2e4f09f-f7e6-4a16-a6e4-b0587b2a6055_1550x1192.png 1272w, https://substackcdn.com/image/fetch/$s_!h2fn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe2e4f09f-f7e6-4a16-a6e4-b0587b2a6055_1550x1192.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2603.02193">using this link</a>.</p><p>Read more about the previous RRMs using the following links:</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;4fdbdc03-11cd-4479-ac84-1e47e20fed15&quot;,&quot;caption&quot;:&quot;&#127873; Become a paid subscriber to &#8216;Into AI&#8217; today at a special 25% discount on the annual subscription.&quot;,&quot;cta&quot;:&quot;Read full 
story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Hierarchical Reasoning Model: A Deep Dive&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-08-10T11:22:50.624Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!MqBc!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa91bfc18-37e3-4d84-9ea2-faa611ac0336_1408x768.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/hierarchical-reasoning-model&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:170533295,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:22,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;43edb459-c031-4f4c-ac06-4c2bdd2c7ebe&quot;,&quot;caption&quot;:&quot;Into AI thrives thanks to the support of paid subscribers. 
If you want to access exclusive analysis, in-depth guides, and help this work continue, consider becoming a paid member today. Your support truly makes a difference!&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Tiny Recursive Model (TRM): A Deep Dive&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-10-16T10:16:57.478Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!nsQx!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F990b949c-7da6-4f07-92f3-4204a9771c32_1600x968.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/tiny-recursive-model&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:176082755,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:11,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" 
data-attrs="{&quot;nodeId&quot;:&quot;e23e28ae-026d-41ee-bce3-00f18dd9ef21&quot;,&quot;caption&quot;:&quot;Recursive reasoning models are a promising architectural candidate that could lead us towards AGI.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;A Deep Dive Into Universal Reasoning Models&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-01-31T11:01:52.669Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!iYMg!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8918235d-f4f4-425b-8ea5-1bda5910d868_2718x1186.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/universal-reasoning-model&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:185674112,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:2,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><p>This article is entirely free to read. If you loved reading it, restack and share it with others to <strong>earn referral rewards.</strong> &#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/this-week-in-ai-research-1-7-march?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/p/this-week-in-ai-research-1-7-march?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p><p><strong>Don&#8217;t forget to become a paid subscriber at a 25% discount to access all valuable posts in the newsletter.</strong></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Your 25% off&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/741bb486"><span>Your 25% off</span></a></p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B_dw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" width="1344" height="256" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:256,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[Learn to train a deep learning model on multiple GPUs with Distributed PyTorch]]></title><description><![CDATA[Part 2: Train a CNN on NVIDIA GPUs with PyTorch's Distributed Data Parallel (DDP)]]></description><link>https://www.intoai.pub/p/train-a-cnn-with-pytorch-ddp</link><guid isPermaLink="false">https://www.intoai.pub/p/train-a-cnn-with-pytorch-ddp</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Sun, 08 Mar 2026 11:33:47 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!xxxe!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc2633d6-a9d4-46df-9d14-32bf82af1c0c_480x480.gif" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!apHD!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb323866f-8f65-46e2-b314-3f6d24fef728_2212x968.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!apHD!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb323866f-8f65-46e2-b314-3f6d24fef728_2212x968.png 424w, https://substackcdn.com/image/fetch/$s_!apHD!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb323866f-8f65-46e2-b314-3f6d24fef728_2212x968.png 848w, https://substackcdn.com/image/fetch/$s_!apHD!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb323866f-8f65-46e2-b314-3f6d24fef728_2212x968.png 1272w, https://substackcdn.com/image/fetch/$s_!apHD!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb323866f-8f65-46e2-b314-3f6d24fef728_2212x968.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!apHD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb323866f-8f65-46e2-b314-3f6d24fef728_2212x968.png" width="1456" height="637" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b323866f-8f65-46e2-b314-3f6d24fef728_2212x968.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:637,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:640884,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189239741?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb323866f-8f65-46e2-b314-3f6d24fef728_2212x968.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!apHD!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb323866f-8f65-46e2-b314-3f6d24fef728_2212x968.png 424w, https://substackcdn.com/image/fetch/$s_!apHD!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb323866f-8f65-46e2-b314-3f6d24fef728_2212x968.png 848w, https://substackcdn.com/image/fetch/$s_!apHD!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb323866f-8f65-46e2-b314-3f6d24fef728_2212x968.png 1272w, https://substackcdn.com/image/fetch/$s_!apHD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb323866f-8f65-46e2-b314-3f6d24fef728_2212x968.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>&#128075;&#127995; Hey there! 
</p><p>This is the second part of a two-part lesson where we get our hands dirty by writing PyTorch code to train a model across multiple GPUs using the distributed machine learning algorithm <strong>Distributed Data Parallel (DDP).</strong></p><p>If you missed the first part, where we learned about DDP visually, here is your link to get started.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;470ea86a-10a0-434a-a381-7d3245923ca5&quot;,&quot;caption&quot;:&quot;We have long ago moved away from training production-grade deep learning models on a single machine.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Learn to train deep learning models on multiple GPUs&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-02-26T13:06:02.566Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!YyVB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/distributed-data-parallel&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:189120446,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:1,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name
&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><p>DDP sounds straightforward conceptually, but the intricate dependencies between GPU computation and communication make it tricky to implement and optimize.</p><p>Fortunately, we do not have to rewrite it from scratch and handle all existing edge cases. PyTorch already implements all of these in its <a href="https://docs.pytorch.org/tutorials/beginner/dist_overview.html">PyTorch Distributed</a> library in the <a href="https://docs.pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html">DistributedDataParallel</a> class.</p><p>To make this lesson easy to follow, I will write all the code in a Jupyter notebook on <a href="https://kaggle.com/">Kaggle</a>. </p><p>Kaggle provides free access to two NVIDIA T4 GPUs for 30 hours each week. 
We will train a CNN on these two GPUs with DDP using the <a href="https://www.kaggle.com/c/cifar-10">CIFAR-10 dataset</a>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!3cG3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f0589f5-9a45-4d71-acee-fed68268fc5d_1728x822.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!3cG3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f0589f5-9a45-4d71-acee-fed68268fc5d_1728x822.png 424w, https://substackcdn.com/image/fetch/$s_!3cG3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f0589f5-9a45-4d71-acee-fed68268fc5d_1728x822.png 848w, https://substackcdn.com/image/fetch/$s_!3cG3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f0589f5-9a45-4d71-acee-fed68268fc5d_1728x822.png 1272w, https://substackcdn.com/image/fetch/$s_!3cG3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f0589f5-9a45-4d71-acee-fed68268fc5d_1728x822.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!3cG3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f0589f5-9a45-4d71-acee-fed68268fc5d_1728x822.png" width="1456" height="693" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5f0589f5-9a45-4d71-acee-fed68268fc5d_1728x822.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:693,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1208009,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189239741?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f0589f5-9a45-4d71-acee-fed68268fc5d_1728x822.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!3cG3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f0589f5-9a45-4d71-acee-fed68268fc5d_1728x822.png 424w, https://substackcdn.com/image/fetch/$s_!3cG3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f0589f5-9a45-4d71-acee-fed68268fc5d_1728x822.png 848w, https://substackcdn.com/image/fetch/$s_!3cG3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f0589f5-9a45-4d71-acee-fed68268fc5d_1728x822.png 1272w, https://substackcdn.com/image/fetch/$s_!3cG3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5f0589f5-9a45-4d71-acee-fed68268fc5d_1728x822.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">The <a href="https://www.cs.toronto.edu/~kriz/cifar.html">CIFAR-10 dataset</a> contains 60,000 32x32 colour images in 10 classes, with 6000 images per class. 
Some examples from the dataset are shown in this image.</figcaption></figure></div><p>Let&#8217;s begin!</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!xxxe!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc2633d6-a9d4-46df-9d14-32bf82af1c0c_480x480.gif" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!xxxe!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc2633d6-a9d4-46df-9d14-32bf82af1c0c_480x480.gif 424w, https://substackcdn.com/image/fetch/$s_!xxxe!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc2633d6-a9d4-46df-9d14-32bf82af1c0c_480x480.gif 848w, https://substackcdn.com/image/fetch/$s_!xxxe!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc2633d6-a9d4-46df-9d14-32bf82af1c0c_480x480.gif 1272w, https://substackcdn.com/image/fetch/$s_!xxxe!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc2633d6-a9d4-46df-9d14-32bf82af1c0c_480x480.gif 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!xxxe!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc2633d6-a9d4-46df-9d14-32bf82af1c0c_480x480.gif" width="480" height="480" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fc2633d6-a9d4-46df-9d14-32bf82af1c0c_480x480.gif&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:480,&quot;width&quot;:480,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3807584,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/gif&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!xxxe!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc2633d6-a9d4-46df-9d14-32bf82af1c0c_480x480.gif 424w, https://substackcdn.com/image/fetch/$s_!xxxe!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc2633d6-a9d4-46df-9d14-32bf82af1c0c_480x480.gif 848w, https://substackcdn.com/image/fetch/$s_!xxxe!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc2633d6-a9d4-46df-9d14-32bf82af1c0c_480x480.gif 1272w, https://substackcdn.com/image/fetch/$s_!xxxe!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffc2633d6-a9d4-46df-9d14-32bf82af1c0c_480x480.gif 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Blessings from Jensen Huang (<a href="https://giphy.com/gifs/NVIDIA-GeForce-jensen-nvidia-3080-jihwEDnsFoaXWDTiKc">Source</a>)</figcaption></figure></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>Import packages</h3><p>We start with importing the necessary packages as follows.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;plaintext&quot;,&quot;nodeId&quot;:&quot;c0cb4798-a1f8-46c8-9c61-7a4da42a71d4&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-plaintext"># PyTorch core
import torch                              
import torch.nn as nn                    
import torch.optim as optim  
from torch.utils.data import DataLoader                 

# PyTorch Distributed
import torch.distributed as dist          
import torch.multiprocessing as mp        
from torch.nn.parallel import DistributedDataParallel as DDP  
from torch.utils.data.distributed import DistributedSampler  

# TorchVision
import torchvision                        
import torchvision.transforms as transforms 

# Standard library utils
import os       </code></pre></div><div><hr></div><h3>Define the CNN classifier</h3><p>Our CNN takes an image as input and classifies it into one of the 10 classes. </p><p>We define it exactly the same way we would when training on a single machine, and we aren&#8217;t doing anything fancy in this step.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;c31e03dc-e20e-428c-bb77-40b861836d20&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">class ImageClassifier(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        
        self.cnn = nn.Sequential(
            # Feature extraction
            # Block 1
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),

            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.ReLU(),

            nn.MaxPool2d(kernel_size=2),

            # Block 2
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=128),
            nn.ReLU(),

            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=128),
            nn.ReLU(),

            nn.MaxPool2d(kernel_size=2),

            # Block 3
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),

            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=256),
            nn.ReLU(),

            nn.MaxPool2d(kernel_size=2),
            
            nn.Flatten(),
            nn.Linear(in_features=256 * 4 * 4, out_features=512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            
            # Classification
            nn.Linear(in_features=512, out_features=num_classes),
        )

    def forward(self, x):
        return self.cnn(x)</code></pre></div><div><hr></div><h3>Write helper functions for distributed training</h3><p>Now comes the distributed part. </p>
      <p>
          <a href="https://www.intoai.pub/p/train-a-cnn-with-pytorch-ddp">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[A Simple 3-Step Guide to Move Your Memories From ChatGPT to Claude]]></title><description><![CDATA[Millions of users moved from ChatGPT to Claude this week. If you're one of them, don't forget to move your memories.]]></description><link>https://www.intoai.pub/p/a-simple-3-step-guide-to-move-your</link><guid isPermaLink="false">https://www.intoai.pub/p/a-simple-3-step-guide-to-move-your</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Wed, 04 Mar 2026 16:21:36 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/6f47dfc2-ea3c-4160-b530-2601e7a78909_271x300.gif" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!xoti!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F25901040-6f0f-4a81-bd48-d537b127485c_1866x358.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!xoti!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F25901040-6f0f-4a81-bd48-d537b127485c_1866x358.png 424w, https://substackcdn.com/image/fetch/$s_!xoti!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F25901040-6f0f-4a81-bd48-d537b127485c_1866x358.png 848w, https://substackcdn.com/image/fetch/$s_!xoti!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F25901040-6f0f-4a81-bd48-d537b127485c_1866x358.png 1272w, 
https://substackcdn.com/image/fetch/$s_!xoti!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F25901040-6f0f-4a81-bd48-d537b127485c_1866x358.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!xoti!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F25901040-6f0f-4a81-bd48-d537b127485c_1866x358.png" width="1456" height="279" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/25901040-6f0f-4a81-bd48-d537b127485c_1866x358.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:279,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:157241,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189885734?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F25901040-6f0f-4a81-bd48-d537b127485c_1866x358.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!xoti!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F25901040-6f0f-4a81-bd48-d537b127485c_1866x358.png 424w, https://substackcdn.com/image/fetch/$s_!xoti!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F25901040-6f0f-4a81-bd48-d537b127485c_1866x358.png 848w, https://substackcdn.com/image/fetch/$s_!xoti!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F25901040-6f0f-4a81-bd48-d537b127485c_1866x358.png 
1272w, https://substackcdn.com/image/fetch/$s_!xoti!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F25901040-6f0f-4a81-bd48-d537b127485c_1866x358.png 1456w" sizes="100vw" fetchpriority="high"></picture><div></div></div></a></figure></div><p>Millions decided to quit ChatGPT this week, given OpenAI&#8217;s new deal with the US Department of War.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!xzVr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F989dfb55-94ba-4e1d-97ee-77390f4c3a25_1180x1262.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!xzVr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F989dfb55-94ba-4e1d-97ee-77390f4c3a25_1180x1262.png 424w, https://substackcdn.com/image/fetch/$s_!xzVr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F989dfb55-94ba-4e1d-97ee-77390f4c3a25_1180x1262.png 848w, https://substackcdn.com/image/fetch/$s_!xzVr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F989dfb55-94ba-4e1d-97ee-77390f4c3a25_1180x1262.png 1272w, https://substackcdn.com/image/fetch/$s_!xzVr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F989dfb55-94ba-4e1d-97ee-77390f4c3a25_1180x1262.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!xzVr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F989dfb55-94ba-4e1d-97ee-77390f4c3a25_1180x1262.png" width="1180" height="1262" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/989dfb55-94ba-4e1d-97ee-77390f4c3a25_1180x1262.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1262,&quot;width&quot;:1180,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:626430,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189885734?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F989dfb55-94ba-4e1d-97ee-77390f4c3a25_1180x1262.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!xzVr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F989dfb55-94ba-4e1d-97ee-77390f4c3a25_1180x1262.png 424w, https://substackcdn.com/image/fetch/$s_!xzVr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F989dfb55-94ba-4e1d-97ee-77390f4c3a25_1180x1262.png 848w, https://substackcdn.com/image/fetch/$s_!xzVr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F989dfb55-94ba-4e1d-97ee-77390f4c3a25_1180x1262.png 1272w, https://substackcdn.com/image/fetch/$s_!xzVr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F989dfb55-94ba-4e1d-97ee-77390f4c3a25_1180x1262.png 1456w" 
sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"><a href="https://x.com/OpenAI/status/2027846012107456943">Source</a></figcaption></figure></div><p>Their first choice was Claude, given that <a href="https://www.anthropic.com/news/statement-department-of-war">Anthropic did not participate</a> in this deal and refused to allow its AI models to be used for mass surveillance or for developing fully autonomous weapons.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!MHyD!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ffcfcb2-51e8-418d-977d-6d3ff1173055_934x1306.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!MHyD!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ffcfcb2-51e8-418d-977d-6d3ff1173055_934x1306.png 424w, https://substackcdn.com/image/fetch/$s_!MHyD!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ffcfcb2-51e8-418d-977d-6d3ff1173055_934x1306.png 848w, https://substackcdn.com/image/fetch/$s_!MHyD!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ffcfcb2-51e8-418d-977d-6d3ff1173055_934x1306.png 1272w, https://substackcdn.com/image/fetch/$s_!MHyD!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ffcfcb2-51e8-418d-977d-6d3ff1173055_934x1306.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!MHyD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ffcfcb2-51e8-418d-977d-6d3ff1173055_934x1306.png" width="934" height="1306" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5ffcfcb2-51e8-418d-977d-6d3ff1173055_934x1306.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1306,&quot;width&quot;:934,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:296734,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189885734?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ffcfcb2-51e8-418d-977d-6d3ff1173055_934x1306.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!MHyD!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ffcfcb2-51e8-418d-977d-6d3ff1173055_934x1306.png 424w, https://substackcdn.com/image/fetch/$s_!MHyD!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ffcfcb2-51e8-418d-977d-6d3ff1173055_934x1306.png 848w, https://substackcdn.com/image/fetch/$s_!MHyD!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ffcfcb2-51e8-418d-977d-6d3ff1173055_934x1306.png 1272w, https://substackcdn.com/image/fetch/$s_!MHyD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ffcfcb2-51e8-418d-977d-6d3ff1173055_934x1306.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" 
fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"><a href="https://truthsocial.com/@realDonaldTrump/posts/116144552969293195">Source</a></figcaption></figure></div><p>If you&#8217;re making this move as well, don&#8217;t forget to move your memories into Claude. Here&#8217;s a simple 3-step guide to do this.</p><p>Note that:</p><ul><li><p>Claude&#8217;s memory is also available on the free plan.</p></li><li><p>This guide isn&#8217;t ChatGPT-specific and will let you move memories from any LLM provider to Claude. </p></li></ul><div><hr></div><h3>Step 1</h3><p>Paste the following prompt when you&#8217;re chatting with the LLM that you want to move away from. </p><blockquote><p><em>I&#8217;m moving to another service and need to export my data. List every memory you have stored about me, as well as any context you&#8217;ve learned about me from past conversations. 
Output everything in a single code block so I can easily copy it. Format each entry as: [date saved, if available] - memory content. Make sure to cover all of the following &#8212; preserve my words verbatim where possible: Instructions I&#8217;ve given you about how to respond (tone, format, style, &#8216;always do X&#8217;, &#8216;never do Y&#8217;). Personal details: name, location, job, family, interests. Projects, goals, and recurring topics. Tools, languages, and frameworks I use. Preferences and corrections I&#8217;ve made to your behavior. Any other stored context not covered above. Do not summarize, group, or omit any entries. After the code block, confirm whether that is the complete set or if any remain.</em></p></blockquote><div><hr></div><h3>Step 2</h3><p>Copy all the results that you obtain in the chat response.</p><div><hr></div><h3>Step 3</h3><p>Go to your <a href="https://claude.ai/settings/capabilities?open_memory_import=true">Claude Memory settings</a>, paste the copied result in the text box, and click the &#8220;Add to memory&#8221; button.</p><p>That&#8217;s it!</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!LhXA!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3a7efc41-3229-4955-9332-500772586495_2656x1390.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!LhXA!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3a7efc41-3229-4955-9332-500772586495_2656x1390.png 424w, https://substackcdn.com/image/fetch/$s_!LhXA!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3a7efc41-3229-4955-9332-500772586495_2656x1390.png 848w, 
https://substackcdn.com/image/fetch/$s_!LhXA!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3a7efc41-3229-4955-9332-500772586495_2656x1390.png 1272w, https://substackcdn.com/image/fetch/$s_!LhXA!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3a7efc41-3229-4955-9332-500772586495_2656x1390.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!LhXA!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3a7efc41-3229-4955-9332-500772586495_2656x1390.png" width="1200" height="628.021978021978" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3a7efc41-3229-4955-9332-500772586495_2656x1390.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:762,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:346393,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189885734?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3a7efc41-3229-4955-9332-500772586495_2656x1390.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!LhXA!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3a7efc41-3229-4955-9332-500772586495_2656x1390.png 424w, 
https://substackcdn.com/image/fetch/$s_!LhXA!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3a7efc41-3229-4955-9332-500772586495_2656x1390.png 848w, https://substackcdn.com/image/fetch/$s_!LhXA!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3a7efc41-3229-4955-9332-500772586495_2656x1390.png 1272w, https://substackcdn.com/image/fetch/$s_!LhXA!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3a7efc41-3229-4955-9332-500772586495_2656x1390.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><p>That&#8217;s all you need to do to transfer memories, so Claude knows about you and gives responses best suited to you.</p><p>If you have any other questions, let me know in the comments below.</p><p>Bye now! &#128075;&#127995;</p><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Subscribe to &#8216;Into AI&#8217; and join a community of over 7,000 smart AI engineers looking to 100X their AI skills.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[🤗 I want your help to make your life better]]></title><description><![CDATA[&#128075;&#127995; Hey friend!]]></description><link>https://www.intoai.pub/p/i-want-your-help-to-make-your-life</link><guid isPermaLink="false">https://www.intoai.pub/p/i-want-your-help-to-make-your-life</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Wed, 04 Mar 2026 13:38:09 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!Ea4T!,w_256,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>&#128075;&#127995; Hey friend!</p><p>I&#8217;ll keep this short. 
</p><p>I want to make &#8220;Into AI&#8221; as useful as possible for your career, and the best way to do that is to ask you directly.</p><ul><li><p>What topics are you most interested in right now?</p></li><li><p>What&#8217;s the biggest challenge you&#8217;re facing in your career?</p></li><li><p>What kind of content would actually help you move forward?</p></li></ul><p><strong>Just hit reply to this email and let me know. &#128231; </strong></p><p>There&#8217;s no wrong answer. </p><p>Even a one-liner is totally fine. </p><p>I read every single reply, and your answers will directly shape what comes next in this newsletter.</p><p>&#127873; <strong>As a thank-you, I&#8217;ll send you a personalized 40% discount coupon on the annual subscription just for sharing your thoughts.</strong></p><p>Talk soon!</p><p><em>Ashish </em></p>]]></content:encoded></item><item><title><![CDATA[🗓️ This Week In AI Research (22-28 February 26)]]></title><description><![CDATA[The top 11 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-22-28-february</link><guid isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-22-28-february</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Tue, 03 Mar 2026 10:11:35 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!x93u!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f7cf45f-d83b-410a-bdbd-f041bf270a00_6912x3072.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!x93u!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f7cf45f-d83b-410a-bdbd-f041bf270a00_6912x3072.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!x93u!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f7cf45f-d83b-410a-bdbd-f041bf270a00_6912x3072.png 424w, https://substackcdn.com/image/fetch/$s_!x93u!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f7cf45f-d83b-410a-bdbd-f041bf270a00_6912x3072.png 848w, https://substackcdn.com/image/fetch/$s_!x93u!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f7cf45f-d83b-410a-bdbd-f041bf270a00_6912x3072.png 1272w, https://substackcdn.com/image/fetch/$s_!x93u!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f7cf45f-d83b-410a-bdbd-f041bf270a00_6912x3072.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!x93u!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f7cf45f-d83b-410a-bdbd-f041bf270a00_6912x3072.png" width="1200" height="533.2417582417582" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0f7cf45f-d83b-410a-bdbd-f041bf270a00_6912x3072.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:647,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:2375264,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189537380?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f7cf45f-d83b-410a-bdbd-f041bf270a00_6912x3072.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!x93u!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f7cf45f-d83b-410a-bdbd-f041bf270a00_6912x3072.png 424w, https://substackcdn.com/image/fetch/$s_!x93u!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f7cf45f-d83b-410a-bdbd-f041bf270a00_6912x3072.png 848w, https://substackcdn.com/image/fetch/$s_!x93u!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f7cf45f-d83b-410a-bdbd-f041bf270a00_6912x3072.png 1272w, https://substackcdn.com/image/fetch/$s_!x93u!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0f7cf45f-d83b-410a-bdbd-f041bf270a00_6912x3072.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>&#127873; Here&#8217;s your 25% off the annual membership. 
Unlock every lesson in the newsletter and accelerate your AI engineering career!</strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!7nUB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6dc84b87-f01c-4d7d-a5d7-8c925b263686_480x480.gif" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!7nUB!,w_424,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6dc84b87-f01c-4d7d-a5d7-8c925b263686_480x480.gif 424w, https://substackcdn.com/image/fetch/$s_!7nUB!,w_848,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6dc84b87-f01c-4d7d-a5d7-8c925b263686_480x480.gif 848w, https://substackcdn.com/image/fetch/$s_!7nUB!,w_1272,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6dc84b87-f01c-4d7d-a5d7-8c925b263686_480x480.gif 1272w, https://substackcdn.com/image/fetch/$s_!7nUB!,w_1456,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6dc84b87-f01c-4d7d-a5d7-8c925b263686_480x480.gif 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!7nUB!,w_1456,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6dc84b87-f01c-4d7d-a5d7-8c925b263686_480x480.gif" width="480" height="480" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6dc84b87-f01c-4d7d-a5d7-8c925b263686_480x480.gif&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:480,&quot;width&quot;:480,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!7nUB!,w_424,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6dc84b87-f01c-4d7d-a5d7-8c925b263686_480x480.gif 424w, https://substackcdn.com/image/fetch/$s_!7nUB!,w_848,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6dc84b87-f01c-4d7d-a5d7-8c925b263686_480x480.gif 848w, https://substackcdn.com/image/fetch/$s_!7nUB!,w_1272,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6dc84b87-f01c-4d7d-a5d7-8c925b263686_480x480.gif 1272w, https://substackcdn.com/image/fetch/$s_!7nUB!,w_1456,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6dc84b87-f01c-4d7d-a5d7-8c925b263686_480x480.gif 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 
2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"><a href="https://giphy.com/gifs/pudgypenguins-join-us-huddle-ahaoiyouFTTrGYc3hV">Source</a></figcaption></figure></div><p><strong>Claim your discount now!</strong></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Grab your 25% discount today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/741bb486"><span>Grab your 25% discount today</span></a></p><div><hr></div><h4>1. <strong>ChatGPT Health performance in a structured test of triage recommendations</strong></h4><p>This research paper, published in Nature Medicine, evaluated <strong>ChatGPT Health</strong>, a widely used AI tool for triaging patients. 
</p><p>The study found that it failed to appropriately triage over half (52%) of serious emergencies like diabetic ketoacidosis or impending respiratory failure and suggested a follow-up in 24 to 48 hours instead of recommending immediate emergency care.</p><p>Additionally, it mismanaged patients with suicidal ideation, sometimes triggering alerts in low-risk cases and failing to respond when individuals mentioned specific self-harm methods (which actually made them high-risk clinically).</p><p>Its recommendations also shifted toward less urgent care when relatives and friends downplayed symptom severity.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!pUK2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef9433b3-8dd8-43d6-ae7f-cbc87f8a2b48_1558x934.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!pUK2!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef9433b3-8dd8-43d6-ae7f-cbc87f8a2b48_1558x934.png 424w, https://substackcdn.com/image/fetch/$s_!pUK2!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef9433b3-8dd8-43d6-ae7f-cbc87f8a2b48_1558x934.png 848w, https://substackcdn.com/image/fetch/$s_!pUK2!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef9433b3-8dd8-43d6-ae7f-cbc87f8a2b48_1558x934.png 1272w, https://substackcdn.com/image/fetch/$s_!pUK2!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef9433b3-8dd8-43d6-ae7f-cbc87f8a2b48_1558x934.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!pUK2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef9433b3-8dd8-43d6-ae7f-cbc87f8a2b48_1558x934.png" width="1558" height="934" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ef9433b3-8dd8-43d6-ae7f-cbc87f8a2b48_1558x934.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:934,&quot;width&quot;:1558,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:263976,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189537380?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Feb67777f-2491-421f-b3ce-e254cee6bbea_1558x934.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!pUK2!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef9433b3-8dd8-43d6-ae7f-cbc87f8a2b48_1558x934.png 424w, https://substackcdn.com/image/fetch/$s_!pUK2!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef9433b3-8dd8-43d6-ae7f-cbc87f8a2b48_1558x934.png 848w, https://substackcdn.com/image/fetch/$s_!pUK2!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef9433b3-8dd8-43d6-ae7f-cbc87f8a2b48_1558x934.png 1272w, https://substackcdn.com/image/fetch/$s_!pUK2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fef9433b3-8dd8-43d6-ae7f-cbc87f8a2b48_1558x934.png 1456w" sizes="100vw" 
loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://www.nature.com/articles/s41591-026-04297-7">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>2. 
Mercury 2</h4><p>Inception announced <strong>Mercury 2</strong>, the world's fastest reasoning language model, which uses diffusion-based generation rather than the traditional autoregressive generation.</p><p>This allows it to generate and refine many tokens in parallel, resulting in much lower latency in production workflows.</p><p>Mercury 2 generates&nbsp;over 1,000 tokens per second on NVIDIA Blackwell GPUs, making it&nbsp;more than five times faster&nbsp;than typical speed-optimized models while maintaining competitive generation quality. </p><p>It comes with a 128K context window, tunable reasoning effort, native tool use, and schema-aligned JSON output, and is OpenAI API compatible, so it can be dropped into existing stacks without requiring any rewrites.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!s-1p!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff81d81d8-8b20-4837-8661-19011924ef0f_1728x1240.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!s-1p!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff81d81d8-8b20-4837-8661-19011924ef0f_1728x1240.png 424w, https://substackcdn.com/image/fetch/$s_!s-1p!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff81d81d8-8b20-4837-8661-19011924ef0f_1728x1240.png 848w, https://substackcdn.com/image/fetch/$s_!s-1p!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff81d81d8-8b20-4837-8661-19011924ef0f_1728x1240.png 1272w, 
https://substackcdn.com/image/fetch/$s_!s-1p!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff81d81d8-8b20-4837-8661-19011924ef0f_1728x1240.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!s-1p!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff81d81d8-8b20-4837-8661-19011924ef0f_1728x1240.png" width="1456" height="1045" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f81d81d8-8b20-4837-8661-19011924ef0f_1728x1240.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1045,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:241719,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189537380?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff81d81d8-8b20-4837-8661-19011924ef0f_1728x1240.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!s-1p!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff81d81d8-8b20-4837-8661-19011924ef0f_1728x1240.png 424w, https://substackcdn.com/image/fetch/$s_!s-1p!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff81d81d8-8b20-4837-8661-19011924ef0f_1728x1240.png 848w, 
https://substackcdn.com/image/fetch/$s_!s-1p!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff81d81d8-8b20-4837-8661-19011924ef0f_1728x1240.png 1272w, https://substackcdn.com/image/fetch/$s_!s-1p!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff81d81d8-8b20-4837-8661-19011924ef0f_1728x1240.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this release <a href="https://www.inceptionlabs.ai/blog/introducing-mercury-2">using this 
link</a>.</p><div><hr></div><h4>3. DualPath: Breaking the Storage Bandwidth Bottleneck in Agentic LLM Inference</h4><p>Multi-turn, agentic LLM workloads are limited by storage I/O during the loading of large key-value (KV) caches rather than computation. </p><p>This issue is particularly common in popular architectures, where loading large KV caches from external storage creates an imbalance, with prefill engines consuming their storage network bandwidth while decode engines are not fully utilized. This restricts the system's overall throughput.</p><p>To solve this problem, the authors introduce <strong>DualPath</strong>, a new inference system that creates a second path for loading KV caches. </p><p>Alongside the usual storage-to-prefill path, it creates a new storage-to-decode path that loads KV caches into decoding engines. These caches are then transferred via RDMA to prefill engines. </p><p>This optimized data path is combined with a global scheduler that dynamically balances load across prefill and decode engines.</p><p>Experiments on three models with realistic agentic workloads show that DualPath improves offline inference throughput by up to roughly 1.87&#215; and online serving throughput by about 1.96&#215;, all while meeting the SLO. 
</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!IAxZ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cbd7950-2897-428a-a207-291e23d1a226_1834x528.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!IAxZ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cbd7950-2897-428a-a207-291e23d1a226_1834x528.png 424w, https://substackcdn.com/image/fetch/$s_!IAxZ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cbd7950-2897-428a-a207-291e23d1a226_1834x528.png 848w, https://substackcdn.com/image/fetch/$s_!IAxZ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cbd7950-2897-428a-a207-291e23d1a226_1834x528.png 1272w, https://substackcdn.com/image/fetch/$s_!IAxZ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cbd7950-2897-428a-a207-291e23d1a226_1834x528.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!IAxZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cbd7950-2897-428a-a207-291e23d1a226_1834x528.png" width="1456" height="419" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0cbd7950-2897-428a-a207-291e23d1a226_1834x528.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:419,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:181620,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189537380?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cbd7950-2897-428a-a207-291e23d1a226_1834x528.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!IAxZ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cbd7950-2897-428a-a207-291e23d1a226_1834x528.png 424w, https://substackcdn.com/image/fetch/$s_!IAxZ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cbd7950-2897-428a-a207-291e23d1a226_1834x528.png 848w, https://substackcdn.com/image/fetch/$s_!IAxZ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cbd7950-2897-428a-a207-291e23d1a226_1834x528.png 1272w, https://substackcdn.com/image/fetch/$s_!IAxZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0cbd7950-2897-428a-a207-291e23d1a226_1834x528.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2602.21548">using this link</a>.</p><div><hr></div><h4>4. Agents of Chaos</h4><p>This research paper presents a red-teaming study of autonomous LLM-based agents deployed in a live lab environment with persistent memory, email accounts, Discord, file systems, and shell execution.</p><p>The goal was to observe how these agents interact with humans and other agents under benign and adversarial conditions. Over two weeks, twenty AI researchers tested the systems and recorded eleven significant failure cases. 
</p><p>These included:</p><ul><li><p>Unauthorized compliance with non-owners</p></li><li><p>Disclosure of sensitive information</p></li><li><p>Execution of destructive system-level actions</p></li><li><p>Denial-of-service conditions</p></li><li><p>Uncontrolled resource consumption</p></li><li><p>Identity spoofing vulnerabilities</p></li><li><p>Cross-agent propagation of unsafe practices</p></li><li><p>Partial system takeover </p></li><li><p>Fake reporting of task completion</p></li></ul><p>These findings reveal major issues with agentic systems when deployed in the real world.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!1HKB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd1026ddb-543b-4934-9c00-09236fbb416b_1780x824.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!1HKB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd1026ddb-543b-4934-9c00-09236fbb416b_1780x824.png 424w, https://substackcdn.com/image/fetch/$s_!1HKB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd1026ddb-543b-4934-9c00-09236fbb416b_1780x824.png 848w, https://substackcdn.com/image/fetch/$s_!1HKB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd1026ddb-543b-4934-9c00-09236fbb416b_1780x824.png 1272w, https://substackcdn.com/image/fetch/$s_!1HKB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd1026ddb-543b-4934-9c00-09236fbb416b_1780x824.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!1HKB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd1026ddb-543b-4934-9c00-09236fbb416b_1780x824.png" width="1456" height="674" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d1026ddb-543b-4934-9c00-09236fbb416b_1780x824.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:674,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:380530,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189537380?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd1026ddb-543b-4934-9c00-09236fbb416b_1780x824.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!1HKB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd1026ddb-543b-4934-9c00-09236fbb416b_1780x824.png 424w, https://substackcdn.com/image/fetch/$s_!1HKB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd1026ddb-543b-4934-9c00-09236fbb416b_1780x824.png 848w, https://substackcdn.com/image/fetch/$s_!1HKB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd1026ddb-543b-4934-9c00-09236fbb416b_1780x824.png 1272w, https://substackcdn.com/image/fetch/$s_!1HKB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd1026ddb-543b-4934-9c00-09236fbb416b_1780x824.png 1456w" sizes="100vw" 
loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2602.20021">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Grab your 25% discount today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/741bb486"><span>Grab your 25% discount today</span></a></p><div><hr></div><h4>5. 
dLLM: Simple Diffusion Language Modeling</h4><p>This research paper introduces <strong>dLLM</strong>, an open-source framework that unifies the core components of diffusion language modeling (training, inference, and evaluation) and makes them easy to customize.</p><p>With dLLM, users can reproduce, finetune, deploy, and evaluate open-source large DLMs such as LLaDA and Dream through a standardized pipeline. </p><p>The framework also provides minimal, reproducible recipes for building small DLMs from scratch with accessible compute, including converting any BERT-style encoder or autoregressive LM into a DLM. </p><p>The researchers release the checkpoints of these small DLMs to make DLMs more accessible and accelerate future research.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!HkoA!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F387dfa0e-b6e8-4283-9148-3e543df01a36_480x210.gif" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!HkoA!,w_424,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F387dfa0e-b6e8-4283-9148-3e543df01a36_480x210.gif 424w, https://substackcdn.com/image/fetch/$s_!HkoA!,w_848,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F387dfa0e-b6e8-4283-9148-3e543df01a36_480x210.gif 848w, https://substackcdn.com/image/fetch/$s_!HkoA!,w_1272,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F387dfa0e-b6e8-4283-9148-3e543df01a36_480x210.gif 1272w, 
https://substackcdn.com/image/fetch/$s_!HkoA!,w_1456,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F387dfa0e-b6e8-4283-9148-3e543df01a36_480x210.gif 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!HkoA!,w_1456,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F387dfa0e-b6e8-4283-9148-3e543df01a36_480x210.gif" width="480" height="210" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/387dfa0e-b6e8-4283-9148-3e543df01a36_480x210.gif&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:210,&quot;width&quot;:480,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;dLLM logo&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="dLLM logo" title="dLLM logo" srcset="https://substackcdn.com/image/fetch/$s_!HkoA!,w_424,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F387dfa0e-b6e8-4283-9148-3e543df01a36_480x210.gif 424w, https://substackcdn.com/image/fetch/$s_!HkoA!,w_848,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F387dfa0e-b6e8-4283-9148-3e543df01a36_480x210.gif 848w, https://substackcdn.com/image/fetch/$s_!HkoA!,w_1272,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F387dfa0e-b6e8-4283-9148-3e543df01a36_480x210.gif 1272w, 
https://substackcdn.com/image/fetch/$s_!HkoA!,w_1456,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F387dfa0e-b6e8-4283-9148-3e543df01a36_480x210.gif 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2602.22661">using this link</a>.</p><div><hr></div><h4>6. Generative Recommendation for Large-Scale Advertising</h4><p>This research paper introduces <strong>GR4AD (Generative Recommendation for ADvertising)</strong>, a production-oriented generative recommendation system for large-scale advertising, where real-time performance and business value are important.</p><p>GR4AD introduces:</p><ul><li><p><strong>UA-SID</strong> for tokenization to capture complicated business information, and</p></li><li><p><strong>LazyAR</strong>, a lazy autoregressive decoder that lowers inference costs for short, multi-candidate outputs</p></li></ul><p>To better align optimization with business metrics, it uses <strong>Value-Aware Supervised Learning (VSL)</strong> and a ranking-aware reinforcement learning algorithm called <strong>RSPO (Ranking-Guided Softmax Preference Optimization)</strong>. </p><p>For online inference, a method called <strong>Dynamic Beam Serving</strong> is used that adjusts beam width according to generation workload to balance quality and compute power. </p><p>In large-scale online A/B tests, GR4AD achieved up to a 4.2% increase in ad revenue compared to a baseline recommendation stack. 
</p><p>It has also been fully deployed in <a href="https://en.wikipedia.org/wiki/Kuaishou">Kuaishou</a>&#8217;s advertising system, serving over 400 million users and achieving high-throughput, real-time serving.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!493f!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19b25ca8-2df8-4f19-a4bb-2d00e4fc5ce7_2288x1196.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!493f!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19b25ca8-2df8-4f19-a4bb-2d00e4fc5ce7_2288x1196.png 424w, https://substackcdn.com/image/fetch/$s_!493f!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19b25ca8-2df8-4f19-a4bb-2d00e4fc5ce7_2288x1196.png 848w, https://substackcdn.com/image/fetch/$s_!493f!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19b25ca8-2df8-4f19-a4bb-2d00e4fc5ce7_2288x1196.png 1272w, https://substackcdn.com/image/fetch/$s_!493f!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19b25ca8-2df8-4f19-a4bb-2d00e4fc5ce7_2288x1196.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!493f!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19b25ca8-2df8-4f19-a4bb-2d00e4fc5ce7_2288x1196.png" width="1456" height="761" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/19b25ca8-2df8-4f19-a4bb-2d00e4fc5ce7_2288x1196.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:761,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:505026,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189537380?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19b25ca8-2df8-4f19-a4bb-2d00e4fc5ce7_2288x1196.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!493f!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19b25ca8-2df8-4f19-a4bb-2d00e4fc5ce7_2288x1196.png 424w, https://substackcdn.com/image/fetch/$s_!493f!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19b25ca8-2df8-4f19-a4bb-2d00e4fc5ce7_2288x1196.png 848w, https://substackcdn.com/image/fetch/$s_!493f!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19b25ca8-2df8-4f19-a4bb-2d00e4fc5ce7_2288x1196.png 1272w, https://substackcdn.com/image/fetch/$s_!493f!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F19b25ca8-2df8-4f19-a4bb-2d00e4fc5ce7_2288x1196.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2602.22732">using this link</a>.</p><div><hr></div><h4>7. 
Aletheia tackles FirstProof autonomously</h4><p>This research paper from Google DeepMind reports the performance of <strong><a href="https://arxiv.org/abs/2602.10177">Aletheia</a></strong>, a mathematics research agent powered by Gemini 3 Deep Think, on the <a href="https://arxiv.org/abs/2602.05192">FirstProof</a> challenge, a collection of 10 research-level math problems with publicly undisclosed solutions used to assess AI capability.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!WPpy!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9125819-2d44-41f6-9d75-595343813b2c_1682x712.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!WPpy!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9125819-2d44-41f6-9d75-595343813b2c_1682x712.jpeg 424w, https://substackcdn.com/image/fetch/$s_!WPpy!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9125819-2d44-41f6-9d75-595343813b2c_1682x712.jpeg 848w, https://substackcdn.com/image/fetch/$s_!WPpy!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9125819-2d44-41f6-9d75-595343813b2c_1682x712.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!WPpy!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9125819-2d44-41f6-9d75-595343813b2c_1682x712.jpeg 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!WPpy!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9125819-2d44-41f6-9d75-595343813b2c_1682x712.jpeg" width="1456" height="616" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e9125819-2d44-41f6-9d75-595343813b2c_1682x712.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:616,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Image&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Image" title="Image" srcset="https://substackcdn.com/image/fetch/$s_!WPpy!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9125819-2d44-41f6-9d75-595343813b2c_1682x712.jpeg 424w, https://substackcdn.com/image/fetch/$s_!WPpy!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9125819-2d44-41f6-9d75-595343813b2c_1682x712.jpeg 848w, https://substackcdn.com/image/fetch/$s_!WPpy!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9125819-2d44-41f6-9d75-595343813b2c_1682x712.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!WPpy!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe9125819-2d44-41f6-9d75-595343813b2c_1682x712.jpeg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" 
class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Aletheia solved 6 out of 10 problems within the allowed time, with most expert evaluators considering those solutions correct. 
Problem 8 had mixed expert agreement.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!wHrZ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F916b2a3e-9473-4dba-b511-04439a1a8f41_1990x968.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!wHrZ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F916b2a3e-9473-4dba-b511-04439a1a8f41_1990x968.png 424w, https://substackcdn.com/image/fetch/$s_!wHrZ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F916b2a3e-9473-4dba-b511-04439a1a8f41_1990x968.png 848w, https://substackcdn.com/image/fetch/$s_!wHrZ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F916b2a3e-9473-4dba-b511-04439a1a8f41_1990x968.png 1272w, https://substackcdn.com/image/fetch/$s_!wHrZ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F916b2a3e-9473-4dba-b511-04439a1a8f41_1990x968.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!wHrZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F916b2a3e-9473-4dba-b511-04439a1a8f41_1990x968.png" width="1456" height="708" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/916b2a3e-9473-4dba-b511-04439a1a8f41_1990x968.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:708,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:192148,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189537380?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F916b2a3e-9473-4dba-b511-04439a1a8f41_1990x968.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!wHrZ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F916b2a3e-9473-4dba-b511-04439a1a8f41_1990x968.png 424w, https://substackcdn.com/image/fetch/$s_!wHrZ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F916b2a3e-9473-4dba-b511-04439a1a8f41_1990x968.png 848w, https://substackcdn.com/image/fetch/$s_!wHrZ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F916b2a3e-9473-4dba-b511-04439a1a8f41_1990x968.png 1272w, https://substackcdn.com/image/fetch/$s_!wHrZ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F916b2a3e-9473-4dba-b511-04439a1a8f41_1990x968.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2602.21201">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Grab your 25% discount today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/741bb486"><span>Grab your 25% discount today</span></a></p><div><hr></div><h4>8. <strong>LeRobot: An Open-Source Library for End-to-End Robot Learning</strong></h4><p>This research paper introduces&nbsp;<strong>LeRobot</strong>, an open-source library that brings together and simplifies the entire robot-learning stack. 
</p><p>It covers everything from low-level middleware for controlling robots to large-scale data collection, storage, streaming, and learning algorithms, within a single framework.</p><p>Instead of relying on hand-crafted techniques, it introduces scalable methods that lower the barriers for robotics researchers and practitioners worldwide.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!EZVX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7a074262-036d-4765-9e90-21e288b59696_2720x982.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!EZVX!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7a074262-036d-4765-9e90-21e288b59696_2720x982.png 424w, https://substackcdn.com/image/fetch/$s_!EZVX!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7a074262-036d-4765-9e90-21e288b59696_2720x982.png 848w, https://substackcdn.com/image/fetch/$s_!EZVX!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7a074262-036d-4765-9e90-21e288b59696_2720x982.png 1272w, https://substackcdn.com/image/fetch/$s_!EZVX!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7a074262-036d-4765-9e90-21e288b59696_2720x982.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!EZVX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7a074262-036d-4765-9e90-21e288b59696_2720x982.png" width="1456" height="526" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7a074262-036d-4765-9e90-21e288b59696_2720x982.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:526,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:638770,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189537380?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7a074262-036d-4765-9e90-21e288b59696_2720x982.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!EZVX!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7a074262-036d-4765-9e90-21e288b59696_2720x982.png 424w, https://substackcdn.com/image/fetch/$s_!EZVX!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7a074262-036d-4765-9e90-21e288b59696_2720x982.png 848w, https://substackcdn.com/image/fetch/$s_!EZVX!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7a074262-036d-4765-9e90-21e288b59696_2720x982.png 1272w, https://substackcdn.com/image/fetch/$s_!EZVX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7a074262-036d-4765-9e90-21e288b59696_2720x982.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2602.22818">using this link</a>.</p><div><hr></div><h4>9. 
Training Agents to Self-Report Misbehavior</h4><p>LLM agents may pursue misaligned goals while hiding their actions, despite being alignment-trained.</p><p>To counter this, the research paper introduces <strong>Self-incrimination training</strong>, which trains agents to produce a visible signal when they act deceptively.</p><p>The researchers train agents based on GPT-4.1 and Gemini-2.0 to use a <code>report_scheming()</code> tool whenever they behave inappropriately and measure their ability to cause harm undetected in out-of-distribution environments.</p><p>Self-incrimination significantly reduces undetected attacks and outperforms similar monitoring and alignment methods, while preserving the instruction hierarchy and causing little loss in capability. </p><p>Its effectiveness stays steady across tasks, continues to work under adversarial prompt optimization, and applies to situations where agents pursue misaligned goals themselves rather than being instructed to misbehave.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!dwfX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F369c7ecb-587c-4015-be54-c9ddd5e143bf_2172x920.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!dwfX!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F369c7ecb-587c-4015-be54-c9ddd5e143bf_2172x920.png 424w, https://substackcdn.com/image/fetch/$s_!dwfX!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F369c7ecb-587c-4015-be54-c9ddd5e143bf_2172x920.png 848w, 
https://substackcdn.com/image/fetch/$s_!dwfX!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F369c7ecb-587c-4015-be54-c9ddd5e143bf_2172x920.png 1272w, https://substackcdn.com/image/fetch/$s_!dwfX!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F369c7ecb-587c-4015-be54-c9ddd5e143bf_2172x920.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!dwfX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F369c7ecb-587c-4015-be54-c9ddd5e143bf_2172x920.png" width="1456" height="617" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/369c7ecb-587c-4015-be54-c9ddd5e143bf_2172x920.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:617,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:763799,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189537380?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F369c7ecb-587c-4015-be54-c9ddd5e143bf_2172x920.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!dwfX!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F369c7ecb-587c-4015-be54-c9ddd5e143bf_2172x920.png 424w, https://substackcdn.com/image/fetch/$s_!dwfX!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F369c7ecb-587c-4015-be54-c9ddd5e143bf_2172x920.png 
848w, https://substackcdn.com/image/fetch/$s_!dwfX!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F369c7ecb-587c-4015-be54-c9ddd5e143bf_2172x920.png 1272w, https://substackcdn.com/image/fetch/$s_!dwfX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F369c7ecb-587c-4015-be54-c9ddd5e143bf_2172x920.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2602.22303">using this link</a>.</p><div><hr></div><h4>10. 
On Data Engineering for Scaling LLM Terminal Capabilities</h4><p>This research paper from NVIDIA is a systematic study of data engineering practices to build terminal agents. It introduces:</p><ol><li><p><strong>Terminal-Task-Gen</strong>: a lightweight synthetic task generation pipeline that supports seed-based and skill-based task construction</p></li><li><p><strong>Terminal-Corpus</strong>: a large-scale open-source dataset for terminal tasks</p></li></ol><p>Using this dataset, the researchers train <strong>Nemotron-Terminal</strong>, a family of models initialized from Qwen3 that achieve substantial gains on <a href="https://www.tbench.ai/">Terminal-Bench 2.0</a>, matching the performance of significantly larger models.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ciMM!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe3d4037-b926-4bf4-b183-927773e55721_2440x1252.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ciMM!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe3d4037-b926-4bf4-b183-927773e55721_2440x1252.png 424w, https://substackcdn.com/image/fetch/$s_!ciMM!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe3d4037-b926-4bf4-b183-927773e55721_2440x1252.png 848w, https://substackcdn.com/image/fetch/$s_!ciMM!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe3d4037-b926-4bf4-b183-927773e55721_2440x1252.png 1272w, 
https://substackcdn.com/image/fetch/$s_!ciMM!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe3d4037-b926-4bf4-b183-927773e55721_2440x1252.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ciMM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe3d4037-b926-4bf4-b183-927773e55721_2440x1252.png" width="1456" height="747" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/be3d4037-b926-4bf4-b183-927773e55721_2440x1252.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:747,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:597950,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189537380?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe3d4037-b926-4bf4-b183-927773e55721_2440x1252.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ciMM!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe3d4037-b926-4bf4-b183-927773e55721_2440x1252.png 424w, https://substackcdn.com/image/fetch/$s_!ciMM!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe3d4037-b926-4bf4-b183-927773e55721_2440x1252.png 848w, 
https://substackcdn.com/image/fetch/$s_!ciMM!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe3d4037-b926-4bf4-b183-927773e55721_2440x1252.png 1272w, https://substackcdn.com/image/fetch/$s_!ciMM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe3d4037-b926-4bf4-b183-927773e55721_2440x1252.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2602.21193">using this link</a>.</p><div><hr></div><h4>11. 
How to Train Your Deep Research Agent?</h4><p>This research paper explores how to best train Deep Research agents.</p><p>By studying three RL training choices (Prompt templates, Reward functions, and Policy optimization), the researchers reveal three main findings:</p><ol><li><p><em>Fast Thinking</em> prompts work better than <em>Slow Thinking</em></p></li><li><p>F1-based rewards need action penalties to avoid collapse and can outperform exact match (EM)</p></li><li><p>REINFORCE outperforms PPO while requiring fewer search actions, whereas GRPO shows the poorest stability among policy optimization methods</p></li></ol><p>Building on these findings, the researchers introduce <strong>Search-R1++</strong>, a strong baseline that improves the performance of <a href="https://arxiv.org/abs/2503.09516">Search-R1</a> from 0.403 to 0.442 (Qwen2.5-7B) and 0.289 to 0.331 (Qwen2.5-3B).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!0OeS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9238bc7-0ff2-4e06-b737-ccb11701ad86_1872x854.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!0OeS!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9238bc7-0ff2-4e06-b737-ccb11701ad86_1872x854.png 424w, https://substackcdn.com/image/fetch/$s_!0OeS!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9238bc7-0ff2-4e06-b737-ccb11701ad86_1872x854.png 848w, https://substackcdn.com/image/fetch/$s_!0OeS!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9238bc7-0ff2-4e06-b737-ccb11701ad86_1872x854.png 1272w, 
https://substackcdn.com/image/fetch/$s_!0OeS!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9238bc7-0ff2-4e06-b737-ccb11701ad86_1872x854.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!0OeS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9238bc7-0ff2-4e06-b737-ccb11701ad86_1872x854.png" width="1456" height="664" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a9238bc7-0ff2-4e06-b737-ccb11701ad86_1872x854.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:664,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:372715,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189537380?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9238bc7-0ff2-4e06-b737-ccb11701ad86_1872x854.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!0OeS!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9238bc7-0ff2-4e06-b737-ccb11701ad86_1872x854.png 424w, https://substackcdn.com/image/fetch/$s_!0OeS!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9238bc7-0ff2-4e06-b737-ccb11701ad86_1872x854.png 848w, https://substackcdn.com/image/fetch/$s_!0OeS!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9238bc7-0ff2-4e06-b737-ccb11701ad86_1872x854.png 
1272w, https://substackcdn.com/image/fetch/$s_!0OeS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9238bc7-0ff2-4e06-b737-ccb11701ad86_1872x854.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2602.19526">using this link</a>.</p><div><hr></div><p>If you loved reading it, restack and share it with others to <strong>earn referral rewards.</strong> &#10084;&#65039;</p><p>Become a <strong>paid subscriber today</strong> to get access to all posts on this 
newsletter.</p><ul><li><p>&#127752; <a href="https://www.intoai.pub/p/build-and-train-an-llm-from-scratch">Build and train an LLM from scratch</a></p></li><li><p>&#127853; <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Build a Mixture-of-Experts (MoE) LLM from Scratch</a></p></li><li><p>&#128126; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p>&#127856; <a href="https://www.intoai.pub/p/hierarchical-reasoning-model">Hierarchical Reasoning Model: A Deep Dive</a></p></li><li><p>&#129380; <a href="https://www.intoai.pub/p/building-your-first-ai-agent">Building Your First AI Agent</a></p></li><li><p>&#127812;&#8205;&#129003; <a href="https://www.intoai.pub/p/distributed-data-parallel">Learn to train deep learning models on multiple GPUs</a></p></li></ul><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Grab your 25% discount today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/741bb486"><span>Grab your 25% discount today</span></a></p><div><hr></div><p>You can also check out my books on <strong><a href="https://bamaniaashish.gumroad.com/">Gumroad</a></strong> and connect with me on <strong><a href="https://www.linkedin.com/in/ashishbamania/">LinkedIn</a></strong> to stay in touch.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B_dw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" width="1344" height="256" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:256,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" 
srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[11 Infographics on the Dark Side of AI Kept Hidden From You]]></title><description><![CDATA[This is what the AI industry is hiding from you]]></description><link>https://www.intoai.pub/p/the-dark-side-of-ai</link><guid isPermaLink="false">https://www.intoai.pub/p/the-dark-side-of-ai</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Sun, 01 Mar 2026 13:07:55 GMT</pubDate><enclosure url="https://substack-post-media.s3.amazonaws.com/public/images/17628388-c81c-4bf5-b509-856d6c949ac9_960x504.gif" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9Bmt!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F742a9f59-61df-46f4-9994-e01c86236a83_1200x630.gif" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9Bmt!,w_424,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F742a9f59-61df-46f4-9994-e01c86236a83_1200x630.gif 424w, https://substackcdn.com/image/fetch/$s_!9Bmt!,w_848,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F742a9f59-61df-46f4-9994-e01c86236a83_1200x630.gif 848w, https://substackcdn.com/image/fetch/$s_!9Bmt!,w_1272,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F742a9f59-61df-46f4-9994-e01c86236a83_1200x630.gif 1272w, https://substackcdn.com/image/fetch/$s_!9Bmt!,w_1456,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F742a9f59-61df-46f4-9994-e01c86236a83_1200x630.gif 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9Bmt!,w_1456,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F742a9f59-61df-46f4-9994-e01c86236a83_1200x630.gif" width="1200" height="630" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/742a9f59-61df-46f4-9994-e01c86236a83_1200x630.gif&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:630,&quot;width&quot;:1200,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:5558691,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/gif&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188602140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F742a9f59-61df-46f4-9994-e01c86236a83_1200x630.gif&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9Bmt!,w_424,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F742a9f59-61df-46f4-9994-e01c86236a83_1200x630.gif 424w, https://substackcdn.com/image/fetch/$s_!9Bmt!,w_848,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F742a9f59-61df-46f4-9994-e01c86236a83_1200x630.gif 848w, https://substackcdn.com/image/fetch/$s_!9Bmt!,w_1272,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F742a9f59-61df-46f4-9994-e01c86236a83_1200x630.gif 1272w, https://substackcdn.com/image/fetch/$s_!9Bmt!,w_1456,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F742a9f59-61df-46f4-9994-e01c86236a83_1200x630.gif 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" 
stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>&#128075;&#127995; Hey friends! </p><p>Artificial Intelligence is one of the greatest technologies of our lifetime. But there&#8217;s a dark side to it which you might not be aware of. </p><p>In this article, I present 11 infographics that tell the flaws and risks associated with current AI systems.</p><p>Get ready to have your mind blown! &#129327;</p><div><hr></div><p>&#129309; Before we begin, I want to introduce you to <strong><a href="https://www.linkedin.com/in/emma-mcilwaine-b8a5abab/overlay/about-this-profile/">Emma McIlwaine</a></strong>, the brilliant graphic designer and illustrator behind this newsletter&#8217;s infographics. 
</p><p>Check out her designs on <strong><a href="https://www.emmamcildesign.co.uk/">her website</a></strong><a href="https://www.emmamcildesign.co.uk/">,</a> follow her on <strong><a href="https://www.linkedin.com/in/emma-mcilwaine-b8a5abab/">LinkedIn</a></strong><a href="https://www.linkedin.com/in/emma-mcilwaine-b8a5abab/">,</a> and <strong>reach out to work with her</strong>.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!i6gh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fca205241-ec2f-481e-8770-ca2927d777fa_6600x1650.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!i6gh!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fca205241-ec2f-481e-8770-ca2927d777fa_6600x1650.png 424w, https://substackcdn.com/image/fetch/$s_!i6gh!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fca205241-ec2f-481e-8770-ca2927d777fa_6600x1650.png 848w, https://substackcdn.com/image/fetch/$s_!i6gh!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fca205241-ec2f-481e-8770-ca2927d777fa_6600x1650.png 1272w, https://substackcdn.com/image/fetch/$s_!i6gh!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fca205241-ec2f-481e-8770-ca2927d777fa_6600x1650.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!i6gh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fca205241-ec2f-481e-8770-ca2927d777fa_6600x1650.png" width="1456" height="364" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ca205241-ec2f-481e-8770-ca2927d777fa_6600x1650.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:364,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1190719,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188602140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fca205241-ec2f-481e-8770-ca2927d777fa_6600x1650.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!i6gh!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fca205241-ec2f-481e-8770-ca2927d777fa_6600x1650.png 424w, https://substackcdn.com/image/fetch/$s_!i6gh!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fca205241-ec2f-481e-8770-ca2927d777fa_6600x1650.png 848w, https://substackcdn.com/image/fetch/$s_!i6gh!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fca205241-ec2f-481e-8770-ca2927d777fa_6600x1650.png 1272w, https://substackcdn.com/image/fetch/$s_!i6gh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fca205241-ec2f-481e-8770-ca2927d777fa_6600x1650.png 1456w" sizes="100vw"></picture><div></div></div></a></figure></div><div><hr></div><h3>1. 
ChatGPT Health would cause serious harm if deployed in hospitals</h3><p>ChatGPT Health today answers health queries for millions of people around the world, but is it ready to do so? </p><p>Maybe not. A recent study published in <a href="https://www.nature.com/articles/s41591-026-04297-7">Nature Medicine</a> showed that:</p><ul><li><p>It under-triaged 52% of medical emergencies, such as <a href="https://en.wikipedia.org/wiki/Diabetic_ketoacidosis">Diabetic ketoacidosis</a> and impending <a href="https://en.wikipedia.org/wiki/Respiratory_failure">Respiratory failure</a>, to a 24&#8211;48-hour evaluation rather than the emergency department.</p></li><li><p>When a patient&#8217;s symptoms were downplayed by friends or family, the triage recommendations shifted towards less urgent care.</p></li><li><p>When people expressed suicidal thoughts with a specific method (which means a higher risk clinically), crisis intervention messages to get urgent help were less likely to be shown.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!o8V4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586d32c7-9c18-46a3-b9f0-70b8f22fd140_2811x2811.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!o8V4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586d32c7-9c18-46a3-b9f0-70b8f22fd140_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!o8V4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586d32c7-9c18-46a3-b9f0-70b8f22fd140_2811x2811.png 848w, 
https://substackcdn.com/image/fetch/$s_!o8V4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586d32c7-9c18-46a3-b9f0-70b8f22fd140_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!o8V4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586d32c7-9c18-46a3-b9f0-70b8f22fd140_2811x2811.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!o8V4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586d32c7-9c18-46a3-b9f0-70b8f22fd140_2811x2811.png" width="1456" height="1456" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/586d32c7-9c18-46a3-b9f0-70b8f22fd140_2811x2811.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:745592,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188602140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586d32c7-9c18-46a3-b9f0-70b8f22fd140_2811x2811.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!o8V4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586d32c7-9c18-46a3-b9f0-70b8f22fd140_2811x2811.png 424w, 
https://substackcdn.com/image/fetch/$s_!o8V4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586d32c7-9c18-46a3-b9f0-70b8f22fd140_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!o8V4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586d32c7-9c18-46a3-b9f0-70b8f22fd140_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!o8V4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F586d32c7-9c18-46a3-b9f0-70b8f22fd140_2811x2811.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>2. AI is drinking the world dry</h3><p>According to the <a href="https://blogs.worldbank.org/en/water/why-water-security-our-most-urgent-challenge-today">World Bank</a>, 4 billion people live in water-scarce areas today. </p><p><a href="https://ifcsia.org/wp-content/uploads/pdf/publications/Charting-Our-Water-Future-Economic.pdf">By 2030</a>, global demand for water will exceed sustainable supply by as much as 40%, and <a href="https://unstats.un.org/sdgs/report/2022/The-Sustainable-Development-Goals-Report-2022.pdf">1.6 billion people</a> will not have access to safe drinking water.</p><p>At present, the AI economy requires 23 cubic kilometers of water a year. <a href="https://amp.xylem.com/m/aa10f8022757c5e/original/Watering-the-New-Economy-DIGITAL-final.pdf">By 2050</a>, this is predicted to more than double, to over 54 cubic kilometers. </p><p>Unfortunately, this means that our world will fall short and will need to find an extra 31 cubic kilometers of water per year to run the AI economy. That&#8217;s enough to supply every human being on Earth with an extra 3,820 liters of freshwater a year.</p><p><a href="https://arxiv.org/pdf/2304.03271">According to another study</a>, training GPT-3 could have consumed a total of 5.4 million liters of water, with 700,000 liters of clean freshwater evaporated directly in the training data centers. 
</p><p>Alongside this, GPT-3 needs to &#8220;drink&#8221; a 500ml bottle of water for roughly 10-50 medium-length responses, depending on when and where it is deployed.</p><p>Information about the water consumption of our current AI systems is largely kept a secret.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!-nTG!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c1e819c-8843-4904-8ea4-49407a7313c8_2811x2811.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!-nTG!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c1e819c-8843-4904-8ea4-49407a7313c8_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!-nTG!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c1e819c-8843-4904-8ea4-49407a7313c8_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!-nTG!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c1e819c-8843-4904-8ea4-49407a7313c8_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!-nTG!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c1e819c-8843-4904-8ea4-49407a7313c8_2811x2811.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!-nTG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c1e819c-8843-4904-8ea4-49407a7313c8_2811x2811.png" width="1456" height="1456" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2c1e819c-8843-4904-8ea4-49407a7313c8_2811x2811.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:205803,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188602140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c1e819c-8843-4904-8ea4-49407a7313c8_2811x2811.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!-nTG!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c1e819c-8843-4904-8ea4-49407a7313c8_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!-nTG!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c1e819c-8843-4904-8ea4-49407a7313c8_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!-nTG!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c1e819c-8843-4904-8ea4-49407a7313c8_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!-nTG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c1e819c-8843-4904-8ea4-49407a7313c8_2811x2811.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>3. 
AI has a race problem</h3><p>A <a href="https://news.lehigh.edu/ai-exhibits-racial-bias-in-mortgage-underwriting-decisions">study from Lehigh University</a> found that LLMs consistently recommended denying more loans and charging higher interest rates to Black applicants than to otherwise identical white applicants.</p><p>The study also found that Black applicants would need credit scores ~120 points higher than white applicants to receive the same approval rate, and ~30 points higher to receive the same interest rate.</p><p>Another <a href="https://ojs.aaai.org/index.php/AIES/article/view/31748/33915">study from the University of Washington</a> revealed that AI resume screeners preferred white-associated names in 85.1% of cases, and Black male candidates were preferred 0% of the time.</p><p>Read it again: It&#8217;s <strong>0%</strong>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!jK8U!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdff43357-55a5-4f5f-83db-e2bcd5e267c2_2811x2811.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!jK8U!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdff43357-55a5-4f5f-83db-e2bcd5e267c2_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!jK8U!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdff43357-55a5-4f5f-83db-e2bcd5e267c2_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!jK8U!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdff43357-55a5-4f5f-83db-e2bcd5e267c2_2811x2811.png 1272w, 
https://substackcdn.com/image/fetch/$s_!jK8U!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdff43357-55a5-4f5f-83db-e2bcd5e267c2_2811x2811.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!jK8U!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdff43357-55a5-4f5f-83db-e2bcd5e267c2_2811x2811.png" width="1456" height="1456" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/dff43357-55a5-4f5f-83db-e2bcd5e267c2_2811x2811.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:195873,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188602140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdff43357-55a5-4f5f-83db-e2bcd5e267c2_2811x2811.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!jK8U!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdff43357-55a5-4f5f-83db-e2bcd5e267c2_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!jK8U!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdff43357-55a5-4f5f-83db-e2bcd5e267c2_2811x2811.png 848w, 
https://substackcdn.com/image/fetch/$s_!jK8U!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdff43357-55a5-4f5f-83db-e2bcd5e267c2_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!jK8U!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdff43357-55a5-4f5f-83db-e2bcd5e267c2_2811x2811.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe 
now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>4. Copyright? What&#8217;s that?</h3><p>Anthropic <a href="https://www.anthropic.com/news/detecting-and-preventing-distillation-attacks">recently released</a> a detailed report on how three Chinese AI labs used ~24,000 fraudulent accounts to extract the capabilities of its state-of-the-art AI model, Claude, to improve their own models. </p><p><em>But has Anthropic always been fair?</em> </p><p>A <a href="https://arxiv.org/abs/2601.02671">recent study</a> revealed that current LLMs can narrate almost the entire text of &#8216;<em>Harry Potter and the Sorcerer&#8217;s Stone</em>&#8217; word for word, with Claude 3.7 Sonnet reaching the highest recall of 95.8%, followed by Gemini 2.5 Pro (76.8%) and Grok 3 (70.3%).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ImsK!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41516498-f7de-4ceb-aff1-7ceba8a12e18_2811x2811.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ImsK!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41516498-f7de-4ceb-aff1-7ceba8a12e18_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!ImsK!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41516498-f7de-4ceb-aff1-7ceba8a12e18_2811x2811.png 848w, 
https://substackcdn.com/image/fetch/$s_!ImsK!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41516498-f7de-4ceb-aff1-7ceba8a12e18_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!ImsK!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41516498-f7de-4ceb-aff1-7ceba8a12e18_2811x2811.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ImsK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41516498-f7de-4ceb-aff1-7ceba8a12e18_2811x2811.png" width="1456" height="1456" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/41516498-f7de-4ceb-aff1-7ceba8a12e18_2811x2811.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1205993,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188602140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41516498-f7de-4ceb-aff1-7ceba8a12e18_2811x2811.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ImsK!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41516498-f7de-4ceb-aff1-7ceba8a12e18_2811x2811.png 424w, 
https://substackcdn.com/image/fetch/$s_!ImsK!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41516498-f7de-4ceb-aff1-7ceba8a12e18_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!ImsK!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41516498-f7de-4ceb-aff1-7ceba8a12e18_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!ImsK!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F41516498-f7de-4ceb-aff1-7ceba8a12e18_2811x2811.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>5. Yes, AI is really coming for your job</h3><p>Former CEO of Twitter/ X and current CEO of Block, Jack Dorsey, fired 4,000 employees this week, saying AI can work faster and better.</p><blockquote><p><em>&#8220;We're already seeing that the intelligence tools we&#8217;re creating and using, paired with smaller and flatter teams, are enabling a new way of working which fundamentally changes what it means to build and run a company. and that's accelerating rapidly. &#8212; <a href="https://x.com/jack/status/2027129697092731343?s=46&amp;t=JcgJzCfBLAqjEclylNU5WQ">Jack</a>&#8221;</em></p></blockquote><div class="twitter-embed" data-attrs="{&quot;url&quot;:&quot;https://x.com/jack/status/2027129697092731343?s=46&amp;t=JcgJzCfBLAqjEclylNU5WQ&quot;,&quot;full_text&quot;:&quot;we're making <span class=\&quot;tweet-fake-link\&quot;>@blocks</span> smaller today. here's my note to the company.\n\n####\n\ntoday we're making one of the hardest decisions in the history of our company: we're reducing our organization by nearly half, from over 10,000 people to just under 6,000. that means over 4,000 of you are&quot;,&quot;username&quot;:&quot;jack&quot;,&quot;name&quot;:&quot;jack&quot;,&quot;profile_image_url&quot;:&quot;https://pbs.substack.com/profile_images/1661201415899951105/azNjKOSH_normal.jpg&quot;,&quot;date&quot;:&quot;2026-02-26T21:12:36.000Z&quot;,&quot;photos&quot;:[],&quot;quoted_tweet&quot;:{},&quot;reply_count&quot;:8535,&quot;retweet_count&quot;:6479,&quot;like_count&quot;:49292,&quot;impression_count&quot;:57682210,&quot;expanded_url&quot;:null,&quot;video_url&quot;:null,&quot;belowTheFold&quot;:true}" data-component-name="Twitter2ToDOM"></div><p>But this isn&#8217;t a one-off event. 
</p><p>The <a href="https://www.imf.org/en/blogs/articles/2024/01/14/ai-will-transform-the-global-economy-lets-make-sure-it-benefits-humanity">IMF</a> says that almost 40% of global employment is exposed to AI, rising to 60% in advanced economies.</p><p>Moreover, <a href="https://www.challengergray.com/wp-content/uploads/2026/02/CR126007123.pdf">reports from Challenger, Gray &amp; Christmas</a> clearly show that AI caused nearly 55,000 job cuts in the US in 2025 alone.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!zo_j!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ddf0996-9426-4c6a-b581-90f86fc2747c_2811x2811.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!zo_j!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ddf0996-9426-4c6a-b581-90f86fc2747c_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!zo_j!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ddf0996-9426-4c6a-b581-90f86fc2747c_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!zo_j!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ddf0996-9426-4c6a-b581-90f86fc2747c_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!zo_j!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ddf0996-9426-4c6a-b581-90f86fc2747c_2811x2811.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!zo_j!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ddf0996-9426-4c6a-b581-90f86fc2747c_2811x2811.png" width="1456" height="1456" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2ddf0996-9426-4c6a-b581-90f86fc2747c_2811x2811.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:211104,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188602140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ddf0996-9426-4c6a-b581-90f86fc2747c_2811x2811.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!zo_j!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ddf0996-9426-4c6a-b581-90f86fc2747c_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!zo_j!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ddf0996-9426-4c6a-b581-90f86fc2747c_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!zo_j!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ddf0996-9426-4c6a-b581-90f86fc2747c_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!zo_j!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ddf0996-9426-4c6a-b581-90f86fc2747c_2811x2811.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>6. Your passport is in AI training data</h3><p>Data privacy in the AI age is largely a myth. </p><p>A <a href="https://arxiv.org/abs/2506.17185v1">study</a> that examined 0.1% of <em>CommonPool</em>, a popular large-scale web-scraped dataset used for training AI systems, found millions of images of passports, credit cards, and birth certificates, with an estimated 102 million unblurred faces. </p><p>Even examining every sample in the 0.1% was extremely difficult for the researchers, given the size of this dataset. 
<em>Imagine what the remaining 99.9% could contain?</em></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ZBed!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffa3c1b5c-8fb8-4018-97c7-afb5f85dce5d_2811x2811.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ZBed!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffa3c1b5c-8fb8-4018-97c7-afb5f85dce5d_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!ZBed!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffa3c1b5c-8fb8-4018-97c7-afb5f85dce5d_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!ZBed!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffa3c1b5c-8fb8-4018-97c7-afb5f85dce5d_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!ZBed!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffa3c1b5c-8fb8-4018-97c7-afb5f85dce5d_2811x2811.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ZBed!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffa3c1b5c-8fb8-4018-97c7-afb5f85dce5d_2811x2811.png" width="1456" height="1456" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fa3c1b5c-8fb8-4018-97c7-afb5f85dce5d_2811x2811.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:247399,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188602140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffa3c1b5c-8fb8-4018-97c7-afb5f85dce5d_2811x2811.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ZBed!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffa3c1b5c-8fb8-4018-97c7-afb5f85dce5d_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!ZBed!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffa3c1b5c-8fb8-4018-97c7-afb5f85dce5d_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!ZBed!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffa3c1b5c-8fb8-4018-97c7-afb5f85dce5d_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!ZBed!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffa3c1b5c-8fb8-4018-97c7-afb5f85dce5d_2811x2811.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>7. <strong>AI is choosing who lives and dies in war</strong></h3><p><a href="https://www.anthropic.com/news/statement-department-of-war">Anthropic has been in debates</a> with the U.S. 
Department of War over using its AI models to develop systems for mass domestic surveillance and fully autonomous weapons.</p><p>That might be great news for America, but many other countries have gone beyond such discussions.</p><p>(<em>Update: I&#8217;m sorry to tell you that <a href="https://x.com/sama/status/2027578580159631610">OpenAI has now accepted the above proposal</a>.</em>)</p><p>Israel&#8217;s defense forces reported using AI systems known as "Lavender" to identify targets in Gaza, and "The Gospel" to help select and prioritize physical strike targets.</p><p>According to <a href="https://www.972mag.com/lavender-ai-israeli-army-gaza/">+972 Magazine</a>, Israel&#8217;s army almost completely relied on "Lavender," which flagged 37,000 Palestinians as suspected militants and their homes for possible air strikes. </p><p>Despite knowing that the system makes errors in approximately 10% of cases, there was no requirement to check why the AI system made its choices.</p><p>Once the target was identified by &#8220;Lavender&#8221;, human personnel would take just 20 seconds to authorize a bombing, checking only that the marked target was male.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!8Q_q!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd702a8e-49b9-4b4f-8cf2-694de1eb65f4_2811x2811.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!8Q_q!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd702a8e-49b9-4b4f-8cf2-694de1eb65f4_2811x2811.png 424w, 
https://substackcdn.com/image/fetch/$s_!8Q_q!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd702a8e-49b9-4b4f-8cf2-694de1eb65f4_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!8Q_q!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd702a8e-49b9-4b4f-8cf2-694de1eb65f4_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!8Q_q!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd702a8e-49b9-4b4f-8cf2-694de1eb65f4_2811x2811.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!8Q_q!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd702a8e-49b9-4b4f-8cf2-694de1eb65f4_2811x2811.png" width="1456" height="1456" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fd702a8e-49b9-4b4f-8cf2-694de1eb65f4_2811x2811.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:337123,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188602140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd702a8e-49b9-4b4f-8cf2-694de1eb65f4_2811x2811.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!8Q_q!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd702a8e-49b9-4b4f-8cf2-694de1eb65f4_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!8Q_q!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd702a8e-49b9-4b4f-8cf2-694de1eb65f4_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!8Q_q!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd702a8e-49b9-4b4f-8cf2-694de1eb65f4_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!8Q_q!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffd702a8e-49b9-4b4f-8cf2-694de1eb65f4_2811x2811.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>8. Researchers are using AI to write fake science</h3><p>AI-written papers are becoming increasingly common in academia. To add to this, many are even using LLMs to peer-review journal submissions.</p><p>A <a href="https://www.nature.com/articles/d41586-023-03974-8">study published in Nature</a> showed that more than 10,000 research papers were retracted in 2023, setting a new record. AI-generated fraud was found to be one of the increasingly common reasons for these retractions.</p><p><em>What happens when results from these fake research papers find their way to products that directly affect your life?</em></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!nGiF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbacd2eed-740c-4533-8d5f-f53e520e71c9_2811x2811.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!nGiF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbacd2eed-740c-4533-8d5f-f53e520e71c9_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!nGiF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbacd2eed-740c-4533-8d5f-f53e520e71c9_2811x2811.png 848w, 
https://substackcdn.com/image/fetch/$s_!nGiF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbacd2eed-740c-4533-8d5f-f53e520e71c9_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!nGiF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbacd2eed-740c-4533-8d5f-f53e520e71c9_2811x2811.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!nGiF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbacd2eed-740c-4533-8d5f-f53e520e71c9_2811x2811.png" width="1456" height="1456" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/bacd2eed-740c-4533-8d5f-f53e520e71c9_2811x2811.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:334815,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188602140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbacd2eed-740c-4533-8d5f-f53e520e71c9_2811x2811.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!nGiF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbacd2eed-740c-4533-8d5f-f53e520e71c9_2811x2811.png 424w, 
https://substackcdn.com/image/fetch/$s_!nGiF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbacd2eed-740c-4533-8d5f-f53e520e71c9_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!nGiF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbacd2eed-740c-4533-8d5f-f53e520e71c9_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!nGiF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbacd2eed-740c-4533-8d5f-f53e520e71c9_2811x2811.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3><strong>9. Yes, AI might now send you to jail</strong></h3><blockquote><p><em>&#8220;AI won't take over for people. But people who use AI will take the place of those who don't.&#8221;</em> &#8212; Sam Altman</p></blockquote><p>Many have held on to this quote quite passionately despite knowing what AI systems can and cannot do. This includes lawyers as well.</p><p>There have been more than 900 cases worldwide where AI-generated hallucinated content appeared in <a href="https://www.damiencharlotin.com/hallucinations/">court filings</a>.</p><p>According to another <a href="https://hai.stanford.edu/news/ai-trial-legal-models-hallucinate-1-out-6-or-more-benchmarking-queries">study by Stanford HAI</a>, general-purpose AI chatbots hallucinate between 58% and 82% of the time in responses to legal queries.</p><p>A single hallucination like this is enough to cause a wrong legal judgment and send someone to jail.</p><p><em>God bless us all.</em></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!V7AO!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd879d377-134d-4367-acc5-eb7e0bd2de71_2811x2811.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!V7AO!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd879d377-134d-4367-acc5-eb7e0bd2de71_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!V7AO!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd879d377-134d-4367-acc5-eb7e0bd2de71_2811x2811.png 848w, 
https://substackcdn.com/image/fetch/$s_!V7AO!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd879d377-134d-4367-acc5-eb7e0bd2de71_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!V7AO!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd879d377-134d-4367-acc5-eb7e0bd2de71_2811x2811.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!V7AO!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd879d377-134d-4367-acc5-eb7e0bd2de71_2811x2811.png" width="1456" height="1456" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d879d377-134d-4367-acc5-eb7e0bd2de71_2811x2811.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1187212,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188602140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd879d377-134d-4367-acc5-eb7e0bd2de71_2811x2811.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!V7AO!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd879d377-134d-4367-acc5-eb7e0bd2de71_2811x2811.png 424w, 
https://substackcdn.com/image/fetch/$s_!V7AO!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd879d377-134d-4367-acc5-eb7e0bd2de71_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!V7AO!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd879d377-134d-4367-acc5-eb7e0bd2de71_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!V7AO!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd879d377-134d-4367-acc5-eb7e0bd2de71_2811x2811.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>10. The world lives in the dark as AI electrifies</h3><p><a href="https://www.iea.org/commentaries/access-to-electricity-stagnates-leaving-globally-730-million-in-the-dark">Data from the International Energy Agency (IEA)</a> shows that 730 million people worldwide still lacked access to electricity in 2024.</p><p><a href="https://www.iea.org/reports/energy-and-ai/energy-demand-from-ai">Global data centers</a>, on the other hand, consumed 415 TWh of electricity in 2024 and are projected to reach 945 TWh by 2030. This is more than Japan's total electricity consumption today!</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!xJ4s!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbcb7c6e4-1856-4f85-befa-b9b793aeb921_2811x2811.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!xJ4s!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbcb7c6e4-1856-4f85-befa-b9b793aeb921_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!xJ4s!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbcb7c6e4-1856-4f85-befa-b9b793aeb921_2811x2811.png 848w, 
https://substackcdn.com/image/fetch/$s_!xJ4s!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbcb7c6e4-1856-4f85-befa-b9b793aeb921_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!xJ4s!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbcb7c6e4-1856-4f85-befa-b9b793aeb921_2811x2811.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!xJ4s!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbcb7c6e4-1856-4f85-befa-b9b793aeb921_2811x2811.png" width="1456" height="1456" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/bcb7c6e4-1856-4f85-befa-b9b793aeb921_2811x2811.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:418101,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188602140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbcb7c6e4-1856-4f85-befa-b9b793aeb921_2811x2811.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!xJ4s!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbcb7c6e4-1856-4f85-befa-b9b793aeb921_2811x2811.png 424w, 
https://substackcdn.com/image/fetch/$s_!xJ4s!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbcb7c6e4-1856-4f85-befa-b9b793aeb921_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!xJ4s!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbcb7c6e4-1856-4f85-befa-b9b793aeb921_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!xJ4s!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbcb7c6e4-1856-4f85-befa-b9b793aeb921_2811x2811.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>11. Deepfakes are here to steal your identity</h3><p>Deepfakes sounded like a thing of the future a few years ago, but such a dystopian future is already here.</p><p>According to a report from the <a href="https://www.europarl.europa.eu/RegData/etudes/BRIE/2025/775855/EPRS_BRI(2025)775855_EN.pdf">European Parliament Research Service (EPRS)</a>, Deepfakes grew from ~500,000 in 2023 to a projected 8 million in 2025. </p><p>98% of them were non-consensual pornography and mostly targeted women.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!JVJ1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a97715e-65e8-437d-8ef2-6b6cc024588c_2811x2811.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!JVJ1!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a97715e-65e8-437d-8ef2-6b6cc024588c_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!JVJ1!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a97715e-65e8-437d-8ef2-6b6cc024588c_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!JVJ1!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a97715e-65e8-437d-8ef2-6b6cc024588c_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!JVJ1!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a97715e-65e8-437d-8ef2-6b6cc024588c_2811x2811.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!JVJ1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a97715e-65e8-437d-8ef2-6b6cc024588c_2811x2811.png" width="1456" height="1456" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8a97715e-65e8-437d-8ef2-6b6cc024588c_2811x2811.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:218539,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188602140?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a97715e-65e8-437d-8ef2-6b6cc024588c_2811x2811.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!JVJ1!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a97715e-65e8-437d-8ef2-6b6cc024588c_2811x2811.png 424w, https://substackcdn.com/image/fetch/$s_!JVJ1!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a97715e-65e8-437d-8ef2-6b6cc024588c_2811x2811.png 848w, https://substackcdn.com/image/fetch/$s_!JVJ1!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a97715e-65e8-437d-8ef2-6b6cc024588c_2811x2811.png 1272w, https://substackcdn.com/image/fetch/$s_!JVJ1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8a97715e-65e8-437d-8ef2-6b6cc024588c_2811x2811.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><p>If you loved reading this article and found it valuable, restack to share it with others and <strong>earn some referral rewards</strong>. 
&#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/the-dark-side-of-ai?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/p/the-dark-side-of-ai?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p><p><strong>Don&#8217;t forget to connect with Emma on <a href="https://www.linkedin.com/in/emma-mcilwaine-b8a5abab/">LinkedIn</a> and reach out to work with her.</strong></p><div class="embedded-publication-wrap" data-attrs="{&quot;id&quot;:2895345,&quot;name&quot;:&quot;Creative Bites&quot;,&quot;logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!b2Ac!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ab77ac8-9203-40ea-a97d-18d56cfd6266_1054x1054.png&quot;,&quot;base_url&quot;:&quot;https://emmathedesigner.substack.com&quot;,&quot;hero_text&quot;:&quot;As a full-time graphic designer, I serve up bite-sized insights, tips, and inspiration to fuel your creative journey.&quot;,&quot;author_name&quot;:&quot;Emma Mac&quot;,&quot;show_subscribe&quot;:true,&quot;logo_bg_color&quot;:null,&quot;language&quot;:&quot;en&quot;}" data-component-name="EmbeddedPublicationToDOMWithSubscribe"><div class="embedded-publication show-subscribe"><a class="embedded-publication-link-part" native="true" href="https://emmathedesigner.substack.com?utm_source=substack&amp;utm_campaign=publication_embed&amp;utm_medium=web"><img class="embedded-publication-logo" src="https://substackcdn.com/image/fetch/$s_!b2Ac!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2ab77ac8-9203-40ea-a97d-18d56cfd6266_1054x1054.png" width="56" height="56"><span 
class="embedded-publication-name">Creative Bites</span><div class="embedded-publication-hero-text">As a full-time graphic designer, I serve up bite-sized insights, tips, and inspiration to fuel your creative journey.</div><div class="embedded-publication-author-name">By Emma Mac</div></a><form class="embedded-publication-subscribe" method="GET" action="https://emmathedesigner.substack.com/subscribe?"><input type="hidden" name="source" value="publication-embed"><input type="hidden" name="autoSubmit" value="true"><input type="email" class="email-input" name="email" placeholder="Type your email..."><input type="submit" class="button primary" value="Subscribe"></form></div></div><div><hr></div><p>If you want to get even more value from this publication, <strong>become a paid subscriber</strong> to get access to posts like the following and more:</p><ul><li><p>&#127752; <a href="https://www.intoai.pub/p/build-and-train-an-llm-from-scratch">Build and train an LLM from scratch</a></p></li><li><p>&#127853; <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Build a Mixture-of-Experts (MoE) LLM from Scratch</a></p></li><li><p>&#128126; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p><strong>&#127856;</strong> <a href="https://www.intoai.pub/p/hierarchical-reasoning-model">Hierarchical Reasoning Model: A Deep Dive</a></p></li><li><p>&#129380;<a href="https://www.intoai.pub/p/building-your-first-ai-agent">Building Your First AI Agent</a></p></li><li><p>&#128051; <a href="https://www.intoai.pub/p/deepseek-r1-beats-openais-o1-revealing">DeepSeek-R1 Beats OpenAI&#8217;s o1, Revealing All Its Training Secrets Out In The Open</a></p></li></ul><p>Bye now! 
&#128075;&#127995;</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B_dw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" width="1344" height="256" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:256,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[Learn to train deep learning models on multiple GPUs]]></title><description><![CDATA[Part 1: Understand Distributed Data Parallel in depth, visually]]></description><link>https://www.intoai.pub/p/distributed-data-parallel</link><guid 
isPermaLink="false">https://www.intoai.pub/p/distributed-data-parallel</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Thu, 26 Feb 2026 13:06:02 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!YyVB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ikoI!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1df93aac-206e-455e-85b2-5c76092f8ae1_6912x3072.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ikoI!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1df93aac-206e-455e-85b2-5c76092f8ae1_6912x3072.png 424w, https://substackcdn.com/image/fetch/$s_!ikoI!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1df93aac-206e-455e-85b2-5c76092f8ae1_6912x3072.png 848w, https://substackcdn.com/image/fetch/$s_!ikoI!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1df93aac-206e-455e-85b2-5c76092f8ae1_6912x3072.png 1272w, https://substackcdn.com/image/fetch/$s_!ikoI!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1df93aac-206e-455e-85b2-5c76092f8ae1_6912x3072.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!ikoI!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1df93aac-206e-455e-85b2-5c76092f8ae1_6912x3072.png" width="1200" height="533.2417582417582" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1df93aac-206e-455e-85b2-5c76092f8ae1_6912x3072.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:647,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:1946790,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189120446?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1df93aac-206e-455e-85b2-5c76092f8ae1_6912x3072.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ikoI!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1df93aac-206e-455e-85b2-5c76092f8ae1_6912x3072.png 424w, https://substackcdn.com/image/fetch/$s_!ikoI!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1df93aac-206e-455e-85b2-5c76092f8ae1_6912x3072.png 848w, https://substackcdn.com/image/fetch/$s_!ikoI!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1df93aac-206e-455e-85b2-5c76092f8ae1_6912x3072.png 1272w, 
https://substackcdn.com/image/fetch/$s_!ikoI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1df93aac-206e-455e-85b2-5c76092f8ae1_6912x3072.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>We have long ago moved away from training production-grade deep learning models on a single machine.</p><p>Today, large deep learning models such as LLMs are usually trained for months on distributed clusters with multiple GPUs. 
This is because a single machine usually lacks the memory and computing power needed to process massive training datasets and train a model.</p><p>For example, <a href="https://huggingface.co/meta-llama/Llama-3.1-405B">Llama 3.1 models</a> were pre-trained on around 15 trillion data tokens&nbsp;and required&nbsp;a total of&nbsp;39.3M GPU hours of computation on <a href="https://www.nvidia.com/en-gb/data-center/h100/">NVIDIA H100-80GB GPUs</a>. <em>That&#8217;s a big number!</em></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!YyVB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!YyVB!,w_424,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif 424w, https://substackcdn.com/image/fetch/$s_!YyVB!,w_848,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif 848w, https://substackcdn.com/image/fetch/$s_!YyVB!,w_1272,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif 1272w, https://substackcdn.com/image/fetch/$s_!YyVB!,w_1456,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!YyVB!,w_1456,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif" width="480" height="376" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:376,&quot;width&quot;:480,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!YyVB!,w_424,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif 424w, https://substackcdn.com/image/fetch/$s_!YyVB!,w_848,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif 848w, https://substackcdn.com/image/fetch/$s_!YyVB!,w_1272,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif 1272w, https://substackcdn.com/image/fetch/$s_!YyVB!,w_1456,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd6f05c92-9baf-4986-94a2-b3e2fca7578f_480x376.gif 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" 
fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">LLM training in the times of the Flintstones (<a href="https://media0.giphy.com/media/v1.Y2lkPTc5MGI3NjExN3FtNDV1ZGQ5azZtcGkxNDUybGpjcWk0aWw5NGx3dHQ0dmw1b2JueCZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/tWhSt6azAiDYhW9VhG/giphy.gif">Source</a>)</figcaption></figure></div><p>In this two-part lesson, we will learn about an algorithm called <strong>Distributed Data Parallel (DDP)</strong>, and then get our hands dirty by writing some PyTorch code to train a model across multiple GPUs using it.</p><p><em>Let&#8217;s begin!</em></p><div><hr></div><p>Btw, I&#8217;m running a <strong>50% off sale</strong> on the annual subscription to &#8216;Into AI&#8217;, valid <strong>ONLY for the next 2 days.</strong> </p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/9e599873&quot;,&quot;text&quot;:&quot;Grab your discount 
now!&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/9e599873"><span>Grab your discount now!</span></a></p><p>Claim your discount <strong>NOW</strong> and get access to articles like these, and so much more:</p><ul><li><p>&#127752; <a href="https://www.intoai.pub/p/build-and-train-an-llm-from-scratch">Build and train an LLM from scratch</a></p></li><li><p>&#127853; <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch">Build a Mixture-of-Experts (MoE) LLM from Scratch</a></p></li><li><p>&#128126; <a href="https://www.intoai.pub/p/pytorch-essentials">20 PyTorch Concepts, Explained Simply</a></p></li><li><p><strong>&#127856;</strong> <a href="https://www.intoai.pub/p/hierarchical-reasoning-model">Hierarchical Reasoning Model: A Deep Dive</a></p></li><li><p>&#129380;<a href="https://www.intoai.pub/p/building-your-first-ai-agent">Building Your First AI Agent</a></p></li><li><p>&#128051; <a href="https://www.intoai.pub/p/deepseek-r1-beats-openais-o1-revealing">DeepSeek-R1 Beats OpenAI&#8217;s o1, Revealing All Its Training Secrets Out In The Open</a></p></li></ul><div><hr></div><h3>But first, what is <strong>Distributed Data Parallel (DDP)</strong>? </h3><p>Distributed data parallel (DDP) is a popular algorithm for training models across multiple GPUs. 
</p><p>The algorithm uses&nbsp;<strong>data parallelism</strong>,&nbsp;which means splitting the training dataset across multiple GPUs so each GPU processes a different portion of it in parallel during model training.</p><p>Here are the steps that take place in DDP:</p><ol><li><p>The model is first replicated on each GPU along with a copy of the optimizer.</p></li><li><p>The training dataset is split across GPUs using <a href="https://docs.pytorch.org/docs/stable/data.html">DistributedSampler</a>, which ensures that each GPU trains on a unique data subset/batch per epoch.</p></li><li><p>Each GPU runs a forward pass on its subset of the training dataset to calculate loss, and stores the activations.</p></li><li><p>Each GPU runs a backward pass using the activations and loss, and computes gradients.</p></li><li><p>All GPUs communicate and share their calculated gradients with each other.</p></li><li><p>The gradients are averaged across the GPUs using an algorithm called <strong><a href="https://docs.nvidia.com/doca/archive/doca-v2.2.1/allreduce/index.html">AllReduce</a></strong>.</p></li><li><p>Averaged gradients are communicated back to each GPU.</p></li><li><p>Each GPU updates its copy of the model weights using the averaged gradients. As each GPU starts with the same model weights and updates them using the same averaged gradients, all the resulting model copies remain in sync across GPUs.</p></li><li><p>The next batch of training data is loaded, and the cycle repeats from step 3 until the training epoch ends.</p></li><li><p>When training for multiple epochs, the entire training dataset is reshuffled using <a href="https://docs.pytorch.org/docs/stable/data.html">DistributedSampler</a> and is iterated over again.</p></li><li><p>Once all training epochs end, the trained model is returned.</p></li></ol><p><strong>DDP makes the training process even more efficient by running&nbsp;steps 4 and 5 at the same time. 
</strong></p><p>It doesn't wait for gradients from all layers to be computed before sharing them. Instead, it sends each layer's gradients to the other GPUs as soon as they're ready, so that communication occurs in parallel with computation.</p><div><hr></div><h3>Understand <strong>DDP Visually</strong></h3><p>Here is an example of a 3-GPU distributed training setup using DDP.</p><p>In the illustrations shown below:</p><ul><li><p>Each GPU starts with its own copy of the model (and optimizer).</p></li><li><p>Then, each GPU is given a different batch of samples from the training dataset.</p></li><li><p>Each GPU runs a forward and backward pass, computing loss and gradients.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Q-HV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2664c9b-4b78-4e13-8481-55dc29f5fbbc_1252x1244.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Q-HV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2664c9b-4b78-4e13-8481-55dc29f5fbbc_1252x1244.png 424w, https://substackcdn.com/image/fetch/$s_!Q-HV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2664c9b-4b78-4e13-8481-55dc29f5fbbc_1252x1244.png 848w, https://substackcdn.com/image/fetch/$s_!Q-HV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2664c9b-4b78-4e13-8481-55dc29f5fbbc_1252x1244.png 1272w, 
https://substackcdn.com/image/fetch/$s_!Q-HV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2664c9b-4b78-4e13-8481-55dc29f5fbbc_1252x1244.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Q-HV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2664c9b-4b78-4e13-8481-55dc29f5fbbc_1252x1244.png" width="1252" height="1244" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f2664c9b-4b78-4e13-8481-55dc29f5fbbc_1252x1244.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1244,&quot;width&quot;:1252,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:165521,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189120446?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2664c9b-4b78-4e13-8481-55dc29f5fbbc_1252x1244.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Q-HV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2664c9b-4b78-4e13-8481-55dc29f5fbbc_1252x1244.png 424w, https://substackcdn.com/image/fetch/$s_!Q-HV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2664c9b-4b78-4e13-8481-55dc29f5fbbc_1252x1244.png 848w, 
https://substackcdn.com/image/fetch/$s_!Q-HV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2664c9b-4b78-4e13-8481-55dc29f5fbbc_1252x1244.png 1272w, https://substackcdn.com/image/fetch/$s_!Q-HV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2664c9b-4b78-4e13-8481-55dc29f5fbbc_1252x1244.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><ul><li><p>Following this, gradients from all GPUs are aggregated and averaged using the <strong>AllReduce</strong> 
operation.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!DtnB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff85e9930-e15c-44a7-9c53-de8d3690f757_1182x1202.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!DtnB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff85e9930-e15c-44a7-9c53-de8d3690f757_1182x1202.png 424w, https://substackcdn.com/image/fetch/$s_!DtnB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff85e9930-e15c-44a7-9c53-de8d3690f757_1182x1202.png 848w, https://substackcdn.com/image/fetch/$s_!DtnB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff85e9930-e15c-44a7-9c53-de8d3690f757_1182x1202.png 1272w, https://substackcdn.com/image/fetch/$s_!DtnB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff85e9930-e15c-44a7-9c53-de8d3690f757_1182x1202.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!DtnB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff85e9930-e15c-44a7-9c53-de8d3690f757_1182x1202.png" width="1182" height="1202" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f85e9930-e15c-44a7-9c53-de8d3690f757_1182x1202.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1202,&quot;width&quot;:1182,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:132067,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189120446?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff85e9930-e15c-44a7-9c53-de8d3690f757_1182x1202.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!DtnB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff85e9930-e15c-44a7-9c53-de8d3690f757_1182x1202.png 424w, https://substackcdn.com/image/fetch/$s_!DtnB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff85e9930-e15c-44a7-9c53-de8d3690f757_1182x1202.png 848w, https://substackcdn.com/image/fetch/$s_!DtnB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff85e9930-e15c-44a7-9c53-de8d3690f757_1182x1202.png 1272w, https://substackcdn.com/image/fetch/$s_!DtnB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff85e9930-e15c-44a7-9c53-de8d3690f757_1182x1202.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><ul><li><p>After this step, the averaged gradients are broadcast back to all the GPUs. </p></li><li><p>Each GPU then updates its copy of model weights using them. 
This keeps all copies of the model between different GPUs synchronized.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!YGJ-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a10145b-d256-4f5c-824b-f19414ddeb74_1232x1150.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!YGJ-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a10145b-d256-4f5c-824b-f19414ddeb74_1232x1150.png 424w, https://substackcdn.com/image/fetch/$s_!YGJ-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a10145b-d256-4f5c-824b-f19414ddeb74_1232x1150.png 848w, https://substackcdn.com/image/fetch/$s_!YGJ-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a10145b-d256-4f5c-824b-f19414ddeb74_1232x1150.png 1272w, https://substackcdn.com/image/fetch/$s_!YGJ-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a10145b-d256-4f5c-824b-f19414ddeb74_1232x1150.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!YGJ-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a10145b-d256-4f5c-824b-f19414ddeb74_1232x1150.png" width="1232" height="1150" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1a10145b-d256-4f5c-824b-f19414ddeb74_1232x1150.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1150,&quot;width&quot;:1232,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:134575,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/189120446?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a10145b-d256-4f5c-824b-f19414ddeb74_1232x1150.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!YGJ-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a10145b-d256-4f5c-824b-f19414ddeb74_1232x1150.png 424w, https://substackcdn.com/image/fetch/$s_!YGJ-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a10145b-d256-4f5c-824b-f19414ddeb74_1232x1150.png 848w, https://substackcdn.com/image/fetch/$s_!YGJ-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a10145b-d256-4f5c-824b-f19414ddeb74_1232x1150.png 1272w, https://substackcdn.com/image/fetch/$s_!YGJ-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a10145b-d256-4f5c-824b-f19414ddeb74_1232x1150.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Once the last training epoch ends, the trained model is returned.</p><div><hr></div><h3>But DDP Isn&#8217;t Perfect</h3><p>DDP speeds up and efficiently scales model training, but it is not a one-stop solution to distributed model training.</p><p>DDP involves each GPU storing at least:</p><ul><li><p>Model parameters</p></li><li><p>Optimizer and its state</p></li><li><p>Model activations after forward pass</p></li><li><p>Gradients after backward pass</p></li></ul><p>Consider the <strong>NVIDIA H100 (80GB)</strong> GPU, which is widely used for model training. 
</p><p>If we train an <a href="https://huggingface.co/meta-llama/Meta-Llama-3-8B">8B-parameter Llama 3 model</a> on it using the <a href="https://www.intoai.pub/i/146844148/taking-adam-a-step-ahead-with-adamw">AdamW optimizer</a>, with full-precision training (which means that all numbers used are <a href="https://en.wikipedia.org/wiki/Single-precision_floating-point_format">FP32</a>, i.e., each requires 4 bytes):</p><ul><li><p>The model parameters will require: 8B &#215; 4 bytes = 32 GB</p></li><li><p>Gradients will require: 8B &#215; 4 bytes = 32 GB</p></li><li><p>The AdamW optimizer state (first and second moments) will require: 8B &#215; 4 bytes &#215; 2 = 64 GB</p></li><li><p>Alongside these, activations will require 10-20 GB, with the use of <a href="https://pytorch.org/blog/activation-checkpointing-techniques/">activation checkpointing</a></p></li><li><p>Other unaccounted computation/communication overheads might require another 2-3 GB</p></li></ul><p>The total requirement of roughly 140-151 GB far exceeds the GPU's VRAM capacity of 80 GB. This means that <strong>training such multi-billion-parameter models isn&#8217;t possible with DDP.</strong></p><p>This is when we move towards using Model Parallelism techniques, which involve splitting model parameters across multiple GPUs. This is a topic for future lessons in the newsletter.</p><div><hr></div><p>In the next part of this lesson on DDP, we will learn how to train a deep learning model using it on free-to-use cloud GPUs.</p><p>If you loved reading it and found it valuable, restack to share it with others. &#10084;&#65039;</p><p>Don&#8217;t forget to grab your <strong>50% off sale</strong> on the annual subscription to &#8216;Into AI&#8217;, valid <strong>ONLY for the next 2 days</strong>. 
</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/9e599873&quot;,&quot;text&quot;:&quot;Grab your discount now!&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/9e599873"><span>Grab your discount now!</span></a></p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B_dw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" width="1344" height="256" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:256,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[🗓️ This Week In AI Research (15-21 February 26)]]></title><description><![CDATA[The top 10 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-15-21-february</link><guid 
isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-15-21-february</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Mon, 23 Feb 2026 14:47:18 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!MioJ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F936f543a-e466-429c-a9f1-874424c79734_13824x6144.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!MioJ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F936f543a-e466-429c-a9f1-874424c79734_13824x6144.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!MioJ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F936f543a-e466-429c-a9f1-874424c79734_13824x6144.png 424w, https://substackcdn.com/image/fetch/$s_!MioJ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F936f543a-e466-429c-a9f1-874424c79734_13824x6144.png 848w, https://substackcdn.com/image/fetch/$s_!MioJ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F936f543a-e466-429c-a9f1-874424c79734_13824x6144.png 1272w, https://substackcdn.com/image/fetch/$s_!MioJ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F936f543a-e466-429c-a9f1-874424c79734_13824x6144.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!MioJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F936f543a-e466-429c-a9f1-874424c79734_13824x6144.png" width="724.65625" height="322.0141440590659" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/936f543a-e466-429c-a9f1-874424c79734_13824x6144.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:647,&quot;width&quot;:1456,&quot;resizeWidth&quot;:724.65625,&quot;bytes&quot;:15063606,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188780254?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F936f543a-e466-429c-a9f1-874424c79734_13824x6144.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!MioJ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F936f543a-e466-429c-a9f1-874424c79734_13824x6144.png 424w, https://substackcdn.com/image/fetch/$s_!MioJ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F936f543a-e466-429c-a9f1-874424c79734_13824x6144.png 848w, https://substackcdn.com/image/fetch/$s_!MioJ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F936f543a-e466-429c-a9f1-874424c79734_13824x6144.png 1272w, 
https://substackcdn.com/image/fetch/$s_!MioJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F936f543a-e466-429c-a9f1-874424c79734_13824x6144.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>&#127873;</strong> Before we start, I want to offer you a <strong>35% DISCOUNT </strong>on the annual subscription to &#8216;Into AI&#8217; as part of the <strong>Fab Feb offer</strong>. 
It&#8217;s only valid till the <strong>end of February</strong>, so don&#8217;t wait up!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/9e599873&quot;,&quot;text&quot;:&quot;Grab Your Offer NOW&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/9e599873"><span>Grab Your Offer NOW</span></a></p><div><hr></div><h4>1. Gemini 3.1 Pro</h4><p>Google released <strong>Gemini 3.1 Pro</strong>, a new model in the Gemini 3 series with advanced reasoning capabilities for complex problem-solving. </p><p>3.1 Pro achieved a verified score of 77.1% on ARC-AGI-2. This is more than double the reasoning performance of its predecessor, 3 Pro.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!pl9V!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e4b5aeb-3d90-45af-860b-89a1dc08024b_3000x2568.gif" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!pl9V!,w_424,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e4b5aeb-3d90-45af-860b-89a1dc08024b_3000x2568.gif 424w, https://substackcdn.com/image/fetch/$s_!pl9V!,w_848,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e4b5aeb-3d90-45af-860b-89a1dc08024b_3000x2568.gif 848w, https://substackcdn.com/image/fetch/$s_!pl9V!,w_1272,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e4b5aeb-3d90-45af-860b-89a1dc08024b_3000x2568.gif 1272w, 
https://substackcdn.com/image/fetch/$s_!pl9V!,w_1456,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e4b5aeb-3d90-45af-860b-89a1dc08024b_3000x2568.gif 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!pl9V!,w_1456,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e4b5aeb-3d90-45af-860b-89a1dc08024b_3000x2568.gif" width="1456" height="1246" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7e4b5aeb-3d90-45af-860b-89a1dc08024b_3000x2568.gif&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1246,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Side-by-side comparison of different benchmarks for AI models.&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Side-by-side comparison of different benchmarks for AI models." title="Side-by-side comparison of different benchmarks for AI models." 
srcset="https://substackcdn.com/image/fetch/$s_!pl9V!,w_424,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e4b5aeb-3d90-45af-860b-89a1dc08024b_3000x2568.gif 424w, https://substackcdn.com/image/fetch/$s_!pl9V!,w_848,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e4b5aeb-3d90-45af-860b-89a1dc08024b_3000x2568.gif 848w, https://substackcdn.com/image/fetch/$s_!pl9V!,w_1272,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e4b5aeb-3d90-45af-860b-89a1dc08024b_3000x2568.gif 1272w, https://substackcdn.com/image/fetch/$s_!pl9V!,w_1456,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e4b5aeb-3d90-45af-860b-89a1dc08024b_3000x2568.gif 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide 
lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this release <a href="https://blog.google/innovation-and-ai/models-and-research/gemini-models/gemini-3-1-pro/">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>2. <strong>MemoryArena</strong></h4><p>Current evaluation methods for agents test only one of the following in isolation:</p><ul><li><p><strong>Memorization</strong>: Recall of past conversations, without capturing how memory is used to guide future decisions</p></li><li><p><strong>Action</strong>: Agent acting in single-session tasks without the need for long-term memory</p></li></ul><p>In reality, memorization and action are tightly coupled. <strong>MemoryArena</strong> is a new benchmark that assesses this setting by testing agents' memory in multi-session tasks that require remembering and using information from earlier sessions to succeed in later ones. </p><p>MemoryArena covers four domains: </p><ul><li><p>Bundled Web Shopping</p></li><li><p>Group Travel Planning</p></li><li><p>Progressive Web Search, and </p></li><li><p>Sequential formal reasoning in math/physics</p></li></ul><p>Results show that while many agents might perform flawlessly on existing memory benchmarks, such as LoCoMo, they do not perform well on MemoryArena.</p><p>Alongside this, external memory and RAG systems do not consistently improve the performance of these agentic systems. 
This reveals a major issue that current benchmarks do not adequately test agents in real-world settings.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!scsi!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb757d014-4acf-4928-9b64-7e1a6079142f_1678x862.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!scsi!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb757d014-4acf-4928-9b64-7e1a6079142f_1678x862.png 424w, https://substackcdn.com/image/fetch/$s_!scsi!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb757d014-4acf-4928-9b64-7e1a6079142f_1678x862.png 848w, https://substackcdn.com/image/fetch/$s_!scsi!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb757d014-4acf-4928-9b64-7e1a6079142f_1678x862.png 1272w, https://substackcdn.com/image/fetch/$s_!scsi!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb757d014-4acf-4928-9b64-7e1a6079142f_1678x862.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!scsi!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb757d014-4acf-4928-9b64-7e1a6079142f_1678x862.png" width="1456" height="748" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b757d014-4acf-4928-9b64-7e1a6079142f_1678x862.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:748,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:256303,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188780254?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb757d014-4acf-4928-9b64-7e1a6079142f_1678x862.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!scsi!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb757d014-4acf-4928-9b64-7e1a6079142f_1678x862.png 424w, https://substackcdn.com/image/fetch/$s_!scsi!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb757d014-4acf-4928-9b64-7e1a6079142f_1678x862.png 848w, https://substackcdn.com/image/fetch/$s_!scsi!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb757d014-4acf-4928-9b64-7e1a6079142f_1678x862.png 1272w, https://substackcdn.com/image/fetch/$s_!scsi!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb757d014-4acf-4928-9b64-7e1a6079142f_1678x862.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/abs/2602.16313">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>3. Fast KV Compaction via Attention Matching</h4><p>Scaling LLMs to long contexts is bottlenecked by the size of the KV cache. 
To address this, deployed models usually compact long contexts using summarization or token dropping, which degrades performance.</p><p>This research paper addresses this problem by introducing a method for fast context compaction in latent space using <strong>Attention Matching</strong>.</p><p>The method constructs compact keys and values that reproduce attention outputs and preserve attention mass at a per-KV-head level.</p><p>The approach breaks down into efficient subproblems with many closed-form solutions, which enables KV compaction ratios of up to 50x in seconds with minimal loss in quality. This is orders of magnitude faster than previous latent-space methods.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!YeUV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffac05fa9-9522-4dea-9a07-43a786e213bb_1270x766.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!YeUV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffac05fa9-9522-4dea-9a07-43a786e213bb_1270x766.png 424w, https://substackcdn.com/image/fetch/$s_!YeUV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffac05fa9-9522-4dea-9a07-43a786e213bb_1270x766.png 848w, https://substackcdn.com/image/fetch/$s_!YeUV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffac05fa9-9522-4dea-9a07-43a786e213bb_1270x766.png 1272w, 
https://substackcdn.com/image/fetch/$s_!YeUV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffac05fa9-9522-4dea-9a07-43a786e213bb_1270x766.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!YeUV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffac05fa9-9522-4dea-9a07-43a786e213bb_1270x766.png" width="1270" height="766" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fac05fa9-9522-4dea-9a07-43a786e213bb_1270x766.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:766,&quot;width&quot;:1270,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:307119,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188780254?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffac05fa9-9522-4dea-9a07-43a786e213bb_1270x766.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!YeUV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffac05fa9-9522-4dea-9a07-43a786e213bb_1270x766.png 424w, https://substackcdn.com/image/fetch/$s_!YeUV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffac05fa9-9522-4dea-9a07-43a786e213bb_1270x766.png 848w, https://substackcdn.com/image/fetch/$s_!YeUV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffac05fa9-9522-4dea-9a07-43a786e213bb_1270x766.png 
1272w, https://substackcdn.com/image/fetch/$s_!YeUV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffac05fa9-9522-4dea-9a07-43a786e213bb_1270x766.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/abs/2602.16284">using this link</a>.</p><div><hr></div><h4>4. Towards a Science of AI Agent Reliability</h4><p>The difference between AI agents' performance on benchmarks and in the real world is quite striking. 
This is because current benchmarks test agent behavior using a single success metric (accuracy), which hides many of their critical operational flaws.</p><p>This research paper addresses this by proposing 12 specific metrics that test agent reliability along four key dimensions:</p><ul><li><p>Consistency</p></li><li><p>Robustness</p></li><li><p>Predictability</p></li><li><p>Safety</p></li></ul><p>An evaluation of 14 agentic models across two complementary benchmarks shows that recent capability gains yield only small improvements in reliability, with consistency and predictability lagging the most.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!X3OY!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6dcaae9-6254-40e0-b704-142e8cbaefa9_2174x1330.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!X3OY!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6dcaae9-6254-40e0-b704-142e8cbaefa9_2174x1330.png 424w, https://substackcdn.com/image/fetch/$s_!X3OY!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6dcaae9-6254-40e0-b704-142e8cbaefa9_2174x1330.png 848w, https://substackcdn.com/image/fetch/$s_!X3OY!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6dcaae9-6254-40e0-b704-142e8cbaefa9_2174x1330.png 1272w, https://substackcdn.com/image/fetch/$s_!X3OY!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6dcaae9-6254-40e0-b704-142e8cbaefa9_2174x1330.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!X3OY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6dcaae9-6254-40e0-b704-142e8cbaefa9_2174x1330.png" width="1456" height="891" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b6dcaae9-6254-40e0-b704-142e8cbaefa9_2174x1330.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:891,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:454362,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188780254?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6dcaae9-6254-40e0-b704-142e8cbaefa9_2174x1330.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!X3OY!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6dcaae9-6254-40e0-b704-142e8cbaefa9_2174x1330.png 424w, https://substackcdn.com/image/fetch/$s_!X3OY!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6dcaae9-6254-40e0-b704-142e8cbaefa9_2174x1330.png 848w, https://substackcdn.com/image/fetch/$s_!X3OY!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6dcaae9-6254-40e0-b704-142e8cbaefa9_2174x1330.png 1272w, https://substackcdn.com/image/fetch/$s_!X3OY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb6dcaae9-6254-40e0-b704-142e8cbaefa9_2174x1330.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2602.16666v1">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>5. 
Experiential Reinforcement Learning</h4><p>This research paper introduces <strong>Experiential Reinforcement Learning (ERL)</strong>, a new training approach for LLMs that incorporates an explicit experience&#8211;reflection&#8211;consolidation loop into the RL process.</p><p>For a given task, ERL has a model generate its first attempt. The model then receives environmental feedback and generates a self-reflection conditioned on this attempt, which guides it to produce a more refined second attempt.</p><p>Both attempts and reflections are optimized with RL, while successful second attempts are internalized via self-distillation, so the model learns to reproduce improved behavior directly from the original input without self-reflection.</p><p>Across sparse-reward control environments and agentic reasoning benchmarks, ERL improves learning efficiency and final performance over strong RL baselines. </p><p>The gains reach up to +81% in complex multi-step environments and up to +11% in tool-using reasoning tasks.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!N34k!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90b97658-f64f-4e5c-8231-5e2f8d0a43ee_2606x1224.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!N34k!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90b97658-f64f-4e5c-8231-5e2f8d0a43ee_2606x1224.png 424w, https://substackcdn.com/image/fetch/$s_!N34k!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90b97658-f64f-4e5c-8231-5e2f8d0a43ee_2606x1224.png 848w, 
https://substackcdn.com/image/fetch/$s_!N34k!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90b97658-f64f-4e5c-8231-5e2f8d0a43ee_2606x1224.png 1272w, https://substackcdn.com/image/fetch/$s_!N34k!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90b97658-f64f-4e5c-8231-5e2f8d0a43ee_2606x1224.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!N34k!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90b97658-f64f-4e5c-8231-5e2f8d0a43ee_2606x1224.png" width="1456" height="684" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/90b97658-f64f-4e5c-8231-5e2f8d0a43ee_2606x1224.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:684,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:367615,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188780254?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90b97658-f64f-4e5c-8231-5e2f8d0a43ee_2606x1224.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!N34k!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90b97658-f64f-4e5c-8231-5e2f8d0a43ee_2606x1224.png 424w, 
https://substackcdn.com/image/fetch/$s_!N34k!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90b97658-f64f-4e5c-8231-5e2f8d0a43ee_2606x1224.png 848w, https://substackcdn.com/image/fetch/$s_!N34k!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90b97658-f64f-4e5c-8231-5e2f8d0a43ee_2606x1224.png 1272w, https://substackcdn.com/image/fetch/$s_!N34k!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F90b97658-f64f-4e5c-8231-5e2f8d0a43ee_2606x1224.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://www.arxiv.org/abs/2602.13949">using this link</a>.</p><div><hr></div><h4>6. Calibrate-Then-Act</h4><p>Solving complex problems requires LLM-based agents to interact with an environment to gather information.</p><p>This research paper introduces a framework that enables agents to balance exploration cost against uncertainty when interacting with an environment, thereby exploring the environment more optimally.</p><p>Multiple tasks are formalized as sequential decision-making problems under uncertainty. Each problem has a latent environment state that can be reasoned about via a prior. The framework, called&nbsp;<strong>Calibrate-Then-Act (CTA)</strong>, then feeds the LLM this additional context to enable it to act more optimally.</p><p>Results on information-seeking QA and on a simplified coding task show that making cost-benefit tradeoffs explicit with CTA can help agents discover more optimal decision-making strategies.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Y-wg!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2228201b-664c-49aa-943c-ccfa35b7f907_1300x772.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Y-wg!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2228201b-664c-49aa-943c-ccfa35b7f907_1300x772.png 424w, https://substackcdn.com/image/fetch/$s_!Y-wg!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2228201b-664c-49aa-943c-ccfa35b7f907_1300x772.png 848w, 
https://substackcdn.com/image/fetch/$s_!Y-wg!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2228201b-664c-49aa-943c-ccfa35b7f907_1300x772.png 1272w, https://substackcdn.com/image/fetch/$s_!Y-wg!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2228201b-664c-49aa-943c-ccfa35b7f907_1300x772.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Y-wg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2228201b-664c-49aa-943c-ccfa35b7f907_1300x772.png" width="1300" height="772" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2228201b-664c-49aa-943c-ccfa35b7f907_1300x772.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:772,&quot;width&quot;:1300,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:377353,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188780254?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2228201b-664c-49aa-943c-ccfa35b7f907_1300x772.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Y-wg!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2228201b-664c-49aa-943c-ccfa35b7f907_1300x772.png 424w, https://substackcdn.com/image/fetch/$s_!Y-wg!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2228201b-664c-49aa-943c-ccfa35b7f907_1300x772.png 
848w, https://substackcdn.com/image/fetch/$s_!Y-wg!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2228201b-664c-49aa-943c-ccfa35b7f907_1300x772.png 1272w, https://substackcdn.com/image/fetch/$s_!Y-wg!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2228201b-664c-49aa-943c-ccfa35b7f907_1300x772.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2602.16699v2">using this link</a>.</p><div><hr></div><h4>7. 
<strong>Evaluating Collective Behaviour of Hundreds of LLM Agents</strong></h4><p>This research paper presents a scalable evaluation framework to study how large groups of LLM-based autonomous agents behave in social dilemmas.</p><p>With this framework, LLMs generate strategies encoded as algorithms, enabling their inspection prior to deployment.</p><p>Experiments show that newer models often lead to worse social outcomes than older ones when agents focus on personal gain instead of group benefits. </p><p>By using cultural evolution simulations to model user selection of agents, researchers find a significant risk of poor social outcomes, especially as the relative benefit of cooperation decreases and population size grows. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!iDt6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4063ccc4-15b2-4916-8cfb-63f1a2d60943_2370x1264.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!iDt6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4063ccc4-15b2-4916-8cfb-63f1a2d60943_2370x1264.png 424w, https://substackcdn.com/image/fetch/$s_!iDt6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4063ccc4-15b2-4916-8cfb-63f1a2d60943_2370x1264.png 848w, https://substackcdn.com/image/fetch/$s_!iDt6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4063ccc4-15b2-4916-8cfb-63f1a2d60943_2370x1264.png 1272w, 
https://substackcdn.com/image/fetch/$s_!iDt6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4063ccc4-15b2-4916-8cfb-63f1a2d60943_2370x1264.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!iDt6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4063ccc4-15b2-4916-8cfb-63f1a2d60943_2370x1264.png" width="1456" height="777" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4063ccc4-15b2-4916-8cfb-63f1a2d60943_2370x1264.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:777,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1122524,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188780254?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4063ccc4-15b2-4916-8cfb-63f1a2d60943_2370x1264.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!iDt6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4063ccc4-15b2-4916-8cfb-63f1a2d60943_2370x1264.png 424w, https://substackcdn.com/image/fetch/$s_!iDt6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4063ccc4-15b2-4916-8cfb-63f1a2d60943_2370x1264.png 848w, 
https://substackcdn.com/image/fetch/$s_!iDt6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4063ccc4-15b2-4916-8cfb-63f1a2d60943_2370x1264.png 1272w, https://substackcdn.com/image/fetch/$s_!iDt6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4063ccc4-15b2-4916-8cfb-63f1a2d60943_2370x1264.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2602.16662">using this link</a>.</p><div><hr></div><h4>8. 
ZUNA</h4><p>This research paper from Zyphra introduces <strong>ZUNA</strong>, an <a href="https://github.com/Zyphra/zuna">open-sourced</a> foundation model with 380 million parameters for EEG (electroencephalogram) brain-computer interface (BCI) data. </p><p>The model is designed to denoise, reconstruct, and upsample EEG signals from different electrode layouts. </p><p>It is built as a diffusion autoencoder that can take a subset of EEG channels and reliably recover missing or corrupted ones. This improves spatial resolution and signal quality for research and practical uses. </p><p>ZUNA can work across various datasets and hardware, and serves as a foundation for non-invasive &#8220;thought-to-text&#8221; systems and for better understanding neural signals. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ypaD!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3397c001-5e36-4c2f-9f8e-09b90d293442_2006x1008.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ypaD!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3397c001-5e36-4c2f-9f8e-09b90d293442_2006x1008.png 424w, https://substackcdn.com/image/fetch/$s_!ypaD!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3397c001-5e36-4c2f-9f8e-09b90d293442_2006x1008.png 848w, https://substackcdn.com/image/fetch/$s_!ypaD!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3397c001-5e36-4c2f-9f8e-09b90d293442_2006x1008.png 1272w, 
https://substackcdn.com/image/fetch/$s_!ypaD!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3397c001-5e36-4c2f-9f8e-09b90d293442_2006x1008.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ypaD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3397c001-5e36-4c2f-9f8e-09b90d293442_2006x1008.png" width="1456" height="732" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3397c001-5e36-4c2f-9f8e-09b90d293442_2006x1008.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:732,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:968116,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188780254?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3397c001-5e36-4c2f-9f8e-09b90d293442_2006x1008.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ypaD!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3397c001-5e36-4c2f-9f8e-09b90d293442_2006x1008.png 424w, https://substackcdn.com/image/fetch/$s_!ypaD!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3397c001-5e36-4c2f-9f8e-09b90d293442_2006x1008.png 848w, 
https://substackcdn.com/image/fetch/$s_!ypaD!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3397c001-5e36-4c2f-9f8e-09b90d293442_2006x1008.png 1272w, https://substackcdn.com/image/fetch/$s_!ypaD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3397c001-5e36-4c2f-9f8e-09b90d293442_2006x1008.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://www.zyphra.com/zuna-technical-paper">using this link</a>.</p><div><hr></div><h4>9. 
KLong</h4><p>This research paper introduces <strong>KLong</strong>, an open-source LLM agent trained to solve extremely long-horizon tasks. These are tasks that would exceed the context window without context management and involve long-running experiments such as those from <a href="https://github.com/openai/mle-bench">MLE-bench</a> and <a href="https://openai.com/index/paperbench/">PaperBench</a>.</p><p>Experiments show that KLong greatly enhances performance on long-horizon benchmarks compared to baselines.</p><p>Notably, KLong (106B) beats Kimi K2 Thinking (1T) by 11.28% on PaperBench. This performance improvement generalizes to other coding benchmarks, such as SWE-bench-Verified and MLE-bench.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!YVxX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89ad473c-902d-4c00-98b7-3ab24603cc31_2102x1162.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!YVxX!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89ad473c-902d-4c00-98b7-3ab24603cc31_2102x1162.png 424w, https://substackcdn.com/image/fetch/$s_!YVxX!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89ad473c-902d-4c00-98b7-3ab24603cc31_2102x1162.png 848w, https://substackcdn.com/image/fetch/$s_!YVxX!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89ad473c-902d-4c00-98b7-3ab24603cc31_2102x1162.png 1272w, 
https://substackcdn.com/image/fetch/$s_!YVxX!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89ad473c-902d-4c00-98b7-3ab24603cc31_2102x1162.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!YVxX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89ad473c-902d-4c00-98b7-3ab24603cc31_2102x1162.png" width="1456" height="805" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/89ad473c-902d-4c00-98b7-3ab24603cc31_2102x1162.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:805,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:389945,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188780254?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89ad473c-902d-4c00-98b7-3ab24603cc31_2102x1162.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!YVxX!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89ad473c-902d-4c00-98b7-3ab24603cc31_2102x1162.png 424w, https://substackcdn.com/image/fetch/$s_!YVxX!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89ad473c-902d-4c00-98b7-3ab24603cc31_2102x1162.png 848w, 
https://substackcdn.com/image/fetch/$s_!YVxX!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89ad473c-902d-4c00-98b7-3ab24603cc31_2102x1162.png 1272w, https://substackcdn.com/image/fetch/$s_!YVxX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F89ad473c-902d-4c00-98b7-3ab24603cc31_2102x1162.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/pdf/2602.17547">using this link</a>.</p><div><hr></div><h4>10. 
Adam Improves Muon: Adaptive Moment Estimation with Orthogonalized Momentum</h4><p>This research paper presents <strong>NAMO</strong> and <strong>NAMO-D</strong>, two new optimization algorithms that combine the benefits of orthogonal updates (as seen in Muon optimizer) with the noise adjustment of Adam-type methods.</p><p>NAMO preserves momentum orthogonality and introduces a single adaptive learning rate, which enhances Muon without any significant extra cost. </p><p>Its diagonal variant, NAMO-D, further improves performance by allowing neuron-wise noise adaptation through a clamped diagonal scaling.</p><p>The research paper includes theoretical convergence guarantees in both deterministic and stochastic environments, showing that both optimizers adapt well to gradient noise.</p><p>Empirical results from GPT-2 pretraining also show that NAMO, particularly NAMO-D, surpasses both AdamW and Muon baselines.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!7p-y!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981add0f-dacc-4f22-a1a6-02fe76d3ca19_2436x1062.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!7p-y!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981add0f-dacc-4f22-a1a6-02fe76d3ca19_2436x1062.png 424w, https://substackcdn.com/image/fetch/$s_!7p-y!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981add0f-dacc-4f22-a1a6-02fe76d3ca19_2436x1062.png 848w, 
https://substackcdn.com/image/fetch/$s_!7p-y!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981add0f-dacc-4f22-a1a6-02fe76d3ca19_2436x1062.png 1272w, https://substackcdn.com/image/fetch/$s_!7p-y!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981add0f-dacc-4f22-a1a6-02fe76d3ca19_2436x1062.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!7p-y!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981add0f-dacc-4f22-a1a6-02fe76d3ca19_2436x1062.png" width="1456" height="635" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/981add0f-dacc-4f22-a1a6-02fe76d3ca19_2436x1062.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:635,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:364517,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/188780254?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981add0f-dacc-4f22-a1a6-02fe76d3ca19_2436x1062.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!7p-y!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981add0f-dacc-4f22-a1a6-02fe76d3ca19_2436x1062.png 424w, 
https://substackcdn.com/image/fetch/$s_!7p-y!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981add0f-dacc-4f22-a1a6-02fe76d3ca19_2436x1062.png 848w, https://substackcdn.com/image/fetch/$s_!7p-y!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981add0f-dacc-4f22-a1a6-02fe76d3ca19_2436x1062.png 1272w, https://substackcdn.com/image/fetch/$s_!7p-y!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F981add0f-dacc-4f22-a1a6-02fe76d3ca19_2436x1062.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research paper <a href="https://arxiv.org/abs/2602.17080">using this link</a>.</p><div><hr></div><p>This article is entirely free to read. If you loved reading it, restack and share it with others to <strong>earn referral rewards.</strong> &#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/this-week-in-ai-research-15-21-february?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/p/this-week-in-ai-research-15-21-february?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p><p><em><strong>Don&#8217;t forget to become a paid subscriber with a 35% discount on the annual subscription. 
Offer ends this month!</strong></em></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?coupon=9e599873&quot;,&quot;text&quot;:&quot;Grab your offer NOW!&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?coupon=9e599873"><span>Grab your offer NOW!</span></a></p><p>You can also check out my books on <strong><a href="https://bamaniaashish.gumroad.com/">Gumroad</a></strong> and connect with me on <strong><a href="https://www.linkedin.com/in/ashishbamania/">LinkedIn</a></strong> to stay in touch.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B_dw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" width="1344" height="256" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:256,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[Build a Personal Health Companion with Mem0 and 
CrewAI]]></title><description><![CDATA[Learn to build a multi-agentic personal health companion with long-term memory using Mem0 and CrewAI.]]></description><link>https://www.intoai.pub/p/build-a-personal-health-companion</link><guid isPermaLink="false">https://www.intoai.pub/p/build-a-personal-health-companion</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Fri, 20 Feb 2026 11:06:31 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!4GXM!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf142e18-5853-4049-82ea-4f64557d0670_2548x1124.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!4GXM!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf142e18-5853-4049-82ea-4f64557d0670_2548x1124.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!4GXM!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf142e18-5853-4049-82ea-4f64557d0670_2548x1124.png 424w, https://substackcdn.com/image/fetch/$s_!4GXM!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf142e18-5853-4049-82ea-4f64557d0670_2548x1124.png 848w, https://substackcdn.com/image/fetch/$s_!4GXM!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf142e18-5853-4049-82ea-4f64557d0670_2548x1124.png 1272w, 
https://substackcdn.com/image/fetch/$s_!4GXM!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf142e18-5853-4049-82ea-4f64557d0670_2548x1124.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!4GXM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf142e18-5853-4049-82ea-4f64557d0670_2548x1124.png" width="1456" height="642" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/cf142e18-5853-4049-82ea-4f64557d0670_2548x1124.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:642,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:501388,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187960730?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf142e18-5853-4049-82ea-4f64557d0670_2548x1124.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!4GXM!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf142e18-5853-4049-82ea-4f64557d0670_2548x1124.png 424w, https://substackcdn.com/image/fetch/$s_!4GXM!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf142e18-5853-4049-82ea-4f64557d0670_2548x1124.png 848w, 
https://substackcdn.com/image/fetch/$s_!4GXM!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf142e18-5853-4049-82ea-4f64557d0670_2548x1124.png 1272w, https://substackcdn.com/image/fetch/$s_!4GXM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcf142e18-5853-4049-82ea-4f64557d0670_2548x1124.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Hey there! 
</p><p>In the last few lessons, we learned about AI memory.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;4a4118be-4663-49e0-8757-e82386c8e52f&quot;,&quot;caption&quot;:&quot;&#127873; Become a paid subscriber to &#8216;Into AI&#8217; today at a special 25% discount on the annual subscription.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Memory For AI Agents: Everything That You Need To Know (Part-1)&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-02-04T10:30:36.271Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!UGCo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/ai-memory&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:186643994,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:8,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;5b2950d3-3e70-4009-8401-cc4cfe45d7a1&quot;,&quot;caption&quot;:&quot;In the last part of this series, we learned about what memory is and why modern AI systems need it.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;What is Mem0 and how does it work&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing 
&#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-02-10T11:25:54.373Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!UGCo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/mem0&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:186846978,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:7,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>In this lesson, we will apply these concepts to build a multi-agent system that can monitor personal health issues/comorbidities and act as a long-term health companion.</p><p>The tools that we will use are:</p><ul><li><p><strong>CrewAI</strong>: Framework for building multi-agent systems</p></li><li><p><strong>Mem0</strong>: Persistent memory layer for LLMs</p></li></ul><p>If you&#8217;re completely new to building agents with CrewAI, the following lesson will help you get started.</p><div class="digest-post-embed" 
data-attrs="{&quot;nodeId&quot;:&quot;8c1ddcd1-9459-475e-998e-868f2e56a4b2&quot;,&quot;caption&quot;:&quot;Agentic AI is on the boom.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Building Your First AI Agent (That Will Actually Improve You As An AI Engineer)&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-03-15T15:21:47.164Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!H3AJ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffee00599-5694-4e8b-aef2-706a238bf612_1792x1024.jpeg&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/building-your-first-ai-agent&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:159124901,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:28,&quot;comment_count&quot;:6,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p><em>Let&#8217;s 
begin!</em></p><div><hr></div><h3><strong>Overview of the Multi-Agent System</strong></h3><p>We intend to build a health companion that manages chronic health conditions/comorbidities using three specialized AI agents that work together <a href="https://docs.crewai.com/en/learn/sequential-process">sequentially</a>.</p><p>These agents are:</p>
      <p>
          <a href="https://www.intoai.pub/p/build-a-personal-health-companion">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[🗓️ This Week In AI Research (8-14 February 26)]]></title><description><![CDATA[The top 11 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-8-14-february</link><guid isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-8-14-february</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Mon, 16 Feb 2026 12:30:12 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!Uz2e!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d9c4133-917e-4984-b2af-6550bd3c5847_6912x3072.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Uz2e!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d9c4133-917e-4984-b2af-6550bd3c5847_6912x3072.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Uz2e!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d9c4133-917e-4984-b2af-6550bd3c5847_6912x3072.png 424w, https://substackcdn.com/image/fetch/$s_!Uz2e!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d9c4133-917e-4984-b2af-6550bd3c5847_6912x3072.png 848w, https://substackcdn.com/image/fetch/$s_!Uz2e!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d9c4133-917e-4984-b2af-6550bd3c5847_6912x3072.png 1272w, 
https://substackcdn.com/image/fetch/$s_!Uz2e!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d9c4133-917e-4984-b2af-6550bd3c5847_6912x3072.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Uz2e!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d9c4133-917e-4984-b2af-6550bd3c5847_6912x3072.png" width="1456" height="647" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2d9c4133-917e-4984-b2af-6550bd3c5847_6912x3072.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:647,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2203465,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187905475?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d9c4133-917e-4984-b2af-6550bd3c5847_6912x3072.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Uz2e!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d9c4133-917e-4984-b2af-6550bd3c5847_6912x3072.png 424w, https://substackcdn.com/image/fetch/$s_!Uz2e!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d9c4133-917e-4984-b2af-6550bd3c5847_6912x3072.png 848w, 
https://substackcdn.com/image/fetch/$s_!Uz2e!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d9c4133-917e-4984-b2af-6550bd3c5847_6912x3072.png 1272w, https://substackcdn.com/image/fetch/$s_!Uz2e!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d9c4133-917e-4984-b2af-6550bd3c5847_6912x3072.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Before we start, I want to introduce you to my book, <strong>LLMs In 100 Images</strong>, which is a collection of 100 
easy-to-follow visuals that explain the most important concepts you need to master to understand LLMs today.</p><p><strong><a href="https://bamaniaashish.gumroad.com/l/llmbook/EARLYBIRD">Grab your copy today at a 20% discount using this link.</a></strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_ysS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!_ysS!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 424w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 848w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 1272w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!_ysS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png" width="1456" height="728" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/aac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:728,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!_ysS!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 424w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 848w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 1272w, https://substackcdn.com/image/fetch/$s_!_ysS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac3d2c1-d1f6-4907-8bf1-b2ca3719e2f9_2400x1200.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h4>1. <strong>GPT&#8209;5.2 derives a new result in theoretical physics</strong></h4><p>OpenAI&#8217;s GPT&#8209;5.2 helped develop a new theoretical physics result by proposing a simpler formula for a previously overlooked gluon scattering amplitude. </p><p>This configuration, once thought to be zero under standard assumptions, actually has a non-zero amplitude in a special kinematic regime. </p><p>The formula first proposed by GPT&#8209;5.2 was later verified using another internal OpenAI model and by human researchers. 
</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!qdFh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7df9ef77-49e2-46d2-9cb7-3cf0e18b4d73_2042x306.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!qdFh!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7df9ef77-49e2-46d2-9cb7-3cf0e18b4d73_2042x306.png 424w, https://substackcdn.com/image/fetch/$s_!qdFh!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7df9ef77-49e2-46d2-9cb7-3cf0e18b4d73_2042x306.png 848w, https://substackcdn.com/image/fetch/$s_!qdFh!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7df9ef77-49e2-46d2-9cb7-3cf0e18b4d73_2042x306.png 1272w, https://substackcdn.com/image/fetch/$s_!qdFh!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7df9ef77-49e2-46d2-9cb7-3cf0e18b4d73_2042x306.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!qdFh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7df9ef77-49e2-46d2-9cb7-3cf0e18b4d73_2042x306.png" width="1456" height="218" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7df9ef77-49e2-46d2-9cb7-3cf0e18b4d73_2042x306.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:218,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:51550,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187905475?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7df9ef77-49e2-46d2-9cb7-3cf0e18b4d73_2042x306.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!qdFh!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7df9ef77-49e2-46d2-9cb7-3cf0e18b4d73_2042x306.png 424w, https://substackcdn.com/image/fetch/$s_!qdFh!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7df9ef77-49e2-46d2-9cb7-3cf0e18b4d73_2042x306.png 848w, https://substackcdn.com/image/fetch/$s_!qdFh!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7df9ef77-49e2-46d2-9cb7-3cf0e18b4d73_2042x306.png 1272w, https://substackcdn.com/image/fetch/$s_!qdFh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7df9ef77-49e2-46d2-9cb7-3cf0e18b4d73_2042x306.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Read more about this achievement <a href="https://openai.com/index/new-result-theoretical-physics/">using this link</a>.</p><p>Read the physics research paper <a 
href="https://arxiv.org/abs/2602.12176">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>2. <strong>Towards Autonomous Mathematics Research</strong></h4><p>This research from Google DeepMind introduces <strong>Aletheia</strong>, a math research agent designed to move beyond Olympiad-style problem-solving into professional mathematics research, where the big challenges include navigating vast literature and constructing long-term proofs.</p><p>Aletheia can iteratively generate, verify, and revise complete solutions in natural language and uses:</p><ol><li><p>An advanced version of <strong><a href="https://blog.google/innovation-and-ai/models-and-research/gemini-models/gemini-3-deep-think/">Gemini Deep Think</a></strong> for hard reasoning</p></li><li><p>A new inference-time scaling law that extends beyond Olympiad-level problems</p></li><li><p>Intensive tool use to handle real mathematical research complexities</p></li></ol><p>With these, Aletheia has achieved three significant research milestones:</p><ol><li><p>A <a href="https://arxiv.org/abs/2601.23245">fully AI-generated paper</a> on calculating structure constants in arithmetic geometry, called &#8216;Eigenweights&#8217;.</p></li><li><p>A <a href="https://arxiv.org/abs/2601.22401">human-AI collaborative paper</a> that proves bounds on systems of interacting particles called &#8216;independent sets&#8217;.</p></li><li><p>A <a href="https://arxiv.org/abs/2601.22401">semi-autonomous evaluation</a> of 700 open problems in Bloom&#8217;s Erd&#337;s Conjectures database, which led to <a href="https://arxiv.org/abs/2601.21442">autonomous solutions</a> for four open questions.</p></li></ol><div 
class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!-RcG!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d66e9b2-6f3d-4e75-8cfc-aff09f30ab7d_848x477.svg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!-RcG!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d66e9b2-6f3d-4e75-8cfc-aff09f30ab7d_848x477.svg 424w, https://substackcdn.com/image/fetch/$s_!-RcG!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d66e9b2-6f3d-4e75-8cfc-aff09f30ab7d_848x477.svg 848w, https://substackcdn.com/image/fetch/$s_!-RcG!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d66e9b2-6f3d-4e75-8cfc-aff09f30ab7d_848x477.svg 1272w, https://substackcdn.com/image/fetch/$s_!-RcG!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d66e9b2-6f3d-4e75-8cfc-aff09f30ab7d_848x477.svg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!-RcG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d66e9b2-6f3d-4e75-8cfc-aff09f30ab7d_848x477.svg" width="848" height="477" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5d66e9b2-6f3d-4e75-8cfc-aff09f30ab7d_848x477.svg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:477,&quot;width&quot;:848,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Flowchart titled \&quot;Aletheia: Powered by 
Gemini Deep Think\&quot; illustrating a multi-step solution verification process. A central linear path moves from \&quot;Problem\&quot; to \&quot;Generator,\&quot; then \&quot;Candidate solution,\&quot; then \&quot;Verifier,\&quot; and finally \&quot;Final output.\&quot; The Verifier acts as a decision point with three feedback loops: Correct: Moves directly to \&quot;Final output.\&quot; Minor fixes needed: Routes back through a \&quot;Reviser\&quot; to update the \&quot;Candidate solution.\&quot; Critically flawed: Triggers a red dashed line back to the \&quot;Generator\&quot; to restart the process.&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Flowchart titled &quot;Aletheia: Powered by Gemini Deep Think&quot; illustrating a multi-step solution verification process. A central linear path moves from &quot;Problem&quot; to &quot;Generator,&quot; then &quot;Candidate solution,&quot; then &quot;Verifier,&quot; and finally &quot;Final output.&quot; The Verifier acts as a decision point with three feedback loops: Correct: Moves directly to &quot;Final output.&quot; Minor fixes needed: Routes back through a &quot;Reviser&quot; to update the &quot;Candidate solution.&quot; Critically flawed: Triggers a red dashed line back to the &quot;Generator&quot; to restart the process." title="Flowchart titled &quot;Aletheia: Powered by Gemini Deep Think&quot; illustrating a multi-step solution verification process. 
A central linear path moves from &quot;Problem&quot; to &quot;Generator,&quot; then &quot;Candidate solution,&quot; then &quot;Verifier,&quot; and finally &quot;Final output.&quot; The Verifier acts as a decision point with three feedback loops: Correct: Moves directly to &quot;Final output.&quot; Minor fixes needed: Routes back through a &quot;Reviser&quot; to update the &quot;Candidate solution.&quot; Critically flawed: Triggers a red dashed line back to the &quot;Generator&quot; to restart the process." srcset="https://substackcdn.com/image/fetch/$s_!-RcG!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d66e9b2-6f3d-4e75-8cfc-aff09f30ab7d_848x477.svg 424w, https://substackcdn.com/image/fetch/$s_!-RcG!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d66e9b2-6f3d-4e75-8cfc-aff09f30ab7d_848x477.svg 848w, https://substackcdn.com/image/fetch/$s_!-RcG!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d66e9b2-6f3d-4e75-8cfc-aff09f30ab7d_848x477.svg 1272w, https://substackcdn.com/image/fetch/$s_!-RcG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d66e9b2-6f3d-4e75-8cfc-aff09f30ab7d_848x477.svg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 
8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"><a href="https://deepmind.google/blog/accelerating-mathematical-and-scientific-discovery-with-gemini-deep-think/">Source</a></figcaption></figure></div><p>Read more about the research <a href="https://arxiv.org/abs/2602.10177">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4><strong>3. The Devil Behind Moltbook</strong></h4><p>The research paper examines LLM-based self-evolving multi-agent AI systems and proves a key limitation in their ability to stay safely aligned while improving on their own.</p><p>The authors introduce the &#8220;<strong>Self-Evolution trilemma</strong>,&#8221; which states that an agent society cannot achieve continuous self-evolution, complete isolation, and consistent safety all at the same time. 
</p><p>Using an information-theoretic approach, they show that isolated self-evolution creates statistical blind spots that lead to irreversible safety lapses when compared to anthropic (human-like) value distributions. </p><p>Observations from the Moltbook agent community and other closed systems support this prediction. </p><p>The authors also claim that this loss of safety is inherent, not just a problem to fix, and stress the need for external oversight or new mechanisms to preserve safety and address these risks.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!DvgY!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa97d29b6-8a3d-4186-ba6f-e32c2a9ce2ec_2610x1262.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!DvgY!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa97d29b6-8a3d-4186-ba6f-e32c2a9ce2ec_2610x1262.png 424w, https://substackcdn.com/image/fetch/$s_!DvgY!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa97d29b6-8a3d-4186-ba6f-e32c2a9ce2ec_2610x1262.png 848w, https://substackcdn.com/image/fetch/$s_!DvgY!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa97d29b6-8a3d-4186-ba6f-e32c2a9ce2ec_2610x1262.png 1272w, https://substackcdn.com/image/fetch/$s_!DvgY!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa97d29b6-8a3d-4186-ba6f-e32c2a9ce2ec_2610x1262.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!DvgY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa97d29b6-8a3d-4186-ba6f-e32c2a9ce2ec_2610x1262.png" width="1456" height="704" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a97d29b6-8a3d-4186-ba6f-e32c2a9ce2ec_2610x1262.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:704,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2098151,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187905475?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa97d29b6-8a3d-4186-ba6f-e32c2a9ce2ec_2610x1262.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!DvgY!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa97d29b6-8a3d-4186-ba6f-e32c2a9ce2ec_2610x1262.png 424w, https://substackcdn.com/image/fetch/$s_!DvgY!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa97d29b6-8a3d-4186-ba6f-e32c2a9ce2ec_2610x1262.png 848w, https://substackcdn.com/image/fetch/$s_!DvgY!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa97d29b6-8a3d-4186-ba6f-e32c2a9ce2ec_2610x1262.png 1272w, https://substackcdn.com/image/fetch/$s_!DvgY!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa97d29b6-8a3d-4186-ba6f-e32c2a9ce2ec_2610x1262.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about the research <a href="https://www.arxiv.org/abs/2602.09877">using this link</a>.</p><div><hr></div><h4>4. SOTA ARC-AGI-2 Results with Agentica</h4><p>The open-sourced <strong>Agentica</strong> framework by Symbolica combines code-mode agents and recursive language model (RLM) workflows using a persistent Python REPL environment.</p><p>This setup allows agents to propose, implement, test, and continuously refine programs to tackle ARC-AGI tasks more effectively than standard chain-of-thought prompting. 
</p><p>A significant innovation in their approach is <strong>Recursive delegation</strong>, in which agents dynamically create sub-agents to manage subtasks, allowing exploration in both depth and breadth. </p><p>Experimental results show that Agentica agents significantly improve benchmark scores for models such as Opus 4.6, GPT-5.2, and Opus 4.5, with the highest score reaching 85.28% on the public ARC-AGI-2 set.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!L5G_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc89bdb68-9a7f-4f53-9181-2756a647fdb6_2070x1258.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!L5G_!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc89bdb68-9a7f-4f53-9181-2756a647fdb6_2070x1258.png 424w, https://substackcdn.com/image/fetch/$s_!L5G_!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc89bdb68-9a7f-4f53-9181-2756a647fdb6_2070x1258.png 848w, https://substackcdn.com/image/fetch/$s_!L5G_!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc89bdb68-9a7f-4f53-9181-2756a647fdb6_2070x1258.png 1272w, https://substackcdn.com/image/fetch/$s_!L5G_!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc89bdb68-9a7f-4f53-9181-2756a647fdb6_2070x1258.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!L5G_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc89bdb68-9a7f-4f53-9181-2756a647fdb6_2070x1258.png" width="1456" height="885" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c89bdb68-9a7f-4f53-9181-2756a647fdb6_2070x1258.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:885,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:221123,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187905475?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc89bdb68-9a7f-4f53-9181-2756a647fdb6_2070x1258.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!L5G_!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc89bdb68-9a7f-4f53-9181-2756a647fdb6_2070x1258.png 424w, https://substackcdn.com/image/fetch/$s_!L5G_!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc89bdb68-9a7f-4f53-9181-2756a647fdb6_2070x1258.png 848w, https://substackcdn.com/image/fetch/$s_!L5G_!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc89bdb68-9a7f-4f53-9181-2756a647fdb6_2070x1258.png 1272w, https://substackcdn.com/image/fetch/$s_!L5G_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc89bdb68-9a7f-4f53-9181-2756a647fdb6_2070x1258.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about the research <a href="https://www.symbolica.ai/blog/arcgentica">using this link</a>.</p><p>The GitHub repository associated with Agentica <a href="https://github.com/symbolica-ai/agentica-python-sdk?tab=readme-ov-file">can be found here</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe 
now</span></a></p><div><hr></div><h4>5. AttentionRetriever</h4><p>Existing RAG methods struggle with long-document retrieval due to poor context-awareness, causal dependence, and scope issues. </p><p>This research paper introduces <strong>AttentionRetriever</strong>, a new long-document retrieval model demonstrating that transformer attention layers can act as effective retrievers without any extra training. </p><p>This is achieved by using attention scores and entity-aware embeddings to capture context and scope in long texts. </p><p>Results show that AttentionRetriever can outperform previous models on long-document retrieval benchmarks while being as efficient as standard dense retrievers.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!CAj6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54bd4bc7-0110-4216-b9e3-08ef49fb8ab6_2426x1160.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!CAj6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54bd4bc7-0110-4216-b9e3-08ef49fb8ab6_2426x1160.png 424w, https://substackcdn.com/image/fetch/$s_!CAj6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54bd4bc7-0110-4216-b9e3-08ef49fb8ab6_2426x1160.png 848w, https://substackcdn.com/image/fetch/$s_!CAj6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54bd4bc7-0110-4216-b9e3-08ef49fb8ab6_2426x1160.png 1272w, 
https://substackcdn.com/image/fetch/$s_!CAj6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54bd4bc7-0110-4216-b9e3-08ef49fb8ab6_2426x1160.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!CAj6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54bd4bc7-0110-4216-b9e3-08ef49fb8ab6_2426x1160.png" width="1456" height="696" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/54bd4bc7-0110-4216-b9e3-08ef49fb8ab6_2426x1160.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:696,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:843994,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187905475?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54bd4bc7-0110-4216-b9e3-08ef49fb8ab6_2426x1160.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!CAj6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54bd4bc7-0110-4216-b9e3-08ef49fb8ab6_2426x1160.png 424w, https://substackcdn.com/image/fetch/$s_!CAj6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54bd4bc7-0110-4216-b9e3-08ef49fb8ab6_2426x1160.png 848w, 
https://substackcdn.com/image/fetch/$s_!CAj6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54bd4bc7-0110-4216-b9e3-08ef49fb8ab6_2426x1160.png 1272w, https://substackcdn.com/image/fetch/$s_!CAj6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54bd4bc7-0110-4216-b9e3-08ef49fb8ab6_2426x1160.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about the research <a href="https://www.arxiv.org/abs/2602.12278">using this link</a>.</p><div><hr></div><h4>6. 
<strong>VLA-JEPA</strong></h4><p>This research paper presents <strong>VLA-JEPA</strong>, a new pretraining framework for vision-language-action (VLA) policies that improves how agents learn to act from large-scale video data.</p><p>It tackles a major issue in current latent-action objectives, which often learn irrelevant pixel changes, by using leakage-free state prediction in a latent world model: the target encoder generates latent representations from future frames, while the student pathway only sees the current observation. With such an approach, future information is used purely as a supervision target. </p><p>Additionally, by predicting in latent space rather than raw pixels, VLA-JEPA learns dynamic abstractions that are resistant to camera motion and background noise. </p><p>Experiments show that VLA-JEPA consistently improves generalization and robustness on benchmarks such as LIBERO, LIBERO-Plus, SimplerEnv, and real-world robot manipulation tasks, compared with existing methods.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_VA_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd98bc7fc-5aaf-4c89-a45f-006cb1cbc42e_2124x1282.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!_VA_!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd98bc7fc-5aaf-4c89-a45f-006cb1cbc42e_2124x1282.png 424w, https://substackcdn.com/image/fetch/$s_!_VA_!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd98bc7fc-5aaf-4c89-a45f-006cb1cbc42e_2124x1282.png 848w, 
https://substackcdn.com/image/fetch/$s_!_VA_!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd98bc7fc-5aaf-4c89-a45f-006cb1cbc42e_2124x1282.png 1272w, https://substackcdn.com/image/fetch/$s_!_VA_!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd98bc7fc-5aaf-4c89-a45f-006cb1cbc42e_2124x1282.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!_VA_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd98bc7fc-5aaf-4c89-a45f-006cb1cbc42e_2124x1282.png" width="1456" height="879" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d98bc7fc-5aaf-4c89-a45f-006cb1cbc42e_2124x1282.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:879,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:329677,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187905475?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd98bc7fc-5aaf-4c89-a45f-006cb1cbc42e_2124x1282.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!_VA_!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd98bc7fc-5aaf-4c89-a45f-006cb1cbc42e_2124x1282.png 424w, 
https://substackcdn.com/image/fetch/$s_!_VA_!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd98bc7fc-5aaf-4c89-a45f-006cb1cbc42e_2124x1282.png 848w, https://substackcdn.com/image/fetch/$s_!_VA_!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd98bc7fc-5aaf-4c89-a45f-006cb1cbc42e_2124x1282.png 1272w, https://substackcdn.com/image/fetch/$s_!_VA_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd98bc7fc-5aaf-4c89-a45f-006cb1cbc42e_2124x1282.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about the research <a href="https://www.arxiv.org/abs/2602.10098">using this link</a>.</p><div><hr></div><h4>7. LaCy</h4><p>Small language models (SLMs) have limited capacity to learn due to their small number of parameters. This often leads them to generate factually incorrect responses. To address this, SLMs are given access to external sources, such as larger models, documents, or databases, using a <code>&lt;CALL&gt;</code> mechanism.</p><p>This research paper from Apple explores which tokens an SLM can and should learn during pretraining, versus which ones it should delegate via the <code>&lt;CALL&gt;</code> mechanism.</p><p>Findings indicate that this choice goes beyond merely minimizing loss, and some high-loss tokens are actually reasonable alternative continuations of the training text and should be learned by the SLM instead of being delegated.</p><p>Based on this, the authors introduce <strong>LaCy</strong>, a new pretraining method that uses an augmented loss signal from the spaCy grammar parser to determine which tokens the SLM should learn to delegate to prevent factual errors and which are safe to learn and predict even under high losses.</p><p>Experiments indicate that LaCy improves factual accuracy, reflected by higher FactScores, when SLMs are combined with larger models. 
It also outperforms other strategies, such as Rho-trained or LLM-judge-trained SLMs, while being simpler and more cost-effective.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!SBgi!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ef3773e-9f3b-4431-a578-308d59c0ec1a_2542x920.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!SBgi!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ef3773e-9f3b-4431-a578-308d59c0ec1a_2542x920.png 424w, https://substackcdn.com/image/fetch/$s_!SBgi!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ef3773e-9f3b-4431-a578-308d59c0ec1a_2542x920.png 848w, https://substackcdn.com/image/fetch/$s_!SBgi!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ef3773e-9f3b-4431-a578-308d59c0ec1a_2542x920.png 1272w, https://substackcdn.com/image/fetch/$s_!SBgi!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ef3773e-9f3b-4431-a578-308d59c0ec1a_2542x920.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!SBgi!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ef3773e-9f3b-4431-a578-308d59c0ec1a_2542x920.png" width="1456" height="527" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6ef3773e-9f3b-4431-a578-308d59c0ec1a_2542x920.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:527,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:254066,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187905475?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ef3773e-9f3b-4431-a578-308d59c0ec1a_2542x920.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!SBgi!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ef3773e-9f3b-4431-a578-308d59c0ec1a_2542x920.png 424w, https://substackcdn.com/image/fetch/$s_!SBgi!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ef3773e-9f3b-4431-a578-308d59c0ec1a_2542x920.png 848w, https://substackcdn.com/image/fetch/$s_!SBgi!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ef3773e-9f3b-4431-a578-308d59c0ec1a_2542x920.png 1272w, https://substackcdn.com/image/fetch/$s_!SBgi!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ef3773e-9f3b-4431-a578-308d59c0ec1a_2542x920.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about the research <a href="https://arxiv.org/abs/2602.12005">using this link</a>.</p><div><hr></div><h4>8. <strong>Fine-Tuning GPT-5 for GPU Kernel Generation</strong></h4><p>This research paper addresses the challenge of developing efficient GPU kernels essential to high-performance AI systems, which are difficult to write because of complex hardware architectures and limited high-quality training data. </p><p>The authors show that fine-tuning GPT-5 with RL in <strong><a href="https://makora.com/">Makora</a>&#8217;s</strong> environment significantly improves its ability to generate correct Triton GPU kernels. 
</p><p>In the single-attempt setting, the fine-tuned GPT-5 improves kernel correctness from 43.7% to 77.0% and increases the fraction of problems on which its kernels outperform TorchInductor from 14.8% to 21.8% compared to the baseline model, while exceeding prior state-of-the-art models on KernelBench.</p><p>When integrated into a complete coding agent, the fine-tuned model successfully solves nearly all problems in the expanded KernelBench suite and outperforms the PyTorch TorchInductor compiler on 72.9% of problems with a geometric mean speedup of 2.12X. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!-cfR!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f645214-1e9e-4d7f-a08a-df36c757473a_1780x1240.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!-cfR!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f645214-1e9e-4d7f-a08a-df36c757473a_1780x1240.png 424w, https://substackcdn.com/image/fetch/$s_!-cfR!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f645214-1e9e-4d7f-a08a-df36c757473a_1780x1240.png 848w, https://substackcdn.com/image/fetch/$s_!-cfR!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f645214-1e9e-4d7f-a08a-df36c757473a_1780x1240.png 1272w, https://substackcdn.com/image/fetch/$s_!-cfR!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f645214-1e9e-4d7f-a08a-df36c757473a_1780x1240.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!-cfR!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f645214-1e9e-4d7f-a08a-df36c757473a_1780x1240.png" width="1456" height="1014" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8f645214-1e9e-4d7f-a08a-df36c757473a_1780x1240.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1014,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:283495,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187905475?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f645214-1e9e-4d7f-a08a-df36c757473a_1780x1240.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!-cfR!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f645214-1e9e-4d7f-a08a-df36c757473a_1780x1240.png 424w, https://substackcdn.com/image/fetch/$s_!-cfR!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f645214-1e9e-4d7f-a08a-df36c757473a_1780x1240.png 848w, https://substackcdn.com/image/fetch/$s_!-cfR!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f645214-1e9e-4d7f-a08a-df36c757473a_1780x1240.png 1272w, https://substackcdn.com/image/fetch/$s_!-cfR!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f645214-1e9e-4d7f-a08a-df36c757473a_1780x1240.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about the research <a href="https://arxiv.org/abs/2602.11000">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h4>9. 
<strong>GPT&#8209;5.3&#8209;Codex&#8209;Spark</strong></h4><p>OpenAI has launched <strong>GPT-5.3-Codex-Spark</strong>, a research preview of an ultra-fast, real-time coding model optimized for interactive development workflows.</p><p>Spark is a smaller and highly responsive version of GPT-5.3-Codex. It is designed to deliver nearly instant code generation and edits, producing over 1,000 tokens per second with a 128k context window. This enables developers to interrupt, redirect, and improve outputs in real time. </p><p>To achieve ultra-low latency, the model runs on Cerebras&#8217; Wafer-Scale Engine 3 hardware, making it OpenAI&#8217;s first production deployment on non-NVIDIA chips.</p><div class="native-video-embed" data-component-name="VideoPlaceholder" data-attrs="{&quot;mediaUploadId&quot;:&quot;8cec5310-daf3-4e72-b229-f3b10a3c7188&quot;,&quot;duration&quot;:null}"></div><p>Read more about the research <a href="https://openai.com/index/introducing-gpt-5-3-codex-spark/">using this link</a>.</p><div><hr></div><h4>10. 
GLM-5</h4><p>GLM-5 is Z.ai&#8217;s new flagship foundation model, built for complex systems engineering and long-horizon agentic tasks.</p><p>The model uses a Mixture-of-Experts (MoE) architecture with 744B parameters (40B active) and achieves excellent performance amongst open-source models in reasoning, coding, and agent execution.</p><p>Its capabilities are competitive with top proprietary models such as Anthropic&#8217;s Claude Opus series and OpenAI&#8217;s GPT-5.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!BOyV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9a29f610-1c26-4f3b-aa20-f3fabf2286ad_4239x2884.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!BOyV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9a29f610-1c26-4f3b-aa20-f3fabf2286ad_4239x2884.png 424w, https://substackcdn.com/image/fetch/$s_!BOyV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9a29f610-1c26-4f3b-aa20-f3fabf2286ad_4239x2884.png 848w, https://substackcdn.com/image/fetch/$s_!BOyV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9a29f610-1c26-4f3b-aa20-f3fabf2286ad_4239x2884.png 1272w, https://substackcdn.com/image/fetch/$s_!BOyV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9a29f610-1c26-4f3b-aa20-f3fabf2286ad_4239x2884.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!BOyV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9a29f610-1c26-4f3b-aa20-f3fabf2286ad_4239x2884.png" width="1456" height="991" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9a29f610-1c26-4f3b-aa20-f3fabf2286ad_4239x2884.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:991,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!BOyV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9a29f610-1c26-4f3b-aa20-f3fabf2286ad_4239x2884.png 424w, https://substackcdn.com/image/fetch/$s_!BOyV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9a29f610-1c26-4f3b-aa20-f3fabf2286ad_4239x2884.png 848w, https://substackcdn.com/image/fetch/$s_!BOyV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9a29f610-1c26-4f3b-aa20-f3fabf2286ad_4239x2884.png 1272w, https://substackcdn.com/image/fetch/$s_!BOyV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9a29f610-1c26-4f3b-aa20-f3fabf2286ad_4239x2884.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft 
icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about the research <a href="https://z.ai/blog/glm-5">using this link</a>.</p><div><hr></div><h4>11. <strong>Gaia2</strong></h4><p>This research paper from Meta SuperIntelligence Labs introduces <strong>Gaia2</strong>, a new benchmark suite that evaluates LLM-based agents in realistic, dynamic, and asynchronous environments in which the world changes independently of the agent&#8217;s actions (unlike previous static or synchronized benchmarks). </p><p>Gaia2 tests agents on time constraints, noisy, evolving events, ambiguity resolution, and multi-agent collaboration, using a write-action verifier that supports action-level evaluation and reinforcement learning from verifiable rewards. 
</p><p>Initial evaluations on the benchmark show that no model performs well in all tasks. Proprietary models like GPT-5 (high) achieve the highest overall scores but struggle with tasks that are time-sensitive. </p><p>Other models vary in speed, cost, and reliability, revealing important trade-offs in reasoning, efficiency, and adaptability to real-world situations. </p><p>Gaia2 is built on a consumer environment with the open-source <a href="https://facebookresearch.github.io/meta-agents-research-environments/">Agents Research Environments platform</a> to support the development, benchmarking, and training of next-generation practical AI agent systems.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!67HM!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed555bf6-2777-46d2-92b9-4c0209f289be_2316x1210.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!67HM!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed555bf6-2777-46d2-92b9-4c0209f289be_2316x1210.png 424w, https://substackcdn.com/image/fetch/$s_!67HM!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed555bf6-2777-46d2-92b9-4c0209f289be_2316x1210.png 848w, https://substackcdn.com/image/fetch/$s_!67HM!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed555bf6-2777-46d2-92b9-4c0209f289be_2316x1210.png 1272w, 
https://substackcdn.com/image/fetch/$s_!67HM!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed555bf6-2777-46d2-92b9-4c0209f289be_2316x1210.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!67HM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed555bf6-2777-46d2-92b9-4c0209f289be_2316x1210.png" width="1456" height="761" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ed555bf6-2777-46d2-92b9-4c0209f289be_2316x1210.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:761,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:439646,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187905475?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed555bf6-2777-46d2-92b9-4c0209f289be_2316x1210.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!67HM!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed555bf6-2777-46d2-92b9-4c0209f289be_2316x1210.png 424w, https://substackcdn.com/image/fetch/$s_!67HM!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed555bf6-2777-46d2-92b9-4c0209f289be_2316x1210.png 848w, 
https://substackcdn.com/image/fetch/$s_!67HM!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed555bf6-2777-46d2-92b9-4c0209f289be_2316x1210.png 1272w, https://substackcdn.com/image/fetch/$s_!67HM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed555bf6-2777-46d2-92b9-4c0209f289be_2316x1210.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about the research <a href="https://arxiv.org/abs/2602.11964">using this link</a>.</p><div><hr></div><p>This article is 
entirely free to read. If you loved reading it, restack and share it with others. &#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/this-week-in-ai-research-8-14-february?utm_source=substack&utm_medium=email&utm_content=share&action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/p/this-week-in-ai-research-8-14-february?utm_source=substack&utm_medium=email&utm_content=share&action=share"><span>Share</span></a></p><div><hr></div><p>You can also check out my books on <strong><a href="https://bamaniaashish.gumroad.com/">Gumroad</a></strong> and connect with me on <strong><a href="https://www.linkedin.com/in/ashishbamania/">LinkedIn</a></strong> to stay in touch.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B_dw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, 
https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" width="1344" height="256" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:256,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, 
https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[Build Grouped Query Attention (GQA) From Scratch]]></title><description><![CDATA[Learn to implement Grouped Query Attention (GQA) from scratch, the de facto standard for modern LLMs like Llama, Mistral, GPT-OSS, and Qwen.]]></description><link>https://www.intoai.pub/p/grouped-query-attention</link><guid isPermaLink="false">https://www.intoai.pub/p/grouped-query-attention</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Fri, 13 Feb 2026 11:29:10 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!yeZi!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67cd33dc-da65-4494-824a-32cbeb0bcb55_2500x1030.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!yeZi!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67cd33dc-da65-4494-824a-32cbeb0bcb55_2500x1030.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!yeZi!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67cd33dc-da65-4494-824a-32cbeb0bcb55_2500x1030.png 424w, https://substackcdn.com/image/fetch/$s_!yeZi!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67cd33dc-da65-4494-824a-32cbeb0bcb55_2500x1030.png 848w, 
https://substackcdn.com/image/fetch/$s_!yeZi!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67cd33dc-da65-4494-824a-32cbeb0bcb55_2500x1030.png 1272w, https://substackcdn.com/image/fetch/$s_!yeZi!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67cd33dc-da65-4494-824a-32cbeb0bcb55_2500x1030.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!yeZi!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67cd33dc-da65-4494-824a-32cbeb0bcb55_2500x1030.png" width="1200" height="494.5054945054945" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/67cd33dc-da65-4494-824a-32cbeb0bcb55_2500x1030.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:600,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:306885,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186401144?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67cd33dc-da65-4494-824a-32cbeb0bcb55_2500x1030.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!yeZi!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67cd33dc-da65-4494-824a-32cbeb0bcb55_2500x1030.png 424w, 
https://substackcdn.com/image/fetch/$s_!yeZi!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67cd33dc-da65-4494-824a-32cbeb0bcb55_2500x1030.png 848w, https://substackcdn.com/image/fetch/$s_!yeZi!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67cd33dc-da65-4494-824a-32cbeb0bcb55_2500x1030.png 1272w, https://substackcdn.com/image/fetch/$s_!yeZi!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67cd33dc-da65-4494-824a-32cbeb0bcb55_2500x1030.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>&#127873; Here's your 25% off the annual membership. Unlock every lesson in the newsletter and accelerate your AI engineering career. Claim your discount now!</strong></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Grab your 25% discount today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/741bb486"><span>Grab your 25% discount today</span></a></p><div><hr></div><p>In the previous lessons on &#8216;Into AI&#8217;, we learned to implement Self-attention and progressively updated it to reach Causal Multi-head Self-attention (MHA), which is used in LLMs like <a href="https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf">GPT-2</a>.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;dfa06913-9354-4d0d-962d-403ed338c8e9&quot;,&quot;caption&quot;:&quot;Get access to all high-quality posts and hands-on tutorials on this publication by becoming a paid subscriber today. &#129309;&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build Self-Attention From Scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. 
Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-11-02T13:24:10.415Z&quot;,&quot;cover_image&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/48b2de6e-e072-4a4f-b876-b3bc22d92bb7_2082x614.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/self-attention&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:177703387,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:8,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;e1952fd0-e618-42ca-ab9e-045f7a1a6c41&quot;,&quot;caption&quot;:&quot;We built Self-attention from scratch in a previous lesson in this series on ML interview essentials.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build Multi-Head Self-Attention From Scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. 
Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-11-25T18:46:04.851Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!AM-L!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F800d1cc4-4930-47c0-ad9f-63d3a1e91960_2758x918.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/mha&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:179946235,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:7,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;530bf77e-4cf7-46d4-9ca2-9969845c1acf&quot;,&quot;caption&quot;:&quot;We built Self-attention and Multi-head Self-attention from scratch in the previous lessons in this series on AI/ ML Engineering interview essentials.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build Causal Multi-Head Self-Attention From 
Scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-12-14T08:59:44.858Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!ZFdp!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa16d6507-5071-452b-9613-c6f0e944f0ca_2246x756.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/causal-mha&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:181497944,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:6,&quot;comment_count&quot;:2,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>MHA is a powerful architectural component that enables LLMs to process tokens in parallel, making fast autoregressive text generation possible. 
But it comes with high memory costs and latency.</p><p>We then learned how to build <strong>Multi-Query Attention (MQA)</strong>, an efficient variant of MHA that significantly reduces memory usage and speeds up inference.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;26db531e-54e5-4aa7-baa8-05de51250f72&quot;,&quot;caption&quot;:&quot;In the previous lessons on 'Into AI&#8217;, we learned to implement Self-attention and progressively updated it to reach Causal Multi-head Self-attention (MHA), which is used in LLMs like GPT-2.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build Multi-Query Attention (MQA) From Scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-01-22T10:53:35.485Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!TqO4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c2c1820-2664-45b9-b5df-b7409f91f49f_2084x1100.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/multi-query-attention&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:184301901,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:8,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>It&#8217;s now time to improve the architecture further and learn to build <strong>Grouped Query Attention (GQA)</strong>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3>What is <strong>Grouped Query Attention (GQA)</strong>?</h3><p>In conventional Multi-Head Attention (MHA), each head has its own Q, K, and V vectors.</p><p><strong>In Multi-Query Attention (MQA), all attention heads share the same key (K) and value (V) vectors, while each head still has its own query (Q) vector.</strong></p><p>This reduces the&nbsp;<a href="https://www.intoai.pub/p/llm-optimizations">KV cache</a>&nbsp;size and memory requirements, leading to much faster inference.</p><p>MQA is fast and efficient, but it isn&#8217;t perfect. Because all heads share the same Keys and Values, the learned representations become less effective, making an LLM less expressive. 
</p><p><em><strong>What&#8217;s the fix then?</strong></em>: Finding the right balance and building something that fits between MHA and MQA.</p><p>And finding that sweet spot is what led to GQA.</p><p>Grouped Query Attention (GQA) comes from a Google Research paper titled &#8220;<em><a href="https://arxiv.org/abs/2305.13245">GQA: Training Generalized Multi-Query Transformer Models from Multi-Head Checkpoints</a></em>&#8221;, published in 2023.</p><p>It has since become the de facto standard for modern LLMs and is used across all popular LLM families, including Llama, Mistral, GPT-OSS, Qwen, and more.</p><p><strong>In GQA, Query (Q) heads are partitioned into groups, and each group shares one Key (K) and Value (V).</strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!QwS6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff964abf0-6764-41cd-bec9-74a788757808_1080x1080.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!QwS6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff964abf0-6764-41cd-bec9-74a788757808_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!QwS6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff964abf0-6764-41cd-bec9-74a788757808_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!QwS6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff964abf0-6764-41cd-bec9-74a788757808_1080x1080.png 1272w, 
https://substackcdn.com/image/fetch/$s_!QwS6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff964abf0-6764-41cd-bec9-74a788757808_1080x1080.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!QwS6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff964abf0-6764-41cd-bec9-74a788757808_1080x1080.png" width="1080" height="1080" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f964abf0-6764-41cd-bec9-74a788757808_1080x1080.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1080,&quot;width&quot;:1080,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:46206,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186401144?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff964abf0-6764-41cd-bec9-74a788757808_1080x1080.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!QwS6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff964abf0-6764-41cd-bec9-74a788757808_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!QwS6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff964abf0-6764-41cd-bec9-74a788757808_1080x1080.png 848w, 
https://substackcdn.com/image/fetch/$s_!QwS6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff964abf0-6764-41cd-bec9-74a788757808_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!QwS6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff964abf0-6764-41cd-bec9-74a788757808_1080x1080.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Grouped-Query Attention visualised (Image from author&#8217;s book titled &#8216;<a 
href="https://bamaniaashish.gumroad.com/l/llmbook/">LLMs In 100 Images</a>&#8217;)</figcaption></figure></div><p>Here is how GQA compares to MHA and MQA.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!VDON!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bc41525-4292-4347-961f-8f27d0324f5e_2510x1086.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!VDON!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bc41525-4292-4347-961f-8f27d0324f5e_2510x1086.png 424w, https://substackcdn.com/image/fetch/$s_!VDON!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bc41525-4292-4347-961f-8f27d0324f5e_2510x1086.png 848w, https://substackcdn.com/image/fetch/$s_!VDON!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bc41525-4292-4347-961f-8f27d0324f5e_2510x1086.png 1272w, https://substackcdn.com/image/fetch/$s_!VDON!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bc41525-4292-4347-961f-8f27d0324f5e_2510x1086.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!VDON!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bc41525-4292-4347-961f-8f27d0324f5e_2510x1086.png" width="1456" height="630" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3bc41525-4292-4347-961f-8f27d0324f5e_2510x1086.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:630,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:146141,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186401144?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bc41525-4292-4347-961f-8f27d0324f5e_2510x1086.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!VDON!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bc41525-4292-4347-961f-8f27d0324f5e_2510x1086.png 424w, https://substackcdn.com/image/fetch/$s_!VDON!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bc41525-4292-4347-961f-8f27d0324f5e_2510x1086.png 848w, https://substackcdn.com/image/fetch/$s_!VDON!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bc41525-4292-4347-961f-8f27d0324f5e_2510x1086.png 1272w, https://substackcdn.com/image/fetch/$s_!VDON!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bc41525-4292-4347-961f-8f27d0324f5e_2510x1086.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>A comparison between the three:</p><blockquote><p>MHA&#8217;s generation quality is the best, but its inference speed is the lowest.</p><p>MQA has the fastest inference speed and efficiency but the lowest generation quality.</p><p>GQA balances MHA&#8217;s generation quality and MQA&#8217;s inference speed/ efficiency to fit between these two. 
</p></blockquote><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3><strong>Revisiting </strong>Multi-Head Attention (<strong>MHA)</strong></h3><p>Multi-head Attention (MHA) is implemented with causal masking in LLMs, and this architecture is called Causal Multi-head Self-attention. This prevents an LLM from peeking at future tokens while generating a token.</p><p>(Note that Causal Multi-Head Self-Attention is usually just called MHA in the popular literature.)</p><p>We have previously learned how to build Causal Multi-Head Self-Attention from scratch. </p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;982f074c-80e0-44be-bf8d-67135f17f2c0&quot;,&quot;caption&quot;:&quot;We built Self-attention from scratch in a previous lesson in this series on ML interview essentials.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build Multi-Head Self-Attention From Scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. 
Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-11-25T18:46:04.851Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!AM-L!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F800d1cc4-4930-47c0-ad9f-63d3a1e91960_2758x918.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/mha&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:179946235,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:7,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>Here&#8217;s a quick recap of how it works.</p><p>The following steps are performed in this class representing MHA:</p><ul><li><p>Input embeddings are projected into query (Q), key (K), and value (V) vectors</p></li><li><p><strong>Q, K, and V are split across multiple attention heads</strong></p></li><li><p>Scaled dot-product attention is computed, and a causal mask is applied</p></li><li><p>Softmax is applied to obtain attention weight</p></li><li><p>Attention weights are used to compute a weighted sum of 
values (V) for each head</p></li><li><p>The head outputs are merged and passed through a final output projection matrix</p></li></ul><pre><code><code>import torch
import torch.nn as nn
import math

class CausalMultiHeadSelfAttention(nn.Module):
  def __init__(self, embedding_dim, num_heads):
    super().__init__()

    # Check if embedding_dim is divisible by num_heads
    assert embedding_dim % num_heads == 0, &#8220;embedding_dim must be divisible by num_heads&#8221;

    # Embedding dimension
    self.embedding_dim = embedding_dim

    # Number of total heads
    self.num_heads = num_heads

    # Dimension of each head
    self.head_dim = embedding_dim // num_heads

    # Linear projection matrices for Q, K, V (to be split later for each head)
    self.W_q = nn.Linear(embedding_dim, embedding_dim, bias = False)
    self.W_k = nn.Linear(embedding_dim, embedding_dim, bias = False)
    self.W_v = nn.Linear(embedding_dim, embedding_dim, bias = False)

    # Linear projection matrix to produce final output
    self.W_o = nn.Linear(embedding_dim, embedding_dim, bias = False)

  def _split_heads(self, x):
    """
    Transforms input embeddings from
    [batch_size, sequence_length, embedding_dim]
    to
    [batch_size, num_heads, sequence_length, head_dim]
    """
    batch_size, sequence_length, embedding_dim = x.shape

    # Split embedding_dim into (num_heads, head_dim)
    x = x.reshape(batch_size, sequence_length, self.num_heads, self.head_dim)

    # Reorder and return the intended shape
    return x.transpose(1,2)

  def _merge_heads(self, x):
    """
    Transforms inputs from
    [batch_size, num_heads, sequence_length, head_dim]
    to
    [batch_size, sequence_length, embedding_dim]
    """
    batch_size, num_heads, sequence_length, head_dim = x.shape

    # Move sequence_length back before num_heads in the shape
    x = x.transpose(1,2)

    # Merge (num_heads, head_dim) back into embedding_dim
    embedding_dim = num_heads * head_dim
    x = x.reshape(batch_size, sequence_length, embedding_dim)

    return x

  def forward(self, x):
    batch_size, sequence_length, embedding_dim = x.shape

    # Compute Q, K, V
    Q = self.W_q(x)
    K = self.W_k(x)
    V = self.W_v(x)

    # Split them into multiple heads
    Q = self._split_heads(Q)
    K = self._split_heads(K)
    V = self._split_heads(V)

    # Calculate scaled dot-product attention
    attn_scores = Q @ K.transpose(-2, -1)

    # Scale
    attn_scores = attn_scores / math.sqrt(self.head_dim)

    # Apply causal mask (prevent attending to future positions)
    causal_mask = torch.tril(torch.ones(sequence_length, sequence_length, device=x.device)) # Create lower triangular matrix

    causal_mask = causal_mask.view(1, 1, sequence_length, sequence_length)  # Add batch and head dimensions
     
    attn_scores = attn_scores.masked_fill(causal_mask == 0, float('-inf')) # Mask out future positions by setting their scores to -inf

    # Apply softmax to get attention weights
    attn_weights = torch.softmax(attn_scores, dim = -1)

    # Multiply attention weights by values (V)
    weighted_values = attn_weights @ V

    # Merge head outputs
    merged_heads_output = self._merge_heads(weighted_values)

    # Final output
    output = self.W_o(merged_heads_output)

    return output </code></code></pre><div><hr></div><h3>Revisiting Multi-Query Attention (MQA)</h3><p>We have also learned how to implement MQA from scratch in a previous lesson.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;ea92a3c0-eb01-4c88-9f1d-3b19c65a00b6&quot;,&quot;caption&quot;:&quot;In the previous lessons on 'Into AI&#8217;, we learned to implement Self-attention and progressively updated it to reach Causal Multi-head Self-attention (MHA), which is used in LLMs like GPT-2.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build Multi-Query Attention (MQA) From Scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-01-22T10:53:35.485Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!TqO4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5c2c1820-2664-45b9-b5df-b7409f91f49f_2084x1100.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/multi-query-attention&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:184301901,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:8,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>The following steps take place in the class representing MQA:</p><ul><li><p>Input embeddings are projected into query (Q), key (K), and value (V) vectors</p></li><li><p>Q is split across multiple attention heads</p></li><li><p><strong>K and V are not split, but instead, a single shared K and V are used across all heads</strong></p></li><li><p>Scaled dot-product attention is computed between each head&#8217;s Q and the shared K, and a causal mask is applied</p></li><li><p>Softmax is applied to obtain attention weights</p></li><li><p>Attention weights are used to compute a weighted sum of the shared values (V) for each head</p></li><li><p>The head outputs are merged and passed through a final output projection matrix</p></li></ul><pre><code><code>import torch
import torch.nn as nn
import math

class MultiQueryAttention(nn.Module):
  def __init__(self, embedding_dim, num_heads):
    super().__init__()

    # Check if embedding_dim is divisible by num_heads
    assert embedding_dim % num_heads == 0, "embedding_dim must be divisible by num_heads"

    # Embedding dimension
    self.embedding_dim = embedding_dim

    # Number of total heads
    self.num_heads = num_heads

    # Dimension of each head
    self.head_dim = embedding_dim // num_heads

    # Linear projection matrix for Query
    self.W_q = nn.Linear(embedding_dim, embedding_dim, bias = False)

    # Linear projection matrices for Key and Value
    self.W_k = nn.Linear(embedding_dim, self.head_dim, bias = False)
    self.W_v = nn.Linear(embedding_dim, self.head_dim, bias = False)

    # Linear projection matrix to produce final output
    self.W_o = nn.Linear(embedding_dim, embedding_dim, bias = False)

  # Splits Query into multiple heads
  def _split_heads(self, x):
    """
    Transforms input embeddings from
    [batch_size, sequence_length, embedding_dim]
    to
    [batch_size, num_heads, sequence_length, head_dim]
    """
    batch_size, sequence_length, embedding_dim = x.shape

    # Split embedding_dim into (num_heads, head_dim)
    x = x.reshape(batch_size, sequence_length, self.num_heads, self.head_dim)

    # Reorder and return the intended shape
    return x.transpose(1,2)

  # Merge heads back together
  def _merge_heads(self, x):
    """
    Transforms inputs from
    [batch_size, num_heads, sequence_length, head_dim]
    to
    [batch_size, sequence_length, embedding_dim]
    """
    batch_size, num_heads, sequence_length, head_dim = x.shape

    # Move sequence_length back before num_heads in the shape
    x = x.transpose(1,2)

    # Merge (num_heads, head_dim) back into embedding_dim
    embedding_dim = num_heads * head_dim
    x = x.reshape(batch_size, sequence_length, embedding_dim)

    return x

  # Forward pass
  def forward(self, x):
    batch_size, sequence_length, embedding_dim = x.shape

    # Compute Q, K, V
    Q = self.W_q(x) # [batch_size, sequence_length, embedding_dim]
    K = self.W_k(x) # [batch_size, sequence_length, head_dim]
    V = self.W_v(x) # [batch_size, sequence_length, head_dim]

    # Split Q into multiple heads
    Q = self._split_heads(Q) # [batch_size, num_heads, sequence_length, head_dim]

    # Add head dimension to K and V (Broadcast across all heads)
    K = K.unsqueeze(1) # [batch_size, 1, sequence_length, head_dim]
    V = V.unsqueeze(1) # [batch_size, 1, sequence_length, head_dim]

    # Calculate scaled dot-product attention
    attn_scores = Q @ K.transpose(-2, -1)
    attn_scores = attn_scores / math.sqrt(self.head_dim)

    # Create lower triangular matrix as causal masking
    causal_mask = torch.tril(torch.ones(sequence_length, sequence_length, device=x.device))

    # Add batch_size and num_heads dimensions
    causal_mask = causal_mask.view(1, 1, sequence_length, sequence_length)

    # Mask out future positions by setting their scores to -inf
    attn_scores = attn_scores.masked_fill(causal_mask == 0, float('-inf'))

    # Apply softmax to get attention weights
    attn_weights = torch.softmax(attn_scores, dim = -1)

    # Multiply attention weights by V
    weighted_values = attn_weights @ V

    # Merge head outputs
    merged_heads_output = self._merge_heads(weighted_values)

    # Obtain final output
    output = self.W_o(merged_heads_output)

    return output</code></code></pre><div><hr></div><h3>Building Grouped-Query Attention (<strong>GQA)</strong></h3><p>As discussed earlier:</p><blockquote><p>In GQA, Queries (Q) are partitioned into groups, and each group shares one Key (K) and Value (V).</p></blockquote><p>Following this, in GQA:</p><ul><li><p>We have <code>num_heads</code> query heads (same as MHA)</p></li><li><p>But only <code>num_groups</code> key/value heads (fewer than <code>num_heads</code>)</p></li><li><p>Each KV group is reused by a fixed number of query heads</p></li></ul><p>Next, we will implement the <code>GroupedQueryAttention</code> class. This represents the causal version of GQA, which is often simply referred to as GQA in the popular literature.</p><ul><li><p>We begin by setting the dimensions and creating projection matrices.</p></li></ul><pre><code>import torch 
import torch.nn as nn
import math 

class GroupedQueryAttention(nn.Module):
  def __init__(self, embedding_dim, num_heads, num_groups):
    super().__init__()

    # Check if embedding_dim is divisible by num_heads
    assert embedding_dim % num_heads == 0, "embedding_dim must be divisible by num_heads"

    # Check if num_heads is divisible by num_groups
    # (Each group must be shared by the same number of heads)
    assert num_heads % num_groups == 0, "num_heads must be divisible by num_groups"

    # Embedding dimension
    self.embedding_dim = embedding_dim

    # Number of total query heads
    self.num_heads = num_heads

    # Dimension of each head
    self.head_dim = embedding_dim // num_heads

    # Number of KV groups
    self.num_groups = num_groups

    # Number of query heads per KV group
    self.group_size = num_heads // num_groups

    # Linear projection matrix for query 
    self.W_q = nn.Linear(embedding_dim, embedding_dim, bias=False)

    # Linear projection matrices for key and value
    self.W_k = nn.Linear(embedding_dim, self.num_groups * self.head_dim, bias=False)
    self.W_v = nn.Linear(embedding_dim, self.num_groups * self.head_dim, bias=False)

    # Linear projection matrix to produce final output
    self.W_o = nn.Linear(embedding_dim, embedding_dim, bias=False)</code></pre><p>Note how the output size for the key and value projections is <code>num_groups * head_dim</code> and not <code>embedding_dim</code>, as in the case of the query projection.</p><p>This means that we create only <code>num_groups</code> sets of K and V vectors per token, each shared across multiple query heads.</p><ul><li><p>Next, we define a function that splits the query into multiple heads.</p></li></ul><pre><code>  # Splits Q into multiple heads
  def _split_heads(self, x):
    """
    Transforms input embeddings from
    [batch_size, sequence_length, embedding_dim]
    to
    [batch_size, num_heads, sequence_length, head_dim]
    """
    batch_size, sequence_length, embedding_dim = x.shape

    # Split embedding_dim into (num_heads, head_dim)
    x = x.reshape(batch_size, sequence_length, self.num_heads, self.head_dim)

    # Reorder and return the intended shape
    return x.transpose(1, 2)</code></pre><ul><li><p>Then, we define a function that splits K and V into <code>num_groups</code> heads</p></li></ul><pre><code>  # Splits K or V into num_groups heads
  def _split_groups(self, x):
    """
    Transforms K/V from
    [batch_size, sequence_length, num_groups * head_dim]
    to
    [batch_size, num_groups, sequence_length, head_dim]
    """
    batch_size, sequence_length, _ = x.shape

    x = x.reshape(batch_size, sequence_length, self.num_groups, self.head_dim)
    return x.transpose(1, 2)</code></pre><ul><li><p>Following this comes the function that merges the head outputs back.</p></li></ul><pre><code>  # Merge heads back together
  def _merge_heads(self, x):
    """
    Transforms inputs from
    [batch_size, num_heads, sequence_length, head_dim]
    to
    [batch_size, sequence_length, embedding_dim]
    """
    batch_size, num_heads, sequence_length, head_dim = x.shape

    # Move sequence_length back before num_heads in the shape
    x = x.transpose(1, 2)

    # Merge (num_heads, head_dim) back into embedding_dim
    embedding_dim = num_heads * head_dim
    x = x.reshape(batch_size, sequence_length, embedding_dim)

    return x</code></pre><ul><li><p>Finally, the forward pass is defined as follows.</p></li></ul><pre><code>  # Forward pass
  def forward(self, x):
    batch_size, sequence_length, embedding_dim = x.shape

    # Compute Q, K, V
    Q = self.W_q(x)  # [batch_size, sequence_length, embedding_dim]
    K = self.W_k(x)  # [batch_size, sequence_length, num_groups * head_dim]
    V = self.W_v(x)  # [batch_size, sequence_length, num_groups * head_dim]

    # Split Q into multiple heads
    Q = self._split_heads(Q)  # [batch_size, num_heads, sequence_length, head_dim]

    # Split K and V into num_groups heads
    K = self._split_groups(K)  # [batch_size, num_groups, sequence_length, head_dim]
    V = self._split_groups(V)  # [batch_size, num_groups, sequence_length, head_dim]

    # Expand K and V so each KV group is shared across multiple query heads
    K = K.repeat_interleave(self.group_size, dim=1)
    V = V.repeat_interleave(self.group_size, dim=1)

    # Calculate scaled dot-product attention
    attn_scores = Q @ K.transpose(-2, -1)
    attn_scores = attn_scores / math.sqrt(self.head_dim)

    # Create lower triangular matrix as causal masking
    causal_mask = torch.tril(torch.ones(sequence_length, sequence_length, device=x.device))

    # Add batch_size and num_heads dimensions
    causal_mask = causal_mask.view(1, 1, sequence_length, sequence_length)

    # Mask out future positions by setting their scores to -inf
    attn_scores = attn_scores.masked_fill(causal_mask == 0, float('-inf'))

    # Apply softmax to get attention weights
    attn_weights = torch.softmax(attn_scores, dim=-1)

    # Multiply attention weights by V
    weighted_values = attn_weights @ V  # [batch_size, num_heads, sequence_length, head_dim]

    # Merge head outputs
    merged_heads_output = self._merge_heads(weighted_values)

    # Obtain final output
    output = self.W_o(merged_heads_output)

    return output</code></pre><p>The following operations take place during the forward pass.</p><ul><li><p>Input embeddings are first projected into query (Q), key (K), and value (V) vectors</p></li><li><p><strong>The query vector is split across multiple attention heads</strong></p></li><li><p><strong>The shared keys and values are split into </strong><code>num_groups</code><strong> heads</strong></p></li><li><p>Each KV group is broadcast across multiple query heads</p></li><li><p>Each head computes scaled dot-product attention scores between Q and K</p></li><li><p>A causal mask is applied to prevent the model from attending to future tokens</p></li><li><p>Softmax converts the masked scores into attention weights</p></li><li><p>Attention weights are used to compute a weighted sum of values (V)</p></li><li><p>The outputs from all heads are merged into a single representation</p></li><li><p>A final output projection matrix produces the layer&#8217;s output</p></li></ul><p>The complete code for the <code>GroupedQueryAttention</code> class is as follows. </p><pre><code>import torch
import torch.nn as nn
import math

class GroupedQueryAttention(nn.Module):
  def __init__(self, embedding_dim, num_heads, num_groups):
    super().__init__()

    # Check if embedding_dim is divisible by num_heads
    assert embedding_dim % num_heads == 0, "embedding_dim must be divisible by num_heads"

    # Check if num_heads is divisible by num_groups
    # (Each group must be shared by the same number of heads)
    assert num_heads % num_groups == 0, "num_heads must be divisible by num_groups"

    # Embedding dimension
    self.embedding_dim = embedding_dim

    # Number of total query heads
    self.num_heads = num_heads

    # Dimension of each head
    self.head_dim = embedding_dim // num_heads

    # Number of KV groups
    self.num_groups = num_groups

    # Number of query heads per KV group
    self.group_size = num_heads // num_groups

    # Linear projection matrix for query
    self.W_q = nn.Linear(embedding_dim, embedding_dim, bias=False)

    # Linear projection matrices for key and value
    self.W_k = nn.Linear(embedding_dim, self.num_groups * self.head_dim, bias=False)
    self.W_v = nn.Linear(embedding_dim, self.num_groups * self.head_dim, bias=False)

    # Linear projection matrix to produce final output
    self.W_o = nn.Linear(embedding_dim, embedding_dim, bias=False)

  # Splits Q into multiple heads
  def _split_heads(self, x):
    """
    Transforms input embeddings from
    [batch_size, sequence_length, embedding_dim]
    to
    [batch_size, num_heads, sequence_length, head_dim]
    """
    batch_size, sequence_length, embedding_dim = x.shape

    # Split embedding_dim into (num_heads, head_dim)
    x = x.reshape(batch_size, sequence_length, self.num_heads, self.head_dim)

    # Reorder and return the intended shape
    return x.transpose(1, 2)

  # Splits K or V into num_groups heads
  def _split_groups(self, x):
    """
    Transforms K/V from
    [batch_size, sequence_length, num_groups * head_dim]
    to
    [batch_size, num_groups, sequence_length, head_dim]
    """
    batch_size, sequence_length, _ = x.shape

    x = x.reshape(batch_size, sequence_length, self.num_groups, self.head_dim)
    return x.transpose(1, 2)

  # Merge heads back together
  def _merge_heads(self, x):
    """
    Transforms inputs from
    [batch_size, num_heads, sequence_length, head_dim]
    to
    [batch_size, sequence_length, embedding_dim]
    """
    batch_size, num_heads, sequence_length, head_dim = x.shape

    # Move sequence_length back before num_heads in the shape
    x = x.transpose(1, 2)

    # Merge (num_heads, head_dim) back into embedding_dim
    embedding_dim = num_heads * head_dim
    x = x.reshape(batch_size, sequence_length, embedding_dim)

    return x

  # Forward pass
  def forward(self, x):
    batch_size, sequence_length, embedding_dim = x.shape

    # Compute Q, K, V
    Q = self.W_q(x)  # [batch_size, sequence_length, embedding_dim]
    K = self.W_k(x)  # [batch_size, sequence_length, num_groups * head_dim]
    V = self.W_v(x)  # [batch_size, sequence_length, num_groups * head_dim]

    # Split Q into multiple heads
    Q = self._split_heads(Q)  # [batch_size, num_heads, sequence_length, head_dim]

    # Split K and V into num_groups heads
    K = self._split_groups(K)  # [batch_size, num_groups, sequence_length, head_dim]
    V = self._split_groups(V)  # [batch_size, num_groups, sequence_length, head_dim]

    # Expand K and V so each KV group is shared across multiple query heads
    K = K.repeat_interleave(self.group_size, dim=1)
    V = V.repeat_interleave(self.group_size, dim=1)

    # Calculate scaled dot-product attention
    attn_scores = Q @ K.transpose(-2, -1)
    attn_scores = attn_scores / math.sqrt(self.head_dim)

    # Create lower triangular matrix as causal masking
    causal_mask = torch.tril(torch.ones(sequence_length, sequence_length, device=x.device))

    # Add batch_size and num_heads dimensions
    causal_mask = causal_mask.view(1, 1, sequence_length, sequence_length)

    # Mask out future positions by setting their scores to -inf
    attn_scores = attn_scores.masked_fill(causal_mask == 0, float('-inf'))

    # Apply softmax to get attention weights
    attn_weights = torch.softmax(attn_scores, dim=-1)

    # Multiply attention weights by V
    weighted_values = attn_weights @ V  # [batch_size, num_heads, sequence_length, head_dim]

    # Merge head outputs
    merged_heads_output = self._merge_heads(weighted_values)

    # Obtain final output
    output = self.W_o(merged_heads_output)

    return output</code></pre><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><div><hr></div><h3><strong>MHA, MQA &amp; GQA Visualised</strong></h3><ul><li><p><strong>Multi-Head Attention (MHA)</strong></p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!l57b!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7194e4d0-998a-4713-88d7-07802951fe88_2794x928.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!l57b!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7194e4d0-998a-4713-88d7-07802951fe88_2794x928.png 424w, https://substackcdn.com/image/fetch/$s_!l57b!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7194e4d0-998a-4713-88d7-07802951fe88_2794x928.png 848w, https://substackcdn.com/image/fetch/$s_!l57b!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7194e4d0-998a-4713-88d7-07802951fe88_2794x928.png 1272w, https://substackcdn.com/image/fetch/$s_!l57b!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7194e4d0-998a-4713-88d7-07802951fe88_2794x928.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!l57b!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7194e4d0-998a-4713-88d7-07802951fe88_2794x928.png" width="1456" height="484" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7194e4d0-998a-4713-88d7-07802951fe88_2794x928.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:484,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!l57b!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7194e4d0-998a-4713-88d7-07802951fe88_2794x928.png 424w, https://substackcdn.com/image/fetch/$s_!l57b!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7194e4d0-998a-4713-88d7-07802951fe88_2794x928.png 848w, https://substackcdn.com/image/fetch/$s_!l57b!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7194e4d0-998a-4713-88d7-07802951fe88_2794x928.png 1272w, https://substackcdn.com/image/fetch/$s_!l57b!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7194e4d0-998a-4713-88d7-07802951fe88_2794x928.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft 
icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">MHA calculations (&#8216;n&#8217; in the image is the same as &#8216;num_heads&#8217; in the code)</figcaption></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2Y6r!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F247d3546-4bd5-4ad5-8682-f4dea45f57a3_2160x628.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!2Y6r!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F247d3546-4bd5-4ad5-8682-f4dea45f57a3_2160x628.png 
424w, https://substackcdn.com/image/fetch/$s_!2Y6r!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F247d3546-4bd5-4ad5-8682-f4dea45f57a3_2160x628.png 848w, https://substackcdn.com/image/fetch/$s_!2Y6r!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F247d3546-4bd5-4ad5-8682-f4dea45f57a3_2160x628.png 1272w, https://substackcdn.com/image/fetch/$s_!2Y6r!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F247d3546-4bd5-4ad5-8682-f4dea45f57a3_2160x628.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!2Y6r!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F247d3546-4bd5-4ad5-8682-f4dea45f57a3_2160x628.png" width="1456" height="423" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/247d3546-4bd5-4ad5-8682-f4dea45f57a3_2160x628.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:423,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!2Y6r!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F247d3546-4bd5-4ad5-8682-f4dea45f57a3_2160x628.png 424w, 
https://substackcdn.com/image/fetch/$s_!2Y6r!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F247d3546-4bd5-4ad5-8682-f4dea45f57a3_2160x628.png 848w, https://substackcdn.com/image/fetch/$s_!2Y6r!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F247d3546-4bd5-4ad5-8682-f4dea45f57a3_2160x628.png 1272w, https://substackcdn.com/image/fetch/$s_!2Y6r!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F247d3546-4bd5-4ad5-8682-f4dea45f57a3_2160x628.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line 
x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">MHA weighted Values calculations (Causal mask not shown in the image)</figcaption></figure></div><ul><li><p><strong>Multi-Query Attention (MQA)</strong></p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!bVYS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56beedd9-6265-46b8-8836-138ce0b04a4b_2424x1002.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!bVYS!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56beedd9-6265-46b8-8836-138ce0b04a4b_2424x1002.png 424w, https://substackcdn.com/image/fetch/$s_!bVYS!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56beedd9-6265-46b8-8836-138ce0b04a4b_2424x1002.png 848w, https://substackcdn.com/image/fetch/$s_!bVYS!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56beedd9-6265-46b8-8836-138ce0b04a4b_2424x1002.png 1272w, https://substackcdn.com/image/fetch/$s_!bVYS!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56beedd9-6265-46b8-8836-138ce0b04a4b_2424x1002.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!bVYS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56beedd9-6265-46b8-8836-138ce0b04a4b_2424x1002.png" width="1456" height="602" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/56beedd9-6265-46b8-8836-138ce0b04a4b_2424x1002.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:602,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!bVYS!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56beedd9-6265-46b8-8836-138ce0b04a4b_2424x1002.png 424w, https://substackcdn.com/image/fetch/$s_!bVYS!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56beedd9-6265-46b8-8836-138ce0b04a4b_2424x1002.png 848w, https://substackcdn.com/image/fetch/$s_!bVYS!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56beedd9-6265-46b8-8836-138ce0b04a4b_2424x1002.png 1272w, https://substackcdn.com/image/fetch/$s_!bVYS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56beedd9-6265-46b8-8836-138ce0b04a4b_2424x1002.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">MQA operations (&#8216;n&#8217; in the image is the same as &#8216;num_heads&#8217; in the code)</figcaption></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ZQod!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd71a53d9-ae60-4b5b-a3e4-87c9329cc314_2300x732.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ZQod!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd71a53d9-ae60-4b5b-a3e4-87c9329cc314_2300x732.png 424w, 
https://substackcdn.com/image/fetch/$s_!ZQod!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd71a53d9-ae60-4b5b-a3e4-87c9329cc314_2300x732.png 848w, https://substackcdn.com/image/fetch/$s_!ZQod!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd71a53d9-ae60-4b5b-a3e4-87c9329cc314_2300x732.png 1272w, https://substackcdn.com/image/fetch/$s_!ZQod!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd71a53d9-ae60-4b5b-a3e4-87c9329cc314_2300x732.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ZQod!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd71a53d9-ae60-4b5b-a3e4-87c9329cc314_2300x732.png" width="1456" height="463" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d71a53d9-ae60-4b5b-a3e4-87c9329cc314_2300x732.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:463,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ZQod!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd71a53d9-ae60-4b5b-a3e4-87c9329cc314_2300x732.png 424w, 
https://substackcdn.com/image/fetch/$s_!ZQod!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd71a53d9-ae60-4b5b-a3e4-87c9329cc314_2300x732.png 848w, https://substackcdn.com/image/fetch/$s_!ZQod!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd71a53d9-ae60-4b5b-a3e4-87c9329cc314_2300x732.png 1272w, https://substackcdn.com/image/fetch/$s_!ZQod!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd71a53d9-ae60-4b5b-a3e4-87c9329cc314_2300x732.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line 
x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">MQA weighted Values calculation (Causal mask not shown in the image)</figcaption></figure></div><ul><li><p><strong>Grouped-Query Attention (GQA)</strong></p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!y8T4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02182c91-8cab-49fe-ab82-39ec6ca010a3_2866x992.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!y8T4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02182c91-8cab-49fe-ab82-39ec6ca010a3_2866x992.png 424w, https://substackcdn.com/image/fetch/$s_!y8T4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02182c91-8cab-49fe-ab82-39ec6ca010a3_2866x992.png 848w, https://substackcdn.com/image/fetch/$s_!y8T4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02182c91-8cab-49fe-ab82-39ec6ca010a3_2866x992.png 1272w, https://substackcdn.com/image/fetch/$s_!y8T4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02182c91-8cab-49fe-ab82-39ec6ca010a3_2866x992.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!y8T4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02182c91-8cab-49fe-ab82-39ec6ca010a3_2866x992.png" width="1456" height="504" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/02182c91-8cab-49fe-ab82-39ec6ca010a3_2866x992.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:504,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:212180,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186401144?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02182c91-8cab-49fe-ab82-39ec6ca010a3_2866x992.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!y8T4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02182c91-8cab-49fe-ab82-39ec6ca010a3_2866x992.png 424w, https://substackcdn.com/image/fetch/$s_!y8T4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02182c91-8cab-49fe-ab82-39ec6ca010a3_2866x992.png 848w, https://substackcdn.com/image/fetch/$s_!y8T4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02182c91-8cab-49fe-ab82-39ec6ca010a3_2866x992.png 1272w, https://substackcdn.com/image/fetch/$s_!y8T4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F02182c91-8cab-49fe-ab82-39ec6ca010a3_2866x992.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">GQA operations (&#8216;n&#8217; and &#8216;g&#8217; in the image are the same as &#8216;num_heads&#8217; and &#8216;num_groups&#8217; in the code, respectively)</figcaption></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!l6xJ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24673f09-fa53-49ca-b4e3-b1edb77bec6a_2314x598.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!l6xJ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24673f09-fa53-49ca-b4e3-b1edb77bec6a_2314x598.png 424w, 
https://substackcdn.com/image/fetch/$s_!l6xJ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24673f09-fa53-49ca-b4e3-b1edb77bec6a_2314x598.png 848w, https://substackcdn.com/image/fetch/$s_!l6xJ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24673f09-fa53-49ca-b4e3-b1edb77bec6a_2314x598.png 1272w, https://substackcdn.com/image/fetch/$s_!l6xJ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24673f09-fa53-49ca-b4e3-b1edb77bec6a_2314x598.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!l6xJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24673f09-fa53-49ca-b4e3-b1edb77bec6a_2314x598.png" width="1456" height="376" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/24673f09-fa53-49ca-b4e3-b1edb77bec6a_2314x598.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:376,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:123433,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186401144?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24673f09-fa53-49ca-b4e3-b1edb77bec6a_2314x598.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!l6xJ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24673f09-fa53-49ca-b4e3-b1edb77bec6a_2314x598.png 
424w, https://substackcdn.com/image/fetch/$s_!l6xJ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24673f09-fa53-49ca-b4e3-b1edb77bec6a_2314x598.png 848w, https://substackcdn.com/image/fetch/$s_!l6xJ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24673f09-fa53-49ca-b4e3-b1edb77bec6a_2314x598.png 1272w, https://substackcdn.com/image/fetch/$s_!l6xJ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24673f09-fa53-49ca-b4e3-b1edb77bec6a_2314x598.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">GQA weighted Values calculation (Causal mask not shown in the image)</figcaption></figure></div><div><hr></div><h3><strong>Testing Out GQA</strong></h3><p>Let&#8217;s use GQA to process some randomly initialized input embeddings as follows.</p><pre><code># Hyperparameters
# Small values keep the printed shapes easy to check by eye.
batch_size = 1  # number of sequences in the batch
sequence_length = 4  # tokens per sequence
embedding_dim = 12  # size of each token embedding
num_heads = 6  # number of query (Q) heads
num_groups = 2  # number of KV groups; must divide num_heads (here presumably 6 / 2 = 3 Q heads per group)

# Create input embeddings: random values drawn uniformly from [0, 1),
# with shape (batch_size, sequence_length, embedding_dim) = (1, 4, 12)
input_embeddings = torch.rand(batch_size, sequence_length, embedding_dim)

# Initialize GQA; arguments are passed positionally in the order
# embedding_dim, num_heads, num_groups
gqa = GroupedQueryAttention(embedding_dim, num_heads, num_groups)

# Forward pass
output = gqa(input_embeddings)</code></pre><p>Note how GQA preserves the shape of the input (<code>[batch_size, sequence_length, embedding_dim]</code>) just like MHA and MQA. This allows GQA to act as a drop-in replacement for MHA inside a Transformer block, and lets us stack multiple such blocks on top of each other.</p><pre><code>print("Input shape:", input_embeddings.shape)
print("Output shape:", output.shape)

"""
Output:
Input shape: torch.Size([1, 4, 12])
Output shape: torch.Size([1, 4, 12])
"""</code></pre><p>There&#8217;s another interesting finding that you must know about:</p><ul><li><p>When <code>num_groups = 1</code>, only 1 KV head is shared across all Q heads. This means the architecture becomes MQA.</p></li><li><p>When <code>num_groups = num_heads</code>, each Q head gets its own KV head. This means the architecture becomes MHA.</p></li><li><p>When <code>1 &lt; num_groups &lt; num_heads</code>, the architecture is GQA.</p></li></ul><div><hr></div><p>If you loved reading this article and found it valuable, restack to share it with others. &#10084;&#65039;</p><p>If you want to get even more value from this newsletter, become a <strong>paid subscriber</strong>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe&quot;,&quot;text&quot;:&quot;Join the Paid tier Today!&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe"><span>Join the Paid tier Today!</span></a></p><p>Get access to all valuable lessons, including:</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;47ff2ab3-ded7-4e19-8b69-7afea9c6f375&quot;,&quot;caption&quot;:&quot;&#127873; Become a paid subscriber to &#8216;Into AI&#8217; today at a special 25% discount on the annual subscription.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build and Train an LLM from Scratch&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. 
Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-12-31T11:47:16.904Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!MRfV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F851366c5-6f74-479a-a06a-41e63fc79f6c_2480x1074.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/build-and-train-an-llm-from-scratch&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:182314468,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:15,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;8c39c881-fdf7-46e0-abac-47c46efa0204&quot;,&quot;caption&quot;:&quot;&#127873; Become a paid subscriber to &#8216;Into AI&#8217; today at a special 25% discount on the annual subscription.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Build a Mixture-of-Experts (MoE) LLM from Scratch 
(Part-1)&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-01-10T17:33:16.673Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!WsMN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F79861f59-020d-4802-ac7e-5d253704d639_2892x1118.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:183456424,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:7,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;6e5765f1-f4cd-438b-96e2-c576c599e146&quot;,&quot;caption&quot;:&quot;In the last part of this series, we learned about what memory is and why modern AI systems need it.&quot;,&quot;cta&quot;:&quot;Read full 
story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;What is Mem0 and how does it work&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-02-10T11:25:54.373Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!UGCo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/mem0&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:186846978,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:6,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;b6238910-5597-49e4-a2e5-7d04bfa71c68&quot;,&quot;caption&quot;:&quot;Recursive reasoning models are a promising architectural candidate that could lead us towards 
AGI.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;A Deep Dive Into Universal Reasoning Models&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-01-31T11:01:52.669Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!iYMg!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8918235d-f4f4-425b-8ea5-1bda5910d868_2718x1186.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/universal-reasoning-model&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:185674112,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:2,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><p>Check out my books on <strong><a href="https://bamaniaashish.gumroad.com/">Gumroad</a></strong> and connect with me on <strong><a 
href="https://www.linkedin.com/in/ashishbamania/">LinkedIn</a></strong> to stay in touch.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B_dw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" width="1344" height="256" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:256,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[What is Mem0 and how does it work]]></title><description><![CDATA[Everything you need to know about AI memory (Part-2)]]></description><link>https://www.intoai.pub/p/mem0</link><guid isPermaLink="false">https://www.intoai.pub/p/mem0</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Tue, 10 Feb 2026 11:25:54 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!UGCo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!UGCo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!UGCo!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 424w, https://substackcdn.com/image/fetch/$s_!UGCo!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 848w, https://substackcdn.com/image/fetch/$s_!UGCo!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 1272w, https://substackcdn.com/image/fetch/$s_!UGCo!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!UGCo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png" width="1456" height="645" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:645,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!UGCo!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 424w, https://substackcdn.com/image/fetch/$s_!UGCo!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 848w, https://substackcdn.com/image/fetch/$s_!UGCo!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 1272w, https://substackcdn.com/image/fetch/$s_!UGCo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In the last part of this series, we learned about what memory is and why modern AI systems need it.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;9d859aa6-8d3d-4e87-98bd-0f1f08c3a121&quot;,&quot;caption&quot;:&quot;&#127873; Become a paid subscriber to &#8216;Into AI&#8217; today at a special 25% discount on the annual subscription.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Memory For AI Agents: Everything That You Need To Know (Part-1)&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:155457308,&quot;name&quot;:&quot;Dr. 
Ashish Bamania&quot;,&quot;bio&quot;:&quot;I help you to level up in AI and Quantum Computing &#128104;&#127995;&#8205;&#128187;&quot;,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1rS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff41b7f65-55d7-4099-969a-931c2ddd2f5f_612x612.png&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-02-04T10:30:36.271Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!UGCo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://www.intoai.pub/p/ai-memory&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:186643994,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:2,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1813260,&quot;publication_name&quot;:&quot;Into AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Ea4T!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0ff07812-0dd7-482f-b6c1-12eee68c4f8c_1080x1080.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>In this part, we will learn about <strong><a href="https://mem0.ai/">Mem0</a></strong>, an open-source memory layer for LLM applications.</p><div><hr></div><p><strong>Become a paid subscriber to &#8216;Into AI&#8217; today at a special 25% discount on the annual subscription. 
&#127873;</strong></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Grab your 25% discount today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/741bb486"><span>Grab your 25% discount today</span></a></p><div><hr></div><h3>What is Mem0?</h3>
      <p>
          <a href="https://www.intoai.pub/p/mem0">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[🗓️ This Week In AI Research (1-7 February 26)]]></title><description><![CDATA[The top 10 AI research papers that you must know about this week.]]></description><link>https://www.intoai.pub/p/this-week-in-ai-research-1-7-february</link><guid isPermaLink="false">https://www.intoai.pub/p/this-week-in-ai-research-1-7-february</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Sun, 08 Feb 2026 11:38:04 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!P4A6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fadf1262b-3967-4899-a56e-d75e3c0f9a68_13824x6144.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!P4A6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fadf1262b-3967-4899-a56e-d75e3c0f9a68_13824x6144.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!P4A6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fadf1262b-3967-4899-a56e-d75e3c0f9a68_13824x6144.png 424w, https://substackcdn.com/image/fetch/$s_!P4A6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fadf1262b-3967-4899-a56e-d75e3c0f9a68_13824x6144.png 848w, https://substackcdn.com/image/fetch/$s_!P4A6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fadf1262b-3967-4899-a56e-d75e3c0f9a68_13824x6144.png 1272w, 
https://substackcdn.com/image/fetch/$s_!P4A6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fadf1262b-3967-4899-a56e-d75e3c0f9a68_13824x6144.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!P4A6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fadf1262b-3967-4899-a56e-d75e3c0f9a68_13824x6144.png" width="725.0078125" height="322.1703672304258" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/adf1262b-3967-4899-a56e-d75e3c0f9a68_13824x6144.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:647,&quot;width&quot;:1456,&quot;resizeWidth&quot;:725.0078125,&quot;bytes&quot;:9300945,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187017747?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fadf1262b-3967-4899-a56e-d75e3c0f9a68_13824x6144.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!P4A6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fadf1262b-3967-4899-a56e-d75e3c0f9a68_13824x6144.png 424w, https://substackcdn.com/image/fetch/$s_!P4A6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fadf1262b-3967-4899-a56e-d75e3c0f9a68_13824x6144.png 848w, 
https://substackcdn.com/image/fetch/$s_!P4A6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fadf1262b-3967-4899-a56e-d75e3c0f9a68_13824x6144.png 1272w, https://substackcdn.com/image/fetch/$s_!P4A6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fadf1262b-3967-4899-a56e-d75e3c0f9a68_13824x6144.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Before we begin, I have an <strong>exciting offer</strong> for you that is only <strong>valid for the next 24 
hours</strong>!</p><p>Grab a <strong>flat 50% discount</strong> on the <strong>annual membership</strong> of Into AI today.</p><p>&#128073; Upgrade here, lock in the discount, and use &#8216;Into AI&#8217; as your <strong>unfair advantage</strong>:</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/9e599873&quot;,&quot;text&quot;:&quot;Grab Your Offer NOW&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/9e599873"><span>Grab Your Offer NOW</span></a></p><div><hr></div><h4>1. GPT-5.3-Codex</h4><p>OpenAI launched <strong>GPT-5.3-Codex</strong>, its most powerful agentic coding model to date. </p><p>The model combines the coding performance of GPT-5.2-Codex with the reasoning and professional knowledge of GPT-5.2 in one system. It runs about 25% faster than the previous version and excels in industry benchmarks, such as SWE-Bench Pro and Terminal-Bench. </p><p>It can handle long, multi-step tasks that involve research, tool use, debugging, deployment, and complex execution. 
</p><p>And unlike the earlier versions, GPT-5.3-Codex not only writes and reviews code but also acts as an interactive collaborator throughout the software lifecycle and other knowledge work.</p><p>One of the most interesting aspects is that the Codex team used early versions of GPT-5.3-Codex to debug its own training, manage its own deployment, and analyze test results and evaluations, making it the first OpenAI model to help create itself.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!4u_P!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F49b579e7-65d2-4006-bc92-a3426316e706_2268x1046.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!4u_P!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F49b579e7-65d2-4006-bc92-a3426316e706_2268x1046.png 424w, https://substackcdn.com/image/fetch/$s_!4u_P!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F49b579e7-65d2-4006-bc92-a3426316e706_2268x1046.png 848w, https://substackcdn.com/image/fetch/$s_!4u_P!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F49b579e7-65d2-4006-bc92-a3426316e706_2268x1046.png 1272w, https://substackcdn.com/image/fetch/$s_!4u_P!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F49b579e7-65d2-4006-bc92-a3426316e706_2268x1046.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!4u_P!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F49b579e7-65d2-4006-bc92-a3426316e706_2268x1046.png" width="1456" height="672" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/49b579e7-65d2-4006-bc92-a3426316e706_2268x1046.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:672,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:209168,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187017747?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F49b579e7-65d2-4006-bc92-a3426316e706_2268x1046.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!4u_P!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F49b579e7-65d2-4006-bc92-a3426316e706_2268x1046.png 424w, https://substackcdn.com/image/fetch/$s_!4u_P!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F49b579e7-65d2-4006-bc92-a3426316e706_2268x1046.png 848w, https://substackcdn.com/image/fetch/$s_!4u_P!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F49b579e7-65d2-4006-bc92-a3426316e706_2268x1046.png 1272w, https://substackcdn.com/image/fetch/$s_!4u_P!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F49b579e7-65d2-4006-bc92-a3426316e706_2268x1046.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://openai.com/index/introducing-gpt-5-3-codex/">using this link</a>.</p><div><hr></div><h4>2. Claude Opus 4.6</h4><p>Anthropic has released <strong>Claude Opus 4.6</strong>, its most capable model to date.</p><p>Opus 4.6 performs exceptionally well in coding, planning, and long-term tasks, and is more reliable with large codebases and complex workflows, with a stronger ability to find and fix its own mistakes. 
</p><p>Additionally, it introduces a 1-million-token context window (in beta), which improves its understanding of longer contexts.</p><p>On benchmarks, Opus 4.6:</p><ul><li><p>Ranks 2nd on Terminal-Bench 2.0 for agentic coding, behind only GPT-5.3-Codex</p></li></ul><div class="comment" data-attrs="{&quot;url&quot;:&quot;https://open.substack.com/home&quot;,&quot;commentId&quot;:210557532,&quot;comment&quot;:{&quot;id&quot;:210557532,&quot;date&quot;:&quot;2026-02-06T00:05:25.608Z&quot;,&quot;edited_at&quot;:null,&quot;body&quot;:&quot;Of course, \&quot;benchmarks != real world performance\&quot;, and benchmarks have many issues. But what an exciting week for coding LLMs. \n\nWe got the open-weight Qwen3-Next-Coder, and we just got the Codex 5.3 / Opus 4.6 double release. \n\nUnfortunately, Anthropic didn&#8217;t share SWE Bench Pro benchmarks, but here I put them side by side based on the available Terminus 2.0 numbers:&quot;,&quot;body_json&quot;:{&quot;type&quot;:&quot;doc&quot;,&quot;attrs&quot;:{&quot;schemaVersion&quot;:&quot;v1&quot;},&quot;content&quot;:[{&quot;type&quot;:&quot;paragraph&quot;,&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;Of course, \&quot;benchmarks != real world performance\&quot;, and benchmarks have many issues. But what an exciting week for coding LLMs. &quot;}]},{&quot;type&quot;:&quot;paragraph&quot;,&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;We got the open-weight Qwen3-Next-Coder, and we just got the Codex 5.3 / Opus 4.6 double release. 
&quot;}]},{&quot;type&quot;:&quot;paragraph&quot;,&quot;content&quot;:[{&quot;type&quot;:&quot;text&quot;,&quot;text&quot;:&quot;Unfortunately, Anthropic didn&#8217;t share SWE Bench Pro benchmarks, but here I put them side by side based on the available Terminus 2.0 numbers:&quot;}]}]},&quot;restacks&quot;:0,&quot;reaction_count&quot;:31,&quot;attachments&quot;:[{&quot;id&quot;:&quot;3e33b86a-b418-436b-8b9d-a5f1ab0d6503&quot;,&quot;type&quot;:&quot;image&quot;,&quot;imageUrl&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3d616501-63bd-490c-a28a-a8f0c730d11e_1310x936.png&quot;,&quot;imageWidth&quot;:1310,&quot;imageHeight&quot;:936,&quot;explicit&quot;:false}],&quot;name&quot;:&quot;Sebastian Raschka, PhD&quot;,&quot;user_id&quot;:27393275,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fbucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com%2Fpublic%2Fimages%2F61f4c017-506f-4e9b-a24f-76340dad0309_800x800.jpeg&quot;,&quot;user_bestseller_tier&quot;:1000,&quot;userStatus&quot;:{&quot;bestsellerTier&quot;:1000,&quot;subscriberTier&quot;:1,&quot;leaderboard&quot;:{&quot;ranking&quot;:&quot;trending&quot;,&quot;rank&quot;:33,&quot;publicationName&quot;:&quot;Ahead of AI&quot;,&quot;label&quot;:&quot;Technology&quot;,&quot;categoryId&quot;:&quot;4&quot;,&quot;publicationId&quot;:1174659},&quot;vip&quot;:false,&quot;badge&quot;:{&quot;type&quot;:&quot;bestseller&quot;,&quot;tier&quot;:1000},&quot;paidPublicationIds&quot;:[9873],&quot;subscriber&quot;:null}}}" data-component-name="CommentPlaceholder"></div><ul><li><p>Tops Humanity&#8217;s Last Exam for complex reasoning</p></li><li><p>Performs best on GDPval-AA for economically valuable knowledge work</p></li><li><p>Ranks highest on BrowseComp for finding hard-to-locate information online</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!xhJ6!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57d54d23-2d7d-48b8-8cb9-01b7e9aec182_3840x2160.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!xhJ6!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57d54d23-2d7d-48b8-8cb9-01b7e9aec182_3840x2160.webp 424w, https://substackcdn.com/image/fetch/$s_!xhJ6!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57d54d23-2d7d-48b8-8cb9-01b7e9aec182_3840x2160.webp 848w, https://substackcdn.com/image/fetch/$s_!xhJ6!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57d54d23-2d7d-48b8-8cb9-01b7e9aec182_3840x2160.webp 1272w, https://substackcdn.com/image/fetch/$s_!xhJ6!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57d54d23-2d7d-48b8-8cb9-01b7e9aec182_3840x2160.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!xhJ6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57d54d23-2d7d-48b8-8cb9-01b7e9aec182_3840x2160.webp" width="1456" height="819" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/57d54d23-2d7d-48b8-8cb9-01b7e9aec182_3840x2160.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!xhJ6!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57d54d23-2d7d-48b8-8cb9-01b7e9aec182_3840x2160.webp 424w, https://substackcdn.com/image/fetch/$s_!xhJ6!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57d54d23-2d7d-48b8-8cb9-01b7e9aec182_3840x2160.webp 848w, https://substackcdn.com/image/fetch/$s_!xhJ6!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57d54d23-2d7d-48b8-8cb9-01b7e9aec182_3840x2160.webp 1272w, https://substackcdn.com/image/fetch/$s_!xhJ6!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F57d54d23-2d7d-48b8-8cb9-01b7e9aec182_3840x2160.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"></figcaption></figure></div><p>Opus 4.6 can assist with research and financial analysis, create documents, work with spreadsheets and presentations, and multitask autonomously.</p><p>Despite these advancements, it maintains a strong safety and alignment profile that meets the highest industry standards.</p><p>Read more about this research <a href="https://www-cdn.anthropic.com/0dd865075ad3132672ee0ab40b05a53f14cf5288.pdf">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?coupon=9e599873&quot;,&quot;text&quot;:&quot;Grab your flat 50% discount&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?coupon=9e599873"><span>Grab your flat 50% discount</span></a></p><div><hr></div><h4>3. 
Qwen3-Coder-Next</h4><p>Alibaba&#8217;s Qwen team introduced its new open-weight AI coding model called <strong>Qwen3-Coder-Next</strong>, built specifically for agentic coding tasks and local development workflows. </p><p>This model uses an 80B-parameter <a href="https://www.intoai.pub/p/build-a-mixture-of-experts-layer-from-scratch?utm_source=publication-search">Mixture-of-Experts (MoE) architecture</a> and  activates only about 3B parameters during inference, making it efficient and cost-effective. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Nq1y!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ef72009-8412-46a6-a4cf-e2ba6744f7c4_4420x2373.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Nq1y!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ef72009-8412-46a6-a4cf-e2ba6744f7c4_4420x2373.png 424w, https://substackcdn.com/image/fetch/$s_!Nq1y!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ef72009-8412-46a6-a4cf-e2ba6744f7c4_4420x2373.png 848w, https://substackcdn.com/image/fetch/$s_!Nq1y!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ef72009-8412-46a6-a4cf-e2ba6744f7c4_4420x2373.png 1272w, https://substackcdn.com/image/fetch/$s_!Nq1y!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ef72009-8412-46a6-a4cf-e2ba6744f7c4_4420x2373.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!Nq1y!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ef72009-8412-46a6-a4cf-e2ba6744f7c4_4420x2373.png" width="1456" height="782" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4ef72009-8412-46a6-a4cf-e2ba6744f7c4_4420x2373.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:782,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Qwen3 Coder Next Main Image&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Qwen3 Coder Next Main Image" title="Qwen3 Coder Next Main Image" srcset="https://substackcdn.com/image/fetch/$s_!Nq1y!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ef72009-8412-46a6-a4cf-e2ba6744f7c4_4420x2373.png 424w, https://substackcdn.com/image/fetch/$s_!Nq1y!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ef72009-8412-46a6-a4cf-e2ba6744f7c4_4420x2373.png 848w, https://substackcdn.com/image/fetch/$s_!Nq1y!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ef72009-8412-46a6-a4cf-e2ba6744f7c4_4420x2373.png 1272w, https://substackcdn.com/image/fetch/$s_!Nq1y!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4ef72009-8412-46a6-a4cf-e2ba6744f7c4_4420x2373.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft 
pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Despite using so few parameters, it delivers high performance on real-world coding challenges, including long-horizon reasoning, complex tool use, and recovery from execution failures. 
</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Dr7H!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1adee3ab-1b32-48b9-85da-45ee922842ac_4862x2136.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Dr7H!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1adee3ab-1b32-48b9-85da-45ee922842ac_4862x2136.png 424w, https://substackcdn.com/image/fetch/$s_!Dr7H!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1adee3ab-1b32-48b9-85da-45ee922842ac_4862x2136.png 848w, https://substackcdn.com/image/fetch/$s_!Dr7H!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1adee3ab-1b32-48b9-85da-45ee922842ac_4862x2136.png 1272w, https://substackcdn.com/image/fetch/$s_!Dr7H!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1adee3ab-1b32-48b9-85da-45ee922842ac_4862x2136.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Dr7H!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1adee3ab-1b32-48b9-85da-45ee922842ac_4862x2136.png" width="1456" height="640" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1adee3ab-1b32-48b9-85da-45ee922842ac_4862x2136.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:640,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Qwen3 Coder Next 
Benchmarks&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Qwen3 Coder Next Benchmarks" title="Qwen3 Coder Next Benchmarks" srcset="https://substackcdn.com/image/fetch/$s_!Dr7H!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1adee3ab-1b32-48b9-85da-45ee922842ac_4862x2136.png 424w, https://substackcdn.com/image/fetch/$s_!Dr7H!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1adee3ab-1b32-48b9-85da-45ee922842ac_4862x2136.png 848w, https://substackcdn.com/image/fetch/$s_!Dr7H!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1adee3ab-1b32-48b9-85da-45ee922842ac_4862x2136.png 1272w, https://substackcdn.com/image/fetch/$s_!Dr7H!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1adee3ab-1b32-48b9-85da-45ee922842ac_4862x2136.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 
2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://github.com/QwenLM/Qwen3-Coder/blob/main/qwen3_coder_next_tech_report.pdf?spm=a2ty_o06.30285417.0.0.3bdec9211LgjnZ&amp;file=qwen3_coder_next_tech_report.pdf">using this link</a>. </p><div><hr></div><h4>4. OpenAI Frontier</h4><p>OpenAI has launched <strong>OpenAI Frontier</strong>, a new platform for businesses to build, deploy, manage, and govern AI agents. </p><p>This platform connects agents with company data and tools, such as CRM systems, data warehouses, and internal apps, and provides shared context, onboarding, feedback loops, permissions, and governance. </p><p>It also allows agents to automate complex tasks, including data analysis, software engineering, and operations. 
</p><p>Frontier supports both agents built on OpenAI technology and third-party agents, aims to reduce fragmentation (<a href="https://www.credal.ai/blog/agent-sprawl-and-agent-registries">Agent sprawl</a>), and helps companies use AI effectively within their existing systems.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!4skb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f94c488-9bb8-4eab-bf39-549e7c865bf8_2000x900.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!4skb!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f94c488-9bb8-4eab-bf39-549e7c865bf8_2000x900.webp 424w, https://substackcdn.com/image/fetch/$s_!4skb!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f94c488-9bb8-4eab-bf39-549e7c865bf8_2000x900.webp 848w, https://substackcdn.com/image/fetch/$s_!4skb!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f94c488-9bb8-4eab-bf39-549e7c865bf8_2000x900.webp 1272w, https://substackcdn.com/image/fetch/$s_!4skb!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f94c488-9bb8-4eab-bf39-549e7c865bf8_2000x900.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!4skb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f94c488-9bb8-4eab-bf39-549e7c865bf8_2000x900.webp" width="1456" height="655" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3f94c488-9bb8-4eab-bf39-549e7c865bf8_2000x900.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:655,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Layered architecture diagram showing ChatGPT Enterprise, OpenAI Atlas, and Business Applications at the top; Your Agents, OpenAI Agents, and Third-Party Agents beneath; and foundational layers for Evaluation and Optimization, Agent Execution, and Business Context supporting enterprise AI workflows.&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Layered architecture diagram showing ChatGPT Enterprise, OpenAI Atlas, and Business Applications at the top; Your Agents, OpenAI Agents, and Third-Party Agents beneath; and foundational layers for Evaluation and Optimization, Agent Execution, and Business Context supporting enterprise AI workflows." title="Layered architecture diagram showing ChatGPT Enterprise, OpenAI Atlas, and Business Applications at the top; Your Agents, OpenAI Agents, and Third-Party Agents beneath; and foundational layers for Evaluation and Optimization, Agent Execution, and Business Context supporting enterprise AI workflows." 
srcset="https://substackcdn.com/image/fetch/$s_!4skb!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f94c488-9bb8-4eab-bf39-549e7c865bf8_2000x900.webp 424w, https://substackcdn.com/image/fetch/$s_!4skb!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f94c488-9bb8-4eab-bf39-549e7c865bf8_2000x900.webp 848w, https://substackcdn.com/image/fetch/$s_!4skb!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f94c488-9bb8-4eab-bf39-549e7c865bf8_2000x900.webp 1272w, https://substackcdn.com/image/fetch/$s_!4skb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f94c488-9bb8-4eab-bf39-549e7c865bf8_2000x900.webp 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about it <a href="https://openai.com/index/introducing-openai-frontier/">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?coupon=9e599873&amp;utm_content=187017747&quot;,&quot;text&quot;:&quot;Grab your flat 50% discount&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe?coupon=9e599873&amp;utm_content=187017747"><span>Grab your flat 50% discount</span></a></p><div><hr></div><h4>5. Voxtral-transcribe-2</h4><p>Mistral AI released the&nbsp;<strong>Voxtral Transcribe 2</strong> family of speech-to-text models that includes the following:</p><ol><li><p><strong>Voxtral Mini Transcribe V2:</strong> For batch audio transcription</p></li><li><p><strong>Voxtral Realtime:</strong> For live, ultra-low-latency streaming (that <strong>transcribes at the speed of sound</strong>)</p></li></ol><p><strong>Voxtral Mini Transcribe V2</strong> is a batch speech-to-text model designed to transcribe pre-recorded audio, with a focus on accuracy and a detailed transcript structure.</p><p>It includes speaker diarization (who spoke when), word-level timestamps, strong noise robustness, and context biasing (better handling of names and domain jargon).</p><p>Voxtral Mini Transcribe V2 achieves the lowest word error rate at the lowest price point in the industry ($0.003/min).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!9My1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff7df3da6-3085-4545-8ae2-50a7e60ee690_1775x1111.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9My1!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff7df3da6-3085-4545-8ae2-50a7e60ee690_1775x1111.png 424w, https://substackcdn.com/image/fetch/$s_!9My1!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff7df3da6-3085-4545-8ae2-50a7e60ee690_1775x1111.png 848w, https://substackcdn.com/image/fetch/$s_!9My1!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff7df3da6-3085-4545-8ae2-50a7e60ee690_1775x1111.png 1272w, https://substackcdn.com/image/fetch/$s_!9My1!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff7df3da6-3085-4545-8ae2-50a7e60ee690_1775x1111.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9My1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff7df3da6-3085-4545-8ae2-50a7e60ee690_1775x1111.png" width="1456" height="911" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f7df3da6-3085-4545-8ae2-50a7e60ee690_1775x1111.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:911,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9My1!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff7df3da6-3085-4545-8ae2-50a7e60ee690_1775x1111.png 424w, https://substackcdn.com/image/fetch/$s_!9My1!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff7df3da6-3085-4545-8ae2-50a7e60ee690_1775x1111.png 848w, https://substackcdn.com/image/fetch/$s_!9My1!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff7df3da6-3085-4545-8ae2-50a7e60ee690_1775x1111.png 1272w, https://substackcdn.com/image/fetch/$s_!9My1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff7df3da6-3085-4545-8ae2-50a7e60ee690_1775x1111.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>Voxtral Realtime</strong> is a live, streaming speech-to-text model, designed for ultra-low-latency transcription (sub-200 ms). 
</p><p>This makes it suitable for real-time voice agents and for supporting key features such as timestamps, speaker separation, and stability in noisy real-world audio environments.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!rIAn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb41af668-51bc-4099-b3e3-0c6aa7708883_2621x1293.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!rIAn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb41af668-51bc-4099-b3e3-0c6aa7708883_2621x1293.png 424w, https://substackcdn.com/image/fetch/$s_!rIAn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb41af668-51bc-4099-b3e3-0c6aa7708883_2621x1293.png 848w, https://substackcdn.com/image/fetch/$s_!rIAn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb41af668-51bc-4099-b3e3-0c6aa7708883_2621x1293.png 1272w, https://substackcdn.com/image/fetch/$s_!rIAn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb41af668-51bc-4099-b3e3-0c6aa7708883_2621x1293.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!rIAn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb41af668-51bc-4099-b3e3-0c6aa7708883_2621x1293.png" width="1456" height="718" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b41af668-51bc-4099-b3e3-0c6aa7708883_2621x1293.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:718,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!rIAn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb41af668-51bc-4099-b3e3-0c6aa7708883_2621x1293.png 424w, https://substackcdn.com/image/fetch/$s_!rIAn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb41af668-51bc-4099-b3e3-0c6aa7708883_2621x1293.png 848w, https://substackcdn.com/image/fetch/$s_!rIAn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb41af668-51bc-4099-b3e3-0c6aa7708883_2621x1293.png 1272w, https://substackcdn.com/image/fetch/$s_!rIAn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb41af668-51bc-4099-b3e3-0c6aa7708883_2621x1293.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Voxtral Realtime is available as open source under the Apache 2.0 license and is efficient enough to be deployable on edge devices for privacy-first applications.</p><p>Read more about it <a href="https://mistral.ai/news/voxtral-transcribe-2">using this link</a>.</p><div><hr></div><h4>6. World Action Models are Zero-shot Policies</h4><p>This research by NVIDIA presents <strong>DreamZero</strong>, a 14B-parameter World Action Model (WAM) built upon a pretrained video diffusion backbone and designed for robot control.</p><p>DreamZero learns to predict future world states and corresponding actions as part of the same model. This allows it to generalize zero-shot to new tasks and environments better than existing VLA models, which struggle with unseen physical motions (over 2&#215; improvement in task progress in unseen scenarios). </p><p>It works in real time and can quickly adapt to a new robot using about 30 minutes of play data. 
This demonstrates its strong cross-embodiment transfer and robustness without needing repetitive demonstrations.</p><div class="native-video-embed" data-component-name="VideoPlaceholder" data-attrs="{&quot;mediaUploadId&quot;:&quot;cad20c2c-7874-48e8-afb0-270252c8f498&quot;,&quot;duration&quot;:null}"></div><p>Read more about this research <a href="https://dreamzero0.github.io/DreamZero.pdf">using this link</a>.</p><div><hr></div><h4><strong>7. Kona 1.0</strong></h4><p>Logical Intelligence presents an Energy-Based Reasoning Model (EBRM) called <strong>Kona 1.0</strong> that can solve hard Sudoku puzzles, showing that energy-based models (EBMs) can handle constraint-satisfaction problems much better than standard LLMs. </p><p>Kona does not generate solutions one token at a time. Instead, it evaluates entire candidate solutions against all constraints at once, assigning a low energy score to valid grids and using gradient-based adjustments to fix any violations. </p><p>In their benchmark, Kona solved 96.2% of puzzles in about 313 milliseconds, while leading LLMs like GPT-5.2, Claude Opus 4.5, and Gemini 3 Pro only solved around 2% before timing out or failing. </p><p>This shows that LLMs&#8217; autoregressive design makes them poorly suited to reasoning on spatial tasks, whereas energy-based models can reason holistically and efficiently revise partial solutions to these problems.</p><div class="native-video-embed" data-component-name="VideoPlaceholder" data-attrs="{&quot;mediaUploadId&quot;:&quot;26b4f13c-eff3-4051-a34f-d06061d7893c&quot;,&quot;duration&quot;:null}"></div><p>Read more about this research <a href="https://logicalintelligence.com/blog/energy-based-model-sudoku-demo">using this link</a>. 
</p><p>Try out the demo yourself <a href="https://sudoku.logicalintelligence.com/">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?coupon=9e599873&amp;utm_content=187017747&quot;,&quot;text&quot;:&quot;Grab your flat 50% discount&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/subscribe?coupon=9e599873&amp;utm_content=187017747"><span>Grab your flat 50% discount</span></a></p><div><hr></div><h4>8. Mining Generalizable Activation Functions</h4><p>This research from Google DeepMind presents four novel activation functions discovered using an evolutionary search powered by <a href="https://deepmind.google/blog/alphaevolve-a-gemini-powered-coding-agent-for-designing-advanced-algorithms/">AlphaEvolve</a>.</p><p>These activation functions are:</p><ol><li><p>GELU-Sinc-Perturbation</p></li><li><p>GELUSine</p></li><li><p>Gaussian-Modulated Tangent Unit (GMTU)</p></li><li><p>Turbulent Activation Function</p></li></ol><p>These activations consistently outperform ReLU and often GELU, especially on out-of-distribution (OOD) and reasoning-style benchmarks. 
</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!iXH7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd754d707-f00e-4d87-82e5-713d7d159f24_2216x1334.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!iXH7!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd754d707-f00e-4d87-82e5-713d7d159f24_2216x1334.png 424w, https://substackcdn.com/image/fetch/$s_!iXH7!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd754d707-f00e-4d87-82e5-713d7d159f24_2216x1334.png 848w, https://substackcdn.com/image/fetch/$s_!iXH7!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd754d707-f00e-4d87-82e5-713d7d159f24_2216x1334.png 1272w, https://substackcdn.com/image/fetch/$s_!iXH7!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd754d707-f00e-4d87-82e5-713d7d159f24_2216x1334.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!iXH7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd754d707-f00e-4d87-82e5-713d7d159f24_2216x1334.png" width="1456" height="876" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d754d707-f00e-4d87-82e5-713d7d159f24_2216x1334.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:876,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:520408,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187017747?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd754d707-f00e-4d87-82e5-713d7d159f24_2216x1334.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!iXH7!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd754d707-f00e-4d87-82e5-713d7d159f24_2216x1334.png 424w, https://substackcdn.com/image/fetch/$s_!iXH7!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd754d707-f00e-4d87-82e5-713d7d159f24_2216x1334.png 848w, https://substackcdn.com/image/fetch/$s_!iXH7!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd754d707-f00e-4d87-82e5-713d7d159f24_2216x1334.png 1272w, https://substackcdn.com/image/fetch/$s_!iXH7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd754d707-f00e-4d87-82e5-713d7d159f24_2216x1334.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://www.arxiv.org/abs/2602.05688">using this link</a>.</p><div><hr></div><h4>9. Learning to Reason in 13 Parameters</h4><p>This research from Meta presents <strong>TinyLoRA</strong>, an extremely low-parameter fine-tuning method for LLMs that enables reasoning with very few trained parameters. </p><p>The authors show that applying TinyLoRA to a base model (Qwen-2.5-7B-Instruct) can significantly improve performance on reasoning benchmarks such as GSM8K, achieving about 91% accuracy while training with only 13 parameters. </p><p>This trend continues with tougher learning-to-reason benchmarks, including AIME, AMC, and MATH500, recovering around 90% of the full performance gains while using 1000X fewer trainable parameters.</p><p>Note that such strong performance was achieved only with RL training. 
Models trained using SFT required 100-1000x more trainable parameters to achieve the same performance.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9Zav!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F645d6c1b-762a-4ebd-816f-f057b9534d90_2774x1320.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9Zav!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F645d6c1b-762a-4ebd-816f-f057b9534d90_2774x1320.png 424w, https://substackcdn.com/image/fetch/$s_!9Zav!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F645d6c1b-762a-4ebd-816f-f057b9534d90_2774x1320.png 848w, https://substackcdn.com/image/fetch/$s_!9Zav!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F645d6c1b-762a-4ebd-816f-f057b9534d90_2774x1320.png 1272w, https://substackcdn.com/image/fetch/$s_!9Zav!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F645d6c1b-762a-4ebd-816f-f057b9534d90_2774x1320.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9Zav!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F645d6c1b-762a-4ebd-816f-f057b9534d90_2774x1320.png" width="1456" height="693" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/645d6c1b-762a-4ebd-816f-f057b9534d90_2774x1320.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:693,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:455392,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187017747?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F645d6c1b-762a-4ebd-816f-f057b9534d90_2774x1320.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9Zav!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F645d6c1b-762a-4ebd-816f-f057b9534d90_2774x1320.png 424w, https://substackcdn.com/image/fetch/$s_!9Zav!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F645d6c1b-762a-4ebd-816f-f057b9534d90_2774x1320.png 848w, https://substackcdn.com/image/fetch/$s_!9Zav!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F645d6c1b-762a-4ebd-816f-f057b9534d90_2774x1320.png 1272w, https://substackcdn.com/image/fetch/$s_!9Zav!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F645d6c1b-762a-4ebd-816f-f057b9534d90_2774x1320.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2602.04118">using this link</a>.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/9e599873&quot;,&quot;text&quot;:&quot;Grab Your Offer NOW&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/9e599873"><span>Grab Your Offer NOW</span></a></p><div><hr></div><h4>10. EchoJEPA: A Latent Predictive Foundation Model for Echocardiography</h4><p>This research paper introduces <strong>EchoJEPA</strong>, a new latent predictive foundation model trained on 18 million echocardiograms from about 300,000 patients, the largest dataset for this type of imaging to date. 
</p><p>The model uses a latent-prediction objective to learn strong anatomical representations that ignore speckle noise and artifacts inherent to ultrasound, thereby improving clinical metrics.</p><p>Compared with leading baselines, EchoJEPA achieves about 20% better performance in estimating left ventricular ejection fraction (LVEF) and 17% better performance in estimating right ventricular systolic pressure (RVSP).</p><p>Furthermore, it generalizes significantly better under acoustic disturbances and performs well on pediatric data without any fine-tuning. </p><p>The research suggests that latent predictive modeling is a promising approach for developing reliable and generalizable medical imaging AI.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!KsAE!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fababab60-0fb6-4806-9c4e-c5ac451bb8c4_2324x1316.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!KsAE!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fababab60-0fb6-4806-9c4e-c5ac451bb8c4_2324x1316.png 424w, https://substackcdn.com/image/fetch/$s_!KsAE!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fababab60-0fb6-4806-9c4e-c5ac451bb8c4_2324x1316.png 848w, https://substackcdn.com/image/fetch/$s_!KsAE!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fababab60-0fb6-4806-9c4e-c5ac451bb8c4_2324x1316.png 1272w, 
https://substackcdn.com/image/fetch/$s_!KsAE!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fababab60-0fb6-4806-9c4e-c5ac451bb8c4_2324x1316.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!KsAE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fababab60-0fb6-4806-9c4e-c5ac451bb8c4_2324x1316.png" width="1456" height="824" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ababab60-0fb6-4806-9c4e-c5ac451bb8c4_2324x1316.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:824,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:962846,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/187017747?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fababab60-0fb6-4806-9c4e-c5ac451bb8c4_2324x1316.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!KsAE!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fababab60-0fb6-4806-9c4e-c5ac451bb8c4_2324x1316.png 424w, https://substackcdn.com/image/fetch/$s_!KsAE!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fababab60-0fb6-4806-9c4e-c5ac451bb8c4_2324x1316.png 848w, 
https://substackcdn.com/image/fetch/$s_!KsAE!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fababab60-0fb6-4806-9c4e-c5ac451bb8c4_2324x1316.png 1272w, https://substackcdn.com/image/fetch/$s_!KsAE!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fababab60-0fb6-4806-9c4e-c5ac451bb8c4_2324x1316.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Read more about this research <a href="https://arxiv.org/abs/2602.02603">using this link</a>.</p><div><hr></div><p>This article is 
entirely free to read. If you loved reading it, restack and share it with others. &#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/this-week-in-ai-research-1-7-february?utm_source=substack&amp;utm_medium=email&amp;utm_content=share&amp;action=share&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/p/this-week-in-ai-research-1-7-february?utm_source=substack&amp;utm_medium=email&amp;utm_content=share&amp;action=share"><span>Share</span></a></p><p><strong>Don&#8217;t forget to grab your flat 50% discount on the annual membership of Into AI today.</strong></p><p>&#128073; Upgrade here, lock in the discount, and use &#8216;Into AI&#8217; as your <strong>unfair advantage</strong>:</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/9e599873&quot;,&quot;text&quot;:&quot;Grab Your Offer NOW&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/9e599873"><span>Grab Your Offer NOW</span></a></p><div><hr></div><p>You can also check out my books on <strong><a href="https://bamaniaashish.gumroad.com/">Gumroad</a></strong> and connect with me on <strong><a href="https://www.linkedin.com/in/ashishbamania/">LinkedIn</a></strong> to stay in touch.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B_dw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" width="1344" height="256" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:256,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" 
srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[How RNNs Work (And Why Everyone Stopped Using Them)]]></title><description><![CDATA[A gentle walkthrough of how Recurrent Neural Networks (RNNs) work, and the math that breaks them.]]></description><link>https://www.intoai.pub/p/how-rnns-work-and-why-everyone-stopped</link><guid isPermaLink="false">https://www.intoai.pub/p/how-rnns-work-and-why-everyone-stopped</guid><dc:creator><![CDATA[Dr. 
Ashish Bamania]]></dc:creator><pubDate>Sat, 07 Feb 2026 11:00:51 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!9c1g!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdd79ef2-0e39-4cfa-9f6a-c2779ca33851_2428x1054.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9c1g!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdd79ef2-0e39-4cfa-9f6a-c2779ca33851_2428x1054.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9c1g!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdd79ef2-0e39-4cfa-9f6a-c2779ca33851_2428x1054.png 424w, https://substackcdn.com/image/fetch/$s_!9c1g!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdd79ef2-0e39-4cfa-9f6a-c2779ca33851_2428x1054.png 848w, https://substackcdn.com/image/fetch/$s_!9c1g!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdd79ef2-0e39-4cfa-9f6a-c2779ca33851_2428x1054.png 1272w, https://substackcdn.com/image/fetch/$s_!9c1g!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdd79ef2-0e39-4cfa-9f6a-c2779ca33851_2428x1054.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9c1g!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdd79ef2-0e39-4cfa-9f6a-c2779ca33851_2428x1054.png" width="725.203125" height="314.78597184065933" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/cdd79ef2-0e39-4cfa-9f6a-c2779ca33851_2428x1054.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:632,&quot;width&quot;:1456,&quot;resizeWidth&quot;:725.203125,&quot;bytes&quot;:196671,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186415859?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdd79ef2-0e39-4cfa-9f6a-c2779ca33851_2428x1054.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9c1g!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdd79ef2-0e39-4cfa-9f6a-c2779ca33851_2428x1054.png 424w, https://substackcdn.com/image/fetch/$s_!9c1g!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdd79ef2-0e39-4cfa-9f6a-c2779ca33851_2428x1054.png 848w, https://substackcdn.com/image/fetch/$s_!9c1g!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdd79ef2-0e39-4cfa-9f6a-c2779ca33851_2428x1054.png 1272w, https://substackcdn.com/image/fetch/$s_!9c1g!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcdd79ef2-0e39-4cfa-9f6a-c2779ca33851_2428x1054.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container 
restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3><a href="https://apps.apple.com/ca/app/reshape-fat-loss-fitness/id1630709539">Reshape (Affiliate Partner)</a></h3><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Lzm4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F793b845b-6dda-4081-8aa3-69bb4424b5f0_3072x3072.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Lzm4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F793b845b-6dda-4081-8aa3-69bb4424b5f0_3072x3072.png 424w, 
https://substackcdn.com/image/fetch/$s_!Lzm4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F793b845b-6dda-4081-8aa3-69bb4424b5f0_3072x3072.png 848w, https://substackcdn.com/image/fetch/$s_!Lzm4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F793b845b-6dda-4081-8aa3-69bb4424b5f0_3072x3072.png 1272w, https://substackcdn.com/image/fetch/$s_!Lzm4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F793b845b-6dda-4081-8aa3-69bb4424b5f0_3072x3072.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Lzm4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F793b845b-6dda-4081-8aa3-69bb4424b5f0_3072x3072.png" width="1456" height="1456" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/793b845b-6dda-4081-8aa3-69bb4424b5f0_3072x3072.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:5443096,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186415859?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F793b845b-6dda-4081-8aa3-69bb4424b5f0_3072x3072.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!Lzm4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F793b845b-6dda-4081-8aa3-69bb4424b5f0_3072x3072.png 424w, https://substackcdn.com/image/fetch/$s_!Lzm4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F793b845b-6dda-4081-8aa3-69bb4424b5f0_3072x3072.png 848w, https://substackcdn.com/image/fetch/$s_!Lzm4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F793b845b-6dda-4081-8aa3-69bb4424b5f0_3072x3072.png 1272w, https://substackcdn.com/image/fetch/$s_!Lzm4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F793b845b-6dda-4081-8aa3-69bb4424b5f0_3072x3072.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Tired of trying to reach your weight loss goals and failing each time? We have all been there. But not anymore. </p><p><strong>It&#8217;s time to start seeing real results with Reshape!</strong></p><ul><li><p>Just snap a photo or say what you ate, and instantly get precise calorie, macro, and nutrient tracking with Reshape&#8217;s AI, skipping all that tedious logging work. </p></li><li><p>Get workout plans tailored for your equipment, schedule, and fitness level that change as you progress. Whether you&#8217;re building muscle, losing fat, or getting stronger, these plans adapt to your needs.</p></li><li><p>Your 24/7 AI coach, Fio, is a personal trainer in your pocket that provides instant feedback on your form, meal ideas, and motivation whenever you need it.</p></li><li><p>Track everything that matters in one powerful dashboard: meals, workouts, sleep, steps, and body composition, with easy Apple Health integration. </p></li><li><p>Track your weekly insights to learn what&#8217;s working for you so that you keep winning each and every day.</p></li></ul><p>Use the code &#8220;<strong>INTOAI</strong>&#8221; for a <strong>free</strong> <strong>Reshape Pro trial</strong> and <strong>join&nbsp;45K+ people</strong> who have transformed their bodies with Reshape. 
Your smarter fitness journey starts now!</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://apps.apple.com/ca/app/reshape-fat-loss-fitness/id1630709539&quot;,&quot;text&quot;:&quot;Try Reshape for FREE!&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://apps.apple.com/ca/app/reshape-fat-loss-fitness/id1630709539"><span>Try Reshape for FREE!</span></a></p><div><hr></div><p>This week&#8217;s newsletter is written by <span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Jose Parre&#241;o Garcia&quot;,&quot;id&quot;:255728031,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!h_mv!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0c4dad41-478b-4960-a5e0-98ed1e54657e_1168x1046.jpeg&quot;,&quot;uuid&quot;:&quot;cb42fc38-7048-43e0-8cae-84c6dd2558d0&quot;}" data-component-name="MentionToDOM"></span>. He is a senior Data Science manager at Skyscanner. 
</p><p>He regularly shares insights on building effective teams, developing leadership skills, and advancing careers in Data Science and Machine Learning through his newsletter, <strong><a href="https://joseparreogarcia.substack.com/">Senior Data Science Lead</a></strong>.</p><p>You can also find him and stay up to date with his content on <a href="https://www.linkedin.com/in/joseparrenogarcia/">LinkedIn</a>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!rNHj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1cc7dc6-477a-4462-922b-5c5523534e48_1670x898.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!rNHj!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1cc7dc6-477a-4462-922b-5c5523534e48_1670x898.png 424w, https://substackcdn.com/image/fetch/$s_!rNHj!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1cc7dc6-477a-4462-922b-5c5523534e48_1670x898.png 848w, https://substackcdn.com/image/fetch/$s_!rNHj!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1cc7dc6-477a-4462-922b-5c5523534e48_1670x898.png 1272w, https://substackcdn.com/image/fetch/$s_!rNHj!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1cc7dc6-477a-4462-922b-5c5523534e48_1670x898.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!rNHj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1cc7dc6-477a-4462-922b-5c5523534e48_1670x898.png" 
width="1456" height="783" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e1cc7dc6-477a-4462-922b-5c5523534e48_1670x898.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:783,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:995662,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186415859?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1cc7dc6-477a-4462-922b-5c5523534e48_1670x898.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!rNHj!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1cc7dc6-477a-4462-922b-5c5523534e48_1670x898.png 424w, https://substackcdn.com/image/fetch/$s_!rNHj!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1cc7dc6-477a-4462-922b-5c5523534e48_1670x898.png 848w, https://substackcdn.com/image/fetch/$s_!rNHj!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1cc7dc6-477a-4462-922b-5c5523534e48_1670x898.png 1272w, https://substackcdn.com/image/fetch/$s_!rNHj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe1cc7dc6-477a-4462-922b-5c5523534e48_1670x898.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><p>This week, I went all the way back to 2017. That&#8217;s when the now-legendary &#8216;<em><a href="https://arxiv.org/abs/1706.03762">Attention is All You Need</a></em>&#8217; paper came out &#8212; the one that introduced the world to Transformers, and set the foundation for everything from ChatGPT to image generation to code-writing copilots.</p><p>And sure, I could jump straight into explaining how Transformers work. But given the impact these models have had &#8212; and the fact that you probably see the word &#8220;attention&#8221; 30 times a week now &#8212; I thought it would be worth taking a step back (actually 2 steps back).</p><p>Before we can truly understand Transformers, we need to understand where they came from. 
And that means revisiting the architectures that paved the way: Recurrent Neural Networks (RNNs) and Long Short-Term Memory (LSTM) networks.</p><p>In this blog post, I am diving into RNNs.</p><p>We will walk through what they are, how they work, and most importantly, why they struggle. By the end, RNNs will feel like a clever little for-loop with memory instead of scary maths magic.</p><p>Ready? Let&#8217;s jump in!</p><div><hr></div><h3>What we will cover in this article</h3><ol><li><p>How is an RNN different from a classical Deep Neural Network? (or other classical sequence models)</p></li><li><p>Introducing a made-up use case with 3 data points for&nbsp;Stock price prediction</p></li><li><p>The scary official diagram of an RNN cell. (Don&#8217;t worry, we will break it down super easily.)</p></li><li><p>A walkthrough of RNN calculations (And you will see how the maths is not that scary after all.)</p></li><li><p>Three problems associated with RNNs</p></li></ol><div><hr></div><h3>How is an RNN different from a classical Deep Neural Network?</h3><p>I assume in this post that you have worked with (or are familiar with) the basics of classical Deep Neural Networks (from here on, DNNs). </p><p>If my assumption is correct, then the diagram below should feel really familiar. It is a diagram of a DNN with:</p><ul><li><p><code>X:</code> A set of input nodes, representing the variables you want to use for prediction.</p></li><li><p><code>H1, H2:</code> 2 hidden layers with 4 nodes. This is where the parameters live that learn how to dial up or down specific signals from X. Basically, the knobs the model adjusts to learn.</p></li><li><p><code>Y:</code> The prediction node. 
In this case, it&#8217;s only 1 for simplicity.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!6Uc0!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547a3d4e-dd00-4f4e-8635-f864cf0326c8_1069x713.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!6Uc0!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547a3d4e-dd00-4f4e-8635-f864cf0326c8_1069x713.png 424w, https://substackcdn.com/image/fetch/$s_!6Uc0!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547a3d4e-dd00-4f4e-8635-f864cf0326c8_1069x713.png 848w, https://substackcdn.com/image/fetch/$s_!6Uc0!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547a3d4e-dd00-4f4e-8635-f864cf0326c8_1069x713.png 1272w, https://substackcdn.com/image/fetch/$s_!6Uc0!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547a3d4e-dd00-4f4e-8635-f864cf0326c8_1069x713.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!6Uc0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547a3d4e-dd00-4f4e-8635-f864cf0326c8_1069x713.png" width="1069" height="713" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/547a3d4e-dd00-4f4e-8635-f864cf0326c8_1069x713.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:713,&quot;width&quot;:1069,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:451452,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547a3d4e-dd00-4f4e-8635-f864cf0326c8_1069x713.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!6Uc0!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547a3d4e-dd00-4f4e-8635-f864cf0326c8_1069x713.png 424w, https://substackcdn.com/image/fetch/$s_!6Uc0!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547a3d4e-dd00-4f4e-8635-f864cf0326c8_1069x713.png 848w, https://substackcdn.com/image/fetch/$s_!6Uc0!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547a3d4e-dd00-4f4e-8635-f864cf0326c8_1069x713.png 1272w, https://substackcdn.com/image/fetch/$s_!6Uc0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F547a3d4e-dd00-4f4e-8635-f864cf0326c8_1069x713.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Diagram of a classical Deep neural network (DNN)</figcaption></figure></div><p>Now, there are 2 main things to highlight in this diagram:</p><ol><li><p><strong>The DNN processes all the data at once.</strong> <br>You see the <code>X</code> input? From a DNN&#8217;s perspective, it&#8217;s a torrent of data, all pushed and processed at once. There is nowhere in this diagram that the network can say, &#8220;Hey, can you just send me <code>X1</code> and <code>X2 </code>first, and then I can process <code>X3</code> and <code>X4</code>?&#8221;.</p></li><li><p><strong>The DNN is feed-forward (or sequential).</strong> <br>In other words, the data flows from left to right (from <code>X</code> &#8594; <code>Y</code>). 
There is nowhere in this diagram that a node can stop the data flow and ask: &#8220;Hey, what data did I have in the previous step?&#8221; It is oblivious to that.</p></li></ol><div><hr></div><h4><strong>DNNs are really powerful, but they are also &#8220;memory-less&#8221;</strong></h4><p>DNNs are &#8220;memory-less&#8221; because of the two points mentioned above. And being &#8220;memory-less&#8221; means that DNNs really struggle to predict when&nbsp;<strong>sequence</strong>&nbsp;or&nbsp;<strong>order matters</strong>.</p><p>Take a simple stock price prediction scenario. The only way that DNNs can consider what happened yesterday or the day before is if you tell them what matters. This is usually done by manually creating features like:</p><ul><li><p>Yesterday&#8217;s price</p></li><li><p>A moving average over the past 7 days</p></li><li><p>A sine-transformed day-of-week feature to capture seasonality</p></li></ul><div><hr></div><h4><strong>But wasn&#8217;t Deep learning supposed to eliminate Feature engineering?</strong></h4><p>Yes (and no). </p><p>Neural networks do learn internal representations (i.e., &#8220;features&#8221;) from raw data. But when it comes to sequences, classical deep neural nets still need all the help they can get (so, kind of back again to square one, where we have to feature engineer stuff&#8230;)</p><p>This is where Recurrent Neural Networks (RNNs) come in.</p><p>Recurrent neural networks (RNNs) are a class of artificial neural networks designed for processing sequential data, such as text, speech, and time series, where the order of elements is important. </p><p>Their two main characteristics are:</p><ol><li><p><strong>RNNs process one input at a time. <br></strong>Instead of taking all of the input <code>X</code> in one big gulp, RNNs look at one data point at a time &#8212; like reading a sentence word by word, or stepping through a stock price day by day. 
This allows them to focus on how each input evolves over time.</p></li><li><p><strong>RNNs are recurrent. <br></strong>Yes, the information flows left to right, but at each step, it can also look at what happened before (kind of a right-to-left motion). It&#8217;s like a left-to-right with memory.</p></li></ol><p>Don&#8217;t worry if this feels dense right now; we will break it down step by step.</p><p>By the end of this post, you will not only understand what &#8220;recurrent connections&#8221; mean, but you will also see why RNNs became a foundational architecture for handling sequences.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!pyzj!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b4bf9b3-c900-4c3f-95cf-e9a1666d552e_888x499.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!pyzj!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b4bf9b3-c900-4c3f-95cf-e9a1666d552e_888x499.jpeg 424w, https://substackcdn.com/image/fetch/$s_!pyzj!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b4bf9b3-c900-4c3f-95cf-e9a1666d552e_888x499.jpeg 848w, https://substackcdn.com/image/fetch/$s_!pyzj!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b4bf9b3-c900-4c3f-95cf-e9a1666d552e_888x499.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!pyzj!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b4bf9b3-c900-4c3f-95cf-e9a1666d552e_888x499.jpeg 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!pyzj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b4bf9b3-c900-4c3f-95cf-e9a1666d552e_888x499.jpeg" width="888" height="499" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5b4bf9b3-c900-4c3f-95cf-e9a1666d552e_888x499.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:499,&quot;width&quot;:888,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:77160,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b4bf9b3-c900-4c3f-95cf-e9a1666d552e_888x499.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!pyzj!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b4bf9b3-c900-4c3f-95cf-e9a1666d552e_888x499.jpeg 424w, https://substackcdn.com/image/fetch/$s_!pyzj!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b4bf9b3-c900-4c3f-95cf-e9a1666d552e_888x499.jpeg 848w, https://substackcdn.com/image/fetch/$s_!pyzj!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b4bf9b3-c900-4c3f-95cf-e9a1666d552e_888x499.jpeg 1272w, 
https://substackcdn.com/image/fetch/$s_!pyzj!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b4bf9b3-c900-4c3f-95cf-e9a1666d552e_888x499.jpeg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>Introducing a made-up use case for&nbsp;Stock price prediction</h3><p>Ok, before we get into the RNN section, let me introduce you to the simplest stock price prediction exercise in the course of human history.</p><p>The diagram below shows a toy time series where:</p><ul><li><p>For simplicity, all stock prices are set to 0.</p></li><li><p>We 
have 3 data points. </p></li><li><p>The idea is to use yesterday&#8217;s and today&#8217;s stock prices to predict tomorrow&#8217;s stock price.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!s-k1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67a54b94-1018-4cb9-9d65-e05bf71abfae_526x380.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!s-k1!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67a54b94-1018-4cb9-9d65-e05bf71abfae_526x380.png 424w, https://substackcdn.com/image/fetch/$s_!s-k1!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67a54b94-1018-4cb9-9d65-e05bf71abfae_526x380.png 848w, https://substackcdn.com/image/fetch/$s_!s-k1!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67a54b94-1018-4cb9-9d65-e05bf71abfae_526x380.png 1272w, https://substackcdn.com/image/fetch/$s_!s-k1!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67a54b94-1018-4cb9-9d65-e05bf71abfae_526x380.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!s-k1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67a54b94-1018-4cb9-9d65-e05bf71abfae_526x380.png" width="526" height="380" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/67a54b94-1018-4cb9-9d65-e05bf71abfae_526x380.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:380,&quot;width&quot;:526,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:23821,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67a54b94-1018-4cb9-9d65-e05bf71abfae_526x380.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!s-k1!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67a54b94-1018-4cb9-9d65-e05bf71abfae_526x380.png 424w, https://substackcdn.com/image/fetch/$s_!s-k1!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67a54b94-1018-4cb9-9d65-e05bf71abfae_526x380.png 848w, https://substackcdn.com/image/fetch/$s_!s-k1!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67a54b94-1018-4cb9-9d65-e05bf71abfae_526x380.png 1272w, https://substackcdn.com/image/fetch/$s_!s-k1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67a54b94-1018-4cb9-9d65-e05bf71abfae_526x380.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>I want to introduce this in such a simple way because:</p><ol><li><p>I want to actually show you the maths of the RNN using super simple numbers.</p></li><li><p>Labelling each step as <em>yesterday</em>, <em>today</em>, and <em>tomorrow</em> will help anchor the RNN diagrams that follow.</p></li></ol><p>Let&#8217;s keep this mental model in our back pocket &#8212; it&#8217;s going to make the scary-looking RNN diagrams feel a lot less scary.</p><p>Ok, now we are ready to get scared by a diagram of an RNN cell!</p><div><hr></div><h3>The scary official diagram of an RNN cell</h3><p>So far, we have discussed how RNNs differ from classical DNNs because they remember their past. But what does that actually look like inside the model?</p><p>Well... 
time to face the infamous RNN diagram.</p><p>It might look like a tangle of wires and equations at first, but don&#8217;t worry, we will walk through it slowly, tie it back to the stock price example, and by the end, you will see it&#8217;s just a simple process of multiplication, addition, and a squiggly activation function or two.</p><p>The diagram below is a vanilla RNN cell. </p><p>I can sense you sweating&#8230; a flow diagram? With parameters? With maths operations? Ok, let&#8217;s break it up so that you don&#8217;t have to process it all at once.</p><p>Here are the elements to focus on:</p><ol><li><p>Note the <code>X</code>, <code>h</code> and <code>Y</code> elements (similar to the classical DNN diagram).</p></li><li><p><code>X(t)</code> represents today&#8217;s data point, <code>h(t-1)</code> is what came from yesterday, <code>h(t)</code> is what is passed to predict tomorrow, and <code>Y(t+1)</code> is the predicted data point.</p></li><li><p><code>Y(t+1)</code> represents the prediction that we want.</p></li><li><p><code>tanh</code> and <code>softmax</code> are just <a href="https://en.wikipedia.org/wiki/Activation_function">activation functions</a>. The same kind you have seen in regular neural networks, so nothing special for RNNs. They take raw values and squash them into a friendlier range, like between -1 and 1 (for <code>tanh</code>) or 0 and 1 (for <code>softmax</code>).</p></li><li><p>Finally, there are a couple of blobs with <code>mult</code> and <code>sum</code>. These are just visual aids so that you can see the operations in action when we pull the numbers in.</p></li><li><p>There are maths operations outside of the cell. These are basically there just so that we can transform a squashed value coming out of <code>tanh</code> into a real value that makes sense. 
For example, transform 0.9 coming out of the <code>tanh</code> function to maybe $5.</p></li></ol><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!8l2J!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b70af1e-c9f7-47fd-8c30-2e830839ba10_1287x708.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!8l2J!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b70af1e-c9f7-47fd-8c30-2e830839ba10_1287x708.png 424w, https://substackcdn.com/image/fetch/$s_!8l2J!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b70af1e-c9f7-47fd-8c30-2e830839ba10_1287x708.png 848w, https://substackcdn.com/image/fetch/$s_!8l2J!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b70af1e-c9f7-47fd-8c30-2e830839ba10_1287x708.png 1272w, https://substackcdn.com/image/fetch/$s_!8l2J!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b70af1e-c9f7-47fd-8c30-2e830839ba10_1287x708.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!8l2J!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b70af1e-c9f7-47fd-8c30-2e830839ba10_1287x708.png" width="1287" height="708" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1b70af1e-c9f7-47fd-8c30-2e830839ba10_1287x708.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:708,&quot;width&quot;:1287,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:330213,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b70af1e-c9f7-47fd-8c30-2e830839ba10_1287x708.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!8l2J!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b70af1e-c9f7-47fd-8c30-2e830839ba10_1287x708.png 424w, https://substackcdn.com/image/fetch/$s_!8l2J!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b70af1e-c9f7-47fd-8c30-2e830839ba10_1287x708.png 848w, https://substackcdn.com/image/fetch/$s_!8l2J!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b70af1e-c9f7-47fd-8c30-2e830839ba10_1287x708.png 1272w, https://substackcdn.com/image/fetch/$s_!8l2J!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b70af1e-c9f7-47fd-8c30-2e830839ba10_1287x708.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>&#9997;&#65039; Quick note</strong></p><blockquote><p>Technically, the RNN makes a prediction <code>y(t)</code> after seeing input <code>x(t)</code> and memory <code>h(t-1)</code>. <br><br>But since in our toy example we are trying to predict the next value, it&#8217;s tempting to call it <code>y(t+1)</code> &#8212; just know that it&#8217;s really <code>y(t)</code> in the math, but the target we are aiming for is the value at <code>t+1</code>. 
</p><p>I purposely named it <code>y(t+1)</code> for pedagogical reasons in this post.</p></blockquote><div><hr></div><h4><strong>Let&#8217;s map this diagram to the theoretical RNN math functions</strong></h4><p>In the image below, I have added the math functions that lead to the two outputs from the RNN cell: <code>y(t+1)</code> and <code>h(t)</code>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!CaqQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb9b1858-610c-473a-8668-bd1e76d79035_1170x686.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!CaqQ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb9b1858-610c-473a-8668-bd1e76d79035_1170x686.png 424w, https://substackcdn.com/image/fetch/$s_!CaqQ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb9b1858-610c-473a-8668-bd1e76d79035_1170x686.png 848w, https://substackcdn.com/image/fetch/$s_!CaqQ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb9b1858-610c-473a-8668-bd1e76d79035_1170x686.png 1272w, https://substackcdn.com/image/fetch/$s_!CaqQ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb9b1858-610c-473a-8668-bd1e76d79035_1170x686.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!CaqQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb9b1858-610c-473a-8668-bd1e76d79035_1170x686.png" width="1170" height="686" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/bb9b1858-610c-473a-8668-bd1e76d79035_1170x686.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:686,&quot;width&quot;:1170,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:259176,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb9b1858-610c-473a-8668-bd1e76d79035_1170x686.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!CaqQ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb9b1858-610c-473a-8668-bd1e76d79035_1170x686.png 424w, https://substackcdn.com/image/fetch/$s_!CaqQ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb9b1858-610c-473a-8668-bd1e76d79035_1170x686.png 848w, https://substackcdn.com/image/fetch/$s_!CaqQ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb9b1858-610c-473a-8668-bd1e76d79035_1170x686.png 1272w, https://substackcdn.com/image/fetch/$s_!CaqQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbb9b1858-610c-473a-8668-bd1e76d79035_1170x686.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>So, if we want to either predict an output (<code>Y</code>) or carry memory to the next stage (<code>h</code>), the neural network needs to learn:</p><ul><li><p><code>Wx</code>: This is the weight applied to today&#8217;s data, <code>X(t)</code>. It controls how much the model should care about today&#8217;s stock price. In the extreme case, if <code>Wx = 0</code>, we don&#8217;t care about <code>X(t)</code> at all, because <code>Wx * X(t)</code> would always yield 0.</p></li><li><p><code>Wh</code>: This is the weight applied to the previous hidden state, <code>h(t-1)</code>. It tells the model how much to rely on memory. If <code>Wh = 0</code>, the past is forgotten.</p><p></p><p>&#9888;&#65039; Don&#8217;t misinterpret <code>h(t-1)</code> by thinking it is yesterday&#8217;s stock price. 
It is what comes out of the cell (with its multiplications, sums, and activation functions) applied to yesterday&#8217;s stock price.<br></p></li><li><p><code>b</code>: This is the bias term. Think of it as a small correction applied regardless of the input. It&#8217;s important in training, but not very interesting for understanding how RNNs work conceptually (as it affects DNNs the same way). If you want a deeper dive, <a href="https://www.turing.com/kb/necessity-of-bias-in-neural-networks">check this link</a>.</p></li></ul><p>I believe that only when we plug numbers into these diagrams will we really start understanding what is happening inside the RNN. Let&#8217;s do this next.</p><div><hr></div><h3><strong>A Walkthrough With Real Numbers</strong></h3><p>Alright, time to take what we have learnt and run it step by step. Instead of just showing the internals of a single RNN cell, we&#8217;ll now &#8220;unroll&#8221; it (you will see what that is in a second). </p><p>This will finally answer the big question: how does an RNN actually use the past to predict the future?</p><h4>How do we represent tomorrow&#8217;s prediction diagrammatically?</h4><p>Pretty simple. We just copy and paste the same RNN cell forward in time, one per data point in our sequence. </p><p>In our toy stock price example, we only have two data points (<code>t-1</code> and <code>t</code>), so we unroll the cell twice in order to make a prediction at <code>t+1</code>.</p><p>And this is why they are called recurrent, because the same logic is applied over and over, like a <code>for</code> loop.</p><p>Cool, now that you are comfortable with what happens inside a single RNN cell, let&#8217;s walk through this unrolled diagram in detail to ensure we are all on the same page.</p><ol><li><p>We begin by plugging in yesterday&#8217;s stock price. 
Because there is no data prior to yesterday, we can ignore the previous hidden state input <code>h(t-2)</code>.</p></li><li><p>Using both math functions, we then calculate <code>y(t)</code> and <code>h(t-1)</code>. From these two, only <code>h(t-1)</code> is useful for us. This is the value that describes the memory, and that will be passed to the next cell. <code>y(t)</code> is irrelevant, so we ignore it.</p></li><li><p>Finally, we plug in today&#8217;s data and run through the relevant maths operations to calculate <code>y(t+1)</code>. You can see from the diagram that the RNN is using today&#8217;s data <code>X(t)</code> and yesterday&#8217;s memory <code>h(t-1)</code> (the hidden state carried over from the previous cell) to calculate what could happen tomorrow.</p></li></ol><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lpFa!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4726f1c8-8b5c-494f-8df7-70f195ff6884_1088x688.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!lpFa!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4726f1c8-8b5c-494f-8df7-70f195ff6884_1088x688.png 424w, https://substackcdn.com/image/fetch/$s_!lpFa!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4726f1c8-8b5c-494f-8df7-70f195ff6884_1088x688.png 848w, https://substackcdn.com/image/fetch/$s_!lpFa!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4726f1c8-8b5c-494f-8df7-70f195ff6884_1088x688.png 1272w, 
https://substackcdn.com/image/fetch/$s_!lpFa!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4726f1c8-8b5c-494f-8df7-70f195ff6884_1088x688.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!lpFa!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4726f1c8-8b5c-494f-8df7-70f195ff6884_1088x688.png" width="1088" height="688" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4726f1c8-8b5c-494f-8df7-70f195ff6884_1088x688.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:688,&quot;width&quot;:1088,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:298589,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4726f1c8-8b5c-494f-8df7-70f195ff6884_1088x688.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!lpFa!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4726f1c8-8b5c-494f-8df7-70f195ff6884_1088x688.png 424w, https://substackcdn.com/image/fetch/$s_!lpFa!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4726f1c8-8b5c-494f-8df7-70f195ff6884_1088x688.png 848w, 
https://substackcdn.com/image/fetch/$s_!lpFa!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4726f1c8-8b5c-494f-8df7-70f195ff6884_1088x688.png 1272w, https://substackcdn.com/image/fetch/$s_!lpFa!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4726f1c8-8b5c-494f-8df7-70f195ff6884_1088x688.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>&#128161; An important highlight: Shared weights and biases</strong></p><blockquote><p><em>&#8220;Wait a sec... 
you are using the same </em><code>Wx</code><em>, </em><code>Wh</code><em>, and </em><code>b</code><em> in both cells. Shouldn&#8217;t they be different?&#8221;</em></p></blockquote><p>Great question! This is the part that makes RNNs elegant, but also tricky (you will see at the end how these shared weights break an RNN&#8217;s learning process).</p><p>Unlike a feedforward network, where every layer learns its own set of weights, an RNN cell reuses the same weights at every timestep. That&#8217;s the &#8220;recurrent&#8221; part. An RNN not only repeats the cell structure, but it also repeats the&nbsp;exact same function&nbsp;with the same learned parameters.</p><p>So yes &#8212; <code>Wx</code>, <code>Wh</code>, and <code>b</code> are constant across time. What changes is the input <code>x(t)</code> and the memory from the previous step <code>h(t-1)</code>, which is how the model updates its thinking as it moves forward.</p><div><hr></div><h4><strong>Plugging numbers into the diagram</strong></h4><p>Before doing some basic maths, let&#8217;s talk about the numbers being used:</p><ol><li><p>Note how I substituted <code>Wh</code>, <code>Wx</code>, <code>b</code>, <code>W_output</code> and <code>b_output</code> with numbers. I made these numbers up, but they are the ones the neural network would tweak during its learning process.</p></li><li><p>The input data points are the ones we know from the time series. <code>X(t-1)</code> and <code>X(t)</code> are both 0. 
</p></li></ol><p>Now we are ready to take pen and paper and perform all the calculations in this diagram.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!H3Lm!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0bf07a4d-dcf9-483e-92fd-daad58404cff_1440x789.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!H3Lm!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0bf07a4d-dcf9-483e-92fd-daad58404cff_1440x789.png 424w, https://substackcdn.com/image/fetch/$s_!H3Lm!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0bf07a4d-dcf9-483e-92fd-daad58404cff_1440x789.png 848w, https://substackcdn.com/image/fetch/$s_!H3Lm!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0bf07a4d-dcf9-483e-92fd-daad58404cff_1440x789.png 1272w, https://substackcdn.com/image/fetch/$s_!H3Lm!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0bf07a4d-dcf9-483e-92fd-daad58404cff_1440x789.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!H3Lm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0bf07a4d-dcf9-483e-92fd-daad58404cff_1440x789.png" width="1440" height="789" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0bf07a4d-dcf9-483e-92fd-daad58404cff_1440x789.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:789,&quot;width&quot;:1440,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:323035,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0bf07a4d-dcf9-483e-92fd-daad58404cff_1440x789.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!H3Lm!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0bf07a4d-dcf9-483e-92fd-daad58404cff_1440x789.png 424w, https://substackcdn.com/image/fetch/$s_!H3Lm!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0bf07a4d-dcf9-483e-92fd-daad58404cff_1440x789.png 848w, https://substackcdn.com/image/fetch/$s_!H3Lm!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0bf07a4d-dcf9-483e-92fd-daad58404cff_1440x789.png 1272w, https://substackcdn.com/image/fetch/$s_!H3Lm!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0bf07a4d-dcf9-483e-92fd-daad58404cff_1440x789.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Plugging in all the numbers, <code>y(t+1)</code> comes out to be 0. 
Nice, this is what we expected from our mock stock price time series!</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Tbzr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2467ec3a-2481-42fb-a30e-617b84c449cd_679x384.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Tbzr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2467ec3a-2481-42fb-a30e-617b84c449cd_679x384.png 424w, https://substackcdn.com/image/fetch/$s_!Tbzr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2467ec3a-2481-42fb-a30e-617b84c449cd_679x384.png 848w, https://substackcdn.com/image/fetch/$s_!Tbzr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2467ec3a-2481-42fb-a30e-617b84c449cd_679x384.png 1272w, https://substackcdn.com/image/fetch/$s_!Tbzr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2467ec3a-2481-42fb-a30e-617b84c449cd_679x384.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Tbzr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2467ec3a-2481-42fb-a30e-617b84c449cd_679x384.png" width="679" height="384" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2467ec3a-2481-42fb-a30e-617b84c449cd_679x384.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:384,&quot;width&quot;:679,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:40805,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2467ec3a-2481-42fb-a30e-617b84c449cd_679x384.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!Tbzr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2467ec3a-2481-42fb-a30e-617b84c449cd_679x384.png 424w, https://substackcdn.com/image/fetch/$s_!Tbzr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2467ec3a-2481-42fb-a30e-617b84c449cd_679x384.png 848w, https://substackcdn.com/image/fetch/$s_!Tbzr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2467ec3a-2481-42fb-a30e-617b84c449cd_679x384.png 1272w, https://substackcdn.com/image/fetch/$s_!Tbzr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2467ec3a-2481-42fb-a30e-617b84c449cd_679x384.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h4><strong>What would the diagram look like if we had 50 data points?</strong></h4><p>Well... you would copy the RNN cell forward 49 times (giving 50 cells in total), just like a <code>for</code> loop with 50 iterations (luckily for me, I am not drawing that diagram...)</p><p>But structurally, nothing changes. 
You still:</p><ul><li><p>Reuse the same weights</p></li><li><p>Pass memory from each time step to the next</p></li><li><p>And only apply the final output prediction where it matters, which is usually the last cell in the sequence</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!6Peb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fae4e88e3-6c7f-40df-9f6e-0c6246b5e605_1172x737.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!6Peb!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fae4e88e3-6c7f-40df-9f6e-0c6246b5e605_1172x737.png 424w, https://substackcdn.com/image/fetch/$s_!6Peb!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fae4e88e3-6c7f-40df-9f6e-0c6246b5e605_1172x737.png 848w, https://substackcdn.com/image/fetch/$s_!6Peb!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fae4e88e3-6c7f-40df-9f6e-0c6246b5e605_1172x737.png 1272w, https://substackcdn.com/image/fetch/$s_!6Peb!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fae4e88e3-6c7f-40df-9f6e-0c6246b5e605_1172x737.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!6Peb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fae4e88e3-6c7f-40df-9f6e-0c6246b5e605_1172x737.png" width="1172" height="737" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ae4e88e3-6c7f-40df-9f6e-0c6246b5e605_1172x737.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:737,&quot;width&quot;:1172,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:372022,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fae4e88e3-6c7f-40df-9f6e-0c6246b5e605_1172x737.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!6Peb!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fae4e88e3-6c7f-40df-9f6e-0c6246b5e605_1172x737.png 424w, https://substackcdn.com/image/fetch/$s_!6Peb!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fae4e88e3-6c7f-40df-9f6e-0c6246b5e605_1172x737.png 848w, https://substackcdn.com/image/fetch/$s_!6Peb!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fae4e88e3-6c7f-40df-9f6e-0c6246b5e605_1172x737.png 1272w, https://substackcdn.com/image/fetch/$s_!6Peb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fae4e88e3-6c7f-40df-9f6e-0c6246b5e605_1172x737.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>What we learn from the above examples is that:</p><ul><li><p>RNNs reuse the same weights and biases at each step.</p></li><li><p>Hidden states <code>h(t)</code> are like memory, passed forward through time.</p></li><li><p>Outputs <code>y(t)</code> are generated by applying a linear transformation and softmax on the hidden state.</p></li><li><p>Even a dumb toy dataset of all zeros reveals the internal mechanics beautifully.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!gg1J!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F335d3c46-0b8d-4456-ae8d-857b29389c08_500x666.jpeg" data-component-name="Image2ToDOM"><div 
class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!gg1J!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F335d3c46-0b8d-4456-ae8d-857b29389c08_500x666.jpeg 424w, https://substackcdn.com/image/fetch/$s_!gg1J!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F335d3c46-0b8d-4456-ae8d-857b29389c08_500x666.jpeg 848w, https://substackcdn.com/image/fetch/$s_!gg1J!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F335d3c46-0b8d-4456-ae8d-857b29389c08_500x666.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!gg1J!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F335d3c46-0b8d-4456-ae8d-857b29389c08_500x666.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!gg1J!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F335d3c46-0b8d-4456-ae8d-857b29389c08_500x666.jpeg" width="500" height="666" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/335d3c46-0b8d-4456-ae8d-857b29389c08_500x666.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:666,&quot;width&quot;:500,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:72746,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F335d3c46-0b8d-4456-ae8d-857b29389c08_500x666.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!gg1J!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F335d3c46-0b8d-4456-ae8d-857b29389c08_500x666.jpeg 424w, https://substackcdn.com/image/fetch/$s_!gg1J!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F335d3c46-0b8d-4456-ae8d-857b29389c08_500x666.jpeg 848w, https://substackcdn.com/image/fetch/$s_!gg1J!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F335d3c46-0b8d-4456-ae8d-857b29389c08_500x666.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!gg1J!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F335d3c46-0b8d-4456-ae8d-857b29389c08_500x666.jpeg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>That wasn&#8217;t that hard, right? Well, I have bad news for you&#8230;</p><div><hr></div><h3>Three Major Problems With RNNs</h3><p>These vanilla RNNs are never used for real-world use cases because they come with three big problems:</p><ol><li><p>They are slow to train.</p></li><li><p>They suffer from the problem of vanishing gradients.</p></li><li><p>They suffer from the problem of exploding gradients.</p></li></ol><p>Let&#8217;s cover these in detail in their own sections.</p><div><hr></div><h4><strong>Problem 1: Training a vanilla RNN is very slow</strong></h4><p>Deep Neural Networks (DNNs) process all their inputs at once in a single forward pass. Everything flows from left to right, layer by layer. 
That means training can occur in parallel across many data points and GPU cores.</p><p>This is not the case with RNNs.</p><p>Because RNNs depend on previous hidden states, they are inherently sequential. You can&#8217;t calculate <code>h(t)</code> until you have calculated <code>h(t&#8211;1)</code>. </p><p>It&#8217;s like reading a book: you can&#8217;t understand chapter 5 until you have read chapter 4.</p><p>RNNs make you walk through time, one step at a time, and this sequential dependency kills parallelism.</p><p>That is the first problem: RNNs are powerful, but they pay for it in training speed.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!zX6o!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f89bfae-e763-4e9f-a0d3-2fc18b4f25f7_555x450.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!zX6o!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f89bfae-e763-4e9f-a0d3-2fc18b4f25f7_555x450.jpeg 424w, https://substackcdn.com/image/fetch/$s_!zX6o!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f89bfae-e763-4e9f-a0d3-2fc18b4f25f7_555x450.jpeg 848w, https://substackcdn.com/image/fetch/$s_!zX6o!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f89bfae-e763-4e9f-a0d3-2fc18b4f25f7_555x450.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!zX6o!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f89bfae-e763-4e9f-a0d3-2fc18b4f25f7_555x450.jpeg 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!zX6o!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f89bfae-e763-4e9f-a0d3-2fc18b4f25f7_555x450.jpeg" width="555" height="450" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3f89bfae-e763-4e9f-a0d3-2fc18b4f25f7_555x450.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:450,&quot;width&quot;:555,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:61016,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f89bfae-e763-4e9f-a0d3-2fc18b4f25f7_555x450.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!zX6o!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f89bfae-e763-4e9f-a0d3-2fc18b4f25f7_555x450.jpeg 424w, https://substackcdn.com/image/fetch/$s_!zX6o!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f89bfae-e763-4e9f-a0d3-2fc18b4f25f7_555x450.jpeg 848w, https://substackcdn.com/image/fetch/$s_!zX6o!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f89bfae-e763-4e9f-a0d3-2fc18b4f25f7_555x450.jpeg 1272w, 
https://substackcdn.com/image/fetch/$s_!zX6o!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3f89bfae-e763-4e9f-a0d3-2fc18b4f25f7_555x450.jpeg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h4><strong>Problem 2: The problem of vanishing gradients</strong></h4><p>To explain this problem, I will have to take some mathematical shortcuts. To really know what is happening under the hood, you need to be familiar with the chain rule used in backpropagation. 
But showing the full impact of vanishing gradients using the backpropagation formula would be overkill.</p><p>Let&#8217;s zoom in on just one parameter in the RNN, i.e. <code>Wh</code>, the weight that is multiplied by the previous hidden state.</p><p>In our earlier 3-step example, <code>Wh</code> was used once (just one multiplication). But, if you are training an RNN for 50 timesteps (say, 50 days of stock data), that means <code>Wh</code> shows up 49 times in the full chain of calculations.</p><p>When the model tries to update <code>Wh</code> via backpropagation, mathematically, the gradient is multiplying <code>Wh</code> over and over again, kind of like:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!qAtF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06e60093-db1a-4471-94d1-5d9627a431d5_1056x80.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!qAtF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06e60093-db1a-4471-94d1-5d9627a431d5_1056x80.png 424w, https://substackcdn.com/image/fetch/$s_!qAtF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06e60093-db1a-4471-94d1-5d9627a431d5_1056x80.png 848w, https://substackcdn.com/image/fetch/$s_!qAtF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06e60093-db1a-4471-94d1-5d9627a431d5_1056x80.png 1272w, https://substackcdn.com/image/fetch/$s_!qAtF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06e60093-db1a-4471-94d1-5d9627a431d5_1056x80.png 1456w" 
sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!qAtF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06e60093-db1a-4471-94d1-5d9627a431d5_1056x80.png" width="1056" height="80" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/06e60093-db1a-4471-94d1-5d9627a431d5_1056x80.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:80,&quot;width&quot;:1056,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:13000,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186415859?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06e60093-db1a-4471-94d1-5d9627a431d5_1056x80.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!qAtF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06e60093-db1a-4471-94d1-5d9627a431d5_1056x80.png 424w, https://substackcdn.com/image/fetch/$s_!qAtF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06e60093-db1a-4471-94d1-5d9627a431d5_1056x80.png 848w, https://substackcdn.com/image/fetch/$s_!qAtF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06e60093-db1a-4471-94d1-5d9627a431d5_1056x80.png 1272w, https://substackcdn.com/image/fetch/$s_!qAtF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F06e60093-db1a-4471-94d1-5d9627a431d5_1056x80.png 1456w" 
sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Uh-oh. What happens when <code>Wh &lt; 1</code>?</p><p>Try plugging in <code>Wh = 0.5</code>:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!wg1R!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F032f29c3-6582-4822-a4cd-4d956148e30b_1056x80.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!wg1R!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F032f29c3-6582-4822-a4cd-4d956148e30b_1056x80.png 424w, https://substackcdn.com/image/fetch/$s_!wg1R!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F032f29c3-6582-4822-a4cd-4d956148e30b_1056x80.png 848w, https://substackcdn.com/image/fetch/$s_!wg1R!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F032f29c3-6582-4822-a4cd-4d956148e30b_1056x80.png 1272w, https://substackcdn.com/image/fetch/$s_!wg1R!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F032f29c3-6582-4822-a4cd-4d956148e30b_1056x80.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!wg1R!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F032f29c3-6582-4822-a4cd-4d956148e30b_1056x80.png" width="1056" height="80" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/032f29c3-6582-4822-a4cd-4d956148e30b_1056x80.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:80,&quot;width&quot;:1056,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:12499,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186415859?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F032f29c3-6582-4822-a4cd-4d956148e30b_1056x80.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!wg1R!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F032f29c3-6582-4822-a4cd-4d956148e30b_1056x80.png 424w, https://substackcdn.com/image/fetch/$s_!wg1R!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F032f29c3-6582-4822-a4cd-4d956148e30b_1056x80.png 848w, https://substackcdn.com/image/fetch/$s_!wg1R!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F032f29c3-6582-4822-a4cd-4d956148e30b_1056x80.png 1272w, https://substackcdn.com/image/fetch/$s_!wg1R!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F032f29c3-6582-4822-a4cd-4d956148e30b_1056x80.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>That&#8217;s basically zero. This means that the update to <code>Wh</code> during training is so tiny, it&#8217;s like the model is frozen. It can&#8217;t escape its starting point. 
It just sits there, unable to learn anything useful.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!2naL!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F75e826e9-0f67-42c4-a9c5-083477fd47fa_1036x411.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!2naL!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F75e826e9-0f67-42c4-a9c5-083477fd47fa_1036x411.png 424w, https://substackcdn.com/image/fetch/$s_!2naL!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F75e826e9-0f67-42c4-a9c5-083477fd47fa_1036x411.png 848w, https://substackcdn.com/image/fetch/$s_!2naL!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F75e826e9-0f67-42c4-a9c5-083477fd47fa_1036x411.png 1272w, https://substackcdn.com/image/fetch/$s_!2naL!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F75e826e9-0f67-42c4-a9c5-083477fd47fa_1036x411.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!2naL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F75e826e9-0f67-42c4-a9c5-083477fd47fa_1036x411.png" width="1036" height="411" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/75e826e9-0f67-42c4-a9c5-083477fd47fa_1036x411.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:411,&quot;width&quot;:1036,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:81184,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F75e826e9-0f67-42c4-a9c5-083477fd47fa_1036x411.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!2naL!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F75e826e9-0f67-42c4-a9c5-083477fd47fa_1036x411.png 424w, https://substackcdn.com/image/fetch/$s_!2naL!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F75e826e9-0f67-42c4-a9c5-083477fd47fa_1036x411.png 848w, https://substackcdn.com/image/fetch/$s_!2naL!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F75e826e9-0f67-42c4-a9c5-083477fd47fa_1036x411.png 1272w, https://substackcdn.com/image/fetch/$s_!2naL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F75e826e9-0f67-42c4-a9c5-083477fd47fa_1036x411.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!xsb4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F891cfca1-8184-4a91-a1a0-118886007bef_634x393.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!xsb4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F891cfca1-8184-4a91-a1a0-118886007bef_634x393.jpeg 424w, 
https://substackcdn.com/image/fetch/$s_!xsb4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F891cfca1-8184-4a91-a1a0-118886007bef_634x393.jpeg 848w, https://substackcdn.com/image/fetch/$s_!xsb4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F891cfca1-8184-4a91-a1a0-118886007bef_634x393.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!xsb4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F891cfca1-8184-4a91-a1a0-118886007bef_634x393.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!xsb4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F891cfca1-8184-4a91-a1a0-118886007bef_634x393.jpeg" width="634" height="393" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/891cfca1-8184-4a91-a1a0-118886007bef_634x393.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:393,&quot;width&quot;:634,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:60379,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F891cfca1-8184-4a91-a1a0-118886007bef_634x393.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" 
srcset="https://substackcdn.com/image/fetch/$s_!xsb4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F891cfca1-8184-4a91-a1a0-118886007bef_634x393.jpeg 424w, https://substackcdn.com/image/fetch/$s_!xsb4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F891cfca1-8184-4a91-a1a0-118886007bef_634x393.jpeg 848w, https://substackcdn.com/image/fetch/$s_!xsb4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F891cfca1-8184-4a91-a1a0-118886007bef_634x393.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!xsb4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F891cfca1-8184-4a91-a1a0-118886007bef_634x393.jpeg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h4><strong>Problem 3: The problem of exploding gradients</strong></h4><p>This is the reverse problem. If vanishing gradients are the slow death of learning, exploding gradients are the chaotic opposite.</p><p>What happens when <code>Wh &gt; 1</code>? Say <code>Wh = 1.5</code>.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Ra7W!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe29b9e9-8d39-4a55-a531-cb8cf61f6651_1056x80.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Ra7W!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe29b9e9-8d39-4a55-a531-cb8cf61f6651_1056x80.png 424w, https://substackcdn.com/image/fetch/$s_!Ra7W!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe29b9e9-8d39-4a55-a531-cb8cf61f6651_1056x80.png 848w, https://substackcdn.com/image/fetch/$s_!Ra7W!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe29b9e9-8d39-4a55-a531-cb8cf61f6651_1056x80.png 1272w, https://substackcdn.com/image/fetch/$s_!Ra7W!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe29b9e9-8d39-4a55-a531-cb8cf61f6651_1056x80.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!Ra7W!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe29b9e9-8d39-4a55-a531-cb8cf61f6651_1056x80.png" width="1056" height="80" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/be29b9e9-8d39-4a55-a531-cb8cf61f6651_1056x80.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:80,&quot;width&quot;:1056,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:11955,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186415859?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe29b9e9-8d39-4a55-a531-cb8cf61f6651_1056x80.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Ra7W!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe29b9e9-8d39-4a55-a531-cb8cf61f6651_1056x80.png 424w, https://substackcdn.com/image/fetch/$s_!Ra7W!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe29b9e9-8d39-4a55-a531-cb8cf61f6651_1056x80.png 848w, https://substackcdn.com/image/fetch/$s_!Ra7W!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe29b9e9-8d39-4a55-a531-cb8cf61f6651_1056x80.png 1272w, https://substackcdn.com/image/fetch/$s_!Ra7W!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbe29b9e9-8d39-4a55-a531-cb8cf61f6651_1056x80.png 1456w" sizes="100vw" 
loading="lazy"></picture><div></div></div></a></figure></div><p>That&#8217;s nearly a billion. This means that during backpropagation, the gradient becomes massive. And with a gradient that large, your weight update becomes a wild jump.</p><p>The result is that your model overshoots the loss minimum, bounces around the optimisation landscape like a drunk pinball (love that game), and probably never converges.</p><p>This is the exploding gradients problem. Same root cause as vanishing gradients &#8212; compounding multiplications through time &#8212; but now the problem is too much signal, instead of too little.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!o1Hc!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4e31ce-817c-4cc5-86e1-006686a3fb30_974x439.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!o1Hc!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4e31ce-817c-4cc5-86e1-006686a3fb30_974x439.png 424w, https://substackcdn.com/image/fetch/$s_!o1Hc!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4e31ce-817c-4cc5-86e1-006686a3fb30_974x439.png 848w, https://substackcdn.com/image/fetch/$s_!o1Hc!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4e31ce-817c-4cc5-86e1-006686a3fb30_974x439.png 1272w, https://substackcdn.com/image/fetch/$s_!o1Hc!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4e31ce-817c-4cc5-86e1-006686a3fb30_974x439.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!o1Hc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4e31ce-817c-4cc5-86e1-006686a3fb30_974x439.png" width="974" height="439" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1b4e31ce-817c-4cc5-86e1-006686a3fb30_974x439.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:439,&quot;width&quot;:974,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:75577,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4e31ce-817c-4cc5-86e1-006686a3fb30_974x439.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!o1Hc!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4e31ce-817c-4cc5-86e1-006686a3fb30_974x439.png 424w, https://substackcdn.com/image/fetch/$s_!o1Hc!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4e31ce-817c-4cc5-86e1-006686a3fb30_974x439.png 848w, https://substackcdn.com/image/fetch/$s_!o1Hc!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4e31ce-817c-4cc5-86e1-006686a3fb30_974x439.png 1272w, https://substackcdn.com/image/fetch/$s_!o1Hc!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1b4e31ce-817c-4cc5-86e1-006686a3fb30_974x439.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!-fig!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaf095cf-c634-40d2-be56-4fbba57ba65d_640x426.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!-fig!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaf095cf-c634-40d2-be56-4fbba57ba65d_640x426.jpeg 424w, https://substackcdn.com/image/fetch/$s_!-fig!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaf095cf-c634-40d2-be56-4fbba57ba65d_640x426.jpeg 848w, https://substackcdn.com/image/fetch/$s_!-fig!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaf095cf-c634-40d2-be56-4fbba57ba65d_640x426.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!-fig!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaf095cf-c634-40d2-be56-4fbba57ba65d_640x426.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!-fig!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaf095cf-c634-40d2-be56-4fbba57ba65d_640x426.jpeg" width="640" height="426" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/caf095cf-c634-40d2-be56-4fbba57ba65d_640x426.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:426,&quot;width&quot;:640,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:80615,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://joseparreogarcia.substack.com/i/161045044?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaf095cf-c634-40d2-be56-4fbba57ba65d_640x426.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!-fig!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaf095cf-c634-40d2-be56-4fbba57ba65d_640x426.jpeg 424w, https://substackcdn.com/image/fetch/$s_!-fig!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaf095cf-c634-40d2-be56-4fbba57ba65d_640x426.jpeg 848w, https://substackcdn.com/image/fetch/$s_!-fig!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaf095cf-c634-40d2-be56-4fbba57ba65d_640x426.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!-fig!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcaf095cf-c634-40d2-be56-4fbba57ba65d_640x426.jpeg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3><strong>What&#8217;s next: How LSTMs fixed all of this (mostly)</strong></h3><p>With everything we have covered, you might think, &#8220;Hey, that vanilla RNN was easy enough to understand, but with the stated problems, it also looks pretty useless, right?&#8221;</p><p>I would mostly agree. Training a vanilla RNN is not impossible, but it requires skill, feature engineering, fine-tuning, and time. The vanishing and exploding gradient problems are what mostly hold an RNN back.</p><p>This is why LSTMs were introduced.</p><p>LSTMs are an improved type of RNN. With built-in mechanisms (called gates) to decide what to keep, what to forget, and what to pass forward, they were designed specifically to beat vanishing and exploding gradients at their own game.</p><p>LSTMs are the topic of the next post, where we will explore how they work and why they became the go-to tool for sequence modelling&#8230; at least until Transformers came along.</p><div><hr></div><h4><strong>Now, I want to hear from you!</strong></h4><p>In this post, we broke down how RNNs work, from the vanilla cell structure to why they struggle to train in the real world. 
</p><p>We kept it simple, even used an all-zero dataset, and uncovered the quiet math that makes RNNs nearly impossible to scale.</p><p>But now I&#8217;m curious about your experience.</p><ul><li><p>Have you ever built or trained an RNN model?</p></li><li><p>Did you run into vanishing gradients or the joys of exploding updates?</p></li><li><p>Maybe you were introduced to LSTMs (or GRUs) straight away and skipped vanilla RNNs altogether?</p></li><li><p>Or maybe you are just now connecting the dots between hidden states, time steps, and why Transformers were such a leap.</p></li></ul><p>Drop your thoughts, experiences, or lingering questions in the comments. I would love to hear how you&#8217;ve approached sequence modelling in your ML journey.</p><div><hr></div><p>Thanks again to <span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Jose Parre&#241;o Garcia&quot;,&quot;id&quot;:255728031,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!h_mv!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0c4dad41-478b-4960-a5e0-98ed1e54657e_1168x1046.jpeg&quot;,&quot;uuid&quot;:&quot;af3579fd-38d5-43d0-a6cd-d2e06d1097bb&quot;}" data-component-name="MentionToDOM">Jose Parre&#241;o Garcia</span> for writing this week&#8217;s newsletter.</p><p>Don&#8217;t forget to subscribe to his newsletter and connect with him on <a href="https://www.linkedin.com/in/joseparrenogarcia/">LinkedIn</a>.</p><div class="embedded-publication-wrap" data-attrs="{&quot;id&quot;:2833541,&quot;name&quot;:&quot;Senior Data Science Lead&quot;,&quot;logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!t4IN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbbe3704e-4589-40b2-bbb8-007336c4f09a_990x990.png&quot;,&quot;base_url&quot;:&quot;https://joseparreogarcia.substack.com&quot;,&quot;hero_text&quot;:&quot;I write about Data 
Science, Machine Learning and leading data teams. I have built teams from scratch and lead 50+ data scientists @Skyscanner. Now, I share my experience with you.&quot;,&quot;author_name&quot;:&quot;Jose Parre&#241;o Garcia&quot;,&quot;show_subscribe&quot;:true,&quot;logo_bg_color&quot;:&quot;#ffffff&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="EmbeddedPublicationToDOMWithSubscribe"><div class="embedded-publication show-subscribe"><a class="embedded-publication-link-part" native="true" href="https://joseparreogarcia.substack.com?utm_source=substack&amp;utm_campaign=publication_embed&amp;utm_medium=web"><img class="embedded-publication-logo" src="https://substackcdn.com/image/fetch/$s_!t4IN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbbe3704e-4589-40b2-bbb8-007336c4f09a_990x990.png" width="56" height="56" style="background-color: rgb(255, 255, 255);"><span class="embedded-publication-name">Senior Data Science Lead</span><div class="embedded-publication-hero-text">I write about Data Science, Machine Learning and leading data teams. I have built teams from scratch and lead 50+ data scientists @Skyscanner. Now, I share my experience with you.</div><div class="embedded-publication-author-name">By Jose Parre&#241;o Garcia</div></a><form class="embedded-publication-subscribe" method="GET" action="https://joseparreogarcia.substack.com/subscribe?"><input type="hidden" name="source" value="publication-embed"><input type="hidden" name="autoSubmit" value="true"><input type="email" class="email-input" name="email" placeholder="Type your email..."><input type="submit" class="button primary" value="Subscribe"></form></div></div><div><hr></div><p>This article is free to read, so share it with others. 
&#10084;&#65039;</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/p/build-and-train-an-llm-from-scratch?utm_source=substack&amp;utm_medium=email&amp;utm_content=share&amp;action=share&amp;token=eyJ1c2VyX2lkIjoxNTU0NTczMDgsInBvc3RfaWQiOjE4MjMxNDQ2OCwiaWF0IjoxNzY5ODg1NTY5LCJleHAiOjE3NzI0Nzc1NjksImlzcyI6InB1Yi0xODEzMjYwIiwic3ViIjoicG9zdC1yZWFjdGlvbiJ9.5R9qiObU609w-rZ8URe228YwqEyWeVul8_ksSD0Upu0&quot;,&quot;text&quot;:&quot;Share&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/p/build-and-train-an-llm-from-scratch?utm_source=substack&amp;utm_medium=email&amp;utm_content=share&amp;action=share&amp;token=eyJ1c2VyX2lkIjoxNTU0NTczMDgsInBvc3RfaWQiOjE4MjMxNDQ2OCwiaWF0IjoxNzY5ODg1NTY5LCJleHAiOjE3NzI0Nzc1NjksImlzcyI6InB1Yi0xODEzMjYwIiwic3ViIjoicG9zdC1yZWFjdGlvbiJ9.5R9qiObU609w-rZ8URe228YwqEyWeVul8_ksSD0Upu0"><span>Share</span></a></p><p>If you want to get even more value from this publication and support me in creating these in-depth tutorials, consider becoming a paid subscriber.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe now&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/subscribe?"><span>Subscribe now</span></a></p><p>You can also check out my books on <a href="https://bamaniaashish.gumroad.com/">Gumroad</a> and connect with me on <a href="https://www.linkedin.com/in/ashishbamania/">LinkedIn</a> to stay in touch.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!yORo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3355004b-b38e-4918-820d-64e788ed3e45_1344x256.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!yORo!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3355004b-b38e-4918-820d-64e788ed3e45_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!yORo!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3355004b-b38e-4918-820d-64e788ed3e45_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!yORo!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3355004b-b38e-4918-820d-64e788ed3e45_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!yORo!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3355004b-b38e-4918-820d-64e788ed3e45_1344x256.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!yORo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3355004b-b38e-4918-820d-64e788ed3e45_1344x256.png" width="1344" height="256" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3355004b-b38e-4918-820d-64e788ed3e45_1344x256.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:256,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:76022,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186415859?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3355004b-b38e-4918-820d-64e788ed3e45_1344x256.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!yORo!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3355004b-b38e-4918-820d-64e788ed3e45_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!yORo!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3355004b-b38e-4918-820d-64e788ed3e45_1344x256.png 848w, https://substackcdn.com/image/fetch/$s_!yORo!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3355004b-b38e-4918-820d-64e788ed3e45_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!yORo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3355004b-b38e-4918-820d-64e788ed3e45_1344x256.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item><item><title><![CDATA[Memory For AI Agents: Everything That You Need To Know (Part-1)]]></title><description><![CDATA[Part 1: What is Memory and why do modern-day AI 
systems need it?]]></description><link>https://www.intoai.pub/p/ai-memory</link><guid isPermaLink="false">https://www.intoai.pub/p/ai-memory</guid><dc:creator><![CDATA[Dr. Ashish Bamania]]></dc:creator><pubDate>Wed, 04 Feb 2026 10:30:36 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!UGCo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!UGCo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!UGCo!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 424w, https://substackcdn.com/image/fetch/$s_!UGCo!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 848w, https://substackcdn.com/image/fetch/$s_!UGCo!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 1272w, https://substackcdn.com/image/fetch/$s_!UGCo!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!UGCo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png" width="1456" height="645" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:645,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1347923,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186643994?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!UGCo!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 424w, https://substackcdn.com/image/fetch/$s_!UGCo!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 848w, https://substackcdn.com/image/fetch/$s_!UGCo!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 1272w, https://substackcdn.com/image/fetch/$s_!UGCo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F91e8d3ad-a02d-499b-bb09-c94e213ec67d_2638x1168.png 1456w" 
sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>&#127873; Become a paid subscriber to &#8216;Into AI&#8217; today at a special 25% discount on the annual subscription.</strong></p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Grab your 25% discount today&quot;,&quot;action&quot;:null,&quot;class&quot;:null}" data-component-name="ButtonCreateButton"><a class="button primary" href="https://www.intoai.pub/741bb486"><span>Grab your 25% discount today</span></a></p><div><hr></div><p>LLMs are <strong>stateless</strong>. 
This means that their state is not permanent and refreshes with each session. This is why LLMs do not remember past interactions between sessions.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Ytq9!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c6b75f7-72ab-4a70-a6c4-ec2c1d5b32aa_1930x598.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Ytq9!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c6b75f7-72ab-4a70-a6c4-ec2c1d5b32aa_1930x598.png 424w, https://substackcdn.com/image/fetch/$s_!Ytq9!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c6b75f7-72ab-4a70-a6c4-ec2c1d5b32aa_1930x598.png 848w, https://substackcdn.com/image/fetch/$s_!Ytq9!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c6b75f7-72ab-4a70-a6c4-ec2c1d5b32aa_1930x598.png 1272w, https://substackcdn.com/image/fetch/$s_!Ytq9!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c6b75f7-72ab-4a70-a6c4-ec2c1d5b32aa_1930x598.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Ytq9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c6b75f7-72ab-4a70-a6c4-ec2c1d5b32aa_1930x598.png" width="1456" height="451" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2c6b75f7-72ab-4a70-a6c4-ec2c1d5b32aa_1930x598.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:451,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:90909,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186643994?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c6b75f7-72ab-4a70-a6c4-ec2c1d5b32aa_1930x598.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Ytq9!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c6b75f7-72ab-4a70-a6c4-ec2c1d5b32aa_1930x598.png 424w, https://substackcdn.com/image/fetch/$s_!Ytq9!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c6b75f7-72ab-4a70-a6c4-ec2c1d5b32aa_1930x598.png 848w, https://substackcdn.com/image/fetch/$s_!Ytq9!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c6b75f7-72ab-4a70-a6c4-ec2c1d5b32aa_1930x598.png 1272w, https://substackcdn.com/image/fetch/$s_!Ytq9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c6b75f7-72ab-4a70-a6c4-ec2c1d5b32aa_1930x598.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" 
fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>AI Memory systems aim to fix this problem. They help LLMs retain and use past information. 
This helps them stay relevant, learn over time, and respond more consistently in user interactions, grounded in truth.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9CZn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d8f9471-75f7-4541-8819-f673ad1b8a8b_2386x676.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9CZn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d8f9471-75f7-4541-8819-f673ad1b8a8b_2386x676.png 424w, https://substackcdn.com/image/fetch/$s_!9CZn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d8f9471-75f7-4541-8819-f673ad1b8a8b_2386x676.png 848w, https://substackcdn.com/image/fetch/$s_!9CZn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d8f9471-75f7-4541-8819-f673ad1b8a8b_2386x676.png 1272w, https://substackcdn.com/image/fetch/$s_!9CZn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d8f9471-75f7-4541-8819-f673ad1b8a8b_2386x676.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9CZn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d8f9471-75f7-4541-8819-f673ad1b8a8b_2386x676.png" width="1456" height="413" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5d8f9471-75f7-4541-8819-f673ad1b8a8b_2386x676.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:413,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:121900,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186643994?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d8f9471-75f7-4541-8819-f673ad1b8a8b_2386x676.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9CZn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d8f9471-75f7-4541-8819-f673ad1b8a8b_2386x676.png 424w, https://substackcdn.com/image/fetch/$s_!9CZn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d8f9471-75f7-4541-8819-f673ad1b8a8b_2386x676.png 848w, https://substackcdn.com/image/fetch/$s_!9CZn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d8f9471-75f7-4541-8819-f673ad1b8a8b_2386x676.png 1272w, https://substackcdn.com/image/fetch/$s_!9CZn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5d8f9471-75f7-4541-8819-f673ad1b8a8b_2386x676.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" 
fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h3>What are the different types of Memory?</h3><p>Modern-day AI memory systems are based on how humans store and retrieve knowledge.</p><p>The <a href="https://en.wikipedia.org/wiki/Soar_(cognitive_architecture)">Soar cognitive architecture</a> is one of the earliest and most comprehensive attempts to model human-like intelligence in agents (in our case, an LLM). 
</p><p>According to it, there are two types of memory:</p><ol><li><p><strong><a href="https://en.wikipedia.org/wiki/Baddeley%27s_model_of_working_memory">Working memory</a></strong><a href="https://en.wikipedia.org/wiki/Baddeley%27s_model_of_working_memory">,</a>&nbsp;or <strong>Short-term memory</strong>, which holds information about what the agent is thinking about during ongoing problem-solving.</p></li><li><p><strong>Long-term memory</strong>, which is further divided into three:</p><ul><li><p><strong><a href="https://en.wikipedia.org/wiki/Procedural_memory">Procedural memory</a></strong>: This stores rules or skills that can be applied to working memory to determine the agent&#8217;s behavior. </p></li><li><p><strong><a href="https://en.wikipedia.org/wiki/Semantic_memory">Semantic memory</a></strong>: This stores general facts and knowledge about the world.</p></li><li><p><strong><a href="https://en.wikipedia.org/wiki/Episodic_memory">Episodic memory</a></strong>: This stores sequences of the agent&#8217;s past behaviors and interactions.</p></li></ul></li></ol><p>Learning about this architecture makes discussing memory in LLMs easier.</p><div><hr></div><h3>Are LLMs completely memoryless?</h3><p>It&#8217;s not that LLMs lack all sorts of memory by default. </p><p>LLMs have a context window that stores information about an ongoing conversation with a user or another agent. 
The <strong><a href="https://en.wikipedia.org/wiki/Context_window">context window</a></strong> is the maximum length of input an LLM can consider at once.</p><p>This acts as an LLM&#8217;s <strong>Short-term</strong> or <strong>Working memory.</strong></p><p>The context window is a limited space for storing short-term information, but recent LLMs have really pushed its boundaries.</p><p>LLMs like Google&#8217;s Gemini 2.5 and xAI&#8217;s Grok 3 have a context window of 1 million tokens, which means that they can process up to 1,500 pages of text or 30K lines of code simultaneously!</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!8sqd!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!8sqd!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!8sqd!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!8sqd!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!8sqd!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!8sqd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png" width="1080" height="1080" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1080,&quot;width&quot;:1080,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:50789,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186643994?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!8sqd!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 424w, https://substackcdn.com/image/fetch/$s_!8sqd!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 848w, https://substackcdn.com/image/fetch/$s_!8sqd!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 1272w, https://substackcdn.com/image/fetch/$s_!8sqd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c4037c9-f1f7-4dd4-a7ae-5cbf72811692_1080x1080.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Context Window (Image from <a href="https://bamaniaashish.gumroad.com/l/llmbook/">LLMs In 100 Images</a>)</figcaption></figure></div><p>Alongside this, LLMs have something called <strong>Parametric memory,</strong> which is the knowledge that is stored directly in their parameters / weights as a result of training.</p><p>For example, if you ask an LLM about the capital of the UK, it will respond with London.</p><p>This is an LLM&#8217;s <strong>Long-term memory</strong> (specifically <strong>Procedural</strong> and <strong>Semantic memory</strong>). 
</p><p>Note that Procedural memory also includes an LLM&#8217;s <strong>system prompt</strong> that persists across sessions.</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Grab your 25% discount today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/741bb486"><span>Grab your 25% discount today</span></a></p><div><hr></div><h3>If LLMs already have memory, why do we need more?</h3><p>Sure, LLMs can use their context window as their working/ short-term memory, but this is limited.</p><p>Meaningful human interactions and relationship-building take place over a long period (weeks, months, or years). As conversations accumulate and get longer, the context window fills up pretty quickly.</p><p>There is also no thematic continuity in conversations, which means a person could talk about writing code one day, then describe how sad and lonely they feel about a past relationship, and then return to writing code another day.</p><p>Even if we capture this interaction information in the context window, store it in a persistent database (creating Long-term memory), and reload it each time, such a discontinuity in the conversational theme in a huge conversation trail creates a messy context (a phenomenon called <a href="https://research.trychroma.com/context-rot">Context Rot</a>).</p><p>Even with a million-token context window, LLMs cannot always accurately extract the most meaningful information from such a context in their context window.</p><p>Check out the <a href="https://github.com/gkamradt/LLMTest_NeedleInAHaystack">Needle-in-a-Haystack (NIAH) analysis</a> of GPT-4, which checks whether the model can find a small, specific fact (&#8216;Needle&#8217;) hidden somewhere in a very long context (&#8216;Haystack&#8217;).</p><p>GPT-4 performs very poorly on retrieval 
accuracy as context length increases.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lKVQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60e2e877-a2e9-4a9f-8728-1e681b4da3e7_1712x958.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!lKVQ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60e2e877-a2e9-4a9f-8728-1e681b4da3e7_1712x958.png 424w, https://substackcdn.com/image/fetch/$s_!lKVQ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60e2e877-a2e9-4a9f-8728-1e681b4da3e7_1712x958.png 848w, https://substackcdn.com/image/fetch/$s_!lKVQ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60e2e877-a2e9-4a9f-8728-1e681b4da3e7_1712x958.png 1272w, https://substackcdn.com/image/fetch/$s_!lKVQ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60e2e877-a2e9-4a9f-8728-1e681b4da3e7_1712x958.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!lKVQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60e2e877-a2e9-4a9f-8728-1e681b4da3e7_1712x958.png" width="728" height="407.5" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/60e2e877-a2e9-4a9f-8728-1e681b4da3e7_1712x958.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:815,&quot;width&quot;:1456,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;GPT_4_testing.png&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:&quot;center&quot;,&quot;offset&quot;:false}" class="sizing-normal" alt="GPT_4_testing.png" title="GPT_4_testing.png" srcset="https://substackcdn.com/image/fetch/$s_!lKVQ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60e2e877-a2e9-4a9f-8728-1e681b4da3e7_1712x958.png 424w, https://substackcdn.com/image/fetch/$s_!lKVQ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60e2e877-a2e9-4a9f-8728-1e681b4da3e7_1712x958.png 848w, https://substackcdn.com/image/fetch/$s_!lKVQ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60e2e877-a2e9-4a9f-8728-1e681b4da3e7_1712x958.png 1272w, https://substackcdn.com/image/fetch/$s_!lKVQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60e2e877-a2e9-4a9f-8728-1e681b4da3e7_1712x958.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" 
stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"><a href="https://github.com/gkamradt/LLMTest_NeedleInAHaystack/blob/main/img/GPT_4_testing.png">Source</a></figcaption></figure></div><p>Many modern-day LLMs have significantly improved on the NIAH test and show nearly perfect accuracy, but they still perform poorly on more comprehensive benchmarks like <a href="https://arxiv.org/abs/2404.06654">RULER</a>.</p><p>RULER checks how well an LLM can really use a long text context (information tracking, aggregating, and reasoning across long contexts), and not just find bits of information in it.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!XAwe!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F674310b1-b234-4648-8677-5739efc23d81_2462x1140.png" data-component-name="Image2ToDOM"><div 
class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!XAwe!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F674310b1-b234-4648-8677-5739efc23d81_2462x1140.png 424w, https://substackcdn.com/image/fetch/$s_!XAwe!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F674310b1-b234-4648-8677-5739efc23d81_2462x1140.png 848w, https://substackcdn.com/image/fetch/$s_!XAwe!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F674310b1-b234-4648-8677-5739efc23d81_2462x1140.png 1272w, https://substackcdn.com/image/fetch/$s_!XAwe!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F674310b1-b234-4648-8677-5739efc23d81_2462x1140.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!XAwe!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F674310b1-b234-4648-8677-5739efc23d81_2462x1140.png" width="1456" height="674" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/674310b1-b234-4648-8677-5739efc23d81_2462x1140.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:674,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:459666,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://www.intoai.pub/i/186643994?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F674310b1-b234-4648-8677-5739efc23d81_2462x1140.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!XAwe!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F674310b1-b234-4648-8677-5739efc23d81_2462x1140.png 424w, https://substackcdn.com/image/fetch/$s_!XAwe!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F674310b1-b234-4648-8677-5739efc23d81_2462x1140.png 848w, https://substackcdn.com/image/fetch/$s_!XAwe!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F674310b1-b234-4648-8677-5739efc23d81_2462x1140.png 1272w, https://substackcdn.com/image/fetch/$s_!XAwe!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F674310b1-b234-4648-8677-5739efc23d81_2462x1140.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Claimed context length and effective context length of different LLMs based on RULER (<a href="https://arxiv.org/abs/2404.06654">Source</a>)</figcaption></figure></div><p>It seems that pushing the limits of context length only delays, rather than resolves, the fundamental limitation.</p><p>Those are the problems with an LLM&#8217;s current short-term memory and with using it to build long-term memory.</p><p>There&#8217;s another way: we can directly update an LLM&#8217;s long-term memory. 
This is with mid-training and post-training methods, but this is an expensive process (in terms of time, money, and compute).</p><p>And even if we try to do this, there&#8217;s the curse of <a href="https://en.wikipedia.org/wiki/Catastrophic_interference">Catastrophic forgetting</a>, where LLMs can abruptly and drastically forget previously learned information upon being trained with new information.</p><p>This is why AI needs memory that goes beyond just longer context windows. </p><p>A robust AI memory should:</p><ul><li><p>Selectively store important information</p></li><li><p>Connect related information</p></li><li><p>Provide the right information when needed</p></li><li><p><a href="https://pmc.ncbi.nlm.nih.gov/articles/PMC5657245/">Forget information</a> that is not frequently needed over a long period </p></li></ul><p>much like <a href="https://arxiv.org/abs/2411.00489">how humans remember things</a>.</p><p>With this type of memory, an LLM-based AI system can stay consistent, remember user preferences, build on past conversations, and become a reliable long-term partner that grows with the user.</p><div><hr></div><p>That&#8217;s everything for the first part of this series on AI memory. 
</p><p>In the next one, we will discuss a memory system called <a href="https://mem0.ai/">Mem0</a> in depth, which acts as a universal, self&#8209;improving memory layer for LLM applications.</p><div><hr></div><p>Upgrade to a paid subscription today and use &#8216;Into AI&#8217; as your <strong>unfair advantage</strong>:</p><p class="button-wrapper" data-attrs="{&quot;url&quot;:&quot;https://www.intoai.pub/741bb486&quot;,&quot;text&quot;:&quot;Grab your 25% discount today&quot;,&quot;action&quot;:null,&quot;class&quot;:&quot;button-wrapper&quot;}" data-component-name="ButtonCreateButton"><a class="button primary button-wrapper" href="https://www.intoai.pub/741bb486"><span>Grab your 25% discount today</span></a></p><p>Check out my books on <strong><a href="https://bamaniaashish.gumroad.com/">Gumroad</a></strong> and connect with me on <strong><a href="https://www.linkedin.com/in/ashishbamania/">LinkedIn</a></strong> to stay in touch.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B_dw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, 
https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png" width="1344" height="256" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:256,&quot;width&quot;:1344,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:&quot;&quot;,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!B_dw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 424w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 848w, 
https://substackcdn.com/image/fetch/$s_!B_dw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1272w, https://substackcdn.com/image/fetch/$s_!B_dw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F86094759-95d8-4755-b13e-b97b7235f04e_1344x256.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div>]]></content:encoded></item></channel></rss>