<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>HuggingFace Tokenizers on Rauf Ibishov</title><link>http://raufibishov.com/tags/huggingface-tokenizers/</link><description>Recent content in HuggingFace Tokenizers on Rauf Ibishov</description><generator>Hugo -- gohugo.io</generator><language>en</language><copyright>© 2026 Rauf Ibishov</copyright><lastBuildDate>Mon, 01 Dec 2025 00:00:00 +0000</lastBuildDate><atom:link href="http://raufibishov.com/tags/huggingface-tokenizers/index.xml" rel="self" type="application/rss+xml"/><item><title>Azerbaijani Tokenizer — Three Algorithms, 64k Vocab, 1.727 Fertility</title><link>http://raufibishov.com/projects/az-tokenizer/</link><pubDate>Mon, 01 Dec 2025 00:00:00 +0000</pubDate><guid>http://raufibishov.com/projects/az-tokenizer/</guid><description>&lt;style&gt;
 /* Full-viewport overlay that hosts an enlarged image. Hidden until the page
    script switches its display to block. */
 .modal {
   display: none;
   position: fixed;
   z-index: 1000;
   top: 0;
   left: 0;
   width: 100%;
   height: 100%;
   overflow: auto;
   background-color: #ffffff;
 }

 /* Explicit light theme class added by the page script. */
 .modal.light-mode {
   background-color: #ffffff;
 }

 /* Centers the enlarged image and keeps it inside the viewport. */
 .modal-content {
   display: flex;
   align-items: center;
   justify-content: center;
   margin: auto;
   padding: 40px 20px;
   max-width: 90vw;
   max-height: 90vh;
   overflow: auto;
 }

 .modal-content img {
   max-width: 100%;
   max-height: 85vh;
   height: auto;
 }

 /* Close control pinned to the top-right corner of the overlay. */
 .modal-close {
   position: absolute;
   top: 20px;
   right: 30px;
   font-size: 28px;
   font-weight: bold;
   color: #666;
   cursor: pointer;
 }

 .modal-close:hover {
   color: #000;
 }

 /* Thumbnails that open a modal advertise it with a zoom cursor. */
 img.clickable {
   cursor: zoom-in;
 }

 /* Dark colour scheme overrides. Kept after the light rules so that they win
    at equal specificity. */
 @media (prefers-color-scheme: dark) {
   .modal.dark-mode {
     background-color: #1a1a1a;
   }

   .modal-close {
     color: #999;
   }

   .modal-close:hover {
     color: #fff;
   }
 }
&lt;/style&gt;
&lt;script&gt;
 /**
  * Wires the click handlers for the image modals.
  *
  * Two thumbnails ('pipeline-img' and 'fertility-img') each open their own
  * modal ('pipeline-modal' and 'fertility-modal'). Every element with the
  * 'modal' class gets handlers for its close button and for clicks on its
  * backdrop.
  *
  * Any element that is not present in the document is skipped, so the
  * function is safe to run where only part of the markup is rendered.
  */
 function setupModals() {
   const modals = document.querySelectorAll('.modal');

   // Hide one modal and give the page its scrollbar back.
   function closeModal(modal) {
     modal.style.display = 'none';
     document.body.style.overflow = 'auto';
   }

   // Hide every modal, so that at most one is visible at a time.
   function closeAllModals() {
     modals.forEach(function (modal) {
       modal.style.display = 'none';
     });
     document.body.style.overflow = 'auto';
   }

   // Make the image with id imageId open the modal with id modalId.
   function bindTrigger(imageId, modalId) {
     const image = document.getElementById(imageId);
     if (!image) {
       // The thumbnail is not part of this document; nothing to wire.
       return;
     }

     image.addEventListener('click', function (e) {
       e.stopPropagation();
       closeAllModals();

       const modal = document.getElementById(modalId);
       if (!modal) {
         return;
       }
       modal.style.display = 'block';
       // Lock page scrolling while the overlay is open.
       document.body.style.overflow = 'hidden';
       applyTheme(modal);
     });
   }

   // Set up each image-modal pair
   bindTrigger('pipeline-img', 'pipeline-modal');
   bindTrigger('fertility-img', 'fertility-modal');

   // Set up close buttons and overlay clicks
   modals.forEach(function (modal) {
     const closeBtn = modal.querySelector('.modal-close');
     const content = modal.querySelector('.modal-content');

     if (closeBtn) {
       closeBtn.addEventListener('click', function (e) {
         e.stopPropagation();
         closeModal(modal);
       });
     }

     // Close on background click, but not on image click
     modal.addEventListener('click', function (e) {
       if (e.target === modal) {
         closeModal(modal);
       }
     });

     // Prevent closing when clicking on the image
     if (content) {
       content.addEventListener('click', function (e) {
         e.stopPropagation();
       });
     }
   });
 }
 
 /**
  * Marks a modal with the class that matches the visitor's colour-scheme
  * preference: 'dark-mode' when the '(prefers-color-scheme: dark)' media
  * query matches, 'light-mode' otherwise. The opposite class is removed.
  *
  * @param modal element whose classList is updated
  */
 function applyTheme(modal) {
   // Without matchMedia support the light scheme is used.
   const prefersDark = window.matchMedia
     ? window.matchMedia('(prefers-color-scheme: dark)').matches
     : false;

   const activeClass = prefersDark ? 'dark-mode' : 'light-mode';
   const staleClass = prefersDark ? 'light-mode' : 'dark-mode';

   modal.classList.add(activeClass);
   modal.classList.remove(staleClass);
 }
 
 // Initialize when DOM is ready. If the document has already finished
 // parsing, DOMContentLoaded will not fire again, so run the setup directly.
 if (document.readyState === 'loading') {
   document.addEventListener('DOMContentLoaded', setupModals);
 } else {
   setupModals();
 }
&lt;/script&gt;
&lt;p&gt;&lt;em&gt;Status: &lt;strong&gt;Complete&lt;/strong&gt; · WordPiece-uncased (1.727 fertility) selected for AzBERT pretraining · Component 1 of the AzBERT pipeline&lt;/em&gt;&lt;/p&gt;</description><media:content xmlns:media="http://search.yahoo.com/mrss/" url="http://raufibishov.com/projects/az-tokenizer/feature.svg"/></item></channel></rss>