<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
    <title>Wil's Blog</title>
    <link>https://blog.wilsworld.net</link>
    <description>a blog mostly about software engineering</description>
    <language>en-us</language>
    <lastBuildDate>Sun, 26 Apr 2026 20:27:08 +0000</lastBuildDate>
    <atom:link href="https://blog.wilsworld.net/rss.xml" rel="self" type="application/rss+xml" />
    <item>
        <title>Turning the Filesystem into a Database</title>
        <link>https://blog.wilsworld.net/posts/2025_11_11-Turning_the_Filesystem_into_a_Database.html</link>
        <guid>https://blog.wilsworld.net/posts/2025_11_11-Turning_the_Filesystem_into_a_Database.html</guid>
        <pubDate>Tue, 11 Nov 2025 00:00:00 +0000</pubDate>
        <description>Explaining a scheme for implementing useful multi-process ACID abstractions on top of a standard filesystem.</description>
        <content:encoded><![CDATA[<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
<head>
  <meta charset="utf-8" />
  <meta name="generator" content="pandoc" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
  <meta name="description" content="Explaining a scheme for implementing useful multi-process ACID abstractions on top of a standard filesystem." />
  <title>Turning the Filesystem into a Database</title>
  <style>
    html {
      color: #1a1a1a;
      background-color: #fdfdfd;
    }
    body {
      margin: 0 auto;
      max-width: 36em;
      padding-left: 50px;
      padding-right: 50px;
      padding-top: 50px;
      padding-bottom: 50px;
      hyphens: auto;
      overflow-wrap: break-word;
      text-rendering: optimizeLegibility;
      font-kerning: normal;
    }
    @media (max-width: 600px) {
      body {
        font-size: 0.9em;
        padding: 12px;
      }
      h1 {
        font-size: 1.8em;
      }
    }
    @media print {
      html {
        background-color: white;
      }
      body {
        background-color: transparent;
        color: black;
        font-size: 12pt;
      }
      p, h2, h3 {
        orphans: 3;
        widows: 3;
      }
      h2, h3, h4 {
        page-break-after: avoid;
      }
    }
    p {
      margin: 1em 0;
    }
    a {
      color: #1a1a1a;
    }
    a:visited {
      color: #1a1a1a;
    }
    img {
      max-width: 100%;
    }
    h1, h2, h3, h4, h5, h6 {
      margin-top: 1.4em;
    }
    h5, h6 {
      font-size: 1em;
      font-style: italic;
    }
    h6 {
      font-weight: normal;
    }
    ol, ul {
      padding-left: 1.7em;
      margin-top: 1em;
    }
    li > ol, li > ul {
      margin-top: 0;
    }
    blockquote {
      margin: 1em 0 1em 1.7em;
      padding-left: 1em;
      border-left: 2px solid #e6e6e6;
      color: #606060;
    }
    code {
      font-family: Menlo, Monaco, Consolas, 'Lucida Console', monospace;
      font-size: 85%;
      margin: 0;
      hyphens: manual;
    }
    pre {
      margin: 1em 0;
      overflow: auto;
    }
    pre code {
      padding: 0;
      overflow: visible;
      overflow-wrap: normal;
    }
    .sourceCode {
     background-color: transparent;
     overflow: visible;
    }
    hr {
      background-color: #1a1a1a;
      border: none;
      height: 1px;
      margin: 1em 0;
    }
    table {
      margin: 1em 0;
      border-collapse: collapse;
      width: 100%;
      overflow-x: auto;
      display: block;
      font-variant-numeric: lining-nums tabular-nums;
    }
    table caption {
      margin-bottom: 0.75em;
    }
    tbody {
      margin-top: 0.5em;
      border-top: 1px solid #1a1a1a;
      border-bottom: 1px solid #1a1a1a;
    }
    th {
      border-top: 1px solid #1a1a1a;
      padding: 0.25em 0.5em 0.25em 0.5em;
    }
    td {
      padding: 0.125em 0.5em 0.25em 0.5em;
    }
    header {
      margin-bottom: 4em;
      text-align: center;
    }
    #TOC li {
      list-style: none;
    }
    #TOC ul {
      padding-left: 1.3em;
    }
    #TOC > ul {
      padding-left: 0;
    }
    #TOC a:not(:hover) {
      text-decoration: none;
    }
    code{white-space: pre-wrap;}
    span.smallcaps{font-variant: small-caps;}
    div.columns{display: flex; gap: min(4vw, 1.5em);}
    div.column{flex: auto; overflow-x: auto;}
    div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
    /* The extra [class] is a hack that increases specificity enough to
       override a similar rule in reveal.js */
    ul.task-list[class]{list-style: none;}
    ul.task-list li input[type="checkbox"] {
      font-size: inherit;
      width: 0.8em;
      margin: 0 0.8em 0.2em -1.6em;
      vertical-align: middle;
    }
    /* CSS for syntax highlighting */
    pre > code.sourceCode { white-space: pre; position: relative; }
    pre > code.sourceCode > span { line-height: 1.25; }
    pre > code.sourceCode > span:empty { height: 1.2em; }
    .sourceCode { overflow: visible; }
    code.sourceCode > span { color: inherit; text-decoration: inherit; }
    div.sourceCode { margin: 1em 0; }
    pre.sourceCode { margin: 0; }
    @media screen {
    div.sourceCode { overflow: auto; }
    }
    @media print {
    pre > code.sourceCode { white-space: pre-wrap; }
    pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
    }
    pre.numberSource code
      { counter-reset: source-line 0; }
    pre.numberSource code > span
      { position: relative; left: -4em; counter-increment: source-line; }
    pre.numberSource code > span > a:first-child::before
      { content: counter(source-line);
        position: relative; left: -1em; text-align: right; vertical-align: baseline;
        border: none; display: inline-block;
        -webkit-touch-callout: none; -webkit-user-select: none;
        -khtml-user-select: none; -moz-user-select: none;
        -ms-user-select: none; user-select: none;
        padding: 0 4px; width: 4em;
        color: #aaaaaa;
      }
    pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
    div.sourceCode
      {   }
    @media screen {
    pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
    }
    code span.al { color: #ff0000; font-weight: bold; } /* Alert */
    code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
    code span.at { color: #7d9029; } /* Attribute */
    code span.bn { color: #40a070; } /* BaseN */
    code span.bu { color: #008000; } /* BuiltIn */
    code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
    code span.ch { color: #4070a0; } /* Char */
    code span.cn { color: #880000; } /* Constant */
    code span.co { color: #60a0b0; font-style: italic; } /* Comment */
    code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
    code span.do { color: #ba2121; font-style: italic; } /* Documentation */
    code span.dt { color: #902000; } /* DataType */
    code span.dv { color: #40a070; } /* DecVal */
    code span.er { color: #ff0000; font-weight: bold; } /* Error */
    code span.ex { } /* Extension */
    code span.fl { color: #40a070; } /* Float */
    code span.fu { color: #06287e; } /* Function */
    code span.im { color: #008000; font-weight: bold; } /* Import */
    code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
    code span.kw { color: #007020; font-weight: bold; } /* Keyword */
    code span.op { color: #666666; } /* Operator */
    code span.ot { color: #007020; } /* Other */
    code span.pp { color: #bc7a00; } /* Preprocessor */
    code span.sc { color: #4070a0; } /* SpecialChar */
    code span.ss { color: #bb6688; } /* SpecialString */
    code span.st { color: #4070a0; } /* String */
    code span.va { color: #19177c; } /* Variable */
    code span.vs { color: #4070a0; } /* VerbatimString */
    code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
  </style>
</head>
<body>
<a href="../index.html">Home</a>
<header id="title-block-header">
<h1 class="title">Turning the Filesystem into a Database</h1>
</header>
<p>This article is an introduction to and explanation of SubsidiaDB.
Usage and precise implementation details will not be covered; instead,
we will gradually explore the systems and intuitions behind how it
works. It’s important to know where we are headed, so to reiterate the
tagline: SubsidiaDB is a transactional, concurrent, embedded database
that utilizes the filesystem as its storage engine.</p>
<p>Disclaimer: This project was more of a fun thought experiment and
programming project than something anyone should actually try to use in
production.</p>
<h2 id="file-locking">File Locking</h2>
<p>Let’s start by considering how we can safely read and write a single
file from multiple processes. All of the major desktop operating systems
(Windows, MacOS, and Linux) have file locking APIs supporting
multi-reader/single-writer. In pseudocode we will use the hypothetical
functions <code>lock_shared/unlock_shared</code> and
<code>lock_exclusive/unlock_exclusive</code>. It may seem sufficient to
simply use these functions, but the problem is that none of these
operating systems guarantee that mixed reading and writing is fair. If,
for instance, a file is constantly being read, the shared lock will
always be taken and patiently waiting writers will block indefinitely.
In a database, the possibility of writers never making forward progress
is not acceptable.</p>
<p>In order to add fairness between readers and writers, we will
introduce an adjacent file called <code>_filename_.queue</code>. Before
either a reader or writer attempts to lock the file, it will first take
an exclusive lock on the queue file, then immediately release it once it
acquires the file lock (see pseudo-code below). By forcing an initial
exclusive synchronization point, incoming concurrent readers and writers
will both have an equal chance of making forward progress. While this
does come with a performance penalty, in a database, well behaved
concurrency with steady throughput is generally preferable over lopsided
read/write throughput and exploding tail latencies.</p>
<div class="sourceCode" id="cb1"><pre
class="sourceCode rust"><code class="sourceCode rust"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="kw">fn</span> lock_read(file) <span class="op">{</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a>    queue <span class="op">=</span> file <span class="op">+</span> <span class="st">&quot;.queue&quot;</span></span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a>    lock_exclusive(queue)</span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a>    lock_shared(file)</span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a>    unlock_exclusive(queue)</span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a><span class="op">}</span></span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a><span class="kw">fn</span> lock_write(file) <span class="op">{</span></span>
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a>    queue <span class="op">=</span> file <span class="op">+</span> <span class="st">&quot;.queue&quot;</span></span>
<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a>    lock_exclusive(queue)</span>
<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a>    lock_exclusive(file)</span>
<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a>    unlock_exclusive(queue)</span>
<span id="cb1-13"><a href="#cb1-13" aria-hidden="true" tabindex="-1"></a><span class="op">}</span></span></code></pre></div>
<h2 id="directories">Directories</h2>
<p>In treating the filesystem as a database, we additionally need to
consider reading and writing directories (by that I mean CRUD operations
on a directory’s children). Since directories can’t be locked, we will
simply have an adjacent file next to each directory called
<code>_dirname_.lock</code>. Read and write locks on a single directory
will then work exactly the same way as files.</p>
<p>It’s important to also note at this point that unlike most database
systems, which are relatively flat, filesystems are hierarchical. When
taking a read or write lock on a file or directory, concurrent
modifications to the parent directory may cause problems. To remedy
this, our lock procedure (read and write) will first take a shared lock
on each parent directory starting from the root and going down to the
target.</p>
<div class="sourceCode" id="cb2"><pre
class="sourceCode rust"><code class="sourceCode rust"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="kw">fn</span> prepare_for_lock(root<span class="op">,</span> target) <span class="op">{</span></span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a>    <span class="cf">for</span> ancestor from root to target (exclusive) <span class="op">{</span></span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a>        lock_read(ancestor)</span>
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a>    <span class="op">}</span></span>
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a><span class="op">}</span></span></code></pre></div>
<h2 id="atomic-modification">Atomic Modification</h2>
<p>Even if an application does have an exclusive write lock on a file,
modifying that file is inherently risky because filesystems generally
only protect their metadata and make little attempt at preventing file
content corruption. A sudden shutdown or failure during ongoing file
writing will leave it in an incomplete state. We can minimize this
problem by instead mutating a copy of the file, then performing an
atomic rename that overwrites the original file with the new version;
for those unfamiliar, this pattern is commonly referred to as
copy-on-write (CoW).</p>
<p>There is an unfortunate caveat for entire directories. While the
atomic rename operation can be used to replace a non-empty file, it
cannot be used for replacing non-empty directories. Instead, we must use
two sequential rename operations: first, rename the original directory
with a backup name; second, rename the new directory to replace the
original. While this is not strictly atomic, it significantly minimizes
the possibility of a partial commit compared to performing a series of
mutations directly.</p>
<p>I’d also like to note here that making copies of files and
directories will typically add considerable overhead, but some modern
filesystems like Btrfs, XFS, APFS and others, actually have support for
CoW operations. This means performing a copy is fast and does not
duplicate data on disk; mutations are essentially stored as a diff of
the original content.</p>
<h2 id="transactions">Transactions*</h2>
<p>This section header contains an asterisk because transactions in
SubsidiaDB do not offer multi-entry rollback; while this is not uncommon
in NoSQL databases, it is a notable limitation compared to transactions
offered by many popular DBMSs. With that disclaimer out of the way,
let’s explore how they are implemented.</p>
<p>We have already established a robust locking mechanism, so the
obvious algorithm for transactions would be some form of <a
href="https://en.wikipedia.org/wiki/Two-phase_locking">two-phase
locking</a> (2PL). For those unfamiliar, the idea is that all necessary
locks are acquired (first phase) then released (second phase) strictly
in that order. Standard 2PL, where locks are gradually acquired as
needed during the transaction, is not a good fit here because it is
susceptible to deadlocks, and unlike normal DBMSs, performing global
deadlock detection would be very complicated, bordering on infeasible.
Instead, SubsidiaDB uses conservative 2PL, so all possible reads and
writes must be declared at the beginning of the transaction. This would
still not entirely solve deadlocks if locks are acquired in random
order, but we can guarantee a total ordering on locks by simply sorting
the file paths lexicographically.</p>
<p>Without getting too wrapped up in an analysis of different
concurrency control solutions, 2PL is great because it guarantees strict
serializability: every transaction is applied as if it was run in order
on a single thread. I find that weaker guarantees can be hard to reason
about and lead to subtle logic bugs. While many praises have been sung
about the performance benefits of MVCC, I think it’s validating that
Google Spanner, the company’s primary OLTP datastore, uses 2PL on write
transactions for exactly this reason.</p>
<h2 id="atomic-directory-modifications-revisited">Atomic Directory
Modifications Revisited</h2>
<p>So far, I have mentioned two major shortcomings, non-atomic directory
commit and lack of multi-entry rollback. SubsidiaDB does offer a
solution to these problems, but it comes with drawbacks.</p>
<p>As mentioned before, write commits are atomic for files, so by
representing our directory with a symbolic link, which is just a file,
we actually can perform atomic commit for an entire directory. Let’s
demonstrate how it works with a hypothetical directory called
<code>target</code>. Target is actually a symbolic link with the
contents <code>./&lt;uuid-1&gt;</code>. First, we take a write lock on
<code>target</code>, then copy the actual content directory
<code>&lt;uuid-1&gt;</code> to <code>&lt;uuid-2&gt;</code> and perform
mutations on the copy. To commit, we make a new symbolic link
<code>target.tmp</code> and atomically rename it to <code>target</code>.
Finally, we can safely delete <code>&lt;uuid-1&gt;</code> to clean up.
Symbolic links aren’t an ideal solution in many cases though, as they
have serious problems on Windows and can complicate programmatic
traversal. For these reasons, it is left to the discretion of users to
what extent they employ this feature.</p>
<p>For multi-entry rollback, this may have already been apparent to some
readers, but it can be achieved by performing CoW + commit on the
highest encompassing parent directory for all the data a transaction
touches. This approach can be very heavy handed, and requires the
foresight to structure data in accordance with transactions that will be
performed.</p>
<h2 id="conclusion">Conclusion</h2>
<p>This database came about from wrestling with a simple question: how
could one create an embedded database that supports truly concurrent
multi-process writes? From this starting point, it felt to me like the
entire design fell into place as a natural logical progression. I see
this system as filling a neglected niche, small applications that want
resilient storage with ACID guarantees but don’t want the large leap in
complexity of standard embedded databases (or full DBMSs for that
matter). Working with files is one of the first programming topics people
learn about, so it’s a huge benefit that this database is simply
providing additional safety to a persistence interface that everyone is
already familiar with.</p>
<h3 id="addendum-on-hierarchical-databases">Addendum on Hierarchical
Databases</h3>
<p>The tech nerd and history buff in me feels it necessary to mention
that SubsidiaDB falls squarely into the now rarely mentioned category of
<a
href="https://en.wikipedia.org/wiki/Hierarchical_database_model">hierarchical
databases</a>. These are the original NoSQL storage solutions because
they came about in the 1960s and actually predate relational algebra
itself! <a
href="https://en.wikipedia.org/wiki/IBM_Information_Management_System">IBM
IMS</a> (Information Management System) is the most popular example and
is still widely used in industries today.</p>
</body>
</html>]]></content:encoded>
    </item>
    <item>
        <title>Exponentially Smoothed FPS Counters</title>
        <link>https://blog.wilsworld.net/posts/2026_04_25-Exponentially_Smoothed_FPS_Counters.html</link>
        <guid>https://blog.wilsworld.net/posts/2026_04_25-Exponentially_Smoothed_FPS_Counters.html</guid>
        <pubDate>Sat, 25 Apr 2026 00:00:00 +0000</pubDate>
        <description>Exploring how to apply an interesting numerical technique with constant time and space complexity to FPS counters.</description>
        <content:encoded><![CDATA[<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
<head>
  <meta charset="utf-8" />
  <meta name="generator" content="pandoc" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes" />
  <meta name="description" content="Exploring how to apply an interesting numerical technique with constant time and space complexity to FPS counters." />
  <title>Exponentially Smoothed FPS Counters</title>
  <style>
    html {
      color: #1a1a1a;
      background-color: #fdfdfd;
    }
    body {
      margin: 0 auto;
      max-width: 36em;
      padding-left: 50px;
      padding-right: 50px;
      padding-top: 50px;
      padding-bottom: 50px;
      hyphens: auto;
      overflow-wrap: break-word;
      text-rendering: optimizeLegibility;
      font-kerning: normal;
    }
    @media (max-width: 600px) {
      body {
        font-size: 0.9em;
        padding: 12px;
      }
      h1 {
        font-size: 1.8em;
      }
    }
    @media print {
      html {
        background-color: white;
      }
      body {
        background-color: transparent;
        color: black;
        font-size: 12pt;
      }
      p, h2, h3 {
        orphans: 3;
        widows: 3;
      }
      h2, h3, h4 {
        page-break-after: avoid;
      }
    }
    p {
      margin: 1em 0;
    }
    a {
      color: #1a1a1a;
    }
    a:visited {
      color: #1a1a1a;
    }
    img {
      max-width: 100%;
    }
    h1, h2, h3, h4, h5, h6 {
      margin-top: 1.4em;
    }
    h5, h6 {
      font-size: 1em;
      font-style: italic;
    }
    h6 {
      font-weight: normal;
    }
    ol, ul {
      padding-left: 1.7em;
      margin-top: 1em;
    }
    li > ol, li > ul {
      margin-top: 0;
    }
    blockquote {
      margin: 1em 0 1em 1.7em;
      padding-left: 1em;
      border-left: 2px solid #e6e6e6;
      color: #606060;
    }
    code {
      font-family: Menlo, Monaco, Consolas, 'Lucida Console', monospace;
      font-size: 85%;
      margin: 0;
      hyphens: manual;
    }
    pre {
      margin: 1em 0;
      overflow: auto;
    }
    pre code {
      padding: 0;
      overflow: visible;
      overflow-wrap: normal;
    }
    .sourceCode {
     background-color: transparent;
     overflow: visible;
    }
    hr {
      background-color: #1a1a1a;
      border: none;
      height: 1px;
      margin: 1em 0;
    }
    table {
      margin: 1em 0;
      border-collapse: collapse;
      width: 100%;
      overflow-x: auto;
      display: block;
      font-variant-numeric: lining-nums tabular-nums;
    }
    table caption {
      margin-bottom: 0.75em;
    }
    tbody {
      margin-top: 0.5em;
      border-top: 1px solid #1a1a1a;
      border-bottom: 1px solid #1a1a1a;
    }
    th {
      border-top: 1px solid #1a1a1a;
      padding: 0.25em 0.5em 0.25em 0.5em;
    }
    td {
      padding: 0.125em 0.5em 0.25em 0.5em;
    }
    header {
      margin-bottom: 4em;
      text-align: center;
    }
    #TOC li {
      list-style: none;
    }
    #TOC ul {
      padding-left: 1.3em;
    }
    #TOC > ul {
      padding-left: 0;
    }
    #TOC a:not(:hover) {
      text-decoration: none;
    }
    code{white-space: pre-wrap;}
    span.smallcaps{font-variant: small-caps;}
    div.columns{display: flex; gap: min(4vw, 1.5em);}
    div.column{flex: auto; overflow-x: auto;}
    div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
    /* The extra [class] is a hack that increases specificity enough to
       override a similar rule in reveal.js */
    ul.task-list[class]{list-style: none;}
    ul.task-list li input[type="checkbox"] {
      font-size: inherit;
      width: 0.8em;
      margin: 0 0.8em 0.2em -1.6em;
      vertical-align: middle;
    }
    /* CSS for syntax highlighting */
    pre > code.sourceCode { white-space: pre; position: relative; }
    pre > code.sourceCode > span { line-height: 1.25; }
    pre > code.sourceCode > span:empty { height: 1.2em; }
    .sourceCode { overflow: visible; }
    code.sourceCode > span { color: inherit; text-decoration: inherit; }
    div.sourceCode { margin: 1em 0; }
    pre.sourceCode { margin: 0; }
    @media screen {
    div.sourceCode { overflow: auto; }
    }
    @media print {
    pre > code.sourceCode { white-space: pre-wrap; }
    pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
    }
    pre.numberSource code
      { counter-reset: source-line 0; }
    pre.numberSource code > span
      { position: relative; left: -4em; counter-increment: source-line; }
    pre.numberSource code > span > a:first-child::before
      { content: counter(source-line);
        position: relative; left: -1em; text-align: right; vertical-align: baseline;
        border: none; display: inline-block;
        -webkit-touch-callout: none; -webkit-user-select: none;
        -khtml-user-select: none; -moz-user-select: none;
        -ms-user-select: none; user-select: none;
        padding: 0 4px; width: 4em;
        color: #aaaaaa;
      }
    pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
    div.sourceCode
      {   }
    @media screen {
    pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
    }
    code span.al { color: #ff0000; font-weight: bold; } /* Alert */
    code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
    code span.at { color: #7d9029; } /* Attribute */
    code span.bn { color: #40a070; } /* BaseN */
    code span.bu { color: #008000; } /* BuiltIn */
    code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
    code span.ch { color: #4070a0; } /* Char */
    code span.cn { color: #880000; } /* Constant */
    code span.co { color: #60a0b0; font-style: italic; } /* Comment */
    code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
    code span.do { color: #ba2121; font-style: italic; } /* Documentation */
    code span.dt { color: #902000; } /* DataType */
    code span.dv { color: #40a070; } /* DecVal */
    code span.er { color: #ff0000; font-weight: bold; } /* Error */
    code span.ex { } /* Extension */
    code span.fl { color: #40a070; } /* Float */
    code span.fu { color: #06287e; } /* Function */
    code span.im { color: #008000; font-weight: bold; } /* Import */
    code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
    code span.kw { color: #007020; font-weight: bold; } /* Keyword */
    code span.op { color: #666666; } /* Operator */
    code span.ot { color: #007020; } /* Other */
    code span.pp { color: #bc7a00; } /* Preprocessor */
    code span.sc { color: #4070a0; } /* SpecialChar */
    code span.ss { color: #bb6688; } /* SpecialString */
    code span.st { color: #4070a0; } /* String */
    code span.va { color: #19177c; } /* Variable */
    code span.vs { color: #4070a0; } /* VerbatimString */
    code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
  </style>
  <script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script>
  <script
  src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml-full.js"
  type="text/javascript"></script>
</head>
<body>
<a href="../index.html">Home</a>
<header id="title-block-header">
<h1 class="title">Exponentially Smoothed FPS Counters</h1>
</header>
<p>I recently read <a
href="https://vplesko.com/posts/how_to_implement_an_fps_counter.html">this
article</a> about implementing FPS counters which looks at a few
different methods, covering the benefits and drawbacks; while
interesting, it focuses exclusively on the use of simple moving averages
(SMA). I’m not a game developer, but I’ve done enough graphical
programming that I’ve encountered this problem a number of times and
instead tend to opt for <a
href="https://en.wikipedia.org/wiki/Exponential_smoothing">exponential
moving average (EMA)</a>, which I haven’t seen discussed much online for
this use case.</p>
<p>I’m not going to explain EMA (because I’d just be rephrasing the
first paragraph of Wikipedia) or analyze the tradeoffs from a signal
processing perspective. I don’t have much experience in that field, and
I’m more interested in its applications to software engineering. The
plain and simple reason for preferring EMA is that it has constant time
and space complexity regardless of window size, while also being easier
to implement, in my opinion. These properties make it an interesting
technique for low-overhead moving average calculations on just about any
real-time metric.</p>
<p>For our application, we will be keeping track of the moving average
of frame duration which we denote <span
class="math inline">\(\bar{d}_i\)</span>. Our smoothed frames per second
that we display to the screen as a counter is trivially calculated as
<span class="math inline">\(1/\bar{d}_i\)</span>. We let <span
class="math inline">\(d_i\)</span> be the duration of the current frame
and <span class="math inline">\(\bar{d}_{i-1}\)</span> be the moving
average after the previous frame. So the formula is:</p>
<p><span class="math display">\[\bar{d}_i = \alpha d_i +
(1-\alpha)\bar{d}_{i-1}\]</span></p>
<h2 id="fixed-smoothing-factor">Fixed Smoothing Factor</h2>
<p>The most important thing to figure out is what value we use for <span
class="math inline">\(\alpha\)</span>, which can be thought of as the
smoothing or forgetting factor. When using EMA on stocks it is common to
see visualizations of n-day EMA. The way they figure out <span
class="math inline">\(\alpha\)</span> for this financial calculation
relies on the concept of the average age of datapoints. For an SMA with
a fixed sample window, the average age of datapoints is simply <span
class="math inline">\((n-1)/2\)</span>. For fixed frequency EMA, the average
age of its datapoints is <span
class="math inline">\((1-\alpha)/\alpha\)</span>. By setting these two
as equal, we derive a formula for <span
class="math inline">\(\alpha\)</span>, whereby the EMA smoothing
approximates the smoothing of an n-sample SMA:</p>
<p><span class="math display">\[
\alpha=\frac{2}{n+1}
\]</span></p>
<p>This can be used as is for a quick and dirty FPS counter if you just
want the average over the last n frames. However, as discussed in the
original post, the problem with using fixed sample averages for FPS is
that it doesn’t properly depend on time. This is especially noticeable
when graphing the values over time: a slower framerate will be smoother,
while a high framerate will be more jittery.</p>
<h2 id="time-based-dynamic-smoothing-factor">Time-based (Dynamic)
Smoothing Factor</h2>
<p>What we actually want is the average framerate over the last <span
class="math inline">\(T\)</span> duration in some time unit. To do this
we will consider that we want our EMA to maintain a constant average age
of datapoints <span class="math inline">\(T\)</span>, therefore:</p>
<p><span class="math display">\[T = \alpha (0) + (1-\alpha)(T +
d_i)\]</span> <span class="math display">\[\alpha = \frac{d_i}{T +
d_i}\]</span></p>
<p>Substituting this into our original formula we get:</p>
<p><span class="math display">\[\bar{d}_i = \frac{d_i}{T + d_i} d_i +
(1-\frac{d_i}{T + d_i})\bar{d}_{i-1}\]</span></p>
<p>And after some simplification:</p>
<p><span class="math display">\[\bar{d}_i = \frac{d_i^2 +
\bar{d}_{i-1}T}{T+d_i}\]</span></p>
<p>So if we want to approximate the smoothing of a duration based SMA
like the method arrived at in the original blog, we set <span
class="math inline">\(T\)</span> to the SMA window duration divided by
two because the average age of time windowed SMA data is simply half the
window duration.</p>
<h2 id="real-world-analysis">Real World Analysis</h2>
<p>I took a random capture of 5 seconds of ARC Raiders gameplay with
CapFrameX then implemented assorted smoothing techniques to demonstrate
the resulting FPS values.</p>
<p><a href="../resources/time_fps_graph.png" target="_blank">
<img src="../resources/time_fps_graph.png"> </a></p>
<p><a href="../resources/sample_fps_graph.png" target="_blank">
<img src="../resources/sample_fps_graph.png"> </a></p>
<p><a
href="https://github.com/wilgaboury/blog/blob/master/other/fps-analysis/frame_analysis.ipynb">notebook
source</a></p>
<h2 id="fps-calculation-routine-using-sdl">FPS Calculation Routine Using
SDL</h2>
<p>Provided is example C code for using this technique with the popular
<a href="https://www.libsdl.org/">SDL library</a>.</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode c"><code class="sourceCode c"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="dt">void</span> update_ema_frame_duration_sec<span class="op">(</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a>    Uint64 <span class="op">*</span>prev_frame_start<span class="op">,</span></span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a>    <span class="dt">float</span> <span class="op">*</span>ema_dur<span class="op">,</span></span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a>    <span class="dt">float</span> window_dur</span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a><span class="op">)</span> <span class="op">{</span></span>
<span id="cb1-6"><a href="#cb1-6" aria-hidden="true" tabindex="-1"></a>    Uint64 frame_start <span class="op">=</span> SDL_GetPerformanceCounter<span class="op">();</span></span>
<span id="cb1-7"><a href="#cb1-7" aria-hidden="true" tabindex="-1"></a>    <span class="dt">float</span> frame_dur_unitless <span class="op">=</span> <span class="op">(</span><span class="dt">float</span><span class="op">)(</span>frame_start<span class="op">-*</span>prev_frame_start<span class="op">);</span></span>
<span id="cb1-8"><a href="#cb1-8" aria-hidden="true" tabindex="-1"></a>    <span class="op">*</span>prev_frame_start <span class="op">=</span> frame_start<span class="op">;</span></span>
<span id="cb1-9"><a href="#cb1-9" aria-hidden="true" tabindex="-1"></a>    <span class="dt">float</span> frame_dur <span class="op">=</span> frame_dur_unitless<span class="op">/(</span><span class="dt">float</span><span class="op">)</span>SDL_GetPerformanceFrequency<span class="op">();</span></span>
<span id="cb1-10"><a href="#cb1-10" aria-hidden="true" tabindex="-1"></a>    <span class="dt">float</span> t <span class="op">=</span> window_dur<span class="op">/</span><span class="fl">2.0</span><span class="bu">f</span><span class="op">;</span></span>
<span id="cb1-11"><a href="#cb1-11" aria-hidden="true" tabindex="-1"></a>    <span class="op">*</span>ema_dur <span class="op">=</span> <span class="op">(</span>frame_dur<span class="op">*</span>frame_dur <span class="op">+</span> <span class="op">*</span>ema_dur<span class="op">*</span>t<span class="op">)/(</span>t<span class="op">+</span>frame_dur<span class="op">);</span></span>
<span id="cb1-12"><a href="#cb1-12" aria-hidden="true" tabindex="-1"></a><span class="op">}</span></span></code></pre></div>
</body>
</html>]]></content:encoded>
    </item>
</channel>
</rss>
