Search for static page with Hugo

Note: This was used on the old version of the blog.

The Hugo.io project allows easy and fast website generation through static web pages. No external dependencies like databases, server-side scripting languages or running processes are needed to run the website; simple file hosting such as GitHub Pages, Amazon S3 or similar is all that is needed.

Searching, however, is difficult because it has to be handled on the client side. Using the lunrjs project, and hugely inspired by sebz, who also implemented search for Hugo, I adapted a search for this site. The basic idea is that a search index file is created while the site is generated. The client can then download this file and search it.

It starts with adding a search page to the site. For me, this is a simple search.md file in the content folder. Besides the front matter, it contains no extra data.

+++
title = "Search"
id = "Search"
type = "search"
showPagination = false
+++

Notice the type parameter, as it specifies the layout to load. The search layout is specified in /layouts/search/single.html. The required functionality is listed below, and sure, the design might need an improvement eventually… Make sure to download a recent version of lunr.js and put it into the static/js/vendor/ folder, or change the path in this piece of code accordingly.

    <div id="categories-archives" class="main-content-wrap">
        <h5 class="archive-result text-color-base text-xlarge" data-message-zero="nothing found"
            data-message-one="1 match" data-message-other="{n} matches"></h5>
        <form id="filter-form">
            <input id="search" class="form-control input--xlarge" placeholder="Search" autofocus="autofocus"
                   type="text">
        </form>
        <section class="boxes">
            <ul id="results">
            </ul>
        </section>
    </div>
    <script type="text/javascript" src="https://code.jquery.com/jquery-2.1.3.min.js"></script>
    <script type="text/javascript" src="/js/vendor/lunr.js"></script>
    <script type="text/javascript">
      // State shared by the search functions in this script.
      // lunrIndex: value returned by lunr(), assigned in initLunr() once the index JSON has loaded
      // $results: jQuery object for the #results list, assigned in initUI()
      // pagesIndex: parsed contents of PagesIndex.json, assigned in initLunr()
      var lunrIndex,
        $results,
        pagesIndex;

      /**
       * Download the generated page index and build the lunr search index from it.
       *
       * On success `pagesIndex` holds the downloaded page records and `lunrIndex`
       * holds the index returned by lunr(); afterwards a keyup is triggered on the
       * search box so that text already present in the input gets searched.
       * On failure the error is logged to the console.
       */
      function initLunr() {
        // First retrieve the index file
        $.getJSON("/js/lunr/PagesIndex.json")
          .done(function (index) {
            pagesIndex = index;
            console.log("index:", pagesIndex);

            // Set up lunrjs by declaring the fields we use
            // Also provide their boost level for the ranking
            lunrIndex = lunr(function () {
              this.field("title", {
                boost: 10
              });
              this.field("tags", {
                boost: 5
              });
              this.field("content");

              // ref is the result item identifier (I chose the page URL)
              this.ref("href");

              // Feed lunr with each page. Named `builder` so the global `lunr`
              // function is not shadowed inside this callback.
              const builder = this;
              pagesIndex.forEach(function (page) {
                builder.add(page);
              });

              // No explicit build() call here: lunr() builds and returns the
              // index itself once this callback has finished.
            });

            // Re-run the search only now that lunrIndex is assigned. Doing this
            // inside the builder callback would search against an undefined index.
            $("#search").trigger('keyup');
          })
          .fail(function (jqxhr, textStatus, error) {
            var err = textStatus + ", " + error;
            console.error("Error getting Hugo index flie:", err);
          });
      }

      /**
       * Wire up the search box: on every key release the result list is cleared
       * and, for queries of at least 2 characters, searched and rendered again.
       */
      function initUI() {
        $results = $("#results");
        $("#search").on("keyup", function () {
          // Always start from an empty list
          $results.empty();

          var term = $(this).val();
          // Queries shorter than 2 characters are not searched
          if (term.length >= 2) {
            renderResults(search(term));
          }
        });
      }

      /**
       * Trigger a search in lunr and map each hit back to its page record.
       *
       * Lunr result:  {ref: "/section/page1", score: 0.2725657778206127}
       * Our result:   {title:"Page1", href:"/section/page1", ...}
       *
       * Returns an empty array while the index has not been loaded yet and when
       * lunr rejects the query text with a QueryParseError (which can happen
       * for a half-typed query). Hits whose ref matches no page are dropped.
       *
       * @param  {String} query
       * @return {Array}  matching page records, in lunr's ranking order
       */
      function search(query) {
        // The index is downloaded asynchronously; typing may start before it exists
        if (!lunrIndex || !pagesIndex) {
          return [];
        }

        var hits;
        try {
          hits = lunrIndex.search(query);
        } catch (err) {
          // An unparsable query is expected while typing; anything else is a real error
          if (err && err.name === "QueryParseError") {
            return [];
          }
          throw err;
        }

        return hits
          .map(function (hit) {
            return pagesIndex.find(function (page) {
              return page.href === hit.ref;
            });
          })
          .filter(function (page) {
            return page !== undefined;
          });
      }

      /**
       * Append at most the first ten results to the result list, each as a
       * list item containing a link to the page.
       *
       * @param  {Array} results page records to display
       */
      function renderResults(results) {
        if (!results.length) {
          return;
        }

        // Build the <li><a>…</a></li> element for one page record
        function toListItem(page) {
          var $item = $("<li>");
          var $link = $("<a>", {
            href: '/' + page.href,
            text: "» " + page.title
          });
          return $item.append($link);
        }

        // Anything beyond the tenth result is not shown
        results.slice(0, 10).forEach(function (page) {
          $results.append(toListItem(page));
        });
      }

      // Start downloading the index immediately; hook up the input once the DOM is ready
      initLunr();

      $(function () {
        initUI();
      });
    </script>

And now, finally, the interesting part: generating the search index file. As I am using gulp for tasks such as image conversion, I rewrote it to match the gulp style, as shown below. It requires some Node.js packages to work, which can be installed via npm install --save bluebird gulp recursive-readdir string toml.

const Promise = require("bluebird");

const fs = Promise.promisifyAll(require("fs"));
const gulp = require('gulp');
const path = require("path");
const recursiveReaddir = require('recursive-readdir');
const s = require("string");
const toml = require("toml");

gulp.task('lunr', function () {
  /**
   * Walk the content folder and start processing every supported file.
   *
   * @param {string} contentFolder folder that is read recursively
   * @returns {Promise<Array>} resolves to the values returned by processFile
   *   for the files it handles; files for which processFile returns undefined
   *   are left out. Rejects with the original error when the folder cannot
   *   be read.
   */
  const indexPages = function (contentFolder) {
    return recursiveReaddir(contentFolder)
      .then(function (files) {
        return files
          .map(function (file) {
            console.debug(`Processing ${file}`);
            const abspath = path.normalize(file);
            return processFile(abspath, path.basename(abspath));
          })
          // processFile returns undefined for file types it does not handle
          .filter(function (entry) {
            return entry !== undefined;
          });
      }, function (error) {
        console.error("unable to read blog data", error);
        // Propagate the failure instead of resolving to undefined
        throw error;
      });
  };

  /**
   * Dispatch a file to the matching processor based on its extension.
   *
   * @param {string} abspath  path of the file
   * @param {string} filename base name of the file
   * @returns {*} whatever processHTMLFile / processMDFile returns, or
   *   undefined for any other extension
   */
  const processFile = function (abspath, filename) {
    const name = s(filename);

    if (name.endsWith(".html")) {
      return processHTMLFile(abspath, filename);
    }
    if (name.endsWith(".md")) {
      return processMDFile(abspath, filename);
    }

    // Any other file type is not indexed
    return undefined;
  };

  /**
   * Build the index entry for an HTML content file.
   *
   * Uses the promisified readFileAsync so the entry is actually handed back
   * to the caller; with callback-style fs.readFile the value returned from
   * the callback is discarded.
   *
   * @param {string} abspath  path of the file
   * @param {string} filename base name of the file
   * @returns {Promise<{title: string, href: string, content: string}>}
   *   title is the file name without ".html", href is the path with
   *   CONTENT_PATH_PREFIX removed, content is the text with tags and
   *   punctuation stripped. Rejects when the file cannot be read.
   */
  const processHTMLFile = function (abspath, filename) {
    return fs.readFileAsync(abspath)
      .then(function (fileContent) {
        const content = fileContent.toString();
        const pageName = s(filename).chompRight(".html").s;
        const href = s(abspath).chompLeft(CONTENT_PATH_PREFIX).s;
        return {
          title: pageName,
          href: href,
          content: s(content).trim().stripTags().stripPunctuation().s
        };
      });
  };

  /**
   * Derive a page href of the form "YYYY/MM/<slug>" from its front matter.
   * Year and month are taken from the ISO (UTC) form of the date; the slug is
   * the lower-cased title with every space replaced by a dash.
   *
   * @param {{date: *, title: string}} pageConfig parsed front matter
   * @returns {string} href without a leading slash
   */
  const configToHref = (pageConfig) => {
    const isoStamp = new Date(pageConfig.date).toISOString();
    // First 8 characters are "YYYY-MM-"; turn the dashes into path separators
    const datePrefix = isoStamp.substring(0, 8).split('-').join('/');
    const slug = pageConfig.title.toLowerCase().split(' ').join('-');
    return `${datePrefix}${slug}`;
  };

  /**
   * Build the index entry for a Markdown content file with "+++"-delimited
   * TOML front matter.
   *
   * @param {string} abspath  path of the file
   * @param {string} filename base name of the file (not used here)
   * @returns {Promise<Object|undefined>} resolves to
   *   {title, tags, href, content}, or to undefined when the front matter is
   *   missing or cannot be processed (the problem is logged). Rejects when
   *   the file cannot be read.
   */
  const processMDFile = function (abspath, filename) {
    return fs.readFileAsync(abspath)
      .then(function (fileContent) {
        // Text before the first "+++" is ignored; the next part is the front
        // matter; everything after the closing "+++" is the page body.
        const [, rawFrontMatter, ...bodyParts] = fileContent.toString().split("+++");
        try {
          if (rawFrontMatter === undefined) {
            throw new Error("no +++ front matter found");
          }
          const frontMatter = toml.parse(rawFrontMatter.trim());

          // The href comes from the front matter (date and title), not from the file path
          const href = configToHref(frontMatter);

          // Build Lunr index for this page
          return {
            title: frontMatter.title,
            tags: frontMatter.tags,
            href: href,
            // Re-join so that a "+++" occurring inside the body does not cut the content short
            content: s(bodyParts.join("+++")).trim().stripTags().stripPunctuation().s
          };
        } catch (e) {
          // Best effort: skip this page, but say which file was skipped
          console.error(`Skipping ${abspath}: ${e.message}`);
          return undefined;
        }
      });
  };

  // Folder holding the posts, with a trailing separator so it can be chomped
  // off file paths. Built with path.join/path.sep so it is "content\post\" on
  // Windows and "content/post/" elsewhere.
  const CONTENT_PATH_PREFIX = path.join("content", "post") + path.sep;

  // Collect the page entries, resolve them and write the index file.
  // NOTE(review): this chain is not handed back to gulp, so the task cannot
  // signal completion through it; failures are logged and reflected in the
  // process exit code instead.
  indexPages(CONTENT_PATH_PREFIX)
    .then(function (data) {
      if (!Array.isArray(data)) {
        throw new Error("no page data was collected");
      }
      // Each element is an entry (or a promise of one). Entries can be
      // undefined for skipped files, and Promise.props rejects on non-objects.
      return Promise.map(data, function (page) {
        return page === undefined ? undefined : Promise.props(page);
      });
    })
    .then(function (indexPageData) {
      const pages = indexPageData.filter(function (page) {
        return page !== undefined;
      });
      const indexPagesContent = JSON.stringify(pages, null, 2);
      console.debug('Writing PagesIndex.json');
      // The promisified variant is used because callback-style fs.writeFile
      // requires a callback
      return fs.writeFileAsync('static/js/lunr/PagesIndex.json', indexPagesContent);
    })
    .catch(function (error) {
      console.error("Unable to generate PagesIndex.json", error);
      process.exitCode = 1;
    });
});

That’s it. A simple search for a static-page website using Hugo. To see it in action, just head over to the search.