Overview

Parsers determine how content is parsed into a valid hast tree. Each parser is assigned to a mime type, which is inferred from the filename. You can add a new parser, or override an existing one, by assigning a custom parser to a mime type.

Supported parsers

unified-doc infers the mime type from the filename to determine which parser should be used. unified-doc currently supports, or plans to eventually support, the following document formats (and their related mime types):

text/html: HTML parser
text/markdown: Markdown parser
text/csv: CSV parser
application/vnd.openxmlformats-officedocument.wordprocessingml.document: DOCX parser
application/pdf: PDF parser
application/x-latex: LaTeX parser
application/rtf: RTF parser
and potentially more formats.

The filename should correspond naturally to the content being parsed (e.g. a filename ending in .md for markdown content). This is usually not an issue when both pieces of information are read from a common source/file/document.

Live Code Editor

// import { createElement } from 'react';
// import rehype2react from 'rehype-react';
// import Doc from 'unified-doc';

function() {
  const markdownDoc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: '> **some** markdown content',
    filename: 'doc.md',
  });

  const htmlDoc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: '<blockquote><strong>some</strong> HTML content</blockquote>',
    filename: 'doc.html',
  });

  const csvDoc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: 'Column 0,Column 1,Column 2\nrow1col0,row1col1,row1col2',
    filename: 'doc.csv',
  });

  return (
    <div>
      <h4><code>doc.md</code></h4>
      {markdownDoc.compile().result}
      <h4><code>doc.html</code></h4>
      {htmlDoc.compile().result}
      <h4><code>doc.csv</code></h4>
      {csvDoc.compile().result}
    </div>
  );
}

// import { createElement } from 'react';
// import rehype2react from 'rehype-react';
// import Doc from 'unified-doc';
function() {
  const markdownDoc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: '> **some** markdown content',
    filename: 'doc.md',
  });
  const htmlDoc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: '<blockquote><strong>some</strong> HTML content</blockquote>',
    filename: 'doc.html',
  });
  const csvDoc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: 'Column 0,Column 1,Column 2\nrow1col0,row1col1,row1col2',
    filename: 'doc.csv',
  });
  return (
    <div>
      <h4><code>doc.md</code></h4>
      {markdownDoc.compile().result}
      <h4><code>doc.html</code></h4>
      {htmlDoc.compile().result}
      <h4><code>doc.csv</code></h4>
      {csvDoc.compile().result}
    </div>
  );
}

Preview

`doc.md`

some markdown content

`doc.html`

some HTML content

`doc.csv`

Column 0	Column 1	Column 2
row1col0	row1col1	row1col2

Fallback code block parser

If no parser is supported for the inferred mime type, a fallback parser that renders the content into a code block is applied. This is useful for syntax highlighters, and is explored further in the Syntax highlighting section. Source code documents usually fall under this category.

Live Code Editor

// import { createElement } from 'react';
// import rehype2react from 'rehype-react';
// import Doc from 'unified-doc';

function() {
  const jsonDoc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: JSON.stringify({
      one: 2,
      three: [true, false, null, 'four', 5],
    }, null, 2),
    filename: 'doc.json',
  });

  const jsDoc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: 'function greet() {\n  return "hello world";\n}',
    filename: 'doc.js',
  });

  const pythonDoc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: 'def greet():\n\treturn "hello world"',
    filename: 'doc.py',
  });

  return (
    <div>
      <h4><code>doc.json</code></h4>
      {jsonDoc.compile().result}
      <h4><code>doc.js</code></h4>
      {jsDoc.compile().result}
      <h4><code>doc.py</code></h4>
      {pythonDoc.compile().result}
    </div>
  );
}

// import { createElement } from 'react';
// import rehype2react from 'rehype-react';
// import Doc from 'unified-doc';
function() {
  const jsonDoc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: JSON.stringify({
      one: 2,
      three: [true, false, null, 'four', 5],
    }, null, 2),
    filename: 'doc.json',
  });
  const jsDoc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: 'function greet() {\n  return "hello world";\n}',
    filename: 'doc.js',
  });
  const pythonDoc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: 'def greet():\n\treturn "hello world"',
    filename: 'doc.py',
  });
  return (
    <div>
      <h4><code>doc.json</code></h4>
      {jsonDoc.compile().result}
      <h4><code>doc.js</code></h4>
      {jsDoc.compile().result}
      <h4><code>doc.py</code></h4>
      {pythonDoc.compile().result}
    </div>
  );
}

Preview

`doc.json`

{
  "one": 2,
  "three": [
    true,
    false,
    null,
    "four",
    5
  ]
}

`doc.js`

function greet() {
  return "hello world";
}

`doc.py`

def greet():
	return "hello world"

Custom parser

Parsers are applied using the PluggableList interface and can include multiple steps, e.g. [parser1] or [parser2, parser3]. Custom parsers are specified through a mapping of mime types to their associated parsers.

You can override an existing parser by assigning a new parser to its supported mime type, or assign specific parsers to otherwise unsupported mime types.

Live Code Editor

// import Doc from 'unified-doc';

function () {
  // Plugin that installs a Parser on the processor it is attached to (`this`).
  // The Parser ignores any syntax in the document and wraps the raw string in a
  // root node that holds a single text node.
  function flattenToString() {
    this.Parser = function (source) {
      const textNode = { type: 'text', value: source };
      return { type: 'root', children: [textNode] };
    };
  }

  // Plugin that returns a transformer producing a copy of the tree in which
  // every direct child has its `value` upper-cased. The input tree and its
  // children are left unmodified. Each child must have a string `value`.
  function capitalize() {
    const withUpperCaseValue = (node) => ({
      ...node,
      value: node.value.toUpperCase(),
    });

    return function transformer(tree) {
      const upperCased = tree.children.map(withUpperCaseValue);
      return { ...tree, children: upperCased };
    };
  }

  const customParser = [flattenToString, capitalize];

  const markdownDoc = Doc({
    content: '> **some** markdown content',
    filename: 'doc.md',
    parsers: {
      'text/markdown': customParser,
      'application/json': customParser,
    },
  });

  const jsonDoc = Doc({
    content: '{ "one": 2, "three": [true, false, null, "four", 5]}',
    filename: 'doc.json',
    parsers: {
      'text/markdown': [flattenToString, capitalize],
      'application/json': [flattenToString, capitalize],
    },
  });

  return (
    <div>
      <h4>Override markdown parser</h4>
      <pre>{markdownDoc.compile().contents}</pre>
      <h4>New JSON parser</h4>
      <pre>{jsonDoc.compile().contents}</pre>
    </div>
  );
}

// import Doc from 'unified-doc';
function () {
  // Plugin that installs a Parser on the processor it is attached to (`this`).
  // The Parser ignores any syntax in the document and wraps the raw string in a
  // root node that holds a single text node.
  function flattenToString() {
    this.Parser = function (source) {
      const textNode = { type: 'text', value: source };
      return { type: 'root', children: [textNode] };
    };
  }
  // Plugin that returns a transformer producing a copy of the tree in which
  // every direct child has its `value` upper-cased. The input tree and its
  // children are left unmodified. Each child must have a string `value`.
  function capitalize() {
    const withUpperCaseValue = (node) => ({
      ...node,
      value: node.value.toUpperCase(),
    });
    return function transformer(tree) {
      const upperCased = tree.children.map(withUpperCaseValue);
      return { ...tree, children: upperCased };
    };
  }
  const customParser = [flattenToString, capitalize];
  const markdownDoc = Doc({
    content: '> **some** markdown content',
    filename: 'doc.md',
    parsers: {
      'text/markdown': customParser,
      'application/json': customParser,
    },
  });
  const jsonDoc = Doc({
    content: '{ "one": 2, "three": [true, false, null, "four", 5]}',
    filename: 'doc.json',
    parsers: {
      'text/markdown': [flattenToString, capitalize],
      'application/json': [flattenToString, capitalize],
    },
  });
  return (
    <div>
      <h4>Override markdown parser</h4>
      <pre>{markdownDoc.compile().contents}</pre>
      <h4>New JSON parser</h4>
      <pre>{jsonDoc.compile().contents}</pre>
    </div>
  );
}

Preview

Override markdown parser

> **SOME** MARKDOWN CONTENT

New JSON parser

{ "ONE": 2, "THREE": [TRUE, FALSE, NULL, "FOUR", 5]}

Disable supported parser

If you would prefer to use the fallback code block parser to render the source code of a file, you can disable a supported parser by setting its value to null for the associated mime type in the parsers option. The following example disables the default HTML parser so that the fallback code block parser renders the source code of the HTML content instead.

Live Code Editor

// import { createElement } from 'react';
// import rehype2react from 'rehype-react';
// import Doc from 'unified-doc';

function() {
  const doc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: '<blockquote><strong>some</strong> HTML content</blockquote>',
    filename: 'doc.html',
    parsers: {
      'text/html': null,
    },
  });

  return <div>{doc.compile().result}</div>
}

// import { createElement } from 'react';
// import rehype2react from 'rehype-react';
// import Doc from 'unified-doc';
function() {
  const doc = Doc({
    compiler: [[rehype2react, { createElement }]],
    content: '<blockquote><strong>some</strong> HTML content</blockquote>',
    filename: 'doc.html',
    parsers: {
      'text/html': null,
    },
  });
  return <div>{doc.compile().result}</div>
}

Preview

<blockquote><strong>some</strong> HTML content</blockquote>