/**
 * LaTeX parser: tokenizes a LaTeX source string and parses the tokens into a simple AST.
 * @module latex
 */
import { unwrap } from 'ms-utils/typescript-utils';

import symbols from './symbols';

/**
 * Leaf node of the AST: a plain run of text.
 * `type` is the discriminant tag; `value` is the text carried by the node.
 */
type AstTextNode = {
  type: 'text';
  value: string;
};

/**
 * Fraction node of the AST.
 * Both operands are text nodes, so this shape cannot represent a fraction
 * whose numerator or denominator is itself a fraction.
 */
type AstFracNode = {
  type: 'frac';
  denominator: AstTextNode;
  numerator: AstTextNode;
};

/**
 * Any node that can appear in the AST.
 * This is a union discriminated by the `type` property ('text' or 'frac').
 */
export type AstNode = AstTextNode | AstFracNode;

// Regexes
// TODO: These regexes are not very future proof.
// Every pattern is anchored at the start of the input, skips leading
// whitespace, and exposes the token text as capture group 1.

// Number: an optional minus sign followed by any run of digits and dots.
// The run may be empty, so this pattern matches every input; callers must
// check that the capture is non-empty. It also accepts inputs such as "1.2.3".
const number = /^\s*(\-?[0-9.]*)/;
// Group: text wrapped in one pair of braces; the capture is the content
// without the braces. Nested braces are not matched.
const group = /^\s*\{([^\{\}]*)\}/;
// Command: a backslash followed by every character up to the next digit or
// brace. NOTE(review): the run also swallows whitespace and further
// backslashes, so "\\alpha \\beta" is captured as a single token.
const command = /^\s*(\\[^0-9\{\}]*)/;

/**
 * Read a single token from the front of a source string.
 *
 * The `number`, `group` and `command` patterns are tried in that order and
 * the first one with a non-empty capture wins. Leading whitespace is consumed
 * together with the token.
 *
 * @param {string} source - The source string to read from
 * @return {Object}
 * An object holding the token text and whatever is left of the source string
 * @throws {Error} If no pattern produces a token
 *
 * @example
 * next('12\\frac{1}{2}'); // -> { token: '12', remainder: '\\frac{1}{2}' }
 */
export function next(source: string): { token: string; remainder: string } {
  let found: RegExpMatchArray | null = null;

  // Patterns in priority order. When the loop runs to completion, `found`
  // holds the result of the last pattern (`command`), which may be null.
  for (const pattern of [number, group, command]) {
    found = source.match(pattern);
    if (found && found[1]) {
      break;
    }
  }

  if (!found) {
    // TODO: Other commands
    throw new Error(`Unexpected token! ${source}`);
  }

  // found[0] is the whole match (including skipped whitespace and, for
  // groups, the braces); found[1] is just the token text.
  const consumed = found[0].length;
  return {
    token: unwrap(found[1]),
    remainder: source.slice(consumed),
  };
}

/**
 * Tokenize a source string.
 *
 * A falsy source yields an empty array. Otherwise the source is trimmed and
 * tokens are pulled off its front with `next` until nothing is left; any
 * error thrown by `next` propagates to the caller.
 *
 * @param {string} source - Source string to be tokenized
 * @return {Array} The tokens (strings), in source order
 *
 * @example
 * lex('12\\frac{1}{2}'); // -> ['12', '\\frac', '1', '2']
 */
export function lex(source: string): string[] {
  if (!source) {
    return [];
  }

  const collected: string[] = [];

  for (let pending = source.trim(); pending.length > 0; ) {
    const step = next(pending);
    collected.push(step.token);
    pending = step.remainder;
  }

  return collected;
}

/**
 * AST node creator - builds a text node
 * @param {string} a - The text the node should carry
 * @return {AstTextNode} A node of type 'text' whose value is `a`
 */
const text = (a: string): AstTextNode => {
  const node: AstTextNode = { type: 'text', value: a };
  return node;
};

/**
 * AST node creator - builds a frac node
 * Each operand is wrapped in its own text node.
 * @param {string} a - The text for the numerator node
 * @param {string} b - The text for the denominator node
 * @return {AstFracNode} A node of type 'frac'
 */
const frac = (a: string, b: string): AstFracNode => {
  const denominator = text(b);
  const numerator = text(a);
  return { type: 'frac', denominator, numerator };
};

/**
 * Parse a tokens array into an abstract syntax tree (AST)
 *
 * Tokens are read left to right:
 * - `\frac` consumes the next two tokens as numerator and denominator;
 * - a token that is an own key of `symbols` becomes a text node holding the
 *   mapped value;
 * - any other token becomes a text node holding the token itself.
 *
 * The input array is only read, never modified.
 *
 * NOTE(review): `unwrap` comes from ms-utils and is presumably what fails when
 * `\frac` is not followed by two tokens - confirm its behaviour on `undefined`.
 *
 * @param {Array.<string>} tokens - The array of tokens to parse
 * @return {Array.<Object>} The abstract syntax tree interpretation of the tokens
 *
 * @example
 * parse(['1', '\\frac', '1', '2']);
 * // -> [
 * //   {
 * //     type: 'text',
 * //     value: '1',
 * //   },
 * //   {
 * //     type: 'frac',
 * //     denominator: {
 * //       type: 'text',
 * //       value: '2',
 * //     },
 * //     numerator: {
 * //       type: 'text',
 * //       value: '1',
 * //     },
 * //   },
 * // ]
 */
export function parse(tokens: string[]): Array<AstNode> {
  const ast: AstNode[] = [];

  // Walk the array with a cursor instead of shift(): the caller's array is
  // left intact and each step is O(1).
  let cursor = 0;

  while (cursor < tokens.length) {
    const token = unwrap(tokens[cursor++]);
    if (token === '\\frac') {
      // TODO: This is really naive and doesn't allow for anything other than strings in each token.
      // It also doesn't handle failure to provide both fraction tokens!
      // Reading past the end yields undefined, exactly as shift() on an
      // empty array did, so unwrap sees the same value as before.
      const a = unwrap(tokens[cursor++]); // Unsafe unwrap
      const b = unwrap(tokens[cursor++]); // Unsafe unwrap
      ast.push(frac(a, b));
    } else if (
      // Own-property check: a token such as 'constructor' or 'toString' must
      // not resolve to a member inherited from Object.prototype.
      Object.prototype.hasOwnProperty.call(symbols, token) &&
      symbols[token] !== undefined
    ) {
      ast.push(text(unwrap(symbols[token])));
    } else {
      ast.push(text(token));
    }
  }

  return ast;
}
