import '@tensorflow/tfjs-core';
import * as faceLandmarksDetection from '@tensorflow-models/face-landmarks-detection';
import { createSnack } from '../actions/SnackActions';

// Model identifier and options that TalkingDetector._loadDetector() passes to
// faceLandmarksDetection.createDetector().
const model = faceLandmarksDetection.SupportedModels.MediaPipeFaceMesh;
const detectorConfig = {
  runtime: 'mediapipe',
  // TODO: decide whether these assets should be hosted by our own backend
  // instead of being fetched from a third-party CDN.
  solutionPath: 'https://cdn.jsdelivr.net/npm/@mediapipe/face_mesh',
};

// Number of relative lip-distance samples kept in TalkingDetector's `pattern`
// buffer; calculateSumDifference() returns 0 until that many samples exist.
const patternSize = 10;
// Threshold the summed absolute change between neighbouring samples must
// exceed for TalkingDetector.update() to report talking.
const minChange = 0.16;

/**
 * Reports whether the current environment can create a WebGL rendering
 * context on a canvas element.
 *
 * @returns {boolean} `true` when `window.WebGLRenderingContext` exists and a
 *   `'webgl'` or `'experimental-webgl'` context could be obtained; `false`
 *   otherwise, including when probing throws (e.g. no DOM available).
 */
function hasWebGLSupport() {
  try {
    if (!window.WebGLRenderingContext) {
      return false;
    }
    const canvas = document.createElement('canvas');
    const context =
      canvas.getContext('webgl') || canvas.getContext('experimental-webgl');
    // Coerce so callers get a strict boolean rather than a context object/null.
    return Boolean(context);
  } catch (e) {
    // Best-effort probe: any failure simply means "not supported".
    return false;
  }
}

/**
 * Heuristic talking detector: plays a media stream in an off-DOM <video>
 * element, runs a face-landmarks detector on it and watches how much the
 * lip opening (relative to face height) changes over recent samples.
 */
class TalkingDetector {
  /**
   * @param {MediaStream} stream - Stream assigned to the internal video
   *   element's `srcObject` when `load()` runs.
   * @param {boolean} [enabled=true] - When false, `load()` is a no-op and
   *   `update()` always resolves to false.
   */
  constructor(stream, enabled = true) {
    this.enabled = enabled;
    this.stream = stream;
    this.detector = null;
    // Ring buffer of relative lip distances; `i` is the next write position.
    this.pattern = [];
    this.i = 0;
    this.video = document.createElement('video');
  }

  /**
   * Attaches the stream to the video element and creates the detector.
   * Does nothing when the instance is disabled, and stops early if attaching
   * the stream disabled it (no stream was provided).
   *
   * @returns {Promise<void>}
   */
  async load() {
    if (!this.enabled) return;
    await this._setInputStream(this.stream);
    // _setInputStream() destroys the instance when there is no stream; do not
    // load a detector that can never be used.
    if (!this.enabled) return;
    await this._loadDetector();
  }

  /**
   * Starts playing `stream` (muted) in the internal video element. A falsy
   * stream destroys the detector instead.
   *
   * @param {MediaStream} stream
   * @returns {Promise<void>} Resolves once `video.play()` resolves.
   */
  async _setInputStream(stream) {
    if (!stream) {
      this.destroy();
      // Nothing to play: bail out instead of awaiting play() with no source.
      return;
    }

    this.video.srcObject = stream;
    this.video.muted = true;
    this.video.onplaying = () => {
      // Mirror the intrinsic frame size onto the element's width/height.
      this.video.width = this.video.videoWidth;
      this.video.height = this.video.videoHeight;
    };
    await this.video.play();
  }

  /**
   * Disables the detector and detaches the stream from the video element.
   */
  destroy() {
    this.enabled = false;
    if (this.video.srcObject) {
      this.video.srcObject = null;
    }
  }

  /**
   * Sums the absolute differences between neighbouring entries of `arr`.
   *
   * NOTE(review): entries are compared in array order. `update()` fills
   * `pattern` as a ring buffer, so once it has wrapped, one compared pair is
   * not chronologically adjacent. Left as-is because `minChange` is tuned
   * against this behaviour.
   *
   * @param {number[]} arr - Samples to compare.
   * @returns {number} The summed change, or 0 while fewer than `patternSize`
   *   samples are available.
   */
  calculateSumDifference(arr) {
    if (arr.length < patternSize) {
      return 0;
    }

    let difference = 0;
    for (let i = 1; i < arr.length; i++) {
      difference += Math.abs(arr[i] - arr[i - 1]);
    }
    return difference;
  }

  /**
   * Samples the current video frame and reports whether the recorded lip
   * movement exceeds `minChange`.
   *
   * The sample buffer is cleared when talking is detected, when no face is
   * found, or when the face data is unusable.
   *
   * @returns {Promise<boolean>} `true` when talking was detected; `false`
   *   otherwise, including when the detector is disabled or not loaded.
   */
  async update() {
    // Detector may be missing: disabled instance, load() not finished, or
    // no WebGL support.
    if (!this.enabled || !this.detector) {
      return false;
    }

    const [prediction] = await this.detector.estimateFaces(this.video);
    if (!prediction) {
      this.reset();
      return false;
    }

    const lipKeypoints = prediction.keypoints.filter(
      (keypoint) => keypoint.name === 'lips'
    );
    const topLip = lipKeypoints[0];
    const bottomLip = lipKeypoints[3];
    const faceHeight = prediction.box.yMax - prediction.box.yMin;

    // Without both lip points or a positive face height the ratio below
    // would throw or produce NaN/Infinity; treat the frame like "no face".
    if (!topLip || !bottomLip || !(faceHeight > 0)) {
      this.reset();
      return false;
    }

    const lipDistance = bottomLip.y - topLip.y;
    // Normalise by face height so the value does not depend on face size.
    const lipDistanceRelative = lipDistance / faceHeight;
    this.pattern[this.i] = lipDistanceRelative;
    this.i = (this.i + 1) % patternSize;

    const sumDifference = this.calculateSumDifference(this.pattern);
    const isTalking = sumDifference > minChange;

    if (isTalking) {
      // Start a fresh window so one burst is not reported repeatedly.
      this.reset();
    }
    return isTalking;
  }

  /**
   * Clears the recorded samples and rewinds the write position.
   */
  reset() {
    this.pattern = [];
    this.i = 0;
  }

  /**
   * Creates the face-landmarks detector once. Leaves `detector` as null when
   * WebGL is not supported.
   *
   * @returns {Promise<void>}
   */
  async _loadDetector() {
    if (this.detector) return;
    if (!hasWebGLSupport()) return;
    this.detector = await faceLandmarksDetection.createDetector(
      model,
      detectorConfig
    );
  }
}

export default TalkingDetector;
