#!/bin/sh
# Based on Bugswriter's (https://github.com/Bugswriter)
# music-fairy (https://github.com/Bugswriter/music_fairy)
#
# Generate a string of text based on 4s of listening through default input.
# The first word of the string will dictate which function to execute.

# ---- Configuration ----
# Speech recognition engine: the vosk-transcriber executable and the model
# directory handed to it with -m.
VOSK="${HOME}/.local/pipx/venvs/vosk/bin/vosk-transcriber"
VOSK_MODEL="${HOME}/.local/share/vosk-models/vosk-model-small-en-us-0.15/"
# Length, in seconds, of the recording ffmpeg captures for $SPEECH.
TIME=4
# Cache directory holding the temporary recording and its transcript.
CACHE="${HOME}/.local/cache/assistant"
# Multimedia: Invidious instance queried when searching for a video.
INVIDIOUS_INSTANCE="https://vid.puffyan.us"

# Read SPEECH
# Every expansion below is quoted so that a path containing spaces is still
# passed as a single argument.
mkdir -p "$CACHE"
# Drop leftovers from an interrupted run so an old transcript is never
# mistaken for the result of this recording.
rm -f "$CACHE/input.wav" "$CACHE/output.txt"
# Record $TIME seconds from the default ALSA input as a mono, 44100 Hz,
# 16-bit PCM WAV file (-y overwrites without asking).
ffmpeg -y \
    -f alsa \
    -i default \
    -acodec pcm_s16le \
    -ac 1 -ar 44100 \
    -t "$TIME" \
    -f wav \
    "$CACHE/input.wav"

# Analyze with vosk-transcriber
"$VOSK" -m "$VOSK_MODEL" -i "$CACHE/input.wav" -o "$CACHE/output.txt"
# Save the first line of the transcript in $SPEECH. It starts out empty so a
# failed transcription cannot leave an inherited value behind, and -r keeps
# backslashes literal.
SPEECH=""
read -r SPEECH < "$CACHE/output.txt"
# Remove cached files (plain files, so no recursive delete is needed)
rm -f "$CACHE/input.wav" "$CACHE/output.txt"

play() { # Play a song
    # Reads:   SPEECH (the transcribed sentence)
    # Writes:  QUERY (the yt-dlp search string), _play_song, _play_url
    # Returns: non-zero when no song was requested or nothing was found,
    #          otherwise the exit status of mpv.

    # The request is everything after the FIRST "play"; a single '#' removes
    # the shortest prefix, so "play the playlist song" keeps "the playlist song".
    _play_song="${SPEECH#*play}"
    # Trim the leading blanks left behind by the removed command word.
    _play_song="${_play_song#"${_play_song%%[! ]*}"}"
    if [ -z "$_play_song" ]; then
        printf '%s\n' "play: no song requested" >&2
        return 1
    fi

    killall mpv
    notify-send "󰝚  Playing" "$_play_song"
    QUERY="song audio $_play_song"
    # Take the last non-manifest URL printed by yt-dlp.
    _play_url="$(
        yt-dlp ytsearch:"$QUERY" -f bestaudio --get-url |
        grep -v manifest | tail -n 1
    )"
    if [ -z "$_play_url" ]; then
        printf '%s\n' "play: no result for '$_play_song'" >&2
        return 1
    fi
    mpv "$_play_url"
}

watch() { # Watch a video
    # Reads:   SPEECH (the transcribed sentence), INVIDIOUS_INSTANCE
    # Writes:  QUERY ('+'-joined search terms), _watch_terms, _watch_id
    # Returns: non-zero when no video was requested or the search had no hit,
    #          otherwise the exit status of mpv.

    # The request is everything after the FIRST "watch"; a single '#' removes
    # the shortest prefix, so "watch how to watch birds" keeps the whole title.
    _watch_terms="${SPEECH#*watch}"
    # Trim the leading blanks left behind by the removed command word.
    _watch_terms="${_watch_terms#"${_watch_terms%%[! ]*}"}"
    if [ -z "$_watch_terms" ]; then
        printf '%s\n' "watch: no video requested" >&2
        return 1
    fi

    killall mpv
    notify-send "  Loading" "$_watch_terms"
    # Join the words with '+'; printf with a quoted argument avoids the glob
    # expansion an unquoted echo would perform, and -s squeezes repeated blanks.
    QUERY="$(printf '%s' "$_watch_terms" | tr -s ' ' '+')"
    # First "watch?v=<11 chars>" link on the search results page.
    _watch_id="$(
        curl -s "$INVIDIOUS_INSTANCE/search?q=$QUERY" |
        grep -Eo "watch\?v=.{11}" | head -n 1
    )"
    if [ -z "$_watch_id" ]; then
        printf '%s\n' "watch: no result for '$_watch_terms'" >&2
        return 1
    fi
    mpv "https://youtube.com/$_watch_id"
}

# Main Menu
# Run the action named by the first word of $SPEECH.
# Writes:  CMD (the first word of $SPEECH)
# Returns: the status of the action that ran, or 1 when the first word is not
#          a known command. A case statement is used so that a successful
#          "play" or "watch" is not followed by a failing test for "stop".
main_menu() {
    # Everything up to the first space; the whole string when there is none.
    CMD="${SPEECH%% *}"
    case "$CMD" in
        play)  play ;;
        watch) watch ;;
        stop)  killall mpv ;;
        *)     return 1 ;;
    esac
}
main_menu