#!/bin/sh
# Based on Bugswriter's (https://github.com/Bugswriter)
# music-fairy (https://github.com/Bugswriter/music_fairy)
#
# Voice assistant: records $TIME seconds of audio from the default ALSA
# input, transcribes it with vosk-transcriber, and runs the action named by
# the first word of the transcript:
#   play <words>   search for a song with yt-dlp and play its audio in mpv
#   watch <words>  search $INVIDIOUS_INSTANCE and play the first hit in mpv
#   stop           kill any running mpv
# Any other (or empty) transcript does nothing.
#
# External tools used: ffmpeg, vosk-transcriber, mpv, yt-dlp, curl,
# notify-send, killall.

# Configuration
# Speech recognition engine
VOSK="$HOME/.local/pipx/venvs/vosk/bin/vosk-transcriber"
VOSK_MODEL="$HOME/.local/share/vosk-models/vosk-model-small-en-us-0.15/"
TIME=4 # Time in seconds for ffmpeg to record $SPEECH
# Cache
CACHE="$HOME/.local/cache/assistant"
# Multimedia
INVIDIOUS_INSTANCE="https://vid.puffyan.us"

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the recording and its transcript from the cache directory.
cleanup() {
  rm -f -- "$CACHE/input.wav" "$CACHE/output.txt"
}

# Play a song.
# Reads the global SPEECH (transcript starting with the word "play"),
# searches with yt-dlp and hands the resulting audio URL to mpv.
# Returns 1 when the search yields no URL, otherwise mpv's exit status.
play() {
  # Strip only the leading command word; the dispatcher guarantees SPEECH
  # starts with it, so later occurrences of "play" stay in the request.
  play_words=${SPEECH#play}
  play_words=${play_words# }

  killall -q mpv # -q: no running player is not an error worth reporting
  notify-send "󰝚 Playing" "$play_words"

  # Drop manifest URLs and keep the last remaining line of yt-dlp's output.
  play_url=$(
    yt-dlp ytsearch:"song audio $play_words" -f bestaudio --get-url |
      grep -v manifest |
      tail -n 1
  )
  if [ -z "$play_url" ]; then
    printf 'play: no result for "%s"\n' "$play_words" >&2
    return 1
  fi
  mpv "$play_url"
}

# Watch a video.
# Reads the global SPEECH (transcript starting with the word "watch"),
# queries the Invidious search page and plays the first "watch?v=<id>"
# match through youtube.com in mpv.
# Returns 1 when the search yields no match, otherwise mpv's exit status.
watch() {
  watch_words=${SPEECH#watch}
  watch_words=${watch_words# }

  killall -q mpv # -q: no running player is not an error worth reporting
  notify-send " Loading" "$watch_words"

  # Join the words with '+' for the query string (runs of spaces collapse).
  watch_query=$(printf '%s' "$watch_words" | tr -s ' ' '+')
  watch_path=$(
    curl -s "$INVIDIOUS_INSTANCE/search?q=$watch_query" |
      grep -Eo "watch\?v=.{11}" |
      head -n 1
  )
  if [ -z "$watch_path" ]; then
    printf 'watch: no result for "%s"\n' "$watch_words" >&2
    return 1
  fi
  mpv "https://youtube.com/$watch_path"
}

# Read SPEECH
mkdir -p -- "$CACHE" || die "assistant: cannot create $CACHE"
trap cleanup EXIT # also clears the cache when a step below aborts

ffmpeg -y \
  -f alsa \
  -i default \
  -acodec pcm_s16le \
  -ac 1 -ar 44100 \
  -t "$TIME" \
  -f wav \
  "$CACHE/input.wav" || die "assistant: recording failed"

# Analyze with vosk-transcriber
"$VOSK" -m "$VOSK_MODEL" -i "$CACHE/input.wav" -o "$CACHE/output.txt" ||
  die "assistant: transcription failed"

SPEECH=
# Only the first line of the transcript is used; read fails harmlessly on
# an empty file and leaves SPEECH empty.
read -r SPEECH <"$CACHE/output.txt"
cleanup # cached files are not needed while the player runs

# Main Menu: dispatch on the first word of the transcript.
case ${SPEECH%% *} in
  play) play ;;
  watch) watch ;;
  stop) killall mpv ;;
esac