~geb/numen

b025766e84fa852eebb7dc8421302b2604647db4 — John Gebbie 10 months ago 301275b
gofmt, use 0o octal literals
5 files changed, 129 insertions(+), 115 deletions(-)

M handler.go
M noise.go
M numen.go
M vox/phrasesplit/phrasesplit.go
M vox/vox.go
M handler.go => handler.go +31 -30
@@ 1,8 1,8 @@
package main

import (
	"fmt"
	"bytes"
	"fmt"
	"io"
	"os"
	"os/exec"


@@ 49,10 49,10 @@ func delay(ms, env string, def int, line string) int {
}

// Default key and type timings. Each value is produced by delay() from a
// literal string ("2" or "8") and the name of a NUMEN_* environment variable.
// NOTE(review): presumably the environment variable overrides the literal and
// the unit is milliseconds (delay's first parameter is named ms) — confirm in
// delay(). Diff view: realigned lines appear in both their old and new form.
var (
	defaultKeyDelay = delay("2", "NUMEN_KEY_DELAY", 0, "")
	defaultKeyHold = delay("8", "NUMEN_KEY_HOLD", 0, "")
	defaultKeyDelay  = delay("2", "NUMEN_KEY_DELAY", 0, "")
	defaultKeyHold   = delay("8", "NUMEN_KEY_HOLD", 0, "")
	defaultTypeDelay = delay("2", "NUMEN_TYPE_DELAY", 0, "")
	defaultTypeHold = delay("8", "NUMEN_TYPE_HOLD", 0, "")
	defaultTypeHold  = delay("8", "NUMEN_TYPE_HOLD", 0, "")
)

type Handler interface {


@@ 90,7 90,7 @@ func cutWord(s, word string) (string, bool) {
	if s == word {
		return "", true
	}
	if strings.HasPrefix(s, word + " ") || strings.HasPrefix(s, word + "\t") {
	if strings.HasPrefix(s, word+" ") || strings.HasPrefix(s, word+"\t") {
		return s[len(word)+1:], true
	}
	return "", false


@@ 98,9 98,12 @@ func cutWord(s, word string) (string, bool) {

// button converts a mouse button name or digit ("left"/"1", "middle"/"2",
// "right"/"3"), ignoring surrounding whitespace, to its number 1-3.
// It returns 0 for any other input.
// NOTE(review): this is a diff hunk, so each case is listed in both its
// pre-gofmt (one-line) and post-gofmt (two-line) form.
func button(b string) int {
	switch strings.TrimSpace(b) {
	case "left", "1": return 1
	case "middle", "2": return 2
	case "right", "3": return 3
	case "left", "1":
		return 1
	case "middle", "2":
		return 2
	case "right", "3":
		return 3
	}
	return 0
}


@@ 169,7 172,7 @@ func handle(handler *Handler, action string) {
		} else if s, ok := cutWord(line, "mousemove"); ok {
			s = strings.TrimSpace(s)
			var x, y float64
			_, err := fmt.Sscanf(s + "\n", "%f %f\n", &x, &y)
			_, err := fmt.Sscanf(s+"\n", "%f %f\n", &x, &y)
			if err == nil {
				h.MouseMove(x, y)
			} else {


@@ 179,7 182,7 @@ func handle(handler *Handler, action string) {
		} else if s, ok := cutWord(line, "mouseto"); ok {
			s = strings.TrimSpace(s)
			var x, y float64
			_, err := fmt.Sscanf(s + "\n", "%f %f\n", &x, &y)
			_, err := fmt.Sscanf(s+"\n", "%f %f\n", &x, &y)
			if err == nil {
				h.MouseTo(x, y)
			} else {


@@ 230,7 233,7 @@ func handle(handler *Handler, action string) {
		} else if s, ok := cutWord(line, "wheel"); ok {
			s = strings.TrimSpace(s)
			var n int
			_, err := fmt.Sscanf(s + "\n", "%d\n", &n)
			_, err := fmt.Sscanf(s+"\n", "%d\n", &n)
			if err == nil {
				h.Wheel(n)
			} else {


@@ 240,7 243,7 @@ func handle(handler *Handler, action string) {
		} else if s, ok := cutWord(line, "hwheel"); ok {
			s = strings.TrimSpace(s)
			var n int
			_, err := fmt.Sscanf(s + "\n", "%d\n", &n)
			_, err := fmt.Sscanf(s+"\n", "%d\n", &n)
			if err == nil {
				h.Hwheel(n)
			} else {


@@ 285,13 288,13 @@ func mods(mod string, super, ctrl, alt, shift bool) (bool, bool, bool, bool) {
}

// UinputHandler holds an external command (dotool), a writer to its stdin,
// a load callback supplied to NewUinputHandler, and handler state flags and
// strings. Its write method sends newline-terminated lines to stdin.
// NOTE(review): super/ctrl/alt/shift presumably track held modifiers and caps
// the caps-lock state — confirm against the methods that set them.
// Diff view: realigned fields appear in both their old and new form.
type UinputHandler struct {
	dotool *exec.Cmd
	stdin io.WriteCloser
	load func(files []string)
	dotool                  *exec.Cmd
	stdin                   io.WriteCloser
	load                    func(files []string)
	super, ctrl, alt, shift bool
	caps bool
	cache string
	stuck string
	caps                    bool
	cache                   string
	stuck                   string
}

func NewUinputHandler(load func(files []string)) *UinputHandler {


@@ 313,7 316,7 @@ func NewUinputHandler(load func(files []string)) *UinputHandler {
}

func (uh *UinputHandler) write(s string) {
	_, err := io.WriteString(uh.stdin, s + "\n")
	_, err := io.WriteString(uh.stdin, s+"\n")
	if err != nil {
		fatal(err)
	}


@@ 366,7 369,7 @@ func (uh *UinputHandler) Buttonup(button int) {

func (uh *UinputHandler) Caps(b bool) {
	caps := uh.caps
	time.Sleep(time.Duration(50)*time.Millisecond)
	time.Sleep(time.Duration(50) * time.Millisecond)
	files, _ := filepath.Glob("/sys/class/leds/input*::capslock/brightness")
	for _, f := range files {
		data, _ := os.ReadFile(f)


@@ 468,7 471,6 @@ func (uh *UinputHandler) Close() {
	}
}


// GadgetHandler embeds *UinputHandler, so it has that type's fields and
// methods; it defines its own Caps method, which stores the value in caps.
type GadgetHandler struct {
	*UinputHandler
}


@@ 502,12 504,11 @@ func (gh *GadgetHandler) Caps(b bool) {
	gh.caps = b
}


// X11Handler holds a load callback, handler state flags and strings, and the
// delay settings used by its key/keydown/keyup (keyDelay, keyHold) and type_
// (typeDelay, typeHold) helpers.
// NOTE(review): super/ctrl/alt/shift presumably track held modifiers — confirm.
// Diff view: realigned fields appear in both their old and new form.
type X11Handler struct {
	load func(files []string)
	super, ctrl, alt, shift bool
	cache string
	stuck string
	load                                   func(files []string)
	super, ctrl, alt, shift                bool
	cache                                  string
	stuck                                  string
	keyDelay, keyHold, typeDelay, typeHold int
}



@@ 537,18 538,18 @@ func (xh *X11Handler) run(args ...string) {

// key passes "key --delay <keyDelay+keyHold> -- <chords>" to xh.run, split on
// single spaces so every space-separated chord becomes its own argument.
// Diff view: the xh.run call is shown in its pre- and post-gofmt form.
func (xh *X11Handler) key(chords string) {
	d := fmt.Sprint(xh.keyDelay + xh.keyHold)
	xh.run(strings.Split("key --delay " + d + " -- " +  chords, " ")...)
	xh.run(strings.Split("key --delay "+d+" -- "+chords, " ")...)
}
// keydown passes "keydown --delay <keyDelay> -- <chords>" to xh.run, split on
// single spaces so every space-separated chord becomes its own argument.
// Diff view: the xh.run call is shown in its pre- and post-gofmt form.
func (xh *X11Handler) keydown(chords string) {
	d := fmt.Sprint(xh.keyDelay)
	xh.run(strings.Split("keydown --delay " + d + " -- " +  chords, " ")...)
	xh.run(strings.Split("keydown --delay "+d+" -- "+chords, " ")...)
}
// keyup passes "keyup --delay <keyDelay> -- <chords>" to xh.run, split on
// single spaces so every space-separated chord becomes its own argument.
// Diff view: the xh.run call is shown in its pre- and post-gofmt form.
func (xh *X11Handler) keyup(chords string) {
	d := fmt.Sprint(xh.keyDelay)
	xh.run(strings.Split("keyup --delay " + d + " -- " +  chords, " ")...)
	xh.run(strings.Split("keyup --delay "+d+" -- "+chords, " ")...)
}
// type_ passes "type --delay <d> -- <text>" to xh.run, where d is twice the
// sum of typeDelay and typeHold. Unlike the key helpers, text is handed over
// as one argument and is never split on spaces.
// Diff view: the d assignment is shown in its pre- and post-gofmt form.
func (xh *X11Handler) type_(text string) {
	d := fmt.Sprint((xh.typeDelay + xh.typeHold)*2)
	d := fmt.Sprint((xh.typeDelay + xh.typeHold) * 2)
	xh.run("type", "--delay", d, "--", text)
}


M noise.go => noise.go +28 -20
@@ 13,6 13,7 @@ import (
const wavHeader = "RIFF$\x00\x00\x80WAVEfmt \x10\x00\x00\x00\x01\x00\x01\x00\x80>\x00\x00\x00}\x00\x00\x02\x00\x10\x00data\x00\x00\x00\x80"

// noiseThreshold is parsed in init from $NUMEN_NOISE_THRESHOLD (the parse
// error is discarded) and multiplies the energy and fuzz cutoffs in Proceed.
var noiseThreshold float64

func init() {
	e := os.Getenv("NUMEN_NOISE_THRESHOLD")
	noiseThreshold, _ = strconv.ParseFloat(e, 64)


@@ 25,6 26,7 @@ func init() {
}

// Noise identifies a kind of noise; its constants are declared with iota,
// starting at NoiseNone (0) followed by NoiseBlow.
type Noise int

const (
	NoiseNone Noise = iota
	NoiseBlow


@@ 34,26 36,32 @@ const (

// noiseBeginString returns the "<...-begin>" special phrase for NoiseBlow,
// NoiseHiss or NoiseShush, and the empty string for any other value.
// NOTE(review): this is a diff hunk, so each case is listed in both its
// pre-gofmt (one-line) and post-gofmt (two-line) form.
func noiseBeginString(n Noise) string {
	switch n {
	case NoiseBlow: return "<blow-begin>"
	case NoiseHiss: return "<hiss-begin>"
	case NoiseShush: return "<shush-begin>"
	case NoiseBlow:
		return "<blow-begin>"
	case NoiseHiss:
		return "<hiss-begin>"
	case NoiseShush:
		return "<shush-begin>"
	}
	return ""
}
// noiseEndString returns the "<...-end>" special phrase for NoiseBlow,
// NoiseHiss or NoiseShush, and the empty string for any other value.
// NOTE(review): this is a diff hunk, so each case is listed in both its
// pre-gofmt (one-line) and post-gofmt (two-line) form.
func noiseEndString(n Noise) string {
	switch n {
	case NoiseBlow: return "<blow-end>"
	case NoiseHiss: return "<hiss-end>"
	case NoiseShush: return "<shush-end>"
	case NoiseBlow:
		return "<blow-end>"
	case NoiseHiss:
		return "<hiss-end>"
	case NoiseShush:
		return "<shush-end>"
	}
	return ""
}

// NoiseRecognizer holds an input reader, a WAV decoder, one bool per noise
// kind (Blow, Hiss, Shush) and the previous and current Noise values.
// NOTE(review): the bools presumably enable detection of each noise, matching
// the blow/hiss/shush arguments of NewNoiseRecognizer — confirm there.
// Diff view: realigned fields appear in both their old and new form.
type NoiseRecognizer struct {
	r io.Reader
	decoder *wav.Wav
	r                 io.Reader
	decoder           *wav.Wav
	Blow, Hiss, Shush bool
	PrevNoise, Noise Noise
	PrevNoise, Noise  Noise
}

func NewNoiseRecognizer(r io.Reader, blow, hiss, shush bool) *NoiseRecognizer {


@@ 85,7 93,7 @@ func (nr *NoiseRecognizer) Proceed(n int) {

	energy := 0.0
	for i := 3; i < 1333; i++ {
		energy += real(spectrum[i]) *  real(spectrum[i])
		energy += real(spectrum[i]) * real(spectrum[i])
	}

	centroid := 0.0


@@ 97,8 105,8 @@ func (nr *NoiseRecognizer) Proceed(n int) {
	moment2 := 0.0
	moment3 := 0.0
	for i := 3; i < 1333; i++ {
		moment2 += math.Pow(float64(i) - centroid, 2) * real(spectrum[i]) * real(spectrum[i]);
		moment3 += math.Pow(float64(i) - centroid, 3) * real(spectrum[i]) * real(spectrum[i]);
		moment2 += math.Pow(float64(i)-centroid, 2) * real(spectrum[i]) * real(spectrum[i])
		moment3 += math.Pow(float64(i)-centroid, 3) * real(spectrum[i]) * real(spectrum[i])
	}
	moment2 /= energy
	moment3 /= energy


@@ 108,7 116,7 @@ func (nr *NoiseRecognizer) Proceed(n int) {
	rolloff := 3
	{
		rollsum := 0.0
		for rollsum < energy * 0.95 && rolloff < 1333 {
		for rollsum < energy*0.95 && rolloff < 1333 {
			rollsum += real(spectrum[rolloff]) * real(spectrum[rolloff])
			rolloff++
		}


@@ 130,28 138,28 @@ func (nr *NoiseRecognizer) Proceed(n int) {
	for i := 3; i < 29; i++ {
		a := real(spectrum[i])
		b := real(spectrum[i+1])
		fuzz += math.Abs(a - b) * math.Abs(a - b)
		fuzz += math.Abs(a-b) * math.Abs(a-b)
	}

	over := 0
	for i := 3; i < 30; i++ {
		a := real(spectrum[i]) * real(spectrum[i])
		if a > math.Sqrt(energy)/4 * noiseThreshold {
		if a > math.Sqrt(energy)/4*noiseThreshold {
			over++
		}
	}

	var blow, hiss, shush bool
	if energy > 500.0 * noiseThreshold {
		blow = fuzz > 500.0 * noiseThreshold && over > 10 && rolloff < 55 && centroid > 8.0
	if energy > 500.0*noiseThreshold {
		blow = fuzz > 500.0*noiseThreshold && over > 10 && rolloff < 55 && centroid > 8.0
	}
	if energy > 5.0 * noiseThreshold {
	if energy > 5.0*noiseThreshold {
		hiss = whisp > 0.82 && skewness > -2.7 && skewness < -0.3 && buzz < 0.021
		shush = whisp > 0.24 && whisp < 0.8 && skewness > -0.4 && skewness < 1.65 && moment2 > 96000.0 && buzz < 0.07
	}
	var blowMore, hissMore, shushMore bool
	if energy > 1.0 * noiseThreshold {
		blowMore = fuzz > 80.0 * noiseThreshold
	if energy > 1.0*noiseThreshold {
		blowMore = fuzz > 80.0*noiseThreshold
		hissMore = whisp > 0.6 && skewness > -2.7 && skewness < -0.15
		shushMore = whisp > 0.1 && whisp < 0.9 && skewness > -0.5 && skewness < 1.8
	}

M numen.go => numen.go +33 -28
@@ 7,6 7,7 @@ import (
	"fmt"
	"git.sr.ht/~geb/numen/vox"
	"git.sr.ht/~geb/opt"
	vosk "github.com/alphacep/vosk-api/go"
	"io"
	"os"
	"os/exec"


@@ 15,14 16,13 @@ import (
	"strings"
	"syscall"
	"time"
	vosk "github.com/alphacep/vosk-api/go"
)

// Package-level defaults. DefaultModelPackage is the package name shown in
// the "you need to install ..." fatal message in main; DefaultModelPaths is a
// space-separated list of two model directories; DefaultPhrasesDir is the
// phrases directory path.
// NOTE(review): Version has no initializer here — presumably set at build
// time; confirm in the build scripts.
// Diff view: realigned lines appear in both their old and new form.
var (
	Version string
	Version             string
	DefaultModelPackage = "vosk-model-small-en-us"
	DefaultModelPaths = "/usr/local/share/vosk-models/small-en-us /usr/share/vosk-models/small-en-us"
	DefaultPhrasesDir = "/etc/numen/phrases"
	DefaultModelPaths   = "/usr/local/share/vosk-models/small-en-us /usr/share/vosk-models/small-en-us"
	DefaultPhrasesDir   = "/etc/numen/phrases"
)

func usage() {


@@ 70,7 70,7 @@ func pipeBeingRead(path string) bool {

func writeLine(f *os.File, s string) {
	if f != nil {
		_, err := io.WriteString(f, s + "\n")
		_, err := io.WriteString(f, s+"\n")
		if err != nil {
			warn(err)
		}


@@ 91,14 91,14 @@ func init() {
		}
	}
	p += "/numen"
	err := os.MkdirAll(p, 0700)
	err := os.MkdirAll(p, 0o700)
	if err != nil {
		fatal(err)
	}
	os.Setenv("NUMEN_STATE_DIR", p)
}
func writeStateFile(name string, data []byte) {
	err := os.WriteFile(os.Getenv("NUMEN_STATE_DIR") + "/" + name, data, 0600)
	err := os.WriteFile(os.Getenv("NUMEN_STATE_DIR")+"/"+name, data, 0o600)
	if err != nil {
		warn(err)
	}


@@ 111,10 111,14 @@ type Action struct {

// knownSpecialPhrase reports whether phrase is exactly one of the recognized
// special phrases: "<complete>" or the begin/end markers for blow, hiss and
// shush. Any other string yields false.
// NOTE(review): this is a diff hunk, so each case is listed in both its
// pre-gofmt (one-line) and post-gofmt (two-line) form.
func knownSpecialPhrase(phrase string) bool {
	switch phrase {
	case "<complete>": return true
	case "<blow-begin>", "<blow-end>": return true
	case "<hiss-begin>", "<hiss-end>": return true
	case "<shush-begin>", "<shush-end>": return true
	case "<complete>":
		return true
	case "<blow-begin>", "<blow-end>":
		return true
	case "<hiss-begin>", "<hiss-end>":
		return true
	case "<shush-begin>", "<shush-end>":
		return true
	}
	return false
}


@@ 253,9 257,8 @@ func handleTranscribe(h *Handler, results []vox.Result, action Action) {
}

func do(cmdRec, transRec *vox.Recognizer, handler *Handler, sentence []vox.PhraseResult, actions map[string]Action, audio []byte, phraseLog *os.File) string {

	cancel := 0
	CANCEL:
CANCEL:
	for i := range sentence {
		act, _ := actions[sentence[i].Text]
		for _, tag := range act.Tags {


@@ 275,7 278,7 @@ func do(cmdRec, transRec *vox.Recognizer, handler *Handler, sentence []vox.Phras
		transcribe := false
		for _, tag := range act.Tags {
			if tag == "transcribe" {
				 transcribe = true
				transcribe = true
			}
		}
		if transcribe {


@@ 306,13 309,13 @@ func do(cmdRec, transRec *vox.Recognizer, handler *Handler, sentence []vox.Phras

func main() {
	var opts struct {
		Audio string
		AudioLog *os.File
		Files []string
		Handler string
		Mic string
		Audio     string
		AudioLog  *os.File
		Files     []string
		Handler   string
		Mic       string
		PhraseLog *os.File
		Verbose bool
		Verbose   bool
	}
	opts.Handler = "uinput"
	{


@@ 442,7 445,7 @@ func main() {
			fatal("you need to install the " + DefaultModelPackage + " package or set $NUMEN_MODEL")
		}
		if opts.Verbose {
			fmt.Fprintln(os.Stderr, "Model: " + m)
			fmt.Fprintln(os.Stderr, "Model: "+m)
		}

		var err error


@@ 486,7 489,7 @@ func main() {
	if opts.Audio == "" {
		mic = getMic(opts.Mic)
		if opts.Verbose {
			fmt.Fprintln(os.Stderr, "Microphone: " + mic)
			fmt.Fprintln(os.Stderr, "Microphone: "+mic)
		}
		var err error
		audio, err = record(mic)


@@ 537,7 540,7 @@ func main() {
		} else {
			panic("unreachable")
		}
		defer func(){ (*handler).Close() }()
		defer func() { (*handler).Close() }()
	}

	pipe := make(chan func())


@@ 547,7 550,7 @@ func main() {
			p = "/tmp/numen-pipe"
		}
		if opts.Verbose {
			fmt.Fprintln(os.Stderr, "Pipe: " + p)
			fmt.Fprintln(os.Stderr, "Pipe: "+p)
		}

		if pipeBeingRead(p) {


@@ 557,7 560,7 @@ func main() {
		if err := os.Remove(p); err != nil && !errors.Is(err, os.ErrNotExist) {
			fatal(err)
		}
		if err := syscall.Mkfifo(p, 0600); err != nil {
		if err := syscall.Mkfifo(p, 0o600); err != nil {
			panic(err)
		}
		defer os.Remove(p)


@@ 569,7 572,7 @@ func main() {
		go func() {
			sc := bufio.NewScanner(f)
			for sc.Scan() {
				pipe <- func(){ handle(handler, sc.Text()) }
				pipe <- func() { handle(handler, sc.Text()) }
			}
			if sc.Err() != nil {
				warn(sc.Err())


@@ 584,8 587,10 @@ func main() {
	transcribing := ""
	for {
		select {
		case <-terminate: return
		case f := <- pipe: f()
		case <-terminate:
			return
		case f := <-pipe:
			f()
		default:
		}
		chunk := make([]byte, 4096)

M vox/phrasesplit/phrasesplit.go => vox/phrasesplit/phrasesplit.go +2 -2
@@ 24,7 24,7 @@ func Split(sentence string, phraseMap map[string][]string) ([]string, bool) {
	var phrases, beforeBacktrack []string
	var forks []int
	i, backtrack := 0, false
	OUTER:
OUTER:
	for ; i < len(words) || backtrack; i++ {
		skip := 0
		if backtrack {


@@ 52,7 52,7 @@ func Split(sentence string, phraseMap map[string][]string) ([]string, bool) {
					continue OUTER
				}
				n := strings.Count(args, " ") + 1
				if i + n >= len(words) {
				if i+n >= len(words) {
					continue
				}
				if args == strings.Join(words[i+1:i+n+1], " ") {

M vox/vox.go => vox/vox.go +35 -35
@@ 4,10 4,10 @@ import (
	"encoding/json"
	"errors"
	"git.sr.ht/~geb/numen/vox/phrasesplit"
	vosk "github.com/alphacep/vosk-api/go"
	"github.com/m7shapan/njson"
	"math"
	"strings"
	vosk "github.com/alphacep/vosk-api/go"
)

func init() {


@@ 19,30 19,30 @@ func NewModel(filepath string) (*vosk.VoskModel, error) {
}

// PhraseResult is one recognized phrase: its text, a confidence (set to -1
// where the recognizer gives none) and Start/End positions produced by
// Recognizer.index from the phrase's start and end times.
// NOTE(review): Start/End look like byte offsets into Recognizer.Audio —
// confirm against index(). Diff view: Text appears in old and new alignment.
type PhraseResult struct {
	Text string
	Text       string
	Confidence float64
	Start, End int
}

// Result is one recognition result: the full text, its phrases, and a
// confidence (set to -1 where none is given). Partial is set for results
// built from the recognizer's "partial" output.
// NOTE(review): the meaning of Valid is not visible in this hunk — confirm
// where it is assigned. Diff view: realigned fields appear in both forms.
type Result struct {
	Text string
	Phrases []PhraseResult
	Confidence float64
	Text           string
	Phrases        []PhraseResult
	Confidence     float64
	Valid, Partial bool
}

// Recognizer wraps a vosk recognizer together with the phrase map built by
// phrasesplit.Parse and the audio bookkeeping used by index(): sampleRate,
// byteDepth (bitDepth/8, as set in NewRecognizer), bytesRead and Audio.
// NewRecognizer starts with bytesRead 0, Audio nil and both flags false.
// NOTE(review): the roles of finalized and keyphrases are not visible in this
// hunk — confirm in the methods that set them.
// Diff view: realigned fields appear in both their old and new form.
type Recognizer struct {
	VoskRecognizer *vosk.VoskRecognizer
	phraseMap map[string][]string
	VoskRecognizer        *vosk.VoskRecognizer
	phraseMap             map[string][]string
	sampleRate, byteDepth int
	bytesRead int
	Audio []byte
	finalized bool
	keyphrases bool
	bytesRead             int
	Audio                 []byte
	finalized             bool
	keyphrases            bool
}

func NewRecognizer(model *vosk.VoskModel, sampleRate, bitDepth int, phrases []string) (*Recognizer, error) {
	if bitDepth % 8 != 0 {
	if bitDepth%8 != 0 {
		panic("bitDepth must be a multiple of eight")
	}
	var r *vosk.VoskRecognizer


@@ 63,7 63,7 @@ func NewRecognizer(model *vosk.VoskModel, sampleRate, bitDepth int, phrases []st
		}
	}
	p := phrasesplit.Parse(phrases)
	return &Recognizer{r, p, sampleRate, bitDepth/8, 0, nil, false, false}, nil
	return &Recognizer{r, p, sampleRate, bitDepth / 8, 0, nil, false, false}, nil
}

func (r *Recognizer) Free() {


@@ 104,9 104,9 @@ func (r *Recognizer) SetWords(b bool) {

func (r *Recognizer) index(time float64) int {
	rate := float64(r.sampleRate * r.byteDepth)
	i := time * rate - float64(r.bytesRead - len(r.Audio))
	i := time*rate - float64(r.bytesRead-len(r.Audio))
	// round to byteDepth multiple
	i = math.Round(i / float64(r.byteDepth)) * float64(r.byteDepth)
	i = math.Round(i/float64(r.byteDepth)) * float64(r.byteDepth)

	if i < 0 {
		return 0


@@ 119,29 119,29 @@ func (r *Recognizer) index(time float64) int {

func (r *Recognizer) parseVoskResults(json string) []Result {
	type ResultJson struct {
		Text string `njson:"text"`
		Words []string `njson:"result.#.word"`
		Confs []float64 `njson:"result.#.conf"`
		Starts []float64 `njson:"result.#.start"`
		Ends []float64 `njson:"result.#.end"`
		Confidence float64 `njson:"confidence"`  // only with alternatives
		Text       string    `njson:"text"`
		Words      []string  `njson:"result.#.word"`
		Confs      []float64 `njson:"result.#.conf"`
		Starts     []float64 `njson:"result.#.start"`
		Ends       []float64 `njson:"result.#.end"`
		Confidence float64   `njson:"confidence"` // only with alternatives
	}
	var s struct {
		Alternatives []ResultJson `njson:"alternatives"`

		// copy paste of ResultJson
		Text string `njson:"text"`
		Words []string `njson:"result.#.word"`
		Confs []float64 `njson:"result.#.conf"`
		Starts []float64 `njson:"result.#.start"`
		Ends []float64 `njson:"result.#.end"`
		Confidence float64 `njson:"confidence"`  // only with alternatives
		Text       string    `njson:"text"`
		Words      []string  `njson:"result.#.word"`
		Confs      []float64 `njson:"result.#.conf"`
		Starts     []float64 `njson:"result.#.start"`
		Ends       []float64 `njson:"result.#.end"`
		Confidence float64   `njson:"confidence"` // only with alternatives

		ParText string `njson:"partial"`
		ParWords []string `njson:"partial_result.#.word"`
		ParConfs []float64 `njson:"partial_result.#.conf"`
		ParText   string    `njson:"partial"`
		ParWords  []string  `njson:"partial_result.#.word"`
		ParConfs  []float64 `njson:"partial_result.#.conf"`
		ParStarts []float64 `njson:"partial_result.#.start"`
		ParEnds []float64 `njson:"partial_result.#.end"`
		ParEnds   []float64 `njson:"partial_result.#.end"`
	}
	err := njson.Unmarshal([]byte(json), &s)
	if err != nil {


@@ 157,7 157,7 @@ func (r *Recognizer) parseVoskResults(json string) []Result {
			for p := range results[a].Phrases {
				results[a].Phrases[p] = PhraseResult{
					s.Alternatives[a].Words[p],
					-1,  // conf isn't given
					-1, // conf isn't given
					r.index(s.Alternatives[a].Starts[p]),
					r.index(s.Alternatives[a].Ends[p]),
				}


@@ 167,7 167,7 @@ func (r *Recognizer) parseVoskResults(json string) []Result {
	}
	if len(s.Text) > 0 {
		result := Result{Text: s.Text}
		result.Confidence = -1  // confidence isn't given
		result.Confidence = -1 // confidence isn't given
		result.Phrases = make([]PhraseResult, len(s.Words))
		for p := range result.Phrases {
			result.Phrases[p] = PhraseResult{


@@ 178,7 178,7 @@ func (r *Recognizer) parseVoskResults(json string) []Result {
		return []Result{result}
	}
	result := Result{Text: s.ParText, Partial: true}
	result.Confidence = -1  // confidence isn't given
	result.Confidence = -1 // confidence isn't given
	result.Phrases = make([]PhraseResult, len(s.ParWords))
	for p := range result.Phrases {
		result.Phrases[p] = PhraseResult{


@@ 208,7 208,7 @@ func (r *Recognizer) parseResults(json string) []Result {
			if n > 1 {
				text := results[ri].Phrases[pi].Text
				conf := results[ri].Phrases[pi].Confidence
				for _, p := range results[ri].Phrases[pi+1:pi+n] {
				for _, p := range results[ri].Phrases[pi+1 : pi+n] {
					text += " " + p.Text
					conf += p.Confidence
				}