Neural Sketch

Moshi / Mimi

examples/mimi.tex
\begin{nskFigure}[]
	% Input waveform: block `audio_i` embeds ../assets/wave_24khz.pdf and carries
	% the text-south label "24kHz". \wave is a style macro defined outside this excerpt.
	\nskBlock*[id=audio_i, text-south=24kHz, embed-gfx={../assets/wave_24khz.pdf}, \wave]
	% Vertical measurement between the .south and .north anchors of \nskID{1}
	% (presumably the block created just above -- TODO confirm), stored via into=h.
	% NOTE(review): `h` is not referenced by name in the visible lines; presumably
	% it is consumed by the style macros -- verify.
	\nskMeasure[from=\nskID{1}.south, to=\nskID{1}.north, axis=vertical, into=h]
 
  % encoder ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <<<
  % Encoder container `enc`: a Convnet block (`cb`) followed by a Transformer
  % block (`tb`). \enc and \b are style macros defined outside this excerpt.
	\nskContainer*[id=enc, last-pos={right=}, \enc]{
    \nskBlock*[id=cb, text-center=Convnet, \b]
    % \inodes is a helper defined elsewhere; presumably it emits the intermediate
    % nodes (the `ib…` ids used further down) next to `cb` -- TODO confirm.
    \inodes{cb}
    % `tb` is placed to the right of \nskID{3}, a relative reference whose target
    % depends on how many elements \inodes created -- keep statement order intact.
    \nskBlock*[id=tb, text-center=Transformer, pos={right=of \nskID{3}}, \b]
	}
	% `-` variant of \inodes applied to `tb`; its exact meaning is not visible here.
	\inodes-{tb}
  % Dashed arrow from the input waveform into the encoder. `enc3` is not declared
  % explicitly in this excerpt; presumably an auto-generated container id -- verify.
  \nskConnect*[from=audio_i.east, to=enc3.west, arrow-type=dashed, \ttip]
	% conn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <<<
  % Encoder wiring, first fan: for elements 3, 5 and 7, draw one arrow from the
  % east side of element 2 into the element and one arrow out of it to the west
  % side of element 8. Each endpoint written as `A |- B` is a TikZ perpendicular
  % coordinate (x taken from A, y taken from B), so every arrow stays horizontal.
  \foreach \idx in {3,5,7} {
    \nskConnect*[%
      to=\nskID!{\idx},
      from={\nskID!{2}.east |- \nskID!{\idx}},
      \ttip
    ]
    \nskConnect*[%
      from=\nskID!{\idx},
      to={\nskID!{8}.west |- \nskID!{\idx}},
      \ttip
    ]
  }
  % Second fan: arrows leaving the east side of element 8 into elements 10, 12, 14.
  \foreach \idx in {10,12,14} {
    \nskConnect*[%
      to=\nskID!{\idx},
      from={\nskID!{8}.east |- \nskID!{\idx}},
      \ttip
    ]
  }
  % quantization ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <<<
  % Quantizer container `qc`, placed to the right of `ib5`. Contents, in creation
  % order: a column of eight \sn blocks (sn0..sn7), the VQ / RVQ input chains on
  % the left, and the Lin. / add blocks on the right. Many positions and
  % connections use relative \nskID{n} references, so statement order matters.
  \nskContainer*[id=qc, pos={right=of ib5}, \quant]{
    % Column of \sn blocks: sn0 first, then sn1..sn7, each 2mm below the previous.
    \nskBlock*[id=sn0, \sn]
	  \foreach \i in {1,...,7} {\nskBlock*[id={sn\i}, last-pos={below=2mm}, \sn]}
    % left --v ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <<<
    % Upper-left chain, built right to left: VQ, an unnamed \sn block, then lin1.
    % VQ is anchored 2cm left of \nskID{6} (a relative reference into the column).
    \nskBlock*[id=vq, text-center=VQ, pos={left=2cm of \nskID{6}}, shift-y=0mm, \t]
    \nskBlock*[last-pos={left=.5cm}, \sn]
    % Connects \nskID{1} to \nskID{2}; presumably the two most recently created
    % blocks (the same idiom repeats after each new block below) -- TODO confirm.
    \nskConnect[from=\nskID{1}, to=\nskID{2}]
    \nskBlock*[id=lin1, text-center=Lin., last-pos={left=.5cm}, \t]
    \nskConnect[from=\nskID{1}, to=\nskID{2}]
 
    % NOTE(review): \nskWrap semantics are not visible here; presumably it starts
    % the next row underneath \nskID{1} (lin1) -- verify against the package docs.
    \nskWrap[under=\nskID{1}]
 
    % Lower-left chain, built left to right: lin2, an unnamed \sn block, then RVQ.
    \nskBlock*[id=lin2, text-center=Lin., \t]
    \nskBlock*[last-pos={right=.5cm}, \sn]
    \nskConnect[from=\nskID{1}, to=\nskID{2}]
    \nskBlock*[id=rvq, text-center=RVQ, last-pos={right=.5cm}, \t]
    \nskConnect[from=\nskID{1}, to=\nskID{2}]
    % right --v  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <<<
    % Right-hand side: add1 sits 1.8cm right of sn4; lin3 / lin4 sit 6cm right of
    % vq / rvq respectively, each followed by an unnamed \n block it connects to.
    \nskBlock*[\add, id=add1,  pos={right=1.8cm of sn4}]
    \nskBlock*[id=lin3, text-center=Lin., pos={right=6cm of vq}, \t]
    \nskBlock*[last-pos={right=}, \n]
    \nskConnect[from=\nskID{1}, to=\nskID{2}]
    \nskBlock*[id=lin4, text-center=Lin., pos={right=6cm of rvq}, \t]
    \nskBlock*[last-pos={right=}, \n]
    \nskConnect[from=\nskID{1}, to=\nskID{2}]
    % add2 merges the two branches: single-bend connections from \nskID{2} and
    % \nskID{4} (presumably the two unnamed \n blocks above -- TODO confirm).
    \nskBlock*[\add, id=add2,  last-pos={above right=.2cm and 1cm}]
    \nskConnect[from=\nskID{2}.east, to=add2.west, bend-type=single, bend-direction=right, bend-distance=5mm]
    \nskConnect[from=\nskID{4}.east, to=add2.west, bend-type=single, bend-direction=right, bend-distance=5mm]
  }
  % conn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <<<
  % Quantizer inputs: `ib5` feeds both linear projections (lin1 and lin2), and
  % the VQ output is routed to sn0, the first block of the \sn column.
  \nskConnect*[%
    from=ib5.east, to=lin1.west,
    bend-type=straight, bend-direction=right, bend-distance=5mm,
    \ttip
  ]
  \nskConnect*[%
    from=ib5.east, to=lin2.west,
    bend-type=straight, bend-direction=right, bend-distance=5mm,
    \ttip
  ]
  \nskConnect[%
    from=vq.east, to=sn0.west,
    bend-type=straight, bend-direction=right, bend-distance=5mm,
    corner-radius=3mm%
  ]
 
	  % bend alt. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <<<
	  % \nskConnect[from=vq.east, to=sn0.west, bend-type=single, bend-direction=right, bend-distance=5mm, corner-radius=5mm]
	  % \nskConnect[from=rvq.east, to=sn5.west]
	  % \foreach \i in {1,...,4} {\nskConnect[from=rvq.east, to=sn\i.west, bend-type=single, bend-direction=right]}
	  % \foreach \i in {6,...,7} {\nskConnect[from=rvq.east, to=sn\i.west, bend-type=single, bend-direction=right]}
	  % \foreach \i in {1,...,7} {\nskConnect[from=sn\i.east, to=add1.west, bend-type=single, bend-direction=right]}
	  % bend alt. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <<<
 
  % RVQ fan-out / fan-in: rvq feeds each of sn1..sn7, and each of those feeds
  % add1 (arrow tip on the add1 side only). The two connections are emitted
  % per index, in this order, to keep the original drawing order.
  \foreach \idx in {1,2,3,4,5,6,7} {
    \nskConnect[%
      from=rvq.east, to=sn\idx.west,
      bend-type=straight, bend-direction=right, bend-distance=5mm%
    ]
    \nskConnect*[%
      from=sn\idx.east, to=add1.west,
      bend-type=straight, bend-direction=right, bend-distance=5mm,
      \ttip
    ]
  }
  % Outputs towards the right-hand linear layers: sn0 -> lin3 and add1 -> lin4
  % (the latter with a 3mm bend distance instead of 5mm).
  \nskConnect[%
    from=sn0.east, to=lin3.west,
    bend-type=straight, bend-direction=right, bend-distance=5mm%
  ]
  \nskConnect[%
    from=add1.east, to=lin4.west,
    bend-type=straight, bend-direction=right, bend-distance=3mm%
  ]
 
  % decoder ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <<<
  % Decoder container `dec`, 3cm right of `qc5`. It mirrors the encoder with the
  % order reversed: Transformer first, then Convnet. The ids `tb` and `cb` were
  % already used by the encoder, so these blocks are addressed as `tb2` / `cb2`
  % below (see the inline "autoincremented" note).
  \nskContainer*[id=dec, pos={right=3cm of qc5}, \dec]{
    \nskBlock*[id=tb, text-center=Transformer, \b] % <- autoincremented
    % \inodes is defined elsewhere; presumably emits the `ib…` nodes for tb2.
    \inodes{tb2}
    % Placed right of \nskID{3}, a relative reference that depends on what
    % \inodes created -- keep statement order intact.
    \nskBlock*[id=cb, text-center=Convnet, pos={right=of \nskID{3}}, \b]
	}
	% `-!` variant of \inodes applied to tb2; its exact meaning is not visible here
	% (presumably the nodes on the input side of tb2, given the wiring that follows).
	\inodes-!{tb2}
	% conn ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <<<
  % Decoder wiring between the Transformer (tb2) and the Convnet (cb2): each of
  % ib7..ib9 receives an arrow from tb2's east side and sends one on to cb2's
  % west side. `A |- B` is a TikZ perpendicular coordinate (x from A, y from B).
  \foreach \idx in {7,8,9} {
    \nskConnect*[%
      to=ib\idx,
      from={tb2.east |- ib\idx},
      \ttip
    ]
    \nskConnect*[%
      from=ib\idx,
      to={cb2.west |- ib\idx},
      \ttip
    ]
  }
  % Decoder inputs: ib10..ib12 each send an arrow into tb2's west side.
  \foreach \idx in {10,11,12} {%
    \nskConnect*[%
      from=ib\idx,
      to={tb2.west |- ib\idx},
      \ttip
    ]%
  }
  % The merged quantizer output (add2) enters the decoder through ib11.
  \nskConnect*[%
    from=add2, to=ib11,
    \ttip
  ]
 
  % Output waveform: block `audio_o`, right of `dec3`, using the same embedded
  % graphic and "24kHz" text-south label as the input block `audio_i`.
  \nskBlock*[id=audio_o, pos={right=of dec3}, text-south=24kHz, embed-gfx={../assets/wave_24khz.pdf}, \wave]
  % Dashed arrow from the decoder to the output waveform (mirrors the dashed
  % audio_i -> enc3 arrow on the input side).
  \nskConnect*[from=dec3, to=audio_o, arrow-type=dashed, \ttip]
 
  % training ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ <<<
  % Training-only container (no id), anchored by its south side above qc5.north.
  % It holds the distillation path: WavLM -> cosine similarity, plus a Lin. block
  % that feeds the second cosine-similarity input.
  \nskContainer*[pos={above=of qc5.north}, anchor=south, \trn]{
    % `wavlm`: icon + "WavLM" in the centre, bold "Distillation" as text-south.
    \nskBlock[id=wavlm, text-center={\nskIcon[name=audio-lines]\\WavLM}, text-south=\textbf{Distillation}, width=0cm, padding-y=8mm, fill=nskSecondaryAccent, padding-x=8mm]
    % `cs`: 5cm right of the previous block, shifted by -5mm in y.
    \nskBlock[id=cs, text-center=Cosine\\Similarity, last-pos={right=5cm}, shift-y=-5mm, width=0cm, padding-y=8mm]
    % `lin`: positioned below-left of `cs` (negative 11mm offset, 8mm to the left).
    \nskBlock[id=lin, text-center=Lin., last-pos={below left=-11mm and 8mm}, width=0cm, padding-y=4mm]
    % Two marks on cs.west, offset +4mm / -4mm in y, used as separate entry
    % points for the two incoming arrows below.
    \nskMark[id=cst, at=cs.west, shift-y=4mm]
    \nskMark[id=csb, at=cs.west, shift-y=-4mm]
 
    % `wcp`: wavlm -> upper entry mark; the id is reused by \nskMarkPath below.
    \nskConnect*[id=wcp, from=wavlm, to=cst, \ttip]
    % Horizontal arrow into the lower entry mark, starting at lin.east's x and
    % csb's y (TikZ `|-` perpendicular coordinate).
    \nskConnect*[from={lin.east |- csb}, to=csb, \ttip]
    % Decorate path `wcp` at position .5 (presumably its midpoint -- TODO confirm):
    % drop a mark, then place a 1mm icon-type block at (\nskID{1}) whose
    % text-north shows three icons, shifted down by 3mm via text-north-style.
    \nskMarkPath[at=.5]{wcp}{
      \nskMark[]
      \nskBlock[
        pos={at={(\nskID{1})}},
        type=icon, height=1mm, width=1mm, text-north-style={yshift=-3mm},
        text-north={\nskIcon[name=audio-waveform]\quad\nskIcon[name=box]\quad\nskIcon[name=package-open]}
      ]
    }
  }
  % Adversarial-loss block `al`, placed 5mm above the previous element via
  % last-pos (presumably the training container just closed -- TODO confirm).
  \nskBlock[
    id=al, last-pos={above=5mm}, text-center=Adversarial Losses,
    width=0cm, fill=nskRed,
  ]
  % Route sn0 up into the training container's `lin` block with a double bend
  % going upward.
  \nskConnect[from=sn0, to=lin, bend-type=double, bend-direction=up]
 
  % Loss inputs taken from the input waveform: audio_i.north goes to WavLM
  % (distillation) and to the adversarial-loss block, each as a single upward
  % bend with a large corner radius (3cm and 5cm respectively).
  \nskConnect*[from=audio_i.north, to=wavlm.west, bend-type=single, bend-direction=up, corner-radius=3cm, \ttip ]
  \nskConnect*[from=audio_i.north, to=al.west, bend-type=single, bend-direction=up, corner-radius=5cm, \ttip]
 
  % The output waveform enters the adversarial-loss block from the east side.
  \nskConnect*[from=audio_o.north, to=al.east, bend-type=single, bend-direction=up, corner-radius=3cm, \ttip]
 
\end{nskFigure}