Publications

2023

Time-Domain Speech Separation Networks With Graph Encoding Auxiliary

Tingting Wang, Zexu Pan, Meng Ge, Zhen Yang, and Haizhou Li

IEEE Signal Processing Letters, 2023

Bib

% Journal article in IEEE Signal Processing Letters, volume 30.
% No issue number is recorded, so `number` is omitted instead of left empty.
@article{wang2023graph,
  author  = {Wang, Tingting and Pan, Zexu and Ge, Meng and Yang, Zhen and Li, Haizhou},
  title   = {Time-Domain Speech Separation Networks With Graph Encoding Auxiliary},
  journal = {IEEE Signal Processing Letters},
  year    = {2023},
  volume  = {30},
  pages   = {110--114},
}

2022

Selective Listening by Synchronizing Speech with Lips

Zexu Pan, Ruijie Tao, Chenglin Xu, and Haizhou Li

IEEE/ACM Trans. Audio, Speech, Lang. Process., 2022

Bib

% Journal article, volume 30; flagged with the custom `select` field.
% No issue number is recorded, so `number` is omitted instead of left empty.
@article{pan2021reentry,
  select  = {true},
  author  = {Pan, Zexu and Tao, Ruijie and Xu, Chenglin and Li, Haizhou},
  title   = {Selective Listening by Synchronizing Speech with Lips},
  journal = {IEEE/ACM Trans. Audio, Speech, Lang. Process.},
  year    = {2022},
  volume  = {30},
  pages   = {1650--1664},
}

USEV: Universal Speaker Extraction With Visual Cue

Zexu Pan, Meng Ge, and Haizhou Li

IEEE/ACM Trans. Audio, Speech, Lang. Process., 2022

Bib

% Journal article, volume 30; flagged with the custom `select` field.
% The acronym USEV is brace-protected so styles cannot lowercase it.
% No issue number is recorded, so `number` is omitted instead of left empty.
@article{usev21,
  select  = {true},
  author  = {Pan, Zexu and Ge, Meng and Li, Haizhou},
  title   = {{USEV}: Universal Speaker Extraction With Visual Cue},
  journal = {IEEE/ACM Trans. Audio, Speech, Lang. Process.},
  year    = {2022},
  volume  = {30},
  pages   = {3032--3045},
}

Speaker Extraction with Co-Speech Gestures Cue

Zexu Pan, Xinyuan Qian, and Haizhou Li

IEEE Signal Processing Letters, 2022

Bib

% Journal article in IEEE Signal Processing Letters, volume 29; flagged with
% the custom `select` field.
% No issue number is recorded, so `number` is omitted instead of left empty.
@article{pan2022seg,
  select  = {true},
  author  = {Pan, Zexu and Qian, Xinyuan and Li, Haizhou},
  title   = {Speaker Extraction with Co-Speech Gestures Cue},
  journal = {IEEE Signal Processing Letters},
  year    = {2022},
  volume  = {29},
  pages   = {1467--1471},
}

2023

ImagineNet: Target Speaker Extraction with Intermittent Visual Cue Through Embedding Inpainting

Zexu Pan, Wupeng Wang, Marvin Borsdorf, and Haizhou Li

In Proc. IEEE Int. Conf. Acoust., Speech, Signal Process., 2023

Bib

% Conference paper; flagged with the custom `select` field.
% The camelCase name ImagineNet is brace-protected so that styles applying
% sentence casing to titles keep its internal capital.
@inproceedings{pan2023imaginenet,
  select    = {true},
  author    = {Pan, Zexu and Wang, Wupeng and Borsdorf, Marvin and Li, Haizhou},
  title     = {{ImagineNet}: Target Speaker Extraction with Intermittent Visual Cue Through Embedding Inpainting},
  booktitle = {Proc. IEEE Int. Conf. Acoust., Speech, Signal Process.},
  year      = {2023},
}

Target Active Speaker Detection with Audio-visual Cues

Yidi Jiang, Ruijie Tao, Zexu Pan, and Haizhou Li

In Proc. INTERSPEECH, 2023

Bib

% Conference paper (Proc. INTERSPEECH, 2023); flagged with the custom `select` field.
@inproceedings{jiang2023target,
  author    = {Jiang, Yidi and Tao, Ruijie and Pan, Zexu and Li, Haizhou},
  title     = {Target Active Speaker Detection with Audio-visual Cues},
  booktitle = {Proc. INTERSPEECH},
  year      = {2023},
  select    = {true},
}

Speaker Extraction with Detection of Presence and Absence of Target Speakers

Ke Zhang, Marvin Borsdorf, Zexu Pan, Haizhou Li, Yangjie Wei, and Yi Wang

In Proc. INTERSPEECH, 2023

Bib

% Conference paper (Proc. INTERSPEECH, 2023).
@inproceedings{zhang2023absence,
  author    = {Zhang, Ke and Borsdorf, Marvin and Pan, Zexu and Li, Haizhou and Wei, Yangjie and Wang, Yi},
  title     = {Speaker Extraction with Detection of Presence and Absence of Target Speakers},
  booktitle = {Proc. INTERSPEECH},
  year      = {2023},
}

Rethinking the Visual Cues in Audio-visual Speaker Extraction

Junjie Li, Meng Ge, Zexu Pan, Rui Cao, Longbiao Wang, Jianwu Dang, and Shiliang Zhang

In Proc. INTERSPEECH, 2023

Bib

% Conference paper (Proc. INTERSPEECH, 2023).
@inproceedings{li2023rethinking,
  author    = {Li, Junjie and Ge, Meng and Pan, Zexu and Cao, Rui and Wang, Longbiao and Dang, Jianwu and Zhang, Shiliang},
  title     = {Rethinking the Visual Cues in Audio-visual Speaker Extraction},
  booktitle = {Proc. INTERSPEECH},
  year      = {2023},
}

2022

A Hybrid Continuity Loss to Reduce Over-Suppression for Time-domain Target Speaker Extraction

Zexu Pan, Meng Ge, and Haizhou Li

In Proc. INTERSPEECH, 2022

Bib

% Conference paper (Proc. INTERSPEECH, 2022); flagged with the custom `select` field.
@inproceedings{pan2022hybrid,
  author    = {Pan, Zexu and Ge, Meng and Li, Haizhou},
  title     = {A Hybrid Continuity Loss to Reduce Over-Suppression for Time-domain Target Speaker Extraction},
  booktitle = {Proc. INTERSPEECH},
  year      = {2022},
  pages     = {1786--1790},
  select    = {true},
}

VCSE: Time-Domain Visual-Contextual Speaker Extraction Network

Junjie Li, Meng Ge, Zexu Pan, Longbiao Wang, and Jianwu Dang

In Proc. INTERSPEECH, 2022

Bib

% Conference paper (Proc. INTERSPEECH, 2022).
% The acronym VCSE is brace-protected so styles cannot lowercase it.
@inproceedings{tavcse2022,
  author    = {Li, Junjie and Ge, Meng and Pan, Zexu and Wang, Longbiao and Dang, Jianwu},
  title     = {{VCSE}: Time-Domain Visual-Contextual Speaker Extraction Network},
  booktitle = {Proc. INTERSPEECH},
  year      = {2022},
  pages     = {906--910},
}

2021

Muse: Multi-Modal Target Speaker Extraction with Visual Cues

Zexu Pan, Ruijie Tao, Chenglin Xu, and Haizhou Li

In Proc. IEEE Int. Conf. Acoust., Speech, Signal Process., 2021

Bib

% Conference paper; flagged with the custom `select` field.
% No volume or issue number is recorded, so those fields are omitted
% instead of left empty.
@inproceedings{pan2020muse,
  select    = {true},
  author    = {Pan, Zexu and Tao, Ruijie and Xu, Chenglin and Li, Haizhou},
  title     = {Muse: Multi-Modal Target Speaker Extraction with Visual Cues},
  booktitle = {Proc. IEEE Int. Conf. Acoust., Speech, Signal Process.},
  year      = {2021},
  pages     = {6678--6682},
}

Is Someone Speaking? Exploring Long-term Temporal Features for Audio-visual Active Speaker Detection

Ruijie Tao, Zexu Pan, Rohan Kumar Das, Xinyuan Qian, Mike Zheng Shou, and Haizhou Li

In Proc. of the 29th ACM Int. Conf. on Multimedia, 2021

Bib

% Conference paper; flagged with the custom `select` field.
% "Exploring" follows a question mark, so it is brace-protected as a whole
% word to keep its capital under sentence-casing styles.
@inproceedings{tao2021someone,
  select    = {true},
  author    = {Tao, Ruijie and Pan, Zexu and Das, Rohan Kumar and Qian, Xinyuan and Shou, Mike Zheng and Li, Haizhou},
  title     = {Is Someone Speaking? {Exploring} Long-term Temporal Features for Audio-visual Active Speaker Detection},
  booktitle = {Proc. of the 29th ACM Int. Conf. on Multimedia},
  year      = {2021},
  pages     = {3927--3935},
}

Multi-target DoA Estimation with an Audio-visual Fusion Mechanism

Xinyuan Qian, Maulik Madhavi, Zexu Pan, Jiadong Wang, and Haizhou Li

In Proc. IEEE Int. Conf. Acoust., Speech, Signal Process., 2021

Bib

% Conference paper, 2021.
% The acronym DoA is brace-protected so styles keep its mixed case.
@inproceedings{qian2021multi,
  author    = {Qian, Xinyuan and Madhavi, Maulik and Pan, Zexu and Wang, Jiadong and Li, Haizhou},
  title     = {Multi-target {DoA} Estimation with an Audio-visual Fusion Mechanism},
  booktitle = {Proc. IEEE Int. Conf. Acoust., Speech, Signal Process.},
  year      = {2021},
  pages     = {4280--4284},
}

2020

Multi-Modal Attention for Speech Emotion Recognition

Zexu Pan, Zhaojie Luo, Jichen Yang, and Haizhou Li

In Proc. INTERSPEECH, 2020

Bib

% Conference paper (Proc. INTERSPEECH, 2020); flagged with the custom `select` field.
@inproceedings{pan2020multi,
  author    = {Pan, Zexu and Luo, Zhaojie and Yang, Jichen and Li, Haizhou},
  title     = {Multi-Modal Attention for Speech Emotion Recognition},
  booktitle = {Proc. INTERSPEECH},
  year      = {2020},
  pages     = {364--368},
  select    = {true},
}

2023

Towards End-to-end Speaker Diarization in the Wild

Zexu Pan, Gordon Wichern, François G Germain, Aswin Subramanian, and Jonathan Le Roux

Submitted to Autom. Speech Recognit. Understanding Workshop, 2023

Bib

% Workshop submission that is not yet published: recorded as an unpublished
% work, with the submission status in `note` rather than posing as a journal.
% The last author's surname is the two-word "Le Roux", so it is written in
% "Last, First" form to keep both words in the family name.
@unpublished{pan2022towards,
  select = {true},
  author = {Pan, Zexu and Wichern, Gordon and Germain, Fran{\c{c}}ois G and Subramanian, Aswin and Le Roux, Jonathan},
  title  = {Towards End-to-end Speaker Diarization in the Wild},
  note   = {Submitted to Autom. Speech Recognit. Understanding Workshop},
  year   = {2023},
}