Last active
April 2, 2025 02:27
-
-
Save lmmx/0970a01295e12531f6a3f0ac5537e0b8 to your computer and use it in GitHub Desktop.
Revisions
-
lmmx revised this gist
Sep 27, 2022. 1 changed file with 2 additions and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -44,4 +44,5 @@ def td_time_format(td): for order in speaker_order for speaker, (start_idx, stop_idx) in order.items() ] rollup_df = df.from_records(rollup_records) # rollup_df["stype"] = rollup_df.stype.replace("SPEAKER_00", "Name0").replace("SPEAKER_01", "Name1").replace("SPEAKER_02", "Name2").replace("SPEAKER_03", "Name3") -
lmmx revised this gist
Sep 27, 2022. 1 changed file with 1 addition and 2 deletions. There are no files selected for viewing.
# Diff hunk @@ -23,8 +23,7 @@ (hunk context label: def td_time_format(td))
# via https://stackoverflow.com/a/71214440/2668831
#
# Map each speaker label to a list of [first_row, last_row] pairs, one pair
# per run of consecutive row labels belonging to that speaker.
#
# `GroupBy.groups` already maps each "stype" value to the row labels in that
# group, so no aggregation call is needed to reach it.
# NOTE(review): `np` (numpy) and `df` must already be in scope here; no
# `import numpy as np` appears in the visible revisions -- confirm.
speaker_runs = {
    speaker: [
        # First and last row label of the run, as plain Python ints.
        np.asarray(grp)[[0, -1]].tolist()
        # A gap != 1 between successive row labels starts a new run.
        for grp in np.split(
            np.asarray(group), np.where(np.diff(group) != 1)[0] + 1
        )
    ]
    for speaker, group in df.groupby("stype").groups.items()
}
lmmx revised this gist
Sep 27, 2022. 1 changed file with 27 additions and 8 deletions. There are no files selected for viewing.
# Diff hunk @@ -7,23 +7,42 @@ -- the script from file line 7 onward.
# `diarization` is produced earlier in the file by the pyannote pipeline.

# RTTM format https://catalog.ldc.upenn.edu/docs/LDC2004T12/RTTM-format-v13.pdf
with open("foo.rttm", "w") as rttm:
    diarization.write_rttm(rttm)

import numpy as np
import pandas as pd

# Keep only turn onset (field 3), turn duration (field 4) and speaker (field 7).
df = pd.read_csv(
    "foo.rttm",
    sep=" ",
    header=None,
    usecols=[3, 4, 7],
    names="tbeg tdur stype".split(),
)


def td_time_format(td):
    """Format a pandas Timedelta as ``M:SS.mmm``.

    Hours and days are folded into the minutes field so that offsets of one
    hour or more do not wrap around (e.g. 1h05m03s -> ``65:03.000``).
    """
    parts = td.components
    total_minutes = (parts.days * 24 + parts.hours) * 60 + parts.minutes
    return f"{total_minutes}:{parts.seconds:02}.{parts.milliseconds:03}"


def _consecutive_runs(row_ids):
    """Return ``[first, last]`` for each run of consecutive integers in row_ids."""
    ids = np.asarray(row_ids, dtype=int)
    if ids.size == 0:
        return []
    # A gap != 1 between neighbouring row labels marks the start of a new run.
    breaks = np.where(np.diff(ids) != 1)[0] + 1
    return [[int(run[0]), int(run[-1])] for run in np.split(ids, breaks)]


df["tbeg_fmt"] = pd.to_timedelta(df.tbeg, unit="s").apply(td_time_format)
df["tend_fmt"] = pd.to_timedelta(df.tbeg + df.tdur, unit="s").apply(td_time_format)

# Get consecutive speaker runs, or single points
# via https://stackoverflow.com/a/71214440/2668831
# `GroupBy.groups` maps each speaker label to the row labels of its turns;
# df keeps read_csv's default integer index, so adjacent labels are adjacent rows.
speaker_runs = {
    speaker: _consecutive_runs(row_ids)
    for speaker, row_ids in df.groupby("stype").groups.items()
}

# 'Roll up' the timestamps over consecutive runs by inverting the dict:
# one {speaker: [start_idx, stop_idx]} entry per run, ordered by row position.
speaker_order = sorted(
    [{speaker: run} for speaker, runs in speaker_runs.items() for run in runs],
    key=lambda d: next(iter(d.values())),
)

# One record per run: it begins where the run's first turn begins and ends
# where the run's last turn ends.
rollup_records = [
    {
        "tbeg": df.tbeg[start_idx],
        "tdur": df.tbeg[stop_idx] + df.tdur[stop_idx] - df.tbeg[start_idx],
        "stype": df.stype[start_idx],
        "tbeg_fmt": df.tbeg_fmt[start_idx],
        "tend_fmt": df.tend_fmt[stop_idx],
    }
    for order in speaker_order
    for speaker, (start_idx, stop_idx) in order.items()
]
# from_records is a DataFrame constructor; call it on the class, not on df.
rollup_df = pd.DataFrame.from_records(rollup_records)
lmmx created this gist
Sep 27, 2022. There are no files selected for viewing.
# Initial revision of the gist (diff hunk @@ -0,0 +1,29 @@).

# ffmpeg -i foo.m4a foo.wav
from pyannote.audio import Pipeline

pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
diarization = pipeline("foo.wav")

# RTTM format https://catalog.ldc.upenn.edu/docs/LDC2004T12/RTTM-format-v13.pdf
with open("foo.rttm", "w") as rttm:
    diarization.write_rttm(rttm)

import numpy as np
import pandas as pd

# Keep only turn onset (field 3), turn duration (field 4) and speaker (field 7).
df = pd.read_csv(
    "foo.rttm",
    sep=" ",
    header=None,
    usecols=[3, 4, 7],
    names="tbeg tdur stype".split(),
)


def td_time_format(td):
    """Format a pandas Timedelta as ``M:SS.mmm``.

    Hours and days are folded into the minutes field so that offsets of one
    hour or more do not wrap around (e.g. 1h05m03s -> ``65:03.000``).
    """
    parts = td.components
    total_minutes = (parts.days * 24 + parts.hours) * 60 + parts.minutes
    return f"{total_minutes}:{parts.seconds:02}.{parts.milliseconds:03}"


df["tbeg_fmt"] = pd.to_timedelta(df.tbeg, unit="s").apply(td_time_format)
df["tend_fmt"] = pd.to_timedelta(df.tbeg + df.tdur, unit="s").apply(td_time_format)

# Get consecutive speaker runs, or single points
# via https://stackoverflow.com/a/71214440/2668831
# `GroupBy.groups` maps each speaker label to the row labels of its turns.
# np.unique collapses a one-row run to a single index ("single point").
speaker_runs = {
    speaker: [
        np.unique(np.asarray(grp)[[0, -1]]).tolist()
        # A gap != 1 between successive row labels starts a new run.
        for grp in np.split(
            np.asarray(group), np.where(np.diff(group) != 1)[0] + 1
        )
    ]
    for speaker, group in df.groupby("stype").groups.items()
}