Skip to content

Instantly share code, notes, and snippets.

@lmmx
Last active April 2, 2025 02:27
Show Gist options
  • Select an option

  • Save lmmx/0970a01295e12531f6a3f0ac5537e0b8 to your computer and use it in GitHub Desktop.

Select an option

Save lmmx/0970a01295e12531f6a3f0ac5537e0b8 to your computer and use it in GitHub Desktop.

Revisions

  1. lmmx revised this gist Sep 27, 2022. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion diarise.py
    Original file line number Diff line number Diff line change
    @@ -44,4 +44,5 @@ def td_time_format(td):
    for order in speaker_order
    for speaker, (start_idx, stop_idx) in order.items()
    ]
    rollup_df = df.from_records(rollup_records)
    rollup_df = df.from_records(rollup_records)
    # rollup_df["stype"] = rollup_df.stype.replace("SPEAKER_00", "Name0").replace("SPEAKER_01", "Name1").replace("SPEAKER_02", "Name2").replace("SPEAKER_03", "Name3")
  2. lmmx revised this gist Sep 27, 2022. 1 changed file with 1 addition and 2 deletions.
    3 changes: 1 addition & 2 deletions diarise.py
    Original file line number Diff line number Diff line change
    @@ -23,8 +23,7 @@ def td_time_format(td):
    # via https://stackoverflow.com/a/71214440/2668831
    speaker_runs = {
    speaker: [
    # np.unique(np.array([int(g) for g in grp])[[0,-1]]).tolist()
    np.array([int(g) for g in grp])[[0,-1]].tolist()
    np.array(grp)[[0,-1]].tolist()
    for grp in np.split(group, np.where(np.diff(group) != 1)[0]+1)]
    for speaker, group in df.groupby("stype").agg("tbeg_fmt").groups.items()
    }
  3. lmmx revised this gist Sep 27, 2022. 1 changed file with 27 additions and 8 deletions.
    35 changes: 27 additions & 8 deletions diarise.py
    Original file line number Diff line number Diff line change
    @@ -7,23 +7,42 @@

    # RTTM format https://catalog.ldc.upenn.edu/docs/LDC2004T12/RTTM-format-v13.pdf
    with open("foo.rttm", "w") as rttm:
    diarization.write_rttm(rttm)
    diarization.write_rttm(rttm)

    import pandas as pd
    df = pd.read_csv("foo.rttm", sep=" ", header=None, usecols=[3,4,7], names="tbeg tdur stype".split())

    def td_time_format(td):
    parts = td.components
    return f"{parts.minutes}:{parts.seconds:02}.{parts.milliseconds:03}"
    parts = td.components
    return f"{parts.minutes}:{parts.seconds:02}.{parts.milliseconds:03}"

    df["tbeg_fmt"] = pd.to_timedelta(df.tbeg, unit="s").apply(td_time_format)
    df["tend_fmt"] = pd.to_timedelta(df.tbeg + df.tdur, unit="s").apply(td_time_format)

    # Get consecutive speaker runs, or single points
    # via https://stackoverflow.com/a/71214440/2668831
    speaker_runs = {
    speaker: [
    np.unique(np.array([int(g) for g in grp])[[0,-1]]).tolist()
    for grp in np.split(group, np.where(np.diff(group) != 1)[0]+1)]
    for speaker, group in x.groups.items()
    }
    speaker: [
    # np.unique(np.array([int(g) for g in grp])[[0,-1]]).tolist()
    np.array([int(g) for g in grp])[[0,-1]].tolist()
    for grp in np.split(group, np.where(np.diff(group) != 1)[0]+1)]
    for speaker, group in df.groupby("stype").agg("tbeg_fmt").groups.items()
    }

    # 'Roll up' the timestamps over consecutive runs by inverting the dict
    speaker_order = sorted(
    [{speaker: run} for speaker, runs in speaker_runs.items() for run in runs],
    key=lambda d: [*d.values()]
    )
    rollup_records = [
    {
    "tbeg": df.tbeg[start_idx],
    "tdur": df.tbeg[stop_idx] + df.tdur[stop_idx] - df.tbeg[start_idx],
    "stype": df.stype[start_idx],
    "tbeg_fmt": df.tbeg_fmt[start_idx],
    "tend_fmt": df.tend_fmt[stop_idx],
    }
    for order in speaker_order
    for speaker, (start_idx, stop_idx) in order.items()
    ]
    rollup_df = df.from_records(rollup_records)
  4. lmmx created this gist Sep 27, 2022.
    29 changes: 29 additions & 0 deletions diarise.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,29 @@
    # ffmpeg -i foo.m4a foo.wav

    from pyannote.audio import Pipeline

    # Load the pretrained pyannote speaker-diarization pipeline and run it on
    # the WAV file (see the ffmpeg note above for producing foo.wav).
    pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization")
    diarization = pipeline("foo.wav")

    # Persist the diarization result in RTTM form so it can be re-read as a table.
    # RTTM format https://catalog.ldc.upenn.edu/docs/LDC2004T12/RTTM-format-v13.pdf
    with open("foo.rttm", mode="w") as rttm:
        diarization.write_rttm(rttm)

    import pandas as pd
    # Read the space-separated RTTM file, keeping only zero-based columns 3, 4
    # and 7 and labelling them tbeg, tdur and stype respectively.
    # NOTE(review): in the RTTM spec column 7 is the speaker `name` field rather
    # than `stype`; the column label is kept as-is because later code reads it.
    df = pd.read_csv(
        "foo.rttm",
        sep=" ",
        header=None,
        usecols=[3, 4, 7],
        names=["tbeg", "tdur", "stype"],
    )

    def td_time_format(td):
        """Format a timedelta as ``M:SS.mmm``.

        Args:
            td: An object exposing ``.components`` with ``days``, ``hours``,
                ``minutes``, ``seconds`` and ``milliseconds`` fields (this script
                passes ``pandas.Timedelta`` values).

        Returns:
            A string of total minutes (not zero-padded), two-digit seconds and
            three-digit milliseconds, e.g. ``"1:01.500"``.
        """
        parts = td.components
        # Fold days and hours into the minutes field; using parts.minutes alone
        # would wrap at one hour and make 3700 s look the same as 100 s.
        total_minutes = (parts.days * 24 + parts.hours) * 60 + parts.minutes
        # Sub-millisecond components are dropped (truncated, not rounded).
        return f"{total_minutes}:{parts.seconds:02}.{parts.milliseconds:03}"

    # Convert segment start and end offsets (seconds) to timedeltas, then store
    # their formatted forms alongside the numeric columns.
    segment_starts = pd.to_timedelta(df["tbeg"], unit="s")
    segment_ends = pd.to_timedelta(df["tbeg"] + df["tdur"], unit="s")
    df["tbeg_fmt"] = segment_starts.apply(td_time_format)
    df["tend_fmt"] = segment_ends.apply(td_time_format)

    import numpy as np


    def _consecutive_runs(labels):
        """Collapse integer row labels into runs of consecutive values.

        Args:
            labels: Integer row labels in ascending order (here, the index
                labels of one speaker's rows).

        Returns:
            One list per run: ``[first, last]`` for a run spanning several
            labels, or ``[label]`` for a single-label run.
        """
        # Work on a plain ndarray so np.split does not depend on how a pandas
        # Index happens to behave when passed to NumPy.
        labels = np.asarray(labels)
        if labels.size == 0:
            return []
        # A new run starts wherever the step between neighbouring labels is not 1.
        breaks = np.where(np.diff(labels) != 1)[0] + 1
        # np.unique collapses [i, i] to [i] for single-label runs.
        return [np.unique(run[[0, -1]]).tolist() for run in np.split(labels, breaks)]


    # Get consecutive speaker runs, or single points
    # via https://stackoverflow.com/a/71214440/2668831
    # Maps each speaker label to the runs of adjacent rows attributed to it.
    speaker_runs = {
        speaker: _consecutive_runs(rows)
        for speaker, rows in df.groupby("stype").groups.items()
    }