Skip to content

Instantly share code, notes, and snippets.

@neuromaancer
Last active September 26, 2022 14:16
Show Gist options
  • Save neuromaancer/ada3534ded3c1aa6a3f39e83ef930a1f to your computer and use it in GitHub Desktop.
Save neuromaancer/ada3534ded3c1aa6a3f39e83ef930a1f to your computer and use it in GitHub Desktop.

Revisions

  1. neuromaancer revised this gist Sep 26, 2022. No changes.
  2. neuromaancer revised this gist Sep 26, 2022. 1 changed file with 7 additions and 4 deletions.
    11 changes: 7 additions & 4 deletions utils.py
    Original file line number Diff line number Diff line change
    @@ -68,12 +68,15 @@ def round_timestamps(
    Returns:
    DataFrame: new df with rounded timestamps
    """
    # df[begin] = pd.to_datetime(df[begin], format="%H:%M:%S.%f").apply(
    # pd.Timestamp.ceil, args=(level,).dt.time
    # )
    df[begin] = pd.to_datetime(df[begin], format="%H:%M:%S.%f").apply(
    pd.Timestamp.ceil, args=(level,).dt.time
    )
    pd.Timestamp.ceil, args=(level,)
    ).dt.time
    df[end] = pd.to_datetime(df[end], format="%H:%M:%S.%f").apply(
    pd.Timestamp.ceil, args=(level,).dt.time
    )
    pd.Timestamp.ceil, args=(level,)
    ).dt.time
    return df


  3. neuromaancer revised this gist Sep 26, 2022. No changes.
  4. neuromaancer revised this gist Sep 26, 2022. 1 changed file with 8 additions and 3 deletions.
    11 changes: 8 additions & 3 deletions utils.py
    Original file line number Diff line number Diff line change
    @@ -77,7 +77,9 @@ def round_timestamps(
    return df


    def get_role_pair(df: pd.DataFrame, period: str, dyad: str, session: str) -> dict:
    def get_role_pair(
    df: pd.DataFrame, period: str, dyad: str, session: str, key="p"
    ) -> dict:
    """
    get_role_pair get role pair by period and dyad and session
    @@ -95,8 +97,11 @@ def get_role_pair(df: pd.DataFrame, period: str, dyad: str, session: str) -> dic
    ]
    role_pair = r["Role"].tolist()
    participant_pair = r["Participant"].tolist()
    dic = dict(zip(participant_pair, role_pair))
    return dic
    if key == "p":
    return dict(zip(participant_pair, role_pair))
    elif key == "r":
    return dict(zip(role_pair, participant_pair))



    def rename_file(f: Path, addition: str, position: str = "postfix") -> Path:
  5. neuromaancer revised this gist Sep 26, 2022. 1 changed file with 208 additions and 1 deletion.
    209 changes: 208 additions & 1 deletion utils.py
    Original file line number Diff line number Diff line change
    @@ -1 +1,208 @@
    ‎‎​
    """
    @Created Date: Friday February 4th 2022
    @Author: Alafate Abulimiti
    @Company: INRIA
    @Lab: CoML/Articulab
    @School: PSL/ENS
    @Description: Frequently used utility functions
    --------------
    @HISTORY:
    Date By Comments
    ---------------- ----- -----------------------------------------------------------------
    13-02-2022 06:12:32 Alafate Abulimiti add get_segments function
    13-02-2022 05:47:41 Alafate Abulimiti add check_identity function
    7-02-2022 01:25:50 Alafate Abulimiti modify extract dyad and session function with regex
    4-02-2022 03:19:36 Alafate Abulimiti add insert_row function
    4-02-2022 02:01:23 Alafate Abulimiti add rename file function
    4-02-2022 01:29:37 Alafate add get role pair function
    4-02-2022 12:49:8 Alafate add round timestamps function
    4-02-2022 11:48:45 Alafate add extract dyad session from a string
    """
    import pandas as pd
    from pathlib import Path
    from rich import print as rprint

    import re


    def extract_dyad_session(s: str) -> tuple[int, int]:
        """
        extract_dyad_session extract the dyad and session numbers from a string

        The dyad is taken from the first "D" followed by one or two digits and the
        session from the first "S" followed by one digit, e.g. "D12_S1_T" -> (12, 1).
        Args:
            s (str): string containing a dyad marker and a session marker
        Raises:
            ValueError: if the dyad marker or the session marker is not found in `s`
        Returns:
            tuple[int, int]: (dyad, session) in int format
        """
        dyad = re.search(r"D([0-9]{1,2})", s)
        session = re.search(r"S([0-9])", s)
        # re.search returns None when nothing matches; fail with a clear message
        # instead of an AttributeError on None.
        if dyad is None or session is None:
            raise ValueError(f"no dyad/session marker (D<n>/S<n>) found in {s!r}")

        return int(dyad.group(1)), int(session.group(1))


    def round_timestamps(
        df: pd.DataFrame,
        begin: str = "Begin Time - hh:mm:ss.ms",
        end: str = "End Time - hh:mm:ss.ms",
        level: str = "100ms",
    ) -> pd.DataFrame:
        """
        round_timestamps round up (ceil) the begin and end timestamps of a dataframe

        Both columns are parsed with the format "%H:%M:%S.%f", rounded up to the
        next multiple of `level` and stored back as `datetime.time` values.
        Args:
            df (DataFrame): dataframe with timestamps, normally it is a transcript file or an annotation file.
            begin (str, optional): Begin time column name. Defaults to "Begin Time - hh:mm:ss.ms".
            end (str, optional): End time column name. Defaults to "End Time - hh:mm:ss.ms".
            level (str, optional): Round level (pandas frequency string). Defaults to "100ms".
        Returns:
            DataFrame: the same dataframe object, with both columns overwritten in place
        """
        for column in (begin, end):
            parsed = pd.to_datetime(df[column], format="%H:%M:%S.%f")
            # `.dt` must be applied to the Series; the previous code applied it to
            # the `args` tuple, which raised AttributeError on every call.
            df[column] = parsed.dt.ceil(level).dt.time
        return df


    def get_role_pair(
        df: pd.DataFrame, period: str, dyad: str, session: str, key: str = "p"
    ) -> dict:
        """
        get_role_pair get role pair by period and dyad and session

        Args:
            df (pd.DataFrame): df with "Period", "Dyad", "Session", "Role" and "Participant" columns
            period (str): period -> T: task, S: social
            dyad (str): dyad
            session (str): session: 1, 2
            key (str, optional): "p" to key the result by participant (value is role),
                "r" to key it by role (value is participant). Defaults to "p".
        Raises:
            ValueError: if `key` is neither "p" nor "r"
        Returns:
            dict: role pair dictionary; empty when no row matches.
        """
        if key not in ("p", "r"):
            raise ValueError(f"key must be 'p' or 'r', got {key!r}")

        matching = df.loc[
            (df["Dyad"] == dyad) & (df["Session"] == session) & (df["Period"] == period)
        ]
        roles = matching["Role"].tolist()
        participants = matching["Participant"].tolist()
        if key == "p":
            return dict(zip(participants, roles))
        return dict(zip(roles, participants))


    def rename_file(f: Path, addition: str, position: str = "postfix") -> Path:
        """
        rename_file build a new file path with a postfix or prefix added to the file name

        Only the path object is computed; nothing is renamed on disk. The addition
        is joined to the stem with "_" and the last suffix is kept,
        e.g. "dir/a.txt" + "v2" -> "dir/a_v2.txt" (postfix) or "dir/v2_a.txt" (prefix).
        Args:
            f (Path): File path
            addition (str): Additional string
            position (str, optional): Additional string position, "postfix" or "prefix". Defaults to "postfix".
        Raises:
            ValueError: if `position` is neither "postfix" nor "prefix"
        Returns:
            Path: Renamed file `pathlib` Path object
        """
        if position == "postfix":
            new_name = f.stem + "_" + addition + f.suffix
        elif position == "prefix":
            new_name = addition + "_" + f.stem + f.suffix
        else:
            # Previously fell through and returned None, contradicting the
            # annotated return type.
            raise ValueError(
                f"position must be 'postfix' or 'prefix', got {position!r}"
            )
        return f.parent / new_name


    def insert_row(df: pd.DataFrame, row_number: int, row_value: dict) -> pd.DataFrame:
        """
        insert_row insert a row in a dataframe at a given row number

        The input dataframe is left untouched; the result gets a fresh 0..n-1 index.
        Args:
            df (pd.DataFrame): target dataframe
            row_number (int): positional row number at which the new row is placed
            row_value (dict): row value with dict format {column_name: value};
                keys that are not columns of `df` are ignored, missing columns become NaN
        Returns:
            pd.DataFrame: new dataframe with inserted row
        """
        # Split positionally so the result does not depend on the index labels.
        upper = df.iloc[:row_number]
        lower = df.iloc[row_number:]

        # Build the new row as its own frame instead of assigning through `.loc` on
        # a slice, which warns (SettingWithCopy) and overwrites an existing row when
        # the label `row_number` is already present in the upper half.
        new_row = pd.DataFrame([row_value], columns=df.columns)

        # Empty halves are skipped so they cannot influence the resulting dtypes.
        pieces = [piece for piece in (upper, new_row, lower) if len(piece)]

        # ignore_index=True reassigns the index labels to 0..n-1.
        return pd.concat(pieces, ignore_index=True)


    def check_identity(
        rapport_df: pd.DataFrame,
        line: int,
        reference: int,
        cols: "list[str] | None" = None,
    ) -> bool:
        """
        check_identity check if a transcript line and a reference line in the same session with same dyad.

        Args:
            rapport_df (pd.DataFrame): 2016 dataframe with rapport annotations
            line (int): transcript row index number (positional)
            reference (int): reference row index number (positional)
            cols (list, optional): columns that must match. Defaults to ["Dyad", "Session"].
        Returns:
            bool: True if both rows hold equal values in every column of `cols`, else False.
        """
        # A None default avoids sharing one mutable list across calls.
        if cols is None:
            cols = ["Dyad", "Session"]
        line_values = rapport_df.iloc[line][cols]
        reference_values = rapport_df.iloc[reference][cols]
        # bool() converts the reduction result into a plain Python bool.
        return bool((line_values == reference_values).all())


    def get_segments(segment_idx: list[int]) -> list[list[int]]:
        """
        get segments from a list of segment index.

        Consecutive indexes (each one exactly previous + 1) are grouped together.
        example: [1, 2, 3, 5, 6, 8] -> [[1,2,3], [5,6], [8]]
        Args:
            segment_idx (list): list with indexes.
        Returns:
            list(list(int)): a list of segments index with list of int format;
            empty when `segment_idx` is empty.
        """
        segments = []
        for item in segment_idx:
            if segments and item - 1 == segments[-1][-1]:
                # Continues the run currently being built.
                segments[-1].append(item)
            else:
                # Start a new run; appending it right away ensures the last run is
                # never dropped.
                segments.append([item])
        return segments


    if __name__ == "__main__":
        # Manual demo: pair each role with a participant and pretty-print the mapping.
        roles = ["Tutor", "Tutee"]
        participants = ["P1", "P2"]
        rprint(dict(zip(roles, participants)))
  6. neuromaancer revised this gist Sep 26, 2022. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion utils.py
    Original file line number Diff line number Diff line change
    @@ -1 +1 @@
    ]
    ‎‎​
  7. neuromaancer revised this gist Sep 26, 2022. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion utils.py
    Original file line number Diff line number Diff line change
    @@ -1 +1 @@
    p
    ]
  8. neuromaancer revised this gist Sep 26, 2022. 1 changed file with 1 addition and 17 deletions.
    18 changes: 1 addition & 17 deletions utils.py
    Original file line number Diff line number Diff line change
    @@ -1,17 +1 @@
    def extract_dyad_session(s: str) -> tuple[int, int]:
        """
        extract_dyad_session extract the dyad and session numbers from a string

        The dyad is taken from the first "D" followed by one or two digits and the
        session from the first "S" followed by one digit, e.g. "D12_S1_T" -> (12, 1).
        Args:
            s (str): string containing a dyad marker and a session marker
        Raises:
            ValueError: if the dyad marker or the session marker is not found in `s`
        Returns:
            tuple[int, int]: (dyad, session) in int format
        """
        dyad = re.search(r"D([0-9]{1,2})", s)
        session = re.search(r"S([0-9])", s)
        # re.search returns None when nothing matches; fail with a clear message
        # instead of an AttributeError on None.
        if dyad is None or session is None:
            raise ValueError(f"no dyad/session marker (D<n>/S<n>) found in {s!r}")

        return int(dyad.group(1)), int(session.group(1))



    p
  9. neuromaancer revised this gist Sep 26, 2022. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions utils.py
    Original file line number Diff line number Diff line change
    @@ -12,3 +12,6 @@ def extract_dyad_session(s):
    session = re.search(r"S[0-9]{1}", s).group(0).replace("S", "")

    return int(dyad), int(session)



  10. neuromaancer revised this gist Sep 26, 2022. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion utils.py
    Original file line number Diff line number Diff line change
    @@ -11,4 +11,4 @@ def extract_dyad_session(s):
    dyad = re.search(r"D[0-9]{1,2}", s).group(0).replace("D", "")
    session = re.search(r"S[0-9]{1}", s).group(0).replace("S", "")

    return int(dyad), int(session)
    return int(dyad), int(session)
  11. neuromaancer revised this gist Sep 26, 2022. 1 changed file with 14 additions and 1 deletion.
    15 changes: 14 additions & 1 deletion utils.py
    Original file line number Diff line number Diff line change
    @@ -1 +1,14 @@
    ‎‎​
    def extract_dyad_session(s: str) -> tuple[int, int]:
        """
        extract_dyad_session extract the dyad and session numbers from a string

        The dyad is taken from the first "D" followed by one or two digits and the
        session from the first "S" followed by one digit, e.g. "D12_S1_T" -> (12, 1).
        Args:
            s (str): string containing a dyad marker and a session marker
        Raises:
            ValueError: if the dyad marker or the session marker is not found in `s`
        Returns:
            tuple[int, int]: (dyad, session) in int format
        """
        dyad = re.search(r"D([0-9]{1,2})", s)
        session = re.search(r"S([0-9])", s)
        # re.search returns None when nothing matches; fail with a clear message
        # instead of an AttributeError on None.
        if dyad is None or session is None:
            raise ValueError(f"no dyad/session marker (D<n>/S<n>) found in {s!r}")

        return int(dyad.group(1)), int(session.group(1))
  12. neuromaancer created this gist Sep 26, 2022.
    1 change: 1 addition & 0 deletions utils.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1 @@
    ‎‎​