neuromaancer · September 26, 2022 14:16 · Sep 26, 2022 · Sep 26, 2022 · Sep 26, 2022 · Sep 26, 2022
diff --git a/utils.py b/utils.py
@@ -68,12 +68,15 @@ def round_timestamps(
     Returns:
         DataFrame: new df with rounded timestamps
     """
+    # df[begin] = pd.to_datetime(df[begin], format="%H:%M:%S.%f").apply(
+    #     pd.Timestamp.ceil, args=(level,).dt.time
+    # )
     df[begin] = pd.to_datetime(df[begin], format="%H:%M:%S.%f").apply(
-        pd.Timestamp.ceil, args=(level,).dt.time
-    )
+        pd.Timestamp.ceil, args=(level,)
+    ).dt.time
     df[end] = pd.to_datetime(df[end], format="%H:%M:%S.%f").apply(
-        pd.Timestamp.ceil, args=(level,).dt.time
-    )
+        pd.Timestamp.ceil, args=(level,)
+    ).dt.time
     return df
 
 

diff --git a/utils.py b/utils.py
@@ -77,7 +77,9 @@ def round_timestamps(
     return df
 
 
-def get_role_pair(df: pd.DataFrame, period: str, dyad: str, session: str) -> dict:
+def get_role_pair(
+    df: pd.DataFrame, period: str, dyad: str, session: str, key="p"
+) -> dict:
     """
     get_role_pair get role pair by period and dyad and session
 
@@ -95,8 +97,11 @@ def get_role_pair(df: pd.DataFrame, period: str, dyad: str, session: str) -> dic
     ]
     role_pair = r["Role"].tolist()
     participant_pair = r["Participant"].tolist()
-    dic = dict(zip(participant_pair, role_pair))
-    return dic
+    if key == "p":
+        return dict(zip(participant_pair, role_pair))
+    elif key == "r":
+        return dict(zip(role_pair, participant_pair))
+
 
 
 def rename_file(f: Path, addition: str, position: str = "postfix") -> Path:

diff --git a/utils.py b/utils.py
@@ -1 +1,208 @@
-‎‎
+"""
+@Created Date: Friday February 4th 2022
+@Author: Alafate Abulimiti
+@Company: INRIA
+@Lab: CoML/Articulab
+@School: PSL/ENS
+@Description: Save the frequent useful functions
+--------------
+@HISTORY:
+Date						 By				            Comments
+----------------			 -----			            -----------------------------------------------------------------
+
+13-02-2022 06:12:32			 Alafate Abulimiti			add get_segments function
+
+13-02-2022 05:47:41			 Alafate Abulimiti			add check_identity function
+
+7-02-2022 01:25:50			 Alafate Abulimiti			modify extract dyad and session function with regex
+
+4-02-2022 03:19:36			 Alafate Abulimiti			add insert_row function
+
+4-02-2022 02:01:23			 Alafate Abulimiti			add rename file function
+
+4-02-2022 01:29:37			Alafate			            add get role pair function
+
+4-02-2022 12:49:8			Alafate			            add round timestamps function
+
+4-02-2022 11:48:45			Alafate			            add extract dyad session from a string
+
+"""
+import pandas as pd
+from pathlib import Path
+from rich import print as rprint
+
+import re
+
+
+def extract_dyad_session(s):
+    """
+    extract_dyad_session extract dyad and session from a string with "_" as the delimiter
+
+    Args:
+        s (str): string
+
+    Returns:
+        int: dyad and session in int format
+    """
+    dyad = re.search(r"D[0-9]{1,2}", s).group(0).replace("D", "")
+    session = re.search(r"S[0-9]{1}", s).group(0).replace("S", "")
+
+    return int(dyad), int(session)
+
+
+def round_timestamps(
+    df: pd.DataFrame,
+    begin: str = "Begin Time - hh:mm:ss.ms",
+    end: str = "End Time - hh:mm:ss.ms",
+    level: str = "100ms",
+) -> pd.DataFrame:
+    """
+    round_timestamps round the timestamps of for dataframe
+
+    Args:
+        df (DataFrame): dataframe with timestamps, normally it is a transcript file or an annotation file.
+        begin (str, optional): Begin time column name. Defaults to "Begin Time - hh:mm:ss.ms".
+        end (str, optional): End time column name. Defaults to "End Time - hh:mm:ss.ms".
+        level (str, optional): Round level. Defaults to "100ms".
+
+    Returns:
+        DataFrame: new df with rounded timestamps
+    """
+    df[begin] = pd.to_datetime(df[begin], format="%H:%M:%S.%f").apply(
+        pd.Timestamp.ceil, args=(level,).dt.time
+    )
+    df[end] = pd.to_datetime(df[end], format="%H:%M:%S.%f").apply(
+        pd.Timestamp.ceil, args=(level,).dt.time
+    )
+    return df
+
+
+def get_role_pair(df: pd.DataFrame, period: str, dyad: str, session: str) -> dict:
+    """
+    get_role_pair get role pair by period and dyad and session
+
+    Args:
+        df (pd.DataFrame): df with period, dyad, session,and role
+        period (str): period -> T: task, S: social
+        dyad (str): dyad
+        session (str): session: 1, 2
+
+    Returns:
+        dict: role pair dictionary, key is person, value is role.
+    """
+    r = df.loc[
+        (df["Dyad"] == dyad) & (df["Session"] == session) & (df["Period"] == period)
+    ]
+    role_pair = r["Role"].tolist()
+    participant_pair = r["Participant"].tolist()
+    dic = dict(zip(participant_pair, role_pair))
+    return dic
+
+
+def rename_file(f: Path, addition: str, position: str = "postfix") -> Path:
+    """
+    rename_file rename a file with a postfix or prefix
+
+    Args:
+        f (Path): File path
+        addition (str): Additional string
+        position (str, optional): Additional string postion. Defaults to "postfix".
+
+    Returns:
+        Path:  Renamed file `pathlib` Path object
+    """
+    if position == "postfix":
+        return Path(*f.parts[:-1]) / Path(f.stem + "_" + addition + f.suffix)
+    elif position == "prefix":
+        return Path(*f.parts[:-1]) / Path(addition + "_" + f.stem + f.suffix)
+
+
+def insert_row(df: pd.DataFrame, row_number: int, row_value: dict) -> pd.DataFrame:
+    """
+    insert_row insert a row in a dataframe at a given row number
+
+    Args:
+        df (pd.DataFrame): target dataframe
+        row_number (int): row number
+        row_value (dict): row value with dict format {column_name: value}
+
+    Returns:
+        pd.DataFrame: new dataframe with inserted row
+    """
+    # Slice the upper half of the dataframe
+    df1 = df[0:row_number]
+
+    # Store the result of lower half of the dataframe
+    df2 = df[row_number:]
+
+    # Insert the row in the upper half dataframe
+    df1.loc[row_number] = row_value
+
+    # Concat the two dataframes
+    df_result = pd.concat([df1, df2])
+
+    # Reassign the index labels
+    df_result.index = [*range(df_result.shape[0])]
+
+    # Return the updated dataframe
+    return df_result
+
+
+def check_identity(
+    rapport_df: pd.DataFrame,
+    line: int,
+    reference: int,
+    cols: list[str] = ["Dyad", "Session"],
+):
+    """
+    check_identity check if a transcript line and a reference line in the same session with same dyad.
+
+    Args:
+        rapport_df (pd.DataFrame): 2016 dataframe with rapport annotations
+        line (int): transcript row index number
+        reference (int): reference row index number
+        cols (list, optional): Defaults to ["Dyad", "Session"].
+
+    Returns:
+        bool: if a transcript line and a reference line in the same session with same dyad, return True, else False.
+    """
+    return (
+        False
+        if False
+        in ((rapport_df.iloc[line][cols] == rapport_df.iloc[reference][cols]).tolist())
+        else True
+    )
+
+
+def get_segments(segment_idx: list(int)):
+    """
+    get segments from a list of segment index.
+    example: [1, 2, 3, 5, 6, 8] -> [[1,2,3], [5,6], [8]]
+
+    Args:
+        segment_idx (list): list with indexes.
+
+    Returns:
+        list(list(int)): a list of segments index with list of int format.
+    """
+    segments = []
+    if len(segment_idx) != 0:
+        tmp = [segment_idx[0]]
+        seg = segment_idx[1:]
+        for i, item in enumerate(seg):
+            if item - 1 == tmp[-1]:
+                tmp.append(item)
+            else:
+                segments.append(tmp)
+                tmp = []
+                tmp.append(item)
+    return segments
+
+
+if __name__ == "__main__":
+
+    l1 = ["Tutor", "Tutee"]
+    l2 = ["P1", "P2"]
+
+    d = dict(zip(l1, l2))
+    rprint(d)
diff --git a/utils.py b/utils.py
@@ -1 +1 @@
-]
+‎‎
diff --git a/utils.py b/utils.py
@@ -1 +1 @@
-p
+]
diff --git a/utils.py b/utils.py
@@ -1,17 +1 @@
-def extract_dyad_session(s):
-    """
-    extract_dyad_session extract dyad and session from a string with "_" as the delimiter
-
-    Args:
-        s (str): string
-
-    Returns:
-        int: dyad and session in int format
-    """
-    dyad = re.search(r"D[0-9]{1,2}", s).group(0).replace("D", "")
-    session = re.search(r"S[0-9]{1}", s).group(0).replace("S", "")
-
-    return int(dyad), int(session)
-
-
-
+p
diff --git a/utils.py b/utils.py
@@ -12,3 +12,6 @@ def extract_dyad_session(s):
     session = re.search(r"S[0-9]{1}", s).group(0).replace("S", "")
 
     return int(dyad), int(session)
+
+
+
diff --git a/utils.py b/utils.py
@@ -11,4 +11,4 @@ def extract_dyad_session(s):
     dyad = re.search(r"D[0-9]{1,2}", s).group(0).replace("D", "")
     session = re.search(r"S[0-9]{1}", s).group(0).replace("S", "")
 
-    return int(dyad), int(session)
+    return int(dyad), int(session)
diff --git a/utils.py b/utils.py
@@ -1 +1,14 @@
-‎‎
+def extract_dyad_session(s):
+    """
+    extract_dyad_session extract dyad and session from a string with "_" as the delimiter
+
+    Args:
+        s (str): string
+
+    Returns:
+        int: dyad and session in int format
+    """
+    dyad = re.search(r"D[0-9]{1,2}", s).group(0).replace("D", "")
+    session = re.search(r"S[0-9]{1}", s).group(0).replace("S", "")
+
+    return int(dyad), int(session)
diff --git a/utils.py b/utils.py
@@ -0,0 +1 @@
+‎‎
Original file line number	Diff line number	Diff line change
		@@ -12,3 +12,6 @@ def extract_dyad_session(s):
		session = re.search(r"S[0-9]{1}", s).group(0).replace("S", "")

		return int(dyad), int(session)