Last active
March 31, 2025 20:05
-
-
Save fomightez/4d63cb48f2969b2fdbf3407ca64d4d2c to your computer and use it in GitHub Desktop.
Revisions
-
fomightez revised this gist
Mar 31, 2025 . No changes. There are no files selected for viewing
-
fomightez revised this gist
Mar 31, 2025 . No changes. There are no files selected for viewing
-
fomightez revised this gist
Mar 31, 2025 . 1 changed file with 4 additions and 4 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -5,7 +5,7 @@ df = pd.DataFrame({ "Gene_name": ["sdsR", "arrS","gadF"], "Genes_in_same_transcription_unit": ['pphA, sdsR', 'arrS','mdtF, mdtE, gadF, gadE'], }) # Convert Gene_name column to a set for quick lookup gene_set = set(df['Gene_name'].dropna()) # Drop NaNs from Gene_name to avoid issues @@ -18,12 +18,12 @@ def highlight_genes(row): if pd.isna(genes_str): return None # Keep NaNs as is genes_list = genes_str.split(', ') # Split into list #print(genes_list) # Add asterisks to genes NOT in the Gene_name column highlighted_list = [f"*{gene}*" if gene not in gene_set else gene for gene in genes_list] #print(highlighted_list) return ', '.join(highlighted_list) # Join back into string -
fomightez created this gist
Mar 31, 2025 . There are no files selected for viewing
# The next two comment lines keep, verbatim, the page text that preceded the
# code on this line in the scraped gist revision view.
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters
# Original file line number Diff line number Diff line change @@ -0,0 +1,50 @@

# Build a small gene table, mark (with asterisks) every gene of a
# transcription unit that is absent from the "Gene_name" column, and render
# the marked genes in red through a pandas Styler.

import re

import matplotlib as mpl
import numpy as np
import pandas as pd

df = pd.DataFrame({
    "Gene_name": ["sdsR", "arrS", "gadF"],
    "Genes_in_same_transcription_unit": [
        'pphA, sdsR',
        'arrS',
        'mdtF,mdtE,gadF,gadE',
    ],
})

# Set of the values in the Gene_name column for quick membership tests.
# NaNs are dropped so a missing name can never count as a known gene.
gene_set = set(df['Gene_name'].dropna())

# Text wrapped in a pair of asterisks. The match is non-greedy so that two
# marked genes in the same string are captured separately.
_STARRED = re.compile(r'\*(.*?)\*')


def mark_unlisted_genes(row):
    """Wrap in asterisks each gene of the row that is not in ``gene_set``.

    Reads ``row['Genes_in_same_transcription_unit']``, a comma-separated
    string of gene names. Whitespace around each name is ignored, so both
    ``'a,b'`` and ``'a, b'`` are handled; empty entries (e.g. from a
    trailing comma) are dropped.

    Returns the names joined with ``', '``, or ``None`` when the cell is
    missing (NaN/None).
    """
    genes_str = row['Genes_in_same_transcription_unit']

    # Keep missing cells as missing.
    if pd.isna(genes_str):
        return None

    # Strip before the membership test: splitting 'pphA, sdsR' on ',' alone
    # leaves ' sdsR', which would never match an entry of gene_set.
    genes = [gene.strip() for gene in genes_str.split(',')]
    marked = [
        gene if gene in gene_set else f"*{gene}*"
        for gene in genes
        if gene
    ]
    return ', '.join(marked)


# Apply function
df['Highlighted_Genes'] = df.apply(mark_unlisted_genes, axis=1)


def highlight_genes(val):
    """Replace every ``*text*`` in *val* with a red ``<span>`` around text.

    Non-string values (NaN, None, numbers) are returned unchanged.
    """
    if not isinstance(val, str):
        return val
    return _STARRED.sub(r'<span style="color: red;">\1</span>', val)


# Apply the function using Styler
df_styled = df.style.format({'Highlighted_Genes': highlight_genes})

# Display in Jupyter Notebook
df_styled