Skip to content

Instantly share code, notes, and snippets.

@fomightez
Last active March 31, 2025 20:05
Show Gist options
  • Select an option

  • Save fomightez/4d63cb48f2969b2fdbf3407ca64d4d2c to your computer and use it in GitHub Desktop.

Select an option

Save fomightez/4d63cb48f2969b2fdbf3407ca64d4d2c to your computer and use it in GitHub Desktop.

Revisions

  1. fomightez revised this gist Mar 31, 2025. No changes.
  2. fomightez revised this gist Mar 31, 2025. No changes.
  3. fomightez revised this gist Mar 31, 2025. 1 changed file with 4 additions and 4 deletions.
    8 changes: 4 additions & 4 deletions mre_for_highlight_gene_names.py
    Original file line number Diff line number Diff line change
    @@ -5,7 +5,7 @@

    df = pd.DataFrame({
    "Gene_name": ["sdsR", "arrS","gadF"],
    "Genes_in_same_transcription_unit": ['pphA, sdsR', 'arrS','mdtF,mdtE,gadF,gadE'],
    "Genes_in_same_transcription_unit": ['pphA, sdsR', 'arrS','mdtF, mdtE, gadF, gadE'],
    })
    # Convert Gene_name column to a set for quick lookup
    gene_set = set(df['Gene_name'].dropna()) # Drop NaNs from Gene_name to avoid issues
    @@ -18,12 +18,12 @@ def highlight_genes(row):
    if pd.isna(genes_str):
    return None # Keep NaNs as is

    genes_list = genes_str.split(',') # Split into list
    print(genes_list)
    genes_list = genes_str.split(', ') # Split into list
    #print(genes_list)

    # Add asterisks to genes NOT in the Gene_name column
    highlighted_list = [f"*{gene}*" if gene not in gene_set else gene for gene in genes_list]
    print(highlighted_list)
    #print(highlighted_list)

    return ', '.join(highlighted_list) # Join back into string

  4. fomightez created this gist Mar 31, 2025.
    50 changes: 50 additions & 0 deletions mre_for_highlight_gene_names.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,50 @@
    import pandas as pd
    import numpy as np
    import matplotlib as mpl
    import re

    # Minimal example data: one row per gene, paired with the comma-separated
    # genes listed for the same transcription unit.
    df = pd.DataFrame(
        data=[
            ("sdsR", 'pphA, sdsR'),
            ("arrS", 'arrS'),
            ("gadF", 'mdtF,mdtE,gadF,gadE'),
        ],
        columns=["Gene_name", "Genes_in_same_transcription_unit"],
    )
    # Set of known gene names for quick membership tests; missing values in
    # the Gene_name column are left out.
    gene_set = {name for name in df['Gene_name'] if pd.notna(name)}

    # Function to highlight genes
    def highlight_genes(row):
        """Mark genes of a transcription unit that are absent from ``gene_set``.

        Reads the comma-separated string stored under
        ``'Genes_in_same_transcription_unit'`` and wraps every gene that is
        not a member of the module-level ``gene_set`` in asterisks.

        Args:
            row: Mapping-like row (for example the Series handed over by
                ``df.apply(..., axis=1)``) that has a
                ``'Genes_in_same_transcription_unit'`` entry.

        Returns:
            The genes re-joined with ``', '``, with genes not in ``gene_set``
            written as ``*gene*``; ``None`` when the entry is missing
            (NaN/None).
        """
        genes_str = row['Genes_in_same_transcription_unit']

        # Handle NaN or missing values
        if pd.isna(genes_str):
            return None  # Keep NaNs as is

        # Strip every name: splitting 'pphA, sdsR' on ',' alone leaves ' sdsR',
        # which never matches gene_set and would be starred by mistake.
        # Empty pieces (e.g. from a trailing comma) are dropped.
        stripped = (piece.strip() for piece in genes_str.split(','))
        genes_list = [gene for gene in stripped if gene]

        # Add asterisks to genes NOT in the Gene_name column
        return ', '.join(
            gene if gene in gene_set else f"*{gene}*" for gene in genes_list
        )

    # Build the asterisk-marked column one row at a time. This has to run
    # before the name `highlight_genes` is rebound to the HTML formatter
    # defined further down in this file.
    df['Highlighted_Genes'] = df.apply(func=highlight_genes, axis="columns")

    # Function to highlight words inside asterisks
    def highlight_genes(val):
        """Turn ``*text*`` markers in a cell value into red ``<span>`` HTML.

        NOTE: this definition reuses the name of the row-level
        ``highlight_genes`` defined earlier in the file and replaces it from
        this point on.

        Args:
            val: Cell value. Only ``str`` values are rewritten.

        Returns:
            ``val`` with every ``*...*`` pair replaced by
            ``<span style="color: red;">...</span>`` (asterisks removed);
            non-string values (NaN, None, numbers) are returned unchanged.
        """
        if not isinstance(val, str):  # Ensure it's a string
            return val  # Return as is (preserves NaN or other values)

        # Non-greedy match so each *...* pair is wrapped on its own; the
        # back-reference \1 inserts the text that sat between the asterisks.
        return re.sub(r'\*(.*?)\*', r'<span style="color: red;">\1</span>', val)

    # Attach the HTML formatter to the 'Highlighted_Genes' column via Styler.
    # The lambda looks up `highlight_genes` when it is called, i.e. the
    # string-to-<span> version defined just above in this file.
    df_styled = df.style.format(
        formatter={'Highlighted_Genes': lambda cell: highlight_genes(cell)},
    )

    # Bare expression: as the last line of a Jupyter cell this displays the Styler
    df_styled