fomightez · March 31, 2025 20:05 · Mar 31, 2025 · Mar 31, 2025 · Mar 31, 2025 · Mar 31, 2025
diff --git a/mre_for_highlight_gene_names.py b/mre_for_highlight_gene_names.py
@@ -5,7 +5,7 @@
 
 df = pd.DataFrame({
     "Gene_name": ["sdsR", "arrS","gadF"],
-    "Genes_in_same_transcription_unit": ['pphA, sdsR', 'arrS','mdtF,mdtE,gadF,gadE'],
+    "Genes_in_same_transcription_unit": ['pphA, sdsR', 'arrS','mdtF, mdtE, gadF, gadE'],
 })
 # Convert Gene_name column to a set for quick lookup
 gene_set = set(df['Gene_name'].dropna())  # Drop NaNs from Gene_name to avoid issues
@@ -18,12 +18,12 @@ def highlight_genes(row):
     if pd.isna(genes_str):
         return None  # Keep NaNs as is
 
-    genes_list = genes_str.split(',')  # Split into list
-    print(genes_list)
+    genes_list = genes_str.split(', ')  # Split into list
+    #print(genes_list)
 
     # Add asterisks to genes NOT in the Gene_name column
     highlighted_list = [f"*{gene}*" if gene not in gene_set else gene for gene in genes_list]
-    print(highlighted_list)
+    #print(highlighted_list)
 
     return ', '.join(highlighted_list)  # Join back into string
 

diff --git a/mre_for_highlight_gene_names.py b/mre_for_highlight_gene_names.py
@@ -0,0 +1,50 @@
+import pandas as pd
+import numpy as np
+import matplotlib as mpl
+import re
+
+# Minimal example table: one gene per row plus the genes sharing its transcription unit
+df = pd.DataFrame(
+    data={
+        "Gene_name": ["sdsR", "arrS", "gadF"],
+        "Genes_in_same_transcription_unit": ['pphA, sdsR', 'arrS', 'mdtF,mdtE,gadF,gadE'],
+    },
+)
+# Known gene names as a set for quick membership tests; NaN entries are dropped first
+gene_set = set(df['Gene_name'].dropna().tolist())
+
+# Function to highlight genes
+def highlight_genes(row):
+    """Mark the genes of a row's transcription unit that are absent from ``gene_set``.
+
+    Args:
+        row: Mapping-like row (e.g. a DataFrame row passed by ``df.apply``)
+            holding a comma-separated gene string under the key
+            ``'Genes_in_same_transcription_unit'``.
+
+    Returns:
+        The genes re-joined with ``', '``, where every gene not found in the
+        module-level ``gene_set`` is wrapped in asterisks, or ``None`` when
+        the cell is missing (NaN/None).
+    """
+    genes_str = row['Genes_in_same_transcription_unit']
+
+    # Missing cells yield None instead of a marked-up string
+    if pd.isna(genes_str):
+        return None
+
+    # Strip every token so that both 'a,b' and 'a, b' match gene_set entries;
+    # a bare split(',') leaves ' b', which never matches and gets starred.
+    genes = [gene.strip() for gene in genes_str.split(',')]
+
+    # Add asterisks to genes NOT in the Gene_name column; empty tokens left
+    # by stray or trailing commas are skipped rather than rendered as '**'.
+    marked = [gene if gene in gene_set else f"*{gene}*" for gene in genes if gene]
+
+    return ', '.join(marked)
+
+# Build the 'Highlighted_Genes' column by calling highlight_genes once per row (axis=1)
+df['Highlighted_Genes'] = df.apply(highlight_genes, axis=1)
+
+# Function to highlight words inside asterisks
+def highlight_genes(val):
+    """Convert ``*text*`` markers in a cell value into red HTML ``<span>`` tags.
+
+    Values that are not strings (NaN, None, numbers, ...) are returned
+    unchanged.
+
+    NOTE: this definition rebinds the name ``highlight_genes``, replacing the
+    row-level function of the same name defined earlier in the file.
+    """
+    if isinstance(val, str):
+        # Non-greedy capture of the text between each pair of asterisks
+        return re.sub(
+            r'\*(.*?)\*',
+            lambda found: f'<span style="color: red;">{found.group(1)}</span>',
+            val,
+        )
+    return val
+
+# Route the 'Highlighted_Genes' column through the HTML formatter via a Styler
+df_styled = df.style.format(
+    formatter={'Highlighted_Genes': lambda cell: highlight_genes(cell)},
+)
+
+# Bare trailing expression so that a Jupyter Notebook cell displays the Styler
+df_styled
No results found