From e5ec61a8e4ddf491ede3d493bac26f188d3443b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Al=C3=A1n=20F=2E=20Mu=C3=B1oz?= Date: Tue, 14 Jan 2025 20:14:55 -0500 Subject: [PATCH 1/3] fix(rr): adjust external links for gallery --- libs/jump_rr/src/jump_rr/galleries.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/libs/jump_rr/src/jump_rr/galleries.py b/libs/jump_rr/src/jump_rr/galleries.py index 14fc5bd..c812715 100644 --- a/libs/jump_rr/src/jump_rr/galleries.py +++ b/libs/jump_rr/src/jump_rr/galleries.py @@ -88,10 +88,7 @@ def generate_gallery(dset: str, write: bool = True) -> pl.DataFrame: df = df.with_columns( [ pl.col(jcp_col).replace(jcp_std_mapper).alias(std_outname), - pl.format( - get_formatter("external_flat"), - pl.col(jcp_col).replace(jcp_external_mapper), - ).alias(ext_links_col), + pl.col(jcp_col).replace(jcp_external_mapper).alias(ext_links_col), ] ) From 5093373cdafba922a8689534de39620b483841de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Al=C3=A1n=20F=2E=20Mu=C3=B1oz?= Date: Tue, 14 Jan 2025 20:16:36 -0500 Subject: [PATCH 2/3] docs(rr): update shortlinks --- libs/jump_rr/metadata/shortlinks.org | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/libs/jump_rr/metadata/shortlinks.org b/libs/jump_rr/metadata/shortlinks.org index ee059c3..d28f36d 100644 --- a/libs/jump_rr/metadata/shortlinks.org +++ b/libs/jump_rr/metadata/shortlinks.org @@ -2,10 +2,12 @@ These are the full links that correspond to the README's shortlinks. Keeping them here makes it easier to bulk-edit and serves as a quick reference to bypass broad.io's clunky web interface. * shortlink:fulllink -crispr: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_matches.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/crispr.parquet/content#/data/content -crispr_feature: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_feature.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/crispr_features.parquet/content#/data/content -orf: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_matches.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/orf.parquet/content#/data/content -orf_feature: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_feature.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/orf_features.parquet/content#/data/content -compound_gallery: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/compound_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/compound_gallery.parquet/content#/data/content -crispr_gallery: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/crispr_gallery.parquet/content#/data/content -orf_gallery: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/orf_gallery.parquet/content#/data/content +crispr: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_matches.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/crispr.parquet/content#/data/content +crispr_feature: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_feature.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/crispr_interpretable_features.parquet/content#/data/content +orf: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_matches.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/orf.parquet/content#/data/content +orf_feature: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_feature.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/orf_interpretable_features.parquet/content#/data/content +compound_gallery: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/compound_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/compound_gallery.parquet/content#/data/content +crispr_gallery: +https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/crispr_gallery.parquet/content#/data/content +orf_gallery: +https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/orf_gallery.parquet/content#/data/content From 0d6b95f37360cd8202cb7ea3dbd5355d1cfb1000 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Al=C3=A1n=20F=2E=20Mu=C3=B1oz?= Date: Tue, 14 Jan 2025 22:31:54 -0500 Subject: [PATCH 3/3] change(rr): Use only low significance (p) vals in calculate_features --- libs/jump_rr/metadata/shortlinks.org | 14 +++--- .../jump_rr/src/jump_rr/calculate_features.py | 2 +- libs/jump_rr/src/jump_rr/index_selection.py | 44 +------------------ 3 files changed, 9 insertions(+), 51 deletions(-) diff --git a/libs/jump_rr/metadata/shortlinks.org b/libs/jump_rr/metadata/shortlinks.org index d28f36d..2edcbe6 100644 --- a/libs/jump_rr/metadata/shortlinks.org +++ b/libs/jump_rr/metadata/shortlinks.org @@ -2,12 +2,12 @@ These are the full links that correspond to the README's shortlinks. Keeping them here makes it easier to bulk-edit and serves as a quick reference to bypass broad.io's clunky web interface. * shortlink:fulllink -crispr: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_matches.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/crispr.parquet/content#/data/content -crispr_feature: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_feature.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/crispr_interpretable_features.parquet/content#/data/content -orf: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_matches.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/orf.parquet/content#/data/content -orf_feature: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_feature.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/orf_interpretable_features.parquet/content#/data/content -compound_gallery: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/compound_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/compound_gallery.parquet/content#/data/content +crispr: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_matches.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/crispr.parquet/content#/data/content +crispr_feature: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_feature.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/crispr_interpretable_features.parquet/content#/data/content +orf: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_matches.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/orf.parquet/content#/data/content +orf_feature: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_feature.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/orf_interpretable_features.parquet/content#/data/content +compound_gallery: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/compound_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/compound_gallery.parquet/content#/data/content crispr_gallery: -https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/crispr_gallery.parquet/content#/data/content +https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/crispr_gallery.parquet/content#/data/content orf_gallery: -https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649178/files/orf_gallery.parquet/content#/data/content +https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/orf_gallery.parquet/content#/data/content diff --git a/libs/jump_rr/src/jump_rr/calculate_features.py b/libs/jump_rr/src/jump_rr/calculate_features.py index 5eed4e6..993f420 100644 --- a/libs/jump_rr/src/jump_rr/calculate_features.py +++ b/libs/jump_rr/src/jump_rr/calculate_features.py @@ -56,7 +56,7 @@ ) ## Parameters -n_vals_used = 20 # Number of top and bottom matches used +n_vals_used = 30 # Number of top and bottom matches used feat_decomposition = ("Compartment", "Feature", "Channel", "Suffix") ## Column names diff --git a/libs/jump_rr/src/jump_rr/index_selection.py b/libs/jump_rr/src/jump_rr/index_selection.py index 896abc6..a064159 100644 --- a/libs/jump_rr/src/jump_rr/index_selection.py +++ b/libs/jump_rr/src/jump_rr/index_selection.py @@ -39,48 +39,6 @@ def get_bottom_top_indices( ys = indices.flatten().get() return xs, ys - -def get_edge_indices(mat: cp.array, n: int, which: str = "bottom") -> tuple[cp.array]: - """ - Get the top n or bottom n indices from a matrix for each row. - - Parameters - ---------- - mat : cp.array - The input matrix. - n : int - The number of top or bottom indices to get. - which : str, optional - Whether to get 'top' or 'bottom' indices. Defaults to "bottom". - - Returns - ------- - xs : np.ndarray - The row indices of the top or bottom indices. - ys : np.ndarray - The column indices of the top or bottom indices. - - Raises - ------ - AssertionError - If `which` is not either 'top' or 'bottom'. - - """ - mask = cp.ones(mat.shape[1], dtype=bool) - - assert which in ("top", "bottom"), "which must be either top or bottom" - - if which == "bottom": - mask[n:] = False - else: - mask[: -n - 1] = False - - indices = mat.argsort(axis=1)[:, mask] - - xs = cp.indices(indices.shape)[0].flatten().get() - ys = indices.flatten().get() - return xs, ys - def get_ranks(mat: cp.array, n_vals_used: int = 20) -> tuple[np.array, list[cp.array]]: """Get a binary mask of the edges and ranks in every dimension.""" ranks = [mat.argsort(i) for i in range(mat.ndim)] @@ -89,6 +47,6 @@ def get_ranks(mat: cp.array, n_vals_used: int = 20) -> tuple[np.array, list[cp.a # Get the location of the largest/smallest values in every dimension for rank in ranks: - mask |= (rank < n_vals_used) | (rank > (rank.max() - n_vals_used)) + mask |= (rank < n_vals_used) return ([x.get() for x in cp.where(mask)], [rank[mask].get() for rank in ranks])