Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes to jump_rr tables #71

Merged
merged 3 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions libs/jump_rr/metadata/shortlinks.org
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
These are the full links that correspond to the README's shortlinks. Keeping them here makes it easier to bulk-edit and serves as a quick reference to bypass broad.io's clunky web interface.

* shortlink:fulllink
crispr: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_matches.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/crispr.parquet/content#/data/content
crispr_feature: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_feature.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/crispr_features.parquet/content#/data/content
orf: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_matches.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/orf.parquet/content#/data/content
orf_feature: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_feature.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/orf_features.parquet/content#/data/content
compound_gallery: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/compound_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/compound_gallery.parquet/content#/data/content
crispr_gallery: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/crispr_gallery.parquet/content#/data/content
orf_gallery: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14642120/files/orf_gallery.parquet/content#/data/content
crispr: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_matches.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/crispr.parquet/content#/data/content
crispr_feature: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_feature.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/crispr_interpretable_features.parquet/content#/data/content
orf: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_matches.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/orf.parquet/content#/data/content
orf_feature: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_feature.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/orf_interpretable_features.parquet/content#/data/content
compound_gallery: https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/compound_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/compound_gallery.parquet/content#/data/content
crispr_gallery:
https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/crispr_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/crispr_gallery.parquet/content#/data/content
orf_gallery:
https://lite.datasette.io/?metadata=https://raw.githubusercontent.com/broadinstitute/monorepo/main/libs/jump_rr/metadata/orf_gallery.json&install=datasette-json-html&parquet=https://zenodo.org/api/records/14649344/files/orf_gallery.parquet/content#/data/content
2 changes: 1 addition & 1 deletion libs/jump_rr/src/jump_rr/calculate_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
)

## Parameters
n_vals_used = 20 # Number of top and bottom matches used
n_vals_used = 30 # Number of top and bottom matches used
feat_decomposition = ("Compartment", "Feature", "Channel", "Suffix")

## Column names
Expand Down
5 changes: 1 addition & 4 deletions libs/jump_rr/src/jump_rr/galleries.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,7 @@ def generate_gallery(dset: str, write: bool = True) -> pl.DataFrame:
df = df.with_columns(
[
pl.col(jcp_col).replace(jcp_std_mapper).alias(std_outname),
pl.format(
get_formatter("external_flat"),
pl.col(jcp_col).replace(jcp_external_mapper),
).alias(ext_links_col),
pl.col(jcp_col).replace(jcp_external_mapper).alias(ext_links_col),
]
)

Expand Down
44 changes: 1 addition & 43 deletions libs/jump_rr/src/jump_rr/index_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,48 +39,6 @@ def get_bottom_top_indices(
ys = indices.flatten().get()
return xs, ys


def get_edge_indices(mat: cp.array, n: int, which: str = "bottom") -> tuple[cp.array]:
    """
    Pick, for every row of a matrix, the column indices at one end of its sort order.

    Each row is argsorted along axis 1 and a boolean mask over the sorted
    positions decides which of them are kept.

    Parameters
    ----------
    mat : cp.array
        Two-dimensional input matrix; rows are sorted independently.
    n : int
        Size of the requested edge. For "bottom" the first `n` sorted
        positions are kept. For "top" every position before index
        ``-n - 1`` is discarded, so the last ``n + 1`` sorted positions
        are kept.
    which : str, optional
        Either 'top' or 'bottom'. Defaults to "bottom".

    Returns
    -------
    xs : np.ndarray
        Flattened row index of every selected entry.
    ys : np.ndarray
        Flattened column index (into `mat`) of every selected entry.

    Raises
    ------
    AssertionError
        If `which` is not either 'top' or 'bottom'.

    """
    n_cols = mat.shape[1]
    keep = cp.ones(n_cols, dtype=bool)

    assert which in ("top", "bottom"), "which must be either top or bottom"

    # Switch off every sorted position that lies outside the requested edge.
    discarded = slice(n, None) if which == "bottom" else slice(None, -n - 1)
    keep[discarded] = False

    # Columns of the argsort output are sorted positions; keep only the edge.
    selected = mat.argsort(axis=1)[:, keep]

    # Row coordinate of each selected entry, aligned with `selected`.
    row_ids = cp.indices(selected.shape)[0]

    # `.get()` copies the device arrays back to the host.
    return row_ids.flatten().get(), selected.flatten().get()

def get_ranks(mat: cp.array, n_vals_used: int = 20) -> tuple[np.array, list[cp.array]]:
"""Get a binary mask of the edges and ranks in every dimension."""
ranks = [mat.argsort(i) for i in range(mat.ndim)]
Expand All @@ -89,6 +47,6 @@ def get_ranks(mat: cp.array, n_vals_used: int = 20) -> tuple[np.array, list[cp.a

# Get the location of the largest/smallest values in every dimension
for rank in ranks:
mask |= (rank < n_vals_used) | (rank > (rank.max() - n_vals_used))
mask |= (rank < n_vals_used)

return ([x.get() for x in cp.where(mask)], [rank[mask].get() for rank in ranks])
Loading