broadinstitute · afermg · Nov 26, 2024 · Aug 19, 2024 · Nov 26, 2024 · Nov 26, 2024
diff --git a/flake.nix b/flake.nix
@@ -1,14 +1,64 @@
 {
-  description = "A collection of flake templates";
-
-  outputs = { self }: {
-    templates = {
-      portrait = {
-        path = ./libs/jump_portrait;
-        description = "JUMP portrait development template";
-      };
-
-    };
-
+  inputs = {
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.05";
+    nixpkgs_master.url = "github:NixOS/nixpkgs/master";
+    systems.url = "github:nix-systems/default";
+    devenv.url = "github:cachix/devenv";
+  };
+  nixConfig = {
+    extra-trusted-public-keys = "devenv.cachix.org-1:w1cLUi8dv3hnoSPGAuibQv+f9TZLr6cv/Hm9XgU50cw=";
+    extra-substituters = "https://devenv.cachix.org";
   };
-}
+  outputs = { self, nixpkgs, devenv, systems, ... } @ inputs:
+    let
+      forEachSystem = nixpkgs.lib.genAttrs (import systems);
+    in
+    {
+      packages = forEachSystem (system: {
+        devenv-up = self.devShells.${system}.default.config.procfileScript;
+      });
+      devShells = forEachSystem
+        (system:
+          let
+            pkgs = import nixpkgs {
+              system = system;
+              config.allowUnfree = true;
+            };
+            mpkgs = import inputs.nixpkgs_master {
+              system = system;
+              config.allowUnfree = true;
+            };
+          in
+          {
+            default = devenv.lib.mkShell {
+              inherit inputs pkgs;
+              modules = [
+                {
+                  stdenv = pkgs.clangStdenv;
+                  env.NIX_LD = nixpkgs.lib.fileContents "${pkgs.stdenv.cc}/nix-support/dynamic-linker";
+                  env.NIX_LD_LIBRARY_PATH = nixpkgs.lib.makeLibraryPath (with pkgs; [
+                  # Add needed packages here
+                  pkgs.libz # for numpy
+                  pkgs.stdenv.cc.cc
+                  pkgs.libGL
+                  ]);
+                  # https://devenv.sh/reference/options/
+                  packages = with pkgs; [
+                    poetry
+                    python311
+                    ruff
+                    pyright
+                  ];
+                  enterShell = ''
+                    export LD_LIBRARY_PATH=$NIX_LD_LIBRARY_PATH
+                    export PYTHON_KEYRING_BACKEND=keyring.backends.fail.Keyring
+                    if [ ! -d ".venv" ]; then
+                       poetry install -vv --with dev --no-root
+                    fi
+                    source .venv/bin/activate
+                  '';
+                }
+              ];
+            };
+          });
+    };
diff --git a/libs/jump_babel/.envrc b/libs/jump_babel/.envrc
@@ -1 +1 @@
-use flake . --impure
+use flake . --impure --accept-flake-config
diff --git a/libs/jump_portrait/README.md b/libs/jump_portrait/README.md
@@ -21,7 +21,6 @@ download_item_images(item_name, channels, corrections=corrections, controls=cont
 Fetch one image for a given item and a control
 ```python
 from jump_portrait.fetch import get_jump_image, get_sample
-from jump_portrait.save import download_item_images
 
 sample = get_sample()
 
@@ -33,11 +32,10 @@ img = get_jump_image(source, batch, plate, well, channel, site, correction)
 ```
 
 
-Workflow 3: Fetch bright field channel
+### Workflow 3: Fetch bright field channel
 Note that this is hacky and may not work for all sources.
 ```python
 from jump_portrait.fetch import get_jump_image, get_sample
-from jump_portrait.save import download_item_images
 
 sample = get_sample()
 
@@ -57,9 +55,14 @@ from jump_portrait.fetch import get_item_location_info
 gene = "MYT1"
 
 location_df = get_item_location_info(gene)
+
+```
+
 Returns a polars dataframe whose columns contain the metadata 
 alongside path and file locations
 
+``` python
+
 #┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
 #│ Metadata_ ┆ Metadata_ ┆ Metadata_ ┆ Metadata_ ┆ … ┆ PathName_ ┆ Metadata_ ┆ Metadata_ ┆ standard │
 #│ Source    ┆ Batch     ┆ Plate     ┆ Well      ┆   ┆ OrigRNA   ┆ PlateType ┆ JCP2022   ┆ _key     │
@@ -90,6 +93,7 @@ alongside path and file locations
 ```
 
 The columns of these dataframes are:
+
 ```
 Metadata_[Source/Batch/Plate/Well/Site]:
  - Source: Source in the range 0-14.
@@ -110,4 +114,46 @@ standard_key: Gene or compound queried
 
 ```
 
-We can then feed this information to `jump_portrait.fetch.get_jump_image` to fetch the available images.
+We can then feed this information to `jump_portrait.fetch.get_jump_image` to fetch the available images as in workflow 2.
+
+Or we can feed this information straight to `jump_portrait.fetch.get_jump_image_batch` to fetch the available images in batches with desired channel and sites.
+
+```python
+from jump_portrait.fetch import get_jump_image_batch
+sub_location_df = location_df.select(["Metadata_Source", "Metadata_Batch", "Metadata_Plate", "Metadata_Well"]).unique()
+channel = ["DNA", "AGP", "Mito", "ER", "RNA"] # example
+site = [str(i) for i in range(10)] # every site from 0 to 9 (as this is a CRISPR plate) 
+correction = "Orig" # or "Illum"
+verbose = False # whether to have tqdm loading bar
+
+iterable, img_list = get_jump_image_batch(sub_location_df, channel, site, correction, verbose)
+```
+
+Returns: 
+- iterable (list of tuple) > list containing the metadata, channel, site and correction
+- img_list (list of array) > list containing the images. NB, if no image has been retrieved for a specific site (this might happen), array object is replaced by a None
+
+From there, current processing will include:
+1. Filter out images where no image has been retrieved (remove None values) 
+2. Stack images along a channel axis
+
+```python
+# first, filter out img / param where no img has been retrieved
+mask = [x is not None for x in img_list]
+iterable_filt = [param for i, param in enumerate(iterable) if mask[i]]
+img_list_filt = [param for i, param in enumerate(img_list) if mask[i]]
+```
+
+``` python
+# second, group image per source, batch, well, site > to stack on channel
+from itertools import groupby, starmap
+import numpy as np
+zip_iter_img = sorted(zip(iterable_filt, img_list_filt),
+                      key=lambda x: (x[0][0], x[0][1], x[0][2], x[0][3], x[0][5], x[0][4]))
+iterable_stack, img_stack = map(lambda tup: list(tup),
+        zip(*starmap(
+            lambda key, param_img: (key, np.stack(list(map(lambda x: x[1], param_img)))),
+            # grouped image are returned as the common key, and then the zip of param and img, so we retrieve the img then we stack
+            groupby(zip_iter_img,
+                    key=lambda x: (x[0][0], x[0][1], x[0][2], x[0][3], x[0][5])))))
+```
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		use flake . --impure
		use flake . --impure --accept-flake-config