Skip to content

Commit

Permalink
Merge pull request #7 from broadinstitute/STPD-69-Run-Instanseg-on-Terra
Browse files Browse the repository at this point in the history
Stpd 69 run instanseg on terra
  • Loading branch information
jaspreetishar authored Jan 10, 2025
2 parents 7abe2e6 + 3948876 commit aafb880
Show file tree
Hide file tree
Showing 10 changed files with 308 additions and 83 deletions.
19 changes: 11 additions & 8 deletions common_python_scripts/create_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import imagecodecs
from scipy.ndimage import gaussian_filter

def main(image_paths_list, subset_data_y_x_interval, transform_file, detected_transcripts_file, technology, tiles_dimension, overlap, amount_of_VMs, transcript_plot_as_channel, sigma, trim_amount=50):
def main(image_paths_list, subset_data_y_x_interval, transform_file, detected_transcripts_file, technology, tiles_dimension, overlap, amount_of_VMs, transcript_plot_as_channel, sigma, algorithm, trim_amount=50):

channel_images = []
mean_intensity_of_channels = {}
Expand Down Expand Up @@ -132,7 +132,6 @@ def main(image_paths_list, subset_data_y_x_interval, transform_file, detected_tr
])

np.savetxt('subset_transformation_matrix.csv', transformation_matrix_subset, delimiter=' ', fmt='%d')

for image_index, image_path in enumerate(image_paths_list):

with tiff.TiffFile(image_path, is_ome=False) as image_file:
Expand All @@ -148,7 +147,7 @@ def main(image_paths_list, subset_data_y_x_interval, transform_file, detected_tr

mean_intensity_of_channels_df = pd.DataFrame(mean_intensity_of_channels, index=[0])
mean_intensity_of_channels_df.to_csv('mean_intensity_of_channels.csv', index=False)

if transcript_plot_as_channel == 1:
array_x = trx_subset[x_col].values
array_y = trx_subset[y_col].values
Expand All @@ -172,14 +171,16 @@ def main(image_paths_list, subset_data_y_x_interval, transform_file, detected_tr

subset_multi_channel_image = np.stack(channel_images, axis=0)

listed_intervals = tile_intervals.tile_intervals(subset_multi_channel_image, tiles_dimension, overlap, amount_of_VMs, trim_amount)
num_VMs_in_use = listed_intervals['number_of_VMs'][0][0]
out_path=os.getcwd()
if algorithm != 'INSTANSEG':

for shard_index in range(num_VMs_in_use):
listed_intervals = tile_intervals.tile_intervals(subset_multi_channel_image, tiles_dimension, overlap, amount_of_VMs, trim_amount)
num_VMs_in_use = listed_intervals['number_of_VMs'][0][0]
out_path=os.getcwd()

tiling_script(subset_multi_channel_image, listed_intervals, shard_index, out_path)
for shard_index in range(num_VMs_in_use):

tiling_script(subset_multi_channel_image, listed_intervals, shard_index, out_path)

if __name__ == '__main__':

parser = argparse.ArgumentParser(description='composite_image_creation')
Expand All @@ -193,6 +194,7 @@ def main(image_paths_list, subset_data_y_x_interval, transform_file, detected_tr
parser.add_argument('--amount_of_VMs', type=float)
parser.add_argument('--transcript_plot_as_channel', type=int)
parser.add_argument('--sigma', type=int)
parser.add_argument('--algorithm', type=str)
parser.add_argument('--trim_amount', type=int)
args = parser.parse_args()

Expand All @@ -206,4 +208,5 @@ def main(image_paths_list, subset_data_y_x_interval, transform_file, detected_tr
amount_of_VMs = args.amount_of_VMs,
transcript_plot_as_channel = args.transcript_plot_as_channel,
sigma = args.sigma,
algorithm = args.algorithm,
trim_amount = args.trim_amount)
106 changes: 106 additions & 0 deletions common_python_scripts/run_instanseg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import imagecodecs
import tifffile
from instanseg import InstanSeg
import numpy as np
from aicsimageio import AICSImage
import torch
import os
import bioio
from instanseg.utils.utils import labels_to_features
import fastremap
from skimage import io
from pathlib import Path
import json
from shapely.geometry import Polygon
import geopandas as gpd
import glob
import argparse

def _find_geojson(expected_path, search_root):
    """Return the path of the GeoJSON file written for the segmented image.

    The file is first looked up at ``expected_path`` (next to the input image,
    using the same naming rule as the patched ``save_output``). If it is not
    there, ``search_root`` is walked recursively and the first ``.geojson``
    file in sorted order is returned so the choice is deterministic.

    Raises:
        FileNotFoundError: if no ``.geojson`` file can be found at all.
    """
    if expected_path.is_file():
        return str(expected_path)

    candidates = sorted(
        os.path.join(root, name)
        for root, _dirs, names in os.walk(search_root)
        for name in names
        if name.endswith('.geojson')
    )
    if not candidates:
        raise FileNotFoundError(
            f"No .geojson output found at {expected_path} or under {search_root}"
        )
    return candidates[0]


def _load_cell_polygons(geojson_path):
    """Read a list of GeoJSON features and return one Polygon per feature.

    Only the first coordinate ring of each geometry is used (i.e. the exterior
    ring for a GeoJSON Polygon); any further rings are ignored.
    """
    with open(geojson_path) as geojson_file:
        features = json.load(geojson_file)
    return [Polygon(feature['geometry']['coordinates'][0]) for feature in features]


def main(image_paths_list, image_pixel_size):
    """Segment the first image with InstanSeg and export cell polygons.

    Args:
        image_paths_list: comma-separated string of image paths. Only the
            first path is read and segmented.
        image_pixel_size: value forwarded to ``InstanSeg.eval`` as
            ``pixel_size``.

    Side effects:
        * Replaces ``InstanSeg.save_output`` with a patched version (below).
        * InstanSeg writes the label ``.tiff``, ``_overlay.tiff`` and
          ``.geojson`` files next to the input image.
        * Writes ``cell_polygons.parquet`` in the current working directory.

    Raises:
        FileNotFoundError: if no GeoJSON output can be located after the run.
    """

    def patched_save_output(self, image_path: str, labels: torch.Tensor, image_array=None, save_overlay=False, save_geojson=False):
        """Save labels as TIFF and, optionally, as GeoJSON and as an overlay."""
        if isinstance(image_path, str):
            image_path = Path(image_path)
        if isinstance(labels, torch.Tensor):
            labels = labels.cpu().detach().numpy()

        output_dir = Path(image_path).parent
        new_stem = image_path.stem + self.prediction_tag

        # The output path must exist regardless of verbosity; only the log
        # message is conditional on self.verbose.
        out_path = output_dir / (new_stem + ".tiff")
        if self.verbose:
            print(f"Saving output to {out_path}")
        io.imsave(out_path, labels.squeeze().astype(np.int32), check_contrast=False)

        if save_geojson:
            # Promote to 4-D so that axis 1 indexes the label channels below.
            while labels.ndim < 4:
                labels = labels[None]

            output_dimension = labels.shape[1]
            if output_dimension == 1:
                features = labels_to_features(labels[0,0], object_type="detection")
            elif output_dimension == 2:
                features = (labels_to_features(labels[0,0], object_type="detection", classification="Nuclei") +
                            labels_to_features(labels[0,1], object_type="detection", classification="Cells"))
            else:
                raise ValueError(
                    f"Cannot export geojson for {output_dimension} label channels; expected 1 or 2."
                )

            geojson = json.dumps(features)
            geojson_path = output_dir / (new_stem + ".geojson")

            print("Saving geojson...")
            with open(geojson_path, "w") as outfile:
                outfile.write(geojson)

        if save_overlay:
            # Explicit check instead of assert, which is stripped under -O.
            if image_array is None:
                raise ValueError("Image array must be provided to save overlay.")
            out_path = output_dir / (new_stem + "_overlay.tiff")
            if self.verbose:
                print(f"Saving overlay to {out_path}")
            display = self.display(image_array, labels)
            io.imsave(out_path, display, check_contrast=False)

    InstanSeg.save_output = patched_save_output

    image_paths_list = image_paths_list.split(',')
    image_path = image_paths_list[0]
    image = tifffile.imread(image_path)

    instanseg_model = InstanSeg("fluorescence_nuclei_and_cells", image_reader="bioio", verbosity=1)
    # NOTE(review): assumes the image is 3-D with the spatial axes at positions
    # 1 and 2 (e.g. channel-first). The threshold is set to 10x the size of one
    # plane, presumably so the image is never handled as "large" -- confirm
    # against the InstanSeg documentation.
    instanseg_model.medium_image_threshold = image.shape[1] * image.shape[2] * 10

    instanseg_model.eval(
        image=image_path,
        save_output=True,
        save_overlay=True,
        save_geojson=True,
        pixel_size=image_pixel_size,
        target = "cells"
    )

    # Same naming rule as patched_save_output, so we pick up the file produced
    # by this run rather than any stray .geojson in the working directory.
    source_path = Path(image_path)
    expected_geojson = source_path.parent / (source_path.stem + instanseg_model.prediction_tag + ".geojson")
    geojson_path = _find_geojson(expected_geojson, os.getcwd())

    polygons = _load_cell_polygons(geojson_path)

    polygons_geo_df = gpd.GeoDataFrame(geometry=polygons)

    polygons_geo_df.to_parquet("cell_polygons.parquet")

if __name__ == '__main__':

    # Command-line entry point: parse the arguments and hand them to main().
    parser = argparse.ArgumentParser(description='instanseg_implementation')
    # Required: main() calls .split(',') on this value, so a missing argument
    # would otherwise surface as an AttributeError on None.
    parser.add_argument('--image_paths_list', required=True,
                        help='Comma-separated list of image paths.')
    parser.add_argument('--image_pixel_size', type=float,
                        help='Pixel size passed through to main() as image_pixel_size.')
    args = parser.parse_args()

    main(image_paths_list = args.image_paths_list,
         image_pixel_size = args.image_pixel_size)
40 changes: 40 additions & 0 deletions docker/common_for_mac_and_linux/instanseg/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Base image with Python 3.10 for compatibility
FROM python:3.10

# Install system dependencies for geospatial and image processing libraries.
# libgl1 is used instead of libgl1-mesa-glx: the latter is only a transitional
# package and is no longer shipped by newer Debian releases, which breaks the
# build when the floating python:3.10 tag moves to a newer base.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    gdal-bin \
    libgdal-dev \
    libspatialindex-dev \
    build-essential \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxrender1 \
    libxext6 && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies.
# The extras spec is quoted so the shell cannot treat "[full]" as a glob.
# NOTE(review): bioio-ome-tiff and bioio-ome-zarr are the only unpinned
# packages; consider pinning them for reproducible builds.
RUN pip install --no-cache-dir \
    numpy==1.26.4 \
    torch==2.5.1 \
    tifffile==2024.9.20 \
    imagecodecs==2024.9.22 \
    aicsimageio==4.11.0 \
    "instanseg-torch[full]==0.0.5" \
    fastremap==1.15.0 \
    scikit-image==0.22.0 \
    shapely==2.0.6 \
    geopandas==1.0.1 \
    bioio==1.1.0 \
    bioio-ome-tiff \
    bioio-ome-zarr \
    pyarrow==16.1.0 \
    fastparquet==2024.5.0

# Ship the segmentation script inside the image.
COPY common_python_scripts/run_instanseg.py /opt/run_instanseg.py
RUN chmod +x /opt/run_instanseg.py

ENTRYPOINT ["/bin/bash", "-l", "-c", "/bin/bash"]
Loading

0 comments on commit aafb880

Please sign in to comment.