import io
import logging

import boto3
import numpy as np
import polars as pl
import requests
from botocore.config import Config
from PIL import Image

logger = logging.getLogger(__name__)

# S3 for sample images
my_config = Config(
    region_name='us-east-1'
)
s3_client = boto3.client('s3', config=my_config)

# Bucket holding the sample images referenced by the "file_path" column
SAMPLE_BUCKET = 'treeoflife-10m-sample-images'

# Upper bound (seconds) on the sample-image download so a stalled
# connection cannot block the caller forever
REQUEST_TIMEOUT_SECONDS = 30

# Set basepath for EOL pages for info
EOL_URL = "https://eol.org/pages/"
RANKS = ["kingdom", "phylum", "class", "order", "family", "genus", "species"]


def get_sample(df, pred_taxon, rank):
    '''
    Function to retrieve a sample image of the predicted taxon and EOL page link for more info.

    Parameters:
    -----------
    df : DataFrame
        DataFrame with all sample images listed and their filepaths (in "file_path" column).
    pred_taxon : str
        Predicted taxon of the uploaded image (space-separated names, one per rank).
    rank : int
        Index of rank in RANKS chosen for prediction.

    Returns:
    --------
    img : PIL.Image or None
        Sample image of predicted taxon for display. None when no sample is
        available or when retrieval failed.
    eol_page : str
        Message with the URL to the EOL page for the taxon (may be a lower rank,
        e.g., species sample). When img is None, this is instead a message
        explaining why no sample could be shown.
    '''
    logger.info(f"Getting sample for taxon: {pred_taxon} at rank: {rank}")
    try:
        filepath, eol_page_id, full_name, is_exact = get_sample_data(df, pred_taxon, rank)
    except Exception as e:
        logger.error(f"Error retrieving sample data: {e}")
        return None, f"We encountered the following error trying to retrieve a sample image: {e}."

    if filepath is None:
        logger.warning(f"No sample image found for taxon: {pred_taxon}")
        return None, f"Sorry, our EOL images do not include {pred_taxon}."

    # Get sample image of selected individual
    try:
        img_src = s3_client.generate_presigned_url(
            'get_object',
            Params={'Bucket': SAMPLE_BUCKET, 'Key': filepath},
        )
        img_resp = requests.get(img_src, timeout=REQUEST_TIMEOUT_SECONDS)
        # Surface HTTP failures directly instead of handing an error body to PIL,
        # which would only report that it cannot identify the image.
        img_resp.raise_for_status()
        img = Image.open(io.BytesIO(img_resp.content))

        full_eol_url = EOL_URL + eol_page_id
        if is_exact:
            eol_page = f"\n\nCheck out the EOL entry for {pred_taxon} to learn more: {full_eol_url}.\n\n"
        else:
            eol_page = f"\n\nCheck out an example EOL entry within {pred_taxon} to learn more: {full_name} {full_eol_url}.\n\n"

        logger.info(f"Successfully retrieved sample image and EOL page for {pred_taxon}")
        return img, eol_page
    except Exception as e:
        logger.error(f"Error retrieving sample image: {e}")
        return None, f"We encountered the following error trying to retrieve a sample image: {e}."


def _join_taxon_names(values):
    '''Join taxon names with single spaces, skipping null or empty entries.'''
    return " ".join(str(value) for value in values if value is not None and value != "")


def get_sample_data(df, pred_taxon, rank):
    '''
    Function to randomly select a sample individual of the given taxon.

    Rows are first narrowed to those matching pred_taxon at every rank up to and
    including the requested one. A row whose lower ranks are all null/empty is
    preferred (it represents the taxon itself); otherwise any matching row is used.

    Parameters:
    -----------
    df : DataFrame
        DataFrame with all sample images listed and their filepaths (in "file_path" column).
    pred_taxon : str
        Predicted taxon of the uploaded image (space-separated names, one per rank).
    rank : int
        Index of rank in RANKS chosen for prediction.

    Returns:
    --------
    filepath : str or None
        Filepath of selected sample image for predicted taxon (None if no row matches).
    eol_page_id : str
        EOL page ID associated with the selected sample (np.nan if no row matches).
    full_name : str
        Full taxonomic name of the selected sample ("" if no row matches).
    is_exact : bool
        Flag indicating if the match is exact (i.e., with empty lower ranks).

    Raises:
    -------
    IndexError
        If pred_taxon has fewer space-separated names than rank + 1.
    '''
    # Split once; the names are positional, one per entry of RANKS.
    taxon_names = pred_taxon.split(" ")
    for idx in range(rank + 1):
        df = df.filter(pl.col(RANKS[idx]) == taxon_names[idx])

    if df.height == 0:
        return None, np.nan, "", False

    # First, try to find entries with empty lower ranks
    exact_df = df
    for lower_rank in RANKS[rank + 1:]:
        exact_df = exact_df.filter(
            pl.col(lower_rank).is_null() | (pl.col(lower_rank) == "")
        )

    if exact_df.height > 0:
        df_filtered = exact_df.sample()
        full_name = _join_taxon_names(df_filtered.select(RANKS[:rank + 1]).row(0))
        return (
            df_filtered["file_path"][0],
            df_filtered["eol_page_id"].cast(pl.String)[0],
            full_name,
            True,
        )

    # If no exact matches, return any entry with the specified rank.
    # Some lower ranks of the sampled row may still be null or empty, so they
    # are skipped rather than joined (joining None would raise TypeError).
    df_filtered = df.sample()
    full_name = _join_taxon_names(df_filtered.select(RANKS).row(0))
    return (
        df_filtered["file_path"][0],
        df_filtered["eol_page_id"].cast(pl.String)[0],
        full_name,
        False,
    )