# Source code for modlee.recommender.recommender

""" 
Recommender for models.
"""
import json
import requests
import logging

# logging.basicConfig(level=logging.INFO)

import modlee
from modlee.utils import get_model_size, typewriter_print
from modlee.converter import Converter

# Module-level Converter instance. Recommender.get_code_text falls back to its
# onnx_text2code method when modlee exposes no get_code_text_for_model.
modlee_converter = Converter()

from datetime import datetime

import lightning.pytorch as pl

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import lr_scheduler

import numpy as np

from time import sleep
import sys

import os
from urllib.parse import urlparse
from modlee import ModleeClient
# API key read from the MODLEE_API_KEY environment variable. Note that the
# fallback is the string 'None', not the None object.
API_KEY = os.environ.get("MODLEE_API_KEY", 'None')
import modlee

# Client created once at import time; Recommender._get_model_text sends its
# request through this instance.
modlee_client = ModleeClient(api_key=API_KEY)
# SERVER_ENDPOINT = modlee_client.endpoint
# Used as the default value of the `origin` argument of Recommender.__init__.
SERVER_ENDPOINT = modlee_client.origin
#SERVER_ORIGIN = 'http://127.0.0.1:7070'
#SERVER_ENDPOINT = 'http://ec2-3-84-155-233.compute-1.amazonaws.com:7070'
#print(SERVER_ENDPOINT)


# [docs]
class Recommender(object):
    """
    Recommender for models conditioned on datasets.
    """

    def __init__(
        self, dataloader=None, origin=SERVER_ENDPOINT, *args, **kwargs
    ) -> None:
        """
        Constructor for recommender.

        :param dataloader: The dataloader to analyze, defaults to None.
            When given, it is analyzed immediately through ``analyze``.
        :param origin: The origin (scheme://hostname:port) for the server, defaults to Modlee's server.
        :param args: Extra positional arguments; accepted but not used here.
        :param kwargs: Extra keyword arguments; accepted but not used here.
        """
        self._model = None
        self.modality = None
        self.task = None
        self.metafeatures = None
        self.origin = origin
        # Always define the attribute so that methods reading self.dataloader
        # find None (and can report a clear error) instead of failing with an
        # AttributeError when no dataloader was passed at construction time.
        self.dataloader = None
        if dataloader is not None:
            self.analyze(dataloader)

    def __call__(self, *args, **kwargs):
        """
        Wrapper to analyze
        """
        self.analyze(*args, **kwargs)


    # [docs]
    def analyze(self, dataloader=None, *args, **kwargs):
        """
        Analyze a dataloader and calculate data metafeatures.

        :param dataloader: The dataloader to analyze. If not given, tries to use the class dataloader.
        """
        if not dataloader:
            dataloader = self.dataloader
        self.dataloader = dataloader
        if not dataloader:
            raise Exception(f'Dataloader not provided and not previously set.')
        self.metafeatures = self.calculate_metafeatures(dataloader)
        if len(self.metafeatures) > 0:
            logging.info("Finished analyzing dataset.")
        else:
            logging.info("Could not analyze dataset.")

        # typewriter_print("[Modlee] Finished analyzing.")
        # self.write_files()

    fit = analyze


    # [docs]
    def calculate_metafeatures(self, dataloader):
        """
        Compute the metafeatures of a dataset.

        :param dataloader: The dataloader on which to calculate metafeatures.
        :return: The ``stats_rep`` of an ``ImageDataMetafeatures`` built from the
            dataloader, or an empty dictionary when
            ``modlee.data_metafeatures.module_available`` is falsy.
        """
        metafeature_module = modlee.data_metafeatures

        # Guard clause: without the metafeature module there is nothing to compute.
        if not metafeature_module.module_available:
            print("Could not analyze data (check access to server)")
            return {}

        logging.info("Analyzing dataset based on data metafeatures...")
        # TODO - generalize to a base DataMetafeatures and override this method
        # for modality-specific calculations; only image metafeatures are used here.
        image_metafeatures = metafeature_module.ImageDataMetafeatures(
            dataloader, testing=True
        )
        return image_metafeatures.stats_rep


    def _get_model_text(self, metafeatures):
        """
        Request the recommended model for the given data metafeatures.

        The metafeatures are sent with ``modlee_client.get`` to the path
        ``model/<modality>/<task>`` and the ``content`` of the response is returned.

        :param metafeatures: The data metafeatures to send to the server.
        :return: The ``content`` of the server response, i.e. the model
            representation to be parsed by the client.
            NOTE(review): presumably raw bytes rather than ``str`` - confirm
            against ``ModleeClient.get``.
        """
        assert self.modality is not None, (
            "Recommender modality is not set (e.g. image, text)"
        )
        assert self.task is not None, (
            "Recommender task is not set (e.g. classification, segmentation)"
        )

        # Round-trip through JSON so only JSON-compatible values remain; this
        # also fails early when the metafeatures cannot be serialized.
        serializable_metafeatures = json.loads(json.dumps(metafeatures))

        endpoint_path = f"model/{self.modality}/{self.task}"
        request_body = json.dumps({"data_features": serializable_metafeatures})
        request_headers = {
            "Content-Type": "application/json",
            "User-Agent": "Mozilla/5.0",
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Headers": "*",
            "Access-Control-Allow-Methods": "*",
        }

        response = modlee_client.get(
            path=endpoint_path,
            data=request_body,
            headers=request_headers,
            timeout=20,
        )
        return response.content

    @property
    def model(self):
        """ 
        The cached model.
        """
        if self._model is None:
            logging.info(
                "No model recommendation, call .analyze on a dataloader first."
            )
        return self._model

    @model.setter
    def model(self, model):
        self._model = model


    # [docs]
    def get_model_details(self):
        """
        Print where the details of the recommended model can be found.

        Two messages are emitted with ``typewriter_print``: one pointing to the
        model summary file and one pointing to the editable Python version of
        the model. The file paths recorded on the instance
        (``model_onnx_text_file`` / ``model_code_file``, set by ``_write_files``)
        are reported when available; otherwise the historical defaults
        ``./model.txt`` and ``./model.py`` are shown.
        """
        print("--- Modlee Recommended Model Details --->")

        indent = "        "
        text_indent = "\n            "

        # Prefer the paths that were actually recorded, so the message does not
        # point at a different file name than the one written to disk.
        summary_file = getattr(self, "model_onnx_text_file", None) or "./model.txt"
        code_file = getattr(self, "model_code_file", None) or "./model.py"

        summary_message = "\n[Modlee] -> In case you want to take a deeper look, I saved the summary of my current model recommendation here:{}file: {}".format(
            text_indent + indent, summary_file
        )
        typewriter_print(summary_message, sleep_time=0.01)

        code_message = "\n[Modlee] -> I also saved the model as a python editable version (model def, train, val, optimizer):{}file: {}{}This is a great place to start your own model exploration!".format(
            text_indent + indent, code_file, text_indent
        )
        typewriter_print(code_message, sleep_time=0.01)


    def _write_files(self):
        """ 
        Write the model text and code to files.
        """
        self.model_onnx_text_file = "./model_summary.txt"
        self.model_code_file = "./model_code.py"
    
        if hasattr(self, "model_text"):
            with open(self.model_onnx_text_file, "w") as file:
                file.write(self.model_text)
        if hasattr(self, "model_code"):
            with open(self.model_code_file, "w") as file:
                file.write(self.model_code)


    # [docs]
    def write_file(self, file_contents, file_path):
        """ 
        Helper function to write a file.
        
        :param file_contents: The contents to write.
        :param file_path: The path to the file.
        """
        with open(file_path, "w") as _file:
            _file.write(file_contents)



    # [docs]
    def train(self, max_epochs=1, val_dataloaders=None):
        """
        Train the recommended model.

        Fits ``self.model`` on ``self.dataloader`` with a Lightning trainer inside
        a ``modlee.start_run()`` context, then records the location of the run in
        ``self.run_artifact_uri``, ``self.run_id``, ``self.exp_id`` and
        ``self.run_folder``.

        :param max_epochs: The maximum epochs to train for.
        :param val_dataloaders: The validation dataloaders, optional. When given,
            an early-stopping callback monitoring ``val_loss`` (patience 10) is added.
        """

        print("----------------------------------------------------------------")
        print("Training your recommended modlee model:")
        print("     - Running this model: {}".format("./model.py"))
        print("     - On the dataloader previously analyzed by the recommender")
        print("----------------------------------------------------------------")

        # configure_callbacks may hand back a single callback or a sequence.
        # Normalise it into a fresh list so the append below neither fails on a
        # non-list value nor mutates a list owned by the model.
        configured = self.model.configure_callbacks()
        if configured is None:
            callbacks = []
        elif isinstance(configured, pl.callbacks.Callback):
            callbacks = [configured]
        else:
            callbacks = list(configured)

        if val_dataloaders is not None:
            callbacks.append(
                pl.callbacks.EarlyStopping(
                    monitor="val_loss", patience=10, verbose=True
                )
            )
        with modlee.start_run() as run:
            trainer = pl.Trainer(
                max_epochs=max_epochs, callbacks=callbacks, enable_model_summary=False
            )
            trainer.fit(
                model=self.model,
                train_dataloaders=self.dataloader,
                val_dataloaders=val_dataloaders,
            )

            # The artifact URI is expected to look like
            # file:///<...>/mlruns/<experiment_id>/<run_id>/artifacts, so the
            # run and experiment ids are its second- and third-to-last segments.
            self.run_artifact_uri = urlparse(run.info.artifact_uri).path
            self.run_id = run.info.artifact_uri.split("/")[-2]
            self.exp_id = run.info.artifact_uri.split("/")[-3]
            # Folder of the run: everything before the "artifacts" component.
            self.run_folder = self.run_artifact_uri.split("///")[-1].split("artifacts")[
                0
            ]

            # <RunInfo: artifact_uri='file:///Users/brad/Github_Modlee/modlee_survey/notebooks/mlruns/0/e2d08510ac28438681203a930bb713ed/artifacts', end_time=None, experiment_id='0', lifecycle_stage='active', run_id='e2d08510ac28438681203a930bb713ed', run_name='skittish-trout-521', run_uuid='e2d08510ac28438681203a930bb713ed', start_time=1697907858055, status='RUNNING', user_id='brad'>


    # [docs]
    def train_documentation_locations(self):
        """ 
        Print the location of documented assets.
        """

        vertical_sep = "\n-----------------------------------------------------------------------------------------------\n"
        path_indent = "        Path: "
        indent = "        "
        doc_indent = "                     "

        print(vertical_sep)

        print(
            "Modlee documented all the details about your trained model and experiment here: \n\n{}{}".format(
                path_indent, self.run_folder
            )
        )
        print(
            "{}Experiment_id: automatically assigned to | {}".format(
                indent, self.exp_id
            )
        )
        print("{}Run_id: automatically assigned to | {}".format(indent, self.run_id))

        print(vertical_sep)



    # [docs]
    def train_documentation_shared(self):
        """ 
        Print the shared experiment assets. 
        """

        vertical_sep = "\n-----------------------------------------------------------------------------------------------\n"
        path_indent = "        Path: "
        indent = "        "
        doc_indent = "                     "

        print(vertical_sep)

        print(
            "Modlee auto-documents your experiment locally and learns from non-sensitive details:\n -> Sharing helps to enhance ML model recommendations across the entire community of modlee users, including you!\n"
        )

        print("Modlee's ML Experiment Documentation Overview: \n")
        print("[ Local ] [ Shared ] Documented Element Description ...")
        print(vertical_sep[2:-2])

        print("[       ] [        ] Dataloader\n")

        print(
            "[   X   ] [        ] Sampling of Dataloader: for your benefit, and in case we have improvements to our data analysis process"
        )
        print(
            "{}{}{}".format(
                doc_indent, path_indent, self.run_artifact_uri + "/model/snapshot*"
            )
        )

        print("[   X   ] [        ] Model Weights")
        print(
            "{}{}{}".format(
                doc_indent, path_indent, self.run_artifact_uri + "/model/data/model.pth"
            )
        )

        print(
            "[   X   ] [   X    ] Dataloader Complexity Analysis: Applying standard statistics (dims, mean, std, var, etc ...) & ML methods (clustering, etc ...) to your dataset"
        )
        print(
            "{}{}{}".format(
                doc_indent, path_indent, self.run_artifact_uri + "/stats_rep"
            )
        )

        print(
            "[   X   ] [   X    ] Modlee Model Code (model def, training step, validation step, optimizers)"
        )
        print(
            "{}{}{}".format(
                doc_indent, path_indent, self.run_artifact_uri + "/model.py"
            )
        )

        print("[   X   ] [   X    ] Experiment Metrics: (loss, accuracy, etc ...)")
        print("{}{}{}".format(doc_indent, path_indent, self.run_folder + "/metrics/"))

        print(vertical_sep)



    # [docs]
    def get_input_torch(self):
        """ 
        Get an input from the dataloader.

        :return: A tuple of the inputs (tensors) and their sizes.
        """

        # Assuming you have a DataLoader called dataloader
        for batch in self.dataloader:
            # Access the first element in the batch
            one_element = batch
            break  # Exit the loop after processing the first batch

        input_sizes = [
            [1] + list(b.size()[1:])
            for i, b in enumerate(one_element)
            if i in self.dataloader_input_inds
        ]
        input_torches = [torch.rand(ins) for ins in input_sizes]

        return input_torches, input_sizes



    # [docs]
    def get_code_text(self):
        """
        Get the code for a model as text (deprecated?).

        Uses ``modlee.get_code_text_for_model`` when modlee provides it, otherwise
        converts ``self.model_onnx_text`` with ``modlee_converter.onnx_text2code``.
        The result is stored in ``self.model_code``, post-processed with two
        textual replacements, and returned.

        :return: The model code as text.
        """
        code_text_for_model = getattr(modlee, "get_code_text_for_model", None)

        if code_text_for_model is not None:
            # ==== METHOD 1 ====
            # Save model as code using parsing
            self.model_code = code_text_for_model(self.model, include_header=True)
        else:
            self.model_code = modlee_converter.onnx_text2code(self.model_onnx_text)

        # model_str is optional: substitute it only when it exists and is a
        # string, rather than hiding every possible error behind a bare except.
        model_str = getattr(self, "model_str", None)
        if isinstance(model_str, str):
            self.model_code = self.model_code.replace("= model", "= " + model_str)
        self.model_code = self.model_code.replace("self, model,", "self,")

        return self.model_code