python · pytorch · wandb

How to run multiple wandb runs for hyperparameter tuning in a for loop


I'm doing hyperparameter tuning. My code looks like this:

combinations = list(itertools.product(*self.grid_model_configs.values()))

for combination in combinations:
    param_names = self.grid_model_configs.keys()
    model_config = {key: value for key, value in zip(param_names, combination)}

    wandb.login()
    run = wandb.init(
        name=repr(model_config).replace("'", "").replace('{', '').replace('}', ''),
        project='D2T',
        config={
            'training_config': self.training_config,
            'model_config': model_config
        }
    )

    filtered_param = {k: v for k, v in model_config.items() if k in
                      [p.name for p in inspect.signature(PointerGenerator).parameters.values()]}

    pointernet = PointerGenerator(device=self.device, **filtered_param).to(self.device)

    trainer = Trainer(training_arguments=self.training_config,
                      model=pointernet,
                      criterion=Criterion(),
                      tokenizer=self.tokenizer,
                      wandb=run)
    trainer.fit(train_dataloader, dev_dataloader)

But in wandb, it only shows one chart for the multiple combinations.

It worked in a Jupyter notebook, but when I run it from the command line it doesn't work anymore.


Solution

  • Why not give Sweeps a try: https://docs.wandb.ai/guides/sweeps

    Your code would look something like this:

    import wandb
    
    # Define sweep configuration
    sweep_config = {
        'method': 'grid',  # or 'random' or 'bayes'
        'parameters': {
            'learning_rate': {
                'values': [0.001, 0.01, 0.1]  # replace with actual hyperparameters
            },
            'batch_size': {
                'values': [16, 32, 64]
            },
            'hidden_size': {
                'values': [128, 256, 512]
            },
            # Add all other hyperparameters explicitly
        }
    }
    
    # Initialize sweep
    sweep_id = wandb.sweep(sweep_config, project='D2T')
    
    # Define training function
    def train(config=None):
        with wandb.init(config=config):
            config = wandb.config
    
            # Explicitly create model configuration
            model_config = {
                'learning_rate': config.learning_rate,
                'batch_size': config.batch_size,
                'hidden_size': config.hidden_size,
                # Add all other hyperparameters explicitly
            }
    
            # Initialize the model with explicit parameters
            pointernet = PointerGenerator(
                device=self.device,
                learning_rate=model_config['learning_rate'],
                hidden_size=model_config['hidden_size'],
                # Include all necessary model parameters
            ).to(self.device)
    
            trainer = Trainer(
                training_arguments=self.training_config,
                model=pointernet,
                criterion=Criterion(),
                tokenizer=self.tokenizer,
                wandb=wandb.run
            )
    
            trainer.fit(train_dataloader, dev_dataloader)
    
    # Execute the sweep agent
    wandb.agent(sweep_id, function=train, count=NUMBER_OF_SWEEP_RUNS)
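
    With method set to 'grid', you can also omit count entirely and the agent will keep going until every combination in the grid has been run.

  • If you'd rather keep the manual loop: the merged chart is most likely because no run is ever finished. When wandb.init() is called in a script while a previous run is still active, it can keep logging to that run instead of starting a new one (notebooks handle re-initialization differently, which would explain why it worked in Jupyter). A minimal sketch of the fix, assuming the rest of your loop stays as it is, is to finish each run explicitly (and/or pass reinit=True):

    wandb.login()  # logging in once before the loop is enough

    for combination in combinations:
        param_names = self.grid_model_configs.keys()
        model_config = dict(zip(param_names, combination))

        run = wandb.init(
            project='D2T',
            name=repr(model_config).replace("'", "").replace('{', '').replace('}', ''),
            config={'training_config': self.training_config,
                    'model_config': model_config},
            reinit=True  # allow several runs in a single process
        )

        # ... build pointernet and trainer exactly as before ...
        trainer.fit(train_dataloader, dev_dataloader)

        run.finish()  # close this run so the next wandb.init() starts a fresh one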