javascript google-apps-script google-drive-api large-language-model

Passing an image in Google Drive to a multimodal LLM


I'm trying to upload an image from Google Drive to an LLM for analysis. I have the working code below:

/**
 * Asks a multimodal model on OpenRouter what is in an image.
 *
 * Builds a chat-completions request holding a single user message (a text
 * question plus an image URL), POSTs it with UrlFetchApp, and writes the
 * response code, the raw response body, and the JSON-parsed body to the log.
 * Takes no arguments and returns nothing.
 */
function openRouterApiRequest() {
  var apiKey = "****";
  var apiEndpoint = 'https://openrouter.ai/api/v1/chat/completions';

  // Image to analyse; the commented-out line is an alternative test image.
  // var imageUrl = 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg';
  var imageUrl = "https://drive.google.com/file/d/11178UwHmPb2TAnYCyxFOKXlPh-vSPecv/view?usp=sharing";

  // The user turn has two content parts: the question and the image reference.
  var questionPart = { type: "text", text: "What is in this image?" };
  var imagePart = { type: "image_url", image_url: { url: imageUrl } };
  var requestBody = {
    model: "meta-llama/llama-4-maverick",
    messages: [{ role: "user", content: [questionPart, imagePart] }]
  };

  var response = UrlFetchApp.fetch(apiEndpoint, {
    method: 'post',
    headers: {
      Authorization: 'Bearer ' + apiKey,
      'Content-Type': 'application/json'
    },
    payload: JSON.stringify(requestBody),
    // Lets us read the response even if the request fails.
    muteHttpExceptions: true
  });

  var status = response.getResponseCode();
  var bodyText = response.getContentText();

  Logger.log('Response Code: ' + status);
  Logger.log('Response Body: ' + bodyText);

  // The body is not guaranteed to be JSON, so a parse failure is logged
  // instead of being allowed to abort the function.
  try {
    Logger.log(JSON.parse(bodyText));
  } catch (e) {
    Logger.log('Failed to parse response as JSON: ' + e.message);
  }
}

If I uncomment:

  var imageUrl = 'https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg';

The output includes :{model=meta-llama/llama-4-maverick, created=1.744144885E9, system_fingerprint=, choices=[{finish_reason=stop, native_finish_reason=stop, index=0.0, logprobs=null, message={content=The image depicts a serene landscape featuring a wooden boardwalk or path that traverses through a lush grassy field. The boardwalk, constructed from weathered wooden planks, is flanked by tall grasses on both sides and appears to be slightly elevated above the surrounding terrain.

If I try the shareable link, https://drive.google.com/file/d/11178UwHmPb2TAnYCyxFOKXlPh-vSPecv/view?usp=sharing,

I get: {"error":{"message":"Provider returned error","code":502,"metadata":{"raw":"error, status code: 500, status: , message: invalid character 'I' looking for beginning of value, body: ","provider_name":"Novita"}},"user_id":"user_2i6MRzMhAMlWPTYRLTP5uRwhSyx"}

It appears that this is not readable by the LLM. Is there any way to make it readable?


Solution

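  • The `/view` sharing link opens Google Drive's viewer page rather than returning the image file itself, so the model has no image data to read. As another approach, how about modifying the URL as follows, so that it points directly at an image?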

    From:

    https://drive.google.com/file/d/11178UwHmPb2TAnYCyxFOKXlPh-vSPecv/view?usp=sharing
    

    To:

    https://drive.google.com/thumbnail?sz=w1000&id=11178UwHmPb2TAnYCyxFOKXlPh-vSPecv