Create JSON from given data in Swift

I want to execute a POST URLRequest, but first I need to create the request body. The body should look like this:

{
  "model": "gpt-4-vision-preview",
  "messages": [
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": <mutableStringValue>
        },
        {
          "type": "image_url",
          "image_url": {
            "url": <mutableStringValue>
          }
        }
      ]
    }
  ],
  "max_tokens": 300
}

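I cannot hardcode this as a .json file because the values of "text" (in the first content entry) and "url" (inside "image_url" in the second content entry) are passed in as function parameters. I also don't know how to model the two content entries, which both have a "type" key but otherwise carry different fields.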

As of now, I came up with something like this:

/// Request payload for the chat completions call.
///
/// `model` and `maxTokens` are fixed constants; only `messages` is supplied by the caller.
struct ImageInputRequestBody: Codable {
    /// A single entry of a message's `content` array.
    /// Only the text variant (`type` + `text`) is modelled here.
    struct Content: Codable {
        let type: String
        let text: String?
    }

    /// One chat message: the author's role plus its content entries.
    struct Message: Codable {
        let role: String
        let content: [Content]
    }

    /// Maps `maxTokens` onto the snake_case `max_tokens` key used in the JSON body.
    enum CodingKeys: String, CodingKey {
        case model
        case messages
        case maxTokens = "max_tokens"
    }

    let model: String = "gpt-4-vision-preview"
    let messages: [Message]
    let maxTokens: Int = 300
}

But here I'm missing the second content entry, i.e. the one with "type": "image_url" and its nested "image_url" object.

Solution

You could try something simple like this:

/// One entry of a chat message's `content` array: either a text part or an image part.
///
/// `type` selects which payload is used. `"text"` stores the value in `text`; any other
/// type stores it as an `ImgUrl` in `imageUrl`. Only the payload matching `type` is
/// written when encoding.
struct Content: Identifiable, Codable {
    /// Local identity for `Identifiable`. It is not listed in `CodingKeys`, so it is
    /// neither encoded nor decoded; every instance gets a fresh UUID.
    let id = UUID()
    /// Discriminator sent under the `type` key (e.g. `"text"` or `"image_url"`).
    let type: String
    /// Text payload; set only when `type == "text"`.
    var text: String?
    /// Image payload; set for every non-text type when a value was supplied.
    var imageUrl: ImgUrl?

    enum CodingKeys: String, CodingKey {
        case type, text
        case imageUrl = "image_url"
    }

    /// Creates a content part.
    ///
    /// - Parameters:
    ///   - type: The part's discriminator (this could become an enum later).
    ///   - value: The text when `type` is `"text"`, otherwise the image URL string.
    init(type: String, value: String? = nil) {
        self.type = type

        if type == "text" {
            text = value
            imageUrl = nil
        } else {
            text = nil
            // `map` wraps the URL only when a value exists, so no force-unwrap is needed.
            imageUrl = value.map { ImgUrl(url: $0) }
        }
    }

    /// Encodes `type` plus the single payload that belongs to it.
    ///
    /// The payload is written with `encode` rather than `encodeIfPresent`, so a missing
    /// value is emitted as an explicit JSON `null` instead of the key being omitted.
    public func encode(to encoder: Encoder) throws {
        var container = encoder.container(keyedBy: CodingKeys.self)
        try container.encode(type, forKey: .type)
        if type == "text" {
            try container.encode(text, forKey: .text)
        } else {
            try container.encode(imageUrl, forKey: .imageUrl)
        }
    }
}

/// Top-level JSON body for the chat completions request.
///
/// `model` and `maxTokens` default to the values used in this example, so
/// `ImageInputRequestBody(messages:)` keeps working. Both can be overridden through the
/// memberwise initializer when a different model or token limit is needed.
struct ImageInputRequestBody: Codable {
    // These are `var` rather than `let` on purpose: a `let` with an initial value is
    // skipped by the synthesized `Decodable` initializer (the compiler warns about it)
    // and is left out of the memberwise initializer, so it could never be changed.
    /// Model identifier sent under the `model` key.
    var model: String = "gpt-4-vision-preview"
    /// Conversation messages, in order.
    let messages: [Message]
    /// Upper bound on generated tokens, sent as `max_tokens`.
    var maxTokens: Int = 300

    enum CodingKeys: String, CodingKey {
        case model, messages
        case maxTokens = "max_tokens"
    }
}

/// One chat message: who is speaking and the content parts they sent.
struct Message {
    /// Author of the message, sent under the `role` key (e.g. `"user"`).
    let role: String
    /// Ordered content parts that make up the message.
    let content: [Content]
}

// Encoding and decoding are fully synthesized from the stored properties.
extension Message: Codable {}

/// Value of the nested `image_url` object inside an image content part.
struct ImgUrl {
    /// Address of the image, sent under the `url` key.
    let url: String
    // let detail: String?  // <--- if desired later
}

// Encoding and decoding are fully synthesized from the stored property.
extension ImgUrl: Codable {}

Note that you need to add two Content values to the Message object: one for the text and one for the image (or more, if you want to send several images).

For example:

/// Builds a sample request body holding one `"user"` message made of a text part
/// followed by an image part.
///
/// - Parameters:
///   - text: Value for the `"text"` content part.
///   - url: Image address for the `"image_url"` content part.
/// - Returns: A request body containing that single message.
func getTestRequestBody(text: String? = nil, url: String? = nil) -> ImageInputRequestBody {
    let textPart = Content(type: "text", value: text)
    let imagePart = Content(type: "image_url", value: url)
    let userMessage = Message(role: "user", content: [textPart, imagePart])
    return ImageInputRequestBody(messages: [userMessage])
}

EDIT-1

Use it like this to post to OpenAI

     // Example inputs: the prompt text and the address of the image to ask about.
     let text = "What's in this image?"
     let imgurl =  "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
     // .... (surrounding code elided)
     // Start the request asynchronously with the two values above.
     .task {
         await fetch(txt: text, urlString: imgurl)
     }
     //.... (surrounding code elided)
     
     /// Posts one user message (a text part plus an image part) to the OpenAI chat
     /// completions endpoint and prints the raw response body.
     ///
     /// Failures are reported with `print`; nothing is returned or thrown.
     ///
     /// - Parameters:
     ///   - txt: Prompt text for the `"text"` content part.
     ///   - urlString: Image address for the `"image_url"` content part.
     private func fetch(txt: String? = nil, urlString: String? = nil) async {
         let apikey = "YOUR-APIKEY"  // placeholder; do not commit a real key to source

         guard let url = URL(string: "https://api.openai.com/v1/chat/completions") else { return }

         var request = URLRequest(url: url)
         request.addValue("application/json", forHTTPHeaderField: "Content-Type")
         request.addValue("Bearer \(apikey)", forHTTPHeaderField: "Authorization")
         request.httpMethod = "POST"

         let body = ImageInputRequestBody(messages: [   // <--- here
             Message(role: "user", content: [
                 Content(type: "text", value: txt),
                 Content(type: "image_url", value: urlString)
             ])
         ])

         do {
             let encoded = try JSONEncoder().encode(body)
             request.httpBody = encoded
             // check the encoding
             // if let encodedString = String(data: encoded, encoding: .utf8) {
             //     print("\n----> encodedString: \n \(encodedString) \n")
             // }
             let (data, response) = try await URLSession.shared.data(for: request)

             // URLSession only throws on transport errors; an HTTP error status (401, 429, ...)
             // still arrives here as a normal response, so report it explicitly.
             if let http = response as? HTTPURLResponse, !(200..<300).contains(http.statusCode) {
                 print("-----> request failed with HTTP status \(http.statusCode)")
             }

             // Decode leniently so the body prints as text even if it is not valid UTF-8.
             print("-----> \n \(String(decoding: data, as: UTF8.self)) \n")

             // let decoded = try JSONDecoder().decode(OpenAIResponse.self, from: data)
             // .....
         } catch {
             print(error)
         }
     }