Tags: reactjs, node.js, webrtc, whatsapp-cloud-api

Trying to implement the WhatsApp Calling API using WebRTC


I'm following this documentation: https://developers.facebook.com/docs/whatsapp/cloud-api/calling/user-initiated-calls

When a user initiates a call, I receive the following webhook event:

{
  "object": "whatsapp_business_account",
  "entry": [
    {
      "id": "114255454940862",
      "changes": [
        {
          "value": {
            "messaging_product": "whatsapp",
            "metadata": {
              "display_phone_number": "91912995****",
              "phone_number_id": "115017831530658"
            },
            "contacts": [
              {
                "profile": {
                  "name": "Rahul"
                },
                "wa_id": "91832753****"
              }
            ],
            "calls": [
              {
                "id": "wacid.HBgMOTE4MDk3NTM0Njg0FQIAEhggMURENUU5MDg2asdfNjk3OEUzMDM5MzEyRTccGAw5MTkxNjk5NTk5NTkVAgAVAgA=",
                "from": "91809753****",
                "to": "91916995****",
                "event": "connect",
                "timestamp": "1753115233",
                "direction": "USER_INITIATED",
                "session": {
                  "sdp": "v=0\r\no=- 1753115233717 2 IN IP4 127.0.0.1\r\ns=-\r\nt=0 0\r\na=group:BUNDLE audio\r\na=msid-semantic: WMS 7dc1f8d4-d804-48bf-af97-8c6fe9973828\r\na=ice-lite\r\nm=audio 3484 UDP/TLS/RTP/SAVPF 111 126\r\nc=IN IP4 57.144.125.35\r\na=rtcp:9 IN IP4 0.0.0.0\r\na=candidate:2776204705 1 udp 2122260223 57.144.125.35 3484 typ host generation 0 network-cost 50\r\na=candidate:282356334 1 udp 2122262783 2a03:2880:f33e:120:face:b00c:0:699c 3484 typ host generation 0 network-cost 50\r\na=ice-ufrag:MkBWnyZXw+wMTKqA\r\na=ice-pwd:X29zIx+RGAlhVLF1/lo0oQ==\r\na=fingerprint:sha-256 D8:C5:19:9F:8C:AD:AA:21:24:7A:71:6B:4d:6D:54:BC:45:75:FA:AA:61:F8:13:E6:F5:7E:D9:14:1F:59:6D:5E\r\na=setup:actpass\r\na=mid:audio\r\na=sendrecv\r\na=msid:7dc1f8d4-d804-48bf-af97-8c6fe9973828 WhatsAppTrack1\r\na=rtcp-mux\r\na=rtpmap:111 opus/48000/2\r\na=rtcp-fb:111 transport-cc\r\na=fmtp:111 maxaveragebitrate=20000;maxplaybackrate=16000;minptime=20;sprop-maxcapturerate=16000;useinbandfec=1\r\na=rtpmap:126 telephone-event/8000\r\na=maxptime:20\r\na=ptime:20\r\na=ssrc:3435760938 cname:WhatsAppAudioStream1\r\n",
                  "sdp_type": "offer"
                }
              }
            ]
          },
          "field": "calls"
        }
      ]
    }
  ]
}

On the Node.js backend, I handle it like this:

/**
 * Handles the WhatsApp Cloud API "calls" webhook for a user-initiated call.
 *
 * Flow:
 *  1. Acknowledge the webhook with HTTP 200 right away.
 *  2. Extract the call id, the caller's SDP offer and the business phone
 *     number id from the payload.
 *  3. Resolve the tenant that owns that phone number id.
 *  4. Send a `pre_accept` action to the Graph API.
 *  5. Forward the caller's UNMODIFIED SDP offer to the tenant's frontend over
 *     the socket so the browser can build the real answer.
 *
 * Errors are logged, never rethrown: the webhook response has already been
 * sent by the time anything can fail.
 *
 * @param {object} args
 * @param {object} args.req - HTTP request whose `body` is the webhook payload.
 * @param {object} args.res - HTTP response; only `sendStatus` is used.
 * @returns {Promise<void>}
 */
const handleUserInitiatedCallRequest = async ({ req, res }) => {
  try {
    // Acknowledge receipt before doing any slow work.
    res.sendStatus(200);

    const change = req.body?.entry?.[0]?.changes?.[0];
    const call = change?.value?.calls?.[0];
    const callId = call?.id;
    const receivedSdp = call?.session?.sdp;
    // This is `metadata.phone_number_id`; it is what the `/calls` endpoint
    // below is addressed with.
    const phoneNumberId = change?.value?.metadata?.phone_number_id;

    if (!callId || !receivedSdp || !phoneNumberId) {
      console.error("Missing required call data:", {
        callId,
        receivedSdp,
        phoneNumberId,
      });
      return;
    }

    // Retrieve tenant ID
    const tenantRes = await getTenantIdFromUserMetaData(
      "whatsapp_id",
      phoneNumberId
    );
    console.log("Tenant response:", tenantRes);
    const tenant_id = tenantRes?.tenant_id;

    if (!tenant_id) {
      console.error("Failed to retrieve tenant ID");
      return;
    }

    // Placeholder answer for pre_accept: the offer with the DTLS role and the
    // media direction rewritten.
    // NOTE(review): this placeholder still carries the caller's own ICE
    // credentials and DTLS fingerprint, not the browser's. Confirm against the
    // Calling API docs whether pre_accept should instead be sent with the
    // answer generated by the browser.
    const preAcceptSdp = receivedSdp
      .replace("setup:actpass", "setup:passive")
      .replace("a=sendrecv", "a=recvonly");

    const preAcceptPayload = {
      messaging_product: "whatsapp",
      call_id: callId,
      action: "pre_accept",
      session: {
        sdp_type: "answer", // Required by API for pre_accept
        sdp: preAcceptSdp,
      },
    };

    const graphApiVersion = process.env.META_GRAPH_API_VERSION || "v20.0";
    const requestObj = {
      url: `https://graph.facebook.com/${graphApiVersion}/${phoneNumberId}/calls`,
      method: "POST",
      data: preAcceptPayload,
      headers: {
        Authorization: `Bearer ${process.env.JWERO_META_TOKEN}`,
      },
    };

    // Log everything except the headers so the bearer token never reaches the
    // logs.
    console.log("WhatsApp API Pre-accept Request Payload:", {
      url: requestObj.url,
      method: requestObj.method,
      data: requestObj.data,
    });

    // Call the WhatsApp API to pre-accept the call
    const { data: preAcceptResponse } = await axios(requestObj);
    console.log("Pre-accept API Response:", preAcceptResponse);

    // The frontend applies this SDP as the remote *offer*, so it must be the
    // caller's original offer. Sending the rewritten placeholder would tell
    // the browser the caller is receive-only with a fixed passive DTLS role.
    const io = getIO();
    io.emit(`user_call_request_${tenant_id}`, {
      success: true,
      callId,
      sdp: receivedSdp,
    });
  } catch (error) {
    console.error("Error handling call request:", getAxiosError(error));
  }
};

And lastly, this is my frontend in ReactJS:

import { useEffect, useRef, useState } from "react";
import { useSelector } from "react-redux";
import { io } from "socket.io-client";
import { WEB_SOCKET_URL } from "@main-utils";
import {
  getAxiosError,
  isObjWithValues,
} from "jwero-javascript-utils";
import { useDispatch } from "react-redux";
import { handleSocialMessage } from "@redux-actions/chatsActions";
import { backendApiCall } from "@/utils/backendApiCall";
import { getValueFormIntegrations } from "@/pages/chats/components/inbox/chat_conversation/components/send_messages/components/products_section/helper/getValueFormIntegrations";
import { backendEndpoints } from "@/utils/endpoints";

const MainSocket = () => {
  const socketRef = useRef();
  const dispatch = useDispatch();
  // const navigate = useNavigate();
  const messageQueue = useRef([]);
  const processingRef = useRef(false);

  const tenantId = useSelector((state) => state?.users?.tenant_id);
  const userId = useSelector((state) => state?.users?.id);
  const isAdmin = useSelector((state) => state?.users?.isAdmin);
  const reduxTeamMember = useSelector((state) => state?.users?.all_users);
  const allTasks = useSelector((state) => state?.tasks?.all_tasks);

  const metaIntegration = useSelector(
    (state) => state?.settings?.metaIntegration
  );
  const [callStatus, setCallStatus] = useState("Waiting for call...");
  const localAudioRef = useRef(null);
  const peerConnectionRef = useRef(null);
  const metaIntegrationRef = useRef(null);
  const isCallInProgress = useRef(false); // Track call state to prevent re-entry

  useEffect(() => {
    metaIntegrationRef.current = metaIntegration;
    const storeUniqueValue = tenantId;
    const socketLink = WEB_SOCKET_URL;
    socketRef.current = io(socketLink);

    // Initialize WebRTC peer connection
    const configuration = {
      iceServers: [{ urls: "stun:stun.l.google.com:19302" }],
    };
    peerConnectionRef.current = new RTCPeerConnection(configuration);

    // Handle ICE candidates
    peerConnectionRef.current.onicecandidate = (event) => {
      if (event.candidate) {
        console.log("New ICE candidate:", event.candidate);
        // Optionally send ICE candidate to backend via WebSocket
      }
    };

    // Handle incoming audio stream
    peerConnectionRef.current.ontrack = (event) => {
      console.log("Received track:", event); // Log to verify track receipt
      if (event.streams[0]) {
        console.log("Attaching stream to audio element:", event.streams[0]);
        localAudioRef.current.srcObject = event.streams[0]; // Assign stream to audio element
        setCallStatus("Call connected");
      }
    };

    if (socketRef.current && !socketRef.current?.connected) {
      socketRef.current.on("connect", () => {
        console.log("CONNECTED SUCCESFULLY TO SOCKET...", socketLink);

        // Messages
        socketRef.current.on(`visitor_${storeUniqueValue}`, (message) => {
          console.log(message);
          if (
            isObjWithValues(message) &&
            (message.payload || message?.mainPayload)
          ) {
            dispatch(
              handleSocialMessage({
                payload: message?.mainPayload,
                payloadObject: message,
              })
            );
          }
        });
        socketRef.current.on(
          `user_call_request_${storeUniqueValue}`,
          async (message) => {
            console.log("Received call request:", message);
            if (isCallInProgress.current) {
              console.warn("Call already in progress, ignoring new offer");
              return;
            }
            isCallInProgress.current = true;
            setCallStatus("Receiving call...");

            try {
              // Validate message
              if (!message?.sdp || !message?.callId) {
                throw new Error("Invalid message: Missing SDP or callId");
              }

              // Reset connection if not in stable state
              if (peerConnectionRef.current.signalingState !== "stable") {
                console.warn("Connection not in stable state, resetting...");
                peerConnectionRef.current.close();
                peerConnectionRef.current = new RTCPeerConnection(
                  configuration
                );
                peerConnectionRef.current.ontrack = (event) => {
                  if (event.streams[0]) {
                    localAudioRef.current.srcObject = event.streams[0];
                    setCallStatus("Call connected");
                  }
                };
                peerConnectionRef.current.onicecandidate = (event) => {
                  if (event.candidate) {
                    console.log("New ICE candidate:", event.candidate);
                  }
                };
                peerConnectionRef.current.onsignalingstatechange = () => {
                  console.log(
                    "Signaling state:",
                    peerConnectionRef.current.signalingState
                  );
                };
              }

              // Create RTCSessionDescription for the offer
              const remoteOffer = new RTCSessionDescription({
                type: "offer",
                sdp: message.sdp,
              });

              // Set remote description
              console.log("Setting remote description...");
              await peerConnectionRef.current.setRemoteDescription(remoteOffer);
              console.log(
                "Signaling state after setRemoteDescription:",
                peerConnectionRef.current.signalingState
              );

              // Verify state
              if (
                peerConnectionRef.current.signalingState !== "have-remote-offer"
              ) {
                throw new Error(
                  `Unexpected signaling state: ${peerConnectionRef.current.signalingState}`
                );
              }

              // Create and set answer
              console.log("Creating answer...");
              const answer = await peerConnectionRef.current.createAnswer();
              console.log("Setting local description (answer)...");
              await peerConnectionRef.current.setLocalDescription(answer);

              // Get WhatsApp phone ID
              const selected_whatsapp_phone_id = getValueFormIntegrations({
                integrations: metaIntegrationRef.current,
                key: "selected_whatsapp_phone_id",
                platform: "whatsapp",
              });
              console.log(
                "Selected WhatsApp phone ID:",
                selected_whatsapp_phone_id
              );

              // Send answer SDP to WhatsApp Cloud API
              const response = await backendApiCall({
                endpoint: `${backendEndpoints.social}/whatsapp`,
                method: "POST",
                params: {
                  endpoint: `/${selected_whatsapp_phone_id}/calls`,
                },
                data: {
                  messaging_product: "whatsapp",
                  call_id: message.callId,
                  action: "accept",
                  session: {
                    sdp_type: "answer",
                    sdp: peerConnectionRef.current.localDescription.sdp,
                  },
                },
              });

              console.log("WhatsApp API response:", response);
              setCallStatus("Call answered");
            } catch (error) {
              console.error("Error handling offer:", getAxiosError(error));
              setCallStatus("Error in call setup");
            } finally {
              isCallInProgress.current = false; // Reset call state
            }
          }
        );

      });

    }

    return () => {
      socketRef.current?.off(`connect`);
      socketRef.current?.off(`new_update_`);
      socketRef.current?.off(`visitor_${storeUniqueValue}`);

      peerConnectionRef.current.close();
      if (localAudioRef.current) {
        localAudioRef.current.srcObject = null; // Clear audio stream
      }

      // socketRef.current?.off(`call_start_${storeUniqueValue}`);
      // socketRef.current?.off(`call_accepted_${storeUniqueValue}`);
      // socketRef.current?.off(`call_ended_${storeUniqueValue}`);
    };
  }, [dispatch, tenantId, metaIntegration]);

  return (
    <div className="min-h-screen bg-gray-100 flex items-center justify-center">
      <div className="bg-white p-6 rounded-lg shadow-lg">
        <h1 className="text-2xl font-bold mb-4">WhatsApp WebRTC Call</h1>
        <p className="mb-4">Status: {callStatus}</p>
        {/* Audio element to play the user's audio */}
        <audio
          ref={localAudioRef}
          autoPlay
          playsInline
          className="w-full"
        ></audio>
      </div>
    </div>
  );
};

export default MainSocket;

After this, I'm not getting any audio, and the call is automatically terminated after 20 seconds. I've also attached a screenshot of the frontend logs.

enter image description here


Solution

  • I faced the exact same issue recently. The core problem here is timing: when a WhatsApp call reaches the connect state, it expects media to start flowing immediately after the accept call. If your browser isn’t fully prepared to send audio, WhatsApp assumes the call has failed and terminates it within ~20 seconds.

    In your case, it looks like you're creating and sending the SDP answer to WhatsApp too early, before adding the browser’s media tracks (such as the microphone input). This means WhatsApp receives a valid answer but no audio actually flows, which results in an auto-disconnect.

    Make sure the browser:

    1. Receives the WhatsApp SDP offer.

    2. Sets it as remote description.

    3. Adds microphone audio track via addTrack().

    4. Only then creates the answer and sets the local description.

    5. Sends that SDP answer to your backend → which then passes it to WhatsApp via the accept API.

    I've created a working example using Node.js backend and a browser-based HTML frontend to demonstrate this exact flow: https://github.com/arslan1317/whatsapp-calling

    I’ve also explained the architecture, WebRTC internals, and troubleshooting tips in this Medium article: How to Integrate WhatsApp Calling API in Your Web App Using WebRTC

    Hope this clears things up and helps you get it working!