c++ uri url-parsing

Split URL into Host, Port and Resource - C++


I need to split a URL into host, port and resource. I searched a lot of references but couldn't find anything that helped. This is what I want:

E.g., if the URL is 1.2.3.4:5678/path1/path2.html, the required output is: Host - 1.2.3.4, Port - 5678, Resource - /path1/path2.html

This is what I tried:

#include <iostream>
 #include <cstddef>
 #include <string>
 using namespace std;

// Attempts to split a hard-coded URL into protocol, host, port and resource
// and prints the pieces. Every search below starts at index 0 of the full
// URL, so all three of them land on the scheme's ':' (index 4).
int main()
{
   string url="http://qwert.mjgug.ouhnbg:5678/path1/path2.html";
   // find_first_of("://") looks for the first character that is ':' OR '/',
   // not for the three-character sequence "://".
   size_t found = url.find_first_of("://");
   cout<<found<<endl;
   string protocol=url.substr(0,found);
   // Searches from the start again, so this finds the same ':' as above
   // rather than the ':' in front of the port.
   size_t found1 =url.find_first_of(":");
   cout<<found1<<endl;
   // substr(pos, len): the second argument is a length, not an end index.
   // With found == found1 the requested length is just 1.
   string host =url.substr(found+3,found1-found+1);
   // Same character-set search from index 0, so found2 equals found as well.
   size_t found2 = url.find_first_of(":/");
   // Start offset found1+7 and length found2+found1-1 are fixed arithmetic on
   // the scheme colon's index; they do not track where the port really is.
   string port1 =url.substr(found1+7,found2+found1-1);
   // Everything after the scheme's ':' -- i.e. "//host:port/path".
   string port =url.substr(found2+1);
   cout<<protocol<<endl;
   cout<<host<<endl;
   cout<<port1<<endl;
   cout<<port;
   return 0;
}

My expected result is:

Protocol - http
Host - qwert.mjgug.ouhnbg
Port - 5678
Resource - path1/path2.html

But my result is:

http:                                                                                                                                                  
qwert.mj                                                                                                                                               
t.mjgug                                                                                                                                                
//qwert.mjgug.ouhnbg:5678/path1/path2.html

What should I change?


Solution

  • Use string.find_first_of(":") to get the index of the first occurrence of any of the given characters, and use string.substr(pos,len) to get the substring that starts at index pos and has length len:

     #include <iostream>
     #include <cstddef>
     #include <string>
     using namespace std;
    
    // Splits a scheme-less URL of the form host[:port][/resource] and prints
    // the host, port and resource on separate lines. A component that is
    // missing from the URL is printed as an empty string.
    int main()
    {
       const std::string url = "1.2.3.4:5678/path1/path2.html";

       // Everything before the first '/' is the authority (host[:port]); the
       // rest, slash included, is the resource. When there is no '/', the
       // search returns npos and substr(0, npos) keeps the whole string.
       const std::size_t slash = url.find_first_of('/');
       const std::string authority = url.substr(0, slash);
       // substr(npos) would throw std::out_of_range, so guard the no-'/' case.
       const std::string resource =
           (slash == std::string::npos) ? std::string() : url.substr(slash);

       // Look for ':' inside the authority only, so that a ':' appearing in
       // the resource is never mistaken for the port separator.
       const std::size_t colon = authority.find_first_of(':');
       const std::string host = authority.substr(0, colon);
       const std::string port =
           (colon == std::string::npos) ? std::string() : authority.substr(colon + 1);

       std::cout << host << '\n';
       std::cout << port << '\n';
       std::cout << resource;
       return 0;
    }
    

    With http or https in the URL:

    // Splits a URL of the form [scheme://]host[:port][/path] and prints the
    // protocol, host, port and path on separate lines. A component that is
    // missing from the URL is printed as an empty string. Bracketed IPv6
    // hosts such as [::1] are not handled.
    int main()
    {
      const std::string url = "http://qwert.mjgug.ouhnbg:5678/path1/path2.html";

      // find() matches the whole "://" sequence, whereas find_first_of("://")
      // would stop at the first ':' or '/' wherever it occurs.
      const std::string scheme_separator = "://";
      const std::size_t scheme_end = url.find(scheme_separator);
      const bool has_scheme = scheme_end != std::string::npos;
      const std::string protocol =
          has_scheme ? url.substr(0, scheme_end) : std::string();

      // url_new is the url excluding the scheme part (the whole url when
      // there is no scheme).
      const std::string url_new =
          has_scheme ? url.substr(scheme_end + scheme_separator.size()) : url;

      // Everything before the first '/' is host[:port]; the rest is the path.
      // substr(npos) would throw std::out_of_range, so guard the no-'/' case.
      const std::size_t slash = url_new.find_first_of('/');
      const std::string authority = url_new.substr(0, slash);
      const std::string path =
          (slash == std::string::npos) ? std::string() : url_new.substr(slash);

      // Search for ':' in the authority only, so a ':' in the path cannot be
      // taken for the port separator.
      const std::size_t colon = authority.find_first_of(':');
      const std::string host = authority.substr(0, colon);
      const std::string port =
          (colon == std::string::npos) ? std::string() : authority.substr(colon + 1);

      std::cout << protocol << '\n';
      std::cout << host << '\n';
      std::cout << port << '\n';
      std::cout << path;
      return 0;
    }