web-scrapinghttp-status-code-401unauthorized

401 Unauthorized: web-scraping with requests python library


I'm trying to use an API from a database using the requests library. The code is working, but every now and then i have to go and remake my headers, because of an authorization error 401. Is there a way to reauthenticate or to get the fresh headers by code? Because if i need to go and get new headers every time, the code becomes meaningless.

url: Database.com

input

import requests
import json
from bs4 import BeautifulSoup as bs4
import pandas as pd
url = "https://dw.ceasa.gov.br/saiku/rest/saiku/api/query/execute"

payload = json.dumps({
  "name": "0A777C63-EAF6-181A-9FB1-248AFCE90BE1",
  "queryModel": {
    "axes": {
      "FILTER": {
        "mdx": None,
        "filters": [],
        "sortOrder": None,
        "sortEvaluationLiteral": None,
        "hierarchizeMode": None,
        "location": "FILTER",
        "hierarchies": [],
        "nonEmpty": False,
        "aggregators": []
      },
      "COLUMNS": {
        "mdx": None,
        "filters": [],
        "sortOrder": None,
        "sortEvaluationLiteral": None,
        "hierarchizeMode": None,
        "location": "COLUMNS",
        "hierarchies": [
          {
            "name": "[04-Produto].[04-Produto]",
            "levels": {
              "Produto": {
                "name": "Produto"
              }
            }
          }
        ],
        "nonEmpty": True,
        "aggregators": []
      },
      "ROWS": {
        "mdx": None,
        "filters": [],
        "sortOrder": None,
        "sortEvaluationLiteral": None,
        "hierarchizeMode": None,
        "location": "ROWS",
        "hierarchies": [
          {
            "name": "[06-Ano].[06-Ano]",
            "levels": {
              "Ano": {
                "name": "Ano",
                "aggregators": [],
                "selection": {
                  "type": "EXCLUSION",
                  "members": [
                    {
                      "uniqueName": "[06-Ano].[06-Ano].[2015]",
                      "caption": "2015"
                    },
                    {
                      "uniqueName": "[06-Ano].[06-Ano].[2016]",
                      "caption": "2016"
                    },
                    {
                      "uniqueName": "[06-Ano].[06-Ano].[2017]",
                      "caption": "2017"
                    },
                    {
                      "uniqueName": "[06-Ano].[06-Ano].[2018]",
                      "caption": "2018"
                    },
                    {
                      "uniqueName": "[06-Ano].[06-Ano].[2019]",
                      "caption": "2019"
                    },
                    {
                      "uniqueName": "[06-Ano].[06-Ano].[2020]",
                      "caption": "2020"
                    },
                    {
                      "uniqueName": "[06-Ano].[06-Ano].[2021]",
                      "caption": "2021"
                    },
                    {
                      "uniqueName": "[06-Ano].[06-Ano].[2022]",
                      "caption": "2022"
                    },
                    {
                      "uniqueName": "[06-Ano].[06-Ano].[2023]",
                      "caption": "2023"
                    }
                  ]
                }
              }
            }
          },
          {
            "name": "[07-Mes].[07-Mes]",
            "levels": {
              "Mes": {
                "name": "Mes",
                "aggregators": [],
                "selection": {
                  "type": "EXCLUSION",
                  "members": [
                    {
                      "uniqueName": "[07-Mes].[07-Mes].[1º SEM].[JANEIRO]",
                      "caption": "JANEIRO"
                    },
                    {
                      "uniqueName": "[07-Mes].[07-Mes].[1º SEM].[FEVEREIRO]",
                      "caption": "FEVEREIRO"
                    },
                    {
                      "uniqueName": "[07-Mes].[07-Mes].[1º SEM].[MARÇO]",
                      "caption": "MARÇO"
                    },
                    {
                      "uniqueName": "[07-Mes].[07-Mes].[1º SEM].[ABRIL]",
                      "caption": "ABRIL"
                    },
                    {
                      "uniqueName": "[07-Mes].[07-Mes].[1º SEM].[MAIO]",
                      "caption": "MAIO"
                    },
                    {
                      "uniqueName": "[07-Mes].[07-Mes].[1º SEM].[JUNHO]",
                      "caption": "JUNHO"
                    },
                    {
                      "uniqueName": "[07-Mes].[07-Mes].[2º SEM].[JULHO]",
                      "caption": "JULHO"
                    }
                  ]
                }
              }
            }
          },
          {
            "name": "[08-Dia].[08-Dia]",
            "levels": {
              "Dia Mes": {
                "name": "Dia Mes"
              }
            }
          }
        ],
        "nonEmpty": True,
        "aggregators": []
      }
    },
    "visualTotals": False,
    "visualTotalsPattern": None,
    "lowestLevelsOnly": False,
    "details": {
      "axis": "COLUMNS",
      "location": "BOTTOM",
      "measures": []
    },
    "calculatedMeasures": []
  },
  "queryType": "OLAP",
  "type": "QUERYMODEL",
  "cube": {
    "uniqueName": "[Prohort].[Prohort].[Prohort].[PROHORT - PRECO DIARIO]",
    "name": "PROHORT - PRECO DIARIO",
    "connection": "Prohort",
    "catalog": "Prohort",
    "schema": "Prohort",
    "caption": None,
    "visible": False
  },
  "mdx": None,
  "parameters": {},
  "plugins": {},
  "properties": {
    "saiku.olap.query.automatic_execution": False,
    "saiku.olap.query.nonempty": True,
    "saiku.olap.query.nonempty.rows": True,
    "saiku.olap.query.nonempty.columns": True,
    "saiku.ui.render.mode": "table",
    "saiku.olap.query.filter": True,
    "saiku.olap.result.formatter": "flattened",
    "org.saiku.query.explain": True,
    "org.saiku.connection.scenario": False,
    "saiku.olap.query.drillthrough": True
  },
  "metadata": {}
})

headers = {
  'Accept': 'application/json, text/javascript, */*; q=0.01',
  'Accept-Language': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7',
  'Connection': 'keep-alive',
  'Content-Type': 'application/json',
  'Cookie': 'JSESSIONID=EB000E928441CC372431FA5B47CB86F8',
  'Origin': 'https://dw.ceasa.gov.br',
  'Referer': 'https://dw.ceasa.gov.br/',
  'Sec-Fetch-Dest': 'empty',
  'Sec-Fetch-Mode': 'cors',
  'Sec-Fetch-Site': 'same-origin',
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
  'X-Requested-With': 'XMLHttpRequest',
  'sec-ch-ua': '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
  'sec-ch-ua-mobile': '?0',
  'sec-ch-ua-platform': '"Windows"'
}


response = requests.request("POST", url, headers=headers, data=payload)
print(response.status_code)

output at first: 200

output the "next day": 401


Solution

  • Here's how to get the JSESSIONID:

    import requests
    
    session = requests.Session()
    
    data = {
        'username': 'almir.costa',
        'password': '123123',
    }
    
    url = 'https://dw.ceasa.gov.br/saiku/rest/saiku/session'
    response = session.post(url, data=data)
    

    The post request will set the JSESSIONID cookie.

    Use the session to make all your requests. A session will persists all cookies/headers across requests, you can add header to it with: session.headers = headers or session.headers.update(headers)


    Here's the full code:

    import requests
    import json
    from bs4 import BeautifulSoup as bs4
    import pandas as pd
    
    
    payload = json.dumps({
      "name": "0A777C63-EAF6-181A-9FB1-248AFCE90BE1",
      "queryModel": {
        "axes": {
          "FILTER": {
            "mdx": None,
            "filters": [],
            "sortOrder": None,
            "sortEvaluationLiteral": None,
            "hierarchizeMode": None,
            "location": "FILTER",
            "hierarchies": [],
            "nonEmpty": False,
            "aggregators": []
          },
          "COLUMNS": {
            "mdx": None,
            "filters": [],
            "sortOrder": None,
            "sortEvaluationLiteral": None,
            "hierarchizeMode": None,
            "location": "COLUMNS",
            "hierarchies": [
              {
                "name": "[04-Produto].[04-Produto]",
                "levels": {
                  "Produto": {
                    "name": "Produto"
                  }
                }
              }
            ],
            "nonEmpty": True,
            "aggregators": []
          },
          "ROWS": {
            "mdx": None,
            "filters": [],
            "sortOrder": None,
            "sortEvaluationLiteral": None,
            "hierarchizeMode": None,
            "location": "ROWS",
            "hierarchies": [
              {
                "name": "[06-Ano].[06-Ano]",
                "levels": {
                  "Ano": {
                    "name": "Ano",
                    "aggregators": [],
                    "selection": {
                      "type": "EXCLUSION",
                      "members": [
                        {
                          "uniqueName": "[06-Ano].[06-Ano].[2015]",
                          "caption": "2015"
                        },
                        {
                          "uniqueName": "[06-Ano].[06-Ano].[2016]",
                          "caption": "2016"
                        },
                        {
                          "uniqueName": "[06-Ano].[06-Ano].[2017]",
                          "caption": "2017"
                        },
                        {
                          "uniqueName": "[06-Ano].[06-Ano].[2018]",
                          "caption": "2018"
                        },
                        {
                          "uniqueName": "[06-Ano].[06-Ano].[2019]",
                          "caption": "2019"
                        },
                        {
                          "uniqueName": "[06-Ano].[06-Ano].[2020]",
                          "caption": "2020"
                        },
                        {
                          "uniqueName": "[06-Ano].[06-Ano].[2021]",
                          "caption": "2021"
                        },
                        {
                          "uniqueName": "[06-Ano].[06-Ano].[2022]",
                          "caption": "2022"
                        },
                        {
                          "uniqueName": "[06-Ano].[06-Ano].[2023]",
                          "caption": "2023"
                        }
                      ]
                    }
                  }
                }
              },
              {
                "name": "[07-Mes].[07-Mes]",
                "levels": {
                  "Mes": {
                    "name": "Mes",
                    "aggregators": [],
                    "selection": {
                      "type": "EXCLUSION",
                      "members": [
                        {
                          "uniqueName": "[07-Mes].[07-Mes].[1º SEM].[JANEIRO]",
                          "caption": "JANEIRO"
                        },
                        {
                          "uniqueName": "[07-Mes].[07-Mes].[1º SEM].[FEVEREIRO]",
                          "caption": "FEVEREIRO"
                        },
                        {
                          "uniqueName": "[07-Mes].[07-Mes].[1º SEM].[MARÇO]",
                          "caption": "MARÇO"
                        },
                        {
                          "uniqueName": "[07-Mes].[07-Mes].[1º SEM].[ABRIL]",
                          "caption": "ABRIL"
                        },
                        {
                          "uniqueName": "[07-Mes].[07-Mes].[1º SEM].[MAIO]",
                          "caption": "MAIO"
                        },
                        {
                          "uniqueName": "[07-Mes].[07-Mes].[1º SEM].[JUNHO]",
                          "caption": "JUNHO"
                        },
                        {
                          "uniqueName": "[07-Mes].[07-Mes].[2º SEM].[JULHO]",
                          "caption": "JULHO"
                        }
                      ]
                    }
                  }
                }
              },
              {
                "name": "[08-Dia].[08-Dia]",
                "levels": {
                  "Dia Mes": {
                    "name": "Dia Mes"
                  }
                }
              }
            ],
            "nonEmpty": True,
            "aggregators": []
          }
        },
        "visualTotals": False,
        "visualTotalsPattern": None,
        "lowestLevelsOnly": False,
        "details": {
          "axis": "COLUMNS",
          "location": "BOTTOM",
          "measures": []
        },
        "calculatedMeasures": []
      },
      "queryType": "OLAP",
      "type": "QUERYMODEL",
      "cube": {
        "uniqueName": "[Prohort].[Prohort].[Prohort].[PROHORT - PRECO DIARIO]",
        "name": "PROHORT - PRECO DIARIO",
        "connection": "Prohort",
        "catalog": "Prohort",
        "schema": "Prohort",
        "caption": None,
        "visible": False
      },
      "mdx": None,
      "parameters": {},
      "plugins": {},
      "properties": {
        "saiku.olap.query.automatic_execution": False,
        "saiku.olap.query.nonempty": True,
        "saiku.olap.query.nonempty.rows": True,
        "saiku.olap.query.nonempty.columns": True,
        "saiku.ui.render.mode": "table",
        "saiku.olap.query.filter": True,
        "saiku.olap.result.formatter": "flattened",
        "org.saiku.query.explain": True,
        "org.saiku.connection.scenario": False,
        "saiku.olap.query.drillthrough": True
      },
      "metadata": {}
    })
    
    headers = {
      'Accept': 'application/json, text/javascript, */*; q=0.01',
      'Accept-Language': 'pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7',
      'Content-Type': 'application/json',
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'
    }
    
    
    session = requests.Session()
    
    cookie_data = {
       'username': 'almir.costa',
       'password': '123123'
    }
    
    cookie_url = 'https://dw.ceasa.gov.br/saiku/rest/saiku/session'
    response = session.post(cookie_url, data=cookie_data)
    
    url = "https://dw.ceasa.gov.br/saiku/rest/saiku/api/query/execute"
    response = session.post(url, headers=headers, data=payload)
    print(response.text)