Below is what I'm trying to accomplish:
Issue that I'm facing:
response = requests.get(API_SCHEME_DATA, headers=headers, data=payload, timeout=20)
if response.status_code == 200:
data = response.json()["Data"]
df = pd.DataFrame(data, dtype=str)
# bigint list contains column names that I can treat as numeric
for col in bigint_list:
df[col] = pd.to_numeric(df[col], errors='coerce')
df[col] = df[col].fillna(0.0)
df = df.astype(str)
df = df.where(pd.notnull(df), None)
# Prepare columns and placeholders for the SQL query
columns = [f"[{col}]" for col in df.columns]
placeholders = ', '.join(['?' for _ in range(len(df.columns))])
# SQL Insert query
insert_data_query = f"""
INSERT INTO {table_name} ({', '.join(columns)})
VALUES ({placeholders})
"""
# Convert the dataframe rows to a list of tuples for insertion
rows = [tuple(x) for x in df.replace({None: None}).values]
# Execute the insert query for multiple rows
connection_cursor.executemany(insert_data_query, rows)
connection_cursor.connection.commit()
pyodbc.ProgrammingError: ('42000', '[42000] [Microsoft][ODBC SQL Server Driver][SQL Server]Error converting data type nvarchar to numeric. (8114) (SQLExecDirectW)')
"CurrentValue": "0.00002"
'2e-05'
. The column that I'm inserting in the SQL server is of DECIMAL
type. And I believe this is why I'm getting the error.What I have done so far to resolve:
pd.options.display.float_format = '{:.8f}'.format
and also tried to round off the columns like below during cleaning step:
# bigint list contains column names that I can treat as numeric
for col in bigint_list:
df[col] = pd.to_numeric(df[col], errors='coerce')
df[col] = df[col].fillna(0.0)
df[col] = df[col].round(10)
df = df.astype(str)
df = df.where(pd.notnull(df), None)
However, nothing seems to work and I'm still seeing the value converted to scientific notation. Any help would be appreciated.
You're probably encountering issues with how Python's json
deserializes numbers from JSON responses. Consider the following example:
from httmock import all_requests, response, HTTMock
import pandas as pd
import requests
@all_requests
def response_content(url, request):
headers = { 'Content-Type': 'application/json' }
content = '{ "Data": [ { "CurrentValue": 0.00001 }, { "CurrentValue": 0.00002 } ] }'
return response(200, content, headers, None, 5, request)
with HTTMock(response_content):
response = requests.get('http://example.org/')
data = response.json()["Data"]
df = pd.DataFrame(data=data, dtype=str)
print(df)
Which outputs the DataFrame:
CurrentValue
0 1e-05
1 2e-05
To get more control over deserialization of the JSON response you can import the json
module and invoke json.loads()
directly, e.g.:
from httmock import all_requests, response, HTTMock
import json
import pandas as pd
import requests
@all_requests
def response_content(url, request):
headers = { 'Content-Type': 'application/json' }
content = '{ "Data": [ { "CurrentValue": 0.00001 }, { "CurrentValue": 0.00002 } ] }'
return response(200, content, headers, None, 5, request)
with HTTMock(response_content):
response = requests.get('http://example.org/')
data = json.loads(response.text, parse_float=None)["Data"]
df = pd.DataFrame(data=data)
print(df)
This outputs the DataFrame:
CurrentValue
0 0.00001
1 0.00002