I am using Flask-SQLAlchemy and Flask-Marshmallow to pull data from a SQL Server instance. The query that I'm trying to replicate looks like this
SELECT
o.[ProductId],
[AccountName] = a.[Name],
[AccountDescription] = a.[Description],
[TotalSize] = SUM(d.[Size])
FROM
[OrderDetail] d
JOIN
[Order] o
ON
d.[OrderId] = o.[Id]
JOIN
[Account] a
ON
d.[AccountId] = a.[Id]
WHERE
o.[Timestamp] <= @asOfDate
GROUP BY
o.[ProductId],
a.[Name],
a.[Description]
I am having difficulty translating this to SQLAlchemy, specifically Flask-SQLAlchemy which it seems has a slightly different way of doing things.
This is my attempt, located inside my route function. This query executes, but the SQL that is run against the server is just a join of the three tables—it doesn't do the aggregation or group by. It also is selecting every column, I think, instead of just the ones I've specified in with_entities
.
# views.py
@app.route('/product_sizes')
def get_product_sizes() -> Response:
as_of_date_str = request.args.get('asOfDate')
query = OrderDetail.query
.with_entities(Order.product_id.label('product_id'),
Account.name.label('account_name'),
Account.description.label('account_description'),
func.sum(OrderDetail.size).label('total_size')) \
.group_by('product_id', 'account_name', 'account_description')
if as_of_date_str:
as_of_date = datetime.strptime(as_of_date_str, '%Y-%m-%d')
query = query.join(OrderDetail.order).filter(Order.timestamp <= as_of_date)
result = schemas.product_size_schema.dump(query).data
json_string = json.dumps(result, cls=DefaultJsonEncoder)
return Response(response=json_string, status=200, mimetype="application/json")
My questions are
join
when trying to filter against a joined table seems redundant—isn't that join already encoded in the db.relationship
property?My models and schema are below.
# models.py
class Account(db.Model):
_id = db.Column('Id', db.Integer, primary_key=True)
name = db.Column('Name', db.Unicode(250))
description = db.Column('Description', db.Unicode)
class Order(db.Model):
_id = db.Column('Id', db.Integer, primary_key=True)
product_id = db.Column('ProductId', db.Integer)
timestamp = db.Column('TradeTimestamp', db.DateTime)
class OrderDetail(db.Model):
_id = db.Column('Id', db.Integer, primary_key=True)
order_id = db.Column('OrderId', db.ForeignKey('Order.Id'), index=True)
account_id = db.Column('AccountId', db.ForeignKey('Account.Id'), index=True)
size = db.Column('Size', db.Numeric(19, 6), nullable=False)
account = db.relationship('Account', primaryjoin='OrderDetail.account_id == Account._id', backref='order_details', lazy='joined')
order = db.relationship('Trade', primaryjoin='OrderDetail.order_id == Order._id', backref='order_details', lazy='joined')
"""schemas.py"""
class ProductSizeSchema(ma.ModelSchema):
"""Schema for serializing product size objects"""
product_id = fields.Int()
account_name = fields.Str()
account_description = fields.Str()
total_size = fields.Decimal(19, 6)
product_size_schema = ProductSizeSchema()
product_sizes_schema = ProductSizeSchema(many=True)
After looking around, I read that it's not at all a big deal to not use the Flask-SQLAlchemy method of querying (e.g., MyModel.query.<something>
) if it's not convenient to do so. So I just used db.session.query()
in a normal SQLAlchemy fashion.
query = db.session \
.query(Order.instrument_id,
Account.name.label("account_name"),
Account.description.label("account_description"),
func.sum(OrderDetail.size).label("total_size")) \
.join(OrderDetail.order) \
.join(OrderDetail.account) \
.group_by(Order.instrument_id, Account.name, Account.description)
if as_of_date_str:
as_of_date = datetime.strptime(as_of_date_str, '%Y-%m-%d')
query = query.filter(Order.timestamp <= as_of_date)