SQLAlchemy Group By With Full Child Objects

Last Updated : 08 Sep, 2023

In this article, we will explore how to use SQLAlchemy Group By With Full Child Objects in Python. SQLAlchemy provides several techniques to achieve SQLAlchemy Group By with Full Child Objects.

SQLAlchemy Group By With Full Child Objects

In this section, we are making a connection with the database, we are creating the structure of two tables with parent-child relationships. Also, we are inserting data in both tables.

Python3

from sqlalchemy import *
from sqlalchemy.orm import sessionmaker, relationship 
from sqlalchemy.ext.declarative import declarative_base 
  
# create Base class 
Base = declarative_base() 
  
# Establish connection 
engine = create_engine("mysql+pymysql://userName:password@host/dbName") 
  
# model class 
  
  
class Department(Base): 
    __tablename__ = "department"
    id = Column(Integer, primary_key=True) 
    name = Column(String(50)) 
  
    # the argument should be  Class name not table name 
    employees = relationship('Employee') 
  
  
class Employee(Base): 
    __tablename__ = "employee"
    emp_id = Column(Integer, primary_key=True) 
    emp_name = Column(String(50)) 
    age = Column(Integer) 
    salary = Column(DECIMAL) 
  
    # dep_id is just to define the foreign key 
    dep_id = Column(Integer, ForeignKey("department.id")) 
    department = relationship("Department") 
  
  
# creating tables in DB,if tables alredy creatd no need of these statement 
Base.metadata.create_all(engine) 
  
  
# creating session 
Session = sessionmaker(bind=engine) 
session = Session() 
  
  
# creating instances of department class 
ece_dep = Department(id=1, name="ECE") 
cse_dep = Department(id=2, name="CSE") 
mech_dep = Department(id=3, name="MECH") 
  
# ece employees 
emp1 = Employee(emp_id=101, emp_name="Kohli",  age=34, 
                salary=75000,  dep_id=ece_dep.id) 
emp2 = Employee(emp_id=102, emp_name="Dhoni",  age=45, 
                salary=120000, dep_id=ece_dep.id) 
emp3 = Employee(emp_id=103, emp_name="Sachine", age=39, 
                salary=100000, dep_id=ece_dep.id) 
  
# cse employess 
emp4 = Employee(emp_id=201, emp_name="Alice",   age=26, 
                salary=40000, dep_id=cse_dep.id) 
emp5 = Employee(emp_id=202, emp_name="Bob",     age=56, 
                salary=75000, dep_id=cse_dep.id) 
emp6 = Employee(emp_id=203, emp_name="charlie", age=43, 
                salary=50000, dep_id=cse_dep.id) 
  
  
# mech employees 
emp7 = Employee(emp_id=301, emp_name="Ronaldo", age=56, 
                salary=40000, dep_id=mech_dep.id) 
emp8 = Employee(emp_id=302, emp_name="Messi",   age=54, 
                salary=30000, dep_id=mech_dep.id) 
emp9 = Employee(emp_id=303, emp_name="neymar",  age=45, 
                salary=25000, dep_id=mech_dep.id) 
  
# adding instances to session 
session.add_all([ece_dep, cse_dep, mech_dep, emp1, emp2, 
                 emp3, emp4, emp5, emp6, emp7, emp8, emp9]) 
  
# committing the changes 
session.commit() 
  
# closing the connection 
session.close() 

Output

Employee Table

Joinedload Technique

Joinedload technique in SQLAlchemy is used to explicitly instruct the ORM to perform a join query to load related objects along with the primary object. In SQLAlchemy, you can enable joined-load loading using the options() and joinedload() methods.

First, you define a subquery to calculates the count of employees per department using the session.query() method and specify the necessary columns(Department.id,Department.name) and aggregations(func.count(Employee.emp_id)).

session.query(Department,func.count(Employee.emp_id).label("employee_count"))\
    .join(Department.employees)\
    .group_by(Department)\
    .options(joinedload(Department.employees))\
    .all()

Then use join(Employess) to sepcify that the query should perform an join between the Departmant table and Employee table.

.join(Department.employees)

We specify the group_by() clause to group the results by the Department entity.

.group_by(Department)

Then, we use the options() method on the query object to specify additional query options, such as joinedload loading of relationships.
We pass the joinedload() method as argument to options().
Inside the joinedload() method, we specify the relationship Department.employees to load. indicating that we want to load the employee’s relationship of the Department model.

.options(joinedload(Department.employees))

finally, we use all() to retrieve the data.

.all()

Code Implementation

Python3

from sqlalchemy import *
from sqlalchemy.orm import sessionmaker, relationship 
from sqlalchemy.orm import joinedload 
from sqlalchemy.ext.declarative import declarative_base 
  
# create Base class 
Base = declarative_base() 
  
# Establish connection 
engine = create_engine("mysql+pymysql://user:password@host/dbName") 
  
# model class 
class Department(Base): 
    __tablename__ = "department"
    id = Column(Integer, primary_key=True) 
    name = Column(String(50)) 
      
    #the argument should be  Class name not table name 
    employees = relationship('Employee') 
  
  
class Employee(Base): 
    __tablename__ = "employee"
    emp_id = Column(Integer, primary_key=True) 
    emp_name = Column(String(50)) 
    age=Column(Integer) 
    salary=Column(DECIMAL) 
  
    #dep_id is just to define the foreign key  
    dep_id = Column(Integer, ForeignKey("department.id")) 
    department = relationship("Department") 
  
  
# creating tables in DB,if tables alredy  
# creatd no need of these statement 
Base.metadata.create_all(engine) 
  
  
# creating session 
Session = sessionmaker(bind=engine) 
session = Session() 
  
#query 
joinloadQuery =session.query(Department, 
          func.count(Employee.emp_id).label("count"))\ 
    .join(Department.employees)\ 
    .group_by(Department)\ 
    .options(joinedload(Department.employees)) 
  
#execuing query with DB and fetching results 
result=joinloadQuery.all() 
  
print("Joinload technique result") 
print("Dep Count") 
for dep,count in result: 
    print(dep.name,count) 
  
# closing the connection 
session.close() 

Output

Joined load Output

Contains Eager Loading Technique

contains eager technique in SQLAlchemy is used to indicate that a query should eagerly load a specific relationship and include it in the result set. It is particularly useful when you have loaded a relationship using a separate query or a different loading technique, and you want to include those related objects in the result set of the current query.

First, you define a subquery to calculates the sum of salaries of employees per department using the session.query() method and specify the necessary columns(Department.id,Department.name) and aggregations(func.sum(Employee.salary)).

query = session.query(Department, func.sum(Employee.salary)) \
        .join(Employee) \
        .group_by(Department.name) \
        .options(contains_eager(Department.employees)) \
        .all()

Then use join(Employess) to specify that the query should perform an join between the Department table and Employee table.

.join(Employee)

We specify the group_by() clause to group the results by the Department entity.

.group_by(Department.name)

Then, we use the options() method on the query object to specify additional query options, such as contains_eager loading of relationships.
we pass the contains_eager() method as argument to options().
Inside the contains_eager() method, we specify the relationship Department.employees to load. indicating that we want to load the employee’s relationship of the Department model.

.options(contains_eager(Department.employees))

Finally, we use all() to retrieve the data

.all()

Code Implemention

Python3

from sqlalchemy.orm import contains_eager 
  
# query 
Query = session.query(Department,  
        func.sum(Employee.salary).label("salary"))\ 
    .join(Employee)\ 
    .group_by(Department.name)\ 
    .options(contains_eager(Department.employees)) 
  
# executing the Query with DB 
result = Query.all() 
  
print("contains_eager technique result") 
print("Dep salarySum") 
for dep, salary in result: 
    print(dep.name, salary) 

Output

contains Eager Load Output

Subquery Technique

We first define a subquery that calculates the count of employees per department. The subquery is created using session.query() with the necessary columns and aggregations, followed by a join() with the Employee table and a group_by() on the Department.id column. The subquery() at the end converts the query into a subquery object.

session.query(Department.id, func.count(Employee.emp_id).label('employee_count'))
    .join(Employee)
    .group_by(Department.id)
    .subquery()

First, you define a subquery to calculates the count of employees per department using the session.query() method and specify the necessary columns(Department.id,Department.name) and aggregations(func.count(Employee.emp_id)).

.join(Employee)

Then use join(Employess) to specify that the query should perform a join between the Department table and Employee table.

.group_by(Department.id)

Then we use group_by() method is used to group the results by the id column of the Department.

.subquery()

Finally we use .subquery() it will convert the query object to subquery object

Code

Python3

#subquery 
subQuery=session\ 
    .query(Department.id, func.count(Employee.emp_id)\ 
    .label('employee_count'))\ 
    .join(Employee)\ 
    .group_by(Department.id)\ 
    .subquery() 
  
#main query 
mainQuery=session.query(Department,subQuery.c.employee_count)\ 
        .join(subQuery,Department.id == subQuery.c.id).all() 
  
print("Subquery Technique") 
for dep in mainQuery: 
    print(dep[0].name,dep[1]) 
  
session.close()

Output :

subQuery Output

Load Technique

By using the Load technique, you can fine-tune how SQLAlchemy fetches related objects, optimizing performance and reducing the number of queries issued to the database.

query = session.query(Department.name, func.avg(Employee.salary)) 
               .join(Employee, Employee.dep_id == Department.id) 
               .group_by(Department.name) 
               .options(Load(Employee))
               .all()

session.query(Department.name, func.avg(Employee.salary)): We start by creating a query object that selects the name attribute from the Department model and calculates the average salary using the func.avg() function on the salary attribute from the Employee model.
join(Employee, Employee.dep_id == Department.id): We use the join() method to join the Employee table with the Department table with joining condition.
group_by(Department.name): We group the results by the name attribute of the Department model. This ensures that we get the average salary per department.
options(Load(Employee)): We specify the loading options for the Employee model using the Load() technique. By passing Load(Employee), we indicate that we want to customize the loading behavior for the Employee objects in the query.
all(): We execute the query and retrieve all the results.

Code

Python3

from sqlalchemy.orm import Load 
  
# Query to calculate the average salary of employees in each department 
query = session.query(Department.name, func.avg(Employee.salary)) \ 
               .join(Employee, Employee.dep_id == Department.id) \ 
               .group_by(Department.name) \ 
               .options(Load(Employee))\ 
               .all() 
print("Load technique result") 
print("Dep averageSalary") 
for depName, salary in query: 
    print(depName, salary) 
  
# closing the connection 
session.close() 

Output

Load technique result
Dep averageSalary
ECE 98333.3333
CSE 55000.0000
MECH 31666.6667

Selectin Load Technique

The selectinload() method allows you to load related objects in a separate SELECT statement rather than joining them with the main query. This can be useful when you have a large number of related objects or when the relationship involves many-to-many or one-to-many associations.

Python3

from sqlalchemy.orm import selectinload 
  
query = session.query(Department, func.min(Employee.salary)) \ 
               .join(Employee) \ 
               .group_by(Department.name) \ 
               .options(selectinload(Department.employees)) 
  
result = query.all() 
  
for dep,salary in result: 
    print(dep.name,salary) 
  
session.close()

Output

selectIn load Output

Immediate Loading Technique

Immediate load is a technique in SQLAlchemy that loads related objects immediately along with the parent object when the query is executed. It allows you to retrieve all the necessary data in a single query, reducing the number of database round-trips. It can be achieved by using the immediateload() in options().

Python3

result = session.query(Department,  
         func.avg(Employee.age).label("averageAge"))\ 
               .join(Employee)\ 
               .group_by(Department.name)\ 
               .options(immediateload(Department.employees))\ 
               .all() 
  
print("contains_eager technique result") 
print("Dep averageAge") 
for dep ,age in result: 
    print(dep.name,age) 
  
# closing the connection 
session.close()

Output

immediate Load Output

Raise Load Technique

The raiseload() method is used to specify that a particular relationship should be loaded using a separate SQL query when accessed.

Python3

from sqlalchemy.orm import raiseload 
  
# query to get the minimum age in each department 
query = session.query(Department, func.min(Employee.age)) \ 
               .join(Employee) \ 
               .group_by(Department.name) \ 
               .options(raiseload(Department.employees)) 
  
result = query.all() 
  
print("Raise Lad technique") 
print("Name MinAge") 
for dep, age in result: 
    print(dep.name, age) 
  
session.close() 

Output

Raise Load Output

Noload Technique

The noload technique is used to specify that a particular relationship should not be loaded at all. It is useful when you want to exclude a specific relationship from being loaded in a query, even if it has a default loading strategy.

Python3

from sqlalchemy.orm import noload 
  
query = session.query(Department, func.max(Employee.salary).label("maxSalary")) \ 
               .join(Employee,Department.id==Employee.dep_id) \ 
               .group_by(Department.id, Department.name) \ 
               .options(noload(Department.employees))\ 
               .all() 
print("noload technique result") 
print("Dep maxSalary") 
for dep ,salary in query: 
    print(dep.name,salary) 
  
# closing the connection 
session.close()

Output

Dep maxSalary
ECE 120000
CSE 75000
MECH 40000

Lazy Loading Technique

We can simply achieve the same functionality by using lazy loading with the help of foreign key relations no need for joins and group_by clauses.

Python3

departments = session.query(Department).all() 
for department in departments: 
    # Accessing the employees attribute triggers lazy loading 
    employee_count = len(department.employees) 
    print(f"Department: {department.name}, Employee Count: {employee_count}") 
session.close() 

Output

Lazy Loading Output

Defaultload Technique

The defaultload technique in SQLAlchemy is used to control the loading behavior of relationships when querying objects from the database. By default, SQLAlchemy loads all related objects for a given relationship when the parent object is queried. However, in some cases, loading all related objects upfront can lead to unnecessary performance overhead.

Python3

from sqlalchemy import *
from sqlalchemy.orm import sessionmaker, relationship 
from sqlalchemy.orm import defaultload 
from sqlalchemy.ext.declarative import declarative_base 
# Create Base class 
Base = declarative_base() 
# Establish connection 
engine = create_engine("postgresql://postgres:root@localhost/test") 
# Model classes 
class Department(Base): 
    __tablename__ = "department"
    id = Column(Integer, primary_key=True) 
    name = Column(String(50)) 
    employees = relationship('Employee', 
                back_populates="department") 
class Employee(Base): 
    __tablename__ = "employee"
    emp_id = Column(Integer, primary_key=True) 
    emp_name = Column(String(50)) 
    age = Column(Integer) 
    salary = Column(DECIMAL) 
    dep_id = Column(Integer, ForeignKey("department.id")) 
    department = relationship( 
        "Department", back_populates="employees",  
                          overlaps="employees") 
Base.metadata.create_all(engine) 
  
Session = sessionmaker(bind=engine) 
session = Session() 
# Query to calculate the max age of employees in each department 
query = session.query(Department, func.max(Employee.age)) \ 
               .join(Employee) \ 
               .group_by(Department.id, Department.name) \ 
               .options(defaultload(Department.employees)) 
result = query.all() 
print("Default Loading") 
print("dep Age") 
for dep, max_age in result: 
    print(dep.name, max_age) 
session.close() 

Output

Default Loading

Suggest improvement

SQLAlchemy filter by json field

Share your thoughts in the comments

SQLAlchemy Group By With Full Child Objects