Execute SQL queries on a dataframe using R

Last Updated : 25 Mar, 2024

In R Programming Language We can use the sqldf package in R to execute SQL queries on a data frame. This can be useful for performing various data manipulation tasks using SQL syntax. The sqldf package provides a way to write SQL queries as strings and apply them to a data frame, allowing us to perform operations such as filtering, sorting, aggregation, joining, and more. In this article, we will explore how we can perform this.

To use the sqldf package, you first need to install it. install it by the below command

install.packages("sqldf")

Now let’s see some common SQL operations we can perform on a data frame using the sqldf package:

Filtering
Sorting
Aggregation
Joining
Grouping
Subsetting
Updating
Deleting

We will be performing above operation on below data frame.

R

# Create vectors for different columns
ids <- c(1, 2, 3, 4, 5)
names <- c("Alice", "Bob", "Charlie", "David", "Eve")
years_of_exp <- c(5, 8, 3, 10, 6)
roles <- c("Engineer", "Manager", "Analyst", "Director", "Developer")
 
# Create a dataframe
df <- data.frame(
  id = ids,
  name = names,
  year_of_exp = years_of_exp,
  role = roles,
  stringsAsFactors = FALSE  # Prevent strings from being converted to factors
)
df

Output:

id    name   year_of_exp    role
1      Alice           5                    Engineer
2     Bob            8                    Manager
3     Charlie      3                     Analyst
4     David       10                    Director
5     Eve            6                     Developer

Filtering

In below example we are Selecting rows where year_of_exp is greater than 5.

R

library(sqldf)
 
# Create vectors for different columns
ids <- c(1, 2, 3, 4, 5)
names <- c("Alice", "Bob", "Charlie", "David", "Eve")
years_of_exp <- c(5, 8, 3, 10, 6)
roles <- c("Engineer", "Manager", "Analyst", "Director", "Developer")
 
# Create a dataframe
df <- data.frame(
  id = ids,
  name = names,
  year_of_exp = years_of_exp,
  role = roles,
  stringsAsFactors = FALSE  # Prevent strings from being converted to factors
)
 
result <- sqldf("SELECT * FROM df WHERE year_of_exp > 5")
print(result)

Output:

  id  name year_of_exp      role
1  2   Bob           8   Manager
2  4 David          10  Director
3  5   Eve           6 Developer

Sorting

In this example we will Order the dataframe by year_of_exp in descending order.

R

library(sqldf)
 
# Create vectors for different columns
ids <- c(1, 2, 3, 4, 5)
names <- c("Alice", "Bob", "Charlie", "David", "Eve")
years_of_exp <- c(5, 8, 3, 10, 6)
roles <- c("Engineer", "Manager", "Analyst", "Director", "Developer")
 
# Create a dataframe
df <- data.frame(
  id = ids,
  name = names,
  year_of_exp = years_of_exp,
  role = roles,
  stringsAsFactors = FALSE  # Prevent strings from being converted to factors
)
 
result <- sqldf("SELECT * FROM df ORDER BY year_of_exp DESC")
print(result)

Output:

  id    name year_of_exp      role
1  4   David          10  Director
2  2     Bob           8   Manager
3  5     Eve           6 Developer
4  1   Alice           5  Engineer
5  3 Charlie           3   Analyst

Aggregation

Calculating the average years of experience.

R

library(sqldf)
 
# Create vectors for different columns
ids <- c(1, 2, 3, 4, 5)
names <- c("Alice", "Bob", "Charlie", "David", "Eve")
years_of_exp <- c(5, 8, 3, 10, 6)
roles <- c("Engineer", "Manager", "Analyst", "Director", "Developer")
 
# Create a dataframe
df <- data.frame(
  id = ids,
  name = names,
  year_of_exp = years_of_exp,
  role = roles,
  stringsAsFactors = FALSE  # Prevent strings from being converted to factors
)
 
result <- sqldf("SELECT AVG(year_of_exp) AS avg_exp FROM df")
print(result)

Output:

  avg_exp
1     6.4

Joining

In this example we will Combine data from two data frames based on a common column.

R

library(sqldf)
 
# Create vectors for different columns
ids <- c(1, 2, 3, 4, 5)
names <- c("Alice", "Bob", "Charlie", "David", "Eve")
years_of_exp <- c(5, 8, 3, 10, 6)
roles <- c("Engineer", "Manager", "Analyst", "Director", "Developer")
 
# Create a dataframe
df <- data.frame(
  id = ids,
  name = names,
  year_of_exp = years_of_exp,
  role = roles,
  stringsAsFactors = FALSE  # Prevent strings from being converted to factors
)
 
df2 <- data.frame(id = c(1, 2, 3, 4, 5), salary = c(50000, 60000, 70000, 80000, 90000))
result <- sqldf("SELECT df.*, df2.salary FROM df LEFT JOIN df2 ON df.id = df2.id")
print(result)

Output:

  id    name year_of_exp      role salary
1  1   Alice           5  Engineer  50000
2  2     Bob           8   Manager  60000
3  3 Charlie           3   Analyst  70000
4  4   David          10  Director  80000
5  5     Eve           6 Developer  90000

Grouping

In this example we will Group the rows by role and calculate the average years of experience for each role.

R

library(sqldf)
 
# Create vectors for different columns
ids <- c(1, 2, 3, 4, 5)
names <- c("Alice", "Bob", "Charlie", "David", "Eve")
years_of_exp <- c(5, 8, 3, 10, 6)
roles <- c("Engineer", "Manager", "Analyst", "Director", "Developer")
 
# Create a dataframe
df <- data.frame(
  id = ids,
  name = names,
  year_of_exp = years_of_exp,
  role = roles,
  stringsAsFactors = FALSE  # Prevent strings from being converted to factors
)
 
result <- sqldf("SELECT role, AVG(year_of_exp) AS avg_exp FROM df GROUP BY role")
print(result)

Output:

       role avg_exp
1   Analyst       3
2 Developer       6
3  Director      10
4  Engineer       5
5   Manager       8

Subsetting

In this example we will see how to Select specific columns.

R

library(sqldf)
 
# Create vectors for different columns
ids <- c(1, 2, 3, 4, 5)
names <- c("Alice", "Bob", "Charlie", "David", "Eve")
years_of_exp <- c(5, 8, 3, 10, 6)
roles <- c("Engineer", "Manager", "Analyst", "Director", "Developer")
 
# Create a dataframe
df <- data.frame(
  id = ids,
  name = names,
  year_of_exp = years_of_exp,
  role = roles,
  stringsAsFactors = FALSE  # Prevent strings from being converted to factors
)
 
result <- sqldf("SELECT id, name FROM df")
print(result)

Output:

  id    name
1  1   Alice
2  2     Bob
3  3 Charlie
4  4   David
5  5     Eve

Updating

In below example we will update the year_of_exp column in the df dataframe. Here we updated the year_of_exp column of id=1.

R

library(sqldf)
 
# Create vectors for different columns
ids <- c(1, 2, 3, 4, 5)
names <- c("Alice", "Bob", "Charlie", "David", "Eve")
years_of_exp <- c(5, 8, 3, 10, 6)
roles <- c("Engineer", "Manager", "Analyst", "Director", "Developer")
 
# Create a dataframe
df <- data.frame(
  id = ids,
  name = names,
  year_of_exp = years_of_exp,
  role = roles,
  stringsAsFactors = FALSE  # Prevent strings from being converted to factors
)
 
# Update the year_of_exp column
df2 <- sqldf("SELECT *,
                    CASE
                        WHEN id = 1 THEN year_of_exp + 1
                        ELSE year_of_exp
                    END AS year_of_exp
             FROM df")
print(df2)

Output:

  id    name year_of_exp      role year_of_exp
1  1   Alice           5  Engineer           6
2  2     Bob           8   Manager           8
3  3 Charlie           3   Analyst           3
4  4   David          10  Director          10
5  5     Eve           6 Developer           6

Deleting

In this example we will see how to delete the row from data frame.

R

library(sqldf)
 
# Sample dataframe
df <- data.frame(
  id = c(1, 2, 3, 4, 5),
  name = c("Alice", "Bob", "Charlie", "David", "Eve"),
  year_of_exp = c(5, 8, 3, 10, 6),
  role = c("Engineer", "Manager", "Analyst", "Director", "Developer")
)
 
# Filter out rows where year_of_exp is less than 5
df2 <- sqldf("SELECT * FROM df WHERE year_of_exp >= 5")
 
# Print the updated dataframe
print(df2)

Output:

  id  name year_of_exp      role
1  1 Alice           5  Engineer
2  2   Bob           8   Manager
3  4 David          10  Director
4  5   Eve           6 Developer

Suggest improvement

Manipulate R Data Frames Using SQL

Share your thoughts in the comments

Execute SQL queries on a dataframe using R

R

Filtering

R

Sorting

R

Aggregation

R

Joining

R

Grouping

R

R

Updating

R

Deleting

R

Please Login to comment...

Similar Reads

What kind of Experience do you want to share?