Open In App

Web Scraping in Flutter

The process of extracting required data/information from a web page by accessing the HTML of the web page is called Web Scraping or Web Harvesting or Web Data Extraction.

This article discusses the steps involved in Web Scraping by using Flutter’s html and http packages.



Step 1: Set up a new Flutter App

Create a new flutter app by running the command :



flutter create YOUR_APP_NAME




import 'package:flutter/material.dart';
  
void main() => runApp(MaterialApp(
    theme: ThemeData(
      accentColor: Colors.green,
      scaffoldBackgroundColor: Colors.green[100],
      primaryColor: Colors.green,
    ),
    home: MyApp()));
  
class MyApp extends StatefulWidget {
  const MyApp({Key key}) : super(key: key);
  
  @override
  _MyAppState createState() => _MyAppState();
}
  
class _MyAppState extends State<MyApp> {
    
  // Strings to store the extracted Article titles
  String result1 = 'Result 1';
  String result2 = 'Result 2';
  String result3 = 'Result 3';
    
  // boolean to show CircularProgressIndication
  // while Web Scraping awaits
  bool isLoading = false;
  
  @override
  Widget build(BuildContext context) {
    return Scaffold(
      appBar: AppBar(title: Text('GeeksForGeeks')),
      body: Padding(
        padding: const EdgeInsets.all(16.0),
        child: Center(
            child: Column(
          mainAxisAlignment: MainAxisAlignment.center,
          children: [
              
            // if isLoading is true show loader
            // else show Column of Texts
            isLoading
                ? CircularProgressIndicator()
                : Column(
                    children: [
                      Text(result1,
                          style: TextStyle(
                              fontSize: 20, fontWeight: FontWeight.bold)),
                      SizedBox(
                        height: MediaQuery.of(context).size.height * 0.05,
                      ),
                      Text(result2,
                          style: TextStyle(
                              fontSize: 20, fontWeight: FontWeight.bold)),
                      SizedBox(
                        height: MediaQuery.of(context).size.height * 0.05,
                      ),
                      Text(result3,
                          style: TextStyle(
                              fontSize: 20, fontWeight: FontWeight.bold)),
                    ],
                  ),
            SizedBox(height: MediaQuery.of(context).size.height * 0.08),
            MaterialButton(
              onPressed: () {},
              child: Text(
                'Scrap Data',
                style: TextStyle(color: Colors.white),
              ),
              color: Colors.green,
            )
          ],
        )),
      ),
    );
  }
}

Output :

Step 2: Add the HTML and HTTP packages.

 flutter pub get

import 'package:html/parser.dart' as parser;
import 'package:http/http.dart' as http;

Step 3: Adding Web Scraping functionality

“articles-list” class  >> children[0]  >> children[0]  >> children[0] 

“articles-list” class  >> children[1]  >> children[0]  >> children[0] 

“articles-list” class  >> children[2]  >> children[0]  >> children[0]    

Future<List<String>> extractData() async {
//Getting the response from the targeted url
    final response =
        await http.Client().get(Uri.parse('https://www.geeksforgeeks.org/'));
        //Status Code 200 means response has been received successfully
    if (response.statusCode == 200) {
    //Getting the html document from the response
      var document = parser.parse(response.body);
      try {
      //Scraping the first article title
        var responseString1 = document
            .getElementsByClassName('articles-list')[0]
            .children[0]
            .children[0]
            .children[0];

        print(responseString1.text.trim());
        
      //Scraping the second article title
        var responseString2 = document
            .getElementsByClassName('articles-list')[0]
            .children[1]
            .children[0]
            .children[0];

        print(responseString2.text.trim());
        
      //Scraping the third article title
        var responseString3 = document
            .getElementsByClassName('articles-list')[0]
            .children[2]
            .children[0]
            .children[0];

        print(responseString3.text.trim());
     //Converting the extracted titles into string and returning a list of Strings
        return [
          responseString1.text.trim(),
          responseString2.text.trim(),
          responseString3.text.trim()
        ];
      } catch (e) {
        return ['', '', 'ERROR!'];
      }
    } else {
      return ['', '', 'ERROR: ${response.statusCode}.'];
    }
  }
onPressed: () async {
              //Setting isLoading true to show the loader
                setState(() {
                  isLoading = true;
                });
                
                //Awaiting for web scraping function to return list of strings
                final response = await extractData();
                
                //Setting the received strings to be displayed and making isLoading false to hide the loader
                setState(() {
                  result1 = response[0];
                  result2 = response[1];
                  result3 = response[2];
                  isLoading = false;
                });
              }




import 'package:flutter/material.dart';
import 'package:html/parser.dart' as parser;
import 'package:http/http.dart' as http;
  
void main() => runApp(MaterialApp(
    theme: ThemeData(
      accentColor: Colors.green,
      scaffoldBackgroundColor: Colors.green[100],
      primaryColor: Colors.green,
    ),
    home: MyApp()));
  
class MyApp extends StatefulWidget {
  const MyApp({Key key}) : super(key: key);
  
  @override
  _MyAppState createState() => _MyAppState();
}
  
class _MyAppState extends State<MyApp> {
    
  // Strings to store the extracted Article titles
  String result1 = 'Result 1';
  String result2 = 'Result 2';
  String result3 = 'Result 3';
    
  // boolean to show CircularProgressIndication
  // while Web Scraping awaits
  bool isLoading = false;
  
  Future<List<String>> extractData() async {
      
    // Getting the response from the targeted url
    final response =
        await http.Client().get(Uri.parse('https://www.geeksforgeeks.org/'));
      
        // Status Code 200 means response has been received successfully
    if (response.statusCode == 200) {
        
    // Getting the html document from the response
      var document = parser.parse(response.body);
      try {
          
      // Scraping the first article title
        var responseString1 = document
            .getElementsByClassName('articles-list')[0]
            .children[0]
            .children[0]
            .children[0];
  
        print(responseString1.text.trim());
          
      // Scraping the second article title
        var responseString2 = document
            .getElementsByClassName('articles-list')[0]
            .children[1]
            .children[0]
            .children[0];
  
        print(responseString2.text.trim());
          
      // Scraping the third article title
        var responseString3 = document
            .getElementsByClassName('articles-list')[0]
            .children[2]
            .children[0]
            .children[0];
  
        print(responseString3.text.trim());
          
        // Converting the extracted titles into
        // string and returning a list of Strings
        return [
          responseString1.text.trim(),
          responseString2.text.trim(),
          responseString3.text.trim()
        ];
      } catch (e) {
        return ['', '', 'ERROR!'];
      }
    } else {
      return ['', '', 'ERROR: ${response.statusCode}.'];
    }
  }
  
  @override
  Widget build(BuildContext context) {
    return Scaffold(
      appBar: AppBar(title: Text('GeeksForGeeks')),
      body: Padding(
        padding: const EdgeInsets.all(16.0),
        child: Center(
            child: Column(
          mainAxisAlignment: MainAxisAlignment.center,
          children: [
              
            // if isLoading is true show loader
            // else show Column of Texts
            isLoading
                ? CircularProgressIndicator()
                : Column(
                    children: [
                      Text(result1,
                          style: TextStyle(
                              fontSize: 20, fontWeight: FontWeight.bold)),
                      SizedBox(
                        height: MediaQuery.of(context).size.height * 0.05,
                      ),
                      Text(result2,
                          style: TextStyle(
                              fontSize: 20, fontWeight: FontWeight.bold)),
                      SizedBox(
                        height: MediaQuery.of(context).size.height * 0.05,
                      ),
                      Text(result3,
                          style: TextStyle(
                              fontSize: 20, fontWeight: FontWeight.bold)),
                    ],
                  ),
            SizedBox(height: MediaQuery.of(context).size.height * 0.08),
            MaterialButton(
             onPressed: () async {
                 
              // Setting isLoading true to show the loader
                setState(() {
                  isLoading = true;
                });
                  
                // Awaiting for web scraping function
                // to return list of strings
                final response = await extractData();
                  
                // Setting the received strings to be
                // displayed and making isLoading false
                // to hide the loader
                setState(() {
                  result1 = response[0];
                  result2 = response[1];
                  result3 = response[2];
                  isLoading = false;
                });
              },
              child: Text(
                'Scrap Data',
                style: TextStyle(color: Colors.white),
              ),
              color: Colors.green,
            )
          ],
        )),
      ),
    );
  }
}

Output:


Article Tags :