import
requests
from
bs4
import
BeautifulSoup
import
json
def json_from_html_using_bs4(base_url):
    """Scrape the book listing page at *base_url* into a list of dicts.

    The page is fetched with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``. Every ``<li>`` whose class attribute is
    ``col-xs-6 col-sm-4 col-md-3 col-lg-3`` is treated as one book entry.

    Args:
        base_url: URL of the listing page. Relative book links found on the
            page are resolved against this URL.

    Returns:
        A list with one dict per book, in page order, with the keys
        ``'book no'`` (1-based position, as a string), ``'title'``,
        ``'rating'`` (e.g. ``"Three out of 5"``, or ``None`` when no
        star-rating tag is found), ``'price'``, ``'link'`` and ``'stock'``.

    Raises:
        Whatever ``requests.get`` raises on a failed request, and
        ``TypeError``/``AttributeError`` if a book entry lacks the expected
        ``<img>``, ``<a>`` or price/availability ``<p>`` tags.
    """
    # Local import so this function does not depend on a file-level import
    # being added elsewhere.
    from urllib.parse import urljoin

    page = requests.get(base_url)
    soup = BeautifulSoup(page.text, "html.parser")

    books = soup.find_all(
        'li', attrs={'class': 'col-xs-6 col-sm-4 col-md-3 col-lg-3'})
    star = ['One', 'Two', 'Three', 'Four', 'Five']

    res = []
    for book_no, book in enumerate(books, start=1):
        # The title is taken from the cover image's alt text.
        title = book.find('img')['alt']

        # Resolve the (relative) href against the page URL instead of
        # slicing a fixed number of characters off base_url, which only
        # worked for one specific URL prefix length.
        link = urljoin(base_url, book.find('a')['href'])

        # Rating comes from the class of a <p> tag: "star-rating <Word>".
        # Reset per book so a missing rating neither raises NameError nor
        # silently reuses the previous book's rating.
        stars = None
        for star_name in star:
            find_stars = book.find(
                'p', attrs={'class': 'star-rating ' + star_name})
            if find_stars is not None:
                stars = star_name + " out of 5"
                break

        # Price is the text of the <p> tag in the price_color class.
        price = book.find('p', attrs={'class': 'price_color'}).text

        # Availability is the stripped text of the
        # "instock availability" <p> tag.
        instock = book.find(
            'p', attrs={'class': 'instock availability'}).text.strip()

        res.append({
            'book no': str(book_no),
            'title': title,
            'rating': stars,
            'price': price,
            'link': link,
            'stock': instock,
        })

    return res
if __name__ == "__main__":
    # Script entry point: scrape the listing page and dump the result to
    # books.json in the current working directory.
    #
    # NOTE(review): `base_url` is not assigned anywhere in the visible part
    # of this file -- confirm it is defined before this point, otherwise the
    # call below raises NameError.
    records = json_from_html_using_bs4(base_url)

    # NOTE(review): the file is written as latin-1 while ensure_ascii=False
    # leaves non-ASCII characters unescaped, so any character outside
    # Latin-1 will raise UnicodeEncodeError here -- confirm this encoding is
    # intentional before changing it.
    dump_options = {'indent': 8, 'ensure_ascii': False}
    with open('books.json', 'w', encoding='latin-1') as json_file:
        json.dump(records, json_file, **dump_options)

    print("Created Json File")