book_scraper 0.0.3 (dependency: `book_scraper: ^0.0.3`)
Tools for scraping popular book sites using bs4
Tools for scraping popular book sites using bs4.
Features #
- Helper to scrape book sites.
- Helper to parse scraped book data.
- Helper to safely parse scraped book results.
Getting started #
Usage #
/// Example book-site scraper built on [BaseBookApi].
///
/// NOTE(review): `T` is not declared anywhere in this snippet; it appears to
/// stand in for the concrete book model type (constructed below with `title`,
/// `category`, `coverUrl` and `detailsUrl`) — confirm before copying.
class AudioBookBayApi extends BaseBookApi {
  @override
  String get baseUrl => 'https://some_book_website';

  /// Returns the books found for the search term [query].
  ///
  /// The trimmed, URI-encoded [query] is sent as the `s` query parameter of
  /// [baseUrl]. Each `article` element found inside the results container is
  /// handed to [_getBookfromScrapedHtml] through
  /// `convertListToModelWithErrorCount`. The returned map holds the converted
  /// books under `'books'` and their count under `'total'`.
  ///
  /// Throws an [Exception] with the message `Error finding books` when
  /// anything inside the lookup throws.
  Future<SearchResponse<T>> find(String query) async {
    try {
      final searchTermQuery = '?s=${Uri.encodeComponent(query.trim())}';
      final url = '$baseUrl/$searchTermQuery';
      final soup = await cookSoup(url);

      // Retrieve elements from the scraped result.
      final resultContainer = soup.find(
        'div',
        class_: 'some div claas definition',
      );
      final books = resultContainer?.findAll('article');

      // A missing results container is treated as an empty result list.
      final conversionBookRequest = ConversionRequest<T, Bs4Element>(
        List<Bs4Element>.from(books ?? []),
        _getBookfromScrapedHtml,
      );

      // Awaited inside the `try` so conversion failures are reported through
      // the same `catch` as scraping failures.
      final result = await compute(
        convertListToModelWithErrorCount<T, Bs4Element>,
        conversionBookRequest,
      );

      return {
        'total': result.results.length,
        'books': result.results,
      };
    } catch (e) {
      throw Exception('Error finding books');
    }
  }

  /// Builds a `T` from a single scraped result element.
  ///
  /// Every field falls back to an empty string when the markup it is read
  /// from is missing.
  T _getBookfromScrapedHtml(Bs4Element soupElement) {
    // Title: trimmed string content of the heading element.
    final title = soupElement
            .find(
              'h2',
              class_: 'mb15 mt0 font110 mobfont100 fontnormal lineheight20',
            )
            ?.string
            .trim() ??
        '';

    // Details link: `href` of the element's anchor.
    final detailsUrl = soupElement.a?.attributes['href'] ?? '';

    // Category: text of the anchor inside the category span.
    final category = soupElement
            .find(
              'span',
              class_: 'cat_link_meta',
            )
            ?.a
            ?.text ??
        '';

    // Cover: `src` of the anchor's first child. Guard against an anchor with
    // no children, because `first` on an empty collection throws a
    // StateError instead of yielding the empty-string fallback.
    final anchorChildren = soupElement.a?.children;
    final coverUrl = (anchorChildren == null || anchorChildren.isEmpty)
        ? ''
        : anchorChildren.first.attributes['src'] ?? '';

    return T(
      title: title,
      category: category,
      coverUrl: coverUrl,
      detailsUrl: detailsUrl,
    );
  }
}