Source code for book_manager.importers

import datetime
import logging
import csv
from typing import Dict, Any

from django.contrib.auth import get_user_model
from nameparser import HumanName

from .models import Book, BookAuthor, Author, Binding, Publisher, Reading, Shelf

logger = logging.getLogger('book_manager.importers')

User = get_user_model()


class GoodreadsImporter:
    """
    **Usage**: ``GoodreadsImporter().run(csv_filename, user)``

    Import data into our database from a Goodreads CSV Export.

    * Import any new :py:class:`book_manager.models.Binding`,
      :py:class:`book_manager.models.Publisher`, and
      :py:class:`book_manager.models.Author` instances
    * Import the book from each row as a :py:class:`book_manager.models.Book`
    * Import the user specific data from each row as a
      :py:class:`book_manager.models.Reading` associated with the user ``user``

    A Goodreads CSV export has these columns:

    +---------------------------------+----------------+------------------------------+
    | Column name                     | Type           | Notes                        |
    +=================================+================+==============================+
    | Book Id                         | int, unique    | goodreads internal id        |
    +---------------------------------+----------------+------------------------------+
    | Title                           | str            |                              |
    +---------------------------------+----------------+------------------------------+
    | Author                          | str            | First Last                   |
    +---------------------------------+----------------+------------------------------+
    | Author l-f                      | str            | Last, First                  |
    +---------------------------------+----------------+------------------------------+
    | Additional Authors              | str            | First Last1, First Last2...  |
    +---------------------------------+----------------+------------------------------+
    | ISBN                            | str            | value is "=" if empty        |
    +---------------------------------+----------------+------------------------------+
    | ISBN13                          | str            | value is "=" if empty        |
    +---------------------------------+----------------+------------------------------+
    | My Rating                       | int            | 0, 1, 2, 3, 4, 5             |
    +---------------------------------+----------------+------------------------------+
    | Average Rating                  | float          | 2 decimals                   |
    +---------------------------------+----------------+------------------------------+
    | Publisher                       | str            | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Binding                         | str            | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Number of Pages                 | int            | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Year Published                  | int            | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Original Publication Year       | int            | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Date Read                       | date           | YYYY/MM/DD                   |
    +---------------------------------+----------------+------------------------------+
    | Date Added                      | date           | YYYY/MM/DD                   |
    +---------------------------------+----------------+------------------------------+
    | Bookshelves                     | str            | comma separated              |
    +---------------------------------+----------------+------------------------------+
    | Bookshelves with positions      | str            | NAME (#NUM), comma sep       |
    +---------------------------------+----------------+------------------------------+
    | Exclusive Shelf                 | str            | NAME                         |
    +---------------------------------+----------------+------------------------------+
    | My Review                       | text           | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Spoiler                         | text           | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Private Notes                   | text           | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Read Count                      | int            |                              |
    +---------------------------------+----------------+------------------------------+
    | Owned Copies                    | int            |                              |
    +---------------------------------+----------------+------------------------------+
    """

    def __init__(self) -> None:
        # Lookup caches filled by load_lookups() and read by import_book().
        # binding_map and publisher_map are keyed by the raw CSV cell value;
        # authors_map is keyed by ``str(HumanName(raw_name))``.
        self.binding_map: Dict[str, Binding] = {}
        self.publisher_map: Dict[str, Publisher] = {}
        self.authors_map: Dict[str, Author] = {}

    @staticmethod
    def _split_additional_authors(value: str) -> list:
        """
        Split an "Additional Authors" cell into individual author names.

        Surrounding whitespace is removed and blank entries (e.g. from a
        trailing separator) are dropped, so no empty-named author is produced.
        Both :py:meth:`load_lookups` and :py:meth:`import_book` use this so the
        names they derive always agree.
        """
        return [name.strip() for name in value.split(', ') if name.strip()]

    @staticmethod
    def _clean_isbn(value: str) -> str:
        """
        Return the bare ISBN from a Goodreads ISBN cell.

        Goodreads wraps ISBN values as ``="0123456789"`` (``=""`` when empty).
        The wrapper is stripped only when present, so a plain value without the
        leading ``=`` does not lose its first character.
        """
        return (value or '').strip().lstrip('=').strip('"')

    def load_lookups(self, filename: str) -> None:
        """
        Find the unique bindings, publishers and authors in the Goodreads
        export CSV ``filename`` and create them in the database as necessary.

        The resulting model instances are cached in ``self.binding_map``,
        ``self.publisher_map`` and ``self.authors_map``.

        Args:
            filename: the filename of the CSV file to read
        """
        bindings = set()
        publishers = set()
        authors = set()
        # newline='' lets the csv module handle newlines embedded in quoted
        # fields itself, as the csv documentation requires.
        with open(filename, encoding='utf-8', newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                if row['Binding']:
                    bindings.add(row['Binding'])
                if row['Publisher']:
                    publishers.add(row['Publisher'])
                authors.add(row['Author'])
                if row['Additional Authors']:
                    authors.update(self._split_additional_authors(row['Additional Authors']))

        for binding in bindings:
            self.binding_map[binding], created = Binding.objects.get_or_create(name=binding)
            if created:
                logger.info('%s.binding.created name="%s"', self.__class__.__name__, binding)

        for publisher in publishers:
            self.publisher_map[publisher], created = Publisher.objects.get_or_create(name=publisher)
            if created:
                logger.info('%s.publisher.created name="%s"', self.__class__.__name__, publisher)

        for name in authors:
            n = HumanName(name)
            # The normalised string form is both the database key and the
            # authors_map key; import_book() derives it the same way.
            full_name = str(n)
            author, created = Author.objects.get_or_create(full_name=full_name)
            if created:
                logger.info('%s.author.created full_name="%s"', self.__class__.__name__, full_name)
                author.first_name = n.first
                author.middle_name = n.middle
                author.last_name = n.last
                author.save()
            self.authors_map[full_name] = author

    def import_book(self, row: Dict[str, Any], overwrite: bool = False) -> Book:
        """
        Get or create a :py:class:`Book` based on ``row``, a row from our
        :py:class:`csv.DictReader` reader of our Goodreads export.

        Books are looked up by title.  An existing book is returned untouched
        unless ``overwrite`` is ``True``.  :py:meth:`load_lookups` must have
        been run first so that the binding, publisher and author caches are
        populated.

        Args:
            row: a row from our Goodreads export

        Keyword Args:
            overwrite: if ``True``, overwrite any existing book data for this book

        Returns:
            A :py:class:`Book` instance

        Raises:
            KeyError: if the row references a binding, publisher or author that
                is missing from the lookup caches
        """
        book, created = Book.objects.get_or_create(title=row['Title'])
        if not (created or overwrite):
            return book

        # Empty CSV cells are stored as NULL rather than as empty strings.
        book.isbn = self._clean_isbn(row['ISBN']) or None
        book.isbn13 = self._clean_isbn(row['ISBN13']) or None
        book.num_pages = row['Number of Pages'] or None
        book.year_published = row['Year Published'] or None
        book.original_publication_year = row['Original Publication Year'] or None
        if row['Binding']:
            book.binding = self.binding_map[row['Binding']]
        if row['Publisher']:
            book.publisher = self.publisher_map[row['Publisher']]
        book.save()

        # Rebuild the author list from scratch; ``order`` records the position
        # of each author, with the primary author first.
        book.authors.clear()
        author_order = 1
        primary_author = self.authors_map[str(HumanName(row['Author']))]
        BookAuthor.objects.create(book=book, author=primary_author, order=author_order)
        # NOTE(review): presumably ``Book.authors`` goes through ``BookAuthor``,
        # which would make this call redundant -- kept as in the original until
        # confirmed against the model definition.
        book.authors.add(primary_author)
        if row['Additional Authors']:
            for name in self._split_additional_authors(row['Additional Authors']):
                author_order += 1
                BookAuthor.objects.create(
                    book=book,
                    author=self.authors_map[str(HumanName(name))],
                    order=author_order,
                )

        if created:
            logger.info('%s.book.created title="%s"', self.__class__.__name__, book.title)
        else:
            logger.info('%s.book.updated title="%s"', self.__class__.__name__, book.title)
        return book

    def import_reading(self, book: Book, user: User, row: Dict[str, Any]) -> None:
        """
        Import the data for the :py:class:`Reading` record for ``user``.

        The user's existing reading of ``book`` is updated if there is one,
        otherwise a new one is created.  The shelf named by the row's
        "Exclusive Shelf" column is created for the user if necessary and is
        applied to the reading in both cases.

        Args:
            book: the book for which we're importing reading data
            user: the user whose reading data we're importing
            row: the row from the Goodreads CSV, as output by
                :py:class:`csv.DictReader`

        Raises:
            ValueError: if a date column is not in ``YYYY/MM/DD`` format
        """
        shelf, _ = Shelf.objects.get_or_create(reader=user, name=row['Exclusive Shelf'])
        created = False
        try:
            reading = Reading.objects.get(book=book, reader=user)
        except Reading.DoesNotExist:
            created = True
            reading = Reading(book=book, reader=user, shelf=shelf)
        else:
            # Keep an existing reading in sync when the book has moved shelves
            # since the last import.
            reading.shelf = shelf

        reading.date_added = datetime.datetime.strptime(row['Date Added'], '%Y/%m/%d').date()
        if row['Date Read']:
            reading.date_read = datetime.datetime.strptime(row['Date Read'], '%Y/%m/%d').date()
        if row['Private Notes']:
            reading.private_notes = row['Private Notes']
        if row['My Review']:
            reading.review = row['My Review']
        reading.read_count = row['Read Count'] if row['Read Count'] else 0
        reading.rating = row['My Rating']
        reading.save()

        if created:
            logger.info(
                '%s.reading.created user=%s title="%s"',
                self.__class__.__name__,
                user.username,
                book.title
            )
        else:
            logger.info(
                '%s.reading.updated user=%s title="%s"',
                self.__class__.__name__,
                user.username,
                book.title
            )

    def run(self, filename: str, user: User, overwrite: bool = False) -> None:
        """
        Load the books in the CSV identified by ``filename`` into the database,
        splitting each row into appropriate :py:class:`book_manager.models.Book`,
        :py:class:`book_manager.models.Author`,
        :py:class:`book_manager.models.Publisher` and
        :py:class:`book_manager.models.Binding` records, creating the foreign
        keys and many-to-many targets as needed.

        :py:class:`book_manager.models.Reading` data will always be overwritten,
        and :py:class:`book_manager.models.Book` data will be preserved, unless
        ``overwrite`` is ``True``.

        Args:
            filename: the filename of the Goodreads CSV export file
            user: the user whose reading data is being imported

        Keyword Args:
            overwrite: if ``True``, overwrite any existing :py:class:`Book` with
                data from the CSV
        """
        # First pass over the file: make sure every binding, publisher and
        # author exists and is cached before any book references them.
        self.load_lookups(filename)
        # Second pass: one Book and one Reading per row.
        with open(filename, encoding='utf-8', newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                book = self.import_book(row, overwrite=overwrite)
                self.import_reading(book, user, row)