import datetime
import logging
import csv
from typing import Dict, Any
from django.contrib.auth import get_user_model
from nameparser import HumanName
from .models import Book, BookAuthor, Author, Binding, Publisher, Reading, Shelf
# Module-level logger; every message emitted by the importer below goes through it.
logger = logging.getLogger('book_manager.importers')
# The user model class returned by Django's ``get_user_model()``; used in the
# type annotations of the importer methods.
User = get_user_model()
class GoodreadsImporter:
    """
    **Usage**: ``GoodreadsImporter().run(csv_filename, user)``

    Import data into our database from a Goodreads CSV Export.

    * Import any new :py:class:`book_manager.models.Binding`, :py:class:`book_manager.models.Publisher`,
      and :py:class:`book_manager.models.Author` instances
    * Import the book from each row as a :py:class:`book_manager.models.Book`
    * Import the user specific data from each row as a :py:class:`book_manager.models.Reading`
      associated with the user ``user``

    A Goodreads CSV export has these columns:

    +---------------------------------+----------------+------------------------------+
    | Column name                     | Type           | Notes                        |
    +=================================+================+==============================+
    | Book Id                         | int, unique    | goodreads internal id        |
    +---------------------------------+----------------+------------------------------+
    | Title                           | str            |                              |
    +---------------------------------+----------------+------------------------------+
    | Author                          | str            | First Last                   |
    +---------------------------------+----------------+------------------------------+
    | Author l-f                      | str            | Last, First                  |
    +---------------------------------+----------------+------------------------------+
    | Additional Authors              | str            | First Last1, First Last2...  |
    +---------------------------------+----------------+------------------------------+
    | ISBN                            | str            | value is "=" if empty        |
    +---------------------------------+----------------+------------------------------+
    | ISBN13                          | str            | value is "=" if empty        |
    +---------------------------------+----------------+------------------------------+
    | My Rating                       | int            | 0, 1, 2, 3, 4, 5             |
    +---------------------------------+----------------+------------------------------+
    | Average Rating                  | float          | 2 decimals                   |
    +---------------------------------+----------------+------------------------------+
    | Publisher                       | str            | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Binding                         | str            | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Number of Pages                 | int            | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Year Published                  | int            | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Original Publication Year       | int            | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Date Read                       | date           | YYYY/MM/DD                   |
    +---------------------------------+----------------+------------------------------+
    | Date Added                      | date           | YYYY/MM/DD                   |
    +---------------------------------+----------------+------------------------------+
    | Bookshelves                     | str            | comma separated              |
    +---------------------------------+----------------+------------------------------+
    | Bookshelves with positions      | str            | NAME (#NUM), comma sep       |
    +---------------------------------+----------------+------------------------------+
    | Exclusive Shelf                 | str            | NAME                         |
    +---------------------------------+----------------+------------------------------+
    | My Review                       | text           | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Spoiler                         | text           | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Private Notes                   | text           | can be empty                 |
    +---------------------------------+----------------+------------------------------+
    | Read Count                      | int            |                              |
    +---------------------------------+----------------+------------------------------+
    | Owned copies                    | int            |                              |
    +---------------------------------+----------------+------------------------------+
    """

    def __init__(self) -> None:
        # Lookup caches filled by ``load_lookups`` and read by ``import_book``.
        # Bindings and publishers are keyed by their name as it appears in the
        # CSV; authors are keyed by ``str(HumanName(raw_name))``.
        self.binding_map: Dict[str, Binding] = {}
        self.publisher_map: Dict[str, Publisher] = {}
        self.authors_map: Dict[str, Author] = {}

    def load_lookups(self, filename: str) -> None:
        """
        Find the unique bindings, publishers and authors in the Goodreads export
        CSV ``filename`` and create them in the database as necessary.

        Every object found or created is cached on ``self.binding_map``,
        ``self.publisher_map`` and ``self.authors_map`` so that
        :py:meth:`import_book` can resolve them without further queries.

        Args:
            filename: the filename of the CSV file to read
        """
        bindings = set()
        publishers = set()
        authors = set()
        # ``newline=''`` lets the csv module do its own newline handling, so
        # quoted fields that contain line breaks are parsed correctly.
        with open(filename, encoding='utf-8', newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                if row['Binding']:
                    bindings.add(row['Binding'])
                if row['Publisher']:
                    publishers.add(row['Publisher'])
                authors.add(row['Author'])
                if row['Additional Authors']:
                    authors.update(row['Additional Authors'].split(', '))

        class_name = self.__class__.__name__
        for binding in bindings:
            self.binding_map[binding], created = Binding.objects.get_or_create(name=binding)
            if created:
                logger.info('%s.binding.created name="%s"', class_name, binding)
        for publisher in publishers:
            self.publisher_map[publisher], created = Publisher.objects.get_or_create(name=publisher)
            if created:
                logger.info('%s.publisher.created name="%s"', class_name, publisher)
        for name in authors:
            parsed = HumanName(name)
            full_name = str(parsed)
            # ``defaults`` is only applied when the row is created, so a new
            # author gets its name parts in the same INSERT and an existing
            # author is left untouched.
            author, created = Author.objects.get_or_create(
                full_name=full_name,
                defaults={
                    'first_name': parsed.first,
                    'middle_name': parsed.middle,
                    'last_name': parsed.last,
                },
            )
            if created:
                logger.info('%s.author.created full_name="%s"', class_name, full_name)
            self.authors_map[full_name] = author

    def import_book(self, row: Dict[str, Any], overwrite: bool = False) -> Book:
        """
        Get or create a :py:class:`Book` based on ``row``, a row from our
        :py:class:`csv.DictReader` reader of our Goodreads export.

        Books are looked up by ``Title`` only.  :py:meth:`load_lookups` must
        have been run on the same CSV first: the binding, publisher and author
        of the row are resolved through the maps it fills, and a missing entry
        raises :py:class:`KeyError`.

        Args:
            row: a row from our Goodreads export

        Keyword Args:
            overwrite: if ``True``, overwrite any existing book data for this book

        Returns:
            A :py:class:`Book` instance
        """
        book, created = Book.objects.get_or_create(title=row['Title'])
        if created or overwrite:
            # Drop the first character (the ``=`` noted in the class docstring)
            # and any surrounding double quotes; an empty result is stored as None.
            isbn = row['ISBN'][1:].strip('"')
            isbn13 = row['ISBN13'][1:].strip('"')
            book.isbn = isbn or None
            book.isbn13 = isbn13 or None
            book.num_pages = row['Number of Pages'] or None
            book.year_published = row['Year Published'] or None
            book.original_publication_year = row['Original Publication Year'] or None
            # Assign unconditionally so that an overwrite with an empty cell
            # clears a stale binding/publisher instead of keeping the old one.
            book.binding = self.binding_map[row['Binding']] if row['Binding'] else None
            book.publisher = self.publisher_map[row['Publisher']] if row['Publisher'] else None
            book.save()

            # Rebuild the author links from scratch: primary author is order 1,
            # additional authors follow in CSV order.
            book.authors.clear()
            primary_author = self.authors_map[str(HumanName(row['Author']))]
            BookAuthor.objects.create(book=book, author=primary_author, order=1)
            # NOTE(review): only the primary author is passed to
            # ``book.authors.add``; additional authors are linked through
            # ``BookAuthor`` rows alone -- confirm this is intended.
            book.authors.add(primary_author)
            if row['Additional Authors']:
                others = row['Additional Authors'].split(', ')
                for author_order, raw_name in enumerate(others, start=2):
                    author = self.authors_map[str(HumanName(raw_name))]
                    BookAuthor.objects.create(book=book, author=author, order=author_order)

            if created:
                logger.info('%s.book.created title="%s"', self.__class__.__name__, book.title)
            else:
                logger.info('%s.book.updated title="%s"', self.__class__.__name__, book.title)
        return book

    def import_reading(self, book: Book, user: User, row: Dict[str, Any]) -> None:
        """
        Import the data for the :py:class:`Reading` record for ``user``.

        The shelf named by ``Exclusive Shelf`` is fetched or created for
        ``user``.  An existing reading for ``(book, user)`` is updated in place
        (including its shelf); otherwise a new one is created.  ``Date Read``,
        ``Private Notes`` and ``My Review`` are only written when the CSV cell
        is non-empty.

        Args:
            book: the book for which we're importing reading data
            user: the user whose reading data we're importing
            row: the row from the Goodreads CSV, as output by :py:class:`csv.DictReader`
        """
        shelf, _ = Shelf.objects.get_or_create(reader=user, name=row['Exclusive Shelf'])
        try:
            reading = Reading.objects.get(book=book, reader=user)
        except Reading.DoesNotExist:
            reading = Reading(book=book, reader=user)
            created = True
        else:
            created = False
        # Set the shelf on existing readings too, so a book that moved shelves
        # since the last import is updated rather than left on the old shelf.
        reading.shelf = shelf
        reading.date_added = datetime.datetime.strptime(row['Date Added'], '%Y/%m/%d').date()
        if row['Date Read']:
            reading.date_read = datetime.datetime.strptime(row['Date Read'], '%Y/%m/%d').date()
        if row['Private Notes']:
            reading.private_notes = row['Private Notes']
        if row['My Review']:
            reading.review = row['My Review']
        reading.read_count = row['Read Count'] or 0
        reading.rating = row['My Rating']
        reading.save()
        if created:
            logger.info('%s.reading.created user=%s title="%s"', self.__class__.__name__, user.username, book.title)
        else:
            logger.info('%s.reading.updated user=%s title="%s"', self.__class__.__name__, user.username, book.title)

    def run(self, filename: str, user: User, overwrite: bool = False) -> None:
        """
        Load the books in the CSV identified by ``filename`` into the database,
        splitting each row into appropriate
        :py:class:`book_manager.models.Book`,
        :py:class:`book_manager.models.Author`,
        :py:class:`book_manager.models.Publisher` and
        :py:class:`book_manager.models.Binding` records, creating the foreign
        keys and many-to-many targets as needed.

        :py:class:`book_manager.models.Reading` data will always be overwritten, and
        :py:class:`book_manager.models.Book` data will be preserved, unless ``overwrite`` is
        ``True``.

        The file is read twice: once by :py:meth:`load_lookups` and once here
        to import each row.

        Args:
            filename: the filename of the Goodreads CSV export file
            user: the user who will own the imported :py:class:`Reading` records

        Keyword Args:
            overwrite: if ``True``, overwrite any existing :py:class:`Book` with data from the CSV
        """
        self.load_lookups(filename)
        # ``newline=''`` lets the csv module do its own newline handling, so
        # quoted fields that contain line breaks are parsed correctly.
        with open(filename, encoding='utf-8', newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                book = self.import_book(row, overwrite=overwrite)
                self.import_reading(book, user, row)