From 00a49723a9ebcf60f99f7d20a0a8880e5b07c82f Mon Sep 17 00:00:00 2001 From: Jonathan Bernard Date: Mon, 24 Oct 2022 09:55:38 -0500 Subject: [PATCH] Implemented media library scanning. - Use VLC instance to parse metadata from the media files. - Switched to MD5 for hashing the files. - Switched to using the file location as the proxy for whether we have seen the file, rather than its hash. Hashing every file was slow. - Moved away from using jsonutils {to, from}Json methods for persistence of the library DB. For simple objects it works well, but it serialized the internal implementation of tables, etc. Now using a hybrid. Still use jsonutils for a number of the models, but have custom code to serialize TableRefs and other data structures. --- src/main/nim/wdiwtlt/cli.nim | 16 +++++--- src/main/nim/wdiwtlt/db.nim | 10 ++++- src/main/nim/wdiwtlt/library.nim | 64 +++++++++++++++++++------------- src/main/nim/wdiwtlt/libvlc.nim | 31 +++++++++++++++- src/main/nim/wdiwtlt/models.nim | 47 ++++++++++++++++++++++- 5 files changed, 133 insertions(+), 35 deletions(-) diff --git a/src/main/nim/wdiwtlt/cli.nim b/src/main/nim/wdiwtlt/cli.nim index 0392af5..66f13f9 100644 --- a/src/main/nim/wdiwtlt/cli.nim +++ b/src/main/nim/wdiwtlt/cli.nim @@ -20,7 +20,7 @@ proc initCliCtx(cfg: WdiwtltConfig): CliCtx = cfg: cfg, player: vlc.newMediaPlayer, vlc: vlc, - library: initLibrary(cfg.libraryPath, cfg.dbPath)) + library: initLibrary(cfg.libraryPath, cfg.dbPath, vlc)) proc release(ctx: CliCtx) = if not ctx.player.isNil: ctx.player.release @@ -66,16 +66,22 @@ proc startCli*(cfg: WdiwtltConfig) = cmdChannel.close() ctx.release() -proc processScan(ctx: CliCtx) = - ctx.library.scan +proc processScan(ctx: CliCtx, fullRescan = false) = + stdout.writeLine("Sanning media library...") + ctx.library.scan(fullRescan) proc handleCmd(ctx: CliCtx, cmd: string): bool = result = false if STOP_CMDS.contains(cmd): return true - let cmdParts = cmd.split(" ") + let cmdParts = cmd.split(" ", 1) if cmdParts.len == 0: 
return + let command = cmdParts[0].toLower + let rest = + if cmdParts.len > 1: cmdParts[1] + else: "" - case cmdParts[0]: + case command: of "scan": ctx.processScan() + of "rescan": ctx.processScan(true) else: stdout.writeLine("Unrecognized command: '" & cmdParts[0] & "'") diff --git a/src/main/nim/wdiwtlt/db.nim b/src/main/nim/wdiwtlt/db.nim index ed02ad5..c73f31c 100644 --- a/src/main/nim/wdiwtlt/db.nim +++ b/src/main/nim/wdiwtlt/db.nim @@ -88,6 +88,7 @@ proc update*(db: WdiwtltDb, mf: MediaFile); proc findMediaFilesByAlbum*(db: WdiwtltDb, a: Album): seq[MediaFile]; proc findMediaFilesByArtist*(db: WdiwtltDb, a: Artist): seq[MediaFile]; proc findMediaFileByHash*(db: WdiwtltDb, hash: string): Option[MediaFile]; +proc findMediaFileByPath*(db: WdiwtltDb, path: string): Option[MediaFile]; ## Playlists ## -------------------- @@ -116,6 +117,7 @@ proc removeEmptyPlaylists*(db: WdiwtltDb): void; ## To JSON ## -------------------- proc `%`(dt: DateTime): JsonNode = %(dt.formatIso8601) +proc `%`(u: UUID): JsonNode = %($u) proc `%`(table: TableRef): JsonNode = result = newJObject() for k, v in table.pairs: result[$k] = %v @@ -251,11 +253,10 @@ proc loadDb*(path: string): WdiwtltDb = if not fileExists(path): raise newException(Exception, "Unable to open database file '" & path & "'") - debug "loaded DB" result = WdiwtltDb( jsonFilePath: path, root: parseDbRoot(parseJson(path.readFile))) - debug result.debug + debug "loaded DB" proc persist*(db: WdiwtltDb): void = db.jsonFilePath.writeFile($(%db.root)) @@ -392,6 +393,11 @@ proc findMediaFileByHash*(db: WdiwtltDb, hash: string): Option[MediaFile] = return none[MediaFile]() +proc findMediaFileByPath*(db: WdiwtltDb, path: string): Option[MediaFile] = + for mf in db.root.mediaFiles.values: + if mf.filePath == path: return some(mf) + return none[MediaFile]() + ## Playlists ## -------------------- proc add*(db: WdiwtltDb, p: Playlist) = db.root.playlists[p.id] = p diff --git a/src/main/nim/wdiwtlt/library.nim 
b/src/main/nim/wdiwtlt/library.nim index 985f891..09403da 100644 --- a/src/main/nim/wdiwtlt/library.nim +++ b/src/main/nim/wdiwtlt/library.nim @@ -1,6 +1,7 @@ -import std/[nre, options, os, sha1, strutils, times, unicode] +import std/[logging, nre, options, os, times, unicode] +import std/strutils except strip import console_progress, uuids -import ./db, ./libvlc, ./models +import ./db, ./incremental_md5, ./libvlc, ./models type WdiwtltLibrary* = ref object @@ -9,7 +10,7 @@ type db: WdiwtltDb vlc: LibVlcInstance -let FILENAME_PAT = re"(\d+[:-_ ]+)?(.+)$" +let FILENAME_PAT = re"^(\d+)?[:\-_ ]*(.+)$" let RECOGNIZED_MEDIA_EXTENSIONS = [ "3gp", "aac", "aif", "avi", "div", "flac", "flv", "h264", "m4a", "mid", "midi", "mka", "mkv", "mov", "mp3", "mp4a", "mpeg", "mpg", "mpg3", "mpg4", @@ -21,11 +22,11 @@ iterator walkMediaFiles*(l: WdiwtltLibrary): string = for f in l.rootPath.walkDirRec(relative = true): let (_, name, ext) = f.splitFile if name.startsWith('.'): continue - if not RECOGNIZED_MEDIA_EXTENSIONS.contains(ext.toLower): continue + if not RECOGNIZED_MEDIA_EXTENSIONS.contains(ext[1..^1].toLower): continue yield f -proc initLibrary*(rootPath: string, dbPath: string): WdiwtltLibrary = - WdiwtltLibrary(rootPath: rootPath, db: loadDb(dbPath)) +proc initLibrary*(rootPath: string, dbPath: string, vlc: LibVlcInstance): WdiwtltLibrary = + WdiwtltLibrary(rootPath: rootPath, db: loadDb(dbPath), vlc: vlc) proc clean*(l: WdiwtltLibrary) = let staleDt = now() - 1.weeks @@ -49,10 +50,12 @@ proc initMediaFile*( trackTotal: Option[int] ] = - if not fileExists(path): - raise newException(IOError, "file does not exist: '" & path & "'") + let absPath = l.rootPath / path + if not fileExists(absPath): + raise newException(IOError, "file does not exist: '" & absPath & "'") - let m = path.match(FILENAME_PAT) + let (_, name, _) = path.splitFile + let m = name.match(FILENAME_PAT) result = ( MediaFile( @@ -61,22 +64,22 @@ proc initMediaFile*( discNumber: none[string](), fileHash: if 
hash.isSome: hash.get - else: $secureHashFile(path), - filePath: - if m.isSome: m.get.captures[2] - else: path, + else: fileToMD5(absPath), + filePath: path, id: if id.isSome: id.get else: genUUID(), imageUri: none[string](), lastPlayed: none[DateTime](), metaInfoSource: msFileLocation, - name: path.splitFile.name, + name: + if m.isSome and m.get.captures.contains(1): m.get.captures[1] + else: name, playCount: 0, presentLocally: true, trackNumber: - if m.isSome and m.get.captures.contains(1): - some(parseInt(m.get.captures[1])) + if m.isSome and m.get.captures.contains(0): + some(parseInt(m.get.captures[0].strip)) else: none[int]()), none[string](), none[string](), @@ -84,7 +87,7 @@ proc initMediaFile*( var media: VlcMedia try: - media = l.vlc.mediaFromPath(path) + media = l.vlc.mediaFromPath(cstring("file:///" & absPath)) media.parse let mName = media.getMeta(vmTitle) @@ -112,11 +115,14 @@ proc initMediaFile*( try: result.trackTotal = some(parseInt($mTrackTotal)) except: result.trackTotal = none[int]() - except: discard + except: + info "Failed to read meta from file using VLC: " & + getCurrentExceptionMsg() + debug getCurrentException().getStackTrace() finally: if not media.isNil: media.release -proc scan*(l: WdiwtltLibrary) = +proc scan*(l: WdiwtltLibrary, fullRescan = false) = var fileCount = 0 for f in l.walkMediaFiles: fileCount += 1 @@ -124,16 +130,22 @@ proc scan*(l: WdiwtltLibrary) = let progress = newProgress(stdout, fileCount) var curCount = 0 + debug "Scanning media library root at " & l.rootPath for f in l.walkMediaFiles: - progress.updateProgress(curCount, f[max(f.high - 15, 0)..f.high]) + let (_, name, _) = f.splitFile + progress.updateProgress(curCount, name[0..min(name.high, 15)]) + curCount += 1 - # Skip this file if we already have a record of it - let hash = $secureHashFile(f) - var existingMf = l.db.findMediaFileByHash(hash) - if existingMf.isSome: continue + # Skip this file if we already have a record of it. 
Hash can be slow + # depending on disk speed, so we'll just look for the file path + var existingMf = l.db.findMediaFileByPath(f) + #let hash = fileToMD5(fullfn) + #l.db.findMediaFileByHash(hash) + if not fullRescan and existingMf.isSome: continue # Process this new file - let (mf, artistsFromMeta, albumsFromMeta, trackTotal) = l.initMediaFile(f) + let (mf, artistsFromMeta, albumsFromMeta, trackTotal) = + l.initMediaFile(f) l.db.add(mf) var allArtists = newSeq[Artist]() @@ -180,4 +192,6 @@ proc scan*(l: WdiwtltLibrary) = for artist in allArtists: l.db.associate(allArtists[0], allAlbums[0]) + progress.updateProgress(curCount, "") + stdout.writeLine("Scan complete") l.db.persist diff --git a/src/main/nim/wdiwtlt/libvlc.nim b/src/main/nim/wdiwtlt/libvlc.nim index 757fcb8..47c4ed8 100644 --- a/src/main/nim/wdiwtlt/libvlc.nim +++ b/src/main/nim/wdiwtlt/libvlc.nim @@ -11,7 +11,7 @@ type VlcMedia* = ptr object VlcMetaType* = enum - vmTitle, + vmTitle = 0, vmArtist, vmGenre, vmCopyright, @@ -38,6 +38,20 @@ type vmDiscNumber, vmDiscTotal + VlcMediaParseFlag* = enum + vmpfParseLocal = 0x00, + vmpfParseNetwork = 0x01, + vmpfFetchLocal = 0x02, + vmpfFetchNetwork = 0x04, + vmpfDoInteract = 0x08, + + VlcMediaParseStatus* = enum + vmpsPending = 1, + vmpsSkipped, + vmpsFailed, + vmpsTimeout, + vmpsDone + VlcMediaState* = enum vmsNothingSpecial, vmsOpening, @@ -67,6 +81,9 @@ proc version*(inst: LibVlcInstance): cstring {.importc: "libvlc_get_version".} proc newMediaPlayer*(inst: LibVlcInstance): VlcMediaPlayer {.importc: "libvlc_media_player_new".} +proc newMediaPlayer*(media: VlcMedia): + VlcMediaPlayer {.importc: "libvlc_media_player_new_from_media".} + proc release*(mp: VlcMediaPlayer) {.importc: "libvlc_media_player_release".} proc retain*(mp: VlcMediaPlayer) {.importc: "libvlc_media_player_retain".} @@ -115,8 +132,17 @@ proc release*(media: VlcMedia) {.importc: "libvlc_media_release".} proc duplicate*(media: VlcMedia): VlcMedia {.importc: "libvlc_media_duplicate".} proc 
parse*(media: VlcMedia) {.importc: "libvlc_media_parse".} +proc parseAsync*(media: VlcMedia) {.importc: "libvlc_media_parse_async".} +proc parse_with_options*( + media: VlcMedia, + parseFlags: VlcMediaParseFlag, + timeout = -1) {.importc: "libvlc_media_parse_with_options".} + proc isParsed*(media: VlcMedia) {.importc: "libvlc_media_is_parsed".} +proc getParsedStatus*(media: VlcMedia): + VlcMediaParseStatus {.importc: "libvlc_media_get_parsed_status".} + proc getMeta*(media: VlcMedia, key: VlcMetaType): cstring {.importc: "libvlc_media_get_meta".} @@ -131,6 +157,9 @@ proc libvlc_errmsg*(): cstring {.importc: "libvlc_errmsg" .} proc libvlc_clearerr*(): cstring {.importc: "libvlc_clearerr" .} proc libvlc_printerr*(fmt: cstring): cstring {.importc: "libvlc_printerr" .} +# Logging +# https://videolan.videolan.me/vlc/group__libvlc__log.html + # Nim-native wrappers proc newVlc*(args: openarray[string] = []): LibVlcInstance = let argc = args.len diff --git a/src/main/nim/wdiwtlt/models.nim b/src/main/nim/wdiwtlt/models.nim index d9d6153..05c83a5 100644 --- a/src/main/nim/wdiwtlt/models.nim +++ b/src/main/nim/wdiwtlt/models.nim @@ -74,13 +74,56 @@ proc fromJsonHook(dt: var DateTime, node: JsonNode) = proc `%`*(a: Album): JsonNode = toJson(a) proc `%`*(a: Artist): JsonNode = toJson(a) proc `%`*(b: Bookmark): JsonNode = toJson(b) -proc `%`*(mf: MediaFile): JsonNode = toJson(mf) +proc `%`*(mf: MediaFile): JsonNode = + result = %*{ + "id": $mf.id, + "name": mf.name, + "dateAdded": mf.dateAdded.formatIso8601, + "fileHash": mf.fileHash, + "filePath": mf.filePath, + "metaInfoSource": $mf.metaInfoSource, + "playCount": mf.playCount, + "presentLocally": mf.presentLocally, + "trackNumber": mf.trackNumber + } + + if mf.comment.isSome: result["comment"] = %mf.comment.get + if mf.discNumber.isSome: result["discNumber"] = %mf.discNumber.get + if mf.imageUri.isSome: result["imageUri"] = %mf.imageUri.get + if mf.lastPlayed.isSome: result["lastPlayed"] = + %(mf.lastPlayed.get.formatIso8601) 
+ proc `%`*(p: Playlist): JsonNode = toJson(p) proc `%`*(t: Tag): JsonNode = toJson(t) proc parseAlbum*(n: JsonNode): Album = result.fromJson(n) proc parseArtist*(n: JsonNode): Artist = result.fromJson(n) proc parseBookmark*(n: JsonNode): Bookmark = result.fromJson(n) -proc parseMediaFile*(n: JsonNode): MediaFile = result.fromJson(n) + +proc parseMediaFile*(n: JsonNode): MediaFile = + MediaFile( + id: n.parseUUID("id"), + name: n.getOrFail("name").getStr, + dateAdded: n.getOrFail("dateAdded").getStr.parseIso8601, + discNumber: + if n.contains("discNumber"): some(n["discNumber"].getStr) + else: none[string](), + fileHash: n.getOrFail("fileHash").getStr, + filePath: n.getOrFail("filePath").getStr, + imageUri: + if n.contains("imageUrl"): some(n["imageUrl"].getStr) + else: none[string](), + lastPlayed: + if n.contains("lastPlayed"): some(n["lastPlayed"].getStr.parseIso8601) + else: none[DateTime](), + metaInfoSource: + parseEnum[MetaSource](n.getOrFail("metaInfoSource").getStr), + playCount: n.getOrFail("playCount").getInt, + presentLocally: n.getOrFail("presentLocally").getBool, + trackNumber: + if n.contains("trackNumber"): some(n["trackNumber"].getInt) + else: none[int]()) + + proc parsePlaylist*(n: JsonNode): Playlist = result.fromJson(n) proc parseTag*(n: JsonNode): Tag = result.fromJson(n)