Implemented media library scanning.

- Use VLC instance to parse metadata from the media files.
- Switched to MD5 for hashing the files.
- Switched to using the file location, rather than the hash, as the proxy
  for whether we have already seen a file. Hashing every file was slow.
- Moved away from using jsonutils {to, from}Json methods for persistence
  of the library DB. For simple objects it works well, but it serialized
  the internal implementation of tables, etc. Now using a hybrid: still
  using jsonutils for a number of the models, but with custom code to
  serialize TableRefs and other data structures.
This commit is contained in:
Jonathan Bernard 2022-10-24 09:55:38 -05:00
parent af12546ebc
commit 00a49723a9
5 changed files with 133 additions and 35 deletions

View File

@ -20,7 +20,7 @@ proc initCliCtx(cfg: WdiwtltConfig): CliCtx =
cfg: cfg,
player: vlc.newMediaPlayer,
vlc: vlc,
library: initLibrary(cfg.libraryPath, cfg.dbPath))
library: initLibrary(cfg.libraryPath, cfg.dbPath, vlc))
proc release(ctx: CliCtx) =
if not ctx.player.isNil: ctx.player.release
@ -66,16 +66,22 @@ proc startCli*(cfg: WdiwtltConfig) =
cmdChannel.close()
ctx.release()
proc processScan(ctx: CliCtx) =
ctx.library.scan
proc processScan(ctx: CliCtx, fullRescan = false) =
  ## Announce the scan on stdout, then scan the media library held by `ctx`.
  ##
  ## `fullRescan` is forwarded unchanged to the library's `scan`.
  stdout.writeLine("Scanning media library...")
  ctx.library.scan(fullRescan)
proc handleCmd(ctx: CliCtx, cmd: string): bool =
result = false
if STOP_CMDS.contains(cmd): return true
let cmdParts = cmd.split(" ")
let cmdParts = cmd.split(" ", 1)
if cmdParts.len == 0: return
let command = cmdParts[0].toLower
let rest =
if cmdParts.len > 1: cmdParts[1]
else: ""
case cmdParts[0]:
case command:
of "scan": ctx.processScan()
of "rescan": ctx.processScan(true)
else: stdout.writeLine("Unrecognized command: '" & cmdParts[0] & "'")

View File

@ -88,6 +88,7 @@ proc update*(db: WdiwtltDb, mf: MediaFile);
proc findMediaFilesByAlbum*(db: WdiwtltDb, a: Album): seq[MediaFile];
proc findMediaFilesByArtist*(db: WdiwtltDb, a: Artist): seq[MediaFile];
proc findMediaFileByHash*(db: WdiwtltDb, hash: string): Option[MediaFile];
proc findMediaFileByPath*(db: WdiwtltDb, path: string): Option[MediaFile];
## Playlists
## --------------------
@ -116,6 +117,7 @@ proc removeEmptyPlaylists*(db: WdiwtltDb): void;
## To JSON
## --------------------
proc `%`(dt: DateTime): JsonNode = %(dt.formatIso8601)
proc `%`(u: UUID): JsonNode = %($u)
proc `%`(table: TableRef): JsonNode =
result = newJObject()
for k, v in table.pairs: result[$k] = %v
@ -251,11 +253,10 @@ proc loadDb*(path: string): WdiwtltDb =
if not fileExists(path):
raise newException(Exception, "Unable to open database file '" & path & "'")
debug "loaded DB"
result = WdiwtltDb(
jsonFilePath: path,
root: parseDbRoot(parseJson(path.readFile)))
debug result.debug
debug "loaded DB"
proc persist*(db: WdiwtltDb): void =
db.jsonFilePath.writeFile($(%db.root))
@ -392,6 +393,11 @@ proc findMediaFileByHash*(db: WdiwtltDb, hash: string): Option[MediaFile] =
return none[MediaFile]()
proc findMediaFileByPath*(db: WdiwtltDb, path: string): Option[MediaFile] =
  ## Look through every media file stored in `db` and return the first one
  ## whose `filePath` is exactly equal to `path`, or `none` when no stored
  ## file matches.
  result = none[MediaFile]()
  for candidate in db.root.mediaFiles.values:
    if candidate.filePath == path:
      result = some(candidate)
      break
## Playlists
## --------------------
proc add*(db: WdiwtltDb, p: Playlist) = db.root.playlists[p.id] = p

View File

@ -1,6 +1,7 @@
import std/[nre, options, os, sha1, strutils, times, unicode]
import std/[logging, nre, options, os, times, unicode]
import std/strutils except strip
import console_progress, uuids
import ./db, ./libvlc, ./models
import ./db, ./incremental_md5, ./libvlc, ./models
type
WdiwtltLibrary* = ref object
@ -9,7 +10,7 @@ type
db: WdiwtltDb
vlc: LibVlcInstance
let FILENAME_PAT = re"(\d+[:-_ ]+)?(.+)$"
let FILENAME_PAT = re"^(\d+)?[:\-_ ]*(.+)$"
let RECOGNIZED_MEDIA_EXTENSIONS = [
"3gp", "aac", "aif", "avi", "div", "flac", "flv", "h264", "m4a", "mid",
"midi", "mka", "mkv", "mov", "mp3", "mp4a", "mpeg", "mpg", "mpg3", "mpg4",
@ -21,11 +22,11 @@ iterator walkMediaFiles*(l: WdiwtltLibrary): string =
for f in l.rootPath.walkDirRec(relative = true):
let (_, name, ext) = f.splitFile
if name.startsWith('.'): continue
if not RECOGNIZED_MEDIA_EXTENSIONS.contains(ext.toLower): continue
if not RECOGNIZED_MEDIA_EXTENSIONS.contains(ext[1..^1].toLower): continue
yield f
proc initLibrary*(rootPath: string, dbPath: string): WdiwtltLibrary =
WdiwtltLibrary(rootPath: rootPath, db: loadDb(dbPath))
proc initLibrary*(
    rootPath: string,
    dbPath: string,
    vlc: LibVlcInstance): WdiwtltLibrary =
  ## Create a library rooted at `rootPath`, backed by the database loaded
  ## from `dbPath`, and holding on to the given `vlc` instance.
  let loadedDb = loadDb(dbPath)
  result = WdiwtltLibrary(rootPath: rootPath, db: loadedDb, vlc: vlc)
proc clean*(l: WdiwtltLibrary) =
let staleDt = now() - 1.weeks
@ -49,10 +50,12 @@ proc initMediaFile*(
trackTotal: Option[int]
] =
if not fileExists(path):
raise newException(IOError, "file does not exist: '" & path & "'")
let absPath = l.rootPath / path
if not fileExists(absPath):
raise newException(IOError, "file does not exist: '" & absPath & "'")
let m = path.match(FILENAME_PAT)
let (_, name, _) = path.splitFile
let m = name.match(FILENAME_PAT)
result = (
MediaFile(
@ -61,22 +64,22 @@ proc initMediaFile*(
discNumber: none[string](),
fileHash:
if hash.isSome: hash.get
else: $secureHashFile(path),
filePath:
if m.isSome: m.get.captures[2]
else: path,
else: fileToMD5(absPath),
filePath: path,
id:
if id.isSome: id.get
else: genUUID(),
imageUri: none[string](),
lastPlayed: none[DateTime](),
metaInfoSource: msFileLocation,
name: path.splitFile.name,
name:
if m.isSome and m.get.captures.contains(1): m.get.captures[1]
else: name,
playCount: 0,
presentLocally: true,
trackNumber:
if m.isSome and m.get.captures.contains(1):
some(parseInt(m.get.captures[1]))
if m.isSome and m.get.captures.contains(0):
some(parseInt(m.get.captures[0].strip))
else: none[int]()),
none[string](),
none[string](),
@ -84,7 +87,7 @@ proc initMediaFile*(
var media: VlcMedia
try:
media = l.vlc.mediaFromPath(path)
media = l.vlc.mediaFromPath(cstring("file:///" & absPath))
media.parse
let mName = media.getMeta(vmTitle)
@ -112,11 +115,14 @@ proc initMediaFile*(
try: result.trackTotal = some(parseInt($mTrackTotal))
except: result.trackTotal = none[int]()
except: discard
except:
info "Failed to read meta from file using VLC: " &
getCurrentExceptionMsg()
debug getCurrentException().getStackTrace()
finally:
if not media.isNil: media.release
proc scan*(l: WdiwtltLibrary) =
proc scan*(l: WdiwtltLibrary, fullRescan = false) =
var fileCount = 0
for f in l.walkMediaFiles: fileCount += 1
@ -124,16 +130,22 @@ proc scan*(l: WdiwtltLibrary) =
let progress = newProgress(stdout, fileCount)
var curCount = 0
debug "Scanning media library root at " & l.rootPath
for f in l.walkMediaFiles:
progress.updateProgress(curCount, f[max(f.high - 15, 0)..f.high])
let (_, name, _) = f.splitFile
progress.updateProgress(curCount, name[0..min(name.high, 15)])
curCount += 1
# Skip this file if we already have a record of it
let hash = $secureHashFile(f)
var existingMf = l.db.findMediaFileByHash(hash)
if existingMf.isSome: continue
# Skip this file if we already have a record of it. Hash can be slow
# depending on disk speed, so we'll just look for the file path
var existingMf = l.db.findMediaFileByPath(f)
#let hash = fileToMD5(fullfn)
#l.db.findMediaFileByHash(hash)
if not fullRescan and existingMf.isSome: continue
# Process this new file
let (mf, artistsFromMeta, albumsFromMeta, trackTotal) = l.initMediaFile(f)
let (mf, artistsFromMeta, albumsFromMeta, trackTotal) =
l.initMediaFile(f)
l.db.add(mf)
var allArtists = newSeq[Artist]()
@ -180,4 +192,6 @@ proc scan*(l: WdiwtltLibrary) =
for artist in allArtists:
l.db.associate(allArtists[0], allAlbums[0])
progress.updateProgress(curCount, "")
stdout.writeLine("Scan complete")
l.db.persist

View File

@ -11,7 +11,7 @@ type
VlcMedia* = ptr object
VlcMetaType* = enum
vmTitle,
vmTitle = 0,
vmArtist,
vmGenre,
vmCopyright,
@ -38,6 +38,20 @@ type
vmDiscNumber,
vmDiscTotal
# Parse flag passed as the `parseFlags` argument of `parse_with_options`.
# The explicit values (0x00, 0x01, 0x02, 0x04, 0x08) are passed to the C
# library as-is, so they must not be renumbered.
# NOTE(review): presumably mirrors libvlc's `libvlc_media_parse_flag_t`,
# whose values are meant to be OR-ed together -- confirm against the libvlc
# headers; as a plain enum only one flag can be passed at a time.
VlcMediaParseFlag* = enum
vmpfParseLocal = 0x00,
vmpfParseNetwork = 0x01,
vmpfFetchLocal = 0x02,
vmpfFetchNetwork = 0x04,
vmpfDoInteract = 0x08,
# Result type of `getParsedStatus`. Numbering starts at 1 (`vmpsPending`);
# the remaining members take the following consecutive values (2 through 5).
# NOTE(review): presumably mirrors libvlc's `libvlc_media_parsed_status_t`
# -- confirm the values against the installed libvlc headers.
VlcMediaParseStatus* = enum
vmpsPending = 1,
vmpsSkipped,
vmpsFailed,
vmpsTimeout,
vmpsDone
VlcMediaState* = enum
vmsNothingSpecial,
vmsOpening,
@ -67,6 +81,9 @@ proc version*(inst: LibVlcInstance): cstring {.importc: "libvlc_get_version".}
proc newMediaPlayer*(inst: LibVlcInstance):
VlcMediaPlayer {.importc: "libvlc_media_player_new".}
# Binding for `libvlc_media_player_new_from_media`: overload of
# `newMediaPlayer` that takes a `VlcMedia` and returns a `VlcMediaPlayer`.
proc newMediaPlayer*(media: VlcMedia):
VlcMediaPlayer {.importc: "libvlc_media_player_new_from_media".}
proc release*(mp: VlcMediaPlayer) {.importc: "libvlc_media_player_release".}
proc retain*(mp: VlcMediaPlayer) {.importc: "libvlc_media_player_retain".}
@ -115,8 +132,17 @@ proc release*(media: VlcMedia) {.importc: "libvlc_media_release".}
proc duplicate*(media: VlcMedia): VlcMedia {.importc: "libvlc_media_duplicate".}
proc parse*(media: VlcMedia) {.importc: "libvlc_media_parse".}
proc parseAsync*(media: VlcMedia) {.importc: "libvlc_media_parse_async".}
# Binding for `libvlc_media_parse_with_options`. Takes the media to parse, a
# single `VlcMediaParseFlag`, and a `timeout` that defaults to -1. No return
# value is declared, so nothing the C function returns is visible to Nim.
# NOTE(review): the C prototype presumably uses C `int` for the flags and the
# timeout and returns a status code -- confirm against the libvlc headers and
# consider `cint` parameters / a declared return type.
proc parse_with_options*(
media: VlcMedia,
parseFlags: VlcMediaParseFlag,
timeout = -1) {.importc: "libvlc_media_parse_with_options".}
# Binding for `libvlc_media_is_parsed`.
# NOTE(review): no return type is declared here, so callers cannot read the
# answer. The C function presumably returns an int/bool -- confirm and
# consider declaring a return type.
proc isParsed*(media: VlcMedia) {.importc: "libvlc_media_is_parsed".}
# Binding for `libvlc_media_get_parsed_status`: returns the
# `VlcMediaParseStatus` reported by the C library for `media`.
proc getParsedStatus*(media: VlcMedia):
VlcMediaParseStatus {.importc: "libvlc_media_get_parsed_status".}
proc getMeta*(media: VlcMedia, key: VlcMetaType):
cstring {.importc: "libvlc_media_get_meta".}
@ -131,6 +157,9 @@ proc libvlc_errmsg*(): cstring {.importc: "libvlc_errmsg" .}
proc libvlc_clearerr*(): cstring {.importc: "libvlc_clearerr" .}
proc libvlc_printerr*(fmt: cstring): cstring {.importc: "libvlc_printerr" .}
# Logging
# https://videolan.videolan.me/vlc/group__libvlc__log.html
# Nim-native wrappers
proc newVlc*(args: openarray[string] = []): LibVlcInstance =
let argc = args.len

View File

@ -74,13 +74,56 @@ proc fromJsonHook(dt: var DateTime, node: JsonNode) =
proc `%`*(a: Album): JsonNode = toJson(a)
proc `%`*(a: Artist): JsonNode = toJson(a)
proc `%`*(b: Bookmark): JsonNode = toJson(b)
proc `%`*(mf: MediaFile): JsonNode = toJson(mf)
proc `%`*(mf: MediaFile): JsonNode =
  ## Serialize a `MediaFile` to a JSON object.
  ##
  ## The required fields (`id`, `name`, `dateAdded`, `fileHash`, `filePath`,
  ## `metaInfoSource`, `playCount`, `presentLocally`) are always written.
  ## Optional fields (`comment`, `discNumber`, `imageUri`, `lastPlayed`,
  ## `trackNumber`) are written only when set, so an unset value is always
  ## represented by an absent key and never by a JSON null.
  result = %*{
    "id": $mf.id,
    "name": mf.name,
    "dateAdded": mf.dateAdded.formatIso8601,
    "fileHash": mf.fileHash,
    "filePath": mf.filePath,
    "metaInfoSource": $mf.metaInfoSource,
    "playCount": mf.playCount,
    "presentLocally": mf.presentLocally
  }

  if mf.comment.isSome: result["comment"] = %mf.comment.get
  if mf.discNumber.isSome: result["discNumber"] = %mf.discNumber.get
  if mf.imageUri.isSome: result["imageUri"] = %mf.imageUri.get
  if mf.lastPlayed.isSome:
    result["lastPlayed"] = %(mf.lastPlayed.get.formatIso8601)

  # Handled like the other optionals: `%` on an empty Option produces a JSON
  # null, and `getInt` on a null node yields 0 rather than "no value".
  if mf.trackNumber.isSome: result["trackNumber"] = %mf.trackNumber.get
proc `%`*(p: Playlist): JsonNode = toJson(p)
proc `%`*(t: Tag): JsonNode = toJson(t)
proc parseAlbum*(n: JsonNode): Album = result.fromJson(n)
proc parseArtist*(n: JsonNode): Artist = result.fromJson(n)
proc parseBookmark*(n: JsonNode): Bookmark = result.fromJson(n)
proc parseMediaFile*(n: JsonNode): MediaFile = result.fromJson(n)
proc parseMediaFile*(n: JsonNode): MediaFile =
  ## Build a `MediaFile` from its JSON object form.
  ##
  ## Required keys (`id`, `name`, `dateAdded`, `fileHash`, `filePath`,
  ## `metaInfoSource`, `playCount`, `presentLocally`) are read through
  ## `parseUUID` / `getOrFail`. Optional keys (`comment`, `discNumber`,
  ## `imageUri`, `lastPlayed`, `trackNumber`) become `none` when absent.
  ##
  ## `trackNumber` is additionally required to be a JSON integer: a null (or
  ## any other non-integer) is treated as "no track number", because `getInt`
  ## would otherwise silently turn it into 0.
  ##
  ## NOTE(review): `comment` is restored as `Option[string]`, matching the
  ## other optional text fields -- confirm against the `MediaFile` type.
  MediaFile(
    id: n.parseUUID("id"),
    name: n.getOrFail("name").getStr,
    comment:
      if n.contains("comment"): some(n["comment"].getStr)
      else: none[string](),
    dateAdded: n.getOrFail("dateAdded").getStr.parseIso8601,
    discNumber:
      if n.contains("discNumber"): some(n["discNumber"].getStr)
      else: none[string](),
    fileHash: n.getOrFail("fileHash").getStr,
    filePath: n.getOrFail("filePath").getStr,
    imageUri:
      if n.contains("imageUri"): some(n["imageUri"].getStr)
      else: none[string](),
    lastPlayed:
      if n.contains("lastPlayed"): some(n["lastPlayed"].getStr.parseIso8601)
      else: none[DateTime](),
    metaInfoSource:
      parseEnum[MetaSource](n.getOrFail("metaInfoSource").getStr),
    playCount: n.getOrFail("playCount").getInt,
    presentLocally: n.getOrFail("presentLocally").getBool,
    trackNumber:
      if n.contains("trackNumber") and n["trackNumber"].kind == JInt:
        some(n["trackNumber"].getInt)
      else: none[int]())
proc parsePlaylist*(n: JsonNode): Playlist = result.fromJson(n)
proc parseTag*(n: JsonNode): Tag = result.fromJson(n)