using AngleSharp;
using AngleSharp.Dom;
using AngleSharp.Io;
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Linq;

namespace napdump.Dumpers
{
    /// <summary>
    /// Dumper that scrapes board information, threads and posts out of HTML pages
    /// fetched through an AngleSharp browsing context with cookie support.
    /// </summary>
    class Nineball : Dumper
    {
        readonly IConfiguration browserConfig;
        readonly IBrowsingContext context;
        readonly ICookieProvider cookies;

        private static readonly Regex reBoardName = new Regex(@"^(\/.*?\/)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
        private static readonly Regex reBoardNameW = new Regex(@"^\/(\w+)\/", RegexOptions.Compiled | RegexOptions.IgnoreCase);
        private static readonly Regex reImageDim = new Regex(@"\((\d+) ([kmg]?b), (\d+)x(\d+)\)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
        private static readonly Regex reDateTime = new Regex(@"(\d\d) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d\d\d\d)\(\w+\)(\d\d):(\d\d)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
        private static readonly Regex reImageDeleted = new Regex(@"^Image deleted by (\w+)$", RegexOptions.Compiled);
        private static readonly Regex reImageSpoilered = new Regex(@"^Image spoilered by (\w+)$", RegexOptions.Compiled);
        private static readonly Regex rePostDeleted = new Regex(@"^Post deleted by (\w+)$", RegexOptions.Compiled);
        private static readonly Regex reUserBanned = new Regex(@"^User banned by (\w+)(?: for (.+))?$", RegexOptions.Compiled);
        private static readonly Regex reMailTo = new Regex(@"^mailto:", RegexOptions.Compiled);

        // Month abbreviations in calendar order; index + 1 is the month number.
        private static readonly string[] monthNames =
        {
            "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
        };

        /// <summary>
        /// Builds the browsing context and seeds its cookie provider with the
        /// cookies listed in <paramref name="config"/> (if any).
        /// </summary>
        /// <param name="config">Dumper configuration; its <c>Cookies</c> may be null.</param>
        public Nineball(DumperConfig config) : base(config)
        {
            browserConfig = Configuration.Default.WithDefaultCookies().WithDefaultLoader();
            cookies = browserConfig.Services.OfType<ICookieProvider>().First();

            foreach (var c in config.Cookies ?? Array.Empty<(string Url, string Value)>())
            {
                cookies.SetCookie(new Url(c.Url), c.Value);
            }

            context = BrowsingContext.New(browserConfig);
        }

        /// <summary>
        /// Fills <paramref name="bi"/> (title, board name, safe name, description, tags)
        /// from an already loaded catalog document.
        /// </summary>
        /// <param name="bi">Board info object to populate.</param>
        /// <param name="document">Document to read the heading and banner from.</param>
        /// <param name="token">Cancellation token, checked once before parsing.</param>
        protected async Task GetBoardInfo(BoardInfo bi, IDocument document, CancellationToken token)
        {
            await Task.Yield();
            token.ThrowIfCancellationRequested();

            bi.Title = document.QuerySelector("body > threads > h1").InnerHtml;

            // Leading "/name/" part of the title, or the whole title when it has none.
            var nameMatch = reBoardName.Match(bi.Title);
            bi.BoardName = nameMatch.Success ? nameMatch.Groups[1].Value : bi.Title;

            // Word-only board name without the slashes.
            var safeMatch = reBoardNameW.Match(bi.Title);
            bi.SafeName = safeMatch.Success ? safeMatch.Groups[1].Value : "unbound";

            // A missing banner leaves the description null instead of aborting the dump.
            bi.Description = document.QuerySelector("#banner_info")?.TextContent;
            bi.Tags = new[] { "meguca", "node", "liveboard" };
        }

        /// <summary>
        /// Parses an image caption of the form <c>(123 KB, 800x600)</c>.
        /// </summary>
        /// <param name="info">Caption text; may be null.</param>
        /// <param name="size">File size in bytes (value multiplied by its B/KB/MB/GB unit).</param>
        /// <param name="x">Image width.</param>
        /// <param name="y">Image height.</param>
        /// <returns>True when the caption was understood; otherwise all outputs are zero.</returns>
        private static bool TryParseImageDimInfo(string info, out long size, out int x, out int y)
        {
            size = default;
            x = y = default;

            if (info == null)
            {
                return false;
            }

            var match = reImageDim.Match(info);
            if (!match.Success)
            {
                return false;
            }

            var groups = match.Groups;

            long multiplier;
            switch (groups[2].Value.ToLowerInvariant())
            {
                case "b": multiplier = 1L; break;
                case "kb": multiplier = 1L << 10; break;
                case "mb": multiplier = 1L << 20; break;
                case "gb": multiplier = 1L << 30; break;
                default: return false;
            }

            if (!long.TryParse(groups[1].Value, out var rawSize)
                || !int.TryParse(groups[3].Value, out var width)
                || !int.TryParse(groups[4].Value, out var height))
            {
                return false;
            }

            // Scale to bytes. This used to be a bitwise AND, which produced garbage sizes.
            size = rawSize * multiplier;
            x = width;
            y = height;
            return true;
        }

        /// <summary>
        /// Parses a timestamp of the form <c>01 Jan 2020(Wed)12:34</c>.
        /// </summary>
        /// <param name="htmlDateTime">Timestamp text; may be null.</param>
        /// <param name="dt">Parsed date and time (seconds are zero), or default on failure.</param>
        /// <returns>True when the text matched and described a valid calendar date and time.</returns>
        private static bool TryParseDateTime(string htmlDateTime, out DateTime dt)
        {
            dt = default;

            if (htmlDateTime == null)
            {
                return false;
            }

            var match = reDateTime.Match(htmlDateTime.Trim());
            if (!match.Success)
            {
                return false;
            }

            var groups = match.Groups;

            // The pattern is case-insensitive, so the month lookup has to be as well.
            string monthText = groups[2].Value;
            int month = Array.FindIndex(monthNames, m => string.Equals(m, monthText, StringComparison.OrdinalIgnoreCase)) + 1;

            if (month == 0
                || !int.TryParse(groups[1].Value, out int day)
                || !int.TryParse(groups[3].Value, out int year)
                || !int.TryParse(groups[4].Value, out int hour)
                || !int.TryParse(groups[5].Value, out int minute))
            {
                return false;
            }

            // Validate up front so impossible dates (e.g. "31 Feb", hour 25) return false
            // instead of relying on a thrown exception.
            if (year < 1 || day < 1 || day > DateTime.DaysInMonth(year, month) || hour > 23 || minute > 59)
            {
                return false;
            }

            dt = new DateTime(year, month, day, hour, minute, 0);
            return true;
        }

        /// <summary>
        /// Builds a moderation log from the HTML of a modlog node. Recognised entries are
        /// "Image deleted by X", "Image spoilered by X", "Post deleted by X" and
        /// "User banned by X[ for reason]". Any parsing error yields an empty log.
        /// </summary>
        /// <param name="nodeHtml">Inner HTML of the modlog node, or null when there is none.</param>
        /// <param name="log">The resulting log; empty when nothing was recognised.</param>
        private static void getModlog(string nodeHtml, out Modlog log)
        {
            log = new Modlog();
            if (nodeHtml == null)
            {
                return;
            }

            try
            {
                // NOTE(review): entries are assumed to be separated by <br> tags inside the
                // modlog node's HTML — confirm against the live markup.
                var lines = nodeHtml.Split("<br>").Select(x => x.Trim()).Where(x => x.Length > 0);

                foreach (var line in lines)
                {
                    var imageDeleted = reImageDeleted.Match(line);
                    if (imageDeleted.Success)
                    {
                        log.ImageDeleted = AdminInfo.Create(true, imageDeleted.Groups[1].Value);
                    }

                    var imageSpoilered = reImageSpoilered.Match(line);
                    if (imageSpoilered.Success)
                    {
                        log.ImageSpoilered = AdminInfo.Create(true, imageSpoilered.Groups[1].Value);
                    }

                    var postDeleted = rePostDeleted.Match(line);
                    if (postDeleted.Success)
                    {
                        log.PostDeleted = AdminInfo.Create(true, postDeleted.Groups[1].Value);
                    }

                    var userBanned = reUserBanned.Match(line);
                    if (userBanned.Success)
                    {
                        var groups = userBanned.Groups;
                        log.UserBanned = AdminInfo.Create(true, groups[1].Value);

                        // The ban reason is optional.
                        if (groups[2].Success)
                        {
                            log.BanMessage = AdminInfo.Create(groups[2].Value, groups[1].Value);
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: a broken modlog must not abort the dump.
                Console.WriteLine("Modlog parsing error: " + ex.Message);
                log = new Modlog();
            }
        }

        /// <summary>Case-insensitive node name comparison.</summary>
        private static bool IsNamed(INode node, string name)
        {
            return string.Equals(node.NodeName, name, StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Extracts name, tripcode, e-mail and capcode from a post header's first
        /// <c>b</c> element. Parts that are not present are returned as null.
        /// </summary>
        /// <param name="header">The post's header element; may be null.</param>
        private static (string Name, string Tripcode, string Email, string Capcode) ParseAuthor(IElement header)
        {
            string name = null, trip = null, mail = null, cap = null;

            var bname = header?.QuerySelector("b");
            if (bname?.FirstChild == null)
            {
                return (name, trip, mail, cap);
            }

            if (IsNamed(bname.FirstChild, "a"))
            {
                // Mail link: the author parts live inside the anchor.
                var anchor = bname.FirstElementChild;
                mail = anchor.GetAttribute("href");
                if (mail != null)
                {
                    mail = reMailTo.Replace(mail, "");
                }

                bname = anchor;
                if (bname.FirstChild == null)
                {
                    return (name, trip, mail, cap);
                }
            }

            var children = bname.ChildNodes;
            if (children.Length > 1 && IsNamed(children[1], "code"))
            {
                // Name followed by tripcode, optionally followed by a capcode.
                name = children[0].TextContent;
                trip = children[1].TextContent;
                if (children.Length > 2)
                {
                    cap = children[2].TextContent;
                }
            }
            else if (children.Length > 1)
            {
                // Name followed by a capcode.
                name = children[0].TextContent;
                cap = children[1].TextContent;
            }
            else if (IsNamed(children[0], "code"))
            {
                // Tripcode, no name.
                trip = children[0].TextContent;
            }
            else
            {
                // Name, no tripcode.
                name = children[0].TextContent;
            }

            return (name, trip, mail, cap);
        }

        /// <summary>
        /// Loads a thread page, fills <paramref name="thread"/> with the opening post's data
        /// and yields every reply found in the page's <c>article</c> elements.
        /// </summary>
        /// <param name="thread">Thread to load; updated in place.</param>
        /// <param name="token">Cancellation token passed to page loading and encryption.</param>
        protected override async IAsyncEnumerable<PostInfo> GetPosts(ThreadInfo thread, [EnumeratorCancellation] CancellationToken token)
        {
            // NOTE(review): thread.BoardInfo is not assigned by GetThreads in this class —
            // presumably the base class populates it; confirm.
            var document = await context.OpenAsync(thread.BoardInfo.BoardURL + thread.PostNumber, token);
            var section = document.QuerySelector("section");
            thread.Locked = section.ClassList.Contains("locked");

            // NOTE(review): the selectors below match any descendant of the section, and the
            // reply articles are descendants too, so an opening post lacking a modlog/figure
            // could pick up a reply's — confirm against the markup.

            // Thread's modlog.
            getModlog(section.QuerySelector("b.modLog")?.InnerHtml, out var threadModlog);
            thread.ModLog = threadModlog;

            // Author and timestamp do not depend on the image, so they are read
            // unconditionally (previously they were skipped for image-less threads).
            (thread.Name, thread.Tripcode, thread.Email, thread.Capcode) = ParseAuthor(section.QuerySelector("header"));

            if (TryParseDateTime(section.QuerySelector("header > time")?.FirstChild?.TextContent, out var threadTimestamp))
            {
                thread.Timestamp = threadTimestamp;
            }
            else
            {
                thread.Timestamp = default;
            }

            // Thread's image.
            var imageInfo = section.QuerySelector("figure > figcaption > i");
            if (imageInfo != null)
            {
                if (TryParseImageDimInfo(imageInfo.FirstChild?.TextContent, out var threadImageSize, out var threadWidth, out var threadHeight))
                {
                    var imageLink = imageInfo.QuerySelector("a");
                    thread.ImageSize = threadImageSize;
                    thread.ImageDimensions = (threadWidth, threadHeight);
                    thread.ImageURL = imageLink?.GetAttribute("href");
                    thread.ImageFilename = imageLink?.GetAttribute("download");
                }
                else
                {
                    thread.ImageDimensions = default;
                    thread.ImageFilename = null;
                    thread.ImageSize = 0;
                    thread.ImageURL = null;
                }
            }

            thread.Body = section.QuerySelector("blockquote")?.InnerHtml;
            thread.ThreadURL = document.Url;
            thread.Subject = section.QuerySelector("header > h3")?.TextContent;

            // Replies.
            foreach (var article in section.QuerySelectorAll("article"))
            {
                var post = new PostInfo()
                {
                    Body = article.QuerySelector("blockquote")?.InnerHtml,
                };

                (post.Name, post.Tripcode, post.Email, post.Capcode) = ParseAuthor(article.QuerySelector("header"));

                if (TryParseDateTime(article.QuerySelector("header > time")?.TextContent, out var postTimestamp))
                {
                    post.Timestamp = postTimestamp;
                }
                else
                {
                    post.Timestamp = default;
                }

                // ulong.TryParse returns false for null, so a missing link yields the default number.
                if (ulong.TryParse(article.QuerySelector("header > nav > a[class=quote]")?.TextContent, out ulong postNumber))
                {
                    post.PostNumber = postNumber;
                }
                else
                {
                    post.PostNumber = default;
                }

                // Post's modlog.
                getModlog(article.QuerySelector("b.modLog")?.InnerHtml, out var postModlog);
                post.ModLog = postModlog;

                // Post's image, if it has one.
                var figure = article.QuerySelector("figure > figcaption > i");
                if (figure != null
                    && TryParseImageDimInfo(figure.FirstChild?.TextContent, out var postImageSize, out var postWidth, out var postHeight))
                {
                    var imageLink = figure.QuerySelector("a");
                    post.ImageDimensions = (postWidth, postHeight);
                    post.ImageSize = postImageSize;
                    post.ImageURL = imageLink?.GetAttribute("href");
                    post.ImageFilename = imageLink?.GetAttribute("download");
                }

                await EncryptIfRequired(post, token);
                yield return post;
            }

            await EncryptIfRequired(thread, token);
        }

        /// <summary>
        /// Encrypts the image and/or body of a post whose modlog marks it as deleted,
        /// when <c>Config.EncryptDeleted</c> is set. Failures are logged, not rethrown;
        /// cancellation is propagated.
        /// </summary>
        /// <param name="post">Post (or thread) to inspect.</param>
        /// <param name="token">Cancellation token passed to the encryption calls.</param>
        private async Task EncryptIfRequired(PostInfo post, CancellationToken token)
        {
            try
            {
                if (Config.EncryptDeleted != null)
                {
                    if (post.ModLog.ImageDeleted)
                    {
                        await post.EncryptImageAsync(Config.EncryptDeleted.Value, token);
                    }

                    if (post.ModLog.PostDeleted)
                    {
                        await post.EncryptPostAsync(Config.EncryptDeleted.Value, token);
                    }
                }
            }
            catch (Exception ex) when (!(ex is OperationCanceledException))
            {
                // Best effort, but a cancellation request must not be reported as an
                // encryption failure and then ignored.
                Console.WriteLine($"Encryption for post {post.PostNumber} failed: {ex.Message}\n{ex.StackTrace}");
            }
        }

        /// <summary>
        /// Loads the board's catalog page, fills <paramref name="boardInfo"/> from it and
        /// yields one <c>ThreadInfo</c> per catalog link whose <c>href</c> is a plain number.
        /// </summary>
        /// <param name="boardInfo">Board to enumerate; updated in place.</param>
        /// <param name="token">Cancellation token passed to page loading.</param>
        protected override async IAsyncEnumerable<ThreadInfo> GetThreads(BoardInfo boardInfo, [EnumeratorCancellation] CancellationToken token)
        {
            var document = await context.OpenAsync(boardInfo.BoardURL + "catalog", token);
            await GetBoardInfo(boardInfo, document, token);

            foreach (var link in document.QuerySelectorAll("#catalog > article > a[class=history]"))
            {
                // GetAttribute returns null for a missing href, which TryParse rejects.
                if (ulong.TryParse(link.GetAttribute("href"), out ulong postNumber))
                {
                    yield return new ThreadInfo()
                    {
                        PostNumber = postNumber,
                    };
                }
            }
        }
    }
}