package live.page.utils;

import com.drew.imaging.ImageMetadataReader;
import com.drew.metadata.Metadata;
import com.drew.metadata.exif.ExifSubIFDDirectory;
import live.page.hubd.blobs.Blob;
import live.page.hubd.system.json.Json;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Best-effort extraction of a date for a page described by a {@link Json} document,
 * with a fallback to the EXIF metadata of the associated {@link Blob}.
 *
 * <p>NOTE(review): the class name and the looked-up keys ("taken on", "exif date", ...)
 * suggest Wikimedia Commons file pages; confirm against the callers.
 */
public class WikiCommonDateParser {

    /**
     * Date patterns tried by {@link #parse(String)}, from most to least specific.
     * Kept as strings because {@link SimpleDateFormat} instances are not thread-safe
     * and therefore must not be shared through a static field.
     */
    private static final List<String> DATE_PATTERNS = List.of(
            "yyyy-MM-dd'T'HH:mm:ss",
            "yyyy-MM-dd HH:mm:ss",
            "yyyy-MM-dd HH:mm",
            "yyyy-MM-dd",
            "yyyy-MM",
            "MMMM d, yyyy",
            "MMMM yyyy",
            "yyyy"
    );

    /**
     * Matches {@code date = <letters, '{' or '|'><digits, spaces, '-' or ':'>}.
     * Group 1 is the textual prefix, group 2 the run that can hold the date digits.
     */
    private static final Pattern TEXT_DATE =
            Pattern.compile("date ?+= ?+([a-z{|]+)([0-9 \\-:]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Parses a free-form date string.
     *
     * <p>Every pattern is first tried on the string as given. If none matches, the string is
     * trimmed and, for {@code i = 1 .. length - 5}, each pattern is tried on the string with
     * its last {@code i + 1} characters removed and then on the string with its first
     * {@code i} characters removed. The first successful parse wins.
     *
     * <p>Formats are built with the JVM default locale and time zone.
     *
     * @param date the text to parse, may be {@code null}
     * @return the parsed date, or {@code null} when {@code date} is {@code null} or no
     *         pattern matches any of the attempted substrings
     */
    public static Date parse(String date) {
        if (date == null) {
            return null;
        }
        // Fresh instances per call: SimpleDateFormat keeps mutable parsing state.
        List<SimpleDateFormat> formats = newFormats();

        for (SimpleDateFormat format : formats) {
            Date parsed = tryParse(format, date);
            if (parsed != null) {
                return parsed;
            }
        }

        String trimmed = date.trim();
        for (int i = 1; i < trimmed.length() - 4; i++) {
            String withoutTail = trimmed.substring(0, trimmed.length() - (1 + i));
            String withoutHead = trimmed.substring(i);
            for (SimpleDateFormat format : formats) {
                Date parsed = tryParse(format, withoutTail);
                if (parsed != null) {
                    return parsed;
                }
                parsed = tryParse(format, withoutHead);
                if (parsed != null) {
                    return parsed;
                }
            }
        }
        return null;
    }

    /**
     * Resolves a date for {@code page}.
     *
     * <p>Sources are consulted in this order: the {@code data.information.date} entry of the
     * page, a {@code date = ...} assignment found in the page's {@code text}, and finally the
     * EXIF "date/time original" (then "date/time") tag read from {@code blob}. A {@code "."}
     * is written to standard output whenever the first two sources yield no parseable date.
     *
     * @param page the page document; must not be {@code null}
     * @param blob the blob whose file is read for EXIF metadata, or {@code null} to skip it
     * @return the resolved date, or {@code null} when no source provides one
     */
    public static Date cleanDate(Json page, Blob blob) {
        String dateStr = extractDeclaredDate(page);
        if (dateStr == null) {
            dateStr = parseTextDate(page.getText("text", ""));
        }
        if (dateStr != null) {
            Date date = parse(dateStr);
            if (date != null) {
                return date;
            }
        }
        // Progress marker emitted once per page whose textual date could not be resolved.
        System.out.print(".");
        return readExifDate(blob);
    }

    /** Builds one formatter per entry of {@link #DATE_PATTERNS}, in the same order. */
    private static List<SimpleDateFormat> newFormats() {
        List<SimpleDateFormat> formats = new ArrayList<>(DATE_PATTERNS.size());
        for (String pattern : DATE_PATTERNS) {
            formats.add(new SimpleDateFormat(pattern));
        }
        return formats;
    }

    /** Parses {@code text} with {@code format}; returns {@code null} instead of throwing on a mismatch. */
    private static Date tryParse(SimpleDateFormat format, String text) {
        try {
            return format.parse(text);
        } catch (ParseException ignored) {
            // A mismatch is the expected outcome for most pattern/substring combinations.
            return null;
        }
    }

    /** Reads the raw date text from {@code data.information.date}, which may be a string or an object. */
    private static String extractDeclaredDate(Json page) {
        Json infos = null;
        try {
            infos = page.getJson("data").getJson("information");
        } catch (Exception ignored) {
            // Best effort: a page without a usable "data"/"information" section has no declared date.
        }
        if (infos == null || infos.get("date") == null) {
            return null;
        }
        String plain = infos.getString("date");
        if (plain != null) {
            return plain;
        }
        Json date = infos.getJson("date");
        if (date == null) {
            return null;
        }
        String structured = pickStructuredDate(date);
        // "?" is treated as "no date" so that the later fallbacks get a chance.
        return "?".equals(structured) ? null : structured;
    }

    /**
     * Picks the date text out of a structured {@code date} object. Keys are probed in a fixed
     * priority order; the first key that is present decides the result, even if its value
     * turns out to be {@code null}.
     */
    private static String pickStructuredDate(Json date) {
        String value = date.getString("isostring");
        if (value != null) {
            return value;
        }
        if (date.containsKey("isodate")) {
            return date.getString("isodate");
        }
        Json child = firstChild(date, "taken on", "taken in");
        if (child != null) {
            return child.getString("1");
        }
        value = date.getString("taken on");
        if (value != null) {
            return value;
        }
        // NOTE(review): the key "taken o" and the removal of every 'n' look like a workaround
        // for malformed upstream data - confirm before changing.
        value = date.getString("taken o");
        if (value != null) {
            return value.replace("n", "");
        }
        value = firstString(date, "taken in", "original upload date", "dtz");
        if (value != null) {
            return value;
        }
        child = firstChild(date, "exif date", "according to exif data", "accordingtoexif");
        if (child != null) {
            return child.getString("1");
        }
        value = firstString(date, "exif date", "according to exif data", "accordingtoexif", "according to exif");
        if (value != null) {
            return value;
        }
        List<String> otherDate = date.getList("other date", String.class);
        // The second element is the one used; shorter lists fall through to the next keys.
        if (otherDate != null && otherDate.size() > 1) {
            return otherDate.get(1);
        }
        value = firstString(date, "other date", "circa", "upload date");
        if (value != null) {
            return value;
        }
        List<String> estimated = date.getList("takenestormiz", String.class);
        if (estimated != null && !estimated.isEmpty()) {
            return estimated.get(0);
        }
        return null;
    }

    /** Returns the first non-null string value among {@code keys}, or {@code null}. */
    private static String firstString(Json json, String... keys) {
        for (String key : keys) {
            String value = json.getString(key);
            if (value != null) {
                return value;
            }
        }
        return null;
    }

    /** Returns the first non-null nested object among {@code keys}, or {@code null}. */
    private static Json firstChild(Json json, String... keys) {
        for (String key : keys) {
            Json child = json.getJson(key);
            if (child != null) {
                return child;
            }
        }
        return null;
    }

    /** Reads the EXIF original date (falling back to the plain EXIF date) from {@code blob}, or {@code null}. */
    private static Date readExifDate(Blob blob) {
        if (blob == null) {
            return null;
        }
        try {
            Metadata metadata = ImageMetadataReader.readMetadata(blob.file);
            ExifSubIFDDirectory directory = metadata.getFirstDirectoryOfType(ExifSubIFDDirectory.class);
            if (directory == null) {
                // No Exif SubIFD section in this file.
                return null;
            }
            Date date = directory.getDate(ExifSubIFDDirectory.TAG_DATETIME_ORIGINAL);
            if (date == null) {
                date = directory.getDate(ExifSubIFDDirectory.TAG_DATETIME);
            }
            return date;
        } catch (Exception ignored) {
            // Best effort: unreadable or unsupported files simply provide no EXIF date.
            return null;
        }
    }

    /**
     * Finds the first {@code date = ...} assignment in {@code text} and returns the
     * digit/space/dash/colon run that follows its textual prefix.
     *
     * @param text the raw page text, may be {@code null}
     * @return the trimmed candidate date text, or {@code null} when there is no match or the
     *         matched run is blank
     */
    private static String parseTextDate(String text) {
        if (text == null) {
            return null;
        }
        Matcher matcher = TEXT_DATE.matcher(text);
        if (!matcher.find()) {
            return null;
        }
        // Group 2 holds the digits; group 1 is only the prefix such as "{{ISOdate|".
        String candidate = matcher.group(2).trim();
        return candidate.isEmpty() ? null : candidate;
    }
}