From d834df1e0d4805d531947b60e830d26739c96b2f Mon Sep 17 00:00:00 2001 From: Lilly Rosaline Date: Sat, 2 Jul 2022 14:39:37 -0500 Subject: [PATCH] partial implementation of hashing? --- Cargo.lock | 50 +++++++++++++++++++++++++++++++++- Cargo.toml | 6 +++-- src/main.rs | 78 +++++++++++++++++++++++++++++++++++++++++++++++------ 3 files changed, 123 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f0add6f..d0d7320 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "base64" +version = "0.20.0-alpha.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "149ea5dc24cb11513350770afebba32b68e3d2e356f9221351a2a1ee89112a82" + [[package]] name = "bitflags" version = "1.3.2" @@ -139,6 +145,15 @@ dependencies = [ "syn", ] +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array", +] + [[package]] name = "dtoa" version = "0.4.8" @@ -179,6 +194,16 @@ dependencies = [ "byteorder", ] +[[package]] +name = "generic-array" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.1.16" @@ -341,6 +366,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "meowhash" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31cc124c1dc48285daa66a3bd5e9f083fab93cb1fddafd661fe1862a883089b" +dependencies = [ + "digest", +] + [[package]] name = "new_debug_unreachable" version = "1.0.4" @@ -852,13 +886,21 @@ dependencies = [ "serde", ] +[[package]] +name = "typenum" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" + [[package]] name = "typeset" -version = "0.1.0" +version = "0.1.1" dependencies = [ + "base64", "chrono", "html5ever 0.26.0", "markdown", + "meowhash", "nipper", "rayon", "regex", @@ -878,6 +920,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "wasi" version = "0.9.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index fc7ae06..93ae405 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "typeset" -version = "0.1.0" +version = "0.1.1" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -16,4 +16,6 @@ markdown = "0.3" rayon = "1.5" nipper = "0.1.9" html5ever = "0.26.0" -chrono = "0.4.19" \ No newline at end of file +chrono = "0.4.19" +meowhash = "0.3.0" +base64 = "0.20.0-alpha.1" \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index c5dd929..52302b7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,9 +1,11 @@ +use std::borrow::Borrow; +use std::collections::HashMap; use chrono::NaiveDateTime; use html5ever::tendril::SliceExt; use nipper::{Document, Selection}; use rayon::prelude::*; use regex::Regex; -use serde::Deserialize; +use serde::{Serialize, Deserialize}; use std::env::current_dir; use std::fs; @@ -11,6 +13,8 @@ use std::fs::{read_to_string, File}; use std::io::ErrorKind::NotFound; use std::io::{Error, Write}; use std::path::PathBuf; +use html5ever::tendril::fmt::Slice; +use meowhash::{MeowHash, MeowHasher}; extern crate markdown; @@ -18,18 +22,38 @@ fn main() -> Result<(), Error> { let schema_version = 1; println!("Looking for a typeset.toml file..."); let cwd = current_dir()?; + let mut config_update = false; let config_file = cwd.join("typeset.toml"); if !config_file.exists() { eprintln!("typeset.toml not found!"); return Err(Error::from(NotFound)); } - let config: Config = toml::from_str(&*read_to_string(config_file.clone())?)?; + let config_body = read_to_string(config_file.clone())?; + let config_hash = MeowHasher::hash(config_body.as_bytes()).into_bytes(); + let config: Config = toml::from_str(&*config_body)?; + if schema_version != config.schema_version { panic!("Schema version does not match this version of Typeset! Please update Typeset.") } let template_file = config_file.with_file_name(config.template); let index_file = config_file.with_file_name(config.index); + let hash_file = config_file.with_file_name("hashes.toml"); + let out_path = cwd.join(PathBuf::from(config.output)); + if !hash_file.exists() { + println!("Creating a new blog, since no hashes have been calculated"); + config_update = true; + } + let hash_body = read_to_string(hash_file.clone()).unwrap_or(String::new()); + let hash_body: HashFile = toml::from_str(&*hash_body).unwrap_or(HashFile{hashes: HashMap::new()}); + let in_hashes: HashMap = hash_body.hashes.iter().map(|(k, v)| { + let mut array = [0u8; 128]; + let decoded = base64::decode(v).unwrap(); + let slice = decoded.as_slice(); + array = <[u8; 128]>::try_from(slice).unwrap(); + (k.clone(), array) + }).collect::>(); + let mut out_hashes: HashMap = HashMap::new(); if !template_file.exists() { eprintln!("template does not exist!"); return Err(Error::from(NotFound)); @@ -37,6 +61,24 @@ fn main() -> Result<(), Error> { if !index_file.exists() { println!("Index not found, assuming it is not present") } + + if in_hashes.get("config").unwrap_or(&[0; 128]) != &config_hash { + config_update = true; + out_hashes.insert("config".to_string(), config_hash); + } + let template = read_to_string(template_file)?; + let template_hash = MeowHasher::hash(template.as_bytes()).into_bytes(); + if in_hashes.get("template").unwrap_or(&[0; 128]) != &template_hash { + config_update = true; + out_hashes.insert("template".to_string(), template_hash); + } + let index_body = read_to_string(index_file)?; + let index_hash = MeowHasher::hash(index_body.as_bytes()).into_bytes(); + if in_hashes.get("index").unwrap_or(&[0; 128]) != &index_hash { + config_update = true; + out_hashes.insert("index".to_string(), index_hash); + } + let index = Document::from(&*index_body); println!("Read configuration successfully! Converting markdown files to html..."); let mut posts: Vec = vec![]; @@ -50,22 +92,38 @@ fn main() -> Result<(), Error> { .map(|file| file.unwrap().path()) .collect(); println!("Found {} files matching the pattern", input_files.len()); - posts.par_extend(input_files.par_iter().map(|file| { - let content = read_to_string(file).unwrap(); + for file in input_files { + let content = read_to_string(&file).unwrap(); + let hash = MeowHasher::hash(content.as_bytes()).into_bytes(); let content = content.splitn(2, "\n\n").collect::>(); let settings: PostSettings = toml::from_str(content[0]).unwrap(); let body = markdown::to_html(content[1]); - Post { + let post = Post { id: file.file_stem().unwrap().to_str().unwrap().to_string(), body, title: settings.title, published: NaiveDateTime::parse_from_str(&*settings.published, &*config.time_format) .unwrap(), + }; + if in_hashes.get(&*post.id).unwrap_or(&[0; 128]) != &hash || config_update { + out_hashes.insert(post.id.clone(), hash); + posts.push(post); } - })); + }; + let mut write_hashes = in_hashes.clone(); + write_hashes.extend(out_hashes.iter().map(|(k, v)| {(k.clone(), v.clone())})); + println!("Writing hash table..."); + let toml = toml::to_string(&HashFile { hashes: write_hashes.iter().map(|(k, v)| { + (k.clone(), base64::encode(v)) + }).collect::>()}).unwrap(); + let mut file = File::create(hash_file)?; + file.write_all(toml.as_bytes())?; + if out_hashes.len() == 0 { + println!("Nothing to do"); + return Ok(()); + } println!("Successfully converted to HTML! Creating documents..."); - let template = read_to_string(template_file)?; - let index = Document::from(read_to_string(index_file)?.as_str()); + for post in &posts { let template_html = Document::from(template.as_str()); let output_html = template_html; @@ -155,6 +213,10 @@ pub struct Config { pub output: String, pub time_format: String, } +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct HashFile { + pub hashes: HashMap +} fn nth_parent(selection: Selection, n: usize) -> Selection { let mut sel: Selection = selection; for _ in 0..n {