partial implementation of hashing?

This commit is contained in:
Lilly Rosaline 2022-07-02 14:39:37 -05:00
parent 7bbc6b8d5d
commit d834df1e0d
3 changed files with 123 additions and 11 deletions

50
Cargo.lock generated
View File

@ -17,6 +17,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "base64"
version = "0.20.0-alpha.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "149ea5dc24cb11513350770afebba32b68e3d2e356f9221351a2a1ee89112a82"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "1.3.2" version = "1.3.2"
@ -139,6 +145,15 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "digest"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
dependencies = [
"generic-array",
]
[[package]] [[package]]
name = "dtoa" name = "dtoa"
version = "0.4.8" version = "0.4.8"
@ -179,6 +194,16 @@ dependencies = [
"byteorder", "byteorder",
] ]
[[package]]
name = "generic-array"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803"
dependencies = [
"typenum",
"version_check",
]
[[package]] [[package]]
name = "getrandom" name = "getrandom"
version = "0.1.16" version = "0.1.16"
@ -341,6 +366,15 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "meowhash"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31cc124c1dc48285daa66a3bd5e9f083fab93cb1fddafd661fe1862a883089b"
dependencies = [
"digest",
]
[[package]] [[package]]
name = "new_debug_unreachable" name = "new_debug_unreachable"
version = "1.0.4" version = "1.0.4"
@ -852,13 +886,21 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "typenum"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
[[package]] [[package]]
name = "typeset" name = "typeset"
version = "0.1.0" version = "0.1.1"
dependencies = [ dependencies = [
"base64",
"chrono", "chrono",
"html5ever 0.26.0", "html5ever 0.26.0",
"markdown", "markdown",
"meowhash",
"nipper", "nipper",
"rayon", "rayon",
"regex", "regex",
@ -878,6 +920,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]] [[package]]
name = "wasi" name = "wasi"
version = "0.9.0+wasi-snapshot-preview1" version = "0.9.0+wasi-snapshot-preview1"

View File

@ -1,6 +1,6 @@
[package] [package]
name = "typeset" name = "typeset"
version = "0.1.0" version = "0.1.1"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@ -16,4 +16,6 @@ markdown = "0.3"
rayon = "1.5" rayon = "1.5"
nipper = "0.1.9" nipper = "0.1.9"
html5ever = "0.26.0" html5ever = "0.26.0"
chrono = "0.4.19" chrono = "0.4.19"
meowhash = "0.3.0"
base64 = "0.20.0-alpha.1"

View File

@ -1,9 +1,11 @@
use std::borrow::Borrow;
use std::collections::HashMap;
use chrono::NaiveDateTime; use chrono::NaiveDateTime;
use html5ever::tendril::SliceExt; use html5ever::tendril::SliceExt;
use nipper::{Document, Selection}; use nipper::{Document, Selection};
use rayon::prelude::*; use rayon::prelude::*;
use regex::Regex; use regex::Regex;
use serde::Deserialize; use serde::{Serialize, Deserialize};
use std::env::current_dir; use std::env::current_dir;
use std::fs; use std::fs;
@ -11,6 +13,8 @@ use std::fs::{read_to_string, File};
use std::io::ErrorKind::NotFound; use std::io::ErrorKind::NotFound;
use std::io::{Error, Write}; use std::io::{Error, Write};
use std::path::PathBuf; use std::path::PathBuf;
use html5ever::tendril::fmt::Slice;
use meowhash::{MeowHash, MeowHasher};
extern crate markdown; extern crate markdown;
@ -18,18 +22,38 @@ fn main() -> Result<(), Error> {
let schema_version = 1; let schema_version = 1;
println!("Looking for a typeset.toml file..."); println!("Looking for a typeset.toml file...");
let cwd = current_dir()?; let cwd = current_dir()?;
let mut config_update = false;
let config_file = cwd.join("typeset.toml"); let config_file = cwd.join("typeset.toml");
if !config_file.exists() { if !config_file.exists() {
eprintln!("typeset.toml not found!"); eprintln!("typeset.toml not found!");
return Err(Error::from(NotFound)); return Err(Error::from(NotFound));
} }
let config: Config = toml::from_str(&*read_to_string(config_file.clone())?)?; let config_body = read_to_string(config_file.clone())?;
let config_hash = MeowHasher::hash(config_body.as_bytes()).into_bytes();
let config: Config = toml::from_str(&*config_body)?;
if schema_version != config.schema_version { if schema_version != config.schema_version {
panic!("Schema version does not match this version of Typeset! Please update Typeset.") panic!("Schema version does not match this version of Typeset! Please update Typeset.")
} }
let template_file = config_file.with_file_name(config.template); let template_file = config_file.with_file_name(config.template);
let index_file = config_file.with_file_name(config.index); let index_file = config_file.with_file_name(config.index);
let hash_file = config_file.with_file_name("hashes.toml");
let out_path = cwd.join(PathBuf::from(config.output)); let out_path = cwd.join(PathBuf::from(config.output));
if !hash_file.exists() {
println!("Creating a new blog, since no hashes have been calculated");
config_update = true;
}
let hash_body = read_to_string(hash_file.clone()).unwrap_or(String::new());
let hash_body: HashFile = toml::from_str(&*hash_body).unwrap_or(HashFile{hashes: HashMap::new()});
let in_hashes: HashMap<String, [u8; 128]> = hash_body.hashes.iter().map(|(k, v)| {
let mut array = [0u8; 128];
let decoded = base64::decode(v).unwrap();
let slice = decoded.as_slice();
array = <[u8; 128]>::try_from(slice).unwrap();
(k.clone(), array)
}).collect::<HashMap<String, [u8; 128]>>();
let mut out_hashes: HashMap<String, [u8; 128]> = HashMap::new();
if !template_file.exists() { if !template_file.exists() {
eprintln!("template does not exist!"); eprintln!("template does not exist!");
return Err(Error::from(NotFound)); return Err(Error::from(NotFound));
@ -37,6 +61,24 @@ fn main() -> Result<(), Error> {
if !index_file.exists() { if !index_file.exists() {
println!("Index not found, assuming it is not present") println!("Index not found, assuming it is not present")
} }
if in_hashes.get("config").unwrap_or(&[0; 128]) != &config_hash {
config_update = true;
out_hashes.insert("config".to_string(), config_hash);
}
let template = read_to_string(template_file)?;
let template_hash = MeowHasher::hash(template.as_bytes()).into_bytes();
if in_hashes.get("template").unwrap_or(&[0; 128]) != &template_hash {
config_update = true;
out_hashes.insert("template".to_string(), template_hash);
}
let index_body = read_to_string(index_file)?;
let index_hash = MeowHasher::hash(index_body.as_bytes()).into_bytes();
if in_hashes.get("index").unwrap_or(&[0; 128]) != &index_hash {
config_update = true;
out_hashes.insert("index".to_string(), index_hash);
}
let index = Document::from(&*index_body);
println!("Read configuration successfully! Converting markdown files to html..."); println!("Read configuration successfully! Converting markdown files to html...");
let mut posts: Vec<Post> = vec![]; let mut posts: Vec<Post> = vec![];
@ -50,22 +92,38 @@ fn main() -> Result<(), Error> {
.map(|file| file.unwrap().path()) .map(|file| file.unwrap().path())
.collect(); .collect();
println!("Found {} files matching the pattern", input_files.len()); println!("Found {} files matching the pattern", input_files.len());
posts.par_extend(input_files.par_iter().map(|file| { for file in input_files {
let content = read_to_string(file).unwrap(); let content = read_to_string(&file).unwrap();
let hash = MeowHasher::hash(content.as_bytes()).into_bytes();
let content = content.splitn(2, "\n\n").collect::<Vec<&str>>(); let content = content.splitn(2, "\n\n").collect::<Vec<&str>>();
let settings: PostSettings = toml::from_str(content[0]).unwrap(); let settings: PostSettings = toml::from_str(content[0]).unwrap();
let body = markdown::to_html(content[1]); let body = markdown::to_html(content[1]);
Post { let post = Post {
id: file.file_stem().unwrap().to_str().unwrap().to_string(), id: file.file_stem().unwrap().to_str().unwrap().to_string(),
body, body,
title: settings.title, title: settings.title,
published: NaiveDateTime::parse_from_str(&*settings.published, &*config.time_format) published: NaiveDateTime::parse_from_str(&*settings.published, &*config.time_format)
.unwrap(), .unwrap(),
};
if in_hashes.get(&*post.id).unwrap_or(&[0; 128]) != &hash || config_update {
out_hashes.insert(post.id.clone(), hash);
posts.push(post);
} }
})); };
let mut write_hashes = in_hashes.clone();
write_hashes.extend(out_hashes.iter().map(|(k, v)| {(k.clone(), v.clone())}));
println!("Writing hash table...");
let toml = toml::to_string(&HashFile { hashes: write_hashes.iter().map(|(k, v)| {
(k.clone(), base64::encode(v))
}).collect::<HashMap<String, String>>()}).unwrap();
let mut file = File::create(hash_file)?;
file.write_all(toml.as_bytes())?;
if out_hashes.len() == 0 {
println!("Nothing to do");
return Ok(());
}
println!("Successfully converted to HTML! Creating documents..."); println!("Successfully converted to HTML! Creating documents...");
let template = read_to_string(template_file)?;
let index = Document::from(read_to_string(index_file)?.as_str());
for post in &posts { for post in &posts {
let template_html = Document::from(template.as_str()); let template_html = Document::from(template.as_str());
let output_html = template_html; let output_html = template_html;
@ -155,6 +213,10 @@ pub struct Config {
pub output: String, pub output: String,
pub time_format: String, pub time_format: String,
} }
/// On-disk representation of the hash table kept in `hashes.toml`.
///
/// `main` reads this file at startup to learn which inputs were already
/// processed, and rewrites it after hashing the current inputs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HashFile {
    /// Maps an input name (`"config"`, `"template"`, `"index"`, or a post id)
    /// to the base64-encoded hash of that input's contents.
    pub hashes: HashMap<String, String>,
}
fn nth_parent(selection: Selection, n: usize) -> Selection { fn nth_parent(selection: Selection, n: usize) -> Selection {
let mut sel: Selection = selection; let mut sel: Selection = selection;
for _ in 0..n { for _ in 0..n {