partial implementation of hashing?

This commit is contained in:
Lilly Rosaline 2022-07-02 14:39:37 -05:00
parent 7bbc6b8d5d
commit d834df1e0d
3 changed files with 123 additions and 11 deletions

50
Cargo.lock generated
View file

@ -17,6 +17,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "base64"
version = "0.20.0-alpha.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "149ea5dc24cb11513350770afebba32b68e3d2e356f9221351a2a1ee89112a82"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -139,6 +145,15 @@ dependencies = [
"syn",
]
[[package]]
name = "digest"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066"
dependencies = [
"generic-array",
]
[[package]]
name = "dtoa"
version = "0.4.8"
@ -179,6 +194,16 @@ dependencies = [
"byteorder",
]
[[package]]
name = "generic-array"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getrandom"
version = "0.1.16"
@ -341,6 +366,15 @@ dependencies = [
"autocfg",
]
[[package]]
name = "meowhash"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31cc124c1dc48285daa66a3bd5e9f083fab93cb1fddafd661fe1862a883089b"
dependencies = [
"digest",
]
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
@ -852,13 +886,21 @@ dependencies = [
"serde",
]
[[package]]
name = "typenum"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987"
[[package]]
name = "typeset"
version = "0.1.0"
version = "0.1.1"
dependencies = [
"base64",
"chrono",
"html5ever 0.26.0",
"markdown",
"meowhash",
"nipper",
"rayon",
"regex",
@ -878,6 +920,12 @@ version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.9.0+wasi-snapshot-preview1"

View file

@ -1,6 +1,6 @@
[package]
name = "typeset"
version = "0.1.0"
version = "0.1.1"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@ -16,4 +16,6 @@ markdown = "0.3"
rayon = "1.5"
nipper = "0.1.9"
html5ever = "0.26.0"
chrono = "0.4.19"
chrono = "0.4.19"
meowhash = "0.3.0"
base64 = "0.20.0-alpha.1"

View file

@ -1,9 +1,11 @@
use std::borrow::Borrow;
use std::collections::HashMap;
use chrono::NaiveDateTime;
use html5ever::tendril::SliceExt;
use nipper::{Document, Selection};
use rayon::prelude::*;
use regex::Regex;
use serde::Deserialize;
use serde::{Serialize, Deserialize};
use std::env::current_dir;
use std::fs;
@ -11,6 +13,8 @@ use std::fs::{read_to_string, File};
use std::io::ErrorKind::NotFound;
use std::io::{Error, Write};
use std::path::PathBuf;
use html5ever::tendril::fmt::Slice;
use meowhash::{MeowHash, MeowHasher};
extern crate markdown;
@ -18,18 +22,38 @@ fn main() -> Result<(), Error> {
let schema_version = 1;
println!("Looking for a typeset.toml file...");
let cwd = current_dir()?;
let mut config_update = false;
let config_file = cwd.join("typeset.toml");
if !config_file.exists() {
eprintln!("typeset.toml not found!");
return Err(Error::from(NotFound));
}
let config: Config = toml::from_str(&*read_to_string(config_file.clone())?)?;
let config_body = read_to_string(config_file.clone())?;
let config_hash = MeowHasher::hash(config_body.as_bytes()).into_bytes();
let config: Config = toml::from_str(&*config_body)?;
if schema_version != config.schema_version {
panic!("Schema version does not match this version of Typeset! Please update Typeset.")
}
let template_file = config_file.with_file_name(config.template);
let index_file = config_file.with_file_name(config.index);
let hash_file = config_file.with_file_name("hashes.toml");
let out_path = cwd.join(PathBuf::from(config.output));
if !hash_file.exists() {
println!("Creating a new blog, since no hashes have been calculated");
config_update = true;
}
let hash_body = read_to_string(hash_file.clone()).unwrap_or(String::new());
let hash_body: HashFile = toml::from_str(&*hash_body).unwrap_or(HashFile{hashes: HashMap::new()});
let in_hashes: HashMap<String, [u8; 128]> = hash_body.hashes.iter().map(|(k, v)| {
let mut array = [0u8; 128];
let decoded = base64::decode(v).unwrap();
let slice = decoded.as_slice();
array = <[u8; 128]>::try_from(slice).unwrap();
(k.clone(), array)
}).collect::<HashMap<String, [u8; 128]>>();
let mut out_hashes: HashMap<String, [u8; 128]> = HashMap::new();
if !template_file.exists() {
eprintln!("template does not exist!");
return Err(Error::from(NotFound));
@ -37,6 +61,24 @@ fn main() -> Result<(), Error> {
if !index_file.exists() {
println!("Index not found, assuming it is not present")
}
if in_hashes.get("config").unwrap_or(&[0; 128]) != &config_hash {
config_update = true;
out_hashes.insert("config".to_string(), config_hash);
}
let template = read_to_string(template_file)?;
let template_hash = MeowHasher::hash(template.as_bytes()).into_bytes();
if in_hashes.get("template").unwrap_or(&[0; 128]) != &template_hash {
config_update = true;
out_hashes.insert("template".to_string(), template_hash);
}
let index_body = read_to_string(index_file)?;
let index_hash = MeowHasher::hash(index_body.as_bytes()).into_bytes();
if in_hashes.get("index").unwrap_or(&[0; 128]) != &index_hash {
config_update = true;
out_hashes.insert("index".to_string(), index_hash);
}
let index = Document::from(&*index_body);
println!("Read configuration successfully! Converting markdown files to html...");
let mut posts: Vec<Post> = vec![];
@ -50,22 +92,38 @@ fn main() -> Result<(), Error> {
.map(|file| file.unwrap().path())
.collect();
println!("Found {} files matching the pattern", input_files.len());
posts.par_extend(input_files.par_iter().map(|file| {
let content = read_to_string(file).unwrap();
for file in input_files {
let content = read_to_string(&file).unwrap();
let hash = MeowHasher::hash(content.as_bytes()).into_bytes();
let content = content.splitn(2, "\n\n").collect::<Vec<&str>>();
let settings: PostSettings = toml::from_str(content[0]).unwrap();
let body = markdown::to_html(content[1]);
Post {
let post = Post {
id: file.file_stem().unwrap().to_str().unwrap().to_string(),
body,
title: settings.title,
published: NaiveDateTime::parse_from_str(&*settings.published, &*config.time_format)
.unwrap(),
};
if in_hashes.get(&*post.id).unwrap_or(&[0; 128]) != &hash || config_update {
out_hashes.insert(post.id.clone(), hash);
posts.push(post);
}
}));
};
let mut write_hashes = in_hashes.clone();
write_hashes.extend(out_hashes.iter().map(|(k, v)| {(k.clone(), v.clone())}));
println!("Writing hash table...");
let toml = toml::to_string(&HashFile { hashes: write_hashes.iter().map(|(k, v)| {
(k.clone(), base64::encode(v))
}).collect::<HashMap<String, String>>()}).unwrap();
let mut file = File::create(hash_file)?;
file.write_all(toml.as_bytes())?;
if out_hashes.len() == 0 {
println!("Nothing to do");
return Ok(());
}
println!("Successfully converted to HTML! Creating documents...");
let template = read_to_string(template_file)?;
let index = Document::from(read_to_string(index_file)?.as_str());
for post in &posts {
let template_html = Document::from(template.as_str());
let output_html = template_html;
@ -155,6 +213,10 @@ pub struct Config {
pub output: String,
pub time_format: String,
}
/// Serialized form of the hash table that `main` loads from and writes back to
/// `hashes.toml` (located next to `typeset.toml`).
///
/// `main` falls back to an empty table when the file cannot be read or does not
/// parse as TOML, so a missing or malformed file is treated like a first run.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct HashFile {
/// Maps an entry name to the base64-encoded bytes of its content hash.
///
/// `main` stores entries under the keys `"config"`, `"template"` and `"index"`,
/// plus one entry per post keyed by the post id (the input file's stem).
/// When loading, every value is base64-decoded and converted to `[u8; 128]`
/// with `unwrap`, so a value that is not valid base64 or does not decode to
/// exactly 128 bytes makes `main` panic.
pub hashes: HashMap<String, String>
}
fn nth_parent(selection: Selection, n: usize) -> Selection {
let mut sel: Selection = selection;
for _ in 0..n {