From 78c8e56e6650de42518775943cbeacbde49f2fc6 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Wed, 16 Oct 2024 11:39:12 +1100 Subject: [PATCH] Update tool to work with macros and reference types --- Cargo.lock | 54 ++++++++++++++++------- Cargo.toml | 4 +- src/main.rs | 120 +++++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 137 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0199a20..4cc167f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,12 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.7" +version = "1.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc" +checksum = "b16803a61b81d9eabb7eae2588776c4c1e584b738ede45fdbb4c972cec1e9945" +dependencies = [ + "shlex", +] [[package]] name = "memchr" @@ -31,9 +34,9 @@ checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" [[package]] name = "regex" -version = "1.10.5" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" dependencies = [ "aho-corasick", "memchr", @@ -43,9 +46,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.7" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" dependencies = [ "aho-corasick", "memchr", @@ -54,9 +57,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rust-organizer" @@ -69,21 +72,42 @@ dependencies = [ ] [[package]] -name = "tree-sitter" -version = "0.22.6" +name = "shlex" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "tree-sitter" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9871f16d6cf5c4757dcf30d5d2172a2df6987c510c017bbb7abfb7f9aa24d06" dependencies = [ "cc", "regex", + "regex-syntax", + "streaming-iterator", + "tree-sitter-language", ] [[package]] -name = "tree-sitter-rust" -version = "0.21.2" +name = "tree-sitter-language" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "277690f420bf90741dea984f3da038ace46c4fe6047cba57a66822226cde1c93" +checksum = "e8ddffe35a0e5eeeadf13ff7350af564c6e73993a24db62caee1822b185c2600" + +[[package]] +name = "tree-sitter-rust" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cffbbcb780348fbae8395742ae5b34c1fd794e4085d43aac9f259387f9a84dc8" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", ] diff --git a/Cargo.toml b/Cargo.toml index 5bcaf3a..c992cf0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,8 +7,8 @@ authors = ["Jip J. Dekker "] [dependencies] pico-args = "0.5.0" -tree-sitter = "0.22.6" -tree-sitter-rust = "0.21.2" +tree-sitter = "0.24.3" +tree-sitter-rust = "0.23.0" [build-dependencies] cc = "*" diff --git a/src/main.rs b/src/main.rs index 13e3817..dbb70ea 100644 --- a/src/main.rs +++ b/src/main.rs @@ -26,6 +26,10 @@ struct Cli { #[derive(Debug, Clone, PartialEq, Eq, Hash)] enum Item<'a> { InnerDoc(Cow<'a, str>), + Macro { + name: &'a str, + content: Cow<'a, str>, + }, ModDecl { name: &'a str, content: Cow<'a, str>, @@ -48,6 +52,7 @@ enum Item<'a> { trt: Option<&'a str>, content: SortableContent<'a>, }, + MacroInvocation(Cow<'a, str>), Mod { name: &'a str, content: SortableContent<'a>, @@ -70,6 +75,7 @@ struct SortableContent<'a> { struct TypeIdent<'a> { name: &'a str, generics: Option<&'a str>, + reference_type: Option<&'a str>, } fn main() -> ExitCode { @@ -100,7 +106,7 @@ impl Cli { fn run(&self) -> Result { let mut parser = Parser::new(); parser - .set_language(&tree_sitter_rust::language()) + .set_language(&tree_sitter_rust::LANGUAGE.into()) .expect("Error loading Rust grammar"); let text = std::fs::read_to_string(&self.path) @@ -172,16 +178,36 @@ impl TryFrom for Cli { } impl<'a> Item<'a> { + fn append_content(&mut self, text: &str) { + match self { + Item::Macro { content, .. } + | Item::ModDecl { content, .. } + | Item::Const { content, .. } + | Item::Type { content, .. } + | Item::Func { content, .. } + | Item::InnerDoc(content) + | Item::Use(content) + | Item::MacroInvocation(content) => { + *content = Cow::Owned(format!("{}{}", content, text)); + } + Item::Impl { .. } | Item::Mod { .. } => { + // Cannot add content to these items + } + } + } + fn item_order(&self) -> u8 { match self { Item::InnerDoc(_) => 0, - Item::ModDecl { .. } => 1, - Item::Use(_) => 2, - Item::Const { .. } => 3, - Item::Type { .. } => 4, - Item::Func { .. } => 5, - Item::Impl { .. } => 6, - Item::Mod { .. } => 7, + Item::Macro { .. } => 1, + Item::ModDecl { .. } => 2, + Item::Use(_) => 3, + Item::Const { .. } => 4, + Item::Type { .. } => 5, + Item::Func { .. } => 6, + Item::Impl { .. } => 7, + Item::MacroInvocation(_) => 8, + Item::Mod { .. } => 9, } } @@ -211,7 +237,7 @@ impl<'a> Item<'a> { let name = get_field_str("name").unwrap(); Some(Self::Const { name, content }) } - "enum_item" | "struct_item" => { + "enum_item" | "struct_item" | "trait_item" | "type_item" => { let name = get_field_str("name").unwrap(); Some(Self::Type { name, content }) } @@ -225,6 +251,11 @@ impl<'a> Item<'a> { let content = SortableContent::within_node(text, node, Some(start), "body"); Some(Self::Impl { name, trt, content }) } + "macro_definition" => { + let name = get_field_str("name").unwrap(); + Some(Self::Macro { name, content }) + } + "macro_invocation" => Some(Self::MacroInvocation(content)), "mod_item" => { let name = get_field_str("name").unwrap(); if node.child_by_field_name("body").is_some() { @@ -235,7 +266,11 @@ impl<'a> Item<'a> { } } "use_declaration" => Some(Self::Use(content)), - _ => panic!("unexpected node kind: {}", node.kind()), + _ => panic!( + "unexpected node kind: {}\ncontent: {}", + node.kind(), + content + ), } } } @@ -244,6 +279,8 @@ impl Display for Item<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Item::InnerDoc(content) + | Item::Macro { content, .. } + | Item::MacroInvocation(content) | Item::ModDecl { content, .. } | Item::Use(content) | Item::Const { content, .. } @@ -266,10 +303,14 @@ impl Ord for Item<'_> { match (self, other) { (Item::InnerDoc(_), Item::InnerDoc(_)) => Ordering::Equal, (Item::Const { name: a, .. }, Item::Const { name: b, .. }) + | (Item::Macro { name: a, .. }, Item::Macro { name: b, .. }) | (Item::Mod { name: a, .. }, Item::Mod { name: b, .. }) + | (Item::ModDecl { name: a, .. }, Item::ModDecl { name: b, .. }) | (Item::Type { name: a, .. }, Item::Type { name: b, .. }) | (Item::Func { name: a, .. }, Item::Func { name: b, .. }) => a.cmp(b), - (Item::Use(_), Item::Use(_)) => Ordering::Equal, + (Item::Use(_), Item::Use(_)) | (Item::MacroInvocation(_), Item::MacroInvocation(_)) => { + Ordering::Equal + } ( Item::Impl { name: a, trt: t_a, .. @@ -282,7 +323,9 @@ impl Ord for Item<'_> { if name_order == Ordering::Equal { let trt_order = t_a.unwrap_or("").cmp(t_b.unwrap_or("")); if trt_order == Ordering::Equal { - a.generics.unwrap_or("").cmp(&b.generics.unwrap_or("")) + let a_parts = (a.generics.unwrap_or(""), a.reference_type.unwrap_or("")); + let b_parts = (b.generics.unwrap_or(""), b.reference_type.unwrap_or("")); + a_parts.cmp(&b_parts) } else { trt_order } @@ -290,7 +333,10 @@ impl Ord for Item<'_> { name_order } } - _ => unreachable!(), + _ => { + // eprintln!("{} -- {}", self, other); + unreachable!(); + } } } } @@ -307,7 +353,7 @@ impl<'a> Module<'a> { let mut cursor = root.walk(); cursor.goto_first_child(); - let mut items = Vec::new(); + let mut items: Vec<(bool, Item)> = Vec::new(); let mut start = None; let mut last = None; if cursor.node().kind() == "{" { @@ -315,11 +361,26 @@ impl<'a> Module<'a> { cursor.goto_next_sibling(); } loop { + if cursor.node().kind() == "}" { + assert!(!cursor.goto_next_sibling()); + break; + } let node = cursor.node(); - // println!("{} : {}\n\n", node.kind(), node.to_sexp()); - if let Some(item) = Item::maybe_item(&text, node, start) { - let inbetween = - &text[last.unwrap_or(root.start_byte())..start.unwrap_or(node.start_byte())]; + // eprintln!("{} : {}\n\n", node.kind(), node.to_sexp()); + let inbetween = + &text[last.unwrap_or(root.start_byte())..start.unwrap_or(node.start_byte())]; + if node.kind() == "empty_statement" { + if let Some((_, it)) = items.last_mut() { + it.append_content(";"); + } + debug_assert!( + inbetween.trim().is_empty(), + "unexpected skipped content: {:?}", + inbetween + ); + start = None; + last = Some(node.end_byte()); + } else if let Some(item) = Item::maybe_item(&text, node, start) { debug_assert!( inbetween.trim().is_empty(), "unexpected skipped content: {:?}", @@ -335,10 +396,6 @@ impl<'a> Module<'a> { if !cursor.goto_next_sibling() { break; } - if cursor.node().kind() == "}" { - assert!(!cursor.goto_next_sibling()); - break; - } } Self { items } @@ -356,7 +413,8 @@ impl<'a> Module<'a> { } } for window in self.items.windows(2) { - if window[0] > window[1] { + if window[0].1 > window[1].1 { + // eprintln!("{:?} {:?}", window[0].1, window[1].1); if print_diff { eprintln!( "Expected \n\"\"\"\n{}\n\"\"\"\n before \n\"\"\"\n{}\n\"\"\"", @@ -448,12 +506,26 @@ impl<'a> TypeIdent<'a> { "type_identifier" => Self { name: node.utf8_text(text.as_bytes()).unwrap(), generics: None, + reference_type: None, }, "generic_type" => { let name = get_field_str("type").unwrap(); let generics = get_field_str("type_arguments"); debug_assert!(generics.is_some()); - Self { name, generics } + Self { + name, + generics, + reference_type: None, + } + } + "reference_type" => { + let inner = node.child_by_field_name("type").unwrap(); + let mut ty = TypeIdent::from_node(text, inner); + let reference_str = + std::str::from_utf8(&text.as_bytes()[node.start_byte()..inner.start_byte()]) + .unwrap(); + ty.reference_type = Some(reference_str); + ty } _ => panic!("invalid type identifier node: {}", node.kind()), }