From f2f0eb72b65a438a10661e2212a1ddb952dfe985 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Thu, 1 Aug 2024 16:04:15 +1000 Subject: [PATCH] Implement a small tool that orders items in Rust files --- .gitignore | 1 + Cargo.lock | 89 ++++++++++++ Cargo.toml | 14 ++ README.md | 11 ++ rustfmt.toml | 3 + src/main.rs | 373 +++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 491 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 rustfmt.toml create mode 100644 src/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..0199a20 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,89 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "cc" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "pico-args" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315" + +[[package]] +name = "regex" +version = "1.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +dependencies = 
[ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + +[[package]] +name = "rust-organizer" +version = "0.1.0" +dependencies = [ + "cc", + "pico-args", + "tree-sitter", + "tree-sitter-rust", +] + +[[package]] +name = "tree-sitter" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca" +dependencies = [ + "cc", + "regex", +] + +[[package]] +name = "tree-sitter-rust" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "277690f420bf90741dea984f3da038ace46c4fe6047cba57a66822226cde1c93" +dependencies = [ + "cc", + "tree-sitter", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..5bcaf3a --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "rust-organizer" +version = "0.1.0" +edition = "2021" +description = "A tool to organize Rust files in an opinionated way." +authors = ["Jip J. Dekker "] + +[dependencies] +pico-args = "0.5.0" +tree-sitter = "0.22.6" +tree-sitter-rust = "0.21.2" + +[build-dependencies] +cc = "*" diff --git a/README.md b/README.md new file mode 100644 index 0000000..21ff8b4 --- /dev/null +++ b/README.md @@ -0,0 +1,11 @@ +# Rust Organizer + +An opinionated tool that organizes a Rust source file in the following order: + +1. `mod <name>;` +2. `use` +3. sorted `const`/`static` +4. sorted `struct`/`enum`/`union`/`type`/`trait` +5. `fn` +6. `impl` +7. `mod <name> { ... 
}`
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..9df34cd
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,3 @@
+hard_tabs = true
+imports_granularity = "Crate"
+group_imports = "StdExternalCrate"
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..ab92051
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,432 @@
+//! Reorganizes the top-level items of a Rust source file into a fixed order.
+
+use std::{cmp::Ordering, convert::Infallible, ops::Range, path::PathBuf, process::ExitCode};
+
+use pico_args::Arguments;
+use tree_sitter::{Node, Parser};
+
+/// Usage text printed for `-h`/`--help`.
+const CLI_HELP: &str = r#"USAGE
+  $ rust-organizer [-c] [-w] FILE
+
+ARGUMENTS
+  FILE      File name of the Rust source file to reorganize.
+
+FLAGS
+  -c, --check     Check whether reorganizing the file would change the file contents.
+  -h, --help      Print this help message.
+  -w, --write     Overwrite the file with the reorganized contents.
+"#;
+
+/// Half-open range of byte offsets into the source text.
+type ByteRange = Range<usize>;
+
+/// Parsed command line options.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+struct Cli {
+	/// Only report whether the file is already organized (`-c`/`--check`).
+	check: bool,
+	/// Write the reorganized contents back to `path` (`-w`/`--write`).
+	overwrite: bool,
+	/// Rust source file to reorganize.
+	path: PathBuf,
+}
+
+/// A top-level item of the source file.
+///
+/// Every `ByteRange` starts at the first comment or attribute attached to the
+/// item and ends at the end of the item itself.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+enum Item<'a> {
+	/// Inner doc comment (`//!`, `/*!`) or inner attribute (`#![...]`).
+	InnerDoc(ByteRange),
+	/// `mod name;` when `is_declaration` is set, `mod name { ... }` otherwise.
+	Mod {
+		name: &'a str,
+		is_declaration: bool,
+		content: ByteRange,
+	},
+	/// `use` declaration.
+	Use(ByteRange),
+	/// `const` or `static` item.
+	Const {
+		name: &'a str,
+		content: ByteRange,
+	},
+	/// `struct`, `enum`, `union`, `type`, or `trait` item.
+	Type {
+		name: &'a str,
+		content: ByteRange,
+	},
+	/// Free-standing function.
+	Func {
+		name: &'a str,
+		content: ByteRange,
+	},
+	/// `impl` block, optionally implementing the trait `trt`.
+	Impl {
+		name: TypeIdent<'a>,
+		trt: Option<&'a str>,
+		content: ByteRange,
+	},
+}
+
+/// The type an `impl` block is written for, split into its name and generic arguments.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+struct TypeIdent<'a> {
+	name: &'a str,
+	generics: Option<&'a str>,
+}
+
+fn main() -> ExitCode {
+	// Parse commandline arguments
+	let mut args = Arguments::from_env();
+	if args.contains(["-h", "--help"]) {
+		print!("{}", CLI_HELP);
+		return ExitCode::SUCCESS;
+	}
+	let cli: Cli = match args.try_into() {
+		Ok(cli) => cli,
+		Err(e) => {
+			eprintln!("Error: {}", e);
+			return ExitCode::FAILURE;
+		}
+	};
+	// Run the main program
+	match cli.run() {
+		Ok(code) => code,
+		Err(e) => {
+			eprintln!("Error: {}", e);
+			ExitCode::FAILURE
+		}
+	}
+}
+
+/// Joins the (already ordered) items into the new file contents.
+///
+/// A blank line separates two items when the original file had one before the
+/// item or when the kind of item changes. `trailing` is the text that followed
+/// the last item in the original file; it is kept at the end.
+fn render(text: &str, items: &[(bool, Item<'_>)], trailing: Option<&str>) -> String {
+	let mut output = String::with_capacity(text.len());
+	let mut last = None;
+	for (newline, item) in items {
+		let order = item.item_order();
+		// The output must not start with a blank line, so the first item never gets one.
+		if last.is_some() && (*newline || last != Some(order)) {
+			output.push('\n');
+		}
+		output.push_str(item.content(text));
+		output.push('\n');
+		last = Some(order);
+	}
+	if let Some(rest) = trailing.filter(|rest| !rest.is_empty()) {
+		if last.is_some() {
+			output.push('\n');
+		}
+		output.push_str(rest);
+		output.push('\n');
+	}
+	output
+}
+
+impl Cli {
+	/// Reads `self.path`, orders its top-level items, and checks, prints, or writes the result.
+	///
+	/// In check mode nothing is printed or written: the result is `ExitCode::FAILURE`
+	/// when two neighbouring items are out of order and `ExitCode::SUCCESS` otherwise.
+	///
+	/// # Errors
+	///
+	/// Returns a message when the file cannot be read, parsed, or written.
+	fn run(&self) -> Result<ExitCode, String> {
+		let mut parser = Parser::new();
+		parser
+			.set_language(&tree_sitter_rust::language())
+			.expect("Error loading Rust grammar");
+
+		let text = std::fs::read_to_string(&self.path)
+			.map_err(|e| format!("unable to read file: {}", e))?;
+
+		let Some(tree) = parser.parse(&text, None) else {
+			return Err("unable to parse file".to_owned());
+		};
+
+		let mut items = Vec::new();
+
+		let mut cursor = tree.walk();
+		assert_eq!(cursor.node().kind(), "source_file");
+		assert!(!cursor.goto_next_sibling());
+		// Start of the comments and attributes waiting for the item they belong to.
+		let mut start = None;
+		// An empty file has no children; the loop is then skipped entirely.
+		let mut has_node = cursor.goto_first_child();
+		while has_node {
+			let node = cursor.node();
+			// Moving text around a top-level syntax error could corrupt the file.
+			if node.is_error() {
+				return Err("unable to parse file: syntax error".to_owned());
+			}
+			if let Some(item) = Item::maybe_item(&text, node, start) {
+				let last = items.last();
+				let inbetween =
+					last.map(|(_, i): &(_, Item)| i.end_byte()).unwrap_or(0)..item.start_byte();
+				debug_assert!(text[inbetween.clone()].trim().is_empty());
+				let newline_before = text[inbetween].contains("\n\n");
+				items.push((newline_before, item));
+				start = None;
+			} else if start.is_none() {
+				start = Some(node.start_byte());
+			}
+			has_node = cursor.goto_next_sibling();
+		}
+		// Comments after the last item belong to no item; keep them at the end of the file.
+		let trailing = start.map(|s| text[s..].trim_end());
+
+		// Compare the items only: the blank-line flag must not influence the order.
+		let unsorted = items.windows(2).find(|w| w[0].1 > w[1].1);
+		if self.check {
+			let Some(w) = unsorted else {
+				return Ok(ExitCode::SUCCESS);
+			};
+			eprintln!(
+				"Expected \n\"\"\"\n{}\n\"\"\"\n before \n\"\"\"\n{}\n\"\"\"",
+				w[1].1.content(&text),
+				w[0].1.content(&text)
+			);
+			return Ok(ExitCode::FAILURE);
+		}
+		if self.overwrite && unsorted.is_none() {
+			return Ok(ExitCode::SUCCESS);
+		}
+
+		// The sort is stable: items that compare equal (e.g. `use`) keep their order.
+		items.sort_by(|a, b| a.1.cmp(&b.1));
+
+		let output = render(&text, &items, trailing);
+		if self.overwrite {
+			std::fs::write(&self.path, output)
+				.map_err(|e| format!("unable to write file: {}", e))?;
+		} else {
+			print!("{}", output);
+		}
+		Ok(ExitCode::SUCCESS)
+	}
+}
+
+impl TryFrom<Arguments> for Cli {
+	type Error = String;
+
+	/// Builds the options from the raw arguments.
+	///
+	/// # Errors
+	///
+	/// Returns a message when `FILE` is missing or when unknown arguments are left over.
+	fn try_from(mut args: Arguments) -> Result<Self, Self::Error> {
+		// Flags have to be consumed before the free-standing FILE argument.
+		let check = args.contains(["-c", "--check"]);
+		let overwrite = args.contains(["-w", "--write"]);
+		// A missing FILE is a user error: report it instead of panicking.
+		let path = args
+			.free_from_os_str::<_, Infallible>(|s| Ok(PathBuf::from(s)))
+			.map_err(|e| format!("invalid FILE argument: {}", e))?;
+
+		let remaining = args.finish();
+		match remaining.as_slice() {
+			[] => Ok(Cli {
+				check,
+				overwrite,
+				path,
+			}),
+			[arg] => Err(format!("unexpected argument: '{}'", arg.to_string_lossy())),
+			extra => Err(format!(
+				"unexpected arguments: {}",
+				extra.iter()
+					.map(|s| format!("'{}'", s.to_string_lossy()))
+					.collect::<Vec<_>>()
+					.join(", ")
+			)),
+		}
+	}
+}
+
+impl<'a> Item<'a> {
+	/// Byte range of the item, including the comments and attributes attached to it.
+	fn byte_range(&self) -> ByteRange {
+		match self {
+			Item::InnerDoc(content)
+			| Item::Mod { content, .. }
+			| Item::Use(content)
+			| Item::Const { content, .. }
+			| Item::Type { content, .. }
+			| Item::Func { content, .. }
+			| Item::Impl { content, .. } => content.clone(),
+		}
+	}
+
+	/// Text of the item as it appears in `text`, the source the item was parsed from.
+	fn content(&self, text: &'a str) -> &'a str {
+		&text[self.byte_range()]
+	}
+
+	/// Offset of the first byte after the item.
+	fn end_byte(&self) -> usize {
+		self.byte_range().end
+	}
+
+	/// Position of this kind of item in the organized file (lower comes first).
+	fn item_order(&self) -> u8 {
+		match self {
+			Item::InnerDoc(_) => 0,
+			Item::Mod {
+				is_declaration: true,
+				..
+			} => 1,
+			Item::Use(_) => 2,
+			Item::Const { .. } => 3,
+			Item::Type { .. } => 4,
+			Item::Func { .. } => 5,
+			Item::Impl { .. } => 6,
+			Item::Mod {
+				is_declaration: false,
+				..
+			} => 7,
+		}
+	}
+
+	/// Turns a top-level syntax node into an item.
+	///
+	/// `start` is the offset of the first pending comment or attribute, if any; it
+	/// becomes the start of the returned item. Returns `None` for nodes (attributes
+	/// and plain comments) that have to be attached to the item that follows them.
+	///
+	/// # Panics
+	///
+	/// Panics on node kinds that the tool does not know how to order.
+	fn maybe_item(text: &'a str, node: Node<'a>, start: Option<usize>) -> Option<Self> {
+		let field_text = |field_name: &str| {
+			node.child_by_field_name(field_name)
+				.map(|n| n.utf8_text(text.as_bytes()).unwrap())
+		};
+		// Every named item kind handled below carries a `name` field in the grammar.
+		let name = || field_text("name").unwrap();
+
+		let content = start.unwrap_or_else(|| node.start_byte())..node.end_byte();
+		match node.kind() {
+			// Ignore and add to the next item
+			"attribute_item" => None,
+			"block_comment" | "line_comment" => {
+				let comment = node.utf8_text(text.as_bytes()).unwrap();
+				if comment.starts_with("//!") || comment.starts_with("/*!") {
+					// Doc comment for the file (ensure that it's at the top of the file).
+					Some(Self::InnerDoc(content))
+				} else {
+					None // Move comment with the next item
+				}
+			}
+			// Inner attributes are only valid at the top of the file, so they stay there too.
+			"inner_attribute_item" => Some(Self::InnerDoc(content)),
+			"const_item" | "static_item" => Some(Self::Const {
+				name: name(),
+				content,
+			}),
+			"enum_item" | "struct_item" | "trait_item" | "type_item" | "union_item" => {
+				Some(Self::Type {
+					name: name(),
+					content,
+				})
+			}
+			"function_item" => Some(Self::Func {
+				name: name(),
+				content,
+			}),
+			"impl_item" => {
+				let name = TypeIdent::from_node(text, node.child_by_field_name("type").unwrap());
+				let trt = field_text("trait");
+				Some(Self::Impl { name, trt, content })
+			}
+			"mod_item" => Some(Self::Mod {
+				name: name(),
+				is_declaration: node.child_by_field_name("body").is_none(),
+				content,
+			}),
+			"use_declaration" => Some(Self::Use(content)),
+			_ => panic!("unexpected node kind: {}", node.kind()),
+		}
+	}
+
+	/// Offset of the first byte of the item.
+	fn start_byte(&self) -> usize {
+		self.byte_range().start
+	}
+}
+
+impl Ord for Item<'_> {
+	/// Orders items by kind first and by name within a kind.
+	///
+	/// `impl` blocks are ordered by type name, then trait, then generic arguments. Inner
+	/// docs and `use` declarations always compare equal, so a stable sort leaves them in
+	/// their original order.
+	fn cmp(&self, other: &Self) -> Ordering {
+		let by_kind = self.item_order().cmp(&other.item_order());
+		if by_kind != Ordering::Equal {
+			return by_kind;
+		}
+		match (self, other) {
+			(Item::InnerDoc(_), Item::InnerDoc(_)) | (Item::Use(_), Item::Use(_)) => {
+				Ordering::Equal
+			}
+			(Item::Const { name: a, .. }, Item::Const { name: b, .. })
+			| (Item::Mod { name: a, .. }, Item::Mod { name: b, .. })
+			| (Item::Type { name: a, .. }, Item::Type { name: b, .. })
+			| (Item::Func { name: a, .. }, Item::Func { name: b, .. }) => a.cmp(b),
+			(
+				Item::Impl {
+					name: a, trt: t_a, ..
+				},
+				Item::Impl {
+					name: b, trt: t_b, ..
+				},
+			) => a
+				.name
+				.cmp(b.name)
+				.then_with(|| t_a.unwrap_or("").cmp(t_b.unwrap_or("")))
+				.then_with(|| a.generics.unwrap_or("").cmp(b.generics.unwrap_or(""))),
+			// Equal `item_order` values imply that both items are of the same kind.
+			_ => unreachable!(),
+		}
+	}
+}
+
+impl PartialOrd for Item<'_> {
+	fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+		Some(self.cmp(other))
+	}
+}
+
+impl<'a> TypeIdent<'a> {
+	/// Splits the type node of an `impl` block into its name and generic arguments.
+	///
+	/// Types that are neither a plain nor a generic identifier (references, tuples,
+	/// paths, ...) are kept whole as the name, so that they can still be ordered.
+	fn from_node(text: &'a str, node: Node<'a>) -> Self {
+		let field_text = |field_name: &str| {
+			node.child_by_field_name(field_name)
+				.map(|n| n.utf8_text(text.as_bytes()).unwrap())
+		};
+
+		match node.kind() {
+			"generic_type" => {
+				let name = field_text("type").unwrap();
+				let generics = field_text("type_arguments");
+				debug_assert!(generics.is_some());
+				Self { name, generics }
+			}
+			// `type_identifier` and every other kind of type: the whole text is the name.
+			_ => Self {
+				name: node.utf8_text(text.as_bytes()).unwrap(),
+				generics: None,
+			},
+		}
+	}
+}