From f2f0eb72b65a438a10661e2212a1ddb952dfe985 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker" <jip@dekker.one>
Date: Thu, 1 Aug 2024 16:04:15 +1000
Subject: [PATCH] Implement a small tool that orders items in Rust files

---
 .gitignore   |   1 +
 Cargo.lock   |  89 ++++++++++++
 Cargo.toml   |  14 ++
 README.md    |  11 ++
 rustfmt.toml |   3 +
 src/main.rs  | 373 +++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 491 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.lock
 create mode 100644 Cargo.toml
 create mode 100644 README.md
 create mode 100644 rustfmt.toml
 create mode 100644 src/main.rs
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..0199a20
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,89 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "cc"
+version = "1.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc"
+
+[[package]]
+name = "memchr"
+version = "2.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
+
+[[package]]
+name = "pico-args"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315"
+
+[[package]]
+name = "regex"
+version = "1.10.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
+
+[[package]]
+name = "rust-organizer"
+version = "0.1.0"
+dependencies = [
+ "cc",
+ "pico-args",
+ "tree-sitter",
+ "tree-sitter-rust",
+]
+
+[[package]]
+name = "tree-sitter"
+version = "0.22.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca"
+dependencies = [
+ "cc",
+ "regex",
+]
+
+[[package]]
+name = "tree-sitter-rust"
+version = "0.21.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "277690f420bf90741dea984f3da038ace46c4fe6047cba57a66822226cde1c93"
+dependencies = [
+ "cc",
+ "tree-sitter",
+]
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..5bcaf3a
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "rust-organizer"
+version = "0.1.0"
+edition = "2021"
+description = "A tool to organize Rust files in an opinionated way."
+authors = ["Jip J. Dekker <jip@dekker.one>"]
+
+[dependencies]
+pico-args = "0.5.0"
+tree-sitter = "0.22.6"
+tree-sitter-rust = "0.21.2"
+
+[build-dependencies]
+cc = "*"
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..21ff8b4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,11 @@
+# Rust Organizer
+
+An opinionated tool that organizes a Rust source file in the following order:
+
+1. `mod <name>`
+2. `use`
+3. sorted `const`/`static`
+4. sorted `struct`/`enum`/`union`/`type`/`trait`
+5. `fn`
+6. `impl`
+7. `mod <name> { ... }`
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644
index 0000000..9df34cd
--- /dev/null
+++ b/rustfmt.toml
@@ -0,0 +1,3 @@
+hard_tabs = true
+imports_granularity = "Crate"
+group_imports = "StdExternalCrate"
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..ab92051
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,373 @@
+use std::{cmp::Ordering, convert::Infallible, ops::Range, path::PathBuf, process::ExitCode};
+
+use pico_args::Arguments;
+use tree_sitter::{Node, Parser};
+
+const CLI_HELP: &str = r#"USAGE
+  $ rust-organizer [-c] [-w] FILE
+
+ARGUMENTS
+  FILE    File name of the Rust source file to reorganize.
+
+FLAGS
+  -c, --check            Check whether reorganizing the file would change the file contents.
+  -w, --write            Overwrite the file with the reorganized contents.
+"#; // Usage text, printed verbatim by `main` when `-h`/`--help` is given.
+
+type ByteRange = Range<usize>; // Half-open range of byte offsets used to slice the source text.
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+struct Cli {
+	check: bool, // set by `-c`/`--check`: only report whether the file is already ordered
+	overwrite: bool, // set by `-w`/`--write`: request rewriting the file in place
+	path: PathBuf, // the free-standing FILE argument: the Rust source file to process
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+enum Item<'a> {
+	InnerDoc(ByteRange), // a `//!` or `/*!` comment documenting the file itself
+	Mod {
+		name: &'a str,
+		is_declaration: bool, // true for `mod name;`, false when the module has a body
+		content: ByteRange, // bytes of the item, starting at the comments/attributes before it
+	},
+	Use(ByteRange), // a `use` declaration
+	Const {
+		name: &'a str,
+		content: ByteRange,
+	},
+	Type {
+		name: &'a str, // name of the declared type
+		content: ByteRange,
+	},
+	Func {
+		name: &'a str,
+		content: ByteRange,
+	},
+	Impl {
+		name: TypeIdent<'a>, // the type the `impl` block is written for
+		trt: Option<&'a str>, // source text of the implemented trait, `None` if there is none
+		content: ByteRange,
+	},
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+struct TypeIdent<'a> {
+	name: &'a str, // source text of the type name, without generic arguments
+	generics: Option<&'a str>, // source text of the generic arguments, if the type has any
+}
+
+/// Program entry point.
+///
+/// Prints the usage text for `-h`/`--help`; otherwise parses the remaining arguments
+/// into a `Cli` and runs it. Any failure is reported on stderr as `Error: <message>`
+/// and turned into a failing exit code.
+fn main() -> ExitCode {
+	let mut arguments = Arguments::from_env();
+	if arguments.contains(["-h", "--help"]) {
+		print!("{}", CLI_HELP);
+		return ExitCode::SUCCESS;
+	}
+
+	// Argument parsing and the run itself both fail with a `String`, so a single
+	// error path can report either of them.
+	let outcome = Cli::try_from(arguments).and_then(|cli| cli.run());
+	match outcome {
+		Ok(code) => code,
+		Err(message) => {
+			eprintln!("Error: {}", message);
+			ExitCode::FAILURE
+		}
+	}
+}
+
+impl Cli {
+	/// Parses the file at `self.path`, then either checks (`--check`) that its top-level
+	/// items are already ordered or prints them in sorted order.
+	/// Failures are returned as messages; `--write` is not implemented yet.
+	fn run(&self) -> Result<ExitCode, String> {
+		let mut parser = Parser::new();
+		parser
+			.set_language(&tree_sitter_rust::language())
+			.expect("Error loading Rust grammar");
+
+		let text = std::fs::read_to_string(&self.path)
+			.map_err(|e| format!("unable to read file: {}", e))?;
+
+		let Some(tree) = parser.parse(&text, None) else {
+			return Err("unable to parse file".to_owned());
+		};
+
+		let mut items = Vec::new();
+
+		let mut cursor = tree.walk();
+		assert_eq!(cursor.node().kind(), "source_file");
+		assert!(!cursor.goto_next_sibling());
+		// An empty file has no child nodes, in which case the walk is skipped entirely.
+		let mut has_node = cursor.goto_first_child();
+		// Offset of the first comment/attribute that belongs to the upcoming item.
+		let mut start = None;
+		while has_node {
+			let node = cursor.node();
+			if let Some(item) = Item::maybe_item(&text, node, start) {
+				let last = items.last();
+				let inbetween =
+					last.map(|(_, i): &(_, Item)| i.end_byte()).unwrap_or(0)..item.start_byte();
+				debug_assert!(text[inbetween.clone()].trim().is_empty());
+				let newline_before = text[inbetween].contains("\n\n");
+				items.push((newline_before, item));
+				start = None;
+			} else if start.is_none() {
+				start = Some(node.start_byte());
+			}
+			has_node = cursor.goto_next_sibling();
+		}
+
+		let mut is_sorted = true;
+		for window in items.windows(2) {
+			// Compare the items only: the blank-line flag must not take part in the order.
+			if window[0].1 > window[1].1 {
+				if self.check {
+					eprintln!(
+						"Expected \n\"\"\"\n{}\n\"\"\"\n before \n\"\"\"\n{}\n\"\"\"",
+						window[1].1.content(&text),
+						window[0].1.content(&text)
+					);
+					return Ok(ExitCode::FAILURE);
+				}
+				is_sorted = false;
+				break;
+			}
+		}
+		if self.check || (self.overwrite && is_sorted) {
+			return Ok(ExitCode::SUCCESS);
+		}
+		if self.overwrite {
+			return Err("overwriting the file is not yet implemented".to_owned());
+		}
+
+		// The sort is stable, so items that compare equal keep their relative order.
+		items.sort_by(|a, b| a.1.cmp(&b.1));
+
+		let mut last = None;
+		for (newline, item) in items {
+			if newline || last != Some(item.item_order()) {
+				println!();
+			}
+			println!("{}", item.content(&text));
+			last = Some(item.item_order());
+		}
+
+		Ok(ExitCode::SUCCESS)
+	}
+}
+
+impl TryFrom<Arguments> for Cli {
+	type Error = String;
+
+	/// Builds the options from the parsed command line.
+	///
+	/// Fails with a printable message when the `FILE` argument is missing or when
+	/// arguments are left over after the known flags and the file have been taken.
+	fn try_from(mut args: Arguments) -> Result<Self, Self::Error> {
+		let cli = Cli {
+			check: args.contains(["-c", "--check"]),
+			overwrite: args.contains(["-w", "--write"]),
+			// A missing FILE is a user error: report it instead of panicking.
+			path: args
+				.free_from_os_str::<_, Infallible>(|s| Ok(PathBuf::from(s)))
+				.map_err(|e| format!("missing FILE argument ({})", e))?,
+		};
+
+		let remaining = args.finish();
+		if remaining.is_empty() {
+			return Ok(cli);
+		}
+
+		// Name every leftover argument so the user can see what was rejected.
+		let plural = if remaining.len() == 1 { "" } else { "s" };
+		let listed: Vec<_> = remaining
+			.iter()
+			.map(|s| format!("'{}'", s.to_string_lossy()))
+			.collect();
+		Err(format!("unexpected argument{}: {}", plural, listed.join(", ")))
+	}
+}
+
+impl<'a> Item<'a> {
+	/// Byte range of the item in the source text, including the comments/attributes before it.
+	fn byte_range(&self) -> ByteRange {
+		match self {
+			Item::InnerDoc(content)
+			| Item::Mod { content, .. }
+			| Item::Use(content)
+			| Item::Const { content, .. }
+			| Item::Type { content, .. }
+			| Item::Func { content, .. }
+			| Item::Impl { content, .. } => content.clone(),
+		}
+	}
+
+	/// Source text of the item; `text` must be the text the item was created from.
+	fn content(&self, text: &'a str) -> &'a str {
+		&text[self.byte_range()]
+	}
+
+	/// Offset just past the last byte of the item.
+	fn end_byte(&self) -> usize {
+		self.byte_range().end
+	}
+
+	/// Rank of the item's kind in the target layout; lower ranks are placed first.
+	fn item_order(&self) -> u8 {
+		match self {
+			Item::InnerDoc(_) => 0,
+			Item::Mod {
+				is_declaration: true,
+				..
+			} => 1,
+			Item::Use(_) => 2,
+			Item::Const { .. } => 3,
+			Item::Type { .. } => 4,
+			Item::Func { .. } => 5,
+			Item::Impl { .. } => 6,
+			Item::Mod {
+				is_declaration: false,
+				..
+			} => 7,
+		}
+	}
+
+	/// Classifies a top-level syntax node of the file.
+	///
+	/// Returns `None` for attributes and ordinary comments; the caller then passes the
+	/// offset of the first such node as `start` so that they stay attached to the next item.
+	///
+	/// # Panics
+	/// Panics on any node kind that has no match arm below.
+	fn maybe_item(text: &'a str, node: Node<'a>, start: Option<usize>) -> Option<Self> {
+		let get_field_str = |field_name| {
+			node.child_by_field_name(field_name)
+				.map(|n| n.utf8_text(text.as_bytes()).unwrap())
+		};
+
+		// Extend the item backwards over the comments/attributes collected before it.
+		let content = start.unwrap_or(node.start_byte())..node.end_byte();
+		match node.kind() {
+			"attribute_item" => {
+				// Ignore and add to the next item
+				None
+			}
+			"block_comment" | "line_comment" => {
+				let comment = node.utf8_text(text.as_bytes()).unwrap();
+				if comment.starts_with("//!") || comment.starts_with("/*!") {
+					// Doc comment for the file (ensure that it's at the top of the file).
+					Some(Self::InnerDoc(content))
+				} else {
+					None // Move comment with the next item
+				}
+			}
+			"const_item" | "static_item" => {
+				let name = get_field_str("name").unwrap();
+				Some(Self::Const { name, content })
+			}
+			"enum_item" | "struct_item" | "trait_item" | "type_item" | "union_item" => {
+				let name = get_field_str("name").unwrap();
+				Some(Self::Type { name, content })
+			}
+			"function_item" => {
+				let name = get_field_str("name").unwrap();
+				Some(Self::Func { name, content })
+			}
+			"impl_item" => {
+				let name = TypeIdent::from_node(text, node.child_by_field_name("type").unwrap());
+				let trt = get_field_str("trait");
+				Some(Self::Impl { name, trt, content })
+			}
+			"mod_item" => {
+				let name = get_field_str("name").unwrap();
+				let is_declaration = node.child_by_field_name("body").is_none();
+				Some(Self::Mod {
+					name,
+					is_declaration,
+					content,
+				})
+			}
+			"use_declaration" => Some(Self::Use(content)),
+			_ => panic!("unexpected node kind: {}", node.kind()),
+		}
+	}
+
+	/// Offset of the first byte of the item.
+	fn start_byte(&self) -> usize {
+		self.byte_range().start
+	}
+}
+
+impl Ord for Item<'_> {
+	/// Orders items by kind first (see `item_order`), then by name within a kind.
+	///
+	/// Inner doc comments and `use` declarations always compare equal within their kind,
+	/// so a stable sort keeps them in source order. `impl` blocks are ordered by type
+	/// name, then by trait (inherent impls first), then by generic arguments.
+	fn cmp(&self, other: &Self) -> Ordering {
+		let by_kind = self.item_order().cmp(&other.item_order());
+		if by_kind != Ordering::Equal {
+			return by_kind;
+		}
+		// From here on both items have the same rank, hence the same kind.
+		match (self, other) {
+			(Item::InnerDoc(_), Item::InnerDoc(_)) | (Item::Use(_), Item::Use(_)) => {
+				Ordering::Equal
+			}
+			(Item::Const { name: a, .. }, Item::Const { name: b, .. })
+			| (Item::Mod { name: a, .. }, Item::Mod { name: b, .. })
+			| (Item::Type { name: a, .. }, Item::Type { name: b, .. })
+			| (Item::Func { name: a, .. }, Item::Func { name: b, .. }) => a.cmp(b),
+			// A missing trait or generics list is compared as the empty string.
+			(
+				Item::Impl {
+					name: a, trt: t_a, ..
+				},
+				Item::Impl {
+					name: b, trt: t_b, ..
+				},
+			) => a
+				.name
+				.cmp(b.name)
+				.then_with(|| t_a.unwrap_or("").cmp(t_b.unwrap_or("")))
+				.then_with(|| a.generics.unwrap_or("").cmp(&b.generics.unwrap_or(""))),
+			// Equal ranks imply the same variant, so mixed pairs cannot reach this point.
+			_ => unreachable!(),
+		}
+	}
+}
+
+impl PartialOrd for Item<'_> {
+	fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+		Some(self.cmp(other)) // Defers to `Ord::cmp`, so the result is never `None`.
+	}
+}
+
+impl<'a> TypeIdent<'a> {
+	/// Splits the type of an `impl` block into its name and generic arguments.
+	///
+	/// Types other than `generic_type` are kept whole as `name` rather than causing a panic.
+	fn from_node(text: &'a str, node: Node<'a>) -> Self {
+		let text_of = |n: Node<'a>| n.utf8_text(text.as_bytes()).unwrap();
+
+		match node.kind() {
+			"generic_type" => {
+				let name = text_of(node.child_by_field_name("type").unwrap());
+				let generics = node.child_by_field_name("type_arguments").map(text_of);
+				debug_assert!(generics.is_some());
+				Self { name, generics }
+			}
+			// Plain identifiers, but also references, paths, tuples and so on.
+			_ => Self {
+				name: text_of(node),
+				generics: None,
+			},
+		}
+	}
+}