Implement a small tool that orders items in Rust files

This commit is contained in:
Jip J. Dekker 2024-08-01 16:04:15 +10:00
commit f2f0eb72b6
No known key found for this signature in database
6 changed files with 491 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

89
Cargo.lock generated Normal file
View File

@ -0,0 +1,89 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "cc"
version = "1.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "pico-args"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5be167a7af36ee22fe3115051bc51f6e6c7054c9348e28deb4f49bd6f705a315"
[[package]]
name = "regex"
version = "1.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
[[package]]
name = "rust-organizer"
version = "0.1.0"
dependencies = [
"cc",
"pico-args",
"tree-sitter",
"tree-sitter-rust",
]
[[package]]
name = "tree-sitter"
version = "0.22.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca"
dependencies = [
"cc",
"regex",
]
[[package]]
name = "tree-sitter-rust"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "277690f420bf90741dea984f3da038ace46c4fe6047cba57a66822226cde1c93"
dependencies = [
"cc",
"tree-sitter",
]

14
Cargo.toml Normal file
View File

@ -0,0 +1,14 @@
[package]
name = "rust-organizer"
version = "0.1.0"
edition = "2021"
description = "A tool to organize Rust files in an opinionated way."
authors = ["Jip J. Dekker <jip@dekker.one>"]
[dependencies]
pico-args = "0.5.0"
tree-sitter = "0.22.6"
tree-sitter-rust = "0.21.2"
[build-dependencies]
cc = "*"

11
README.md Normal file
View File

@ -0,0 +1,11 @@
# Rust Organizer
An opinionated tool that organizes a Rust source file in the following order:
1. `mod <name>`
2. `use`
3. sorted `const`/`static`
4. sorted `struct`/`enum`/`union`/`type`/`trait`
5. `fn`
6. `impl`
7. `mod <name> { ... }`

3
rustfmt.toml Normal file
View File

@ -0,0 +1,3 @@
hard_tabs = true
imports_granularity = "Crate"
group_imports = "StdExternalCrate"

373
src/main.rs Normal file
View File

@ -0,0 +1,373 @@
use std::{cmp::Ordering, convert::Infallible, ops::Range, path::PathBuf, process::ExitCode};
use pico_args::Arguments;
use tree_sitter::{Node, Parser};
/// Usage text printed to standard output when `-h`/`--help` is given.
const CLI_HELP: &str = r#"USAGE
$ rust-organizer [-c] [-w] FILE
ARGUMENTS
FILE File name of the Rust source file to reorganize.
FLAGS
-c, --check Check whether reorganizing the file would change the file contents.
-w, --write Overwrite the file with the reorganized contents.
"#;
/// Half-open range of byte offsets into the source text of the file being
/// organized.
type ByteRange = Range<usize>;
/// Parsed command line options of the tool.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct Cli {
/// Set by `-c`/`--check`: only report whether the file is already ordered.
check: bool,
/// Set by `-w`/`--write`: the file itself is the target of the result.
overwrite: bool,
/// The free-standing `FILE` argument: path of the Rust source file to read.
path: PathBuf,
}
/// A top-level item of a Rust source file, as recognized by
/// `Item::maybe_item`.
///
/// Every variant stores the byte range of the item in the source text. The
/// range starts at the first attribute or (non-inner-doc) comment that
/// directly precedes the item, so those travel with the item when it moves.
/// Names are string slices borrowed from the source text.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
enum Item<'a> {
/// A comment starting with `//!` or `/*!`.
InnerDoc(ByteRange),
/// A `mod` item.
Mod {
/// Name of the module.
name: &'a str,
/// `true` when the module has no body (`mod name;`), `false` for
/// `mod name { ... }`.
is_declaration: bool,
/// Byte range of the item in the source text.
content: ByteRange,
},
/// A `use` declaration.
Use(ByteRange),
/// A `const` item.
Const {
/// Name of the constant.
name: &'a str,
/// Byte range of the item in the source text.
content: ByteRange,
},
/// A type definition (`enum` or `struct`).
Type {
/// Name of the type.
name: &'a str,
/// Byte range of the item in the source text.
content: ByteRange,
},
/// A free function.
Func {
/// Name of the function.
name: &'a str,
/// Byte range of the item in the source text.
content: ByteRange,
},
/// An `impl` block.
Impl {
/// The type the block is implemented for.
name: TypeIdent<'a>,
/// Text of the implemented trait, or `None` for an inherent impl.
trt: Option<&'a str>,
/// Byte range of the item in the source text.
content: ByteRange,
},
}
/// The self type of an `impl` block, split into its name and its generic
/// arguments so that impls can be ordered by type name first.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct TypeIdent<'a> {
/// Text of the type name (for a generic type: the part before the
/// arguments).
name: &'a str,
/// Text of the generic argument list, or `None` for a non-generic type.
generics: Option<&'a str>,
}
fn main() -> ExitCode {
// Parse commandline arguments
let mut args = Arguments::from_env();
if args.contains(["-h", "--help"]) {
print!("{}", CLI_HELP);
return ExitCode::SUCCESS;
}
let cli: Cli = match args.try_into() {
Ok(cli) => cli,
Err(e) => {
eprintln!("Error: {}", e);
return ExitCode::FAILURE;
}
};
// Run the main program
match cli.run() {
Ok(code) => code,
Err(e) => {
eprintln!("Error: {}", e);
ExitCode::FAILURE
}
}
}
impl Cli {
	/// Read and parse the file at `self.path`, then check or reorganize it.
	///
	/// Behaviour per mode:
	/// - `check`: returns a failure exit code (after reporting the first pair
	///   of items that is out of order on standard error) when the file is
	///   not organized, and a success exit code otherwise. Nothing is written.
	/// - `overwrite`: rewrites the file with the reorganized contents, unless
	///   it is already organized.
	/// - otherwise: prints the reorganized contents to standard output.
	///
	/// # Errors
	///
	/// Returns a message when the file cannot be read, cannot be parsed,
	/// contains syntax errors, or cannot be written back.
	///
	/// # Panics
	///
	/// Panics when the Rust grammar cannot be loaded, or when the file
	/// contains a top-level construct that `Item::maybe_item` does not know.
	fn run(&self) -> Result<ExitCode, String> {
		let mut parser = Parser::new();
		parser
			.set_language(&tree_sitter_rust::language())
			.expect("Error loading Rust grammar");
		let text = std::fs::read_to_string(&self.path)
			.map_err(|e| format!("unable to read file: {}", e))?;
		let Some(tree) = parser.parse(&text, None) else {
			return Err("unable to parse file".to_owned());
		};
		// Tree-sitter still produces a tree for invalid input. Moving code
		// around based on a broken tree could destroy the file, so refuse.
		if tree.root_node().has_error() {
			return Err("unable to parse file: the file contains syntax errors".to_owned());
		}

		// Each entry is (blank line before the item in the original?, item).
		let mut items: Vec<(bool, Item<'_>)> = Vec::new();
		let mut cursor = tree.walk();
		assert_eq!(cursor.node().kind(), "source_file");
		assert!(!cursor.goto_next_sibling());
		// Start of a pending run of attributes/comments that belongs to the
		// next item (or to nothing, when it sits at the end of the file).
		let mut start = None;
		// An empty file has no children; there is nothing to collect then.
		if cursor.goto_first_child() {
			loop {
				let node = cursor.node();
				if let Some(item) = Item::maybe_item(&text, node, start) {
					let previous_end = items.last().map_or(0, |(_, i)| i.end_byte());
					let inbetween = previous_end..item.start_byte();
					debug_assert!(text[inbetween.clone()].trim().is_empty());
					let newline_before = text[inbetween].contains("\n\n");
					items.push((newline_before, item));
					start = None;
				} else if start.is_none() {
					start = Some(node.start_byte());
				}
				if !cursor.goto_next_sibling() {
					break;
				}
			}
		}

		// Only the items decide whether the file is organized; the
		// blank-line flag stored next to them must not take part in the
		// comparison.
		let out_of_order = items.windows(2).find(|pair| pair[0].1 > pair[1].1);
		if self.check {
			let Some(pair) = out_of_order else {
				return Ok(ExitCode::SUCCESS);
			};
			eprintln!(
				"Expected \n\"\"\"\n{}\n\"\"\"\n before \n\"\"\"\n{}\n\"\"\"",
				pair[1].1.content(&text),
				pair[0].1.content(&text)
			);
			return Ok(ExitCode::FAILURE);
		}
		if self.overwrite && out_of_order.is_none() {
			// Already organized: leave the file untouched.
			return Ok(ExitCode::SUCCESS);
		}

		// The sort is stable, so items that compare equal (e.g. `use`
		// declarations) keep their original relative order.
		items.sort_by(|a, b| a.1.cmp(&b.1));

		let mut output = String::with_capacity(text.len());
		let mut last = None;
		for (newline, item) in &items {
			let order = item.item_order();
			// Separate items with a blank line when the original had one or
			// when a new group of items starts; never before the first item.
			if last.is_some() && (*newline || last != Some(order)) {
				output.push('\n');
			}
			output.push_str(item.content(&text));
			output.push('\n');
			last = Some(order);
		}
		// Comments/attributes after the last item are not attached to any
		// item; keep them at the end instead of dropping them.
		if let Some(offset) = start {
			let trailing = text[offset..].trim_end();
			if !trailing.is_empty() {
				if !output.is_empty() {
					output.push('\n');
				}
				output.push_str(trailing);
				output.push('\n');
			}
		}

		if self.overwrite {
			std::fs::write(&self.path, output)
				.map_err(|e| format!("unable to write file: {}", e))?;
		} else {
			print!("{}", output);
		}
		Ok(ExitCode::SUCCESS)
	}
}
impl TryFrom<Arguments> for Cli {
	type Error = String;

	/// Build the [`Cli`] options from the command line arguments.
	///
	/// The flags are extracted first, then the single free-standing `FILE`
	/// argument; anything left over afterwards is rejected.
	///
	/// # Errors
	///
	/// Returns a message when the `FILE` argument is missing or when
	/// unexpected arguments remain.
	fn try_from(mut args: Arguments) -> Result<Self, Self::Error> {
		let check = args.contains(["-c", "--check"]);
		let overwrite = args.contains(["-w", "--write"]);
		// A missing FILE is a user error, not a bug: report it instead of
		// panicking.
		let path = args
			.free_from_os_str::<_, Infallible>(|s| Ok(PathBuf::from(s)))
			.map_err(|e| format!("missing or invalid FILE argument: {}", e))?;
		let cli = Cli {
			check,
			overwrite,
			path,
		};

		let remaining = args.finish();
		match remaining.as_slice() {
			[] => Ok(cli),
			[single] => Err(format!(
				"unexpected argument: '{}'",
				single.to_string_lossy()
			)),
			several => Err(format!(
				"unexpected arguments: {}",
				several
					.iter()
					.map(|s| format!("'{}'", s.to_string_lossy()))
					.collect::<Vec<_>>()
					.join(", ")
			)),
		}
	}
}
impl<'a> Item<'a> {
	/// Byte range of the item in the source text, including the attributes
	/// and comments that were collected in front of it.
	fn byte_range(&self) -> ByteRange {
		match self {
			Item::InnerDoc(content)
			| Item::Mod { content, .. }
			| Item::Use(content)
			| Item::Const { content, .. }
			| Item::Type { content, .. }
			| Item::Func { content, .. }
			| Item::Impl { content, .. } => content.clone(),
		}
	}

	/// The slice of `text` covered by this item.
	///
	/// `text` must be the source text the item was created from.
	fn content(&self, text: &'a str) -> &'a str {
		&text[self.byte_range()]
	}

	/// Byte offset just past the end of the item.
	fn end_byte(&self) -> usize {
		self.byte_range().end
	}

	/// Position of the item's group in the organized file; lower values come
	/// first. Module declarations (`mod name;`) are placed near the top,
	/// modules with a body (`mod name { ... }`) at the very end.
	fn item_order(&self) -> u8 {
		match self {
			Item::InnerDoc(_) => 0,
			Item::Mod {
				is_declaration: true,
				..
			} => 1,
			Item::Use(_) => 2,
			Item::Const { .. } => 3,
			Item::Type { .. } => 4,
			Item::Func { .. } => 5,
			Item::Impl { .. } => 6,
			Item::Mod {
				is_declaration: false,
				..
			} => 7,
		}
	}

	/// Turn a top-level syntax `node` into an item, if it is one.
	///
	/// Returns `None` for attributes and regular comments; these belong to
	/// the item that follows them. The caller remembers where such a run
	/// began and passes that offset as `start` so that the resulting item's
	/// range covers it. When `start` is `None` the item begins at the node
	/// itself.
	///
	/// # Panics
	///
	/// Panics when `node` is of a kind that is not handled here.
	fn maybe_item(text: &'a str, node: Node<'a>, start: Option<usize>) -> Option<Self> {
		let get_field_str = |field_name| {
			node.child_by_field_name(field_name)
				.map(|n| n.utf8_text(text.as_bytes()).unwrap())
		};
		let start = start.unwrap_or(node.start_byte());
		let content = start..node.end_byte();
		match node.kind() {
			"attribute_item" => {
				// Ignore and add to the next item
				None
			}
			"block_comment" | "line_comment" => {
				let comment = node.utf8_text(text.as_bytes()).unwrap();
				if comment.starts_with("//!") || comment.starts_with("/*!") {
					// Doc comment for the file (ensure that it's at the top of the file).
					Some(Self::InnerDoc(content))
				} else {
					None // Move comment with the next item
				}
			}
			// `static` items are grouped and sorted together with `const`.
			"const_item" | "static_item" => {
				let name = get_field_str("name").unwrap();
				Some(Self::Const { name, content })
			}
			// All kinds of type definitions form a single sorted group.
			"enum_item" | "struct_item" | "union_item" | "type_item" | "trait_item" => {
				let name = get_field_str("name").unwrap();
				Some(Self::Type { name, content })
			}
			"function_item" => {
				let name = get_field_str("name").unwrap();
				Some(Self::Func { name, content })
			}
			"impl_item" => {
				let name = TypeIdent::from_node(text, node.child_by_field_name("type").unwrap());
				let trt = get_field_str("trait");
				Some(Self::Impl { name, trt, content })
			}
			"mod_item" => {
				let name = get_field_str("name").unwrap();
				// `mod name;` has no body, `mod name { ... }` does.
				let is_declaration = node.child_by_field_name("body").is_none();
				Some(Self::Mod {
					name,
					is_declaration,
					content,
				})
			}
			"use_declaration" => Some(Self::Use(content)),
			_ => panic!("unexpected node kind: {}", node.kind()),
		}
	}

	/// Byte offset at which the item starts.
	fn start_byte(&self) -> usize {
		self.byte_range().start
	}
}
impl Ord for Item<'_> {
	/// Order items the way they should appear in an organized file.
	///
	/// Items are compared by group first (see `Item::item_order`). Within a
	/// group, named items are ordered by name; `impl` blocks by type name,
	/// then trait (inherent impls first), then generic arguments. Inner doc
	/// comments and `use` declarations always compare equal, which leaves
	/// their relative order to the (stable) sort.
	fn cmp(&self, other: &Self) -> Ordering {
		let by_group = self.item_order().cmp(&other.item_order());
		if by_group.is_ne() {
			return by_group;
		}

		match (self, other) {
			(Item::InnerDoc(_), Item::InnerDoc(_)) | (Item::Use(_), Item::Use(_)) => {
				Ordering::Equal
			}
			(Item::Const { name: lhs, .. }, Item::Const { name: rhs, .. })
			| (Item::Mod { name: lhs, .. }, Item::Mod { name: rhs, .. })
			| (Item::Type { name: lhs, .. }, Item::Type { name: rhs, .. })
			| (Item::Func { name: lhs, .. }, Item::Func { name: rhs, .. }) => lhs.cmp(rhs),
			(
				Item::Impl {
					name: lhs,
					trt: lhs_trait,
					..
				},
				Item::Impl {
					name: rhs,
					trt: rhs_trait,
					..
				},
			) => lhs
				.name
				.cmp(rhs.name)
				.then_with(|| lhs_trait.unwrap_or("").cmp(rhs_trait.unwrap_or("")))
				.then_with(|| {
					lhs.generics
						.unwrap_or("")
						.cmp(rhs.generics.unwrap_or(""))
				}),
			// Items of the same group are always of the same variant.
			_ => unreachable!(),
		}
	}
}
impl PartialOrd for Item<'_> {
	/// Items are totally ordered; this simply defers to [`Ord::cmp`].
	fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
		Some(Ord::cmp(self, rhs))
	}
}
impl<'a> TypeIdent<'a> {
	/// Extract the name and generic arguments from the syntax `node` of the
	/// self type of an `impl` block.
	///
	/// For a generic type the name is the text of its `type` field and the
	/// generics are the text of its `type_arguments` field. For every other
	/// kind of type (plain identifiers, but also paths such as `foo::Bar`,
	/// primitive types, references, tuples, ...) the complete text of the
	/// node is used as the name, so that such impls can still be ordered
	/// instead of aborting the program.
	fn from_node(text: &'a str, node: Node<'a>) -> Self {
		let get_field_str = |field_name| {
			node.child_by_field_name(field_name)
				.map(|n| n.utf8_text(text.as_bytes()).unwrap())
		};
		match node.kind() {
			"generic_type" => {
				let name = get_field_str("type").unwrap();
				let generics = get_field_str("type_arguments");
				debug_assert!(generics.is_some());
				Self { name, generics }
			}
			// "type_identifier" and any other kind of type.
			_ => Self {
				name: node.utf8_text(text.as_bytes()).unwrap(),
				generics: None,
			},
		}
	}
}