From c4c696df5a2ab0a7cde46e4bd4c5d812a4593614 Mon Sep 17 00:00:00 2001 From: Alex Selimov Date: Mon, 9 Jun 2025 21:54:37 -0400 Subject: [PATCH] Initial attempt at git module --- pyproject.toml | 1 + src/reviewllama/cli.py | 18 +++- src/reviewllama/configs.py | 16 +++- src/reviewllama/git_diff.py | 164 ++++++++++++++++++++++++++++++++++++ src/reviewllama/logger.py | 37 ++++++++ uv.lock | 39 ++++++++- 6 files changed, 269 insertions(+), 6 deletions(-) create mode 100644 src/reviewllama/git_diff.py diff --git a/pyproject.toml b/pyproject.toml index 18ef4bd..c1fb5ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ authors = [ ] requires-python = ">=3.13" dependencies = [ + "gitpython>=3.1.44", "rich>=14.0.0", ] diff --git a/src/reviewllama/cli.py b/src/reviewllama/cli.py index 5676e3d..4f4c96e 100644 --- a/src/reviewllama/cli.py +++ b/src/reviewllama/cli.py @@ -3,8 +3,11 @@ import sys from pathlib import Path from typing import List, Optional +from reviewllama.git_diff import analyze_git_repository + from .configs import OllamaConfig, ReviewConfig, create_config_from_vars -from .logger import log_paths, log_review_start +from .logger import (log_git_analysis_result, log_git_analysis_start, + log_paths, log_review_start) def normalize_server_url(url: str) -> str: @@ -62,6 +65,13 @@ Examples: help="Maximum number of retry attempts (default: %(default)s)", ) + parser.add_argument( + "--base-branch", + dest="base_branch", + default="master", + help="Base branch to compare against (default: %(default)s)", + ) + return parser @@ -81,6 +91,7 @@ def transform_namespace_to_config(namespace: argparse.Namespace) -> ReviewConfig server_url=normalize_server_url(namespace.server_url), timeout=namespace.timeout, max_retries=namespace.max_retries, + base_branch=namespace.base_branch, ) @@ -97,6 +108,11 @@ def cli() -> None: # TODO: Pass config to review engine log_review_start(config) log_paths(config.paths) + + for path in config.paths: + analysis = 
analyze_git_repository(path, config.base_branch) + log_git_analysis_start(path, config.base_branch) + log_git_analysis_result(analysis) except SystemExit: # argparse calls sys.exit on error, let it propagate raise diff --git a/src/reviewllama/configs.py b/src/reviewllama/configs.py index 588be9a..b658113 100644 --- a/src/reviewllama/configs.py +++ b/src/reviewllama/configs.py @@ -2,6 +2,8 @@ from dataclasses import dataclass from pathlib import Path from typing import List +from .cli import normalize_server_url + @dataclass(frozen=True) class OllamaConfig: @@ -19,6 +21,7 @@ class ReviewConfig: paths: List[Path] ollama: OllamaConfig + base_branch: str def create_ollama_config( @@ -34,14 +37,19 @@ def create_ollama_config( def create_review_config( - paths: List[Path], ollama_config: OllamaConfig + paths: List[Path], ollama_config: OllamaConfig, base_branch: str ) -> ReviewConfig: """Create complete ReviewConfig from validated components.""" - return ReviewConfig(paths=paths, ollama=ollama_config) + return ReviewConfig(paths=paths, ollama=ollama_config, base_branch=base_branch) def create_config_from_vars( - paths: List[Path], model: str, server_url: str, timeout: int, max_retries: int + paths: List[Path], + model: str, + server_url: str, + timeout: int, + max_retries: int, + base_branch: str, ): ollama_config = OllamaConfig( model=model, @@ -50,4 +58,4 @@ def create_config_from_vars( max_retries=max_retries, ) - return create_review_config(paths, ollama_config) + return create_review_config(paths, ollama_config, base_branch) diff --git a/src/reviewllama/git_diff.py b/src/reviewllama/git_diff.py new file mode 100644 index 0000000..f3a25c4 --- /dev/null +++ b/src/reviewllama/git_diff.py @@ -0,0 +1,164 @@ +""" +Git analysis module for ReviewLlama using functional programming style. 
+""" + +from dataclasses import dataclass +from pathlib import Path +from typing import List, Optional, Tuple + +from git import Repo +from git.diff import Diff +from git.exc import GitCommandError, InvalidGitRepositoryError +from git.objects import Commit +from rich.console import Console + + +@dataclass(frozen=True) +class GitDiff: + """Represents a git diff with metadata.""" + file_path: str + old_content: str + new_content: str + diff_text: str + change_type: str # 'added', 'modified', 'deleted' + + +@dataclass(frozen=True) +class GitAnalysis: + """Complete git analysis result.""" + repository_path: Path + current_branch: str + base_branch: str + diffs: List[GitDiff] + total_files_changed: int + + +def find_git_repository(path: Path) -> Repo: + """Find and return git repository from given path.""" + try: + return Repo(path, search_parent_directories=True) + except InvalidGitRepositoryError: + raise ValueError(f"No git repository found at or above {path}") + + +def get_current_branch_name(repo: Repo) -> str: + """Get the name of the current branch.""" + try: + return repo.active_branch.name + except TypeError: + # Detached HEAD state + return repo.head.commit.hexsha[:8] + + +def branch_exists(repo: Repo, branch_name: str) -> bool: + """Check if a branch exists in the repository.""" + try: + repo.commit(branch_name) + return True + except Exception: + return False + + +def get_base_branch(repo: Repo, requested_base: str) -> str: + """Determine the base branch to compare against.""" + # Try requested base first + if branch_exists(repo, requested_base): + return requested_base + + # Fall back to common master branch names + common_masters = ['master', 'main', 'develop'] + for branch in common_masters: + if branch_exists(repo, branch): + return branch + + raise ValueError(f"Base branch '{requested_base}' not found and no common master branch exists") + + +def get_diff_between_branches(repo: Repo, base_branch: str, current_branch: str): + """Get diff between two branches.""" + 
try: + base_commit = repo.commit(base_branch) + current_commit = repo.commit(current_branch) + return base_commit.diff(current_commit) + except GitCommandError as e: + raise ValueError(f"Failed to get diff between {base_branch} and {current_branch}: {e}") + + +def determine_change_type(diff_item: Diff) -> str: + """Determine the type of change from a diff item.""" + if diff_item.new_file: + return 'added' + elif diff_item.deleted_file: + return 'deleted' + else: + return 'modified' + + +def extract_file_content(diff_item: Diff, is_old: bool = True) -> str: + """Extract file content from diff item.""" + try: + blob = diff_item.a_blob if is_old else diff_item.b_blob + if blob is None: + return "" + return blob.data_stream.read().decode('utf-8', errors='ignore') + except (UnicodeDecodeError, AttributeError): + return "" + + +def create_git_diff(diff_item: Diff) -> GitDiff: + """Create GitDiff from git.Diff object.""" + file_path = diff_item.a_path or diff_item.b_path or "unknown" + old_content = extract_file_content(diff_item, is_old=True) + new_content = extract_file_content(diff_item, is_old=False) + diff_text = str(diff_item) + change_type = determine_change_type(diff_item) + + return GitDiff( + file_path=file_path, + old_content=old_content, + new_content=new_content, + diff_text=diff_text, + change_type=change_type + ) + + +def process_diff_items(diff_index) -> List[GitDiff]: + """Process all diff items into GitDiff objects.""" + return [create_git_diff(item) for item in diff_index] + + +def filter_reviewable_diffs(diffs: List[GitDiff]) -> List[GitDiff]: + """Filter diffs to only include reviewable files.""" + + # TODO: Update this to a more complete list of programming language extensions + reviewable_extensions = { + '.py', '.js', '.ts', '.java', '.cpp', '.c', '.h', '.hpp', + '.go', '.rs', '.rb', '.php', '.cs', '.swift', '.kt', + '.scala', '.clj', '.sh', '.sql', '.yml', '.yaml', '.json' + } + + def is_reviewable(diff: GitDiff) -> bool: + path = 
Path(diff.file_path) + return path.suffix.lower() in reviewable_extensions + + return [diff for diff in diffs if is_reviewable(diff)] + + +def analyze_git_repository(path: Path, base_branch: str) -> GitAnalysis: + """Analyze git repository and extract diffs for review.""" + repo = find_git_repository(path) + current_branch = get_current_branch_name(repo) + resolved_base_branch = get_base_branch(repo, base_branch) + + diff_index = get_diff_between_branches(repo, resolved_base_branch, current_branch) + all_diffs = process_diff_items(diff_index) + reviewable_diffs = filter_reviewable_diffs(all_diffs) + + return GitAnalysis( + repository_path=path, + current_branch=current_branch, + base_branch=resolved_base_branch, + diffs=reviewable_diffs, + total_files_changed=len(reviewable_diffs) + ) + diff --git a/src/reviewllama/logger.py b/src/reviewllama/logger.py index f4f188a..f51eda2 100644 --- a/src/reviewllama/logger.py +++ b/src/reviewllama/logger.py @@ -6,6 +6,7 @@ from rich.console import Console from rich.text import Text from .configs import ReviewConfig +from .git_diff import GitAnalysis def create_console() -> Console: @@ -42,3 +43,39 @@ def log_error(error: str) -> None: """Log error message with colored output.""" console = create_error_console() console.print(f"Error: {error}") + + +def log_git_analysis_start(path: Path, base_branch: str) -> None: + """Log the start of git analysis.""" + console = create_console() + console.print(f"[dim]Analyzing git repository at:[/dim] [yellow]{path}[/yellow]") + console.print(f"[dim]Base branch:[/dim] [cyan]{base_branch}[/cyan]") + + +def log_git_analysis_result(analysis: GitAnalysis) -> None: + """Log the results of git analysis.""" + console = create_console() + console.print( + f"[dim]Current branch:[/dim] [green]{analysis.current_branch}[/green]" + ) + console.print(f"[dim]Comparing against:[/dim] [cyan]{analysis.base_branch}[/cyan]") + console.print( + f"[dim]Files changed:[/dim] 
[yellow]{analysis.total_files_changed}[/yellow]" + ) + + if analysis.diffs: + console.print("\n[bold]Changed files:[/bold]") + for diff in analysis.diffs: + change_color = { + "added": "green", + "modified": "yellow", + "deleted": "red", + }.get(diff.change_type, "white") + + console.print( + f" [{change_color}]{diff.change_type.upper():>8}[/{change_color}] {diff.file_path}" + ) + else: + console.print("[dim]No reviewable files changed.[/dim]") + + console.print() diff --git a/uv.lock b/uv.lock index 2a77ae8..9b3e323 100644 --- a/uv.lock +++ b/uv.lock @@ -2,6 +2,30 @@ version = 1 revision = 1 requires-python = ">=3.13" +[[package]] +name = "gitdb" +version = "4.0.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "smmap" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794 }, +] + +[[package]] +name = "gitpython" +version = "3.1.44" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "gitdb" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/89/37df0b71473153574a5cdef8f242de422a0f5d26d7a9e231e6f169b4ad14/gitpython-3.1.44.tar.gz", hash = "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269", size = 214196 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599 }, +] + [[package]] name = "markdown-it-py" version = "3.0.0" 
@@ -37,11 +61,15 @@ name = "reviewllama" version = "0.1.0" source = { editable = "." } dependencies = [ + { name = "gitpython" }, { name = "rich" }, ] [package.metadata] -requires-dist = [{ name = "rich", specifier = ">=14.0.0" }] +requires-dist = [ + { name = "gitpython", specifier = ">=3.1.44" }, + { name = "rich", specifier = ">=14.0.0" }, +] [[package]] name = "rich" @@ -55,3 +83,12 @@ sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0a wheels = [ { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229 }, ] + +[[package]] +name = "smmap" +version = "5.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303 }, +]