Initial attempt at git module

This commit is contained in:
Alex Selimov 2025-06-09 21:54:37 -04:00
parent 645d329321
commit c4c696df5a
6 changed files with 269 additions and 6 deletions

View file

@ -8,6 +8,7 @@ authors = [
] ]
requires-python = ">=3.13" requires-python = ">=3.13"
dependencies = [ dependencies = [
"gitpython>=3.1.44",
"rich>=14.0.0", "rich>=14.0.0",
] ]

View file

@ -3,8 +3,11 @@ import sys
from pathlib import Path from pathlib import Path
from typing import List, Optional from typing import List, Optional
from reviewllama.git_diff import analyze_git_repository
from .configs import OllamaConfig, ReviewConfig, create_config_from_vars from .configs import OllamaConfig, ReviewConfig, create_config_from_vars
from .logger import log_paths, log_review_start from .logger import (log_git_analysis_result, log_git_analysis_start,
log_paths, log_review_start)
def normalize_server_url(url: str) -> str: def normalize_server_url(url: str) -> str:
@ -62,6 +65,13 @@ Examples:
help="Maximum number of retry attempts (default: %(default)s)", help="Maximum number of retry attempts (default: %(default)s)",
) )
parser.add_argument(
"--base-branch",
dest="base_branch",
default="master",
help="Base branch to compare against (default: %(default)s)",
)
return parser return parser
@ -81,6 +91,7 @@ def transform_namespace_to_config(namespace: argparse.Namespace) -> ReviewConfig
server_url=normalize_server_url(namespace.server_url), server_url=normalize_server_url(namespace.server_url),
timeout=namespace.timeout, timeout=namespace.timeout,
max_retries=namespace.max_retries, max_retries=namespace.max_retries,
base_branch=namespace.base_branch,
) )
@ -97,6 +108,11 @@ def cli() -> None:
# TODO: Pass config to review engine # TODO: Pass config to review engine
log_review_start(config) log_review_start(config)
log_paths(config.paths) log_paths(config.paths)
for path in Paths:
analysis = analyze_git_repository(path, config.base_branch)
log_git_analysis_start(path, config.base_branch)
log_git_analysis_result(analysis)
except SystemExit: except SystemExit:
# argparse calls sys.exit on error, let it propagate # argparse calls sys.exit on error, let it propagate
raise raise

View file

@ -2,6 +2,8 @@ from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from typing import List from typing import List
from .cli import normalize_server_url
@dataclass(frozen=True) @dataclass(frozen=True)
class OllamaConfig: class OllamaConfig:
@ -19,6 +21,7 @@ class ReviewConfig:
paths: List[Path] paths: List[Path]
ollama: OllamaConfig ollama: OllamaConfig
base_branch: str
def create_ollama_config( def create_ollama_config(
@ -34,14 +37,19 @@ def create_ollama_config(
def create_review_config( def create_review_config(
paths: List[Path], ollama_config: OllamaConfig paths: List[Path], ollama_config: OllamaConfig, base_branch: str
) -> ReviewConfig: ) -> ReviewConfig:
"""Create complete ReviewConfig from validated components.""" """Create complete ReviewConfig from validated components."""
return ReviewConfig(paths=paths, ollama=ollama_config) return ReviewConfig(paths=paths, ollama=ollama_config, base_branch=base_branch)
def create_config_from_vars( def create_config_from_vars(
paths: List[Path], model: str, server_url: str, timeout: int, max_retries: int paths: List[Path],
model: str,
server_url: str,
timeout: int,
max_retries: int,
base_branch: str,
): ):
ollama_config = OllamaConfig( ollama_config = OllamaConfig(
model=model, model=model,
@ -50,4 +58,4 @@ def create_config_from_vars(
max_retries=max_retries, max_retries=max_retries,
) )
return create_review_config(paths, ollama_config) return create_review_config(paths, ollama_config, base_branch)

164
src/reviewllama/git_diff.py Normal file
View file

@ -0,0 +1,164 @@
"""
Git analysis module for ReviewLlama using functional programming style.
"""
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional, Tuple
from git import Repo
from git.diff import Diff
from git.exc import GitCommandError, InvalidGitRepositoryError
from git.objects import Commit
from rich.console import Console
@dataclass(frozen=True)
class GitDiff:
    """Immutable record describing a single changed file.

    Instances are frozen: attributes cannot be reassigned after
    construction.
    """

    # Path of the changed file.
    file_path: str
    # File text on the old side of the diff.
    old_content: str
    # File text on the new side of the diff.
    new_content: str
    # String rendering of the underlying diff entry.
    diff_text: str
    # One of 'added', 'modified', 'deleted'.
    change_type: str
@dataclass(frozen=True)
class GitAnalysis:
    """Immutable summary of comparing the current branch against a base.

    Built by ``analyze_git_repository``; attributes cannot be reassigned
    after construction.
    """

    # Path that was handed to the analysis.
    repository_path: Path
    # Name of the checked-out branch, or a short commit hash when HEAD is
    # detached (see ``get_current_branch_name``).
    current_branch: str
    # Base branch the comparison was actually made against.
    base_branch: str
    # Per-file diff records kept for review.
    diffs: List[GitDiff]
    # Number of files counted as changed.
    total_files_changed: int
def find_git_repository(path: Path) -> Repo:
    """Open the git repository that contains ``path``.

    Parent directories are searched, so ``path`` may point anywhere inside
    a working tree.

    Args:
        path: Location at or below the repository root.

    Returns:
        The ``Repo`` object for the enclosing repository.

    Raises:
        ValueError: If ``path`` does not exist or is not inside a git
            repository. The original GitPython error is attached as
            ``__cause__``.
    """
    # Imported here so this function does not depend on the module-level
    # import list, which does not include NoSuchPathError.
    from git.exc import NoSuchPathError

    try:
        return Repo(path, search_parent_directories=True)
    except (InvalidGitRepositoryError, NoSuchPathError) as err:
        # Repo() raises NoSuchPathError for a nonexistent path; report it
        # the same way as "not a repository" so callers only handle one
        # exception type.
        raise ValueError(f"No git repository found at or above {path}") from err
def get_current_branch_name(repo: Repo) -> str:
    """Return a label for what is currently checked out in ``repo``.

    Returns:
        The active branch name, or the first eight characters of the HEAD
        commit hash when no branch is active.
    """
    try:
        active = repo.active_branch
    except TypeError:
        # Detached HEAD state: there is no branch, so identify the checkout
        # by an abbreviated commit hash instead.
        head_sha = repo.head.commit.hexsha
        return head_sha[:8]
    return active.name
def branch_exists(repo: Repo, branch_name: str) -> bool:
    """Report whether ``branch_name`` resolves to a commit in ``repo``.

    The check is performed with ``repo.commit(branch_name)``.

    Args:
        repo: Repository to look in.
        branch_name: Name to resolve.

    Returns:
        True if the lookup succeeds, False if it raises.
    """
    try:
        repo.commit(branch_name)
    except Exception:
        # Any lookup failure means "not found". Unlike a bare ``except:``,
        # this lets KeyboardInterrupt and SystemExit propagate.
        return False
    return True
def get_base_branch(repo: Repo, requested_base: str) -> str:
    """Pick the branch that the current work should be compared against.

    The requested branch wins if it exists; otherwise the first existing
    name among 'master', 'main' and 'develop' is used.

    Raises:
        ValueError: If neither the requested branch nor any fallback exists.
    """
    # Requested name first, then the conventional fallbacks in order.
    candidates = [requested_base, 'master', 'main', 'develop']
    for candidate in candidates:
        if branch_exists(repo, candidate):
            return candidate

    raise ValueError(f"Base branch '{requested_base}' not found and no common master branch exists")
def get_diff_between_branches(repo: Repo, base_branch: str, current_branch: str):
    """Diff the commit at ``base_branch`` against the commit at ``current_branch``.

    Args:
        repo: Repository containing both revisions.
        base_branch: Revision used as the old side of the diff.
        current_branch: Revision used as the new side of the diff.

    Returns:
        The result of ``base_commit.diff(current_commit)``.

    Raises:
        ValueError: If git reports a command error; the original
            ``GitCommandError`` is attached as ``__cause__``.
    """
    try:
        base_commit = repo.commit(base_branch)
        current_commit = repo.commit(current_branch)
        return base_commit.diff(current_commit)
    except GitCommandError as e:
        # Chain the cause so the underlying git failure stays in the traceback.
        raise ValueError(
            f"Failed to get diff between {base_branch} and {current_branch}: {e}"
        ) from e
def determine_change_type(diff_item: Diff) -> str:
    """Classify a diff entry as 'added', 'deleted' or 'modified'.

    ``new_file`` is checked before ``deleted_file``; every other entry is
    reported as 'modified'.
    """
    if diff_item.new_file:
        return 'added'
    if diff_item.deleted_file:
        return 'deleted'
    return 'modified'
def extract_file_content(diff_item: Diff, is_old: bool = True) -> str:
    """Read one side of a diff entry as text.

    Args:
        diff_item: Diff entry to read from.
        is_old: True selects ``a_blob`` (old side), False selects
            ``b_blob`` (new side).

    Returns:
        The blob decoded as UTF-8 with undecodable bytes dropped, or an
        empty string when the blob is missing or cannot be read.
    """
    try:
        if is_old:
            source_blob = diff_item.a_blob
        else:
            source_blob = diff_item.b_blob
        if source_blob is not None:
            raw_bytes = source_blob.data_stream.read()
            return raw_bytes.decode('utf-8', errors='ignore')
    except (UnicodeDecodeError, AttributeError):
        # Best effort: fall through to the empty-string result.
        pass
    return ""
def create_git_diff(diff_item: Diff) -> GitDiff:
    """Convert a ``git.Diff`` entry into an immutable ``GitDiff`` record.

    The path is taken from ``a_path``, falling back to ``b_path`` and then
    to the literal "unknown".
    """
    return GitDiff(
        file_path=diff_item.a_path or diff_item.b_path or "unknown",
        old_content=extract_file_content(diff_item, is_old=True),
        new_content=extract_file_content(diff_item, is_old=False),
        diff_text=str(diff_item),
        change_type=determine_change_type(diff_item),
    )
def process_diff_items(diff_index) -> List[GitDiff]:
    """Build one ``GitDiff`` per entry of ``diff_index``, preserving order."""
    records: List[GitDiff] = []
    for entry in diff_index:
        records.append(create_git_diff(entry))
    return records
def filter_reviewable_diffs(diffs: List[GitDiff]) -> List[GitDiff]:
    """Keep only the diffs whose file extension is on the review list.

    Matching is done on the lower-cased suffix of ``file_path``, so the
    comparison is case-insensitive. Input order is preserved.
    """
    # TODO: Update this to a more complete list of programming language extensions
    source_suffixes = {
        '.py', '.js', '.ts', '.java', '.cpp', '.c', '.h', '.hpp',
        '.go', '.rs', '.rb', '.php', '.cs', '.swift', '.kt',
        '.scala', '.clj', '.sh', '.sql', '.yml', '.yaml', '.json',
    }

    kept = []
    for candidate in diffs:
        suffix = Path(candidate.file_path).suffix.lower()
        if suffix in source_suffixes:
            kept.append(candidate)
    return kept
def analyze_git_repository(path: Path, base_branch: str) -> GitAnalysis:
    """Run the full analysis pipeline for the repository containing ``path``.

    Steps: locate the repository, resolve the current and base branches,
    diff them, convert the entries to ``GitDiff`` records and keep the
    reviewable ones.

    Args:
        path: Location inside the repository to analyze.
        base_branch: Preferred base branch; a fallback may be substituted
            by ``get_base_branch``.

    Returns:
        A ``GitAnalysis`` describing the comparison.
    """
    repo = find_git_repository(path)
    head_name = get_current_branch_name(repo)
    base_name = get_base_branch(repo, base_branch)

    reviewable = filter_reviewable_diffs(
        process_diff_items(get_diff_between_branches(repo, base_name, head_name))
    )

    # NOTE(review): total_files_changed counts only the reviewable diffs,
    # not every changed file - confirm this is intended.
    return GitAnalysis(
        repository_path=path,
        current_branch=head_name,
        base_branch=base_name,
        diffs=reviewable,
        total_files_changed=len(reviewable),
    )

View file

@ -6,6 +6,7 @@ from rich.console import Console
from rich.text import Text from rich.text import Text
from .configs import ReviewConfig from .configs import ReviewConfig
from .git_diff import GitAnalysis
def create_console() -> Console: def create_console() -> Console:
@ -42,3 +43,39 @@ def log_error(error: str) -> None:
"""Log error message with colored output.""" """Log error message with colored output."""
console = create_error_console() console = create_error_console()
console.print(f"Error: {error}") console.print(f"Error: {error}")
def log_git_analysis_start(path: Path, base_branch: str) -> None:
    """Log the start of git analysis.

    Args:
        path: Repository path about to be analyzed.
        base_branch: Branch the analysis will compare against.
    """
    # create_console must be called; binding the function object itself
    # would make the .print calls below fail with AttributeError.
    console = create_console()
    console.print(f"[dim]Analyzing git repository at:[/dim] [yellow]{path}[/yellow]")
    console.print(f"[dim]Base branch:[/dim] [cyan]{base_branch}[/cyan]")
def log_git_analysis_result(analysis: GitAnalysis) -> None:
    """Print a summary of a finished git analysis.

    Shows the current branch, the base branch and the changed-file count,
    followed by one colored line per diff (or a notice when there are
    none) and a trailing blank line.
    """
    # Color used for each change type; anything else is printed in white.
    colors_by_type = {
        "added": "green",
        "modified": "yellow",
        "deleted": "red",
    }

    out = create_console()
    out.print(f"[dim]Current branch:[/dim] [green]{analysis.current_branch}[/green]")
    out.print(f"[dim]Comparing against:[/dim] [cyan]{analysis.base_branch}[/cyan]")
    out.print(f"[dim]Files changed:[/dim] [yellow]{analysis.total_files_changed}[/yellow]")

    if not analysis.diffs:
        out.print("[dim]No reviewable files changed.[/dim]")
    else:
        out.print("\n[bold]Changed files:[/bold]")
        for entry in analysis.diffs:
            change_color = colors_by_type.get(entry.change_type, "white")
            out.print(
                f"  [{change_color}]{entry.change_type.upper():>8}[/{change_color}] {entry.file_path}"
            )

    out.print()

39
uv.lock generated
View file

@ -2,6 +2,30 @@ version = 1
revision = 1 revision = 1
requires-python = ">=3.13" requires-python = ">=3.13"
[[package]]
name = "gitdb"
version = "4.0.12"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "smmap" },
]
sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794 },
]
[[package]]
name = "gitpython"
version = "3.1.44"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "gitdb" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c0/89/37df0b71473153574a5cdef8f242de422a0f5d26d7a9e231e6f169b4ad14/gitpython-3.1.44.tar.gz", hash = "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269", size = 214196 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599 },
]
[[package]] [[package]]
name = "markdown-it-py" name = "markdown-it-py"
version = "3.0.0" version = "3.0.0"
@ -37,11 +61,15 @@ name = "reviewllama"
version = "0.1.0" version = "0.1.0"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "gitpython" },
{ name = "rich" }, { name = "rich" },
] ]
[package.metadata] [package.metadata]
requires-dist = [{ name = "rich", specifier = ">=14.0.0" }] requires-dist = [
{ name = "gitpython", specifier = ">=3.1.44" },
{ name = "rich", specifier = ">=14.0.0" },
]
[[package]] [[package]]
name = "rich" name = "rich"
@ -55,3 +83,12 @@ sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0a
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229 }, { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229 },
] ]
[[package]]
name = "smmap"
version = "5.0.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303 },
]