Source code for utils.mappers

"""Module containing helper functions for GitHub issues mapping."""

import pickle
import re
from collections import defaultdict
from dataclasses import dataclass
from datetime import timedelta
from pathlib import Path
from typing import Any, List

from django.conf import settings
from django.db import transaction
from django.http import Http404
from django.shortcuts import get_object_or_404

from core.models import (
    Contribution,
    Contributor,
    Cycle,
    Handle,
    Issue,
    IssueStatus,
    Reward,
    RewardType,
    SocialPlatform,
)
from issues.main import IssueProvider
from utils.constants.core import (
    EXCLUDED_CONTRIBUTORS,
    GITHUB_ISSUES_START_DATE,
    ISSUE_CREATION_LABEL_CHOICES,
    REWARDS_COLLECTION,
)
from utils.helpers import read_pickle

URL_EXCEPTIONS = ["discord.com/invite"]
REWARD_LABELS = [
    entry[0].split("]")[0].strip("[]")
    for entry in REWARDS_COLLECTION
    if entry and isinstance(entry[0], str)
]
REWARD_PATTERN = re.compile(rf"^\[({'|'.join(REWARD_LABELS)})(1|2|3)\]")


## HELPERS
[docs] @dataclass class CustomIssue: """A simple data class for issues and comments.""" issue: Any comments: List[Any]
def _build_reward_mapping(): """Build mapping from issue detection labels to active rewards. :return: mapping from label type to reward object :rtype: dict of str: :class:`core.models.Reward` """ reward_mapping = {} # Regex pattern to extract code between brackets: [CODE] Description bracket_pattern = r"\[([^\]]+)\]" for label_type, label_name in ISSUE_CREATION_LABEL_CHOICES[:4]: # Find first occurrence in REWARDS_COLLECTION where # label_name appears in the first element reward_config = next( config for config in REWARDS_COLLECTION if label_name.lower() in config[0].lower() ) # Extract label code using regex (e.g., "AT" from "[AT] Admin Task") match = re.search(bracket_pattern, reward_config[0]) if match: label_code = match.group(1) # Get the content between brackets amount = reward_config[1] # Get the amount # Find active reward with matching type label and amount try: reward = Reward.objects.get( type__label=label_code, amount=amount, active=True ) reward_mapping[label_type] = reward except Reward.DoesNotExist: print(f"No active reward found for {label_code} with amount {amount}") continue except Reward.MultipleObjectsReturned: reward = Reward.objects.filter( type__label=label_code, amount=amount, active=True ).first() reward_mapping[label_type] = reward print(f"Multiple rewards found for {label_code}, using first one") else: print(f"Could not extract label code from: {reward_config[0]}") return reward_mapping def _extract_url_text(body, platform_id): """Extract URL from issue body in markdown format. :param body: GitHub issue body text :type body: str :param platform_id: platform ID to help identify relevant URLs :type platform_id: int :return: extracted URL if found, None otherwise :rtype: str or None """ # Get platform name for URL matching try: platform = SocialPlatform.objects.get(id=platform_id) platform_name = platform.name.lower() except SocialPlatform.DoesNotExist: return None # Look for markdown links [text](url) url_pattern = r"\[[^\]]*\]\(([^)]+)\)" matches = re.findall(url_pattern, body) for url in matches: if ( url.startswith("http") and platform_name in url.lower() and not any(text in url.lower() for text in URL_EXCEPTIONS) ): return url return None def _fetch_and_categorize_issues(issue_tracker_api_token, refetch=False): """Fetch all GitHub issues and return them categorized. :param issue_tracker_api_token: GitHub API token :type issue_tracker_api_token: str :param refetch: should recorded issues be refetched or not :type refetch: Boolean :var github_issues: collection of categorized GitHub issue instances :type github_issues: dict :var counter: currently processed issue ordinal :type counter: int :var issue: currently processed issue :type issue: :class:`github.Issue.Issue` :return: collection of categorized GitHub issue instances :rtype: dict """ github_issues = _load_saved_issues() if not refetch else defaultdict(list) if not issue_tracker_api_token: return github_issues issue = None for counter, issue in enumerate( IssueProvider( None, issue_tracker_api_token=issue_tracker_api_token ).fetch_issues( state="all", since=github_issues.get("timestamp", GITHUB_ISSUES_START_DATE), ) ): if issue.pull_request: continue comments = ( [comment.body for comment in issue.get_comments()] if issue.comments else [] ) custom_issue = CustomIssue(issue, comments) github_issues[issue.state].append(custom_issue) if divmod(counter, 10)[1] == 0: print("Issue number: ", issue.number) _save_issues(github_issues, issue.updated_at) # # Remove duplicates # github_issues["closed"] = sorted( # list(set(github_issues["closed"])), key=lambda i: getattr(i, "number") # ) # github_issues["open"] = sorted( # list(set(github_issues["open"])), key=lambda i: getattr(i, "number") # ) # _save_issues(github_issues, github_issues["timestamp"]) if issue: _save_issues(github_issues, issue.updated_at + timedelta(seconds=10)) print( "Number of issues: " f"{len(github_issues.get('closed', [])) + len(github_issues.get('open', []))}" ) return github_issues def _identify_contributor_from_text(text, contributors): """Identify contributor from issue body by matching contributor info and handles. :param body: GitHub issue body and comments text :type body: str :param contributors: mapping from contributor info to ID :type contributors: dict of str: int :return: contributor ID if found, None otherwise :rtype: int or None """ # Handle None or empty text if not text: return None # Convert body to lowercase for case-insensitive matching text_lower = text.lower() for contributor_info, contributor_id in contributors.items(): # If contributor info doesn't have parentheses, search for the whole info if "(" not in contributor_info: # For simple format, require exact word match to avoid false positives if contributor_info.lower() in text_lower: return contributor_id else: # If contributor info has parentheses, extract and search for handles # Format: "Name (handle1, handle2, ...)" name_part = contributor_info.split("(")[0].strip() handles_part = contributor_info[contributor_info.index("(") + 1 : -1] # Split handles by comma and clean up handles = [handle.strip() for handle in handles_part.split(",")] # Search for the name part in body - require exact word match if name_part.lower() in text_lower: return contributor_id # Search for individual handles in body - allow partial matches for handle in handles: if handle.lower() in text_lower: return contributor_id return None def _identify_contributor_from_user(user, contributors, strict=True): """Identify contributor from issue body by matching contributor info and handles. :param user: GitHub username :type user: str :param contributors: mapping from contributor info to ID :type contributors: dict of str: int :return: contributor ID if found, None otherwise :rtype: int or None """ # Handle None or empty user if not user: return None # Convert to lowercase for case-insensitive matching user_lower = user.lower() handle = "g@" + user_lower if strict else user_lower for contributor_info, contributor_id in contributors.items(): if handle in contributor_info.lower(): return contributor_id return None def _identify_platform_from_text(text, platforms): """Identify platform from issue body by matching platform names. :param body: GitHub issue body and comments text :type body: str :param platforms: mapping from platform name to ID :type platforms: dict of str: int :return: platform ID if found, None otherwise :rtype: int or None """ for platform_name, platform_id in platforms.items(): if platform_name.lower() in text.lower(): return platform_id return None def _identify_reward_from_labels(labels, reward_mapping): """Identify reward based on GitHub issue labels. :param labels: list of GitHub issue label objects :type labels: list of :class:`github.Label.Label` :param reward_mapping: mapping from label types to rewards :type reward_mapping: dict of str: :class:`core.models.Reward` :return: reward object if found, None otherwise :rtype: :class:`core.models.Reward` or None """ for label in labels: label_name = label.name.lower() # Check for exact matches first for label_type in reward_mapping.keys(): if label_type.lower() == label_name: return reward_mapping[label_type] # Check for partial matches for label_type in reward_mapping.keys(): if label_type.lower() in label_name: return reward_mapping[label_type] return None def _identify_reward_from_issue_title(title, active=True): """Checks if the provided title text starts with a valid reward pattern like: [F(1)], [B(2)], [ER(3)], etc. :param title: GitHub issue's title :type title: str :param active: is reward active or not :type active: Boolean return: :class:`core.models.Reward` """ if not title: return None match = REWARD_PATTERN.match(title.strip()) if not match: return None label, level = match.groups() level = int(level) reward_type = get_object_or_404(RewardType, label=label) return Reward.objects.filter(type=reward_type, level=level, active=active).first() def _is_url_github_issue(url): """Check if a URL matches the pattern of a GitHub issue in the configured repository. :param url: URL to check :type url: str :return: GitHub issue number if URL matches pattern, False otherwise :rtype: int or bool :var pattern: regex pattern for GitHub issue URL matching :type pattern: str :var match: regex match object :type match: :class:`re.Match` or None """ pattern = ( rf"^.*github\.com/{settings.ISSUE_TRACKER_OWNER}/" rf"{settings.ISSUE_TRACKER_NAME}/issues/(\d+).*" ) match = re.match(pattern, url) if not match: return False return int(match.groups()[0]) ## I/O def _load_saved_issues(): """Load saved GitHub issues from pickle file. :return: defaultdict containing GitHub issues data """ github_issues = defaultdict(list) path = Path(__file__).resolve().parent.parent / "fixtures" / "github_issues.pkl" data = read_pickle(path) for key in data: github_issues[key] = data[key] return github_issues def _save_issues(github_issues, timestamp): """Save GitHub issues to pickle file with timestamp. :param github_issues: dictionary containing GitHub issues data :param timestamp: timestamp to include in the saved data """ path = Path(__file__).resolve().parent.parent / "fixtures" / "github_issues.pkl" github_issues["timestamp"] = timestamp # Create directory if it doesn't exist path.parent.mkdir(parents=True, exist_ok=True) with open(path, "wb") as pickle_file: pickle.dump(github_issues, pickle_file) ## MAPPING def _create_contributor_from_text(text, contributors): """Create a new contributor from text by extracting handle from common patterns. :param text: GitHub issue body and comments text :type text: str :param contributors: mapping from contributor info to ID :type contributors: dict of str: int :return: tuple of (contributor_id, updated_contributors_dict) or (None, contributors) if no handle found :rtype: tuple (int or None, dict) """ if not text: return None, contributors # Single pattern: "By " followed by any string, then "in" or "on" followed by platform with or without brackets pattern = r"By ([\w\s\-_.]+) (?:in|on) \[?(Discord|Twitter|Reddit)\]?" handle = None platform_name = None matches = re.findall(pattern, text, re.IGNORECASE) if matches: handle_candidate, platform_candidate = matches[0] handle = handle_candidate.strip() platform_name = platform_candidate if not handle or not platform_name: return None, contributors platform = SocialPlatform.objects.get(name=platform_name) # Check if contributor with this name already exists existing_contributor = Contributor.objects.from_handle(handle) if existing_contributor: # Contributor exists, create handle if it doesn't exist Handle.objects.get_or_create( contributor=existing_contributor, platform=platform, defaults={"handle": handle}, ) # Update contributors mapping contributors[existing_contributor.info] = existing_contributor.id return existing_contributor.id, contributors else: # Create new contributor new_contributor = Contributor.objects.create(name=handle) Handle.objects.create( contributor=new_contributor, platform=platform, handle=handle ) # Update contributors mapping contributors[new_contributor.info] = new_contributor.id return new_contributor.id, contributors def _create_issues_bulk(issue_assignments): """Bulk create issues and assign them to contributions in optimized database operations. This function processes issue-contribution assignments in bulk to minimize database round-trips. It creates missing issues and updates contributions with their assigned issues using bulk operations. :param issue_assignments: list of issue number and contribution ID pairs to assign :type issue_assignments: list of tuple (int, int) :var unique_issue_numbers: set of distinct GitHub issue numbers to process :type unique_issue_numbers: set of int :var existing_issues: existing Issue objects from database :type existing_issues: QuerySet of :class:`core.models.Issue` :var existing_issue_numbers: set of issue numbers that already exist in database :type existing_issue_numbers: set of int :var issues_to_create: list of Issue instances to create in bulk :type issues_to_create: list of :class:`core.models.Issue` :var fetch_issues_dict: mapping from issue number to Issue instance :type fetch_issues_dict: dict of int: :class:`core.models.Issue` :var contribution_updates: list of contribution ID and Issue instance pairs :type contribution_updates: list of tuple (int, :class:`core.models.Issue`) :var contribution_ids: list of contribution IDs to update :type contribution_ids: list of int :var contributions: Contribution objects to be updated :type contributions: QuerySet of :class:`core.models.Contribution` :var issue_by_contribution_id: mapping from contribution ID to assigned Issue :type issue_by_contribution_id: dict of int: :class:`core.models.Issue` """ if not issue_assignments: return # Get unique issue numbers unique_issue_numbers = {number: status for number, _, status in issue_assignments} # Get existing issues existing_issues = Issue.objects.filter(number__in=unique_issue_numbers) existing_issue_numbers = set(existing_issues.values_list("number", flat=True)) # Create missing issues issues_to_create = [ Issue(number=number, status=status) for number, status in unique_issue_numbers.items() if number not in existing_issue_numbers ] if issues_to_create: Issue.objects.bulk_create(issues_to_create) # Get all issues (newly created + existing) fetch_issues_dict = { issue.number: issue for issue in Issue.objects.filter(number__in=unique_issue_numbers) } # Prepare contribution updates contribution_updates = [] for issue_number, contribution_id, _ in issue_assignments: if issue_number in fetch_issues_dict: contribution_updates.append( (contribution_id, fetch_issues_dict[issue_number]) ) # Bulk update contributions if contribution_updates: contribution_ids = [cont_id for cont_id, _ in contribution_updates] contributions = Contribution.objects.filter(id__in=contribution_ids) # Create mapping for updates issue_by_contribution_id = { cont_id: issue for cont_id, issue in contribution_updates } # Update in bulk for contribution in contributions: contribution.issue = issue_by_contribution_id[contribution.id] Contribution.objects.bulk_update(contributions, ["issue"]) @transaction.atomic def _map_closed_addressed_issues(github_issues): """Fetch GitHub issues with "addressed" label and create contributions. This function processes GitHub issues labeled as "addressed" and: 1. Creates Issue objects with ADDRESSED status 2. Identifies contributors from issue text and user 3. Creates new contributors from text patterns if none found 4. Identifies platforms from issue text 5. Determines rewards based on issue labels 6. Extracts URLs from issue bodies 7. Creates contributions for each identified contributor :param github_issues: collection of GitHub issue instances :type github_issues: list :return: True if operation completed successfully, False if no issues found :rtype: bool """ # Filter issues with "addressed" label addressed_issues = [ issue for issue in github_issues if hasattr(issue, "issue") and hasattr(issue.issue, "labels") and any( (getattr(label, "name", label) or "").lower() == "addressed" for label in issue.issue.labels or [] ) ] if not addressed_issues: return False # Fetch existing rewards mapping reward_mapping = _build_reward_mapping() # Fetch all contributors and create info mapping contributors = { c.info: c.id for c in Contributor.objects.all() if not any(u in c.info for u in EXCLUDED_CONTRIBUTORS) } # Fetch all platforms by name platforms = { platform.name: platform.id for platform in SocialPlatform.objects.all() } # Define current cycle (using latest end date as specified) cycle = Cycle.objects.latest("end") # Process each addressed issue for github_issue in addressed_issues: # Skip issues with no body/comments or internal titles if not (github_issue.issue.body or github_issue.comments): continue if "[Internal]" in github_issue.issue.title: continue number = github_issue.issue.number # Combine body and comments for text analysis search_text = "\n".join([github_issue.issue.body or "", *github_issue.comments]) # Identify platform from issue body platform_id = _identify_platform_from_text(search_text, platforms) if not platform_id: # Fallback to GitHub platform_id = platforms.get("GitHub") reward = _identify_reward_from_issue_title(github_issue.issue.title) if not reward: reward = _identify_reward_from_labels( github_issue.issue.labels, reward_mapping ) if not reward: continue # Skip if no reward identified # Extract URL from issue body url = _extract_url_text(search_text, platform_id) # Get or create issue with ADDRESSED status try: issue = Issue.objects.get(number=number) # Update status to ADDRESSED if it was previously CREATED if issue.status == IssueStatus.CREATED: issue.status = IssueStatus.ADDRESSED issue.save() except Issue.DoesNotExist: issue = Issue.objects.create(number=number, status=IssueStatus.ADDRESSED) # Identify contributors - try multiple methods contributor_ids = set() # Method 1: Identify from issue user user_contributor_id = _identify_contributor_from_user( github_issue.issue.user.login, contributors, strict=False ) if user_contributor_id: contributor_ids.add(user_contributor_id) # Method 2: Identify from issue text text_contributor_id = _identify_contributor_from_text(search_text, contributors) if text_contributor_id: contributor_ids.add(text_contributor_id) # Method 3: Create new contributor from text patterns if none found if not contributor_ids: created_contributor_id, updated_contributors = ( _create_contributor_from_text(search_text, contributors) ) if created_contributor_id: contributor_ids.add(created_contributor_id) # Update the contributors dict with the new contributor contributors.update(updated_contributors) else: continue # Create contributions for each identified contributor for contributor_id in contributor_ids: # Check if contribution already exists for this issue and contributor existing_contribution = Contribution.objects.filter( issue=issue, contributor_id=contributor_id ).first() if not existing_contribution: Contribution.objects.create( contributor_id=contributor_id, cycle=cycle, platform_id=platform_id, reward=reward, issue=issue, percentage=1, # Default as specified url=url, confirmed=True, # As specified ) return True @transaction.atomic def _map_closed_archived_issues(github_issues): """Fetch GitHub issues and assign them to contributions based on URL matching. This function traverses all closed GitHub issues and attempts to match them with existing contributions by searching for contribution URLs in the issue bodies. When a match is found, creates an Issue record and assigns it to the contribution. :param github_issues: collection of GitHub issue instances :type github_issues: list :var contributions: all the existing contribution instances :type contributions: QuerySet of :class:`core.models.Contribution` :var url_to_contribution: mapping from URL to contribution instance :type url_to_contribution: dict of str: :class:`core.models.Contribution` :var issue_assignments: collection of issue number, ID and status to process :type issue_assignments: set of tuple (int, int) :var unprocessed_github_issues: collection of unprocessed GitHub issues :type unprocessed_github_issues: list :return: list """ if not github_issues: return [] # Get all contributions in one query contributions = Contribution.objects.all().only("id", "url") if not contributions: return [] # Create a mapping from GitHub issue number to issue object for quick lookup github_issues_by_number = { issue.issue.number: issue.issue for issue in github_issues } # Collect all assignments in memory first issue_assignments = set() # Process each contribution and try to find matching GitHub issues without_url = [] for contribution in contributions: if not contribution.url: without_url.append(contribution.id) continue # Method 1: Check if contribution URL is a GitHub issue URL issue_number = _is_url_github_issue(contribution.url) if issue_number and issue_number in github_issues_by_number: # This contribution URL points directly to a GitHub issue issue_assignments.add((issue_number, contribution.id, IssueStatus.ARCHIVED)) continue # Skip body matching if we found a direct GitHub issue match # Method 2: Search through issues for this contribution's URL in their bodies for github_issue in github_issues: search_text = "\n".join( [github_issue.issue.body or "", *github_issue.comments] ) if contribution.url in search_text: issue_assignments.add( (github_issue.issue.number, contribution.id, IssueStatus.ARCHIVED) ) break # One issue per contribution (first match found) if without_url: print("MISSING URL:", without_url) # Process all assignments in bulk _create_issues_bulk(list(issue_assignments)) unprocessed_github_issues = [ issue for issue in github_issues if issue.issue.number not in {_number for _number, _, _ in issue_assignments} and "[Internal]" not in issue.issue.title and "wontfix" not in [label.name for label in issue.issue.labels] and "addressed" not in [label.name for label in issue.issue.labels] ] return unprocessed_github_issues @transaction.atomic def _map_open_issues(github_issues): """Fetch open GitHub issues and create contributions for detected contributors. This function retrieves all open GitHub issues and attempts to: 1. Identify contributors from issue bodies 2. Identify platforms from issue bodies 3. Determine rewards based on issue labels 4. Extract URLs from issue bodies 5. Create contributions with the detected information :param github_issues: collection of GitHub issue instances :type github_issues: list :var contributors: mapping from contributor info to contributor ID :type contributors: dict of str: int :var platforms: mapping from platform name to platform ID :type platforms: dict of str: int :var cycle: current active cycle :type cycle: :class:`core.models.Cycle` :var reward_mapping: mapping from label types to reward configurations :type reward_mapping: dict of str: tuple :return: True if operation completed successfully, False if no token provided :rtype: bool """ if not github_issues: return False # Fetch existing rewards mapping reward_mapping = _build_reward_mapping() # Fetch all contributors and create info mapping contributors = { contributor.info: contributor.id for contributor in Contributor.objects.all() if not any(username in contributor.info for username in EXCLUDED_CONTRIBUTORS) } # Fetch all platforms by name platforms = { platform.name: platform.id for platform in SocialPlatform.objects.all() } # Define current cycle cycle = Cycle.objects.latest("start") # Process each open issue for github_issue in github_issues: if ( not (github_issue.issue.body or github_issue.comments) or "[Internal]" in github_issue.issue.title ): continue number = github_issue.issue.number search_text = "\n".join([github_issue.issue.body or "", *github_issue.comments]) # Identify contributor from issue user or text contributor_id = _identify_contributor_from_user( github_issue.issue.user.login, contributors, strict=False ) if not contributor_id: contributor_id = _identify_contributor_from_text(search_text, contributors) if not contributor_id: print("No contributor for GitHub issue", number) continue # Skip if no contributor identified # Identify platform from issue body platform_id = _identify_platform_from_text(search_text, platforms) if not platform_id: platform_id = next( platform_id for platform_name, platform_id in platforms.items() if platform_name == "GitHub" ) reward = _identify_reward_from_issue_title(github_issue.issue.title) if not reward: reward = _identify_reward_from_labels( github_issue.issue.labels, reward_mapping ) if not reward: print("No reward for GitHub issue", number) continue # Skip if no reward identified # Extract URL from issue body url = _extract_url_text(search_text, platform_id) # Get or create issue try: issue = get_object_or_404(Issue, number=number, status=IssueStatus.CREATED) except Http404: issue = Issue.objects.create(number=number, status=IssueStatus.CREATED) # Create contribution Contribution.objects.create( contributor_id=contributor_id, cycle=cycle, platform_id=platform_id, reward=reward, issue=issue, percentage=1, url=url, confirmed=True, ) return True @transaction.atomic def _map_unprocessed_closed_archived_issues(github_issues): """Create contributions based on closing date from GitHub issues. This function processes GitHub issues labeled as "archived" and: 1. Determines the appropriate cycle based on issue closing date 2. Creates Issue objects with ARCHIVED status 3. Identifies contributors from issue text and user 4. Creates new contributors from text patterns if none found 5. Identifies platforms from issue text 6. Determines rewards based on issue labels 7. Extracts URLs from issue bodies 8. Creates contributions for each identified contributor :param github_issues: collection of GitHub issue instances :type github_issues: list :return: True if operation completed successfully, False if no issues found :rtype: bool """ # Filter issues with "archived" label archived_issues = [ issue for issue in github_issues if any(label.name.lower() == "archived" for label in issue.issue.labels) ] if not archived_issues: return False # Fetch existing rewards mapping reward_mapping = _build_reward_mapping() # Fetch all contributors and create info mapping contributors = { contributor.info: contributor.id for contributor in Contributor.objects.all() if not any(username in contributor.info for username in EXCLUDED_CONTRIBUTORS) } # Fetch all platforms by name platforms = { platform.name: platform.id for platform in SocialPlatform.objects.all() } # Process each archived issue for github_issue in archived_issues: if ( not (github_issue.issue.body or github_issue.comments) or "[Internal]" in github_issue.issue.title ): continue number = github_issue.issue.number # Skip if issue already has an associated Issue record if Issue.objects.filter(number=number).exists(): continue # Determine cycle based on closing date if not github_issue.issue.closed_at: print(f"No closing date for archived GitHub issue {number}, skipping") continue cycle = Cycle.objects.filter( start__lte=github_issue.issue.closed_at, end__gte=github_issue.issue.closed_at, ).first() if not cycle: print( f"No cycle found for closing date {github_issue.issue.closed_at}" " in issue {number}, skipping" ) continue # Combine body and comments for text analysis search_text = "\n".join([github_issue.issue.body or "", *github_issue.comments]) # Identify platform from issue body platform_id = _identify_platform_from_text(search_text, platforms) if not platform_id: platform_id = next( platform_id for platform_name, platform_id in platforms.items() if platform_name == "GitHub" ) reward = _identify_reward_from_issue_title( github_issue.issue.title, active=False ) if not reward: reward = _identify_reward_from_labels( github_issue.issue.labels, reward_mapping ) if not reward: print(f"No reward for archived GitHub issue {number}") continue # Skip if no reward identified # Extract URL from issue body url = _extract_url_text(search_text, platform_id) # Create issue with ARCHIVED status issue = Issue.objects.create(number=number, status=IssueStatus.ARCHIVED) # Identify contributors - try multiple methods contributor_ids = set() # Method 1: Identify from issue user user_contributor_id = _identify_contributor_from_user( github_issue.issue.user.login, contributors, strict=False ) if user_contributor_id: contributor_ids.add(user_contributor_id) # Method 2: Identify from issue text text_contributor_id = _identify_contributor_from_text(search_text, contributors) if text_contributor_id: contributor_ids.add(text_contributor_id) # Method 3: Create new contributor from text patterns if none found if not contributor_ids: created_contributor_id, contributors = _create_contributor_from_text( search_text, contributors ) if created_contributor_id: contributor_ids.add(created_contributor_id) else: print(f"No contributors found for archived GitHub issue {number}") continue # Create contributions for each identified contributor for contributor_id in contributor_ids: Contribution.objects.create( contributor_id=contributor_id, cycle=cycle, platform_id=platform_id, reward=reward, issue=issue, percentage=1, # Default as specified url=url, confirmed=True, # As specified ) return True
[docs] def map_github_issues(issue_tracker_api_token=""): """Fetch existing GitHub issues and create database records from them. :param issue_tracker_api_token: GitHub API token :type issue_tracker_api_token: str :var github_issues: collection of GitHub issue instances :type github_issues: list :var closed_size: number of issues created from closed GitHub issues :type closed_size: int :var size: number of issues created from GitHub issues :type size: int :return: Boolean """ github_issues = _fetch_and_categorize_issues(issue_tracker_api_token) print("Fetched closed issues size: ", len(github_issues.get("closed", []))) unprocessed_github_issues = _map_closed_archived_issues( github_issues.get("closed", []) ) closed_size = len(Issue.objects.all()) print("Issues created from closed archived GitHub issues: ", closed_size) _map_unprocessed_closed_archived_issues(unprocessed_github_issues) print( "Issues created from unprocessed archived GitHub issues: ", len(Issue.objects.all()) - closed_size, ) closed_size = len(Issue.objects.all()) _map_closed_addressed_issues(github_issues.get("closed", [])) print( "Issues created from closed addressed GitHub issues: ", len(Issue.objects.all()) - closed_size, ) closed_size = len(Issue.objects.all()) print("Issues created from closed GitHub issues: ", closed_size) print("Fetched open issues size: ", len(github_issues.get("open", []))) _map_open_issues(github_issues.get("open", [])) size = len(Issue.objects.all()) print("Issues created from open GitHub issues: ", size - closed_size) print("Total number of issues created: ", size) return False