# To add a new cell, type '# %%'
# To add a new markdown cell, type '# %% [markdown]'
# %%
import requests
from requests_oauthlib import OAuth2Session
import os
import json
import pandas as pd
import time
from datetime import datetime, timezone

# %%
# Maximum age (in days) of commits/issues to collect; None disables the cutoff.
MAXTIME = 365
RESULT_FOLDER = "results"
CLIENTID = "hoernschen"
# SECURITY: an access token is committed here in plain text. Prefer supplying
# it through the GITHUB_TOKEN environment variable; the literal is kept only
# as a backward-compatible fallback and should be revoked and removed.
TOKEN = os.environ.get("GITHUB_TOKEN", "8f8f5c00e15f7ae7dc6dcbc38b01015c1c1a51d9")
FILE = "decentral_communication_protocols.csv"

# Accept header sent with every GitHub API request made by this script.
GITHUB_HEADERS = {'Accept': 'application/vnd.github.mercy-preview+json'}
# Timestamp format parsed from the API responses (trailing 'Z' = UTC).
GITHUB_DATE_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
# Seconds to wait before retrying after an unexpected HTTP status.
RETRY_DELAY_SECONDS = 3600


# %%
def getGithubOAuthSession():
    """Return an OAuth2Session for CLIENTID carrying TOKEN as access token."""
    token = {
        'access_token': TOKEN
    }
    return OAuth2Session(CLIENTID, token=token)


# %%
def createCSV(pathToFile, list):
    """Write the given records to `pathToFile` as CSV via a pandas DataFrame.

    NOTE: the parameter name `list` shadows the builtin; it is kept unchanged
    so existing callers are not affected.
    """
    df = pd.DataFrame(list)
    df.to_csv(pathToFile)


# %%
def _isTooOld(timestamp):
    """Return True if `timestamp` is more than MAXTIME days in the past.

    The timestamp is parsed with GITHUB_DATE_FORMAT and treated as UTC, and
    compared against the current UTC time so the result does not depend on
    the local timezone. Always False when MAXTIME is None.
    """
    if MAXTIME is None:
        return False
    created = datetime.strptime(timestamp, GITHUB_DATE_FORMAT).replace(tzinfo=timezone.utc)
    return (datetime.now(timezone.utc) - created).days > MAXTIME


def _addUnique(items, value):
    """Append `value` to `items` unless it is already present."""
    if value not in items:
        items.append(value)


# %%
def getCommits(repoName, oAuthSession):
    """Collect the commits of a repository that are at most MAXTIME days old.

    Args:
        repoName: repository in "owner/name" form, inserted into the API URL.
        oAuthSession: session object whose `get` is used for the requests.

    Returns:
        A tuple `(commitList, devList)`: the raw commit objects and the unique
        committer logins found on them.
    """
    commitList = []
    devList = []
    # GitHub pagination is 1-based; starting at 0 would fetch the first page
    # twice and duplicate its commits.
    page = 1
    while True:
        commitsResponse = oAuthSession.get(
            'https://api.github.com/repos/' + repoName + '/commits?page=' + str(page),
            headers=GITHUB_HEADERS)
        if commitsResponse.status_code == 200:
            commitsResponseJson = commitsResponse.json()
            if not commitsResponseJson:
                # An empty page means everything has been read.
                break
            page += 1
            commitsTooOld = False
            for commit in commitsResponseJson:
                if _isTooOld(commit['commit']['author']['date']):
                    commitsTooOld = True
                    break
                committer = commit['committer']
                if committer is not None and 'login' in committer:
                    _addUnique(devList, committer['login'])
                commitList.append(commit)
            if commitsTooOld:
                break
        elif commitsResponse.status_code == 409:
            # empty repo
            break
        else:
            # Report first, then wait and retry the same page.
            print("Error in Http Request (commitsRequest):", commitsResponse.status_code, commitsResponse.text)
            time.sleep(RETRY_DELAY_SECONDS)
    return commitList, devList


# %%
def getIssues(repoName, oAuthSession):
    """Collect the issues of a repository created at most MAXTIME days ago.

    Args:
        repoName: repository in "owner/name" form, inserted into the API URL.
        oAuthSession: session object whose `get` is used for the requests.

    Returns:
        A tuple `(issueList, devList)`: the raw issue objects and the unique
        logins of their assignees.
    """
    issueList = []
    devList = []
    # GitHub pagination is 1-based; starting at 0 would fetch the first page
    # twice and duplicate its issues.
    page = 1
    while True:
        issuesResponse = oAuthSession.get(
            'https://api.github.com/repos/' + repoName + '/issues?state=all&page=' + str(page),
            headers=GITHUB_HEADERS)
        if issuesResponse.status_code == 200:
            issuesResponseJson = issuesResponse.json()
            if not issuesResponseJson:
                # An empty page means everything has been read.
                break
            page += 1
            issuesTooOld = False
            for issue in issuesResponseJson:
                if _isTooOld(issue['created_at']):
                    issuesTooOld = True
                    break
                assignee = issue['assignee']
                if assignee is not None and 'login' in assignee:
                    _addUnique(devList, assignee['login'])
                for coAssignee in issue['assignees']:
                    if 'login' in coAssignee:
                        _addUnique(devList, coAssignee['login'])
                issueList.append(issue)
            if issuesTooOld:
                break
        elif issuesResponse.status_code == 409:
            # empty repo
            break
        else:
            print("Error in Http Request (issuesRequest):", issuesResponse.status_code, issuesResponse.text)
            time.sleep(RETRY_DELAY_SECONDS)
    return issueList, devList


# %%
def getRepoFacts(repoName, oAuthSession):
    """Gather issues, commits and the combined developer logins of one repo.

    Returns:
        A tuple `(issueList, commitList, devList)` where `devList` is the
        de-duplicated union of the logins reported by getIssues and getCommits.
    """
    issueList, devListIssues = getIssues(repoName, oAuthSession)
    commitList, devListCommits = getCommits(repoName, oAuthSession)
    # Copy the issue logins as individual entries (appending the list itself
    # would nest it as a single element and break de-duplication).
    devList = list(devListIssues)
    for dev in devListCommits:
        _addUnique(devList, dev)
    return issueList, commitList, devList


# %%
def getTopicFacts(topicName):
    """Aggregate repository statistics for all repos tagged with a topic.

    Searches repositories by topic, collects issues/commits/developers for
    each repository found, writes `<topic>_repos.csv`, `<topic>_issues.csv`
    and `<topic>_commits.csv`, and prints a summary.

    Returns:
        A tuple `(amountRepos, amountStars, amountIssues, amountCommits,
        amountDevs)`; all zeros if the initial search request fails.
    """
    amountRepos = 0
    amountStars = 0
    amountIssues = 0
    amountCommits = 0
    amountDevs = 0
    repoNameList = []
    repoList = []
    issueList = []
    commitList = []
    devList = []
    oAuthSession = getGithubOAuthSession()
    searchUrl = 'https://api.github.com/search/repositories?q=topic:' + topicName
    topicResponse = oAuthSession.get(searchUrl, headers=GITHUB_HEADERS)
    if topicResponse.status_code == 200:
        topicResponseJson = topicResponse.json()
        # The request above already returned page 1, so continue with page 2.
        page = 1
        while "items" in topicResponseJson and len(topicResponseJson['items']) > 0:
            page += 1
            for repo in topicResponseJson['items']:
                print(repo['full_name'])
                if repo['full_name'] not in repoNameList:
                    issueListRepo, commitListRepo, devListRepo = getRepoFacts(repo['full_name'], oAuthSession)
                    issueList.extend(issueListRepo)
                    commitList.extend(commitListRepo)
                    for dev in devListRepo:
                        _addUnique(devList, dev)
                    amountStars = amountStars + int(repo['stargazers_count'])
                    repoNameList.append(repo['full_name'])
                    repoList.append(repo)
            nextResponse = oAuthSession.get(searchUrl + '&page=' + str(page), headers=GITHUB_HEADERS)
            if nextResponse.status_code != 200:
                # Stop paging but keep what was collected so far.
                print("Error in Http Request (topicRequest):", nextResponse.status_code, nextResponse.text)
                break
            topicResponseJson = nextResponse.json()
        createCSV(topicName + "_repos.csv", repoList)
        createCSV(topicName + "_issues.csv", issueList)
        createCSV(topicName + "_commits.csv", commitList)
        amountRepos = len(repoList)
        amountDevs = len(devList)
        amountIssues = len(issueList)
        amountCommits = len(commitList)
        print("Amount Repos:", str(amountRepos))
        print("Amount Stars:", str(amountStars))
        print("Amount Issues:", str(amountIssues))
        print("Amount Commits:", str(amountCommits))
        print("Amount Devs:", str(amountDevs))
    else:
        print("Error in Http Request (topicRequest):", topicResponse.status_code, topicResponse.text)
    return amountRepos, amountStars, amountIssues, amountCommits, amountDevs


# %%
def main():
    """Compute topic statistics for every protocol in FILE and save new.csv.

    Reads FILE, calls getTopicFacts for each value of its "Github Tag" column,
    and writes the input table extended by the amount* columns to "new.csv".
    """
    amountReposList = []
    amountStarsList = []
    amountIssuesList = []
    amountCommitsList = []
    amountDevsList = []
    #result_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), RESULT_FOLDER)
    #if not os.path.exists(result_path):
    #    os.makedirs(result_path)
    protocols = pd.read_csv(FILE)
    for githubTag in protocols["Github Tag"]:
        print("Protocol:", githubTag)
        amountRepos, amountStars, amountIssues, amountCommits, amountDevs = getTopicFacts(githubTag)
        amountReposList.append(amountRepos)
        amountStarsList.append(amountStars)
        amountIssuesList.append(amountIssues)
        amountCommitsList.append(amountCommits)
        amountDevsList.append(amountDevs)
    # DataFrame.assign returns a new frame; keep the result so the computed
    # columns actually end up in the output file.
    protocols = protocols.assign(**{
        'amountRepos': amountReposList,
        'amountStars': amountStarsList,
        'amountIssues': amountIssuesList,
        'amountCommits': amountCommitsList,
        'amountDevs': amountDevsList
    })
    protocols.to_csv("new.csv")


# %%
if __name__ == "__main__":
    print("Start")
    main()
    print("End")