diff --git a/.github/workflows/goby.yml b/.github/workflows/goby.yml
new file mode 100644
index 0000000..721de3a
--- /dev/null
+++ b/.github/workflows/goby.yml
@@ -0,0 +1,42 @@
+# Runs run.py every 8 hours to collect Goby PoC files and commits the result back to the repository.
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: goby
+
+on:
+  schedule:
+    - cron: '0 */8 * * *'
+
+# The final step pushes a commit, so the workflow token needs write access to repository contents.
+permissions:
+  contents: write
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v3
+      with:
+        python-version: "3.10"
+    - name: Install dependencies
+      run: |
+        pip install requests
+    - name: goby
+      env:
+        GH_TOKEN: ${{ secrets.GH_TOKEN }}
+      run: |
+        python run.py
+    - name: Commit and push if changed
+      run: |
+        git diff
+        git config --global user.email "auto"
+        git config --global user.name "auto"
+        git add .
+        # Quote the substitution: date prints several words, and unquoted they
+        # would be split into extra arguments that git treats as pathspecs.
+        git commit --allow-empty -m "$(date)"
+        git push
diff --git a/README.md b/README.md
index d9018e1..46ff4d2 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,2 @@
-"# Goby"
+# Goby POC收集
+
diff --git a/run.py b/run.py
new file mode 100644
index 0000000..49f353b
--- /dev/null
+++ b/run.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""Collect Goby PoC files from public GitHub repositories.
+
+The script searches GitHub code search for files mentioning ``GobyQuery``,
+clones every repository that matched, and copies each ``.json`` / ``.go`` file
+containing both ``GobyQuery`` and ``ScanSteps`` into the ``json`` / ``go``
+directory next to this script. Files whose MD5 is already present in those
+directories are skipped.
+"""
+
+import os
+import hashlib
+import tempfile
+import shutil
+import subprocess
+import traceback
+import requests
+
+
+SEARCH_URL = 'https://api.github.com/search/code'
+POC_SUFFIXES = ('.json', '.go')
+
+
+def file2md5(_file):
+    """Return the hexadecimal MD5 digest of the file at path ``_file``."""
+    md5obj = hashlib.md5()
+    with open(_file, 'rb') as f:
+        # Hash in chunks so a large file is never held in memory at once.
+        for chunk in iter(lambda: f.read(65536), b''):
+            md5obj.update(chunk)
+    return md5obj.hexdigest()
+
+
+def searchcode(keyword, page=1, per_page=100):
+    """Query GitHub code search and return the decoded JSON response.
+
+    ``keyword`` is sent as the ``q`` parameter; ``page`` and ``per_page``
+    select the result page. Results are ordered by index time, newest first.
+    The ``GH_TOKEN`` environment variable, when set, is sent as the API token.
+    Returns ``{}`` if the request fails or the body is not valid JSON.
+    """
+    headers = {
+        'Connection': 'close',
+        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
+    }
+    token = os.getenv('GH_TOKEN')
+    if token:
+        headers['Authorization'] = 'token {}'.format(token)
+    data = {'q': keyword, 'sort': 'indexed',
+            'order': 'desc', 'page': page, 'per_page': per_page}
+    try:
+        # TLS verification stays enabled (the default): this request carries
+        # the API token, so the peer must be authenticated.
+        return requests.get(SEARCH_URL, params=data, headers=headers,
+                            allow_redirects=False, timeout=10).json()
+    except (requests.RequestException, ValueError):
+        # Best effort: report the failure and let the caller carry on.
+        traceback.print_exc()
+        return {}
+
+
+def _load_known_md5(root_path):
+    """Return the MD5 digests of the PoC files stored under ``root_path``."""
+    known = set()
+    for sub_dir in ('go', 'json'):
+        dir_path = os.path.join(root_path, sub_dir)
+        # Create the target directory on a fresh checkout so that both the
+        # listing here and the later copies succeed.
+        os.makedirs(dir_path, exist_ok=True)
+        for file in os.listdir(dir_path):
+            if file.endswith(POC_SUFFIXES):
+                known.add(file2md5(os.path.join(dir_path, file)))
+    return known
+
+
+def _find_repo_urls():
+    """Return the set of repository home pages that code search reports."""
+    html_urls = set()
+    # The qualifier is separated by a space: requests encodes a literal '+'
+    # as %2B, which would make it part of the search term.
+    for keyword in ('GobyQuery language:Go', 'GobyQuery language:Json'):
+        rs = searchcode(keyword, page=1, per_page=100)
+        items = rs.get('items') if isinstance(rs, dict) else None
+        for item in items or []:
+            html_url = (item.get('repository') or {}).get('html_url')
+            if html_url:
+                html_urls.add(html_url)
+    return html_urls
+
+
+def _collect_repo(url, root_path, known_md5):
+    """Clone ``url`` into a temporary directory and copy its new PoC files.
+
+    Digests of copied files are added to ``known_md5`` so identical files
+    found in later repositories are not copied again.
+    """
+    repo = url.rstrip('/').rsplit('/', 1)[-1]
+    # The context manager removes the clone once the repository is processed.
+    with tempfile.TemporaryDirectory() as temp_dir:
+        repo_path = os.path.join(temp_dir, repo)
+        print(repo_path)
+        # Argument list, not a shell string: the URL comes from an API response.
+        # A shallow clone suffices because only the checked-out files are read.
+        result = subprocess.run(
+            ['git', 'clone', '--depth', '1', url, repo_path])
+        if result.returncode != 0 or not os.path.isdir(repo_path):
+            return
+        for root, dirs, files in os.walk(repo_path):
+            if '.git' in dirs:
+                dirs.remove('.git')  # do not descend into git metadata
+            for file in files:
+                if not file.endswith(POC_SUFFIXES):
+                    continue
+                file_path = os.path.join(root, file)
+                print(file_path)
+                try:
+                    # Read bytes: a file that is not valid UTF-8 must not
+                    # abort the rest of the repository.
+                    with open(file_path, 'rb') as f:
+                        content = f.read()
+                except OSError:
+                    traceback.print_exc()
+                    continue
+                if b'GobyQuery' not in content or b'ScanSteps' not in content:
+                    continue
+                md5 = hashlib.md5(content).hexdigest()
+                if md5 in known_md5:
+                    continue
+                sub_dir = 'json' if file.endswith('.json') else 'go'
+                shutil.copyfile(file_path,
+                                os.path.join(root_path, sub_dir, file))
+                known_md5.add(md5)
+
+
+def main():
+    """Collect new PoC files from every repository found by code search."""
+    root_path = os.path.dirname(os.path.abspath(__file__))
+    print(root_path)
+    known_md5 = _load_known_md5(root_path)
+    for url in sorted(_find_repo_urls()):
+        print(url)
+        try:
+            _collect_repo(url, root_path, known_md5)
+        except Exception:
+            # One broken repository must not stop the remaining ones.
+            traceback.print_exc()
+
+
+if __name__ == '__main__':
+    main()