This commit is contained in:
test 2022-11-25 14:39:58 +08:00
parent 9c1b071a8f
commit b75cc6fc34
3 changed files with 135 additions and 1 deletions

36
.github/workflows/goby.yml vendored Normal file
View File

@ -0,0 +1,36 @@
# Scheduled workflow: runs run.py to collect Goby PoC files from GitHub and
# commits whatever the script added back to this repository.
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
name: goby

on:
  schedule:
    # Every 8 hours, on the hour (UTC).
    - cron: '0 */8 * * *'

# The last step pushes to the repository, so the job token needs write
# access to repository contents.
permissions:
  contents: write

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.10
        uses: actions/setup-python@v3
        with:
          python-version: "3.10"
      - name: Install dependencies
        run: |
          pip install requests
      - name: goby
        env:
          # Token used by run.py to authenticate against the GitHub search API.
          GH_TOKEN: ${{ secrets.GH_TOKEN }}
        run: |
          python run.py
      - name: Commit and push if changed
        run: |
          git diff
          git config --global user.email "auto"
          git config --global user.name "auto"
          git add .
          # The date must be quoted: unquoted, the shell splits it into
          # several words and git treats all but the first as pathspecs.
          # NOTE(review): --allow-empty creates a commit on every run even
          # when nothing changed -- confirm this is intended.
          git commit --allow-empty -m "$(date)"
          git push

View File

@ -1 +1,2 @@
"# Goby"
# Goby POC收集

97
run.py Normal file
View File

@ -0,0 +1,97 @@
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import hashlib
import os
import shutil
import subprocess
import tempfile
import traceback

import requests
requests.packages.urllib3.disable_warnings()
def file2md5(_file, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at ``_file``.

    The file is hashed in fixed-size chunks so that large files are never
    loaded into memory in one piece.

    Args:
        _file: Path of the file to hash.
        chunk_size: Number of bytes read per iteration.

    Returns:
        The MD5 digest as a lowercase hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    md5obj = hashlib.md5()
    with open(_file, 'rb') as f:
        # iter() with a sentinel keeps reading until read() returns b'' (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b''):
            md5obj.update(chunk)
    return md5obj.hexdigest()
def searchcode(keyword, page=1, per_page=100):
    """Query the GitHub code-search API and return the decoded JSON response.

    Args:
        keyword: Search query sent as the ``q`` parameter.
        page: Result page number to request.
        per_page: Number of results requested per page.

    Returns:
        The decoded JSON object as a dict, or an empty dict when the request
        fails, the body is not valid JSON, or the payload is not a JSON
        object.
    """
    headers = {
        'Connection': 'close',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36'
    }
    # Only send credentials when a token is configured; otherwise the header
    # would literally read "token None".
    token = os.getenv('GH_TOKEN')
    if token:
        headers['Authorization'] = 'token {}'.format(token)
    data = {'q': keyword, 'sort': 'indexed',
            'order': 'desc', 'page': page, 'per_page': per_page}
    try:
        # TLS verification is left enabled: this request carries the API
        # token, so the server certificate must be checked.
        response = requests.get('https://api.github.com/search/code', params=data,
                                headers=headers, allow_redirects=False, timeout=10)
        rj = response.json()
    except (requests.RequestException, ValueError):
        # Network failures and non-JSON bodies are reported, then treated as
        # "no results" so the caller can carry on with the next query.
        traceback.print_exc()
        return {}
    # Callers use dict access on the result, so never hand back another type.
    return rj if isinstance(rj, dict) else {}
# Maps each collected file extension to the directory (next to this script)
# that stores files of that kind.
_POC_DIRS = {'.json': 'json', '.go': 'go'}


def _poc_subdir(name):
    """Return the collection directory name for file ``name``, or None."""
    for ext, sub_dir in _POC_DIRS.items():
        if name.endswith(ext):
            return sub_dir
    return None


def _load_known_hashes(root_path):
    """Return the MD5 digests of the PoC files already stored under ``root_path``."""
    known = set()
    for sub_dir in _POC_DIRS.values():
        dir_path = os.path.join(root_path, sub_dir)
        # Git does not track empty directories, so on a fresh checkout these
        # may be missing; create them instead of crashing in listdir/copyfile.
        os.makedirs(dir_path, exist_ok=True)
        for name in os.listdir(dir_path):
            file_path = os.path.join(dir_path, name)
            if _poc_subdir(name) and os.path.isfile(file_path):
                known.add(file2md5(file_path))
    return known


def _find_repo_urls():
    """Search GitHub code for Goby PoCs and return the sorted repository URLs."""
    urls = set()
    # The term and the qualifier are separated by a space: requests
    # URL-encodes the query itself, so a literal '+' would be sent as '%2B'
    # rather than as a separator.
    for keyword in ['GobyQuery language:Go', 'GobyQuery language:Json']:
        try:
            rs = searchcode(keyword, page=1, per_page=100)
            for item in rs.get('items', []):
                html_url = item.get('repository', {}).get('html_url')
                if html_url:
                    urls.add(html_url)
        except Exception:
            traceback.print_exc()
    return sorted(urls)


def _collect_from_repo(url, root_path, known):
    """Clone ``url`` into a temporary directory and copy its new PoC files.

    A file is copied when its name ends in one of the collected extensions,
    its content contains both ``GobyQuery`` and ``ScanSteps``, and its MD5
    digest is not yet in ``known``. ``known`` is updated in place so the same
    content found in several repositories is copied only once.
    """
    # The context manager keeps the directory alive for the whole clone and
    # removes it afterwards.
    with tempfile.TemporaryDirectory() as temp_dir:
        repo_name = url.rstrip('/').rsplit('/', 1)[-1] or 'repo'
        repo_path = os.path.join(temp_dir, repo_name)
        print(repo_path)
        # Argument list instead of a shell string: the URL comes from an API
        # response and must not be interpreted by a shell. Only the working
        # tree is needed, so a shallow clone is enough.
        result = subprocess.run(
            ['git', 'clone', '--depth', '1', '--', url, repo_path],
            env=dict(os.environ, GIT_TERMINAL_PROMPT='0'),
            check=False,
        )
        if result.returncode != 0 or not os.path.isdir(repo_path):
            return
        for root, dirs, files in os.walk(repo_path):
            # Git metadata never contains PoC files; do not descend into it.
            if '.git' in dirs:
                dirs.remove('.git')
            for name in files:
                sub_dir = _poc_subdir(name)
                if sub_dir is None:
                    continue
                file_path = os.path.join(root, name)
                # Cloned content is untrusted: do not follow symlinks that
                # could point outside the clone.
                if os.path.islink(file_path):
                    continue
                print(file_path)
                try:
                    # Read bytes so a file that is not valid UTF-8 cannot
                    # abort processing of the remaining files.
                    with open(file_path, 'rb') as f:
                        content = f.read()
                except OSError:
                    traceback.print_exc()
                    continue
                if b'GobyQuery' not in content or b'ScanSteps' not in content:
                    continue
                md5 = hashlib.md5(content).hexdigest()
                if md5 in known:
                    continue
                dest = os.path.join(root_path, sub_dir, name)
                if os.path.exists(dest):
                    # Same file name but different content: keep both rather
                    # than overwriting the PoC collected earlier.
                    stem, ext = os.path.splitext(name)
                    dest = os.path.join(
                        root_path, sub_dir, '{}_{}{}'.format(stem, md5[:8], ext))
                shutil.copyfile(file_path, dest)
                known.add(md5)


def main():
    """Collect Goby PoC files from GitHub into the go/ and json/ directories."""
    root_path = os.path.dirname(os.path.abspath(__file__))
    print(root_path)
    # Digests of the PoCs collected by earlier runs.
    known = _load_known_hashes(root_path)
    for url in _find_repo_urls():
        print(url)
        try:
            _collect_from_repo(url, root_path, known)
        except Exception:
            # One broken repository must not stop the remaining ones.
            traceback.print_exc()


if __name__ == '__main__':
    main()