feat: Implement build vector store workflow and local script for artifact generation
Browse files
- .github/workflows/build-vector-store.yml +111 -0
- .gitignore +1 -1
- scripts/build-vector-store.sh +38 -0
.github/workflows/build-vector-store.yml
CHANGED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Build Vector Store

# Three ways to start a build: a change under data/ landing on main,
# a nightly timer, or a manual run from the Actions tab.
on:
  push:
    branches:
      - main
    paths:
      - 'data/**'
  schedule:
    # Run daily at midnight UTC
    - cron: '0 0 * * *'
  workflow_dispatch:
    inputs:
      # Checkbox shown on the manual-run form; ticked by default.
      force_recreate:
        description: 'Force recreation of the vector store'
        type: boolean
        required: false
        default: true
|
18 |
+
|
19 |
+
jobs:
  # Builds the vector store, publishes the result as a workflow artifact and,
  # when the pipeline reports success, as a GitHub release.
  build-vector-store:
    runs-on: ubuntu-latest

    # The release step creates a tag and uploads assets with GITHUB_TOKEN,
    # which needs write access to repository contents.
    permissions:
      contents: write

    env:
      # Directory that receives the pipeline output and the zipped store.
      ARTIFACTS_DIR: ./artifacts

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for proper versioning

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.13'
          cache: 'pip'

      - name: Install dependencies
        run: |
          # Install uv
          curl -LsSf https://astral.sh/uv/install.sh | sh

          # An `export PATH=...` only lasts for this step; GITHUB_PATH makes uv
          # visible to the later `uv run` step as well. Both directories are
          # listed because the installer's target has differed between uv
          # releases.
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"
          echo "$HOME/.cargo/bin" >> "$GITHUB_PATH"
          export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"

          # Use uv to install dependencies. `uv sync` builds the project
          # environment on its own, so no separate pip upgrade is needed.
          uv sync

      - name: Build vector store
        id: build-vector
        env:
          # 'true' only when a manual run left the force_recreate box ticked.
          # Push and schedule events carry no inputs, so they get 'false'.
          FORCE_RECREATE: ${{ github.event.inputs.force_recreate == 'true' }}
        run: |
          # Make sure the zip below has somewhere to land.
          mkdir -p "$ARTIFACTS_DIR"

          # Build with or without force recreate based on the input
          if [ "$FORCE_RECREATE" = "true" ]; then
            uv run python py-src/pipeline.py --force-recreate --ci --output-dir "$ARTIFACTS_DIR"
          else
            uv run python py-src/pipeline.py --ci --output-dir "$ARTIFACTS_DIR"
          fi

          # Create a zip file of the vector store. The subshell keeps the
          # directory change local to this one command.
          (cd db && zip -r "../$ARTIFACTS_DIR/vector_store.zip" vector_store_4)

      - name: Read CI summary
        id: ci_summary
        run: |
          # Read the CI summary file to extract values
          SUMMARY_FILE="$ARTIFACTS_DIR/ci_summary.json"
          if [ -f "$SUMMARY_FILE" ]; then
            STATUS=$(jq -r '.status' "$SUMMARY_FILE")
            DOC_COUNT=$(jq -r '.document_count' "$SUMMARY_FILE")

            echo "status=$STATUS" >> "$GITHUB_OUTPUT"
            echo "document_count=$DOC_COUNT" >> "$GITHUB_OUTPUT"
          else
            # No summary file: report failure so the release step is skipped.
            echo "status=failure" >> "$GITHUB_OUTPUT"
            echo "document_count=0" >> "$GITHUB_OUTPUT"
          fi

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: vector-store-artifacts
          path: |
            ${{ env.ARTIFACTS_DIR }}/*.json
            ${{ env.ARTIFACTS_DIR }}/vector_store.zip

      - name: Get version
        id: get_version
        env:
          DOC_COUNT: ${{ steps.ci_summary.outputs.document_count }}
        run: |
          # Create a version based on date and document count
          VERSION="v$(date +'%Y.%m.%d')-docs${DOC_COUNT}"
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

      - name: Create GitHub Release
        id: create_release
        # Publish only when the pipeline's own summary reports success.
        if: steps.ci_summary.outputs.status == 'success'
        uses: softprops/action-gh-release@v1
        with:
          tag_name: ${{ steps.get_version.outputs.version }}
          name: Vector Store ${{ steps.get_version.outputs.version }}
          body: |
            Vector store updated with ${{ steps.ci_summary.outputs.document_count }} documents.

            This is an automated release created by the vector store build workflow.
          files: |
            ${{ env.ARTIFACTS_DIR }}/*.json
            ${{ env.ARTIFACTS_DIR }}/vector_store.zip
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
.gitignore
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
|
2 |
db/
|
3 |
-
|
4 |
|
5 |
# Byte-compiled / optimized / DLL files
|
6 |
__pycache__/
|
|
|
1 |
|
2 |
db/
|
3 |
+
artifacts/
|
4 |
|
5 |
# Byte-compiled / optimized / DLL files
|
6 |
__pycache__/
|
scripts/build-vector-store.sh
CHANGED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
# Script to build vector store locally
# Usage: ./scripts/build-vector-store.sh [--force-recreate]
#
# Runs py-src/pipeline.py in CI mode with its output directed to ./artifacts,
# then zips db/vector_store_4 into ./artifacts/vector_store.zip when that
# directory exists. All paths are relative, so run it from the repository root.
# Exits 1 if the pipeline, the output directory, or the zip step fails.

# Treat a reference to an unset variable as an error instead of an empty string.
set -u

FORCE_RECREATE=""
if [[ "${1:-}" == "--force-recreate" ]]; then
    FORCE_RECREATE="--force-recreate"
fi

# Set output directory for artifacts
OUTPUT_DIR="./artifacts"
if ! mkdir -p "$OUTPUT_DIR"; then
    echo "Could not create $OUTPUT_DIR" >&2
    exit 1
fi

echo "Building vector store with output to $OUTPUT_DIR"
echo "Force recreate: ${FORCE_RECREATE:-false}"

# Run pipeline in CI mode.
# ${FORCE_RECREATE:+...} expands to nothing when the flag is empty, so the
# pipeline never receives an empty-string argument.
if ! python py-src/pipeline.py ${FORCE_RECREATE:+"$FORCE_RECREATE"} --ci --output-dir "$OUTPUT_DIR"; then
    echo "Build failed!"
    exit 1
fi

echo "Build successful!"

# Create a zip of the vector store
if [ -d "./db/vector_store_4" ]; then
    echo "Creating vector store zip file in $OUTPUT_DIR"
    # The subshell keeps the cd local, and && stops zip from running in the
    # wrong directory if db/ cannot be entered.
    if ! (cd db && zip -r "../$OUTPUT_DIR/vector_store.zip" vector_store_4); then
        echo "Failed to create vector store zip!" >&2
        exit 1
    fi
    echo "Vector store zip created at $OUTPUT_DIR/vector_store.zip"
fi

echo "Artifacts available in $OUTPUT_DIR:"
ls -la "$OUTPUT_DIR"
|