mafzaal committed on
Commit
315127b
·
1 Parent(s): 4e87dd5

feat: Implement build vector store workflow and local script for artifact generation

Browse files
.github/workflows/build-vector-store.yml CHANGED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
name: Build Vector Store

# Rebuilds the vector store, uploads the result as a workflow artifact and,
# when the pipeline reports success, publishes it as a GitHub release.
# Triggers: manual dispatch, pushes to main that touch data/, and a daily cron.
on:
  workflow_dispatch:
    inputs:
      force_recreate:
        description: 'Force recreation of the vector store'
        required: false
        default: true
        type: boolean
  push:
    branches: [main]
    paths:
      - 'data/**'
  schedule:
    # Run daily at midnight UTC
    - cron: '0 0 * * *'

jobs:
  build-vector-store:
    runs-on: ubuntu-latest

    # The release step creates a tag and a release, which needs write access
    # to repository contents; the default GITHUB_TOKEN can be read-only.
    permissions:
      contents: write

    env:
      # Single source of truth for where the pipeline writes its outputs.
      ARTIFACTS_DIR: ./artifacts

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          fetch-depth: 0  # Fetch all history for proper versioning

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.13'
          cache: 'pip'

      - name: Install dependencies
        run: |
          # Install uv
          curl -LsSf https://astral.sh/uv/install.sh | sh

          # `export PATH` only lasts for this step, so also register the
          # install directory through GITHUB_PATH for the later `uv run`
          # steps. Both locations are listed because the installer's target
          # directory differs between uv releases.
          for dir in "$HOME/.local/bin" "$HOME/.cargo/bin"; do
            echo "$dir" >> "$GITHUB_PATH"
          done
          export PATH="$HOME/.local/bin:$HOME/.cargo/bin:$PATH"

          # `uv sync` creates the project environment and installs the
          # locked dependencies; no separate pip upgrade is needed.
          uv sync

      - name: Build vector store
        id: build-vector
        env:
          # Honour the manual input. Push and schedule events carry no
          # inputs, so the expression evaluates to 'false' for them.
          FORCE_RECREATE: ${{ github.event.inputs.force_recreate == 'true' }}
        run: |
          # Make sure the output directory exists before anything writes to it.
          mkdir -p "$ARTIFACTS_DIR"

          # Build with or without force recreate based on the input
          if [ "$FORCE_RECREATE" = "true" ]; then
            uv run python py-src/pipeline.py --force-recreate --ci --output-dir "$ARTIFACTS_DIR"
          else
            uv run python py-src/pipeline.py --ci --output-dir "$ARTIFACTS_DIR"
          fi

          # Create a zip file of the vector store. The archive path is made
          # absolute first so that it stays valid after changing into db/,
          # and zip runs inside db/ so entries start at vector_store_4/.
          ZIP_PATH="$(cd "$ARTIFACTS_DIR" && pwd)/vector_store.zip"
          (cd db && zip -r "$ZIP_PATH" vector_store_4)

      - name: Read CI summary
        id: ci_summary
        run: |
          # Read the CI summary file to extract values; a missing file is
          # reported as a failure with zero documents.
          SUMMARY_FILE="$ARTIFACTS_DIR/ci_summary.json"
          if [ -f "$SUMMARY_FILE" ]; then
            STATUS=$(jq -r '.status' "$SUMMARY_FILE")
            DOC_COUNT=$(jq -r '.document_count' "$SUMMARY_FILE")

            echo "status=$STATUS" >> "$GITHUB_OUTPUT"
            echo "document_count=$DOC_COUNT" >> "$GITHUB_OUTPUT"
          else
            echo "status=failure" >> "$GITHUB_OUTPUT"
            echo "document_count=0" >> "$GITHUB_OUTPUT"
          fi

      - name: Upload artifacts
        # v3 of this action has been retired by GitHub; v4 accepts the same
        # `name` and `path` inputs used here.
        uses: actions/upload-artifact@v4
        with:
          name: vector-store-artifacts
          path: |
            ${{ env.ARTIFACTS_DIR }}/*.json
            ${{ env.ARTIFACTS_DIR }}/vector_store.zip

      - name: Get version
        id: get_version
        env:
          DOC_COUNT: ${{ steps.ci_summary.outputs.document_count }}
        run: |
          # Create a version based on date and document count
          VERSION="v$(date +'%Y.%m.%d')-docs${DOC_COUNT}"
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"

      - name: Create GitHub Release
        id: create_release
        # Only publish when the pipeline's own summary reports success.
        if: steps.ci_summary.outputs.status == 'success'
        uses: softprops/action-gh-release@v1
        with:
          tag_name: ${{ steps.get_version.outputs.version }}
          name: Vector Store ${{ steps.get_version.outputs.version }}
          body: |
            Vector store updated with ${{ steps.ci_summary.outputs.document_count }} documents.

            This is an automated release created by the vector store build workflow.
          files: |
            ${{ env.ARTIFACTS_DIR }}/*.json
            ${{ env.ARTIFACTS_DIR }}/vector_store.zip
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
.gitignore CHANGED
@@ -1,6 +1,6 @@
1
 
2
  db/
3
-
4
 
5
  # Byte-compiled / optimized / DLL files
6
  __pycache__/
 
1
 
2
  db/
3
+ artifacts/
4
 
5
  # Byte-compiled / optimized / DLL files
6
  __pycache__/
scripts/build-vector-store.sh CHANGED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Build the vector store locally and collect the results in ./artifacts.
#
# Usage: ./scripts/build-vector-store.sh [--force-recreate]
#
# Runs py-src/pipeline.py in CI mode, then zips db/vector_store_4 into the
# artifacts directory when that directory exists. All paths are relative, so
# the script is meant to be started from the repository root.
#
# Exit status: 0 on success, 1 if the output directory cannot be created,
# the pipeline fails, or the archive cannot be written.

# Left empty unless requested, so that it expands to no argument at all below.
FORCE_RECREATE=""
if [[ "$1" == "--force-recreate" ]]; then
    FORCE_RECREATE="--force-recreate"
fi

# Set output directory for artifacts
OUTPUT_DIR="./artifacts"
if ! mkdir -p "$OUTPUT_DIR"; then
    echo "Could not create $OUTPUT_DIR" >&2
    exit 1
fi

echo "Building vector store with output to $OUTPUT_DIR"
echo "Force recreate: ${FORCE_RECREATE:-false}"

# Run pipeline in CI mode.
# $FORCE_RECREATE is intentionally unquoted: when empty it must vanish instead
# of being passed to the pipeline as an empty-string argument.
if ! python py-src/pipeline.py $FORCE_RECREATE --ci --output-dir "$OUTPUT_DIR"; then
    echo "Build failed!"
    exit 1
fi

echo "Build successful!"

# Create a zip of the vector store
if [ -d "./db/vector_store_4" ]; then
    echo "Creating vector store zip file in $OUTPUT_DIR"

    # zip adds to an existing archive rather than replacing it, so remove any
    # archive left by an earlier run to avoid shipping stale entries.
    rm -f "$OUTPUT_DIR/vector_store.zip"

    # The subshell keeps the caller's working directory unchanged and lets a
    # failed cd or zip be reported instead of announcing success regardless.
    if ! (cd db && zip -r "../$OUTPUT_DIR/vector_store.zip" vector_store_4); then
        echo "Failed to create vector store zip!" >&2
        exit 1
    fi

    echo "Vector store zip created at $OUTPUT_DIR/vector_store.zip"
fi

echo "Artifacts available in $OUTPUT_DIR:"
ls -la "$OUTPUT_DIR"