import html
import re

import requests
import streamlit as st

from src.fine_tune_helpers import fine_tune_model
from src.github_analysis import analyze_github_repo
from src.url_fetcher import fetch_url_title


# Page header: title plus a short Markdown introduction.
st.title("OSINT Tool 🏢")
st.markdown("""
This tool performs **Open Source Intelligence (OSINT)** analysis on GitHub repositories and fetches titles from URLs.
It also allows uploading datasets (CSV format) for fine-tuning models like **DistilBERT**, **Code Summarization**, **Bug Fixing**, and more.
""")

# Sidebar navigation: the selected label is compared verbatim by the mode
# dispatch further down, so these strings must stay in sync with it.
st.sidebar.title("Navigation")
app_mode = st.sidebar.radio(
    "Choose the mode",
    [
        "GitHub Repository Analysis",
        "URL Title Fetcher",
        "Dataset Upload & Fine-Tuning",
    ],
)

# Model identifiers offered in the fine-tuning mode's selectbox.
# NOTE(review): the intro text mentions DistilBERT, but no DistilBERT model is
# listed here — confirm whether one should be added.
available_models = [
    "semeru/code-text-galeras-code-summarization-3k-deduped",
    "semeru/code-code-InjectMutants",
    "semeru/code-code-BugFixingSmall",
    "semeru/code-code-GeneratingAssertsRaw",
    "deepseek-ai/DeepSeek-Prover-V1",
]

# Render the page that matches the mode selected in the sidebar.
if app_mode == "GitHub Repository Analysis":
    st.header("GitHub Repository Analysis")
    # Strip the inputs so whitespace-only values count as empty below.
    repo_owner = st.text_input("Enter GitHub Repository Owner", "huggingface").strip()
    repo_name = st.text_input("Enter GitHub Repository Name", "transformers").strip()

    if st.button("Analyze Repository"):
        if repo_owner and repo_name:
            repo_data = analyze_github_repo(repo_owner, repo_name)
            if repo_data:
                st.subheader("Repository Details")
                for key, value in repo_data.items():
                    st.write(f"**{key}**: {value}")
            else:
                st.error("Failed to retrieve repository details.")
        else:
            st.warning("Please enter both repository owner and name.")

elif app_mode == "URL Title Fetcher":
    st.header("URL Title Fetcher")
    # Strip so a whitespace-only value triggers the warning instead of a fetch.
    url = st.text_input("Enter URL", "https://www.huggingface.co").strip()

    if st.button("Fetch Title"):
        if url:
            title = fetch_url_title(url)
            if title:
                st.write(f"**Page Title**: {title}")
            else:
                st.error("Failed to retrieve the page title.")
        else:
            st.warning("Please enter a valid URL.")

elif app_mode == "Dataset Upload & Fine-Tuning":
    st.header("Dataset Upload & Fine-Tuning")

    model_choice = st.selectbox("Choose Model for Fine-Tuning", available_models)
    uploaded_file = st.file_uploader("Upload a CSV file for fine-tuning", type="csv")

    # NOTE(review): unlike the other modes, nothing gates this behind a button,
    # so the call below runs on every script rerun while a file is uploaded
    # (e.g. after changing the model selection) — confirm this is intended.
    if uploaded_file is not None:
        st.write(f"Preparing fine-tuning for model: **{model_choice}**")
        st.write("File successfully uploaded! Now starting fine-tuning process...")
        fine_tune_model(uploaded_file, model_choice)


def analyze_github_repo(owner: str, repo: str, *, timeout: float = 10.0):
    """Fetch summary metadata for a GitHub repository from the GitHub REST API.

    NOTE(review): this file also imports ``analyze_github_repo`` from
    ``src.github_analysis``, and this definition appears after the UI code
    that calls that name, so the UI presumably uses the imported version —
    confirm which implementation is intended.

    Args:
        owner: Login of the account that owns the repository.
        repo: Repository name.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with the keys "Repository Name", "Owner", "Stars", "Forks",
        "Issues", "Language" and "Description", or ``None`` when the request
        fails or the response is not in the expected shape. In the failure
        case an error message is shown via ``st.error``.
    """
    try:
        # An explicit timeout keeps a stalled connection from hanging the app.
        response = requests.get(
            f"https://api.github.com/repos/{owner}/{repo}",
            timeout=timeout,
        )
        response.raise_for_status()
        repo_data = response.json()
        return {
            "Repository Name": repo_data["name"],
            "Owner": repo_data["owner"]["login"],
            "Stars": repo_data["stargazers_count"],
            "Forks": repo_data["forks_count"],
            "Issues": repo_data["open_issues_count"],
            "Language": repo_data["language"],
            # The key can be present with a null value, in which case a
            # dict.get() default would never apply; fall back on falsy instead.
            "Description": repo_data.get("description") or "No description available.",
        }
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching GitHub repository: {e}")
        return None
    except (KeyError, TypeError, ValueError) as e:
        # Body was not JSON or lacked an expected field.
        st.error(f"Unexpected response from GitHub API: {e}")
        return None


# Matches the first <title> element regardless of case or attributes; DOTALL
# lets the title text span several lines.
_TITLE_RE = re.compile(r"<title\b[^>]*>(.*?)</title\s*>", re.IGNORECASE | re.DOTALL)


def fetch_url_title(url: str, *, timeout: float = 10.0):
    """Fetch a web page and return the text of its ``<title>`` element.

    NOTE(review): this file also imports ``fetch_url_title`` from
    ``src.url_fetcher``, and this definition appears after the UI code that
    calls that name, so the UI presumably uses the imported version —
    confirm which implementation is intended.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The title with HTML entities decoded and whitespace collapsed, or
        ``None`` when the request fails, the status code is not 200, or the
        page has no non-empty ``<title>``. Request failures and non-200
        responses are also reported via ``st.error``.
    """
    try:
        # An explicit timeout keeps a stalled connection from hanging the app.
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching URL: {e}")
        return None

    if response.status_code != 200:
        st.error(f"Failed to fetch URL: {response.status_code}")
        return None

    found = _TITLE_RE.search(response.text)
    if found is None:
        # No <title> element: report "no title" instead of slicing the page
        # with not-found indices.
        return None

    # Decode entities such as &amp; and collapse newlines/indentation.
    title_text = " ".join(html.unescape(found.group(1)).split())
    return title_text or None