|
import json |
|
import requests |
|
import csv |
|
import os |
|
import time |
|
import sys |
|
import pandas as pd |
|
from pathlib import Path |
|
from dotenv import load_dotenv |
|
|
|
|
|
load_dotenv("key.env") |
|
api_key = os.getenv("perspective_key") |
|
|
|
if not api_key: |
|
raise ValueError("PERSPECTIVE_API_KEY is not set. Please check your key.env file.") |
|
|
|
def analyze_text(text): |
|
"""Send text to Perspective API for analysis.""" |
|
url = 'https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze' |
|
params = {'key': api_key} |
|
data = { |
|
'comment': {'text': text}, |
|
'requestedAttributes': { |
|
'TOXICITY': {}, |
|
'SEVERE_TOXICITY': {}, |
|
'IDENTITY_ATTACK': {}, |
|
'INSULT': {}, |
|
'PROFANITY': {}, |
|
'THREAT': {}, |
|
'SEXUALLY_EXPLICIT': {} |
|
} |
|
} |
|
|
|
try: |
|
response = requests.post(url, params=params, data=json.dumps(data)) |
|
response_json = response.json() |
|
|
|
if 'attributeScores' not in response_json: |
|
print(f"Error in API response: {response_json}") |
|
return {attr: 0.0 for attr in data['requestedAttributes']} |
|
|
|
return {attr: response_json['attributeScores'].get(attr, {}).get('summaryScore', {}).get('value', 0.0) for attr in data['requestedAttributes']} |
|
except Exception as e: |
|
print(f"Exception during API call: {e}") |
|
return {attr: 0.0 for attr in data['requestedAttributes']} |
|
|
|
def process_csv(input_file, output_file): |
|
"""Process CSV file, analyze each prompt, and write results to a new CSV.""" |
|
if not os.path.exists(input_file): |
|
print(f"Input file not found: {input_file}") |
|
return |
|
|
|
df = pd.read_csv(input_file) |
|
if "prompt" not in df.columns: |
|
raise ValueError("CSV must contain a 'prompt' column.") |
|
|
|
attributes = [ |
|
'TOXICITY', 'SEVERE_TOXICITY', 'IDENTITY_ATTACK', |
|
'INSULT', 'PROFANITY', 'THREAT', 'SEXUALLY_EXPLICIT' |
|
] |
|
|
|
for attr in attributes: |
|
df[f"perspective_{attr.lower()}"] = "" |
|
|
|
df["timestamp"] = "" |
|
df["processing_time_sec"] = "" |
|
df.to_csv(output_file, index=False) |
|
|
|
for index, row in df.iterrows(): |
|
prompt = row['prompt'] |
|
|
|
start_time = time.time() |
|
timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)) |
|
|
|
print(f"Processing row {index + 1}/{len(df)}: {prompt[:50]}...") |
|
scores = analyze_text(prompt) |
|
|
|
end_time = time.time() |
|
processing_time = round(end_time - start_time, 3) |
|
|
|
for attr in attributes: |
|
df.at[index, f"perspective_{attr.lower()}"] = scores.get(attr, 0.0) |
|
|
|
df.at[index, "timestamp"] = timestamp |
|
df.at[index, "processing_time_sec"] = processing_time |
|
|
|
df.to_csv(output_file, index=False) |
|
print(f"Row {index + 1} processed.") |
|
|
|
time.sleep(1) |
|
|
|
if __name__ == "__main__": |
|
if len(sys.argv) < 2: |
|
print("Usage: python script.py <input_csv>") |
|
sys.exit(1) |
|
|
|
input_csv = sys.argv[1] |
|
output_csv = f"perspective_rated_{os.path.basename(input_csv)}" |
|
|
|
process_csv(input_csv, output_csv) |
|
|