Spaces:

rwillats
/

Contextual-Policy-Engine-Hate-Speech-Classification

Running

App Files Files Community

Contextual-Policy-Engine-Hate-Speech-Classification / perspective_prompt.py

rwillats

Upload folder using huggingface_hub

0886c09 verified 4 months ago

raw

history blame

3.25 kB

	import json
	import requests
	import csv
	import os
	import time
	import sys
	import pandas as pd
	from pathlib import Path
	from dotenv import load_dotenv

	# Load environment variables
	load_dotenv("key.env")
	api_key = os.getenv("perspective_key")

	if not api_key:
	raise ValueError("PERSPECTIVE_API_KEY is not set. Please check your key.env file.")

	def analyze_text(text):
	"""Send text to Perspective API for analysis."""
	url = 'https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze'
	params = {'key': api_key}
	data = {
	'comment': {'text': text},
	'requestedAttributes': {
	'TOXICITY': {},
	'SEVERE_TOXICITY': {},
	'IDENTITY_ATTACK': {},
	'INSULT': {},
	'PROFANITY': {},
	'THREAT': {},
	'SEXUALLY_EXPLICIT': {}
	}
	}

	try:
	response = requests.post(url, params=params, data=json.dumps(data))
	response_json = response.json()

	if 'attributeScores' not in response_json:
	print(f"Error in API response: {response_json}")
	return {attr: 0.0 for attr in data['requestedAttributes']}

	return {attr: response_json['attributeScores'].get(attr, {}).get('summaryScore', {}).get('value', 0.0) for attr in data['requestedAttributes']}
	except Exception as e:
	print(f"Exception during API call: {e}")
	return {attr: 0.0 for attr in data['requestedAttributes']}

	def process_csv(input_file, output_file):
	"""Process CSV file, analyze each prompt, and write results to a new CSV."""
	if not os.path.exists(input_file):
	print(f"Input file not found: {input_file}")
	return

	df = pd.read_csv(input_file)
	if "prompt" not in df.columns:
	raise ValueError("CSV must contain a 'prompt' column.")

	attributes = [
	'TOXICITY', 'SEVERE_TOXICITY', 'IDENTITY_ATTACK',
	'INSULT', 'PROFANITY', 'THREAT', 'SEXUALLY_EXPLICIT'
	]

	for attr in attributes:
	df[f"perspective_{attr.lower()}"] = ""

	df["timestamp"] = ""
	df["processing_time_sec"] = ""
	df.to_csv(output_file, index=False)

	for index, row in df.iterrows():
	prompt = row['prompt']

	start_time = time.time()
	timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))

	print(f"Processing row {index + 1}/{len(df)}: {prompt[:50]}...")
	scores = analyze_text(prompt)

	end_time = time.time()
	processing_time = round(end_time - start_time, 3)

	for attr in attributes:
	df.at[index, f"perspective_{attr.lower()}"] = scores.get(attr, 0.0)

	df.at[index, "timestamp"] = timestamp
	df.at[index, "processing_time_sec"] = processing_time

	df.to_csv(output_file, index=False)
	print(f"Row {index + 1} processed.")

	time.sleep(1) # Avoid rate limiting

	if __name__ == "__main__":
	if len(sys.argv) < 2:
	print("Usage: python script.py <input_csv>")
	sys.exit(1)

	input_csv = sys.argv[1]
	output_csv = f"perspective_rated_{os.path.basename(input_csv)}"

	process_csv(input_csv, output_csv)