Anecedotal_Discourse_Classifier_Multitext

Running

App Files Files Community

Anecedotal_Discourse_Classifier_Multitext / app.py

BabakScrapes

Update app.py

ca1cd14 verified 5 months ago

raw

history blame contribute delete

6.84 kB

	import os

	import subprocess
	import sys

	# Runtime dependencies, installed one at a time at start-up.
	# The order is preserved from the original script: later pins (e.g. numpy)
	# are applied after packages that may pull in their own versions.
	_REQUIREMENTS = (
	    "transformers==4.26.1",
	    "spacy==3.5.4",
	    "spacy-alignments==0.9.1",
	    "spacy-legacy==3.0.12",
	    "spacy-loggers==1.0.3",
	    "torch",
	    "seaborn==0.11.2",
	    "gradio==3.16.1",
	    "typer==0.4.1",
	    "pydantic==1.9.2",
	    "matplotlib==3.4.3",
	    "Flask",
	    "sty==1.0.4",
	    "numpy==1.26.4",
	)

	for _requirement in _REQUIREMENTS:
	    # Use the interpreter that is running this script (sys.executable) so the
	    # packages land in the environment that will import them; a bare "python"
	    # resolved through PATH may be a different installation. Passing an
	    # argument list avoids going through a shell.
	    _completed = subprocess.run(
	        [sys.executable, "-m", "pip", "install", _requirement],
	        check=False,  # stay best-effort: a failed install must not abort start-up
	    )
	    if _completed.returncode != 0:
	        print(
	            f"warning: pip install {_requirement} exited with status "
	            f"{_completed.returncode}",
	            file=sys.stderr,
	        )

	from flask import Flask, render_template, request, send_file, after_this_request
	import hashlib
	from pipeline import *
	import csv

	# Flask application object. Templates and static files are both served from
	# the 'templates' directory.
	app = Flask(__name__, template_folder='templates', static_folder='templates')

	# Set the folder for saving uploaded files
	UPLOAD_FOLDER = 'uploads/'
	app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
	# Create the directory if it doesn't exist. exist_ok=True replaces the
	# separate isdir() check, so there is no window between "check" and "create"
	# in which another process could create the directory first.
	os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

	# Allowable file extensions for uploading
	ALLOWED_EXTENSIONS = {'txt', 'csv'}

	# Check if a file has an allowable extension
	def allowed_file(filename):
	    """Return True when *filename* has an extension in ALLOWED_EXTENSIONS.

	    The extension is the text after the last '.' and is compared
	    case-insensitively. A missing filename (None or empty string) or a name
	    without any '.' is rejected with False instead of raising.
	    """
	    if not filename:
	        return False
	    # rpartition yields an empty separator when the name contains no dot.
	    _stem, dot, extension = filename.rpartition('.')
	    return bool(dot) and extension.lower() in ALLOWED_EXTENSIONS

	# Run the pipeline on each non-empty line of the file and return a list of dictionaries containing the original input and the processed output
	def process_file(file_path):
	    """Run the pipeline on every non-empty text in *file_path*.

	    ``.txt`` files contribute one text per line; ``.csv`` files contribute
	    the first column of each row. Surrounding whitespace is stripped and
	    blank entries are skipped. Each remaining text is passed to
	    ``run_pipeline`` (not defined in this file; presumably supplied by the
	    ``pipeline`` star import).

	    The file is scheduled for deletion once the current response has been
	    produced, via Flask's ``after_this_request``, so this function is meant
	    to be called while a request is being handled.

	    Returns:
	        A list of ``{'input': text, 'output': pipeline_result}`` dictionaries,
	        one per non-empty text, in file order.

	    Raises:
	        ValueError: if *file_path* ends in neither ``.txt`` nor ``.csv``.
	    """
	    texts = []
	    # A single managed handle: the file is fully read and closed before the
	    # deferred os.remove() runs, so nothing is left open when it is deleted.
	    with open(file_path, 'r', encoding='utf-8-sig', errors='ignore') as f:
	        if file_path.endswith(".txt"):
	            for line in f:
	                stripped = line.strip()
	                if stripped:
	                    texts.append(stripped)
	        elif file_path.endswith(".csv"):
	            for row in csv.reader(f):
	                # Completely empty rows come back as [] from csv.reader.
	                if row and row[0].strip():
	                    texts.append(row[0].strip())
	        else:
	            raise ValueError(f"Unsupported file type: {file_path}")

	    # Build each record from the same text that was sent to the pipeline, so
	    # inputs and outputs cannot drift apart when the file has blank lines.
	    result = [{'input': text, 'output': run_pipeline(text)} for text in texts]

	    @after_this_request
	    def remove_file(response):
	        # Best-effort clean-up: a failed delete is logged, never raised.
	        try:
	            os.remove(file_path)
	        except OSError as error:
	            app.logger.error("Error removing uploaded file %s: %s", file_path, error)
	        return response

	    return result

	# Home page route that allows users to upload files
	@app.route('/')
	def index():
	    """Render the ``index.html`` template for requests to the site root."""
	    landing_page = render_template('index.html')
	    return landing_page

	# Handle file upload requests
	def _proportion(part, whole):
	    """Return ``part / whole``, or 0.0 when ``whole`` is zero (no clauses)."""
	    return part / whole if whole else 0.0


	@app.route('/', methods=['POST'])
	def upload_file():
	    """Handle an upload: run the pipeline on it and send back a CSV report.

	    The uploaded file (form field ``file``) is stored in the upload folder
	    under the MD5 hex digest of its content, handed to ``process_file``, and
	    the per-text label counts and proportions are written to ``result.csv``
	    in the same folder, which is then returned as an attachment.

	    Returns:
	        The plain strings ``'No file selected'`` or ``'File type not
	        allowed'`` for rejected uploads; otherwise the ``send_file`` response
	        for the generated CSV.
	    """
	    # Get the uploaded file
	    file = request.files['file']

	    # If the user did not select any file, return an error message
	    if not file:
	        return 'No file selected'

	    # If the file type is not allowed, return an error message
	    if not allowed_file(file.filename):
	        return 'File type not allowed'

	    # Generate a hash code from the file content to use as the file name
	    hash_code = hashlib.md5(file.read()).hexdigest()
	    # NOTE(review): the stored copy always gets a ".txt" name, so process_file
	    # reads every upload (including .csv uploads) line by line. Per-extension
	    # naming was commented out in the original; confirm this is intended.
	    filename = f"{hash_code}.txt"
	    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

	    # Hashing consumed the stream; rewind before saving the uploaded file
	    file.seek(0)
	    file.save(file_path)

	    # Process the uploaded file; the report is written as a CSV file
	    result = process_file(file_path)
	    # NOTE(review): every request writes to this same path, so simultaneous
	    # uploads can overwrite each other's report; confirm single-user usage.
	    result_file_path = os.path.join(app.config['UPLOAD_FOLDER'], 'result.csv')

	    # Build every CSV row before touching the output file, so an error while
	    # tallying labels does not leave a truncated report behind.
	    rows = []
	    for text in result:
	        counts = {"generic":0,"specific":0,"stative":0,"dynamic":0,"static":0,"episodic":0,"habitual":0,"NA genericity":0,"NA eventivity":0,"NA boundedness":0}
	        individual_labels = []
	        for clause in text['output'][1]:
	            individual_labels.append(clause[1])
	            # labels2attrs maps a clause label to its per-dimension features;
	            # position 0 is tallied as genericity, 1 as eventivity, any other
	            # position as boundedness when the feature contains "NA".
	            for position, feature in enumerate(labels2attrs[clause[1]]):
	                if "NA" not in feature:
	                    counts[feature] += 1
	                elif position == 0:
	                    counts["NA genericity"] += 1
	                elif position == 1:
	                    counts["NA eventivity"] += 1
	                else:
	                    counts["NA boundedness"] += 1

	        genericity_total = counts['generic'] + counts['specific'] + counts['NA genericity']
	        eventivity_total = counts['stative'] + counts['dynamic'] + counts['NA eventivity']
	        boundedness_total = (counts['static'] + counts['episodic']
	                             + counts['habitual'] + counts['NA boundedness'])
	        # _proportion guards against texts with no clauses, whose totals are
	        # all zero and would otherwise raise ZeroDivisionError.
	        props = [
	            _proportion(counts['generic'], genericity_total),
	            _proportion(counts['specific'], genericity_total),
	            _proportion(counts['stative'], eventivity_total),
	            _proportion(counts['dynamic'], eventivity_total),
	            _proportion(counts['static'], boundedness_total),
	            _proportion(counts['episodic'], boundedness_total),
	            _proportion(counts['habitual'], boundedness_total),
	        ]

	        clauses = "\n".join(
	            "{}: {}".format(clause[1], clause[0]) for clause in text["output"][0]
	        )
	        ind_labels = "\n".join(individual_labels)
	        rows.append([
	            text["input"], clauses, ind_labels,
	            counts['generic'], counts['specific'],
	            counts['stative'], counts['dynamic'],
	            counts['static'], counts['episodic'], counts['habitual'],
	        ] + props)

	    with open(result_file_path, 'w', encoding='utf-8', errors='ignore', newline="") as f:
	        writer = csv.writer(f)
	        writer.writerow(["input","clauses","individual labels","genericity: generic count","genericity: specific count","eventivity: stative count","eventivity: dynamic count","boundedness: static count","boundedness: episodic count","habitual count","genericity: proportion generic","genericity: proportion specific","eventivity: proportion stative","eventivity: proportion dynamic","boundedness: proportion static","boundedness: proportion episodic","proportion habitual"])
	        writer.writerows(rows)

	    # Return the result file as a download once the processing is complete
	    return send_file(result_file_path, as_attachment=True)

	# Start the Flask server on all interfaces at port 7860, but only when this
	# file is executed directly rather than imported.
	if __name__ == '__main__':
	    app.run(
	        host="0.0.0.0",
	        port=7860,
	    )