| from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, AutoConfig, AutoModel |
| from datasets import Dataset, DatasetDict, Features, Sequence, ClassLabel, Value |
| import pandas as pd |
| import re |
| import torch |
| from Preprocess import * |
|
|
# Load the fine-tuned classifier checkpoint once at import time so that
# eval() reuses the same tokenizer/model across calls.
model_path = "Model-V1.1"
tokenizer = AutoTokenizer.from_pretrained(model_path)
config = AutoConfig.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
# id -> label and label -> id mappings taken from the checkpoint config.
# NOTE(review): eval() below assumes an 'AI' entry exists in label2id —
# confirm against the checkpoint's config.json.
labels = model.config.id2label
label2id = model.config.label2id
|
|
def preprocess(code):
    """Normalize raw source code before tokenization.

    Runs the Preprocess-module helpers in a fixed order: comment
    removal, preprocessor replacement, brace normalization, and
    per-line stripping. Returns the transformed string.
    """
    pipeline = (remove_comments, replace_preprocessor, normalize_braces, strip_lines)
    for step in pipeline:
        code = step(code)
    return code
|
|
|
|
def eval(source):
    """Classify a source-code string with the loaded model.

    NOTE(review): the name shadows the builtin ``eval``; kept unchanged
    for backward compatibility with existing callers.

    Args:
        source: Raw source-code string to classify.

    Returns:
        A ``(label, probability)`` tuple: the predicted label string
        from the model config's ``id2label`` mapping, and the
        probability the model assigns to the 'AI' class, formatted
        like ``"97.31 %"``.
    """
    source = preprocess(source)
    inputs = tokenizer(
        source,
        truncation=True,
        padding='max_length',
        max_length=512,
        return_tensors='pt',
    )

    # Run inference on CPU; eval() switches off dropout/batch-norm updates.
    model.cpu()
    model.eval()
    inputs = {k: v.cpu() for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax over the label dimension; batch size is 1, so take row 0.
    # (.detach() removed: it is redundant inside torch.no_grad().)
    probs = torch.softmax(outputs.logits, dim=-1).cpu().numpy()[0]
    # Cast numpy.int64 -> int so the id2label dict lookup is unambiguous.
    pred_id = int(probs.argmax())

    return labels[pred_id], f"{probs[label2id['AI']] * 100:.2f} %"
|
|