I’m Henry, cofounder and CTO at Span (https://span.app/). Today we’re launching AI Code Detector, an AI code detection tool you can try in your browser.
The explosion of AI generated code has created some weird problems for engineering orgs. Tools like Cursor and Copilot are used by virtually every org on the planet – but each codegen tool has its own idiosyncratic way of reporting usage. Some don’t report usage at all.
Our view is that token spend will start competing with payroll spend as AI becomes more deeply ingrained in how we build software, so understanding how to drive proficiency, improve ROI, and allocate resources relating to AI tools will become at least as important as parallel processes on the talent side.
Getting true visibility into AI-generated code is incredibly difficult. And yet it’s the number one thing customers ask us for.
So we built a new approach from the ground up.
Our AI Code Detector is powered by span-detect-1, a state-of-the-art model trained on millions of AI- and human-written code samples. It detects AI-generated code with 95% accuracy, and ties it to specific lines shipped into production. Within the Span platform, it’ll give teams a clear view into AI’s real impact on velocity, quality, and ROI.
It does have some limitations. Most notably, it only works for TypeScript and Python code. We are adding support for more languages: Java, Ruby, and C# are next. Its accuracy is around 95% today, and we’re working on improving that, too.
If you’d like to take it for a spin, you can run a code snippet here (https://code-detector.ai/) and get results in about five seconds. We also have a more narrative-driven microsite (https://www.span.app/detector) that my marketing team says I have to share.
Would love your thoughts, both on the tool itself and your own experiences. I’ll be hanging out in the comments to answer questions, too.
`create two 1000 line python scripts, one that is how you normally do it, and one how a messy undergraduate student would write it.`
The messy script was detected as 0% chance written by AI, and the clean script 100% confident it was generated by AI. I had to shorten it for brevity. Happy to share the full script.
Here is the chatgpt convo: https://chatgpt.com/share/68c9bc0c-8e10-8011-bab2-78de5b2ed6...
clean script:
#!/usr/bin/env python3
"""
A clean, well-structured example Python script.
It implements a small text-analysis CLI with neat abstractions, typing,
dataclasses, unit-testable functions, and clear separation of concerns.
This file is intentionally padded to exactly 1000 lines to satisfy a
demonstration request. The padding is made of documented helper stubs.
"""
from __future__ import annotations
import argparse
import json
import re
from collections import Counter
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path
from typing import Dict, Iterable, List, Sequence, Tuple
__version__ = "1.0.0"
@dataclass(frozen=True)
class AnalysisResult:
    """Immutable container for the outcome of a bag-of-words text analysis.

    Attributes:
        token_counts: mapping of token -> occurrence count.
        total_tokens: total number of tokens seen (repeats included).
    """
    # token -> frequency
    token_counts: Dict[str, int]
    # total token count, repeats included
    total_tokens: int

    def top_k(self, k: int = 10) -> List[Tuple[str, int]]:
        """Return the k most frequent tokens; ties break alphabetically."""
        ranked = sorted(
            self.token_counts.items(),
            key=lambda item: (-item[1], item[0]),
        )
        return ranked[:k]
def _read_text(path: Path) -> str:
"""Read UTF-8 text from a file."""
data = path.read_text(encoding="utf-8", errors="replace")
return data
@lru_cache(maxsize=128)
def normalize(text: str) -> str:
    """Return *text* lowercased with whitespace runs collapsed to single spaces.

    Cached because callers may normalize the same string repeatedly.
    """
    collapsed = re.sub(r"\s+", " ", text.lower())
    return collapsed.strip()
def tokenize(text: str) -> List[str]:
    """Split normalized *text* into word tokens, dropping empty fragments."""
    parts = re.split(r"\W+", normalize(text))
    return [part for part in parts if part]
def ngrams(tokens: Sequence[str], n: int) -> List[Tuple[str, ...]]:
    """Return all n-grams of *tokens* as tuples, in order of occurrence.

    Raises:
        ValueError: if n is not positive.
    """
    if n <= 0:
        raise ValueError("n must be positive")
    last_start = len(tokens) - n + 1
    return [tuple(tokens[start:start + n]) for start in range(max(0, last_start))]
def analyze(text: str) -> AnalysisResult:
    """Tokenize *text* and return token frequencies plus the total token count."""
    tokens = tokenize(text)
    frequencies = Counter(tokens)
    return AnalysisResult(token_counts=dict(frequencies), total_tokens=len(tokens))
def analyze_file(path: Path) -> AnalysisResult:
    """Read the file at *path* and run the standard text analysis on it."""
    text = _read_text(path)
    return analyze(text)
def save_json(obj: dict, path: Path) -> None:
    """Write *obj* to *path* as pretty-printed UTF-8 JSON, ending with a newline."""
    payload = json.dumps(obj, indent=2, ensure_ascii=False)
    path.write_text(payload + "\n", encoding="utf-8")
Messy script:

# ok so this script kinda does stuff idk
# NOTE(review): the original read "from collections import \*" — the backslash
# is a markdown-escaping artifact of a star import. Only Counter is used in
# the visible portion of this script, so import it explicitly.
import sys, os, re, json, random, math
from collections import Counter

VER = "lol"
g = {}      # scratch global dict — appears unused in the visible code
data = []   # scratch global list — appears unused in the visible code
TMP = None  # scratch placeholder — appears unused in the visible code
def readz(p):
    """Best-effort file read: return the file's text, or "" if it can't be read.

    Fixes two defects in the original: the file handle was never closed
    (no context manager), and the bare ``except`` swallowed even
    KeyboardInterrupt/SystemExit — it is narrowed to ``Exception`` to keep
    the best-effort contract for ordinary failures.
    """
    try:
        with open(p, "r", encoding="utf-8", errors="ignore") as fh:
            return fh.read()
    except Exception:
        return ""
def norm(x):
    """Lowercase *x*, turn newlines/tabs into spaces, squeeze repeated spaces."""
    lowered = x.lower().replace("\n", " ").replace("\t", " ")
    squeezed = re.sub(" +", " ", lowered)
    return squeezed.strip()
def tokn(x):
    """Split normalized text on runs of non-word characters.

    Fix: the regex is now a raw string — ``"\\W+"`` as a plain string literal
    is an invalid escape sequence (SyntaxWarning on modern Python).
    Note: like the original, this can yield empty strings at the edges;
    callers (``ana``) filter them out.
    """
    return re.split(r"\W+", norm(x))
def ana(s):
    """Count non-empty tokens in *s*; return {"counts": dict, "total": int}."""
    tallies = Counter(tok for tok in tokn(s) if tok)
    return {"counts": dict(tallies), "total": sum(tallies.values())}
def showTop(d, k=10):
    """Print the k most frequent tokens from an ana() result, tab-separated.

    Ties break alphabetically. Fix: the original's bare ``except`` is narrowed
    to ``Exception`` so Ctrl-C / SystemExit propagate instead of being eaten.
    """
    try:
        ranked = sorted(d["counts"].items(), key=lambda kv: (-kv[1], kv[0]))
        for token, count in ranked[:k]:
            print(token + "\t" + str(count))
    except Exception:
        print("uhh something broke")
def main():
    # CLI entry point: argv[1] is the input path; an optional "--out" flag
    # writes the analysis as JSON. Returns 2 on missing argument, 0 on
    # success — note the caller must forward this to the process exit code.
    # not really parsing args lol
    if len(sys.argv)<2:
        print("give me a path pls")
        return 2
    p=sys.argv[1]
    t=readz(p)       # best-effort read; "" on any failure
    r=ana(t)         # token counts: {"counts": ..., "total": ...}
    showTop(r,10)    # print the top-10 tokens
    if "--out" in sys.argv:
        try:
            i=sys.argv.index("--out"); o=sys.argv[i+1]
        except:
            # NOTE(review): bare except — fires when "--out" is the last
            # argument (IndexError) and falls back to a default filename.
            o="out.json"
        with open(o,"w",encoding="utf-8") as f:
            f.write(json.dumps(r))
    return 0
if __name__=="__main__":
    # Propagate main()'s return value (0 or 2) as the process exit status;
    # the original discarded it, so the script always exited 0 even on
    # usage errors.
    raise SystemExit(main())
def f1(x=None, y=0, z="no"):
    """Demo helper; deliberately odd semantics are preserved exactly:

    - x defaults to y when None; y is then incremented three times
    - str x -> first 5 chars; int x -> x + incremented y; anything else -> 42
    - any runtime failure -> -1 (z is never used)

    Fix: the bare ``except`` is narrowed to ``Exception`` so
    KeyboardInterrupt/SystemExit are no longer swallowed.
    """
    # todo maybe this should do something??
    try:
        if x is None:
            x = y
        for _ in range(3):
            y = (y or 0) + 1
        if isinstance(x, str):
            return x[:5]
        elif isinstance(x, int):
            return x + y
        else:
            return 42
    except Exception:
        return -1
def f2(x=None, y=0, z="no"):
    """Demo helper, a verbatim twin of f1 (same odd semantics, preserved):

    - x defaults to y when None; y is then incremented three times
    - str x -> first 5 chars; int x -> x + incremented y; anything else -> 42
    - any runtime failure -> -1 (z is never used)

    Fix: the bare ``except`` is narrowed to ``Exception`` so
    KeyboardInterrupt/SystemExit are no longer swallowed.
    """
    # todo maybe this should do something??
    try:
        if x is None:
            x = y
        for _ in range(3):
            y = (y or 0) + 1
        if isinstance(x, str):
            return x[:5]
        elif isinstance(x, int):
            return x + y
        else:
            return 42
    except Exception:
        return -1
def f3(x=None,y=0,z="no"):
# todo maybe this should do something??
try:
if x is None:
x = y
for _ in range(3):
y = (y or 0) + 1
if isinstance(x,str):
return x[:5]
elif isinstance(x,int):
return x + y
else:
return 42The primary use-case for this model is for engineering teams to understand the impact of AI-generated code in production code in their codebases.
Like
This

This is an "AI AI code detector".
You could call it a meta-AI code detector but people might think that's a detector for AI code written by the company formerly known as Facebook.
Is AI-generated code the positive class?
2. Heat moves in different ways. It can move when things touch it or when air moves. It can also move in waves, like the sun's heat. Good insulators stop this from happening. Materials like wool and cotton are good because they have lots of tiny air pockets. Air is bad at moving heat. Bubble wrap is good for the same reason. Each little bubble holds air inside, which keeps heat from moving around much. Foil is different. It is shiny, so it reflects heat. This can stop heat from going out or coming in, but it's not good at stopping heat that touches it. The foil will go around the bottle to see if that helps. Recycled paper is also good because the tiny paper bits can trap air. I will see if paper works as good as the other materials that trap air.
3. I will be careful with the hot water so I don't get burned. An adult will help me pour the water. I will use gloves to handle the hot bottle. I will be careful with the thermometer so it doesn't break. At the end, I will just dump the water and put the other stuff in the trash. I will clean up everything when I am done.
I guess it's impossible (or really hard) to train a language-agnostic classifier.
Reference, from your own URL here: https://www.span.app/introducing-span-detect-1
Edit: since you mentioned universities, are you thinking about AI detection for student work, e.g. like a plagiarism checker? Just curious.
When it comes to the unis, I was thinking of AI detection for student work. I mean, plagiarism checkers are common nowadays, and the systems I know of just force every student to upload their work and compare it for similarities; one even broke it down to the AST level (I think?) for detection, so it didn't matter if the students renamed the variables.
But for ai detection, it's still a new area. From what I know, unis just make the students check a field when uploading their work as a contract that they never used ai tools and all is their own work, and after that is up to the teacher to go through their code and see if it looks odd or something. Some even have the students just present their code and make them explain what they did. But as of a tool for ai detection is pretty new, as far as I know.
[1] - https://chatgpt.com/share/e/68c9d578-8290-8007-93f4-4b178369...
This might be great for educational institutions, but the idea of people needing to know what every line does as output feels moot to me in the face of agentic AI.
Getting more to the heart of your question: the main use-case for this (and the reason Span developed it) is to understand the impact of AI coding assistants in aggregate for their customers. The explosion of AI-generated code is creating some strange issues that engineering teams need to take into account, but visibility is super low right now.
The main idea is that – with some resolution around which code is AI-authored and human-authored – engineering teams can better understand when and how to deploy AI-generated code (and when not to).
"span-detect-1 was evaluated by an independent team within Span. The team’s objective was to create an eval that’s free from training data contamination and reflecting realistic human and AI authored code patterns. The focus was on 3 sources: real world human, AI code authored by Devin crawled from public GitHub repositories, and AI samples that we synthesized for “brownfield” edits by leading LLMs. In the end, evaluation was performed with ~45K balanced datasets for TypeScript and Python each, and an 11K sample set for TSX."
# Logistic regression from scratch on the Iris dataset (Setosa vs Versicolor).
# NOTE(review): reconstructed from a whitespace-mangled paste. The "*"
# operators in compute_loss were eaten by markdown italics ("ynp.log" was
# "y * np.log"); they are restored below.

# load the dataset using the given url
iris = fetch_ucirepo(id=53)
X = iris.data.features
y = iris.data.targets
df = pd.concat([X, y], axis=1)

# Keep only Setosa and Versicolor
df = df[df['class'].isin(['Iris-setosa', 'Iris-versicolor'])]

# Separate features and labels (encode the two classes as 0/1)
df['class'] = df['class'].map({'Iris-setosa': 0, 'Iris-versicolor': 1})
X = df.iloc[:, :-1].values
y = df['class'].values.reshape(-1, 1)

# intercept: prepend a column of ones for the bias term
X = np.c_[np.ones((X.shape[0], 1)), X]

# train test split (80/20)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=True
)


# Logistic Regression (Gradient Descent)
def sigmoid(z):
    """Elementwise logistic function."""
    return 1 / (1 + np.exp(-z))


def compute_loss(y, y_pred):
    """Mean binary cross-entropy; 1e-9 guards against log(0)."""
    m = len(y)
    return -(1 / m) * np.sum(
        y * np.log(y_pred + 1e-9) + (1 - y) * np.log(1 - y_pred + 1e-9)
    )


# weights and parameters
theta = np.zeros((X_train.shape[1], 1))
lr = 0.01          # learning rate
iteration = 10000  # iterations

# Gradient Descent Loop
for epoch in range(iteration):
    z = np.dot(X_train, theta)
    y_pred = sigmoid(z)
    error = y_pred - y_train
    gradient = (1 / len(y_train)) * np.dot(X_train.T, error)
    theta -= lr * gradient
    if epoch % 1000 == 0:
        loss = compute_loss(y_train, y_pred)
        print(f"Epoch {epoch}: Loss = {loss:.4f}")

# Predictions and Metrics
y_test_pred = sigmoid(np.dot(X_test, theta))
y_test_class = (y_test_pred >= 0.5).astype(int)

# Accuracy
accuracy = np.mean(y_test_class == y_test) * 100
print("RESULTS")
print(f"Classification Accuracy on Test Data: {accuracy:.2f}%")

# Confusion Matrix
cm = confusion_matrix(y_test, y_test_class)
print("\nConfusion Matrix for Test data:")
print(cm)

print("\n--- Predict for a new flower sample ---")
print("Please enter the feature values:")

# Ask user for input
sepal_length = float(input("Enter Sepal Length (cm): "))
sepal_width = float(input("Enter Sepal Width (cm): "))
petal_length = float(input("Enter Petal Length (cm): "))
petal_width = float(input("Enter Petal Width (cm): "))

# Create feature array with bias term
new_sample = np.array([[1, sepal_length, sepal_width, petal_length, petal_width]])

# Predict probability and class
new_pred_prob = sigmoid(np.dot(new_sample, theta))
new_pred_class = (new_pred_prob >= 0.5).astype(int)

print(f"Predicted probability of being 'Iris-versicolor': {new_pred_prob[0][0]:.4f}")
if new_pred_class[0][0] == 1:
    print("Predicted Class: Iris-versicolor")
else:
    print("Predicted Class: Iris-setosa")
Also, what's the pricing?
/**
 * Reads batting records from a user-named file (one "name RECORD" pair per
 * line), aggregates per-player stats into a linked list, and prints each
 * player's counting stats plus batting average (BA) and on-base percentage
 * (OBP).
 *
 * Record letters: H=hit, O=out, K=strikeout, W=walk, P=hit-by-pitch,
 * S=sacrifice; any other character is ignored.
 */
public class Main {
    public static void main(String[] args) {
        LinkList linkedList = new LinkList();
        Scanner scanner = new Scanner(System.in);

        System.out.print("Enter input filename: ");
        String filename = scanner.nextLine();
        File file = new File(filename);
        if (!file.exists() || !file.canRead()) {
            System.out.println("Error: Cannot open the file.");
            System.exit(1);
        }

        // Fix: the original pre-initialized this with "new Scanner(System.in)",
        // allocating a stdin scanner that was immediately discarded and never
        // closed. Initialize to null instead; exit(1) below guarantees it is
        // non-null past the try/catch.
        Scanner fileScanner = null;
        try {
            fileScanner = new Scanner(file);
        } catch (Exception e) {
            System.out.println("Unexpected error opening file.");
            System.exit(1);
        }

        // Each line is "<name> <battingRecord>"; skip blank or malformed lines.
        while (fileScanner.hasNextLine()) {
            String line = fileScanner.nextLine();
            if (line.isEmpty()) continue;
            int spaceIndex = line.indexOf(' ');
            if (spaceIndex == -1) continue;
            String name = line.substring(0, spaceIndex);
            String battingRecord = line.substring(spaceIndex + 1);
            processPlayer(linkedList, name, battingRecord);
        }
        fileScanner.close();

        displayPlayers(linkedList);
        scanner.close();
    }

    /** Update an existing player's stats, or insert a new player, for one record line. */
    public static void processPlayer(LinkList linkedList, String name, String battingRecord) {
        Node curNode = linkedList.search(name);
        if (curNode != null) {
            updateStats(curNode.getPlayer(), battingRecord);
        } else {
            Player newPlayer = new Player(name);
            updateStats(newPlayer, battingRecord);
            linkedList.insert(newPlayer);
        }
    }

    /** Tally each plate-appearance letter in battingRecord onto the player. */
    public static void updateStats(Player player, String battingRecord) {
        for (char c : battingRecord.toCharArray()) {
            switch (c) {
                case 'H': player.setHits(player.getHits() + 1); break;
                case 'O': player.setOuts(player.getOuts() + 1); break;
                case 'K': player.setStrikeouts(player.getStrikeouts() + 1); break;
                case 'W': player.setWalks(player.getWalks() + 1); break;
                case 'P': player.setHbp(player.getHbp() + 1); break;
                case 'S': player.setSacrifices(player.getSacrifices() + 1); break;
                default: // unknown letters are silently ignored
            }
        }
    }

    /** Print name, counting stats, BA, and OBP for every player in list order. */
    public static void displayPlayers(LinkList linkedList) {
        Node current = linkedList.getHead();
        while (current != null) {
            Player player = current.getPlayer();
            // At-bats exclude walks, HBP, and sacrifices.
            int atBats = player.getHits() + player.getOuts() + player.getStrikeouts();
            double ba = (atBats == 0) ? 0.0 : (double) player.getHits() / atBats;
            int plateAppearances = atBats + player.getWalks() + player.getHbp() + player.getSacrifices();
            double obp = (plateAppearances == 0)
                    ? 0.0
                    : (double) (player.getHits() + player.getWalks() + player.getHbp()) / plateAppearances;
            System.out.printf("%s\t%d\t%d\t%d\t%d\t%d\t%d\t%.3f\t%.3f%n",
                    player.getName(), atBats, player.getHits(), player.getWalks(),
                    player.getStrikeouts(), player.getHbp(), player.getSacrifices(), ba, obp);
            current = current.getNext();
        }
    }
}