Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,4 @@ coverage.xml
docs/_build/

*~
llama.cpp/
46 changes: 46 additions & 0 deletions CodeLlamaHelper
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
class CodeLlamaHelper:
    """Helper class to interact with a local Code Llama instance.

    Talks to the relay started by ``codellama_server.py`` over plain HTTP:
    ``GET /health`` to probe availability and ``POST /suggestions`` to
    request an analysis of a piece of code.
    """

    def __init__(self, host="localhost", port=8080):
        # Defaults match the bundled codellama_server.py relay.
        self.host = host
        self.port = port
        self.is_connected = False
        self.check_connection()

    def check_connection(self):
        """Check if we can connect to the Code Llama server.

        Returns True (and caches it in ``is_connected``) when the server's
        /health endpoint answers 200; False on any error or non-200 status.
        """
        # Local import: this module has no top-level imports, and the
        # original code raised NameError on http.client at runtime.
        import http.client

        conn = None
        try:
            conn = http.client.HTTPConnection(self.host, self.port, timeout=2)
            conn.request("GET", "/health")
            response = conn.getresponse()
            self.is_connected = (response.status == 200)
            return self.is_connected
        except Exception:
            self.is_connected = False
            return False
        finally:
            # Always release the socket, including on the error path.
            if conn is not None:
                conn.close()

    def get_code_suggestions(self, code):
        """Send code to Code Llama and get suggestions back.

        Returns the suggestion text on success, or a human-readable error
        string (never raises) so the caller can display it directly.
        """
        import http.client
        import json

        if not self.is_connected:
            if not self.check_connection():
                return "Code Llama server not available. Please ensure it's running at {}:{}".format(
                    self.host, self.port)

        conn = None
        try:
            # A timeout keeps the caller from hanging forever if the
            # relay stalls mid-generation.
            conn = http.client.HTTPConnection(self.host, self.port, timeout=120)
            headers = {'Content-type': 'application/json'}
            data = json.dumps({"code": code, "max_tokens": 500})

            conn.request("POST", "/suggestions", data, headers)
            response = conn.getresponse()

            if response.status == 200:
                result = response.read().decode()
                return json.loads(result).get('suggestions', "No suggestions available")
            return "Error: " + str(response.status)
        except Exception as e:
            return "Error connecting to Code Llama: " + str(e)
        finally:
            # Close the connection on every path; the original leaked it
            # whenever an exception was raised after the connection opened.
            if conn is not None:
                conn.close()
88 changes: 85 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,89 @@
What is this?
=============
# Pippy

Pippy allows the student to examine, execute, and modify simple Python programs. In addition it is possible to write Python statements to play sounds, calculate expressions, or make simple text based interactive games.
A simple Python programming activity that lets you create, edit, and run Python code.

Pippy includes example programs ranging from very simple (Hello World) to more complex (Pong, Sierpinski Carpet, etc.)

You can also save your own Python programs within Pippy and export them as standalone Sugar activities.

## Code Llama Integration

Pippy now includes a built-in side pane that can analyze your code using a local [Code Llama](https://github.com/facebookresearch/codellama) instance through Ollama. This feature provides code suggestions, improvement ideas, and identifies potential issues in your code as you write it.

### Setting up Code Llama with Ollama

To use the Code Llama integration:

1. Install Ollama from https://ollama.ai/
2. Pull the Code Llama 7B Instruct model: `ollama pull codellama:7b-instruct`
3. Start the included server script
4. Launch Pippy and start coding - the side pane will automatically analyze your code

#### Quick Setup Guide

Here's how to get started with the Code Llama integration:

1. Install Ollama:
- Visit https://ollama.ai/ and follow the installation instructions for your platform
- Linux: `curl https://ollama.ai/install.sh | sh`
- macOS: Download from the Ollama website
- Windows: Download from the Ollama website

2. After installing Ollama, pull the Code Llama model:
```bash
ollama pull codellama:7b-code
```

3. Run the relay server from the Pippy directory:
```bash
./codellama_server.py
```

You can also specify different parameters:
```bash
./codellama_server.py --model codellama:7b --port 8080
```

4. Launch Pippy and start coding!

#### Using a Different Model

If you want to use a different model, you can pull it with Ollama and specify it when starting the server:

```bash
ollama pull codellama:7b-instruct
./codellama_server.py --model codellama:7b-instruct
```

For better performance and accuracy, consider these models:

1. **For speed (smaller models):**
```bash
ollama pull codellama:7b-code
./codellama_server.py --model codellama:7b-code
```

2. **For better accuracy (larger models):**
```bash
ollama pull deepseek-coder:6.7b
./codellama_server.py --model deepseek-coder:6.7b
```

3. **For the best balance of speed and accuracy:**
```bash
ollama pull wizardcoder:7b-python
./codellama_server.py --model wizardcoder:7b-python
```

Other good models for code analysis:
- `codellama:7b`
- `codellama:13b` (slower but more accurate)
- `wizardcoder`
- `deepseek-coder`

## License Information

Pippy is licensed under the GPLv3.

How to use?
===========
Expand Down
251 changes: 251 additions & 0 deletions codellama_server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
#!/usr/bin/python3
"""Simple Code Llama server for Pippy integration

This script provides a simple HTTP server that uses Ollama to run Code Llama
and makes it available to Pippy via a REST API.

Requirements:
- Ollama installed and running locally
- Code Llama model pulled in Ollama

Usage:
python3 codellama_server.py [--model MODEL_NAME] [--port PORT] [--ollama-url OLLAMA_URL]
"""

from http.server import HTTPServer, BaseHTTPRequestHandler
import json
import argparse
import os
import sys
import urllib.request
import urllib.error
import socket
import logging
import time
from urllib.error import URLError

# Configure timestamped logging for the whole relay process.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Default Ollama model when the server is started without --model.
DEFAULT_MODEL = "codellama:7b-code"
# Default Ollama generate-API URL (Ollama's standard local port is 11434).
DEFAULT_OLLAMA_URL = "http://localhost:11434/api/generate"
# Maximum number of attempts for a single Ollama request.
MAX_RETRIES = 3
# Delay between retry attempts, in seconds.
RETRY_DELAY = 2
# Default -- and upper bound -- for tokens requested per suggestion.
DEFAULT_MAX_TOKENS = 100
# Per-request timeout, in seconds (CPU-only generation can be slow).
CONNECTION_TIMEOUT = 120

class CodeLlamaHandler(BaseHTTPRequestHandler):
    """HTTP handler that relays Pippy requests to the Ollama generate API.

    Endpoints:
      GET  /health       -> 200 when the relay is up (probed by Pippy's helper)
      POST /suggestions  -> code analysis for {"code": ..., "max_tokens": ...}
      POST /chat         -> free-form chat for {"message": ...}
    """

    # NOTE(review): the original overrode __init__ and read args[2] to grab
    # the server instance; that breaks if the handler is ever constructed
    # with keyword arguments and shadows the `server` attribute the base
    # request handler manages itself.  The base class sets self.server
    # before dispatching any do_* method, so lazy properties are equivalent
    # and safe.

    @property
    def model_name(self):
        """Ollama model to use; falls back to the module default."""
        return self.server.model_name if hasattr(self.server, 'model_name') else DEFAULT_MODEL

    @property
    def ollama_url(self):
        """Ollama generate-API URL; falls back to the module default."""
        return self.server.ollama_url if hasattr(self.server, 'ollama_url') else DEFAULT_OLLAMA_URL

    def log_message(self, format, *args):
        """Override to prevent logging of every request"""
        pass

    def do_GET(self):
        """Answer the GET /health probe issued by Pippy's CodeLlamaHelper.

        Without this, the helper's check_connection() always failed (the
        base class returns 501 for unimplemented methods), so Pippy
        reported the server unavailable even while it was running.
        """
        if self.path == "/health":
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({'status': 'ok'}).encode())
        else:
            self.send_response(404)
            self.end_headers()

    def check_ollama_health(self):
        """Check if Ollama is running and accessible.

        Sends a minimal generate request as a reachability probe.
        NOTE(review): this costs a real (small) generation on every call.
        """
        try:
            test_data = json.dumps({
                "model": self.model_name,
                "prompt": "test",
                "max_tokens": 10
            }).encode('utf-8')
            req = urllib.request.Request(
                self.ollama_url,
                data=test_data,
                headers={'Content-Type': 'application/json'},
                method='POST'
            )
            # `with` closes the response promptly; the original leaked it.
            with urllib.request.urlopen(req, timeout=10) as response:
                if response.status == 200:
                    return True
        except Exception as e:
            logger.error(f"Ollama health check failed: {str(e)}")
            return False
        return False

    def make_ollama_request(self, data_json):
        """POST ``data_json`` to Ollama with retry logic.

        Returns the decoded response body on success, None if every
        attempt got a non-200 status, and raises on repeated timeouts or
        when Ollama is unreachable.
        """
        if not self.check_ollama_health():
            raise Exception("Ollama is not running or not accessible")

        for attempt in range(MAX_RETRIES):
            try:
                logger.info(f"Making request to Ollama (attempt {attempt + 1}/{MAX_RETRIES})")
                req = urllib.request.Request(
                    self.ollama_url,
                    data=data_json,
                    headers={'Content-Type': 'application/json'}
                )
                with urllib.request.urlopen(req, timeout=CONNECTION_TIMEOUT) as response:
                    if response.status == 200:
                        logger.info("Successfully received response from Ollama")
                        return response.read().decode('utf-8')
                    else:
                        # Non-200 is retried on the next loop iteration.
                        logger.warning(f"Received non-200 status code: {response.status}")
            except (URLError, socket.timeout) as e:
                if attempt < MAX_RETRIES - 1:
                    logger.warning(f"Attempt {attempt + 1} failed: {str(e)}. Retrying in {RETRY_DELAY} seconds...")
                    time.sleep(RETRY_DELAY)
                else:
                    logger.error(f"All attempts failed. Last error: {str(e)}")
                    raise Exception(f"Request timed out after {MAX_RETRIES} attempts. Ollama may be overloaded or not responding properly.")
        return None

    def do_POST(self):
        """Handle POST requests for /suggestions and /chat."""
        try:
            content_length = int(self.headers['Content-Length'])
            post_data = self.rfile.read(content_length)
            data = json.loads(post_data.decode('utf-8'))

            if self.path == "/suggestions":
                # Code-analysis endpoint used by Pippy's side pane.
                code = data.get('code', '')
                # Clamp the client's request so a misbehaving client
                # cannot ask Ollama for an unbounded generation.
                max_tokens = min(data.get('max_tokens', DEFAULT_MAX_TOKENS), DEFAULT_MAX_TOKENS)

                if not code.strip():
                    self.send_response(400)
                    self.send_header('Content-type', 'application/json')
                    self.end_headers()
                    self.wfile.write(json.dumps({'error': 'No code provided'}).encode())
                    return

                prompt = f"""Analyze this Python code:

```python
{code}
```

Provide a concise analysis:
1. What it does (1-2 sentences)
2. Key improvements (2-3 points)
3. Critical issues (if any)"""

                # NOTE(review): Ollama's generate API usually expects
                # sampling parameters nested under "options" (e.g.
                # num_predict); confirm these top-level keys are honored.
                request_data = {
                    "model": self.model_name,
                    "prompt": prompt,
                    "max_tokens": max_tokens,
                    "temperature": 0.1,
                    "top_p": 0.9,
                    "top_k": 40,
                    "repeat_penalty": 1.1,
                    "stream": False,
                    "num_ctx": 2048,  # Limit context window
                    "num_thread": 4  # Limit CPU threads
                }

            elif self.path == "/chat":
                # Free-form chat endpoint.
                message = data.get('message', '')
                if not message.strip():
                    self.send_response(400)
                    self.send_header('Content-type', 'application/json')
                    self.end_headers()
                    self.wfile.write(json.dumps({'error': 'No message provided'}).encode())
                    return

                request_data = {
                    "model": self.model_name,
                    "prompt": message,
                    "max_tokens": 100,  # Limit chat response length
                    "temperature": 0.7,  # Slightly higher for chat
                    "top_p": 0.9,
                    "top_k": 40,
                    "repeat_penalty": 1.1,
                    "stream": False,
                    "num_ctx": 2048,
                    "num_thread": 4
                }

            else:
                self.send_response(404)
                self.end_headers()
                return

            logger.info("Generating response...")
            logger.info(f"Using model: {self.model_name}")
            logger.info("Sending request to Ollama...")

            data_json = json.dumps(request_data).encode('utf-8')
            response_text = self.make_ollama_request(data_json)

            if response_text:
                response_data = json.loads(response_text)
                if self.path == "/suggestions":
                    result = response_data.get('response', '')
                    if not result.strip():
                        result = "No suggestions available at this time."
                    response = {'suggestions': result}
                else:  # /chat endpoint
                    result = response_data.get('response', '')
                    if not result.strip():
                        result = "I couldn't generate a response. Please try again."
                    response = {'response': result}

                self.send_response(200)
                self.send_header('Content-type', 'application/json')
                self.end_headers()
                self.wfile.write(json.dumps(response).encode())
                logger.info("Response sent successfully")
            else:
                raise Exception("Failed to get response from Ollama")

        except json.JSONDecodeError as e:
            logger.error(f"JSON decode error: {str(e)}")
            self.send_response(400)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({'error': 'Invalid JSON data'}).encode())
        except Exception as e:
            logger.error(f"Unexpected error: {str(e)}")
            self.send_response(500)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({'error': str(e)}).encode())

def run_server(port=8080, model_name=DEFAULT_MODEL, ollama_url=DEFAULT_OLLAMA_URL):
    """Run the relay HTTP server until interrupted (Ctrl+C).

    Args:
        port: TCP port to listen on (binds all interfaces).
        model_name: Ollama model name exposed to the request handler.
        ollama_url: Ollama generate-API endpoint URL.
    """
    class CustomHTTPServer(HTTPServer):
        """HTTPServer that carries the model/URL for handlers to read."""
        def __init__(self, server_address, RequestHandlerClass, model_name, ollama_url):
            self.model_name = model_name
            self.ollama_url = ollama_url
            super().__init__(server_address, RequestHandlerClass)

    server_address = ('', port)
    httpd = CustomHTTPServer(server_address, CodeLlamaHandler, model_name, ollama_url)
    logger.info(f"Starting Code Llama server on port {port}")
    logger.info(f"Using model: {model_name}")
    logger.info(f"Ollama API URL: {ollama_url}")
    logger.info("Press Ctrl+C to stop the server")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        logger.info("\nServer stopped")
    finally:
        # Release the listening socket on every exit path; the original
        # leaked it on Ctrl+C and on unexpected errors.
        httpd.server_close()

if __name__ == "__main__":
    # CLI entry point: parse options, then hand off to run_server().
    arg_parser = argparse.ArgumentParser(
        description='Run a Code Llama server for Pippy integration')
    arg_parser.add_argument(
        '--model', type=str, default=DEFAULT_MODEL,
        help=f'Ollama model to use (default: {DEFAULT_MODEL})')
    arg_parser.add_argument(
        '--port', type=int, default=8080,
        help='Port to run the server on (default: 8080)')
    arg_parser.add_argument(
        '--ollama-url', type=str, default=DEFAULT_OLLAMA_URL,
        help=f'Ollama API URL (default: {DEFAULT_OLLAMA_URL})')

    options = arg_parser.parse_args()
    run_server(port=options.port, model_name=options.model,
               ollama_url=options.ollama_url)
Loading